/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config_components.h"

#include "libavutil/arm/asm.S"

/* chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
/*
 * Emits ff_<type>_<codec>_chroma_mc8_neon: bilinear chroma interpolation of
 * an 8-pixel-wide block, two rows per loop pass.
 *
 *   type  : "put" stores the result, "avg" additionally does a rounding
 *           average (vrhadd) with the bytes already present at dst.
 *   codec : "h264" (default) narrows with a rounding shift by 6;
 *           any other codec adds the bias held in q11 and then narrows with
 *           a truncating shift by 6 ("vc1" uses 28, "rv40" loads the bias
 *           from the rv40bias table).
 *
 * Registers: r0 = dst, r1 = src, r2 = stride, r3 = h, as named in the
 * signature above; x and y are read from the stack into r4 and r5.
 *
 * Filter weights computed below (they always sum to 64):
 *   r4  = 64 - 8x - 8y + xy = (8 - x)(8 - y)   current row, current pixel
 *   r12 = 8x - xy           = x(8 - y)         current row, pixel + 1
 *   r6  = 8y - xy           = (8 - x)y         next row,    current pixel
 *   r7  = xy                                   next row,    pixel + 1
 *
 * Code paths:
 *   1: xy != 0              full four-tap filter
 *   3: x == 0, y != 0       vertical two-tap filter
 *   4: y == 0, x != 0       horizontal two-tap filter
 *   5: x == 0, y == 0       single tap (weight 64)
 *
 * Every loop stores two rows and tests the counter afterwards, so it runs
 * at least once. NOTE(review): this presumes h is a positive even number;
 * confirm against the callers. The [r0,:64] / [lr,:64] alignment hints
 * require dst to be 8-byte aligned.
 *
 * Only d0-d7 and q8-q11 are used as NEON registers.
 */
.macro  h264_chroma_mc8 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
        push            {r4-r7, lr}
        ldrd            r4,  r5,  [sp, #20]     @ r4 = x, r5 = y (first stack args, above the 5 saved words)
  .ifc \type,avg
        mov             lr,  r0                 @ lr = separate read pointer into dst
  .endif
        pld             [r1]
        pld             [r1, r2]

  .ifc \codec,rv40
        /* q11 = rv40bias[y >> 1][x >> 1], replicated to all eight lanes.
         * Each table row is four 16-bit entries, hence the lsl by 3 for
         * the row offset and the lsl by 1 for the column offset. */
        movrel          r6,  rv40bias
        lsr             r7,  r5,  #1
        add             r6,  r6,  r7,  lsl #3
        lsr             r7,  r4,  #1
        add             r6,  r6,  r7,  lsl #1
        vld1.16         {d22[],d23[]}, [r6,:16]
  .endif
  .ifc \codec,vc1
        vmov.u16        q11, #28                @ constant bias added before the truncating shift
  .endif

        /* r7 = x * y with the Z flag set when the product is zero.
         * NOTE(review): the leading prefixes on the next three lines
         * presumably select the ARM (muls) or Thumb (mul + cmp) variant;
         * they are defined outside this file - confirm in asm.S.
         * The rsb/sub/add that follow do not update the flags, so the
         * beq below still tests the product. */
A       muls            r7,  r4,  r5
T       mul             r7,  r4,  r5
T       cmp             r7,  #0
        rsb             r6,  r7,  r5,  lsl #3   @ r6  = 8y - xy
        rsb             r12, r7,  r4,  lsl #3   @ r12 = 8x - xy
        sub             r4,  r7,  r4,  lsl #3   @ r4  = xy - 8x
        sub             r4,  r4,  r5,  lsl #3   @ r4  = xy - 8x - 8y
        add             r4,  r4,  #64           @ r4  = (8 - x)(8 - y)

        beq             2f                      @ xy == 0: at most two taps needed

        /* Four-tap path: d0..d3 hold the four weights; d4/d5 hold the
         * current row and the same row shifted left by one pixel. */
        vdup.8          d0,  r4
        vdup.8          d1,  r12
        vld1.8          {d4, d5}, [r1], r2
        vdup.8          d2,  r6
        vdup.8          d3,  r7
        vext.8          d5,  d4,  d5,  #1

        /* Per pass: q8 = output row n (rows n and n+1 of src),
         *           q9 = output row n+1 (rows n+1 and n+2 of src).
         * The row loaded last into d4/d5 is reused as the top row of
         * the next pass. */
1:      vld1.8          {d6, d7}, [r1], r2
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d5,  d1
        vext.8          d7,  d6,  d7,  #1
        vld1.8          {d4, d5}, [r1], r2
        vmlal.u8        q8,  d6,  d2
        pld             [r1]
        vext.8          d5,  d4,  d5,  #1
        vmlal.u8        q8,  d7,  d3
        vmull.u8        q9,  d6,  d0
        subs            r3,  r3,  #2            @ two rows done; flags survive the NEON ops until bgt
        vmlal.u8        q9,  d7,  d1
        vmlal.u8        q9,  d4,  d2
        vmlal.u8        q9,  d5,  d3
        pld             [r1, r2]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vadd.u16        q9,  q9,  q11
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
  .endif
  .ifc \type,avg
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
  .endif
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
        bgt             1b

        pop             {r4-r7, pc}

        /* xy == 0, so at least one of x and y is zero and r12 + r6 is the
         * single remaining non-zero weight (8x or 8y), or zero if both
         * fractions are zero. */
2:      adds            r12, r12, r6
        vdup.8          d0,  r4
        beq             5f                      @ x == 0 and y == 0
        tst             r6,  r6                 @ Z set when y == 0; vdup leaves flags alone
        vdup.8          d1,  r12

        beq             4f                      @ y == 0: horizontal-only filter

        /* Vertical two-tap path (x == 0): d0 weights the upper row,
         * d1 the lower row. */
        vld1.8          {d4}, [r1], r2

3:      vld1.8          {d6}, [r1], r2
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d6,  d1
        vld1.8          {d4}, [r1], r2
        vmull.u8        q9,  d6,  d0
        vmlal.u8        q9,  d4,  d1
        pld             [r1]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vadd.u16        q9,  q9,  q11
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
  .endif
        pld             [r1, r2]
  .ifc \type,avg
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
  .endif
        subs            r3,  r3,  #2
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
        bgt             3b

        pop             {r4-r7, pc}

        /* Horizontal two-tap path (y == 0): d0 weights the pixel,
         * d1 its right-hand neighbour (row shifted by one byte). */
4:      vld1.8          {d4, d5}, [r1], r2
        vld1.8          {d6, d7}, [r1], r2
        vext.8          d5,  d4,  d5,  #1
        vext.8          d7,  d6,  d7,  #1
        pld             [r1]
        subs            r3,  r3,  #2
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d5,  d1
        vmull.u8        q9,  d6,  d0
        vmlal.u8        q9,  d7,  d1
        pld             [r1, r2]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vadd.u16        q9,  q9,  q11
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
  .endif
  .ifc \type,avg
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
  .endif
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
        bgt             4b

        pop             {r4-r7, pc}

        /* No fractional offset: each pixel is scaled by d0 (64 here) and
         * sent through the same narrowing as the filtered paths. */
5:      vld1.8          {d4}, [r1], r2
        vld1.8          {d5}, [r1], r2
        pld             [r1]
        subs            r3,  r3,  #2
        vmull.u8        q8,  d4,  d0
        vmull.u8        q9,  d5,  d0
        pld             [r1, r2]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
        vrshrn.u16      d17, q9,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vadd.u16        q9,  q9,  q11
        vshrn.u16       d16, q8,  #6
        vshrn.u16       d17, q9,  #6
  .endif
  .ifc \type,avg
        vld1.8          {d20}, [lr,:64], r2
        vld1.8          {d21}, [lr,:64], r2
        vrhadd.u8       q8,  q8,  q10
  .endif
        vst1.8          {d16}, [r0,:64], r2
        vst1.8          {d17}, [r0,:64], r2
        bgt             5b

        pop             {r4-r7, pc}
endfunc
.endm

/* chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y) */
/*
 * Emits ff_<type>_<codec>_chroma_mc4_neon: bilinear chroma interpolation of
 * a 4-pixel-wide block, two rows per loop pass.
 *
 *   type  : "put" stores the result, "avg" additionally does a rounding
 *           average (vrhadd) with the bytes already present at dst.
 *   codec : "h264" (default) narrows with a rounding shift by 6;
 *           any other codec adds the bias held in q11 and then narrows with
 *           a truncating shift by 6 ("vc1" uses 28, "rv40" loads the bias
 *           from the rv40bias table).
 *
 * Registers: r0 = dst, r1 = src, r2 = stride, r3 = h, as named in the
 * signature above; x and y are read from the stack into r4 and r5.
 *
 * Filter weights (they always sum to 64):
 *   r4  = (8 - x)(8 - y)    current row, current pixel
 *   r12 = x(8 - y)          current row, pixel + 1
 *   r6  = (8 - x)y          next row,    current pixel
 *   r7  = xy                next row,    pixel + 1
 *
 * Because a row is only four pixels wide, two 4-byte groups are packed
 * into one d register (via vtrn.32) so a single vmull/vmlal handles two
 * taps at once; the two halves of the 16-bit product are then summed with
 * vadd.i16 to form the per-pixel result.
 *
 * Code paths:
 *   1: xy != 0              full four-tap filter
 *   3: x == 0, y != 0       vertical two-tap filter
 *   4: y == 0, x != 0       horizontal two-tap filter
 *   5: x == 0, y == 0       single tap (weight 64)
 *
 * Every loop stores two rows and tests the counter afterwards, so it runs
 * at least once. NOTE(review): this presumes h is a positive even number;
 * confirm against the callers. The [r0,:32] / [lr,:32] alignment hints
 * require dst to be 4-byte aligned.
 */
.macro  h264_chroma_mc4 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
        push            {r4-r7, lr}
        ldrd            r4,  r5,  [sp, #20]     @ r4 = x, r5 = y (first stack args, above the 5 saved words)
  .ifc \type,avg
        mov             lr,  r0                 @ lr = separate read pointer into dst
  .endif
        pld             [r1]
        pld             [r1, r2]

  .ifc \codec,rv40
        /* q11 = rv40bias[y >> 1][x >> 1], replicated to all eight lanes.
         * Each table row is four 16-bit entries, hence the lsl by 3 for
         * the row offset and the lsl by 1 for the column offset. */
        movrel          r6,  rv40bias
        lsr             r7,  r5,  #1
        add             r6,  r6,  r7,  lsl #3
        lsr             r7,  r4,  #1
        add             r6,  r6,  r7,  lsl #1
        vld1.16         {d22[],d23[]}, [r6,:16]
  .endif
  .ifc \codec,vc1
        vmov.u16        q11, #28                @ constant bias added before the truncating shift
  .endif

        /* r7 = x * y with the Z flag set when the product is zero.
         * NOTE(review): the leading prefixes on the next three lines
         * presumably select the ARM (muls) or Thumb (mul + cmp) variant;
         * they are defined outside this file - confirm in asm.S.
         * The rsb/sub/add that follow do not update the flags, so the
         * beq below still tests the product. */
A       muls            r7,  r4,  r5
T       mul             r7,  r4,  r5
T       cmp             r7,  #0
        rsb             r6,  r7,  r5,  lsl #3   @ r6  = 8y - xy
        rsb             r12, r7,  r4,  lsl #3   @ r12 = 8x - xy
        sub             r4,  r7,  r4,  lsl #3   @ r4  = xy - 8x
        sub             r4,  r4,  r5,  lsl #3   @ r4  = xy - 8x - 8y
        add             r4,  r4,  #64           @ r4  = (8 - x)(8 - y)

        beq             2f                      @ xy == 0: at most two taps needed

        vdup.8          d0,  r4
        vdup.8          d1,  r12
        vld1.8          {d4},     [r1], r2
        vdup.8          d2,  r6
        vdup.8          d3,  r7

        /* d4 = { row[0..3], row[1..4] }. The byte that vext pulls from the
         * not-yet-written d5 lands in the high half of d5 after the vtrn
         * and is never used. */
        vext.8          d5,  d4,  d5,  #1
        vtrn.32         d4,  d5

        /* d0 = { 4 x current-pixel weight, 4 x pixel+1 weight } for the
         * upper row, d2 = the same pair for the lower row. */
        vtrn.32         d0,  d1
        vtrn.32         d2,  d3

1:      vld1.8          {d6},     [r1], r2
        vext.8          d7,  d6,  d7,  #1
        vtrn.32         d6,  d7
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d6,  d2
        vld1.8          {d4},     [r1], r2
        vext.8          d5,  d4,  d5,  #1
        vtrn.32         d4,  d5
        pld             [r1]
        vmull.u8        q9,  d6,  d0
        vmlal.u8        q9,  d4,  d2
        vadd.i16        d16, d16, d17           @ first output row: fold the two tap groups
        vadd.i16        d17, d18, d19           @ second output row
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vshrn.u16       d16, q8,  #6
  .endif
        subs            r3,  r3,  #2
        pld             [r1, r2]
  .ifc \type,avg
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
  .endif
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
        bgt             1b

        pop             {r4-r7, pc}

        /* xy == 0, so at least one of x and y is zero and r12 + r6 is the
         * single remaining non-zero weight (8x or 8y), or zero if both
         * fractions are zero. */
2:      adds            r12, r12, r6
        vdup.8          d0,  r4
        beq             5f                      @ x == 0 and y == 0
        tst             r6,  r6                 @ Z set when y == 0; the NEON ops leave flags alone
        vdup.8          d1,  r12
        vtrn.32         d0,  d1                 @ d0 = d1 = { 4 x first weight, 4 x second weight }

        beq             4f                      @ y == 0: horizontal-only filter

        /* Vertical two-tap path (x == 0). d4 holds two consecutive rows;
         * the rows alternate between the low and high half of d4, so d1 is
         * d0 with its halves swapped to keep each weight on the right row. */
        vext.32         d1,  d0,  d1,  #1
        vld1.32         {d4[0]},  [r1], r2

3:      vld1.32         {d4[1]},  [r1], r2
        vmull.u8        q8,  d4,  d0
        vld1.32         {d4[0]},  [r1], r2
        vmull.u8        q9,  d4,  d1
        vadd.i16        d16, d16, d17
        vadd.i16        d17, d18, d19
        pld             [r1]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vshrn.u16       d16, q8,  #6
  .endif
  .ifc \type,avg
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
  .endif
        subs            r3,  r3,  #2
        pld             [r1, r2]
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
        bgt             3b

        pop             {r4-r7, pc}

        /* Horizontal two-tap path (y == 0): each row is packed as
         * { row[0..3], row[1..4] } and multiplied by d0. */
4:      vld1.8          {d4},     [r1], r2
        vld1.8          {d6},     [r1], r2
        vext.8          d5,  d4,  d5,  #1
        vext.8          d7,  d6,  d7,  #1
        vtrn.32         d4,  d5
        vtrn.32         d6,  d7
        vmull.u8        q8,  d4,  d0
        vmull.u8        q9,  d6,  d0
        subs            r3,  r3,  #2
        vadd.i16        d16, d16, d17
        vadd.i16        d17, d18, d19
        pld             [r1]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vshrn.u16       d16, q8,  #6
  .endif
  .ifc \type,avg
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
  .endif
        pld             [r1, r2]                @ row after next, matching the other loops (hint only)
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
        bgt             4b

        pop             {r4-r7, pc}

        /* No fractional offset: two rows are loaded into the halves of d4,
         * scaled by d0 (64 here) and narrowed like the filtered paths. */
5:      vld1.32         {d4[0]},  [r1], r2
        vld1.32         {d4[1]},  [r1], r2
        vmull.u8        q8,  d4,  d0
        subs            r3,  r3,  #2
        pld             [r1]
  .ifc \codec,h264
        vrshrn.u16      d16, q8,  #6
  .else
        vadd.u16        q8,  q8,  q11
        vshrn.u16       d16, q8,  #6
  .endif
  .ifc \type,avg
        vld1.32         {d20[0]}, [lr,:32], r2
        vld1.32         {d20[1]}, [lr,:32], r2
        vrhadd.u8       d16, d16, d20
  .endif
        pld             [r1, r2]                @ row after next, matching the other loops (hint only)
        vst1.32         {d16[0]}, [r0,:32], r2
        vst1.32         {d16[1]}, [r0,:32], r2
        bgt             5b

        pop             {r4-r7, pc}
endfunc
.endm

/*
 * Emits ff_<type>_h264_chroma_mc2_neon: bilinear chroma interpolation of a
 * 2-pixel-wide block, two rows per loop pass (H.264 rounding only).
 *
 *   type : "put" stores the result, "avg" additionally does a rounding
 *          average (vrhadd) with the bytes already present at dst.
 *
 * NOTE(review): the argument layout is presumably the same as for the
 * mc8/mc4 variants in this file (r0 = dst, r1 = src, r2 = stride, r3 = h,
 * x and y on the stack) - the usage below is consistent with that.
 *
 * When x and y are both zero the block is copied (put) or averaged (avg)
 * without filtering. Otherwise the four weights
 *   (8 - x)(8 - y), x(8 - y), (8 - x)y, xy
 * are applied and the sum is narrowed with a rounding shift by 6.
 *
 * Both loops store two rows and test the counter afterwards, so they run
 * at least once. NOTE(review): this presumes h is a positive even number;
 * confirm against the callers. The [r0,:16] hints require dst to be
 * 2-byte aligned.
 */
.macro  h264_chroma_mc2 type
function ff_\type\()_h264_chroma_mc2_neon, export=1
        push            {r4-r6, lr}
        ldr             r4,  [sp, #16]          @ r4 = x (first stack arg, above the 4 saved words)
        ldr             lr,  [sp, #20]          @ lr = y
        pld             [r1]
        pld             [r1, r2]
        orrs            r5,  r4,  lr
        beq             2f                      @ x == 0 and y == 0: no filtering

        mul             r5,  r4,  lr            @ r5  = xy
        rsb             r6,  r5,  lr,  lsl #3   @ r6  = 8y - xy
        rsb             r12, r5,  r4,  lsl #3   @ r12 = 8x - xy
        sub             r4,  r5,  r4,  lsl #3   @ r4  = xy - 8x
        sub             r4,  r4,  lr,  lsl #3   @ r4  = xy - 8x - 8y
        add             r4,  r4,  #64           @ r4  = (8 - x)(8 - y)
        vdup.8          d0,  r4
        vdup.8          d2,  r12
        vdup.8          d1,  r6
        vdup.8          d3,  r5
        /* Interleave in 16-bit pairs: d0 alternates the two upper-row
         * weights, d1 the two lower-row weights. */
        vtrn.16         q0,  q1
1:
        /* q2 = { row0, row1, row1, row2 }, four bytes each. r1 moves on by
         * two rows; the third row is read without advancing because it is
         * the first row of the next pass. */
        vld1.32         {d4[0]},  [r1], r2
        vld1.32         {d4[1]},  [r1], r2
        vrev64.32       d5,  d4
        vld1.32         {d5[1]},  [r1]
        /* Pair every 2-pixel group with the group shifted by one pixel so
         * the data layout matches the interleaved weights. */
        vext.8          q3,  q2,  q2,  #1
        vtrn.16         q2,  q3
        vmull.u8        q8,  d4,  d0
        vmlal.u8        q8,  d5,  d1
  .ifc \type,avg
        /* Fetch both dst rows, then step r0 back to the first row so the
         * stores below land in the right place. */
        vld1.16         {d18[0]}, [r0,:16], r2
        vld1.16         {d18[1]}, [r0,:16]
        sub             r0,  r0,  r2
  .endif
        vtrn.32         d16, d17                @ line up the partial sums of each pixel
        vadd.i16        d16, d16, d17           @ d16 = 2 pixels of row 0, 2 pixels of row 1
        vrshrn.u16      d16, q8,  #6
  .ifc \type,avg
        vrhadd.u8       d16, d16, d18
  .endif
        vst1.16         {d16[0]}, [r0,:16], r2
        vst1.16         {d16[1]}, [r0,:16], r2
        subs            r3,  r3,  #2
        bgt             1b
        pop             {r4-r6, pc}
2:
  .ifc \type,put
        /* Plain copy of two 16-bit rows through core registers. */
        ldrh_post       r5,  r1,  r2
        strh_post       r5,  r0,  r2
        ldrh_post       r6,  r1,  r2
        strh_post       r6,  r0,  r2
  .else
        /* Rounding average of two source rows with the two dst rows. */
        vld1.16         {d16[0]}, [r1], r2
        vld1.16         {d16[1]}, [r1], r2
        vld1.16         {d18[0]}, [r0,:16], r2
        vld1.16         {d18[1]}, [r0,:16]
        sub             r0,  r0,  r2            @ back to the first dst row for the stores
        vrhadd.u8       d16, d16, d18
        vst1.16         {d16[0]}, [r0,:16], r2
        vst1.16         {d16[1]}, [r0,:16], r2
  .endif
        subs            r3,  r3,  #2
        bgt             2b
        pop             {r4-r6, pc}
endfunc
.endm

439cabdff1aSopenharmony_ci        h264_chroma_mc8 put
440cabdff1aSopenharmony_ci        h264_chroma_mc8 avg
441cabdff1aSopenharmony_ci        h264_chroma_mc4 put
442cabdff1aSopenharmony_ci        h264_chroma_mc4 avg
443cabdff1aSopenharmony_ci        h264_chroma_mc2 put
444cabdff1aSopenharmony_ci        h264_chroma_mc2 avg
445cabdff1aSopenharmony_ci
#if CONFIG_RV40_DECODER
/*
 * Bias added before the truncating shift in the rv40 variants.
 * 4 x 4 table of 16-bit values, read by the mc8/mc4 macros as
 * rv40bias[y >> 1][x >> 1].
 */
const   rv40bias
        .short           0, 16, 32, 16
        .short          32, 28, 32, 28
        .short           0, 32, 16, 32
        .short          32, 28, 32, 28
endconst

        h264_chroma_mc8 put, rv40
        h264_chroma_mc8 avg, rv40
        h264_chroma_mc4 put, rv40
        h264_chroma_mc4 avg, rv40
#endif

#if CONFIG_VC1DSP
        /* VC-1 put/avg functions for block widths 8 and 4 (constant bias of 28). */
        h264_chroma_mc8 put, vc1
        h264_chroma_mc8 avg, vc1
        h264_chroma_mc4 put, vc1
        h264_chroma_mc4 avg, vc1
#endif