/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* Presumably supplies the CONFIG_RV40_DECODER / CONFIG_VC1DSP switches tested
 * at the end of this file (TODO confirm). */
#include "config_components.h"

/* Presumably supplies the function/endfunc, const/endconst and movrel helpers
 * used below (TODO confirm). */
#include "libavutil/aarch64/asm.S"

/*
 * chroma_mc8(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
 *
 * Emits ff_<type>_<codec>_chroma_mc8_neon: bilinear chroma interpolation of
 * an 8-pixel-wide block, two rows per loop iteration.
 *
 * Macro arguments:
 *   type   avg: the result is averaged (urhadd) with the bytes already at
 *          dst, which are read through x8.  Any other value (instantiated
 *          as put) stores the result directly.
 *   codec  h264 (default): narrow with a rounding shift right by 6.
 *          Any other value: add the bias held in v22, then narrow with a
 *          truncating shift right by 6.  v22 is loaded from rv40bias for
 *          rv40 and set to the constant 28 for vc1.
 *
 * Inputs, in the order of the prototype above:
 *   x0 = dst, x1 = src, x2 = stride, w3 = h, w4 = x, w5 = y
 *
 * Weights computed in the prologue:
 *   w7  = D = x * y
 *   w6  = C = 8 * y - x * y         = (8 - x) * y
 *   w12 = B = 8 * x - x * y         = x * (8 - y)
 *   w4  = A = x*y - 8*x - 8*y + 64  = (8 - x) * (8 - y)
 * They are broadcast as bytes (dup .8B), which assumes 0 <= x, y <= 7
 * (TODO confirm against callers).
 *
 * Dispatch:
 *   x * y != 0          -> 1:  full 2-D filter (A, B, C, D)
 *   x == 0 and y == 0   -> 5:  single weight A (equals 64 here)
 *   y == 0              -> 4:  horizontal only, weights A and B + C
 *   otherwise (x == 0)  -> 3:  vertical only, weights A and B + C
 *
 * Each loop consumes two rows and exits when h - 2 <= 0, so h is assumed to
 * be even and positive (TODO confirm against callers).
 * The 2-D and horizontal paths load 16 bytes per source row.
 *
 * Clobbers: x0, x1, w3, w4, x6, w7, x8 (avg), w9, w10 (rv40), w12-w14,
 *           v0-v7, v16, v17, v20, v21 (avg), v22 (rv40/vc1), flags.
 */
.macro  h264_chroma_mc8 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
  .ifc \type,avg
        mov             x8,  x0                 // x8 = read pointer into dst
  .endif
        prfm            pldl1strm, [x1]
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,rv40
        // byte offset = (y >> 1) * 8 + (x >> 1) * 2 into rv40bias
        movrel          x6,  rv40bias
        lsr             w9,  w5,  #1
        lsr             w10, w4,  #1
        lsl             w9,  w9,  #3
        lsl             w10, w10, #1
        add             w9,  w9,  w10
        add             x6,  x6,  w9, UXTW
        ld1r            {v22.8H}, [x6]          // broadcast the bias to all lanes
  .endif
  .ifc \codec,vc1
        movi            v22.8H,   #28
  .endif
        mul             w7,  w4,  w5            // D = x * y
        lsl             w14, w5,  #3            // 8 * y
        lsl             w13, w4,  #3            // 8 * x
        cmp             w7,  #0                 // flags stay live until b.eq below
        sub             w6,  w14, w7            // C = 8y - xy
        sub             w12, w13, w7            // B = 8x - xy
        sub             w4,  w7,  w13
        sub             w4,  w4,  w14
        add             w4,  w4,  #64           // A = xy - 8x - 8y + 64
        b.eq            2f                      // x * y == 0: 1-D or copy paths

        // 2-D path: v4/v5 hold the current row and the row shifted by one pixel
        dup             v0.8B,  w4
        dup             v1.8B,  w12
        ld1             {v4.8B, v5.8B}, [x1], x2
        dup             v2.8B,  w6
        dup             v3.8B,  w7
        ext             v5.8B,  v4.8B,  v5.8B,  #1
1:      ld1             {v6.8B, v7.8B}, [x1], x2
        umull           v16.8H, v4.8B,  v0.8B
        umlal           v16.8H, v5.8B,  v1.8B
        ext             v7.8B,  v6.8B,  v7.8B,  #1
        ld1             {v4.8B, v5.8B}, [x1], x2
        umlal           v16.8H, v6.8B,  v2.8B
        prfm            pldl1strm, [x1]
        ext             v5.8B,  v4.8B,  v5.8B,  #1
        umlal           v16.8H, v7.8B,  v3.8B
        umull           v17.8H, v6.8B,  v0.8B
        subs            w3,  w3,  #2            // flags consumed by b.gt below
        umlal           v17.8H, v7.8B, v1.8B
        umlal           v17.8H, v4.8B, v2.8B
        umlal           v17.8H, v5.8B, v3.8B
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,h264
        rshrn           v16.8B, v16.8H, #6
        rshrn           v17.8B, v17.8H, #6
  .else
        add             v16.8H, v16.8H, v22.8H
        add             v17.8H, v17.8H, v22.8H
        shrn            v16.8B, v16.8H, #6
        shrn            v17.8B, v17.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.8B}, [x8], x2
        ld1             {v21.8B}, [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
        urhadd          v17.8B, v17.8B, v21.8B
  .endif
        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        b.gt            1b
        ret

        // x * y == 0: w12 = B + C is the single non-A weight
2:      adds            w12, w12, w6
        dup             v0.8B, w4
        b.eq            5f                      // B + C == 0: x == 0 and y == 0
        tst             w6,  w6
        dup             v1.8B, w12
        b.eq            4f                      // C == 0: y == 0, horizontal only

        // vertical-only path: weights A (v0) and B + C (v1)
        ld1             {v4.8B}, [x1], x2
3:      ld1             {v6.8B}, [x1], x2
        umull           v16.8H, v4.8B,  v0.8B
        umlal           v16.8H, v6.8B,  v1.8B
        ld1             {v4.8B}, [x1], x2
        umull           v17.8H, v6.8B,  v0.8B
        umlal           v17.8H, v4.8B,  v1.8B
        prfm            pldl1strm, [x1]
  .ifc \codec,h264
        rshrn           v16.8B, v16.8H, #6
        rshrn           v17.8B, v17.8H, #6
  .else
        add             v16.8H, v16.8H, v22.8H
        add             v17.8H, v17.8H, v22.8H
        shrn            v16.8B, v16.8H, #6
        shrn            v17.8B, v17.8H, #6
  .endif
        prfm            pldl1strm, [x1, x2]
  .ifc \type,avg
        ld1             {v20.8B}, [x8], x2
        ld1             {v21.8B}, [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
        urhadd          v17.8B, v17.8B, v21.8B
  .endif
        subs            w3,  w3,  #2
        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        b.gt            3b
        ret

        // horizontal-only path: weights A (v0) and B + C (v1)
4:      ld1             {v4.8B, v5.8B}, [x1], x2
        ld1             {v6.8B, v7.8B}, [x1], x2
        ext             v5.8B,  v4.8B,  v5.8B,  #1
        ext             v7.8B,  v6.8B,  v7.8B,  #1
        prfm            pldl1strm, [x1]
        subs            w3,  w3,  #2
        umull           v16.8H, v4.8B, v0.8B
        umlal           v16.8H, v5.8B, v1.8B
        umull           v17.8H, v6.8B, v0.8B
        umlal           v17.8H, v7.8B, v1.8B
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,h264
        rshrn           v16.8B, v16.8H, #6
        rshrn           v17.8B, v17.8H, #6
  .else
        add             v16.8H, v16.8H, v22.8H
        add             v17.8H, v17.8H, v22.8H
        shrn            v16.8B, v16.8H, #6
        shrn            v17.8B, v17.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.8B}, [x8], x2
        ld1             {v21.8B}, [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
        urhadd          v17.8B, v17.8B, v21.8B
  .endif
        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        b.gt            4b
        ret

        // x == 0 and y == 0: every pixel is scaled by A only
5:      ld1             {v4.8B}, [x1], x2
        ld1             {v5.8B}, [x1], x2
        prfm            pldl1strm, [x1]
        subs            w3,  w3,  #2
        umull           v16.8H, v4.8B, v0.8B
        umull           v17.8H, v5.8B, v0.8B
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,h264
        rshrn           v16.8B, v16.8H, #6
        rshrn           v17.8B, v17.8H, #6
  .else
        add             v16.8H, v16.8H, v22.8H
        add             v17.8H, v17.8H, v22.8H
        shrn            v16.8B, v16.8H, #6
        shrn            v17.8B, v17.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.8B}, [x8], x2
        ld1             {v21.8B}, [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
        urhadd          v17.8B, v17.8B, v21.8B
  .endif
        st1             {v16.8B}, [x0], x2
        st1             {v17.8B}, [x0], x2
        b.gt            5b
        ret
endfunc
.endm

/*
 * chroma_mc4(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
 *
 * Emits ff_<type>_<codec>_chroma_mc4_neon: bilinear chroma interpolation of
 * a 4-pixel-wide block, two rows per loop iteration.
 *
 * Macro arguments:
 *   type   avg: the result is averaged (urhadd) with the bytes already at
 *          dst, which are read through x8.  Any other value (instantiated
 *          as put) stores the result directly.
 *   codec  h264 (default): narrow with a rounding shift right by 6.
 *          Any other value: add the bias held in v22, then narrow with a
 *          truncating shift right by 6.  v22 is loaded from rv40bias for
 *          rv40 and set to the constant 28 for vc1.
 *
 * Inputs, in the order of the prototype above:
 *   x0 = dst, x1 = src, x2 = stride, w3 = h, w4 = x, w5 = y
 *
 * Weights computed in the prologue:
 *   w7  = D = x * y
 *   w6  = C = 8 * y - x * y         = (8 - x) * y
 *   w12 = B = 8 * x - x * y         = x * (8 - y)
 *   w4  = A = x*y - 8*x - 8*y + 64  = (8 - x) * (8 - y)
 * They are broadcast as bytes (dup .8B), which assumes 0 <= x, y <= 7
 * (TODO confirm against callers).
 *
 * Data layout: a 64-bit vector carries two 4-byte groups, so one umull
 * handles two taps (or two rows) at once; the trn1/trn2 .2D + add sequence
 * then sums the matching halves into one 8-lane result covering two output
 * rows.
 *
 * Dispatch:
 *   x * y != 0          -> 1:  full 2-D filter
 *   x == 0 and y == 0   -> 5:  single weight A (equals 64 here)
 *   y == 0              -> 4:  horizontal only, weights A and B + C
 *   otherwise (x == 0)  -> 3:  vertical only, weights A and B + C
 *
 * Each loop consumes two rows and exits when h - 2 <= 0, so h is assumed to
 * be even and positive (TODO confirm against callers).
 * The 2-D and horizontal paths load 8 bytes per source row.
 *
 * Clobbers: x0, x1, w3, w4, x6, w7, x8 (avg), w9, w10 (rv40), w12-w14,
 *           v0-v2, v4-v7, v16, v18-v20, v22 (rv40/vc1), v24-v27, v30, v31,
 *           flags.
 */
.macro  h264_chroma_mc4 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
  .ifc \type,avg
        mov             x8,  x0                 // x8 = read pointer into dst
  .endif
        prfm            pldl1strm, [x1]
        prfm            pldl1strm, [x1, x2]
  .ifc \codec,rv40
        // byte offset = (y >> 1) * 8 + (x >> 1) * 2 into rv40bias
        movrel          x6,  rv40bias
        lsr             w9,  w5,  #1
        lsr             w10, w4,  #1
        lsl             w9,  w9,  #3
        lsl             w10, w10, #1
        add             w9,  w9,  w10
        add             x6,  x6,  w9, UXTW
        ld1r            {v22.8H}, [x6]          // broadcast the bias to all lanes
  .endif
  .ifc \codec,vc1
        movi            v22.8H,   #28
  .endif
        mul             w7,  w4,  w5            // D = x * y
        lsl             w14, w5,  #3            // 8 * y
        lsl             w13, w4,  #3            // 8 * x
        cmp             w7,  #0                 // flags stay live until b.eq below
        sub             w6,  w14, w7            // C = 8y - xy
        sub             w12, w13, w7            // B = 8x - xy
        sub             w4,  w7,  w13
        sub             w4,  w4,  w14
        add             w4,  w4,  #64           // A = xy - 8x - 8y + 64
        b.eq            2f                      // x * y == 0: 1-D or copy paths

        // 2-D path.  v0 = A x4 | B x4, v2 = C x4 | D x4,
        // v4/v6 = row[0..3] | row[1..4].
        // The second ext operand (v5/v7) is never loaded: it only supplies
        // byte 7 of the ext result, which trn1 .2S then discards.
        dup             v24.8B,  w4
        dup             v25.8B,  w12
        ld1             {v4.8B}, [x1], x2
        dup             v26.8B,  w6
        dup             v27.8B,  w7
        ext             v5.8B,  v4.8B,  v5.8B, #1
        trn1            v0.2S,  v24.2S, v25.2S
        trn1            v2.2S,  v26.2S, v27.2S
        trn1            v4.2S,  v4.2S,  v5.2S
1:      ld1             {v6.8B}, [x1], x2
        ext             v7.8B,  v6.8B,  v7.8B, #1
        trn1            v6.2S,  v6.2S,  v7.2S
        umull           v18.8H, v4.8B,  v0.8B
        umlal           v18.8H, v6.8B,  v2.8B
        ld1             {v4.8B}, [x1], x2
        ext             v5.8B,  v4.8B,  v5.8B, #1
        trn1            v4.2S,  v4.2S,  v5.2S
        prfm            pldl1strm, [x1]
        umull           v19.8H, v6.8B,  v0.8B
        umlal           v19.8H, v4.8B,  v2.8B
        trn1            v30.2D, v18.2D, v19.2D
        trn2            v31.2D, v18.2D, v19.2D
        add             v18.8H, v30.8H, v31.8H  // low half = row n, high half = row n + 1
  .ifc \codec,h264
        rshrn           v16.8B, v18.8H, #6
  .else
        add             v18.8H, v18.8H, v22.8H
        shrn            v16.8B, v18.8H, #6
  .endif
        subs            w3,  w3,  #2            // flags consumed by b.gt below
        prfm            pldl1strm, [x1, x2]
  .ifc \type,avg
        ld1             {v20.S}[0], [x8], x2
        ld1             {v20.S}[1], [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
  .endif
        st1             {v16.S}[0], [x0], x2
        st1             {v16.S}[1], [x0], x2
        b.gt            1b
        ret

        // x * y == 0: w12 = B + C is the single non-A weight
2:      adds            w12, w12, w6
        dup             v30.8B, w4
        b.eq            5f                      // B + C == 0: x == 0 and y == 0
        tst             w6,  w6                 // dup/trn below leave the flags intact
        dup             v31.8B, w12
        trn1            v0.2S,  v30.2S, v31.2S  // v0 = A x4 | (B + C) x4
        trn2            v1.2S,  v30.2S, v31.2S
        b.eq            4f                      // C == 0: y == 0, horizontal only

        // vertical-only path: v1 = (B + C) x4 | A x4, the halves of v0 swapped
        ext             v1.8B,  v0.8B,  v1.8B, #4
        ld1             {v4.S}[0], [x1], x2
3:      ld1             {v4.S}[1], [x1], x2     // v4 = row n | row n + 1
        umull           v18.8H, v4.8B,  v0.8B
        ld1             {v4.S}[0], [x1], x2     // v4 = row n + 2 | row n + 1
        umull           v19.8H, v4.8B,  v1.8B
        trn1            v30.2D, v18.2D, v19.2D
        trn2            v31.2D, v18.2D, v19.2D
        add             v18.8H, v30.8H, v31.8H
        prfm            pldl1strm, [x1]
  .ifc \codec,h264
        rshrn           v16.8B, v18.8H, #6
  .else
        add             v18.8H, v18.8H, v22.8H
        shrn            v16.8B, v18.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.S}[0], [x8], x2
        ld1             {v20.S}[1], [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
  .endif
        subs            w3,  w3,  #2
        prfm            pldl1strm, [x1, x2]
        st1             {v16.S}[0], [x0], x2
        st1             {v16.S}[1], [x0], x2
        b.gt            3b
        ret

        // horizontal-only path: v0 = A x4 | (B + C) x4
4:      ld1             {v4.8B}, [x1], x2
        ld1             {v6.8B}, [x1], x2
        ext             v5.8B,  v4.8B,  v5.8B, #1
        ext             v7.8B,  v6.8B,  v7.8B, #1
        trn1            v4.2S,  v4.2S,  v5.2S
        trn1            v6.2S,  v6.2S,  v7.2S
        umull           v18.8H, v4.8B,  v0.8B
        umull           v19.8H, v6.8B,  v0.8B
        subs            w3,  w3,  #2
        trn1            v30.2D, v18.2D, v19.2D
        trn2            v31.2D, v18.2D, v19.2D
        add             v18.8H, v30.8H, v31.8H
        prfm            pldl1strm, [x1]
  .ifc \codec,h264
        rshrn           v16.8B, v18.8H, #6
  .else
        add             v18.8H, v18.8H, v22.8H
        shrn            v16.8B, v18.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.S}[0], [x8], x2
        ld1             {v20.S}[1], [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
  .endif
        // second hint targets the row after [x1], as in the other loops
        prfm            pldl1strm, [x1, x2]
        st1             {v16.S}[0], [x0], x2
        st1             {v16.S}[1], [x0], x2
        b.gt            4b
        ret

        // x == 0 and y == 0: every pixel is scaled by A (v30) only
5:      ld1             {v4.S}[0], [x1], x2
        ld1             {v4.S}[1], [x1], x2
        umull           v18.8H, v4.8B,  v30.8B
        subs            w3,  w3,  #2
        prfm            pldl1strm, [x1]
  .ifc \codec,h264
        rshrn           v16.8B, v18.8H, #6
  .else
        add             v18.8H, v18.8H, v22.8H
        shrn            v16.8B, v18.8H, #6
  .endif
  .ifc \type,avg
        ld1             {v20.S}[0], [x8], x2
        ld1             {v20.S}[1], [x8], x2
        urhadd          v16.8B, v16.8B, v20.8B
  .endif
        // second hint targets the row after [x1], as in the other loops
        prfm            pldl1strm, [x1, x2]
        st1             {v16.S}[0], [x0], x2
        st1             {v16.S}[1], [x0], x2
        b.gt            5b
        ret
endfunc
.endm

/*
 * chroma_mc2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y)
 * (register assignment mirrors the mc8/mc4 macros in this file:
 *  x0 = dst, x1 = src, x2 = stride, w3 = h, w4 = x, w5 = y)
 *
 * Emits ff_<type>_h264_chroma_mc2_neon: bilinear chroma interpolation of a
 * 2-pixel-wide block, two rows per loop iteration.  This macro takes no
 * codec argument and always narrows with a rounding shift right by 6.
 *
 *   type   avg: the result is averaged (urhadd) with the bytes already at
 *          dst.  Any other value (instantiated as put) stores it directly.
 *
 * Paths:
 *   x == 0 and y == 0 -> 2:  no filtering; rows are copied (or averaged).
 *   otherwise         -> 1:  full 2-D filter with the weights
 *       w4  = A = x*y - 8*x - 8*y + 64  = (8 - x) * (8 - y)
 *       w12 = B = 8 * x - x * y         = x * (8 - y)
 *       w6  = C = 8 * y - x * y         = (8 - x) * y
 *       w7  = D = x * y
 *     broadcast as bytes (assumes 0 <= x, y <= 7; TODO confirm).
 *
 * The filtering loop loads 4 bytes from each source row, including the row
 * below the last output row.  Each loop consumes two rows and exits when
 * h - 2 <= 0, so h is assumed to be even and positive (TODO confirm).
 *
 * Clobbers: x0, x1, w3, w4, w6, w7, w12-w14, v0-v7, v16, v17, v18 (avg),
 *           flags.
 */
.macro  h264_chroma_mc2 type
function ff_\type\()_h264_chroma_mc2_neon, export=1
        prfm            pldl1strm, [x1]
        prfm            pldl1strm, [x1, x2]
        orr             w7,  w4,  w5
        cbz             w7,  2f                 // x == 0 and y == 0: copy path

        mul             w7,  w4,  w5            // D = x * y
        lsl             w14, w5,  #3            // 8 * y
        lsl             w13, w4,  #3            // 8 * x
        sub             w6,  w14, w7            // C = 8y - xy
        sub             w12, w13, w7            // B = 8x - xy
        sub             w4,  w7,  w13
        sub             w4,  w4,  w14
        add             w4,  w4,  #64           // A = xy - 8x - 8y + 64
        dup             v0.8B,  w4
        dup             v2.8B,  w12
        dup             v1.8B,  w6
        dup             v3.8B,  w7
        trn1            v0.4H,  v0.4H,  v2.4H   // v0 = A A B B A A B B
        trn1            v1.4H,  v1.4H,  v3.4H   // v1 = C C D D C C D D
1:
        ld1             {v4.S}[0],  [x1], x2
        ld1             {v4.S}[1],  [x1], x2    // v4 = row n | row n + 1
        rev64           v5.2S,  v4.2S
        ld1             {v5.S}[1],  [x1]        // v5 = row n + 1 | row n + 2 (x1 not advanced)
        ext             v6.8B,  v4.8B,  v5.8B,  #1
        ext             v7.8B,  v5.8B,  v4.8B,  #1
        trn1            v4.4H,  v4.4H,  v6.4H   // per row: p0 p1 p1 p2
        trn1            v5.4H,  v5.4H,  v7.4H
        umull           v16.8H, v4.8B,  v0.8B
        umlal           v16.8H, v5.8B,  v1.8B
  .ifc \type,avg
        ld1             {v18.H}[0], [x0], x2
        ld1             {v18.H}[2], [x0]
        sub             x0,  x0,  x2            // rewind dst for the stores below
  .endif
        rev64           v17.4S, v16.4S
        add             v16.8H, v16.8H, v17.8H  // combine the left-tap and right-tap products
        rshrn           v16.8B, v16.8H, #6
  .ifc \type,avg
        urhadd          v16.8B, v16.8B, v18.8B
  .endif
        st1             {v16.H}[0], [x0], x2
        st1             {v16.H}[2], [x0], x2
        subs            w3,  w3,  #2
        b.gt            1b
        ret

2:
        ld1             {v16.H}[0], [x1], x2
        ld1             {v16.H}[1], [x1], x2
  .ifc \type,avg
        ld1             {v18.H}[0], [x0], x2
        ld1             {v18.H}[1], [x0]
        sub             x0,  x0,  x2            // rewind dst for the stores below
        urhadd          v16.8B, v16.8B, v18.8B
  .endif
        st1             {v16.H}[0], [x0], x2
        st1             {v16.H}[1], [x0], x2
        subs            w3,  w3,  #2
        b.gt            2b
        ret
endfunc
.endm

/* H.264 variants: the codec argument of the mc8/mc4 macros defaults to h264. */
        h264_chroma_mc8 put
        h264_chroma_mc8 avg
        h264_chroma_mc4 put
        h264_chroma_mc4 avg
        h264_chroma_mc2 put
        h264_chroma_mc2 avg

#if CONFIG_RV40_DECODER
/*
 * Bias table for the rv40 variants: 4 rows of four 16-bit entries.  The rv40
 * prologue of the mc8/mc4 macros addresses it at byte offset
 * (y >> 1) * 8 + (x >> 1) * 2, i.e. rv40bias[y >> 1][x >> 1].
 */
const   rv40bias
        .short           0, 16, 32, 16
        .short          32, 28, 32, 28
        .short           0, 32, 16, 32
        .short          32, 28, 32, 28
endconst

        h264_chroma_mc8 put, rv40
        h264_chroma_mc8 avg, rv40
        h264_chroma_mc4 put, rv40
        h264_chroma_mc4 avg, rv40
#endif

#if CONFIG_VC1DSP
/* VC-1 variants: constant bias of 28 followed by a truncating shift. */
        h264_chroma_mc8 put, vc1
        h264_chroma_mc8 avg, vc1
        h264_chroma_mc4 put, vc1
        h264_chroma_mc4 avg, vc1
#endif