/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/aarch64/asm.S"

// pixels16: copy a 16-byte-wide block row by row, or average it into the
// destination.
//
// Register usage (as read/written by the code below):
//   x0    - destination pointer, advanced by x2 after every stored row
//   x1    - source pointer, advanced by x2 after every loaded row
//   x2    - row stride in bytes, applied to both source and destination
//   w3    - rows remaining; four rows are handled per iteration and the
//           loop only exits when the count reaches exactly zero, so the
//           count has to be a positive multiple of 4
//   x12   - (avg variant only) read cursor over the destination rows
//   v0-v7 - scratch
//
// Macro arguments:
//   rnd - accepted so that all pixel macros share one interface; it is not
//         referenced in this body
//   avg - when non-zero, every source row is combined with the existing
//         destination row using urhadd before being stored
.macro  pixels16        rnd=1, avg=0
  .if \avg
        // Stores keep using x0, so destination reads need their own cursor.
        mov             x12, x0
  .endif
1:      ld1             {v0.16B},  [x1], x2
        ld1             {v1.16B},  [x1], x2
        ld1             {v2.16B},  [x1], x2
        ld1             {v3.16B},  [x1], x2
  .if \avg
        ld1             {v4.16B},  [x12], x2
        urhadd          v0.16B,  v0.16B,  v4.16B
        ld1             {v5.16B},  [x12], x2
        urhadd          v1.16B,  v1.16B,  v5.16B
        ld1             {v6.16B},  [x12], x2
        urhadd          v2.16B,  v2.16B,  v6.16B
        ld1             {v7.16B},  [x12], x2
        urhadd          v3.16B,  v3.16B,  v7.16B
  .endif
        // Flags set here are consumed by b.ne below; st1 does not alter them.
        subs            w3,  w3,  #4
        st1             {v0.16B},  [x0], x2
        st1             {v1.16B},  [x0], x2
        st1             {v2.16B},  [x0], x2
        st1             {v3.16B},  [x0], x2
        b.ne            1b
        ret
.endm

// pixels16_x2: horizontal half-pel interpolation of a 16-byte-wide block.
// Each output byte is the halving add of source bytes i and i+1 of the
// same row.
//
// Register usage:
//   x0    - destination pointer, advanced by x2 per stored row
//   x1    - source pointer, advanced by x2 per loaded row; 32 bytes are
//           loaded per row although only bytes 0..16 contribute to the result
//   x2    - row stride in bytes
//   w3    - rows remaining; two rows per iteration, loop exits only on
//           exactly zero, so the count has to be a positive even number
//   v0-v3 - scratch
//
// The "avg" mnemonic used below is NOT an instruction: it is a helper macro
// that must be defined by the invoking context (pixfunc in this file maps it
// to urhadd or uhadd). It is unrelated to the avg macro argument.
//
// Macro arguments:
//   rnd - not referenced here; rounding is selected through the avg helper
//   avg - when non-zero, the interpolated row is additionally combined with
//         the existing destination row using urhadd
.macro  pixels16_x2     rnd=1, avg=0
1:      ld1             {v0.16B, v1.16B}, [x1], x2
        ld1             {v2.16B, v3.16B}, [x1], x2
        subs            w3,  w3,  #2
        // v1 = bytes 1..16 of the first row (row shifted left by one byte)
        ext             v1.16B,  v0.16B,  v1.16B,  #1
        avg             v0.16B,  v0.16B,  v1.16B
        // v3 = bytes 1..16 of the second row
        ext             v3.16B,  v2.16B,  v3.16B,  #1
        avg             v2.16B,  v2.16B,  v3.16B
  .if \avg
        // Read both destination rows, then step x0 back to the first one
        // so the stores below land on the rows just read.
        ld1             {v1.16B}, [x0], x2
        ld1             {v3.16B}, [x0]
        urhadd          v0.16B,  v0.16B,  v1.16B
        urhadd          v2.16B,  v2.16B,  v3.16B
        sub             x0,  x0,  x2
  .endif
        st1             {v0.16B}, [x0], x2
        st1             {v2.16B}, [x0], x2
        b.ne            1b
        ret
.endm

// pixels16_y2: vertical half-pel interpolation of a 16-byte-wide block.
// Each output row is the halving add of two consecutive source rows, so one
// more source row is read than output rows are written.
//
// Register usage:
//   x0    - destination pointer, advanced by x2 per stored row
//   x1    - source pointer, advanced by x2 per loaded row
//   x2    - row stride in bytes
//   w3    - output rows requested. Two rows are peeled off for the tail
//           before the loop and the loop exits only when the remaining
//           count hits exactly zero, so the count has to be even and at
//           least 4.
//   v0,v1 - rolling pair of the two most recent source rows
//   v2,v3 - interpolated output rows
//   v4,v5 - (avg variant only) destination rows
//
// The "avg" mnemonic is a helper macro supplied by the invoking context
// (urhadd or uhadd, see pixfunc); it is unrelated to the avg macro argument.
//
// Macro arguments:
//   rnd - not referenced here; rounding is selected through the avg helper
//   avg - when non-zero, results are combined with the destination rows
//         using urhadd before the store
.macro  pixels16_y2     rnd=1, avg=0
        sub             w3,  w3,  #2
        ld1             {v0.16B}, [x1], x2
        ld1             {v1.16B}, [x1], x2
1:      subs            w3,  w3,  #2
        avg             v2.16B,  v0.16B,  v1.16B
        ld1             {v0.16B}, [x1], x2
        // v1 still holds the older row, v0 the row just loaded.
        avg             v3.16B,  v0.16B,  v1.16B
        ld1             {v1.16B}, [x1], x2
  .if \avg
        // Read two destination rows, then rewind x0 by one row for the stores.
        ld1             {v4.16B}, [x0], x2
        ld1             {v5.16B}, [x0]
        urhadd          v2.16B,  v2.16B,  v4.16B
        urhadd          v3.16B,  v3.16B,  v5.16B
        sub             x0,  x0,  x2
  .endif
        st1             {v2.16B}, [x0], x2
        st1             {v3.16B}, [x0], x2
        b.ne            1b

        // Tail: last two output rows. Only one further source row is loaded.
        avg             v2.16B,  v0.16B,  v1.16B
        ld1             {v0.16B}, [x1], x2
        avg             v3.16B,  v0.16B,  v1.16B
  .if \avg
        ld1             {v4.16B}, [x0], x2
        ld1             {v5.16B}, [x0]
        urhadd          v2.16B,  v2.16B,  v4.16B
        urhadd          v3.16B,  v3.16B,  v5.16B
        sub             x0,  x0,  x2
  .endif
        st1             {v2.16B},     [x0], x2
        st1             {v3.16B},     [x0], x2

        ret
.endm

// pixels16_xy2: diagonal (horizontal + vertical) half-pel interpolation of a
// 16-byte-wide block. Each output byte is derived from the sum of a 2x2
// neighbourhood: bytes i and i+1 of two consecutive source rows, narrowed
// with a right shift by 2.
//
// Helper macros that must be provided by the invoking context (see pixfunc):
//   mshrn / mshrn2 - narrowing shift (rshrn/rshrn2 or shrn/shrn2)
//   NRND           - emits its operand instruction only in the variant that
//                    uses the non-rounding shift; there it adds the per-lane
//                    constant 1 held in v26 before the shift
//
// Register usage:
//   x0      - destination pointer, advanced by x2 per stored row
//   x1      - source pointer, advanced by x2 per loaded row; 32 bytes are
//             loaded per row although only bytes 0..16 contribute
//   x2      - row stride in bytes
//   w3      - output rows requested; two are peeled off for the tail and the
//             loop processes two rows per iteration while the remaining
//             count is greater than zero
//   v16/v20 - 16-bit horizontal pair sums (low/high half) of one source row
//   v18/v22 - 16-bit horizontal pair sums (low/high half) of the other row
//   v24     - low-half 2x2 sum; v0 / v1 are reused for the high-half sum
//   v26     - constant 1 in every 16-bit lane (only set up through NRND)
//   v28,v30 - output rows (v30 doubles as the shifted source row first)
//   v0-v5   - raw source rows and scratch
//
// Macro arguments:
//   rnd - not referenced here; rounding is selected through the helpers
//   avg - when non-zero, results are combined with the destination rows
//         using urhadd before the store
.macro  pixels16_xy2    rnd=1, avg=0
        sub             w3,  w3,  #2
        ld1             {v0.16B, v1.16B}, [x1], x2
        ld1             {v4.16B, v5.16B}, [x1], x2
NRND    movi            v26.8H, #1
        // Shift each row left by one byte, then widen-add row and shifted row.
        ext             v1.16B,  v0.16B,  v1.16B,  #1
        ext             v5.16B,  v4.16B,  v5.16B,  #1
        uaddl           v16.8H,  v0.8B,   v1.8B
        uaddl2          v20.8H,  v0.16B,  v1.16B
        uaddl           v18.8H,  v4.8B,   v5.8B
        uaddl2          v22.8H,  v4.16B,  v5.16B
1:      subs            w3,  w3,  #2
        ld1             {v0.16B, v1.16B}, [x1], x2
        add             v24.8H,  v16.8H,  v18.8H
NRND    add             v24.8H,  v24.8H,  v26.8H
        ext             v30.16B, v0.16B,  v1.16B,  #1
        add             v1.8H,   v20.8H,  v22.8H
        mshrn           v28.8B,  v24.8H,  #2
NRND    add             v1.8H,   v1.8H,   v26.8H
        mshrn2          v28.16B, v1.8H,   #2
  .if \avg
        // v16 is free as scratch here: its pair sums were consumed above
        // and it is recomputed right after.
        ld1             {v16.16B},        [x0]
        urhadd          v28.16B, v28.16B, v16.16B
  .endif
        uaddl           v16.8H,  v0.8B,   v30.8B
        ld1             {v2.16B, v3.16B}, [x1], x2
        uaddl2          v20.8H,  v0.16B,  v30.16B
        st1             {v28.16B},        [x0], x2
        // Second output row: new sums in v16/v20 plus the older v18/v22.
        add             v24.8H,  v16.8H,  v18.8H
NRND    add             v24.8H,  v24.8H,  v26.8H
        ext             v3.16B,  v2.16B,  v3.16B,  #1
        add             v0.8H,   v20.8H,  v22.8H
        mshrn           v30.8B,  v24.8H,  #2
NRND    add             v0.8H,   v0.8H,   v26.8H
        mshrn2          v30.16B, v0.8H,   #2
  .if \avg
        // v18 is free as scratch here for the same reason as v16 above.
        ld1             {v18.16B},        [x0]
        urhadd          v30.16B, v30.16B, v18.16B
  .endif
        uaddl           v18.8H,   v2.8B,  v3.8B
        uaddl2          v22.8H,   v2.16B, v3.16B
        st1             {v30.16B},        [x0], x2
        b.gt            1b

        // Tail: last two output rows. Only one further source row is loaded.
        ld1             {v0.16B, v1.16B}, [x1], x2
        add             v24.8H,  v16.8H,  v18.8H
NRND    add             v24.8H,  v24.8H,  v26.8H
        ext             v30.16B, v0.16B,  v1.16B,  #1
        add             v1.8H,   v20.8H,  v22.8H
        mshrn           v28.8B,  v24.8H,  #2
NRND    add             v1.8H,   v1.8H,   v26.8H
        mshrn2          v28.16B, v1.8H,   #2
  .if \avg
        ld1             {v16.16B},        [x0]
        urhadd          v28.16B, v28.16B, v16.16B
  .endif
        uaddl           v16.8H,  v0.8B,   v30.8B
        uaddl2          v20.8H,  v0.16B,  v30.16B
        st1             {v28.16B},        [x0], x2
        add             v24.8H,  v16.8H,  v18.8H
NRND    add             v24.8H,  v24.8H,  v26.8H
        add             v0.8H,   v20.8H,  v22.8H
        mshrn           v30.8B,  v24.8H,  #2
NRND    add             v0.8H,   v0.8H,   v26.8H
        mshrn2          v30.16B, v0.8H,   #2
  .if \avg
        ld1             {v18.16B},        [x0]
        urhadd          v30.16B, v30.16B, v18.16B
  .endif
        st1             {v30.16B},        [x0], x2

        ret
.endm

// pixels8: copy an 8-byte-wide block row by row, or average it into the
// destination.
//
// Register usage:
//   x0    - destination pointer, advanced by x2 per stored row
//   x1    - source pointer, advanced by x2 per loaded row
//   x2    - row stride in bytes
//   w3    - rows remaining; four rows per iteration, loop exits only on
//           exactly zero, so the count has to be a positive multiple of 4
//   v0-v7 - scratch
//
// Macro arguments:
//   rnd - accepted for a uniform macro interface; not referenced here
//   avg - when non-zero, every source row is combined with the existing
//         destination row using urhadd before being stored
.macro  pixels8         rnd=1, avg=0
1:      ld1             {v0.8B}, [x1], x2
        ld1             {v1.8B}, [x1], x2
        ld1             {v2.8B}, [x1], x2
        ld1             {v3.8B}, [x1], x2
  .if \avg
        ld1             {v4.8B}, [x0], x2
        urhadd          v0.8B,  v0.8B,  v4.8B
        ld1             {v5.8B}, [x0], x2
        urhadd          v1.8B,  v1.8B,  v5.8B
        ld1             {v6.8B}, [x0], x2
        urhadd          v2.8B,  v2.8B,  v6.8B
        ld1             {v7.8B}, [x0], x2
        urhadd          v3.8B,  v3.8B,  v7.8B
        // The four reads advanced x0 by 4 * stride; rewind for the stores.
        sub             x0,  x0,  x2,  lsl #2
  .endif
        subs            w3,  w3,  #4
        st1             {v0.8B}, [x0], x2
        st1             {v1.8B}, [x0], x2
        st1             {v2.8B}, [x0], x2
        st1             {v3.8B}, [x0], x2
        b.ne            1b
        ret
.endm

// pixels8_x2: horizontal half-pel interpolation of an 8-byte-wide block.
// Each output byte is the halving add of source bytes i and i+1 of the
// same row.
//
// Register usage:
//   x0    - destination pointer, advanced by x2 per stored row
//   x1    - source pointer, advanced by x2 per loaded row; 16 bytes are
//           loaded per row although only bytes 0..8 contribute to the result
//   x2    - row stride in bytes
//   w3    - rows remaining; two rows per iteration, loop exits only on
//           exactly zero, so the count has to be a positive even number
//   v0-v5 - scratch
//
// The "avg" mnemonic is a helper macro supplied by the invoking context
// (urhadd or uhadd, see pixfunc); it is unrelated to the avg macro argument.
//
// Macro arguments:
//   rnd - not referenced here; rounding is selected through the avg helper
//   avg - when non-zero, results are combined with the destination rows
//         using urhadd before the store
.macro  pixels8_x2      rnd=1, avg=0
1:      ld1             {v0.8B, v1.8B}, [x1], x2
        // v1 = bytes 1..8 of the row (row shifted left by one byte)
        ext             v1.8B,  v0.8B,  v1.8B,  #1
        ld1             {v2.8B, v3.8B}, [x1], x2
        ext             v3.8B,  v2.8B,  v3.8B,  #1
        subs            w3,  w3,  #2
        avg             v0.8B,   v0.8B,   v1.8B
        avg             v2.8B,   v2.8B,   v3.8B
  .if \avg
        // Read two destination rows, then rewind x0 by one row for the stores.
        ld1             {v4.8B},     [x0], x2
        ld1             {v5.8B},     [x0]
        urhadd          v0.8B,   v0.8B,   v4.8B
        urhadd          v2.8B,   v2.8B,   v5.8B
        sub             x0,  x0,  x2
  .endif
        st1             {v0.8B}, [x0], x2
        st1             {v2.8B}, [x0], x2
        b.ne            1b
        ret
.endm

// pixels8_y2: vertical half-pel interpolation of an 8-byte-wide block.
// Each output row is the halving add of two consecutive source rows, so one
// more source row is read than output rows are written.
//
// Register usage:
//   x0    - destination pointer, advanced by x2 per stored row
//   x1    - source pointer, advanced by x2 per loaded row
//   x2    - row stride in bytes
//   w3    - output rows requested. Two rows are peeled off for the tail
//           before the loop and the loop exits only when the remaining
//           count hits exactly zero, so the count has to be even and at
//           least 4.
//   v0,v1 - rolling pair of the two most recent source rows
//   v4,v5 - interpolated output rows
//   v2,v3 - (avg variant only) destination rows
//
// The "avg" mnemonic is a helper macro supplied by the invoking context
// (urhadd or uhadd, see pixfunc); it is unrelated to the avg macro argument.
//
// Macro arguments:
//   rnd - not referenced here; rounding is selected through the avg helper
//   avg - when non-zero, results are combined with the destination rows
//         using urhadd before the store
.macro  pixels8_y2      rnd=1, avg=0
        sub             w3,  w3,  #2
        ld1             {v0.8B},  [x1], x2
        ld1             {v1.8B},  [x1], x2
1:      subs            w3,  w3,  #2
        avg             v4.8B,  v0.8B,  v1.8B
        ld1             {v0.8B},  [x1], x2
        // v1 still holds the older row, v0 the row just loaded.
        avg             v5.8B,  v0.8B,  v1.8B
        ld1             {v1.8B},  [x1], x2
  .if \avg
        // Read two destination rows, then rewind x0 by one row for the stores.
        ld1             {v2.8B},     [x0], x2
        ld1             {v3.8B},     [x0]
        urhadd          v4.8B,  v4.8B,  v2.8B
        urhadd          v5.8B,  v5.8B,  v3.8B
        sub             x0,  x0,  x2
  .endif
        st1             {v4.8B},     [x0], x2
        st1             {v5.8B},     [x0], x2
        b.ne            1b

        // Tail: last two output rows. Only one further source row is loaded.
        avg             v4.8B,  v0.8B,  v1.8B
        ld1             {v0.8B},  [x1], x2
        avg             v5.8B,  v0.8B,  v1.8B
  .if \avg
        ld1             {v2.8B},     [x0], x2
        ld1             {v3.8B},     [x0]
        urhadd          v4.8B,  v4.8B,  v2.8B
        urhadd          v5.8B,  v5.8B,  v3.8B
        sub             x0,  x0,  x2
  .endif
        st1             {v4.8B},     [x0], x2
        st1             {v5.8B},     [x0], x2

        ret
.endm

// pixels8_xy2: diagonal (horizontal + vertical) half-pel interpolation of an
// 8-byte-wide block. Each output byte is derived from the sum of a 2x2
// neighbourhood: bytes i and i+1 of two consecutive source rows, narrowed
// with a right shift by 2.
//
// Helper macros that must be provided by the invoking context (see pixfunc):
//   mshrn - narrowing shift (rshrn or shrn)
//   NRND  - emits its operand instruction only in the variant that uses the
//           non-rounding shift; there it adds the per-lane constant 1 held
//           in v19 before the shift
//
// Register usage:
//   x0      - destination pointer, advanced by x2 per stored row
//   x1      - source pointer, advanced by x2 per loaded row; 16 bytes are
//             loaded per row although only bytes 0..8 contribute
//   x2      - row stride in bytes
//   w3      - output rows requested; two are peeled off for the tail and the
//             loop processes two rows per iteration while the remaining
//             count is greater than zero
//   v16,v17 - 16-bit horizontal pair sums of the two most recent source rows
//   v18     - 2x2 sum before narrowing
//   v19     - constant 1 in every 16-bit lane (only set up through NRND)
//   v5,v7   - output rows; each also serves as the destination-row scratch
//             for the other one in the avg variant
//   v0,v1,v4,v6 - raw and shifted source rows
//
// Note on ext: the second source operand (v4 or v6) is not initialised on
// first use. With an offset of 1 it only supplies byte 15 of the result,
// and the following uaddl reads the low 8 bytes only, so its content never
// reaches the output.
//
// Macro arguments:
//   rnd - not referenced here; rounding is selected through the helpers
//   avg - when non-zero, results are combined with the destination rows
//         using urhadd before the store
.macro  pixels8_xy2     rnd=1, avg=0
        sub             w3,  w3,  #2
        ld1             {v0.16B},     [x1], x2
        ld1             {v1.16B},     [x1], x2
NRND    movi            v19.8H, #1
        ext             v4.16B,  v0.16B,  v4.16B,  #1
        ext             v6.16B,  v1.16B,  v6.16B,  #1
        uaddl           v16.8H,  v0.8B,  v4.8B
        uaddl           v17.8H,  v1.8B,  v6.8B
1:      subs            w3,  w3,  #2
        ld1             {v0.16B},     [x1], x2
        add             v18.8H, v16.8H,  v17.8H
        ext             v4.16B,  v0.16B,  v4.16B,  #1
NRND    add             v18.8H, v18.8H, v19.8H
        uaddl           v16.8H,  v0.8B,  v4.8B
        mshrn           v5.8B,  v18.8H, #2
        ld1             {v1.16B},     [x1], x2
        // Second output row: new sums in v16 plus the older v17.
        add             v18.8H, v16.8H,  v17.8H
  .if \avg
        ld1             {v7.8B},     [x0]
        urhadd          v5.8B,  v5.8B,  v7.8B
  .endif
NRND    add             v18.8H, v18.8H, v19.8H
        st1             {v5.8B},     [x0], x2
        mshrn           v7.8B,  v18.8H, #2
  .if \avg
        ld1             {v5.8B},     [x0]
        urhadd          v7.8B,  v7.8B,  v5.8B
  .endif
        ext             v6.16B,  v1.16B,  v6.16B,  #1
        uaddl           v17.8H,  v1.8B,   v6.8B
        st1             {v7.8B},     [x0], x2
        b.gt            1b

        // Tail: last two output rows. Only one further source row is loaded.
        ld1             {v0.16B},     [x1], x2
        add             v18.8H, v16.8H, v17.8H
        ext             v4.16B, v0.16B, v4.16B,  #1
NRND    add             v18.8H, v18.8H, v19.8H
        uaddl           v16.8H,  v0.8B, v4.8B
        mshrn           v5.8B,  v18.8H, #2
        add             v18.8H, v16.8H, v17.8H
  .if \avg
        ld1             {v7.8B},     [x0]
        urhadd          v5.8B,  v5.8B,  v7.8B
  .endif
NRND    add             v18.8H, v18.8H, v19.8H
        st1             {v5.8B},     [x0], x2
        mshrn           v7.8B,  v18.8H, #2
  .if \avg
        ld1             {v5.8B},     [x0]
        urhadd          v7.8B,  v7.8B,  v5.8B
  .endif
        st1             {v7.8B},     [x0], x2

        ret
.endm

// pixfunc: emit one exported function named ff_<pfx><name><suf>_neon whose
// body is the expansion of the pixel macro <name>.
//
// Before expanding the body, four helper macros are defined according to
// the rnd argument; they are purged again afterwards so the next pixfunc
// invocation can redefine them:
//
//   helper   rnd != 0           rnd == 0
//   avg      urhadd             uhadd
//   mshrn    rshrn              shrn
//   mshrn2   rshrn2             shrn2
//   NRND     expands to nothing emits the instruction passed to it
//
// Macro arguments:
//   pfx  - symbol prefix placed after "ff_" (this file uses put_ and avg_)
//   name - pixel macro to expand; also part of the symbol name
//   suf  - optional symbol suffix (this file uses _no_rnd)
//   rnd  - selects the helper set above; also forwarded to the body macro
//   avg  - forwarded to the body macro
//
// function / endfunc are not defined in this file; presumably they come
// from the included asm.S - confirm there.
.macro  pixfunc         pfx, name, suf, rnd=1, avg=0
  .if \rnd
    .macro avg  rd, rn, rm
        urhadd          \rd, \rn, \rm
    .endm
    .macro mshrn rd, rn, rm
        rshrn           \rd, \rn, \rm
    .endm
    .macro mshrn2 rd, rn, rm
        rshrn2          \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
    .endm
  .else
    .macro avg  rd, rn, rm
        uhadd           \rd, \rn, \rm
    .endm
    .macro mshrn rd, rn, rm
        shrn            \rd, \rn, \rm
    .endm
    .macro mshrn2 rd, rn, rm
        shrn2           \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
        \insn
    .endm
  .endif
function ff_\pfx\name\suf\()_neon, export=1
        \name           \rnd, \avg
endfunc
        .purgem         avg
        .purgem         mshrn
        .purgem         mshrn2
        .purgem         NRND
.endm

// pixfunc2: emit both rounding flavours of one pixel macro.
// Expands pixfunc twice: once with rnd=1 and an empty suffix, once with
// rnd=0 and the suffix _no_rnd. The avg argument is forwarded unchanged.
.macro  pixfunc2        pfx, name, avg=0
        pixfunc         \pfx, \name,          rnd=1, avg=\avg
        pixfunc         \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm

// Function instantiations.
//
// ORDER MATTERS: each *_h264_qpel*_mc00 stub below only loads a fixed row
// count into w3 and contains neither a ret nor a branch. It relies on
// execution continuing into the function emitted immediately after it
// (the plain pixels16 / pixels8 variant of the same prefix). This assumes
// endfunc emits no instructions - confirm in asm.S. Do not insert anything
// between a stub and the pixfunc line that follows it.

// 16 rows, then continue into ff_put_pixels16_neon.
function ff_put_h264_qpel16_mc00_neon, export=1
        mov             w3,  #16
endfunc

        pixfunc         put_, pixels16,     avg=0
        pixfunc2        put_, pixels16_x2,  avg=0
        pixfunc2        put_, pixels16_y2,  avg=0
        pixfunc2        put_, pixels16_xy2, avg=0

// 16 rows, then continue into ff_avg_pixels16_neon.
function ff_avg_h264_qpel16_mc00_neon, export=1
        mov             w3,  #16
endfunc

        pixfunc         avg_, pixels16,     avg=1
        pixfunc2        avg_, pixels16_x2,  avg=1
        pixfunc2        avg_, pixels16_y2,  avg=1
        pixfunc2        avg_, pixels16_xy2, avg=1

// 8 rows, then continue into ff_put_pixels8_neon.
function ff_put_h264_qpel8_mc00_neon, export=1
        mov             w3,  #8
endfunc

        pixfunc         put_, pixels8,     avg=0
        pixfunc2        put_, pixels8_x2,  avg=0
        pixfunc2        put_, pixels8_y2,  avg=0
        pixfunc2        put_, pixels8_xy2, avg=0

// 8 rows, then continue into ff_avg_pixels8_neon.
function ff_avg_h264_qpel8_mc00_neon, export=1
        mov             w3,  #8
endfunc

        // Only the rounding flavour is generated for the 8-wide averaging
        // functions (pixfunc, not pixfunc2), so no _no_rnd symbols exist
        // for them.
        pixfunc         avg_, pixels8,     avg=1
        pixfunc         avg_, pixels8_x2,  avg=1
        pixfunc         avg_, pixels8_y2,  avg=1
        pixfunc         avg_, pixels8_xy2, avg=1
398