/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * pixels16: copy (avg=0) a block that is 16 bytes wide, or average it into
 * the destination (avg=1).  Four rows are handled per loop iteration.
 *
 * Register use, as read from the body:
 *   r0   destination pointer; every access carries a :128 alignment qualifier
 *   r1   source pointer; loaded without an alignment qualifier
 *   r2   byte offset added to both pointers after each row
 *   r3   row count; 4 is subtracted per iteration and the loop repeats until
 *        the count is exactly zero, so it has to be a non-zero multiple of 4
 *   r12  (avg=1 only) second copy of r0, used to read the current destination
 *   q0-q3   source rows / results
 *   q8-q11  destination rows (avg=1 only)
 *
 * The rnd argument is accepted so that all pixel macros can be invoked the
 * same way; nothing in this body references it.
 */
.macro  pixels16        rnd=1, avg=0
  .if \avg
        /* Keep r0 for the stores; read the old destination through r12. */
        mov             r12, r0
  .endif
1:      vld1.8          {q0},     [r1], r2
        vld1.8          {q1},     [r1], r2
        vld1.8          {q2},     [r1], r2
        /* Preload hints for source rows further down. */
        pld             [r1, r2, lsl #2]
        vld1.8          {q3},     [r1], r2
        pld             [r1]
        pld             [r1, r2]
        pld             [r1, r2, lsl #1]
  .if \avg
        /* Rounding average of each source row with the destination row. */
        vld1.8          {q8},     [r12,:128], r2
        vrhadd.u8       q0,  q0,  q8
        vld1.8          {q9},     [r12,:128], r2
        vrhadd.u8       q1,  q1,  q9
        vld1.8          {q10},    [r12,:128], r2
        vrhadd.u8       q2,  q2,  q10
        vld1.8          {q11},    [r12,:128], r2
        vrhadd.u8       q3,  q3,  q11
  .endif
        /* The NEON stores below leave the flags set here intact for bne. */
        subs            r3,  r3,  #4
        vst1.64         {q0},     [r0,:128], r2
        vst1.64         {q1},     [r0,:128], r2
        vst1.64         {q2},     [r0,:128], r2
        vst1.64         {q3},     [r0,:128], r2
        bne             1b
        bx              lr
.endm

/*
 * pixels16_x2: horizontal interpolation of a 16-byte-wide block.  Every
 * output byte is the average of a source byte and its right-hand neighbour.
 * Two rows are produced per loop iteration.
 *
 *   r0  destination pointer (accessed with a :128 alignment qualifier)
 *   r1  source pointer
 *   r2  byte offset between consecutive rows
 *   r3  row count; 2 is subtracted per iteration and the loop repeats until
 *       the count is exactly zero, so it has to be a non-zero multiple of 2
 *
 * The mnemonic "avg" used below is not an instruction.  It is the helper
 * macro that pixfunc defines around the expansion of this body:
 * vrhadd.u8 when rnd=1, vhadd.u8 when rnd=0.  With avg=1 the interpolated
 * rows are additionally merged with the bytes already stored at the
 * destination, always using vrhadd.u8.
 */
.macro  pixels16_x2     rnd=1, avg=0
        /* 24 bytes are read per row: the 16 pixels plus the bytes after them. */
1:      vld1.8          {d0-d2},  [r1], r2
        vld1.8          {d4-d6},  [r1], r2
        pld             [r1]
        pld             [r1, r2]
        subs            r3,  r3,  #2
        /* q1 / q3 = the row shifted by one byte, i.e. the right neighbours. */
        vext.8          q1,  q0,  q1,  #1
        avg             q0,  q0,  q1
        vext.8          q3,  q2,  q3,  #1
        avg             q2,  q2,  q3
  .if \avg
        /*
         * Read both destination rows, the second one without post-increment,
         * then step r0 back one row so the stores below hit the same rows.
         */
        vld1.8          {q1},     [r0,:128], r2
        vld1.8          {q3},     [r0,:128]
        vrhadd.u8       q0,  q0,  q1
        vrhadd.u8       q2,  q2,  q3
        sub             r0,  r0,  r2
  .endif
        vst1.8          {q0},     [r0,:128], r2
        vst1.8          {q2},     [r0,:128], r2
        bne             1b
        bx              lr
.endm

/*
 * pixels16_y2: vertical interpolation of a 16-byte-wide block.  Every output
 * row is the average of two consecutive source rows.
 *
 *   r0  destination pointer (accessed with a :128 alignment qualifier)
 *   r1  source pointer
 *   r2  byte offset between consecutive rows
 *   r3  row count
 *
 * Structure: two source rows are loaded up front and 2 is taken off the row
 * count.  The loop then writes two rows per iteration while loading the next
 * two source rows, and the code after the loop writes the final two rows
 * while loading only one more source row.  In total one source row more than
 * the number of output rows is read.  Because the loop repeats until the
 * adjusted count is exactly zero, the row count has to be even and at
 * least 4.
 *
 * "avg" is the helper macro defined by pixfunc (vrhadd.u8 for rnd=1,
 * vhadd.u8 for rnd=0).  With avg=1 the result is additionally merged with
 * the current destination contents using vrhadd.u8.
 */
.macro  pixels16_y2     rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.8          {q0},     [r1], r2
        vld1.8          {q1},     [r1], r2
1:      subs            r3,  r3,  #2
        /* q0 and q1 alternate as the upper and the lower row of each pair. */
        avg             q2,  q0,  q1
        vld1.8          {q0},     [r1], r2
        avg             q3,  q0,  q1
        vld1.8          {q1},     [r1], r2
        pld             [r1]
        pld             [r1, r2]
  .if \avg
        /* Read two destination rows, then rewind r0 by one row for the stores. */
        vld1.8          {q8},     [r0,:128], r2
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q2,  q2,  q8
        vrhadd.u8       q3,  q3,  q9
        sub             r0,  r0,  r2
  .endif
        vst1.8          {q2},     [r0,:128], r2
        vst1.8          {q3},     [r0,:128], r2
        bne             1b

        /* Tail: last two output rows; only one further source row is loaded. */
        avg             q2,  q0,  q1
        vld1.8          {q0},     [r1], r2
        avg             q3,  q0,  q1
  .if \avg
        vld1.8          {q8},     [r0,:128], r2
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q2,  q2,  q8
        vrhadd.u8       q3,  q3,  q9
        sub             r0,  r0,  r2
  .endif
        vst1.8          {q2},     [r0,:128], r2
        vst1.8          {q3},     [r0,:128], r2

        bx              lr
.endm

/*
 * pixels16_xy2: two-dimensional interpolation of a 16-byte-wide block.
 * Every output byte is computed from a 2x2 neighbourhood: a source byte, its
 * right-hand neighbour, and the same two positions one row further down.
 *
 *   r0  destination pointer (accessed with a :128 alignment qualifier)
 *   r1  source pointer
 *   r2  byte offset between consecutive rows
 *   r3  row count
 *
 * Arithmetic: vaddl.u8 widens each horizontal pair into a 16-bit sum, the
 * sums of two consecutive rows are added, and the total is narrowed back to
 * bytes with a right shift by 2.  "shrn" and "NRND" are helper macros that
 * pixfunc defines around the expansion of this body:
 *   rnd=1  shrn is vrshrn.u16 (rounding shift); NRND lines are dropped
 *   rnd=0  shrn is vshrn.u16 (truncating shift); NRND lines are assembled
 *          and add q13, which holds 1 in every 16-bit lane, before the shift
 * With avg=1 each result row is additionally merged with the current
 * destination row using vrhadd.u8.
 *
 * Register roles:
 *   q8,  q10  horizontal sums (low / high 8 pixels) of source rows 0, 2, 4 ...
 *   q9,  q11  horizontal sums (low / high 8 pixels) of source rows 1, 3, 5 ...
 *   q12       vertical sum for the low 8 pixels
 *   q1 / q0   vertical sum for the high 8 pixels (first / second output row)
 *   q14, q15  narrowed output rows (d28-d29 and d30-d31)
 *   q13       constant 1 per lane, only set and used when rnd=0
 *
 * Structure: two source rows are loaded and summed up front and 2 is taken
 * off the row count.  Each loop iteration loads two more source rows and
 * writes two output rows; the code after the loop loads one more source row
 * and writes the last two output rows.  The loop is left as soon as the
 * adjusted count is no longer positive (bgt), and its body always runs once,
 * so at least four rows are written; the row count is expected to be even
 * and at least 4.  24 bytes are read from every source row.
 */
.macro  pixels16_xy2    rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.8          {d0-d2},  [r1], r2
        vld1.8          {d4-d6},  [r1], r2
NRND    vmov.i16        q13, #1
        pld             [r1]
        pld             [r1, r2]
        /* q1 / q3 = rows shifted by one byte (right-hand neighbours). */
        vext.8          q1,  q0,  q1,  #1
        vext.8          q3,  q2,  q3,  #1
        vaddl.u8        q8,  d0,  d2
        vaddl.u8        q10, d1,  d3
        vaddl.u8        q9,  d4,  d6
        vaddl.u8        q11, d5,  d7
1:      subs            r3,  r3,  #2
        /* First output row of the pair; the next source row loads meanwhile. */
        vld1.8          {d0-d2},  [r1], r2
        vadd.u16        q12, q8,  q9
        pld             [r1]
NRND    vadd.u16        q12, q12, q13
        vext.8          q15, q0,  q1,  #1
        vadd.u16        q1 , q10, q11
        shrn            d28, q12, #2
NRND    vadd.u16        q1,  q1,  q13
        shrn            d29, q1,  #2
  .if \avg
        /* q8 is free here: its sums were consumed above and are rebuilt below. */
        vld1.8          {q8},     [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8,  d0,  d30
        vld1.8          {d2-d4},  [r1], r2
        vaddl.u8        q10, d1,  d31
        vst1.8          {q14},    [r0,:128], r2
        /* Second output row of the pair. */
        vadd.u16        q12, q8,  q9
        pld             [r1, r2]
NRND    vadd.u16        q12, q12, q13
        vext.8          q2,  q1,  q2,  #1
        vadd.u16        q0,  q10, q11
        shrn            d30, q12, #2
NRND    vadd.u16        q0,  q0,  q13
        shrn            d31, q0,  #2
  .if \avg
        /* q9 is free here: its sums were consumed above and are rebuilt below. */
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vaddl.u8        q9,  d2,  d4
        vaddl.u8        q11, d3,  d5
        vst1.8          {q15},    [r0,:128], r2
        bgt             1b

        /* Tail: one more source row is loaded, two output rows are written. */
        vld1.8          {d0-d2},  [r1], r2
        vadd.u16        q12, q8,  q9
NRND    vadd.u16        q12, q12, q13
        vext.8          q15, q0,  q1,  #1
        vadd.u16        q1 , q10, q11
        shrn            d28, q12, #2
NRND    vadd.u16        q1,  q1,  q13
        shrn            d29, q1,  #2
  .if \avg
        vld1.8          {q8},     [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8,  d0,  d30
        vaddl.u8        q10, d1,  d31
        vst1.8          {q14},    [r0,:128], r2
        vadd.u16        q12, q8,  q9
NRND    vadd.u16        q12, q12, q13
        vadd.u16        q0,  q10, q11
        shrn            d30, q12, #2
NRND    vadd.u16        q0,  q0,  q13
        shrn            d31, q0,  #2
  .if \avg
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vst1.8          {q15},    [r0,:128], r2

        bx              lr
.endm

/*
 * pixels8: copy (avg=0) a block that is 8 bytes wide, or average it into the
 * destination (avg=1).  Four rows are handled per loop iteration.
 *
 *   r0  destination pointer; every access carries a :64 alignment qualifier
 *   r1  source pointer; loaded without an alignment qualifier
 *   r2  byte offset added to both pointers after each row
 *   r3  row count; 4 is subtracted per iteration and the loop repeats until
 *       the count is exactly zero, so it has to be a non-zero multiple of 4
 *   d0-d3  source rows / results
 *   d4-d7  destination rows (avg=1 only)
 *
 * The rnd argument is accepted so that all pixel macros can be invoked the
 * same way; nothing in this body references it.
 */
.macro  pixels8         rnd=1, avg=0
1:      vld1.8          {d0},     [r1], r2
        vld1.8          {d1},     [r1], r2
        vld1.8          {d2},     [r1], r2
        /* Preload hints for source rows further down. */
        pld             [r1, r2, lsl #2]
        vld1.8          {d3},     [r1], r2
        pld             [r1]
        pld             [r1, r2]
        pld             [r1, r2, lsl #1]
  .if \avg
        /*
         * The destination rows are read through r0 itself, so r0 is moved
         * back by four rows afterwards for the stores below.
         */
        vld1.8          {d4},     [r0,:64], r2
        vrhadd.u8       d0,  d0,  d4
        vld1.8          {d5},     [r0,:64], r2
        vrhadd.u8       d1,  d1,  d5
        vld1.8          {d6},     [r0,:64], r2
        vrhadd.u8       d2,  d2,  d6
        vld1.8          {d7},     [r0,:64], r2
        vrhadd.u8       d3,  d3,  d7
        sub             r0,  r0,  r2,  lsl #2
  .endif
        subs            r3,  r3,  #4
        vst1.8          {d0},     [r0,:64], r2
        vst1.8          {d1},     [r0,:64], r2
        vst1.8          {d2},     [r0,:64], r2
        vst1.8          {d3},     [r0,:64], r2
        bne             1b
        bx              lr
.endm

/*
 * pixels8_x2: horizontal interpolation of an 8-byte-wide block.  Every output
 * byte is the average of a source byte and its right-hand neighbour.  Two
 * rows are produced per loop iteration.
 *
 *   r0  destination pointer (accessed with a :64 alignment qualifier)
 *   r1  source pointer
 *   r2  byte offset between consecutive rows
 *   r3  row count; 2 is subtracted per iteration and the loop repeats until
 *       the count is exactly zero, so it has to be a non-zero multiple of 2
 *
 * "avg" is the helper macro that pixfunc defines around the expansion of
 * this body (vrhadd.u8 for rnd=1, vhadd.u8 for rnd=0).  With avg=1 the
 * result is additionally merged with the current destination contents using
 * vrhadd.u8.
 */
.macro  pixels8_x2      rnd=1, avg=0
        /* 16 bytes are read per row; d1 / d3 become the row shifted by one. */
1:      vld1.8          {q0},     [r1], r2
        vext.8          d1,  d0,  d1,  #1
        vld1.8          {q1},     [r1], r2
        vext.8          d3,  d2,  d3,  #1
        pld             [r1]
        pld             [r1, r2]
        subs            r3,  r3,  #2
        /*
         * Regroup so that q0 holds both rows and q1 both shifted rows; one
         * 128-bit avg then handles the two rows at once.
         */
        vswp            d1,  d2
        avg             q0,  q0,  q1
  .if \avg
        /* Read two destination rows, then rewind r0 by one row for the stores. */
        vld1.8          {d4},     [r0,:64], r2
        vld1.8          {d5},     [r0,:64]
        vrhadd.u8       q0,  q0,  q2
        sub             r0,  r0,  r2
  .endif
        vst1.8          {d0},     [r0,:64], r2
        vst1.8          {d1},     [r0,:64], r2
        bne             1b
        bx              lr
.endm

/*
 * pixels8_y2: vertical interpolation of an 8-byte-wide block.  Every output
 * row is the average of two consecutive source rows.
 *
 *   r0  destination pointer (accessed with a :64 alignment qualifier)
 *   r1  source pointer
 *   r2  byte offset between consecutive rows
 *   r3  row count
 *
 * Structure: two source rows are loaded up front and 2 is taken off the row
 * count.  The loop writes two rows per iteration while loading the next two
 * source rows; the code after the loop writes the final two rows while
 * loading only one more source row.  Because the loop repeats until the
 * adjusted count is exactly zero, the row count has to be even and at
 * least 4.
 *
 * "avg" is the helper macro defined by pixfunc (vrhadd.u8 for rnd=1,
 * vhadd.u8 for rnd=0).  With avg=1 both result rows (d4 and d5, together q2)
 * are merged with the two destination rows (d2 and d3, together q1) in a
 * single vrhadd.u8.
 */
.macro  pixels8_y2      rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.8          {d0},     [r1], r2
        vld1.8          {d1},     [r1], r2
1:      subs            r3,  r3,  #2
        /* d0 and d1 alternate as the upper and the lower row of each pair. */
        avg             d4,  d0,  d1
        vld1.8          {d0},     [r1], r2
        avg             d5,  d0,  d1
        vld1.8          {d1},     [r1], r2
        pld             [r1]
        pld             [r1, r2]
  .if \avg
        /* Read two destination rows, then rewind r0 by one row for the stores. */
        vld1.8          {d2},     [r0,:64], r2
        vld1.8          {d3},     [r0,:64]
        vrhadd.u8       q2,  q2,  q1
        sub             r0,  r0,  r2
  .endif
        vst1.8          {d4},     [r0,:64], r2
        vst1.8          {d5},     [r0,:64], r2
        bne             1b

        /* Tail: last two output rows; only one further source row is loaded. */
        avg             d4,  d0,  d1
        vld1.8          {d0},     [r1], r2
        avg             d5,  d0,  d1
  .if \avg
        vld1.8          {d2},     [r0,:64], r2
        vld1.8          {d3},     [r0,:64]
        vrhadd.u8       q2,  q2,  q1
        sub             r0,  r0,  r2
  .endif
        vst1.8          {d4},     [r0,:64], r2
        vst1.8          {d5},     [r0,:64], r2

        bx              lr
.endm

/*
 * pixels8_xy2: two-dimensional interpolation of an 8-byte-wide block.  Every
 * output byte is computed from a 2x2 neighbourhood: a source byte, its
 * right-hand neighbour, and the same two positions one row further down.
 *
 *   r0  destination pointer (accessed with a :64 alignment qualifier)
 *   r1  source pointer
 *   r2  byte offset between consecutive rows
 *   r3  row count
 *
 * Arithmetic: vaddl.u8 widens each horizontal pair into a 16-bit sum, the
 * sums of two consecutive rows are added, and the total is narrowed back to
 * bytes with a right shift by 2.  "shrn" and "NRND" are helper macros that
 * pixfunc defines around the expansion of this body:
 *   rnd=1  shrn is vrshrn.u16 (rounding shift); NRND lines are dropped
 *   rnd=0  shrn is vshrn.u16 (truncating shift); NRND lines are assembled
 *          and add q11, which holds 1 in every 16-bit lane, before the shift
 * With avg=1 each result row is additionally merged with the current
 * destination row using vrhadd.u8.
 *
 * Register roles:
 *   q8   horizontal sums of source rows 0, 2, 4 ...
 *   q9   horizontal sums of source rows 1, 3, 5 ...
 *   q10  vertical sum of two consecutive rows
 *   d5, d7  narrowed output rows; each also serves as scratch for the
 *           destination row of the other when avg=1
 *   q11  constant 1 per lane, only set and used when rnd=0
 *
 * Structure: two source rows are loaded and summed up front and 2 is taken
 * off the row count.  Each loop iteration loads two more source rows and
 * writes two output rows; the code after the loop loads one more source row
 * and writes the last two output rows.  The loop is left as soon as the
 * adjusted count is no longer positive (bgt), and its body always runs once,
 * so at least four rows are written; the row count is expected to be even
 * and at least 4.  16 bytes are read from every source row.
 */
.macro  pixels8_xy2     rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.8          {q0},     [r1], r2
        vld1.8          {q1},     [r1], r2
NRND    vmov.i16        q11, #1
        pld             [r1]
        pld             [r1, r2]
        /* d4 / d6 = rows shifted by one byte (right-hand neighbours). */
        vext.8          d4,  d0,  d1,  #1
        vext.8          d6,  d2,  d3,  #1
        vaddl.u8        q8,  d0,  d4
        vaddl.u8        q9,  d2,  d6
1:      subs            r3,  r3,  #2
        /* First output row of the pair; the next source row loads meanwhile. */
        vld1.8          {q0},     [r1], r2
        pld             [r1]
        vadd.u16        q10, q8,  q9
        vext.8          d4,  d0,  d1,  #1
NRND    vadd.u16        q10, q10, q11
        vaddl.u8        q8,  d0,  d4
        shrn            d5,  q10, #2
        vld1.8          {q1},     [r1], r2
        /* Vertical sum for the second output row of the pair. */
        vadd.u16        q10, q8,  q9
        pld             [r1, r2]
  .if \avg
        vld1.8          {d7},     [r0,:64]
        vrhadd.u8       d5,  d5,  d7
  .endif
NRND    vadd.u16        q10, q10, q11
        vst1.8          {d5},     [r0,:64], r2
        shrn            d7,  q10, #2
  .if \avg
        vld1.8          {d5},     [r0,:64]
        vrhadd.u8       d7,  d7,  d5
  .endif
        vext.8          d6,  d2,  d3,  #1
        vaddl.u8        q9,  d2,  d6
        vst1.8          {d7},     [r0,:64], r2
        bgt             1b

        /* Tail: one more source row is loaded, two output rows are written. */
        vld1.8          {q0},     [r1], r2
        vadd.u16        q10, q8,  q9
        vext.8          d4,  d0,  d1,  #1
NRND    vadd.u16        q10, q10, q11
        vaddl.u8        q8,  d0,  d4
        shrn            d5,  q10, #2
        vadd.u16        q10, q8,  q9
  .if \avg
        vld1.8          {d7},     [r0,:64]
        vrhadd.u8       d5,  d5,  d7
  .endif
NRND    vadd.u16        q10, q10, q11
        vst1.8          {d5},     [r0,:64], r2
        shrn            d7,  q10, #2
  .if \avg
        vld1.8          {d5},     [r0,:64]
        vrhadd.u8       d7,  d7,  d5
  .endif
        vst1.8          {d7},     [r0,:64], r2

        bx              lr
.endm

/*
 * pixfunc: emit one exported function whose name is built as
 * ff_ + pfx + name + suf + _neon and whose body is the expansion of the
 * macro called name.
 *
 *   pfx   name prefix (the invocations in this file pass put_ or avg_)
 *   name  body macro; it is invoked with rnd and avg as positional arguments
 *   suf   optional name suffix (pixfunc2 passes _no_rnd)
 *   rnd   selects which helper macros are defined, see below
 *   avg   forwarded unchanged to the body macro
 *
 * Three helper macros exist only for the duration of one expansion; they are
 * defined before the function and purged after it so that the next pixfunc
 * invocation can define them again with a different rnd:
 *
 *             rnd=1 (rounding)        rnd=0 (no rounding)
 *   avg       vrhadd.u8               vhadd.u8
 *   shrn      vrshrn.u16              vshrn.u16
 *   NRND      drops its argument      assembles its argument
 */
.macro  pixfunc         pfx, name, suf, rnd=1, avg=0
  .if \rnd
    .macro avg  rd, rn, rm
        vrhadd.u8       \rd, \rn, \rm
    .endm
    .macro shrn rd, rn, rm
        vrshrn.u16      \rd, \rn, \rm
    .endm
    /* Lines tagged NRND are meant for the no-rounding build only. */
    .macro NRND insn:vararg
    .endm
  .else
    .macro avg  rd, rn, rm
        vhadd.u8        \rd, \rn, \rm
    .endm
    .macro shrn rd, rn, rm
        vshrn.u16       \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
        \insn
    .endm
  .endif
function ff_\pfx\name\suf\()_neon, export=1
        \name           \rnd, \avg
endfunc
        .purgem         avg
        .purgem         shrn
        .purgem         NRND
.endm

/*
 * pixfunc2: emit two functions for the body macro called name by invoking
 * pixfunc twice: once with rnd=1 and no suffix, and once with rnd=0 and the
 * suffix _no_rnd.  pfx and avg are forwarded unchanged to both invocations.
 */
.macro  pixfunc2        pfx, name, avg=0
        pixfunc         \pfx, \name,          rnd=1, avg=\avg
        pixfunc         \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm

/*
 * ff_put_h264_qpel16_mc00_neon only loads 16 into r3, the register that the
 * pixel macros use as row count.  It contains no return or branch: the
 * pixfunc line right below emits ff_put_pixels16_neon as the next function,
 * so execution is expected to continue straight into it.
 * NOTE(review): this relies on function/endfunc from asm.S placing nothing
 * executable between the two bodies - confirm before reordering anything.
 */
function ff_put_h264_qpel16_mc00_neon, export=1
        mov             r3,  #16
endfunc

        /*
         * 16-byte-wide put functions.  pixels16 is emitted once because its
         * body does not depend on rnd; the interpolating bodies get a
         * rounding and a _no_rnd variant each.
         */
        pixfunc         put_, pixels16,     avg=0
        pixfunc2        put_, pixels16_x2,  avg=0
        pixfunc2        put_, pixels16_y2,  avg=0
        pixfunc2        put_, pixels16_xy2, avg=0

/*
 * ff_avg_h264_qpel16_mc00_neon only loads 16 into r3, the register that the
 * pixel macros use as row count.  It contains no return or branch: the
 * pixfunc line right below emits ff_avg_pixels16_neon as the next function,
 * so execution is expected to continue straight into it.
 * NOTE(review): this relies on function/endfunc from asm.S placing nothing
 * executable between the two bodies - confirm before reordering anything.
 */
function ff_avg_h264_qpel16_mc00_neon, export=1
        mov             r3,  #16
endfunc

        /*
         * 16-byte-wide avg functions.  pixels16 is emitted once because its
         * body does not depend on rnd; the interpolating bodies get a
         * rounding and a _no_rnd variant each.
         */
        pixfunc         avg_, pixels16,     avg=1
        pixfunc2        avg_, pixels16_x2,  avg=1
        pixfunc2        avg_, pixels16_y2,  avg=1
        pixfunc2        avg_, pixels16_xy2, avg=1

/*
 * ff_put_h264_qpel8_mc00_neon only loads 8 into r3, the register that the
 * pixel macros use as row count.  It contains no return or branch: the
 * pixfunc line right below emits ff_put_pixels8_neon as the next function,
 * so execution is expected to continue straight into it.
 * NOTE(review): this relies on function/endfunc from asm.S placing nothing
 * executable between the two bodies - confirm before reordering anything.
 */
function ff_put_h264_qpel8_mc00_neon, export=1
        mov             r3,  #8
endfunc

        /*
         * 8-byte-wide put functions.  pixels8 is emitted once because its
         * body does not depend on rnd; the interpolating bodies get a
         * rounding and a _no_rnd variant each.
         */
        pixfunc         put_, pixels8,     avg=0
        pixfunc2        put_, pixels8_x2,  avg=0
        pixfunc2        put_, pixels8_y2,  avg=0
        pixfunc2        put_, pixels8_xy2, avg=0

/*
 * ff_avg_h264_qpel8_mc00_neon only loads 8 into r3, the register that the
 * pixel macros use as row count.  It contains no return or branch: the
 * pixfunc line right below emits ff_avg_pixels8_neon as the next function,
 * so execution is expected to continue straight into it.
 * NOTE(review): this relies on function/endfunc from asm.S placing nothing
 * executable between the two bodies - confirm before reordering anything.
 */
function ff_avg_h264_qpel8_mc00_neon, export=1
        mov             r3,  #8
endfunc

        /*
         * 8-byte-wide avg functions.  Only pixfunc is used here, not
         * pixfunc2, so each body is emitted in its rounding form only and no
         * _no_rnd symbols are created for the 8-byte-wide avg functions.
         */
        pixfunc         avg_, pixels8,     avg=1
        pixfunc         avg_, pixels8_x2,  avg=1
        pixfunc         avg_, pixels8_y2,  avg=1
        pixfunc         avg_, pixels8_xy2, avg=1