/*
 * VP8 NEON optimisations
 *
 * Copyright (c) 2010 Rob Clark <rob@ti.com>
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2018 Magnus Röös <mla2.roos@gmail.com>
 * Copyright (c) 2019 Martin Storsjo <martin@martin.st>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ci#include "libavutil/aarch64/asm.S"
27cabdff1aSopenharmony_ci#include "neon.S"
28cabdff1aSopenharmony_ci
// ff_vp8_luma_dc_wht_neon
//
// Two-pass 4x4 add/sub butterfly (Walsh-Hadamard style) transform of the
// 16 int16 values at [x1], with the results scattered through [x0].
//
//   x0: output pointer; 16 halfwords are written, one every 32 bytes
//       (presumably the DC slot of 16 consecutive 16-coefficient blocks;
//       confirm against the C caller)
//   x1: 16 int16 input coefficients; all 32 bytes are zeroed by this routine
//
// Every output is (t + 3) >> 3 (arithmetic shift).
// Clobbers: x0, x1, x3, v0-v7, v16, v30. Only caller-saved SIMD registers
// are named here, so nothing is spilled (assuming transpose_4x4H touches
// only the registers it is given).
function ff_vp8_luma_dc_wht_neon, export=1
        ld1             {v0.4h - v3.4h}, [x1]           // v0..v3 = the four input rows
        movi            v30.8h, #0

        // First pass: butterflies across the four row vectors.
        // The two zeroing stores are interleaved with the arithmetic.
        add             v4.4h,  v0.4h,  v3.4h           // a = r0 + r3
        add             v6.4h,  v1.4h,  v2.4h           // b = r1 + r2
        st1             {v30.8h}, [x1], #16             // clear input bytes 0..15
        sub             v7.4h,  v1.4h,  v2.4h           // c = r1 - r2
        sub             v5.4h,  v0.4h,  v3.4h           // d = r0 - r3
        st1             {v30.8h}, [x1]                  // clear input bytes 16..31
        add             v0.4h,  v4.4h,  v6.4h           // a + b
        add             v1.4h,  v5.4h,  v7.4h           // d + c
        sub             v2.4h,  v4.4h,  v6.4h           // a - b
        sub             v3.4h,  v5.4h,  v7.4h           // d - c

        movi            v16.4h, #3                      // rounding bias for the final >> 3

        // v4..v7 are passed as the remaining macro operands (presumably
        // scratch); they are fully rewritten below before being read again.
        transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7

        // v0 enters every second-pass output exactly once with a positive
        // sign, so biasing v0 alone adds 3 to all 16 results.
        add             v0.4h,  v0.4h,  v16.4h

        // Second pass: same butterflies on the transposed data.
        add             v4.4h,  v0.4h,  v3.4h
        add             v6.4h,  v1.4h,  v2.4h
        sub             v7.4h,  v1.4h,  v2.4h
        sub             v5.4h,  v0.4h,  v3.4h
        add             v0.4h,  v4.4h,  v6.4h
        add             v1.4h,  v5.4h,  v7.4h
        sub             v2.4h,  v4.4h,  v6.4h
        sub             v3.4h,  v5.4h,  v7.4h

        sshr            v0.4h,  v0.4h,  #3
        sshr            v1.4h,  v1.4h,  #3
        sshr            v2.4h,  v2.4h,  #3
        sshr            v3.4h,  v3.4h,  #3

        // Scatter: output index 4*i + r receives lane i of v<r>;
        // consecutive outputs are 32 bytes apart.
        mov             x3,  #32
        st1             {v0.h}[0],  [x0], x3
        st1             {v1.h}[0],  [x0], x3
        st1             {v2.h}[0],  [x0], x3
        st1             {v3.h}[0],  [x0], x3
        st1             {v0.h}[1],  [x0], x3
        st1             {v1.h}[1],  [x0], x3
        st1             {v2.h}[1],  [x0], x3
        st1             {v3.h}[1],  [x0], x3
        st1             {v0.h}[2],  [x0], x3
        st1             {v1.h}[2],  [x0], x3
        st1             {v2.h}[2],  [x0], x3
        st1             {v3.h}[2],  [x0], x3
        st1             {v0.h}[3],  [x0], x3
        st1             {v1.h}[3],  [x0], x3
        st1             {v2.h}[3],  [x0], x3
        st1             {v3.h}[3],  [x0], x3

        ret
endfunc
84cabdff1aSopenharmony_ci
// ff_vp8_idct_add_neon
//
// 4x4 inverse transform of the 16 int16 coefficients at [x1]; the result is
// added to a 4x4 block of 8-bit pixels and stored back with unsigned
// saturation.
//
//   x0: pixel block (4 rows of 4 bytes); read, then overwritten
//   x1: 16 int16 coefficients; all 32 bytes are zeroed by this routine
//   x2: distance in bytes between pixel rows
//
// Fixed-point multipliers (16.16): 20091 and 35468.
//   x * 20091 term: smull + shrn #16, then "+ x"  ->  x + ((x * 20091) >> 16)
//   x * 35468 term: sqdmulh by 35468/2            ->  (x * 35468) >> 16
//     (35468 does not fit a signed 16-bit lane; sqdmulh doubles the product,
//      so half the constant is used)
//
// Clobbers: x0, x1, w4, v0-v7, v16-v27, v29 (all caller-saved, assuming
// transpose_4x4H touches only the registers it is given).
function ff_vp8_idct_add_neon, export=1
        ld1             {v0.8b - v3.8b},  [x1]          // v0..v3 = coefficient rows 0..3
        mov             w4,  #20091
        movk            w4,  #35468/2, lsl #16
        dup             v4.2s, w4                       // v4.h[0] = 20091, v4.h[1] = 35468/2

        // ---- first pass ----
        smull           v26.4s, v1.4h,  v4.h[0]
        smull           v27.4s, v3.4h,  v4.h[0]
        sqdmulh         v20.4h, v1.4h,  v4.h[1]         // (r1 * 35468) >> 16
        sqdmulh         v23.4h, v3.4h,  v4.h[1]         // (r3 * 35468) >> 16
        shrn            v21.4h, v26.4s, #16
        shrn            v22.4h, v27.4s, #16
        add             v21.4h, v21.4h, v1.4h           // r1 + ((r1 * 20091) >> 16)
        add             v22.4h, v22.4h, v3.4h           // r3 + ((r3 * 20091) >> 16)

        add             v16.4h,  v0.4h,   v2.4h         // r0 + r2
        sub             v17.4h,  v0.4h,   v2.4h         // r0 - r2

        add             v18.4h,  v21.4h,  v23.4h
        sub             v19.4h,  v20.4h,  v22.4h

        add             v0.4h,   v16.4h,  v18.4h
        add             v1.4h,   v17.4h,  v19.4h
        sub             v3.4h,   v16.4h,  v18.4h
        sub             v2.4h,   v17.4h,  v19.4h

        transpose_4x4H  v0, v1, v2, v3, v24, v5, v6, v7

        // ---- second pass ----
        // Same arithmetic as above (v20/v21 swap roles). The coefficient
        // clearing stores and the destination pixel loads are interleaved
        // with it.
        movi            v29.8h, #0
        smull           v26.4s,     v1.4h,  v4.h[0]
        st1             {v29.8h},   [x1],   #16         // clear coefficient bytes 0..15
        smull           v27.4s,     v3.4h,  v4.h[0]
        st1             {v29.16b},  [x1]                // clear coefficient bytes 16..31
        sqdmulh         v21.4h,     v1.4h,  v4.h[1]
        sqdmulh         v23.4h,     v3.4h,  v4.h[1]
        shrn            v20.4h,     v26.4s, #16
        shrn            v22.4h,     v27.4s, #16
        add             v20.4h,     v20.4h, v1.4h
        add             v22.4h,     v22.4h, v3.4h
        add             v16.4h,     v0.4h,  v2.4h
        sub             v17.4h,     v0.4h,  v2.4h

        add             v18.4h,     v20.4h, v23.4h
        ld1             {v24.s}[0], [x0],   x2          // pixel row 0
        sub             v19.4h, v21.4h, v22.4h
        ld1             {v25.s}[0], [x0],   x2          // pixel row 1
        add             v0.4h,      v16.4h, v18.4h
        add             v1.4h,      v17.4h, v19.4h
        ld1             {v26.s}[0], [x0],   x2          // pixel row 2
        sub             v3.4h,      v16.4h, v18.4h
        sub             v2.4h,      v17.4h, v19.4h
        ld1             {v27.s}[0], [x0],   x2          // pixel row 3
        srshr           v0.4h,      v0.4h,  #3          // (t + 4) >> 3
        srshr           v1.4h,      v1.4h,  #3
        srshr           v2.4h,      v2.4h,  #3
        srshr           v3.4h,      v3.4h,  #3

        sub             x0,  x0,  x2,  lsl #2           // rewind to pixel row 0

        transpose_4x4H  v0, v1, v2, v3, v5, v6, v7, v16

        // Add residual to pixels. Only the low four lanes carry data that is
        // stored below (4 bytes per row).
        uaddw           v0.8h,  v0.8h, v24.8b
        uaddw           v1.8h,  v1.8h, v25.8b
        uaddw           v2.8h,  v2.8h, v26.8b
        uaddw           v3.8h,  v3.8h, v27.8b
        sqxtun          v0.8b,  v0.8h                   // saturate to 0..255
        sqxtun          v1.8b,  v1.8h
        sqxtun          v2.8b,  v2.8h
        sqxtun          v3.8b,  v3.8h

        st1             {v0.s}[0],  [x0], x2
        st1             {v1.s}[0],  [x0], x2
        st1             {v2.s}[0],  [x0], x2
        st1             {v3.s}[0],  [x0], x2

        ret
endfunc
162cabdff1aSopenharmony_ci
// ff_vp8_idct_dc_add4uv_neon
//
// DC-only reconstruction for four 4x4 blocks arranged 2x2 inside an
// 8x8 pixel area.
//
//   x0: 8x8 pixel area (8 rows of 8 bytes); read, then overwritten
//   x1: first DC coefficient; the four DCs are read 32 bytes apart and each
//       one is zeroed in memory right after it is read
//   x2: distance in bytes between pixel rows
//
// For each DC, (dc + 4) >> 3 is added to its 4x4 pixels with unsigned
// saturation:
//   rows 0-3: columns 0-3 use DC0, columns 4-7 use DC1
//   rows 4-7: columns 0-3 use DC2, columns 4-7 use DC3
//
// Clobbers: x0, x1, x3, v0-v7, v16-v27 (all caller-saved).
function ff_vp8_idct_dc_add4uv_neon, export=1
        movi            v0.4h,  #0
        mov             x3,     #32                     // byte distance between DCs
        ld1r            {v16.4h},  [x1]                 // DC0 in 4 lanes
        st1             {v0.h}[0], [x1], x3             // clear DC0, step to DC1
        ld1r            {v17.4h},  [x1]
        st1             {v0.h}[0], [x1], x3
        ld1r            {v18.4h},  [x1]
        st1             {v0.h}[0], [x1], x3
        ld1r            {v19.4h},  [x1]
        st1             {v0.h}[0], [x1], x3
        ins             v16.d[1],  v17.d[0]             // v16 = DC0 x4 | DC1 x4
        ins             v18.d[1],  v19.d[0]             // v18 = DC2 x4 | DC3 x4
        mov             x3,  x0                         // x3 = store cursor, x0 = load cursor
        srshr           v16.8h,    v16.8h,  #3            // dc = (dc + 4) >> 3
        ld1             {v0.8b},   [x0], x2
        srshr           v18.8h,    v18.8h,  #3
        ld1             {v1.8b},   [x0], x2
        uaddw           v20.8h,    v16.8h, v0.8b        // row 0 + DC0/DC1
        ld1             {v2.8b},   [x0], x2
        uaddw           v0.8h,     v16.8h, v1.8b        // row 1
        ld1             {v3.8b},   [x0], x2
        uaddw           v22.8h,    v16.8h, v2.8b        // row 2
        ld1             {v4.8b},   [x0], x2
        uaddw           v2.8h,     v16.8h, v3.8b        // row 3
        ld1             {v5.8b},   [x0], x2
        uaddw           v24.8h,    v18.8h, v4.8b        // row 4 + DC2/DC3
        ld1             {v6.8b},   [x0], x2
        uaddw           v4.8h,     v18.8h, v5.8b        // row 5
        ld1             {v7.8b},   [x0], x2
        uaddw           v26.8h,    v18.8h, v6.8b        // row 6
        sqxtun          v20.8b,    v20.8h               // saturate to 0..255
        uaddw           v6.8h,     v18.8h, v7.8b        // row 7
        sqxtun          v21.8b,    v0.8h
        sqxtun          v22.8b,    v22.8h
        st1             {v20.8b},  [x3], x2
        sqxtun          v23.8b,    v2.8h
        st1             {v21.8b},  [x3], x2
        sqxtun          v24.8b,    v24.8h
        st1             {v22.8b},  [x3], x2
        sqxtun          v25.8b,    v4.8h
        st1             {v23.8b},  [x3], x2
        sqxtun          v26.8b,    v26.8h
        st1             {v24.8b},  [x3], x2
        sqxtun          v27.8b,    v6.8h
        st1             {v25.8b},  [x3], x2
        st1             {v26.8b},  [x3], x2
        st1             {v27.8b},  [x3], x2

        ret
endfunc
214cabdff1aSopenharmony_ci
// ff_vp8_idct_dc_add4y_neon
//
// DC-only reconstruction for four 4x4 blocks lying side by side in a
// 16-pixel-wide, 4-row strip.
//
//   x0: pixel strip (4 rows of 16 bytes); read, then overwritten
//   x1: first DC coefficient; the four DCs are read 32 bytes apart and each
//       one is zeroed in memory right after it is read
//   x2: distance in bytes between pixel rows
//
// For each DC, (dc + 4) >> 3 is added to its 4x4 pixels with unsigned
// saturation: columns 0-3 use DC0, 4-7 DC1, 8-11 DC2, 12-15 DC3.
//
// Clobbers: x0, x1, x3, v0-v3, v16-v23 (all caller-saved).
function ff_vp8_idct_dc_add4y_neon, export=1
        movi            v0.16b,  #0
        mov             x3,  #32                        // byte distance between DCs
        ld1r            {v16.4h},    [x1]               // DC0 in 4 lanes
        st1             {v0.h}[0],   [x1], x3           // clear DC0, step to DC1
        ld1r            {v17.4h},    [x1]
        st1             {v0.h}[0],   [x1], x3
        zip1            v16.2d,      v16.2d, v17.2d     // v16 = DC0 x4 | DC1 x4
        ld1r            {v18.4h},    [x1]
        st1             {v0.h}[0],   [x1], x3
        ld1r            {v19.4h},    [x1]
        st1             {v0.h}[0],   [x1], x3
        zip1            v18.2d,      v18.2d, v19.2d     // v18 = DC2 x4 | DC3 x4
        srshr           v16.8h,      v16.8h,  #3            // dc = (dc + 4) >> 3
        ld1             {v0.16b},     [x0], x2
        srshr           v18.8h,       v18.8h,  #3
        ld1             {v1.16b},     [x0], x2
        uaddw           v20.8h,       v16.8h,  v0.8b    // row 0, columns 0-7
        ld1             {v2.16b},     [x0], x2
        uaddw2          v0.8h,        v18.8h,   v0.16b  // row 0, columns 8-15
        ld1             {v3.16b},     [x0], x2
        uaddw           v21.8h, v16.8h,  v1.8b          // row 1
        uaddw2          v1.8h,  v18.8h,  v1.16b
        uaddw           v22.8h, v16.8h,  v2.8b          // row 2
        uaddw2          v2.8h,  v18.8h,  v2.16b
        uaddw           v23.8h, v16.8h,  v3.8b          // row 3
        uaddw2          v3.8h,  v18.8h,  v3.16b
        sub             x0,  x0,  x2,  lsl #2           // rewind to row 0 for the stores
        sqxtun          v20.8b,  v20.8h                 // saturate to 0..255, low half
        sqxtun2         v20.16b, v0.8h                  //                     high half
        sqxtun          v21.8b,  v21.8h
        sqxtun2         v21.16b, v1.8h
        sqxtun          v22.8b,  v22.8h
        st1             {v20.16b},    [x0], x2
        sqxtun2         v22.16b, v2.8h
        st1             {v21.16b},    [x0], x2
        sqxtun          v23.8b,  v23.8h
        st1             {v22.16b},    [x0], x2
        sqxtun2         v23.16b, v3.8h
        st1             {v23.16b},    [x0], x2

        ret
endfunc
258cabdff1aSopenharmony_ci
// ff_vp8_idct_dc_add_neon
//
// DC-only reconstruction of a single 4x4 block: (dc + 4) >> 3 is added to
// every pixel with unsigned saturation.
//
//   x0: pixel block (4 rows of 4 bytes); read, then overwritten
//   x1: the int16 DC coefficient; zeroed by this routine
//   x2: distance in bytes between pixel rows
//
// Clobbers: x0, w3, v0-v4 (all caller-saved).
function ff_vp8_idct_dc_add_neon, export=1
        mov             w3,       #0
        ld1r            {v2.8h},  [x1]                  // DC in all 8 lanes
        strh            w3,       [x1]                  // clear the coefficient
        srshr           v2.8h,  v2.8h,  #3              // dc = (dc + 4) >> 3
        // Two pixel rows are packed per register: v0 = rows 0|1, v1 = rows 2|3.
        ld1             {v0.s}[0],  [x0], x2
        ld1             {v0.s}[1],  [x0], x2
        uaddw           v3.8h,  v2.8h,  v0.8b
        ld1             {v1.s}[0],  [x0], x2
        ld1             {v1.s}[1],  [x0], x2
        uaddw           v4.8h,  v2.8h,  v1.8b
        sqxtun          v0.8b,  v3.8h                   // saturate to 0..255
        sqxtun          v1.8b,  v4.8h
        sub             x0,  x0,  x2, lsl #2            // rewind to row 0 for the stores
        st1             {v0.s}[0],  [x0], x2
        st1             {v0.s}[1],  [x0], x2
        st1             {v1.s}[0],  [x0], x2
        st1             {v1.s}[1],  [x0], x2
        ret
endfunc
279cabdff1aSopenharmony_ci
// Register layout:
//   P3..Q3 -> v0..v7
//   flim_E -> v22
//   flim_I -> v23
//   hev_thresh -> the W register passed as the macro's hev_thresh argument
//
// vp8_loop_filter: loop-filter core, 16 pixel positions per invocation.
//
// Macro arguments:
//   inner      - non-zero: inner-edge filter (modifies P1..Q1)
//   simple     - non-zero: simple filter (reads P1..Q1, modifies P0/Q0)
//   hev_thresh - W register holding the hev threshold (read only when
//                simple == 0)
//   With inner == 0 and simple == 0 the macroblock-edge filter is applied
//   (modifies P2..Q2).
//
// Inputs:   v0..v7 = P3, P2, P1, P0, Q0, Q1, Q2, Q3 (only v2..v5 are read
//           when simple), v22 = flim_E, v23 = flim_I (not read when simple)
// Outputs:  filtered pixels in place, as listed per mode above; v0 and v7
//           are never written
// Clobbers: v16-v23
.macro  vp8_loop_filter, inner=0, simple=0, hev_thresh
    .if \simple
        uabd            v17.16b, v3.16b,  v4.16b      // abs(P0-Q0)
        uabd            v23.16b, v2.16b,  v5.16b      // abs(P1-Q1)
        uqadd           v17.16b, v17.16b, v17.16b     // abs(P0-Q0) * 2
        ushr            v18.16b, v23.16b, #1          // abs(P1-Q1) / 2
        uqadd           v19.16b, v17.16b, v18.16b     // (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
        movi            v21.16b, #0x80
        cmhs            v16.16b, v22.16b, v19.16b     // (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim
    .else
        // calculate hev and normal_limit:
        uabd            v20.16b, v2.16b,  v3.16b      // abs(P1-P0)
        uabd            v21.16b, v5.16b,  v4.16b      // abs(Q1-Q0)
        uabd            v18.16b, v0.16b,  v1.16b      // abs(P3-P2)
        uabd            v19.16b, v1.16b,  v2.16b      // abs(P2-P1)
        cmhs            v16.16b, v23.16b, v20.16b     // abs(P1-P0) <= flim_I
        cmhs            v17.16b, v23.16b, v21.16b     // abs(Q1-Q0) <= flim_I
        cmhs            v18.16b, v23.16b, v18.16b     // abs(P3-P2) <= flim_I
        cmhs            v19.16b, v23.16b, v19.16b     // abs(P2-P1) <= flim_I
        and             v16.16b, v17.16b, v16.16b
        uabd            v17.16b, v7.16b,  v6.16b      // abs(Q3-Q2)
        and             v16.16b, v16.16b, v19.16b
        uabd            v19.16b, v6.16b,  v5.16b      // abs(Q2-Q1)
        and             v16.16b, v16.16b, v18.16b
        cmhs            v18.16b, v23.16b, v17.16b     // abs(Q3-Q2) <= flim_I
        cmhs            v19.16b, v23.16b, v19.16b     // abs(Q2-Q1) <= flim_I
        uabd            v17.16b, v3.16b,  v4.16b      // abs(P0-Q0)
        uabd            v23.16b, v2.16b,  v5.16b      // abs(P1-Q1); flim_I is dead from here
        and             v16.16b, v16.16b, v18.16b
        uqadd           v17.16b, v17.16b, v17.16b     // abs(P0-Q0) * 2
        and             v16.16b, v16.16b, v19.16b
        ushr            v18.16b, v23.16b, #1          // abs(P1-Q1) / 2
        dup             v23.16b, \hev_thresh          // hev_thresh
        uqadd           v19.16b, v17.16b, v18.16b     // (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
        cmhi            v20.16b, v20.16b, v23.16b     // abs(P1-P0) > hev_thresh
        cmhs            v19.16b, v22.16b, v19.16b     // (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim_E
        cmhi            v22.16b, v21.16b, v23.16b     // abs(Q1-Q0) > hev_thresh
        and             v16.16b, v16.16b, v19.16b
        movi            v21.16b, #0x80
        orr             v17.16b, v20.16b, v22.16b
    .endif

        // at this point:
        //   v16: normal_limit (per-byte mask of positions to filter)
        //   v17: hev (non-simple modes only; in simple mode v17 holds
        //        abs(P0-Q0)*2 and is not read again)
        //   v21: #0x80

        // convert to signed value:
        eor            v3.16b, v3.16b, v21.16b           // PS0 = P0 ^ 0x80
        eor            v4.16b, v4.16b, v21.16b           // QS0 = Q0 ^ 0x80

        movi           v20.8h, #3
        ssubl          v18.8h, v4.8b,  v3.8b             // QS0 - PS0
        ssubl2         v19.8h, v4.16b, v3.16b            //   (widened to 16bit)
        eor            v2.16b, v2.16b, v21.16b           // PS1 = P1 ^ 0x80
        eor            v5.16b, v5.16b, v21.16b           // QS1 = Q1 ^ 0x80
        mul            v18.8h, v18.8h, v20.8h            // w = 3 * (QS0 - PS0)
        mul            v19.8h, v19.8h, v20.8h

        sqsub          v20.16b, v2.16b, v5.16b           // clamp(PS1-QS1)
        movi           v22.16b, #4
        movi           v23.16b, #3
    .if \inner
        and            v20.16b, v20.16b, v17.16b         // keep clamp(PS1-QS1) only where hev
    .endif
        saddw          v18.8h,  v18.8h, v20.8b           // w += clamp(PS1-QS1)
        saddw2         v19.8h,  v19.8h, v20.16b
        sqxtn          v18.8b,  v18.8h                   // narrow result back into v18
        sqxtn2         v18.16b, v19.8h
    .if !\inner && !\simple
        eor            v1.16b,  v1.16b,  v21.16b         // PS2 = P2 ^ 0x80
        eor            v6.16b,  v6.16b,  v21.16b         // QS2 = Q2 ^ 0x80
    .endif
        and            v18.16b, v18.16b, v16.16b         // w &= normal_limit

        // registers used at this point..
        //   v0 -> P3  (don't corrupt)
        //   v2-v5 -> PS1-QS1; v1/v6 -> PS2/QS2 in the macroblock-edge case only
        //   v7 -> Q3  (don't corrupt)
        //   v17 -> hev (non-simple modes)
        //   v18 -> w
        //   v21 -> #0x80
        //   v22 -> #4
        //   v23 -> #3
        //   v16, v19, v20 -> free for reuse
        //
        // filter_common:   is4tap==1
        //   c1 = clamp(w + 4) >> 3;
        //   c2 = clamp(w + 3) >> 3;
        //   Q0 = s2u(QS0 - c1);
        //   P0 = s2u(PS0 + c2);

    .if \simple
        sqadd          v19.16b, v18.16b, v22.16b           // c1 = clamp(w+4)
        sqadd          v20.16b, v18.16b, v23.16b           // c2 = clamp(w+3)
        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
        eor            v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
        eor            v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
        eor            v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80 (unchanged pixel, bias undone)
        eor            v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80 (unchanged pixel, bias undone)
    .elseif \inner
        // the !is4tap case of filter_common, only used for inner blocks
        //   c3 = ((c1&~hev) + 1) >> 1;
        //   Q1 = s2u(QS1 - c3);
        //   P1 = s2u(PS1 + c3);
        sqadd          v19.16b, v18.16b, v22.16b           // c1 = clamp(w+4)
        sqadd          v20.16b, v18.16b, v23.16b           // c2 = clamp(w+3)
        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
        bic            v19.16b, v19.16b, v17.16b           // c1 & ~hev
        eor            v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
        srshr          v19.16b, v19.16b, #1                // c3 = ((c1&~hev) + 1) >> 1
        eor            v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
        sqsub          v5.16b,  v5.16b,  v19.16b           // QS1 = clamp(QS1-c3)
        sqadd          v2.16b,  v2.16b,  v19.16b           // PS1 = clamp(PS1+c3)
        eor            v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
        eor            v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
    .else
        // Macroblock edge: hev positions get filter_common on P0/Q0,
        // the remaining positions get filter_mbedge on P2..Q2.
        and            v20.16b, v18.16b, v17.16b           // w & hev
        sqadd          v19.16b, v20.16b, v22.16b           // c1 = clamp((w&hev)+4)
        sqadd          v20.16b, v20.16b, v23.16b           // c2 = clamp((w&hev)+3)
        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
        bic            v18.16b, v18.16b, v17.16b           // w &= ~hev
        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)

        // filter_mbedge:
        //   a = clamp((27*w + 63) >> 7);
        //   Q0 = s2u(QS0 - a);
        //   P0 = s2u(PS0 + a);
        //   a = clamp((18*w + 63) >> 7);
        //   Q1 = s2u(QS1 - a);
        //   P1 = s2u(PS1 + a);
        //   a = clamp((9*w + 63) >> 7);
        //   Q2 = s2u(QS2 - a);
        //   P2 = s2u(PS2 + a);
        movi           v17.8h,  #63
        sshll          v22.8h,  v18.8b, #3               // 8*w (low half, widened)
        sshll2         v23.8h,  v18.16b, #3              // 8*w (high half)
        saddw          v22.8h,  v22.8h, v18.8b           // 9*w
        saddw2         v23.8h,  v23.8h, v18.16b
        add            v16.8h,  v17.8h, v22.8h
        add            v17.8h,  v17.8h, v23.8h           //  9*w + 63
        add            v19.8h,  v16.8h, v22.8h
        add            v20.8h,  v17.8h, v23.8h           // 18*w + 63
        add            v22.8h,  v19.8h, v22.8h
        add            v23.8h,  v20.8h, v23.8h           // 27*w + 63
        sqshrn         v16.8b,  v16.8h,  #7
        sqshrn2        v16.16b, v17.8h, #7              // clamp(( 9*w + 63)>>7)
        sqshrn         v19.8b,  v19.8h, #7
        sqshrn2        v19.16b, v20.8h, #7              // clamp((18*w + 63)>>7)
        sqshrn         v22.8b,  v22.8h, #7
        sqshrn2        v22.16b, v23.8h, #7              // clamp((27*w + 63)>>7)
        sqadd          v1.16b,  v1.16b,  v16.16b        // PS2 = clamp(PS2+a)
        sqsub          v6.16b,  v6.16b,  v16.16b        // QS2 = clamp(QS2-a)
        sqadd          v2.16b,  v2.16b,  v19.16b        // PS1 = clamp(PS1+a)
        sqsub          v5.16b,  v5.16b,  v19.16b        // QS1 = clamp(QS1-a)
        sqadd          v3.16b,  v3.16b,  v22.16b        // PS0 = clamp(PS0+a)
        sqsub          v4.16b,  v4.16b,  v22.16b        // QS0 = clamp(QS0-a)
        eor            v3.16b,  v3.16b,  v21.16b        // P0 = PS0 ^ 0x80
        eor            v4.16b,  v4.16b,  v21.16b        // Q0 = QS0 ^ 0x80
        eor            v2.16b,  v2.16b,  v21.16b        // P1 = PS1 ^ 0x80
        eor            v5.16b,  v5.16b,  v21.16b        // Q1 = QS1 ^ 0x80
        eor            v1.16b,  v1.16b,  v21.16b        // P2 = PS2 ^ 0x80
        eor            v6.16b,  v6.16b,  v21.16b        // Q2 = QS2 ^ 0x80
    .endif
.endm
458cabdff1aSopenharmony_ci
// Loop filter for a horizontal edge of a 16-pixel-wide block (the filter runs
// vertically, across rows). Expands to ff_vp8_v_loop_filter16<name>_neon.
//   x0 = address of the Q0 row (the P rows lie above it), x1 = row stride
//   w2 = flim_E, w3 = flim_I (not read when simple=1), w4 = hev_thresh
// inner/simple are forwarded to the vp8_loop_filter macro defined earlier in
// this file, which is applied to the rows loaded into v0-v7.
// Rows P2..Q2 are written back (only P1..Q1 when simple=1).
459cabdff1aSopenharmony_ci.macro  vp8_v_loop_filter16 name, inner=0, simple=0
460cabdff1aSopenharmony_cifunction ff_vp8_v_loop_filter16\name\()_neon, export=1
461cabdff1aSopenharmony_ci        sub             x0,  x0,  x1,  lsl #1+!\simple    // rewind to P3 (4 rows), or to P1 (2 rows) if simple
462cabdff1aSopenharmony_ci
463cabdff1aSopenharmony_ci        // Load pixels:
464cabdff1aSopenharmony_ci    .if !\simple
465cabdff1aSopenharmony_ci        ld1             {v0.16b},     [x0], x1 // P3
466cabdff1aSopenharmony_ci        ld1             {v1.16b},     [x0], x1 // P2
467cabdff1aSopenharmony_ci    .endif
468cabdff1aSopenharmony_ci        ld1             {v2.16b},     [x0], x1 // P1
469cabdff1aSopenharmony_ci        ld1             {v3.16b},     [x0], x1 // P0
470cabdff1aSopenharmony_ci        ld1             {v4.16b},     [x0], x1 // Q0
471cabdff1aSopenharmony_ci        ld1             {v5.16b},     [x0], x1 // Q1
472cabdff1aSopenharmony_ci    .if !\simple
473cabdff1aSopenharmony_ci        ld1             {v6.16b},     [x0], x1 // Q2
474cabdff1aSopenharmony_ci        ld1             {v7.16b},     [x0]     // Q3 (no post-increment: x0 stays on the Q3 row)
475cabdff1aSopenharmony_ci        dup             v23.16b, w3                 // flim_I
476cabdff1aSopenharmony_ci    .endif
477cabdff1aSopenharmony_ci        dup             v22.16b, w2                 // flim_E
478cabdff1aSopenharmony_ci
479cabdff1aSopenharmony_ci        vp8_loop_filter inner=\inner, simple=\simple, hev_thresh=w4
480cabdff1aSopenharmony_ci
481cabdff1aSopenharmony_ci        // back up to the first row to store: 4 rows (-> P1) when simple,
        // 4 + 2 = 6 rows (-> P2) otherwise
482cabdff1aSopenharmony_ci        sub             x0,  x0,  x1,  lsl #2
483cabdff1aSopenharmony_ci    .if !\simple
484cabdff1aSopenharmony_ci        sub             x0,  x0,  x1,  lsl #1
485cabdff1aSopenharmony_ci
486cabdff1aSopenharmony_ci        // Store pixels:
487cabdff1aSopenharmony_ci        st1             {v1.16b},     [x0], x1 // P2
488cabdff1aSopenharmony_ci    .endif
489cabdff1aSopenharmony_ci        st1             {v2.16b},     [x0], x1 // P1
490cabdff1aSopenharmony_ci        st1             {v3.16b},     [x0], x1 // P0
491cabdff1aSopenharmony_ci        st1             {v4.16b},     [x0], x1 // Q0
492cabdff1aSopenharmony_ci        st1             {v5.16b},     [x0], x1 // Q1
493cabdff1aSopenharmony_ci    .if !\simple
494cabdff1aSopenharmony_ci        st1             {v6.16b},     [x0]     // Q2
495cabdff1aSopenharmony_ci    .endif
496cabdff1aSopenharmony_ci
497cabdff1aSopenharmony_ci        ret
498cabdff1aSopenharmony_ciendfunc
499cabdff1aSopenharmony_ci.endm
500cabdff1aSopenharmony_ci
// Instantiate the default, inner and simple variants.
501cabdff1aSopenharmony_civp8_v_loop_filter16
502cabdff1aSopenharmony_civp8_v_loop_filter16 _inner,  inner=1
503cabdff1aSopenharmony_civp8_v_loop_filter16 _simple, simple=1
504cabdff1aSopenharmony_ci
// Loop filter for a horizontal edge of two 8-pixel-wide chroma blocks at once.
// Expands to ff_vp8_v_loop_filter8uv<name>_neon.
//   x0, x1 = addresses of the Q0 row in the two chroma planes (u, v)
//   x2 = row stride (shared by both planes)
//   w3 = flim_E, w4 = flim_I, w5 = hev_thresh
// Each vector register holds one row: the x0 plane in the low 64 bits and the
// x1 plane in the high 64 bits, so a single vp8_loop_filter pass handles both.
// Rows P2..Q2 of both planes are written back.
505cabdff1aSopenharmony_ci.macro  vp8_v_loop_filter8uv name, inner=0
506cabdff1aSopenharmony_cifunction ff_vp8_v_loop_filter8uv\name\()_neon, export=1
507cabdff1aSopenharmony_ci        sub             x0,  x0,  x2,  lsl #2       // rewind 4 rows to P3
508cabdff1aSopenharmony_ci        sub             x1,  x1,  x2,  lsl #2       // rewind 4 rows to P3
509cabdff1aSopenharmony_ci        // Load pixels:
510cabdff1aSopenharmony_ci        ld1          {v0.d}[0],     [x0], x2  // P3
511cabdff1aSopenharmony_ci        ld1          {v0.d}[1],     [x1], x2  // P3
512cabdff1aSopenharmony_ci        ld1          {v1.d}[0],     [x0], x2  // P2
513cabdff1aSopenharmony_ci        ld1          {v1.d}[1],     [x1], x2  // P2
514cabdff1aSopenharmony_ci        ld1          {v2.d}[0],     [x0], x2  // P1
515cabdff1aSopenharmony_ci        ld1          {v2.d}[1],     [x1], x2  // P1
516cabdff1aSopenharmony_ci        ld1          {v3.d}[0],     [x0], x2  // P0
517cabdff1aSopenharmony_ci        ld1          {v3.d}[1],     [x1], x2  // P0
518cabdff1aSopenharmony_ci        ld1          {v4.d}[0],     [x0], x2  // Q0
519cabdff1aSopenharmony_ci        ld1          {v4.d}[1],     [x1], x2  // Q0
520cabdff1aSopenharmony_ci        ld1          {v5.d}[0],     [x0], x2  // Q1
521cabdff1aSopenharmony_ci        ld1          {v5.d}[1],     [x1], x2  // Q1
522cabdff1aSopenharmony_ci        ld1          {v6.d}[0],     [x0], x2  // Q2
523cabdff1aSopenharmony_ci        ld1          {v6.d}[1],     [x1], x2  // Q2
524cabdff1aSopenharmony_ci        ld1          {v7.d}[0],     [x0]      // Q3 (pointer left on the Q3 row)
525cabdff1aSopenharmony_ci        ld1          {v7.d}[1],     [x1]      // Q3 (pointer left on the Q3 row)
526cabdff1aSopenharmony_ci
527cabdff1aSopenharmony_ci        dup          v22.16b, w3                 // flim_E
528cabdff1aSopenharmony_ci        dup          v23.16b, w4                 // flim_I
529cabdff1aSopenharmony_ci
530cabdff1aSopenharmony_ci        vp8_loop_filter inner=\inner, hev_thresh=w5
531cabdff1aSopenharmony_ci
532cabdff1aSopenharmony_ci        // back up from Q3 to P2:  u,v -= stride * 6  (done as 4 + 2)
533cabdff1aSopenharmony_ci        sub          x0,  x0,  x2,  lsl #2
534cabdff1aSopenharmony_ci        sub          x1,  x1,  x2,  lsl #2
535cabdff1aSopenharmony_ci        sub          x0,  x0,  x2,  lsl #1
536cabdff1aSopenharmony_ci        sub          x1,  x1,  x2,  lsl #1
537cabdff1aSopenharmony_ci
538cabdff1aSopenharmony_ci        // Store pixels:
539cabdff1aSopenharmony_ci
540cabdff1aSopenharmony_ci        st1          {v1.d}[0],     [x0], x2  // P2
541cabdff1aSopenharmony_ci        st1          {v1.d}[1],     [x1], x2  // P2
542cabdff1aSopenharmony_ci        st1          {v2.d}[0],     [x0], x2  // P1
543cabdff1aSopenharmony_ci        st1          {v2.d}[1],     [x1], x2  // P1
544cabdff1aSopenharmony_ci        st1          {v3.d}[0],     [x0], x2  // P0
545cabdff1aSopenharmony_ci        st1          {v3.d}[1],     [x1], x2  // P0
546cabdff1aSopenharmony_ci        st1          {v4.d}[0],     [x0], x2  // Q0
547cabdff1aSopenharmony_ci        st1          {v4.d}[1],     [x1], x2  // Q0
548cabdff1aSopenharmony_ci        st1          {v5.d}[0],     [x0], x2  // Q1
549cabdff1aSopenharmony_ci        st1          {v5.d}[1],     [x1], x2  // Q1
550cabdff1aSopenharmony_ci        st1          {v6.d}[0],     [x0]      // Q2
551cabdff1aSopenharmony_ci        st1          {v6.d}[1],     [x1]      // Q2
552cabdff1aSopenharmony_ci
553cabdff1aSopenharmony_ci        ret
554cabdff1aSopenharmony_ciendfunc
555cabdff1aSopenharmony_ci.endm
556cabdff1aSopenharmony_ci
// Instantiate the default and inner variants.
557cabdff1aSopenharmony_civp8_v_loop_filter8uv
558cabdff1aSopenharmony_civp8_v_loop_filter8uv _inner, inner=1
559cabdff1aSopenharmony_ci
// Loop filter for a vertical edge spanning 16 rows (the filter runs
// horizontally, across columns). Expands to ff_vp8_h_loop_filter16<name>_neon.
//   x0 = address of the first pixel right of the edge in the top row
//   x1 = row stride
//   w2 = flim_E, w3 = flim_I (not read when simple=1), w4 = hev_thresh
// Eight bytes (4 left + 4 right of the edge) are read from each of 16 rows:
// rows 0-7 go to the low halves of v0-v7, rows 8-15 to the high halves.
// transpose_8x16B is not defined in this part of the file (presumably it comes
// from neon.S); it is expected to turn the rows into the eight columns
// P3..Q3 in v0..v7 that vp8_loop_filter works on, and back again afterwards.
// All 8 columns of all 16 rows are stored back, in the simple variant as well.
560cabdff1aSopenharmony_ci.macro  vp8_h_loop_filter16 name, inner=0, simple=0
561cabdff1aSopenharmony_cifunction ff_vp8_h_loop_filter16\name\()_neon, export=1
562cabdff1aSopenharmony_ci
563cabdff1aSopenharmony_ci        sub             x0,  x0,  #4            // start 4 pixels left of the edge
564cabdff1aSopenharmony_ci        // Load pixels:
565cabdff1aSopenharmony_ci        ld1             {v0.d}[0], [x0], x1
566cabdff1aSopenharmony_ci        ld1             {v1.d}[0], [x0], x1
567cabdff1aSopenharmony_ci        ld1             {v2.d}[0], [x0], x1
568cabdff1aSopenharmony_ci        ld1             {v3.d}[0], [x0], x1
569cabdff1aSopenharmony_ci        ld1             {v4.d}[0], [x0], x1
570cabdff1aSopenharmony_ci        ld1             {v5.d}[0], [x0], x1
571cabdff1aSopenharmony_ci        ld1             {v6.d}[0], [x0], x1
572cabdff1aSopenharmony_ci        ld1             {v7.d}[0], [x0], x1
573cabdff1aSopenharmony_ci        ld1             {v0.d}[1], [x0], x1
574cabdff1aSopenharmony_ci        ld1             {v1.d}[1], [x0], x1
575cabdff1aSopenharmony_ci        ld1             {v2.d}[1], [x0], x1
576cabdff1aSopenharmony_ci        ld1             {v3.d}[1], [x0], x1
577cabdff1aSopenharmony_ci        ld1             {v4.d}[1], [x0], x1
578cabdff1aSopenharmony_ci        ld1             {v5.d}[1], [x0], x1
579cabdff1aSopenharmony_ci        ld1             {v6.d}[1], [x0], x1
580cabdff1aSopenharmony_ci        ld1             {v7.d}[1], [x0], x1     // x0 is now 16 rows below the start
581cabdff1aSopenharmony_ci
582cabdff1aSopenharmony_ci        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
583cabdff1aSopenharmony_ci
584cabdff1aSopenharmony_ci        dup             v22.16b, w2                 // flim_E
585cabdff1aSopenharmony_ci    .if !\simple
586cabdff1aSopenharmony_ci        dup             v23.16b, w3                 // flim_I
587cabdff1aSopenharmony_ci    .endif
588cabdff1aSopenharmony_ci
589cabdff1aSopenharmony_ci        vp8_loop_filter inner=\inner, simple=\simple, hev_thresh=w4
590cabdff1aSopenharmony_ci
591cabdff1aSopenharmony_ci        sub             x0,  x0,  x1, lsl #4    // backup 16 rows
592cabdff1aSopenharmony_ci
593cabdff1aSopenharmony_ci        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
594cabdff1aSopenharmony_ci
595cabdff1aSopenharmony_ci        // Store pixels:
596cabdff1aSopenharmony_ci        st1             {v0.d}[0], [x0], x1
597cabdff1aSopenharmony_ci        st1             {v1.d}[0], [x0], x1
598cabdff1aSopenharmony_ci        st1             {v2.d}[0], [x0], x1
599cabdff1aSopenharmony_ci        st1             {v3.d}[0], [x0], x1
600cabdff1aSopenharmony_ci        st1             {v4.d}[0], [x0], x1
601cabdff1aSopenharmony_ci        st1             {v5.d}[0], [x0], x1
602cabdff1aSopenharmony_ci        st1             {v6.d}[0], [x0], x1
603cabdff1aSopenharmony_ci        st1             {v7.d}[0], [x0], x1
604cabdff1aSopenharmony_ci        st1             {v0.d}[1], [x0], x1
605cabdff1aSopenharmony_ci        st1             {v1.d}[1], [x0], x1
606cabdff1aSopenharmony_ci        st1             {v2.d}[1], [x0], x1
607cabdff1aSopenharmony_ci        st1             {v3.d}[1], [x0], x1
608cabdff1aSopenharmony_ci        st1             {v4.d}[1], [x0], x1
609cabdff1aSopenharmony_ci        st1             {v5.d}[1], [x0], x1
610cabdff1aSopenharmony_ci        st1             {v6.d}[1], [x0], x1
611cabdff1aSopenharmony_ci        st1             {v7.d}[1], [x0]
612cabdff1aSopenharmony_ci
613cabdff1aSopenharmony_ci        ret
614cabdff1aSopenharmony_ciendfunc
615cabdff1aSopenharmony_ci.endm
616cabdff1aSopenharmony_ci
// Instantiate the default, inner and simple variants.
617cabdff1aSopenharmony_civp8_h_loop_filter16
618cabdff1aSopenharmony_civp8_h_loop_filter16 _inner,  inner=1
619cabdff1aSopenharmony_civp8_h_loop_filter16 _simple, simple=1
620cabdff1aSopenharmony_ci
// Loop filter for a vertical edge of two 8-row chroma blocks at once.
// Expands to ff_vp8_h_loop_filter8uv<name>_neon.
//   x0 = u, x1 = v: address of the first pixel right of the edge, top row
//   x2 = row stride (shared by both planes)
//   w3 = flim_E, w4 = flim_I, w5 = hev_thresh
// Eight bytes straddling the edge are read from each of 8 rows per plane;
// u rows land in the low halves of v0-v7 and v rows in the high halves.
// transpose_8x16B is not defined in this part of the file (presumably it comes
// from neon.S); it is expected to produce the columns P3..Q3 in v0..v7 for
// vp8_loop_filter and to restore the row layout afterwards.
621cabdff1aSopenharmony_ci.macro  vp8_h_loop_filter8uv name, inner=0
622cabdff1aSopenharmony_cifunction ff_vp8_h_loop_filter8uv\name\()_neon, export=1
623cabdff1aSopenharmony_ci        sub             x0,  x0,  #4            // start 4 pixels left of the edge (u)
624cabdff1aSopenharmony_ci        sub             x1,  x1,  #4            // start 4 pixels left of the edge (v)
625cabdff1aSopenharmony_ci
626cabdff1aSopenharmony_ci        // Load pixels:
627cabdff1aSopenharmony_ci        ld1          {v0.d}[0],     [x0], x2 // load u
628cabdff1aSopenharmony_ci        ld1          {v0.d}[1],     [x1], x2 // load v
629cabdff1aSopenharmony_ci        ld1          {v1.d}[0],     [x0], x2
630cabdff1aSopenharmony_ci        ld1          {v1.d}[1],     [x1], x2
631cabdff1aSopenharmony_ci        ld1          {v2.d}[0],     [x0], x2
632cabdff1aSopenharmony_ci        ld1          {v2.d}[1],     [x1], x2
633cabdff1aSopenharmony_ci        ld1          {v3.d}[0],     [x0], x2
634cabdff1aSopenharmony_ci        ld1          {v3.d}[1],     [x1], x2
635cabdff1aSopenharmony_ci        ld1          {v4.d}[0],     [x0], x2
636cabdff1aSopenharmony_ci        ld1          {v4.d}[1],     [x1], x2
637cabdff1aSopenharmony_ci        ld1          {v5.d}[0],     [x0], x2
638cabdff1aSopenharmony_ci        ld1          {v5.d}[1],     [x1], x2
639cabdff1aSopenharmony_ci        ld1          {v6.d}[0],     [x0], x2
640cabdff1aSopenharmony_ci        ld1          {v6.d}[1],     [x1], x2
641cabdff1aSopenharmony_ci        ld1          {v7.d}[0],     [x0], x2
642cabdff1aSopenharmony_ci        ld1          {v7.d}[1],     [x1], x2 // both pointers are now 8 rows below the start
643cabdff1aSopenharmony_ci
644cabdff1aSopenharmony_ci        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
645cabdff1aSopenharmony_ci
646cabdff1aSopenharmony_ci        dup             v22.16b, w3                 // flim_E
647cabdff1aSopenharmony_ci        dup             v23.16b, w4                 // flim_I
648cabdff1aSopenharmony_ci
649cabdff1aSopenharmony_ci        vp8_loop_filter inner=\inner, hev_thresh=w5
650cabdff1aSopenharmony_ci
651cabdff1aSopenharmony_ci        sub             x0,  x0,  x2, lsl #3    // backup u 8 rows
652cabdff1aSopenharmony_ci        sub             x1,  x1,  x2, lsl #3    // backup v 8 rows
653cabdff1aSopenharmony_ci
654cabdff1aSopenharmony_ci        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
655cabdff1aSopenharmony_ci
656cabdff1aSopenharmony_ci        // Store pixels:
657cabdff1aSopenharmony_ci        st1          {v0.d}[0],     [x0], x2 // store u
658cabdff1aSopenharmony_ci        st1          {v0.d}[1],     [x1], x2 // store v
659cabdff1aSopenharmony_ci        st1          {v1.d}[0],     [x0], x2
660cabdff1aSopenharmony_ci        st1          {v1.d}[1],     [x1], x2
661cabdff1aSopenharmony_ci        st1          {v2.d}[0],     [x0], x2
662cabdff1aSopenharmony_ci        st1          {v2.d}[1],     [x1], x2
663cabdff1aSopenharmony_ci        st1          {v3.d}[0],     [x0], x2
664cabdff1aSopenharmony_ci        st1          {v3.d}[1],     [x1], x2
665cabdff1aSopenharmony_ci        st1          {v4.d}[0],     [x0], x2
666cabdff1aSopenharmony_ci        st1          {v4.d}[1],     [x1], x2
667cabdff1aSopenharmony_ci        st1          {v5.d}[0],     [x0], x2
668cabdff1aSopenharmony_ci        st1          {v5.d}[1],     [x1], x2
669cabdff1aSopenharmony_ci        st1          {v6.d}[0],     [x0], x2
670cabdff1aSopenharmony_ci        st1          {v6.d}[1],     [x1], x2
671cabdff1aSopenharmony_ci        st1          {v7.d}[0],     [x0]
672cabdff1aSopenharmony_ci        st1          {v7.d}[1],     [x1]
673cabdff1aSopenharmony_ci
674cabdff1aSopenharmony_ci        ret
675cabdff1aSopenharmony_ci
676cabdff1aSopenharmony_ciendfunc
677cabdff1aSopenharmony_ci.endm
678cabdff1aSopenharmony_ci
// Instantiate the default and inner variants.
679cabdff1aSopenharmony_civp8_h_loop_filter8uv
680cabdff1aSopenharmony_civp8_h_loop_filter8uv _inner, inner=1
681cabdff1aSopenharmony_ci
682cabdff1aSopenharmony_ci
// Copy a block of pixels that is 16 bytes wide.
//   x0 = dst, x1 = dst stride, x2 = src, x3 = src stride, w4 = row count
// Four rows are copied per iteration and the loop repeats while the remaining
// count is still positive, so a count that is not a multiple of 4 is
// effectively rounded up to the next multiple of 4.
683cabdff1aSopenharmony_cifunction ff_put_vp8_pixels16_neon, export=1
684cabdff1aSopenharmony_ci1:
685cabdff1aSopenharmony_ci        subs            w4, w4, #4                 // flags are consumed by b.gt below; ld1/st1 leave them alone
686cabdff1aSopenharmony_ci        ld1             {v0.16b},     [x2], x3
687cabdff1aSopenharmony_ci        ld1             {v1.16b},     [x2], x3
688cabdff1aSopenharmony_ci        ld1             {v2.16b},     [x2], x3
689cabdff1aSopenharmony_ci        ld1             {v3.16b},     [x2], x3
690cabdff1aSopenharmony_ci        st1             {v0.16b},     [x0], x1
691cabdff1aSopenharmony_ci        st1             {v1.16b},     [x0], x1
692cabdff1aSopenharmony_ci        st1             {v2.16b},     [x0], x1
693cabdff1aSopenharmony_ci        st1             {v3.16b},     [x0], x1
694cabdff1aSopenharmony_ci        b.gt            1b
695cabdff1aSopenharmony_ci        ret
696cabdff1aSopenharmony_ciendfunc
697cabdff1aSopenharmony_ci
// Copy a block of pixels that is 8 bytes wide.
//   x0 = dst, x1 = dst stride, x2 = src, x3 = src stride, w4 = row count
// Four rows are copied per iteration, packed two rows per vector register
// (even row in the low 64 bits, odd row in the high 64 bits). The loop repeats
// while the remaining count is still positive, so a count that is not a
// multiple of 4 is effectively rounded up.
698cabdff1aSopenharmony_cifunction ff_put_vp8_pixels8_neon, export=1
699cabdff1aSopenharmony_ci1:
700cabdff1aSopenharmony_ci        subs            w4, w4, #4                 // flags are consumed by b.gt below
701cabdff1aSopenharmony_ci        ld1             {v0.8b},   [x2], x3        // row 0 -> v0 low half
702cabdff1aSopenharmony_ci        ld1             {v0.d}[1], [x2], x3        // row 1 -> v0 high half
703cabdff1aSopenharmony_ci        ld1             {v1.8b},   [x2], x3        // row 2 -> v1 low half
704cabdff1aSopenharmony_ci        ld1             {v1.d}[1], [x2], x3        // row 3 -> v1 high half
705cabdff1aSopenharmony_ci        st1             {v0.8b},   [x0], x1
706cabdff1aSopenharmony_ci        st1             {v0.d}[1], [x0], x1
707cabdff1aSopenharmony_ci        st1             {v1.8b},   [x0], x1
708cabdff1aSopenharmony_ci        st1             {v1.d}[1], [x0], x1
709cabdff1aSopenharmony_ci        b.gt            1b
710cabdff1aSopenharmony_ci        ret
711cabdff1aSopenharmony_ciendfunc
712cabdff1aSopenharmony_ci
713cabdff1aSopenharmony_ci/* 4/6-tap 8th-pel MC */
714cabdff1aSopenharmony_ci
// 6-tap horizontal filter producing 8 output pixels.
//   \d      = destination (8 bytes); may be the same register as \s0, since it
//             is only written by the final instruction
//   \s0,\s1 = 16 consecutive source bytes (low 8 bytes of each register)
//   v0      = filter coefficients c0..c5 in v0.h[0..5]
// With p[k] the source byte k positions to the right of the output lane:
//   \d = sat_u8((c0*p[0] - c1*p[1] + c2*p[2] + c3*p[3] - c4*p[4] + c5*p[5] + 64) >> 7)
// The sum is built as two 3-tap halves that are joined with a saturating add.
// Clobbers v18, v19 and v21-v26.
715cabdff1aSopenharmony_ci.macro  vp8_epel8_h6    d,   s0,   s1
716cabdff1aSopenharmony_ci        ext             v22.8b, \s0\().8b,  \s1\().8b,  #1
717cabdff1aSopenharmony_ci        uxtl            v18.8h, \s0\().8b               // p[0]
718cabdff1aSopenharmony_ci        ext             v23.8b, \s0\().8b,  \s1\().8b,  #2
719cabdff1aSopenharmony_ci        uxtl            v19.8h, v22.8b                  // p[1]
720cabdff1aSopenharmony_ci        ext             v24.8b, \s0\().8b,  \s1\().8b,  #3
721cabdff1aSopenharmony_ci        uxtl            v21.8h, v23.8b                  // p[2]
722cabdff1aSopenharmony_ci        ext             v25.8b, \s0\().8b,  \s1\().8b,  #4
723cabdff1aSopenharmony_ci        uxtl            v22.8h, v24.8b                  // p[3]
724cabdff1aSopenharmony_ci        ext             v26.8b, \s0\().8b,  \s1\().8b,  #5
725cabdff1aSopenharmony_ci        uxtl            v25.8h, v25.8b                  // p[4]
726cabdff1aSopenharmony_ci        mul             v21.8h, v21.8h, v0.h[2]
727cabdff1aSopenharmony_ci        uxtl            v26.8h, v26.8b                  // p[5]
728cabdff1aSopenharmony_ci        mul             v22.8h, v22.8h, v0.h[3]
729cabdff1aSopenharmony_ci        mls             v21.8h, v19.8h, v0.h[1]
730cabdff1aSopenharmony_ci        mls             v22.8h, v25.8h, v0.h[4]
731cabdff1aSopenharmony_ci        mla             v21.8h, v18.8h, v0.h[0]         // v21 = c0*p[0] - c1*p[1] + c2*p[2]
732cabdff1aSopenharmony_ci        mla             v22.8h, v26.8h, v0.h[5]         // v22 = c3*p[3] - c4*p[4] + c5*p[5]
733cabdff1aSopenharmony_ci        sqadd           v22.8h, v21.8h, v22.8h
734cabdff1aSopenharmony_ci        sqrshrun        \d\().8b, v22.8h, #7            // round, >> 7, saturate to u8
735cabdff1aSopenharmony_ci.endm
736cabdff1aSopenharmony_ci
// 6-tap horizontal filter producing 16 output pixels.
//   \d0     = destination (16 bytes); written only by the last two instructions
//   \v0,\v1 = 32 consecutive source bytes
//   v0      = filter coefficients c0..c5 in v0.h[0..5]
// Note: the macro arguments are spelled \v0 / \v1; a bare "v0" in the body is
// the real coefficient register, not the argument.
// With p[k] the source byte k positions to the right of the output lane:
//   \d0 = sat_u8((c0*p[0] - c1*p[1] + c2*p[2] + c3*p[3] - c4*p[4] + c5*p[5] + 64) >> 7)
// Low and high 8 lanes are processed in separate registers and narrowed into
// the two halves of \d0.
// Clobbers v1, v2, v3 and v16-v23. The sources are still read after v3 has
// been overwritten, and \v0 is still read after v2 has been overwritten, so
// \v0 must not be v2 or v3 and \v1 must not be v3 (the callers visible in this
// file pass v1, v2).
737cabdff1aSopenharmony_ci.macro  vp8_epel16_h6   d0,  v0,  v1
738cabdff1aSopenharmony_ci        ext             v22.16b, \v0\().16b, \v1\().16b, #3
739cabdff1aSopenharmony_ci        ext             v23.16b, \v0\().16b, \v1\().16b, #4
740cabdff1aSopenharmony_ci        uxtl            v19.8h,  v22.8b             // p[3], low lanes
741cabdff1aSopenharmony_ci        uxtl2           v22.8h,  v22.16b            // p[3], high lanes
742cabdff1aSopenharmony_ci        ext             v3.16b,  \v0\().16b, \v1\().16b, #2
743cabdff1aSopenharmony_ci        uxtl            v20.8h,  v23.8b             // p[4], low lanes
744cabdff1aSopenharmony_ci        uxtl2           v23.8h,  v23.16b            // p[4], high lanes
745cabdff1aSopenharmony_ci        ext             v16.16b, \v0\().16b, \v1\().16b, #1
746cabdff1aSopenharmony_ci        uxtl            v18.8h,  v3.8b              // p[2], low lanes
747cabdff1aSopenharmony_ci        uxtl2           v3.8h,   v3.16b             // p[2], high lanes
748cabdff1aSopenharmony_ci        ext             v2.16b,  \v0\().16b, \v1\().16b, #5     // last read of \v1
749cabdff1aSopenharmony_ci        uxtl            v21.8h,  v2.8b              // p[5], low lanes
750cabdff1aSopenharmony_ci        uxtl2           v2.8h,   v2.16b             // p[5], high lanes
751cabdff1aSopenharmony_ci        uxtl            v17.8h,  v16.8b             // p[1], low lanes
752cabdff1aSopenharmony_ci        uxtl2           v16.8h,  v16.16b            // p[1], high lanes
753cabdff1aSopenharmony_ci        mul             v19.8h,  v19.8h, v0.h[3]
754cabdff1aSopenharmony_ci        mul             v18.8h,  v18.8h, v0.h[2]
755cabdff1aSopenharmony_ci        mul             v3.8h,   v3.8h,  v0.h[2]
756cabdff1aSopenharmony_ci        mul             v22.8h,  v22.8h, v0.h[3]
757cabdff1aSopenharmony_ci        mls             v19.8h,  v20.8h, v0.h[4]
758cabdff1aSopenharmony_ci        uxtl            v20.8h,  \v0\().8b          // p[0], low lanes (v20 is reused)
759cabdff1aSopenharmony_ci        uxtl2           v1.8h,   \v0\().16b         // p[0], high lanes; last read of \v0
760cabdff1aSopenharmony_ci        mls             v18.8h,  v17.8h, v0.h[1]
761cabdff1aSopenharmony_ci        mls             v3.8h,   v16.8h, v0.h[1]
762cabdff1aSopenharmony_ci        mls             v22.8h,  v23.8h, v0.h[4]
763cabdff1aSopenharmony_ci        mla             v18.8h,  v20.8h, v0.h[0]    // low:  c0*p[0] - c1*p[1] + c2*p[2]
764cabdff1aSopenharmony_ci        mla             v19.8h,  v21.8h, v0.h[5]    // low:  c3*p[3] - c4*p[4] + c5*p[5]
765cabdff1aSopenharmony_ci        mla             v3.8h,   v1.8h,  v0.h[0]    // high: c0*p[0] - c1*p[1] + c2*p[2]
766cabdff1aSopenharmony_ci        mla             v22.8h,  v2.8h,  v0.h[5]    // high: c3*p[3] - c4*p[4] + c5*p[5]
767cabdff1aSopenharmony_ci        sqadd           v19.8h,  v18.8h, v19.8h
768cabdff1aSopenharmony_ci        sqadd           v22.8h,  v3.8h,  v22.8h
769cabdff1aSopenharmony_ci        sqrshrun        \d0\().8b,  v19.8h, #7      // round, >> 7, saturate to u8
770cabdff1aSopenharmony_ci        sqrshrun2       \d0\().16b, v22.8h, #7
771cabdff1aSopenharmony_ci.endm
772cabdff1aSopenharmony_ci
// 6-tap vertical filter producing two output rows of 8 pixels each.
//   \d0, \d1 = destinations (8 bytes each); written only by the final two
//              instructions, so they may alias source registers
//   \s0..\s6 = seven consecutive source rows (8 bytes each)
//   v0       = filter coefficients c0..c5 in v0.h[0..5]
//   \d0 = sat_u8((c0*s0 - c1*s1 + c2*s2 + c3*s3 - c4*s4 + c5*s5 + 64) >> 7)
//   \d1 = sat_u8((c0*s1 - c1*s2 + c2*s3 + c3*s4 - c4*s5 + c5*s6 + 64) >> 7)
// All seven source registers are widened to 16 bit in place and then used as
// accumulators, so \s0..\s6 and v31 are clobbered.
773cabdff1aSopenharmony_ci.macro  vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6
774cabdff1aSopenharmony_ci        uxtl            \s0\().8h, \s0\().8b
775cabdff1aSopenharmony_ci        uxtl            \s3\().8h, \s3\().8b
776cabdff1aSopenharmony_ci        uxtl            \s6\().8h, \s6\().8b
777cabdff1aSopenharmony_ci        uxtl            \s1\().8h, \s1\().8b
778cabdff1aSopenharmony_ci        uxtl            \s4\().8h, \s4\().8b
779cabdff1aSopenharmony_ci        uxtl            \s2\().8h, \s2\().8b
780cabdff1aSopenharmony_ci        uxtl            \s5\().8h, \s5\().8b
781cabdff1aSopenharmony_ci        mul             \s0\().8h, \s0\().8h, v0.h[0]   // row 0, first half
782cabdff1aSopenharmony_ci        mul             v31.8h   , \s3\().8h, v0.h[3]   // row 0, second half (s3 itself is kept for row 1)
783cabdff1aSopenharmony_ci        mul             \s3\().8h, \s3\().8h, v0.h[2]   // row 1, first half
784cabdff1aSopenharmony_ci        mul             \s6\().8h, \s6\().8h, v0.h[5]   // row 1, second half
785cabdff1aSopenharmony_ci
786cabdff1aSopenharmony_ci        mls             \s0\().8h, \s1\().8h, v0.h[1]
787cabdff1aSopenharmony_ci        mls             v31.8h   , \s4\().8h, v0.h[4]
788cabdff1aSopenharmony_ci        mls             \s3\().8h, \s2\().8h, v0.h[1]
789cabdff1aSopenharmony_ci        mls             \s6\().8h, \s5\().8h, v0.h[4]
790cabdff1aSopenharmony_ci
791cabdff1aSopenharmony_ci        mla             \s0\().8h, \s2\().8h, v0.h[2]   // c0*s0 - c1*s1 + c2*s2
792cabdff1aSopenharmony_ci        mla             v31.8h   , \s5\().8h, v0.h[5]   // c3*s3 - c4*s4 + c5*s5
793cabdff1aSopenharmony_ci        mla             \s3\().8h, \s1\().8h, v0.h[0]   // c0*s1 - c1*s2 + c2*s3
794cabdff1aSopenharmony_ci        mla             \s6\().8h, \s4\().8h, v0.h[3]   // c3*s4 - c4*s5 + c5*s6
795cabdff1aSopenharmony_ci        sqadd           v31.8h   , \s0\().8h, v31.8h
796cabdff1aSopenharmony_ci        sqadd           \s6\().8h, \s3\().8h, \s6\().8h
797cabdff1aSopenharmony_ci        sqrshrun        \d0\().8b, v31.8h,    #7        // round, >> 7, saturate to u8
798cabdff1aSopenharmony_ci        sqrshrun        \d1\().8b, \s6\().8h, #7
799cabdff1aSopenharmony_ci.endm
800cabdff1aSopenharmony_ci
// 4-tap horizontal filter producing 8 output pixels.
//   \d      = destination (8 bytes); written only by the final instruction
//   \v0,\v1 = 16 consecutive source bytes (low 8 bytes of each register)
//   v0      = filter coefficients; only c1..c4 (v0.h[1..4]) are used
// Note: the macro arguments are spelled \v0 / \v1; a bare "v0" in the body is
// the real coefficient register, not the argument.
// With p[k] the source byte k positions to the right of the output lane:
//   \d = sat_u8((-c1*p[0] + c2*p[1] + c3*p[2] - c4*p[3] + 64) >> 7)
// Clobbers v19, v20, v22, v23 and v25.
801cabdff1aSopenharmony_ci.macro  vp8_epel8_h4    d,   v0,   v1
802cabdff1aSopenharmony_ci        ext             v22.8b, \v0\().8b,  \v1\().8b,  #1
803cabdff1aSopenharmony_ci        uxtl            v19.8h, \v0\().8b               // p[0]
804cabdff1aSopenharmony_ci        ext             v23.8b, \v0\().8b,  \v1\().8b,  #2
805cabdff1aSopenharmony_ci        uxtl            v20.8h, v22.8b                  // p[1]
806cabdff1aSopenharmony_ci        ext             v25.8b, \v0\().8b,  \v1\().8b,  #3
807cabdff1aSopenharmony_ci        uxtl            v22.8h, v23.8b                  // p[2]
808cabdff1aSopenharmony_ci        uxtl            v25.8h, v25.8b                  // p[3]
809cabdff1aSopenharmony_ci        mul             v20.8h, v20.8h, v0.h[2]
810cabdff1aSopenharmony_ci        mul             v22.8h, v22.8h, v0.h[3]
811cabdff1aSopenharmony_ci        mls             v20.8h, v19.8h, v0.h[1]         // c2*p[1] - c1*p[0]
812cabdff1aSopenharmony_ci        mls             v22.8h, v25.8h, v0.h[4]         // c3*p[2] - c4*p[3]
813cabdff1aSopenharmony_ci        sqadd           v22.8h, v20.8h, v22.8h
814cabdff1aSopenharmony_ci        sqrshrun        \d\().8b, v22.8h, #7            // round, >> 7, saturate to u8
815cabdff1aSopenharmony_ci.endm
816cabdff1aSopenharmony_ci
// 4-tap vertical filter producing two output rows of 8 pixels each, packed
// into one 16-byte register.
//   \d0      = destination: first row in the low 8 bytes, second row in the
//              high 8 bytes; written only by the final two instructions
//   \s0..\s4 = five consecutive source rows (8 bytes each)
//   v0       = filter coefficients; only c1..c4 (v0.h[1..4]) are used
//   row 0 = sat_u8((-c1*s0 + c2*s1 + c3*s2 - c4*s3 + 64) >> 7)
//   row 1 = sat_u8((-c1*s1 + c2*s2 + c3*s3 - c4*s4 + 64) >> 7)
// \s0..\s4 are widened to 16 bit in place (and \s2 is used as an accumulator),
// so they are clobbered, as are v21, v22 and v23.
817cabdff1aSopenharmony_ci.macro  vp8_epel8_v4_y2 d0, s0, s1, s2, s3, s4
818cabdff1aSopenharmony_ci        uxtl            \s0\().8h,  \s0\().8b
819cabdff1aSopenharmony_ci        uxtl            \s1\().8h,  \s1\().8b
820cabdff1aSopenharmony_ci        uxtl            \s2\().8h,  \s2\().8b
821cabdff1aSopenharmony_ci        uxtl            \s3\().8h,  \s3\().8b
822cabdff1aSopenharmony_ci        uxtl            \s4\().8h,  \s4\().8b
823cabdff1aSopenharmony_ci        mul             v21.8h,     \s1\().8h, v0.h[2]
824cabdff1aSopenharmony_ci        mul             v23.8h,     \s2\().8h, v0.h[3]
825cabdff1aSopenharmony_ci        mul             \s2\().8h,  \s2\().8h, v0.h[2]
826cabdff1aSopenharmony_ci        mul             v22.8h,     \s3\().8h, v0.h[3]
827cabdff1aSopenharmony_ci        mls             v21.8h,     \s0\().8h, v0.h[1]  // row 0: c2*s1 - c1*s0
828cabdff1aSopenharmony_ci        mls             v23.8h,     \s3\().8h, v0.h[4]  // row 0: c3*s2 - c4*s3
829cabdff1aSopenharmony_ci        mls             \s2\().8h,  \s1\().8h, v0.h[1]  // row 1: c2*s2 - c1*s1
830cabdff1aSopenharmony_ci        mls             v22.8h,     \s4\().8h, v0.h[4]  // row 1: c3*s3 - c4*s4
831cabdff1aSopenharmony_ci        sqadd           v21.8h,     v21.8h,    v23.8h
832cabdff1aSopenharmony_ci        sqadd           \s2\().8h,  \s2\().8h, v22.8h
833cabdff1aSopenharmony_ci        sqrshrun        \d0\().8b,  v21.8h,    #7       // row 0 -> low half
834cabdff1aSopenharmony_ci        sqrshrun2       \d0\().16b, \s2\().8h, #7       // row 1 -> high half
835cabdff1aSopenharmony_ci.endm
836cabdff1aSopenharmony_ci
837cabdff1aSopenharmony_ci
// Subpel filter coefficient table: seven rows of eight 16-bit values (16 bytes
// per row) - six taps c0..c5 followed by two zero padding entries.
// Taps c1 and c4 are stored as magnitudes; the filter macros apply them with
// mls (multiply-subtract), the remaining taps with mul/mla.
// The functions in this file address the table as (subpel_filters - 16) +
// (index << 4), so index 1 selects the first row.
// NOTE(review): index 0 would point before the table; presumably these
// functions are never called with a zero index - confirm against the callers.
838cabdff1aSopenharmony_ci// note: worst case sum of all 6-tap filter values * 255 is 0x7f80 so 16 bit
839cabdff1aSopenharmony_ci// arithmetic can be used to apply filters
// (that figure uses the signed taps, which sum to 128 per row; the macros
// evaluate each filter as two 3-tap partial sums joined with a saturating add)
840cabdff1aSopenharmony_ciconst   subpel_filters, align=4
841cabdff1aSopenharmony_ci        .short     0,   6, 123,  12,   1,   0,   0,   0
842cabdff1aSopenharmony_ci        .short     2,  11, 108,  36,   8,   1,   0,   0
843cabdff1aSopenharmony_ci        .short     0,   9,  93,  50,   6,   0,   0,   0
844cabdff1aSopenharmony_ci        .short     3,  16,  77,  77,  16,   3,   0,   0
845cabdff1aSopenharmony_ci        .short     0,   6,  50,  93,   9,   0,   0,   0
846cabdff1aSopenharmony_ci        .short     1,   8,  36, 108,  11,   2,   0,   0
847cabdff1aSopenharmony_ci        .short     0,   1,  12, 123,   6,   0,   0,   0
848cabdff1aSopenharmony_ciendconst
849cabdff1aSopenharmony_ci
// 6-tap vertical subpel filter, 16 pixels wide.
//   x0 = dst, x1 = dst stride, x2 = src, x3 = src stride
//   w4 = row count, w6 = vertical filter index (selects a subpel_filters row)
// Each iteration reads seven source rows and writes two output rows. The loop
// only exits when the count reaches exactly zero, so the row count is assumed
// to be even and non-zero.
850cabdff1aSopenharmony_cifunction ff_put_vp8_epel16_v6_neon, export=1
851cabdff1aSopenharmony_ci        sub             x2,  x2,  x3,  lsl #1      // start two rows above the first output row
852cabdff1aSopenharmony_ci
853cabdff1aSopenharmony_ci        sxtw            x4,  w4
854cabdff1aSopenharmony_ci        sxtw            x6,  w6
855cabdff1aSopenharmony_ci        movrel          x17,  subpel_filters, -16
856cabdff1aSopenharmony_ci        add             x6,  x17,  x6, lsl #4  // y: address of the selected filter row
857cabdff1aSopenharmony_ci        ld1             {v0.8h},     [x6]
858cabdff1aSopenharmony_ci1:
        // Each source row is split into two 8-byte halves (left, right).
859cabdff1aSopenharmony_ci        ld1             {v1.1d - v2.1d},    [x2], x3
860cabdff1aSopenharmony_ci        ld1             {v3.1d - v4.1d},    [x2], x3
861cabdff1aSopenharmony_ci        ld1             {v16.1d - v17.1d},  [x2], x3
862cabdff1aSopenharmony_ci        ld1             {v18.1d - v19.1d},  [x2], x3
863cabdff1aSopenharmony_ci        ld1             {v20.1d - v21.1d},  [x2], x3
864cabdff1aSopenharmony_ci        ld1             {v22.1d - v23.1d},  [x2], x3
865cabdff1aSopenharmony_ci        ld1             {v24.1d - v25.1d},  [x2]
866cabdff1aSopenharmony_ci        sub             x2,  x2,  x3, lsl #2       // 6 rows forward, 4 back: net advance of 2 rows
867cabdff1aSopenharmony_ci
868cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 v1, v3, v1, v3, v16, v18, v20, v22, v24   // left halves  -> v1, v3
869cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 v2, v4, v2, v4, v17, v19, v21, v23, v25   // right halves -> v2, v4
870cabdff1aSopenharmony_ci
871cabdff1aSopenharmony_ci        st1             {v1.1d - v2.1d}, [x0], x1
872cabdff1aSopenharmony_ci        st1             {v3.1d - v4.1d}, [x0], x1
873cabdff1aSopenharmony_ci        subs            x4, x4, #2
874cabdff1aSopenharmony_ci        b.ne            1b
875cabdff1aSopenharmony_ci
876cabdff1aSopenharmony_ci        ret
877cabdff1aSopenharmony_ciendfunc
878cabdff1aSopenharmony_ci
// 6-tap horizontal subpel filter, 16 pixels wide.
//   x0 = dst, x1 = dst stride, x2 = src, x3 = src stride
//   w4 = row count, w5 = horizontal filter index (selects a subpel_filters row)
// One row is produced per iteration from 32 source bytes starting two pixels
// left of the output position. The loop only exits when the count reaches
// exactly zero, so the row count is assumed to be non-zero.
879cabdff1aSopenharmony_cifunction ff_put_vp8_epel16_h6_neon, export=1
880cabdff1aSopenharmony_ci        sub             x2,  x2,  #2            // start two pixels left of the first output pixel
881cabdff1aSopenharmony_ci        sxtw            x5,  w5 // x
882cabdff1aSopenharmony_ci
883cabdff1aSopenharmony_ci        // first pass (horizontal):
884cabdff1aSopenharmony_ci        movrel          x17,  subpel_filters, -16
885cabdff1aSopenharmony_ci        add             x5,  x17,  x5, lsl #4 // x: address of the selected filter row
886cabdff1aSopenharmony_ci        ld1             {v0.8h},  [x5]
887cabdff1aSopenharmony_ci1:
888cabdff1aSopenharmony_ci        ld1             {v1.16b, v2.16b}, [x2], x3
889cabdff1aSopenharmony_ci        vp8_epel16_h6   v1, v1, v2              // result in v1; also clobbers v2, v3, v16-v23
890cabdff1aSopenharmony_ci        st1             {v1.16b}, [x0], x1
891cabdff1aSopenharmony_ci
892cabdff1aSopenharmony_ci        subs            w4, w4, #1
893cabdff1aSopenharmony_ci        b.ne            1b
894cabdff1aSopenharmony_ci        ret
895cabdff1aSopenharmony_ciendfunc
896cabdff1aSopenharmony_ci
897cabdff1aSopenharmony_ci
// 6-tap horizontal followed by 6-tap vertical subpel filter, 16 pixels wide.
//   x0 = dst, x1 = dst stride, x2 = src, x3 = src stride
//   w4 = row count, w5 = horizontal filter index, w6 = vertical filter index
// Pass 1 filters (row count + 5) source rows horizontally into a 16-byte-
// aligned scratch buffer on the stack (16 bytes per row). Pass 2 filters that
// buffer vertically, two output rows per iteration.
// The scratch area is 336 bytes = 21 rows, i.e. it fits a row count of up to
// 16. The second loop only exits when the count reaches exactly zero, so the
// row count is assumed to be even and non-zero.
898cabdff1aSopenharmony_cifunction ff_put_vp8_epel16_h6v6_neon, export=1
899cabdff1aSopenharmony_ci        sub             x2,  x2,  x3,  lsl #1      // two rows up ...
900cabdff1aSopenharmony_ci        sub             x2,  x2,  #2               // ... and two pixels left
901cabdff1aSopenharmony_ci
902cabdff1aSopenharmony_ci        // first pass (horizontal):
903cabdff1aSopenharmony_ci        movrel          x17,  subpel_filters, -16  // x17 is kept for the second pass as well
904cabdff1aSopenharmony_ci        sxtw            x5,  w5 // x
905cabdff1aSopenharmony_ci        add             x16,  x17,  x5, lsl #4 // x: address of the selected filter row
906cabdff1aSopenharmony_ci        sub             sp,  sp,  #336+16          // 21 rows * 16 bytes, + 16 so the buffer can be aligned
907cabdff1aSopenharmony_ci        ld1             {v0.8h},  [x16]
908cabdff1aSopenharmony_ci        add             x7,  sp,  #15
909cabdff1aSopenharmony_ci        sxtw            x4,  w4
910cabdff1aSopenharmony_ci        add             x16, x4, #5   // h: pass 1 row counter (x16 is reused)
911cabdff1aSopenharmony_ci        bic             x7,  x7,  #15              // x7 = scratch buffer, rounded up to 16 bytes
912cabdff1aSopenharmony_ci1:
913cabdff1aSopenharmony_ci        ld1             {v1.16b, v2.16b}, [x2], x3
914cabdff1aSopenharmony_ci        vp8_epel16_h6   v1, v1, v2
915cabdff1aSopenharmony_ci        st1             {v1.16b}, [x7], #16
916cabdff1aSopenharmony_ci        subs            x16, x16, #1
917cabdff1aSopenharmony_ci        b.ne            1b
918cabdff1aSopenharmony_ci
919cabdff1aSopenharmony_ci
920cabdff1aSopenharmony_ci        // second pass (vertical):
921cabdff1aSopenharmony_ci        sxtw            x6,  w6
922cabdff1aSopenharmony_ci        add             x6,  x17,  x6, lsl #4  // y: address of the selected filter row
923cabdff1aSopenharmony_ci        add             x7,  sp,  #15
924cabdff1aSopenharmony_ci        ld1             {v0.8h},     [x6]
925cabdff1aSopenharmony_ci        bic             x7,  x7,  #15              // back to the start of the scratch buffer
926cabdff1aSopenharmony_ci2:
        // Seven buffered rows, each split into two 8-byte halves:
        // row 0 = v1/v2, row 1 = v3/v4, rows 2-3 = v16-v19, rows 4-5 = v20-v23,
        // row 6 = v24/v25.
927cabdff1aSopenharmony_ci        ld1             {v1.8b - v4.8b},    [x7], #32
928cabdff1aSopenharmony_ci        ld1             {v16.8b - v19.8b},  [x7], #32
929cabdff1aSopenharmony_ci        ld1             {v20.8b - v23.8b},  [x7], #32
930cabdff1aSopenharmony_ci        ld1             {v24.8b - v25.8b},  [x7]
931cabdff1aSopenharmony_ci        sub             x7,  x7,  #64              // 96 forward, 64 back: net advance of 2 rows
932cabdff1aSopenharmony_ci
933cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 v1, v3, v1, v3, v16, v18, v20, v22, v24   // left halves  -> v1, v3
934cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 v2, v4, v2, v4, v17, v19, v21, v23, v25   // right halves -> v2, v4
935cabdff1aSopenharmony_ci        trn1            v1.2d, v1.2d, v2.2d        // join left/right halves of output row 0
936cabdff1aSopenharmony_ci        trn1            v3.2d, v3.2d, v4.2d        // join left/right halves of output row 1
937cabdff1aSopenharmony_ci
938cabdff1aSopenharmony_ci        st1             {v1.16b}, [x0], x1
939cabdff1aSopenharmony_ci        st1             {v3.16b}, [x0], x1
940cabdff1aSopenharmony_ci        subs            x4, x4, #2
941cabdff1aSopenharmony_ci        b.ne            2b
942cabdff1aSopenharmony_ci
943cabdff1aSopenharmony_ci        add             sp,  sp,  #336+16          // release the scratch area
944cabdff1aSopenharmony_ci        ret
945cabdff1aSopenharmony_ciendfunc
946cabdff1aSopenharmony_ci
// 6-tap vertical subpel filter, 8 pixels wide.
//   x0 = dst, x1 = dst stride, x2 = src, x3 = src stride
//   w4 = row count, w6 = vertical filter index (selects a subpel_filters row)
// Each iteration reads seven source rows and writes two output rows. The loop
// only exits when the count reaches exactly zero, so the row count is assumed
// to be even and non-zero.
947cabdff1aSopenharmony_cifunction ff_put_vp8_epel8_v6_neon, export=1
948cabdff1aSopenharmony_ci        sub             x2,  x2,  x3,  lsl #1      // start two rows above the first output row
949cabdff1aSopenharmony_ci
950cabdff1aSopenharmony_ci        movrel          x7,  subpel_filters, -16
951cabdff1aSopenharmony_ci        add             x6,  x7,  w6, uxtw #4      // address of the selected filter row
952cabdff1aSopenharmony_ci        ld1             {v0.8h},  [x6]
953cabdff1aSopenharmony_ci1:
954cabdff1aSopenharmony_ci        ld1             {v2.8b},  [x2], x3
955cabdff1aSopenharmony_ci        ld1             {v3.8b},  [x2], x3
956cabdff1aSopenharmony_ci        ld1             {v4.8b},  [x2], x3
957cabdff1aSopenharmony_ci        ld1             {v5.8b},  [x2], x3
958cabdff1aSopenharmony_ci        ld1             {v6.8b},  [x2], x3
959cabdff1aSopenharmony_ci        ld1             {v7.8b},  [x2], x3
960cabdff1aSopenharmony_ci        ld1             {v28.8b}, [x2]
961cabdff1aSopenharmony_ci
962cabdff1aSopenharmony_ci        sub             x2,  x2,  x3,  lsl #2      // 6 rows forward, 4 back: net advance of 2 rows
963cabdff1aSopenharmony_ci
964cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 v2, v3, v2, v3, v4, v5, v6, v7, v28       // two output rows -> v2, v3
965cabdff1aSopenharmony_ci
966cabdff1aSopenharmony_ci        st1             {v2.8b}, [x0], x1
967cabdff1aSopenharmony_ci        st1             {v3.8b}, [x0], x1
968cabdff1aSopenharmony_ci        subs            w4,  w4,  #2
969cabdff1aSopenharmony_ci        b.ne            1b
970cabdff1aSopenharmony_ci
971cabdff1aSopenharmony_ci        ret
972cabdff1aSopenharmony_ciendfunc
973cabdff1aSopenharmony_ci
// Horizontal 6-tap subpel "put" for an 8-pixel-wide block.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (one per loop pass)
//   w5:      horizontal filter selector
function ff_put_vp8_epel8_h6_neon, export=1
        // 16-byte coefficient rows, table address biased by -16.
        movrel          x7,  subpel_filters, -16
        add             x5,  x7,  w5, uxtw #4
        ld1             {v0.8h},        [x5]

        // Start reading two pixels to the left of the output position.
        sub             x2,  x2,  #2
1:
        // 16 source bytes per row are handed to the filter macro.
        ld1             {v2.8b, v3.8b}, [x2], x3

        vp8_epel8_h6    v2,  v2,  v3

        subs            w4,  w4,  #1
        st1             {v2.8b}, [x0], x1
        b.ne            1b

        ret
endfunc
991cabdff1aSopenharmony_ci
// Two-pass subpel "put" for an 8-pixel-wide block: horizontal 6-tap into a
// stack scratch buffer, then vertical 6-tap from the scratch buffer to dst.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce
//   w5 / w6: horizontal / vertical filter selector
// Scratch layout: 8 bytes per intermediate row, h + 5 rows. 168 bytes cover
// h = 16 (assumed to be the largest height callers pass -- confirm). The
// reservation is rounded up to 176 and keeps 16 bytes of slack, which makes
// the total a multiple of 16 so sp stays 16-byte aligned while it is lowered.
function ff_put_vp8_epel8_h6v6_neon, export=1
        sub             x2,  x2,  x3,  lsl #1   // two rows up
        sub             x2,  x2,  #2            // two pixels left
        sxtw            x4,  w4

        // first pass (horizontal):
        movrel          x17,  subpel_filters, -16
        sxtw            x5,  w5
        add             x5,  x17,  x5, lsl #4 // x
        sub             sp,  sp,  #176+16
        ld1             {v0.8h},  [x5]
        add             x7,  sp,  #15
        add             x16, x4,  #5   // h
        bic             x7,  x7,  #15           // x7 = 16-byte aligned scratch base
1:
        ld1             {v1.8b, v2.8b}, [x2], x3

        vp8_epel8_h6    v1, v1, v2

        st1             {v1.8b}, [x7], #8
        subs            x16, x16, #1
        b.ne            1b

        // second pass (vertical):
        sxtw            x6,  w6
        add             x6,  x17,  x6, lsl #4  // y
        add             x7,  sp,   #15
        ld1             {v0.8h},   [x6]
        bic             x7,  x7,   #15          // back to the scratch base
2:
        // Seven scratch rows (56 bytes) feed one pair of output rows.
        ld1             {v1.8b - v4.8b}, [x7], #32
        ld1             {v5.8b - v7.8b}, [x7]

        sub             x7,  x7,  #16           // net advance: two scratch rows

        vp8_epel8_v6_y2 v1, v2, v1, v2, v3, v4, v5, v6, v7

        st1             {v1.8b}, [x0], x1
        st1             {v2.8b}, [x0], x1
        subs            x4, x4, #2
        b.ne            2b

        add             sp,  sp,  #176+16
        ret
endfunc
1037cabdff1aSopenharmony_ci
// Vertical 4-tap subpel "put" for an 8-pixel-wide block.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (two are written per loop pass)
//   w6:      vertical filter selector
function ff_put_vp8_epel8_v4_neon, export=1
        // 16-byte coefficient rows, table address biased by -16.
        movrel          x7,  subpel_filters, -16
        add             x6,  x7,  w6, uxtw #4
        ld1             {v0.8h},     [x6]

        // Rewind the source pointer by one row.
        sub             x2,  x2,  x3
1:
        // Five consecutive source rows feed one pair of output rows.
        ld1             {v2.8b},     [x2], x3
        ld1             {v3.8b},     [x2], x3
        ld1             {v4.8b},     [x2], x3
        ld1             {v5.8b},     [x2], x3
        ld1             {v6.8b},     [x2]
        // Four post-increments minus two rows: net advance of two rows.
        sub             x2,  x2,  x3,  lsl #1

        vp8_epel8_v4_y2 v2, v2, v3, v4, v5, v6

        // Both output rows come back in v2: low half first, high half second.
        subs            w4,  w4,  #2
        st1             {v2.d}[0], [x0], x1
        st1             {v2.d}[1], [x0], x1
        b.ne            1b

        ret
endfunc
1061cabdff1aSopenharmony_ci
// Horizontal 4-tap subpel "put" for an 8-pixel-wide block.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (one per loop pass)
//   w5:      horizontal filter selector
function ff_put_vp8_epel8_h4_neon, export=1
        // 16-byte coefficient rows, table address biased by -16.
        movrel          x7,  subpel_filters, -16
        add             x5,  x7,  w5, uxtw #4
        ld1             {v0.8h},       [x5]

        // Start reading one pixel to the left of the output position.
        sub             x2,  x2,  #1
1:
        ld1             {v2.8b,v3.8b}, [x2], x3

        vp8_epel8_h4    v2,  v2,  v3

        subs            w4,  w4,  #1
        st1             {v2.8b}, [x0], x1
        b.ne            1b

        ret
endfunc
1079cabdff1aSopenharmony_ci
// Two-pass subpel "put" for an 8-pixel-wide block: horizontal 4-tap into a
// stack scratch buffer, then vertical 6-tap from the scratch buffer to dst.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce
//   w5 / w6: horizontal / vertical filter selector
// Scratch layout: 8 bytes per intermediate row, h + 5 rows. 168 bytes cover
// h = 16 (assumed to be the largest height callers pass -- confirm). The
// reservation is rounded up to 176 and keeps 16 bytes of slack, which makes
// the total a multiple of 16 so sp stays 16-byte aligned while it is lowered.
function ff_put_vp8_epel8_h4v6_neon, export=1
        sub             x2,  x2,  x3,  lsl #1   // two rows up
        sub             x2,  x2,  #1            // one pixel left
        sxtw            x4,  w4

        // first pass (horizontal):
        movrel          x17,  subpel_filters, -16
        sxtw            x5,  w5
        add             x5,  x17,  x5, lsl #4 // x
        sub             sp,  sp,  #176+16
        ld1             {v0.8h},  [x5]
        add             x7,  sp,  #15
        add             x16, x4, #5   // h
        bic             x7,  x7,  #15           // x7 = 16-byte aligned scratch base
1:
        ld1             {v1.8b, v2.8b}, [x2], x3

        vp8_epel8_h4    v1, v1, v2

        st1             {v1.8b}, [x7], #8
        subs            x16, x16, #1
        b.ne            1b

        // second pass (vertical):
        sxtw            x6,  w6
        add             x6,  x17,  x6, lsl #4  // y
        add             x7,  sp,   #15
        ld1             {v0.8h},   [x6]
        bic             x7,  x7,   #15          // back to the scratch base
2:
        // Seven scratch rows (56 bytes) feed one pair of output rows.
        ld1             {v1.8b - v4.8b}, [x7], #32
        ld1             {v5.8b - v7.8b}, [x7]

        sub             x7,  x7,  #16           // net advance: two scratch rows

        vp8_epel8_v6_y2 v1, v2, v1, v2, v3, v4, v5, v6, v7

        st1             {v1.8b}, [x0], x1
        st1             {v2.8b}, [x0], x1
        subs            x4, x4, #2
        b.ne            2b

        add             sp,  sp,  #176+16
        ret
endfunc
1125cabdff1aSopenharmony_ci
// Two-pass subpel "put" for an 8-pixel-wide block: horizontal 4-tap into a
// stack scratch buffer, then vertical 4-tap from the scratch buffer to dst.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce
//   w5 / w6: horizontal / vertical filter selector
// Scratch layout: 8 bytes per intermediate row, h + 3 rows. The reservation
// is 176 bytes plus 16 bytes of slack; the total is a multiple of 16 so sp
// stays 16-byte aligned while it is lowered.
function ff_put_vp8_epel8_h4v4_neon, export=1
        sub             x2,  x2,  x3            // one row up
        sub             x2,  x2,  #1            // one pixel left
        sxtw            x4,  w4


        // first pass (horizontal):
        movrel          x17,  subpel_filters, -16
        sxtw            x5,  w5
        add             x5,  x17,  x5, lsl #4 // x
        sub             sp,  sp,  #176+16
        ld1             {v0.8h},  [x5]
        add             x7,  sp,  #15
        add             x16, x4, #3   // h
        bic             x7,  x7,  #15           // x7 = 16-byte aligned scratch base
1:
        ld1             {v1.8b, v2.8b}, [x2], x3

        vp8_epel8_h4    v1, v1, v2

        st1             {v1.8b}, [x7], #8
        subs            x16, x16, #1
        b.ne            1b

        // second pass (vertical):
        sxtw            x6,  w6
        add             x6,  x17,  x6, lsl #4  // y
        add             x7,  sp,   #15
        ld1             {v0.8h},   [x6]
        bic             x7,  x7,   #15          // back to the scratch base
2:
        // Five scratch rows (40 bytes) feed one pair of output rows; only the
        // first load post-increments, giving a net advance of two rows.
        ld1             {v1.8b - v2.8b}, [x7], #16
        ld1             {v3.8b - v5.8b}, [x7]

        vp8_epel8_v4_y2 v1, v1, v2, v3, v4, v5

        // Both output rows come back in v1: low half first, high half second.
        st1             {v1.d}[0], [x0], x1
        st1             {v1.d}[1], [x0], x1
        subs            x4, x4, #2
        b.ne            2b

        add             sp,  sp,  #176+16
        ret
endfunc
1170cabdff1aSopenharmony_ci
// Two-pass subpel "put" for an 8-pixel-wide block: horizontal 6-tap into a
// stack scratch buffer, then vertical 4-tap from the scratch buffer to dst.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce
//   w5 / w6: horizontal / vertical filter selector
// Scratch layout: 8 bytes per intermediate row, h + 3 rows. The reservation
// is 176 bytes plus 16 bytes of slack; the total is a multiple of 16 so sp
// stays 16-byte aligned while it is lowered.
function ff_put_vp8_epel8_h6v4_neon, export=1
        sub             x2,  x2,  x3            // one row up
        sub             x2,  x2,  #2            // two pixels left
        sxtw            x4,  w4


        // first pass (horizontal):
        movrel          x17,  subpel_filters, -16
        sxtw            x5,  w5
        add             x5,  x17,  x5, lsl #4 // x
        sub             sp,  sp,  #176+16
        ld1             {v0.8h},  [x5]
        add             x7,  sp,  #15
        add             x16, x4, #3   // h
        bic             x7,  x7,  #15           // x7 = 16-byte aligned scratch base
1:
        ld1             {v1.8b, v2.8b}, [x2], x3

        vp8_epel8_h6    v1, v1, v2

        st1             {v1.8b}, [x7], #8
        subs            x16, x16, #1
        b.ne            1b

        // second pass (vertical):
        sxtw            x6,  w6
        add             x6,  x17,  x6, lsl #4  // y
        add             x7,  sp,   #15
        ld1             {v0.8h},   [x6]
        bic             x7,  x7,   #15          // back to the scratch base
2:
        // Five scratch rows (40 bytes) feed one pair of output rows; only the
        // first load post-increments, giving a net advance of two rows.
        ld1             {v1.8b - v2.8b}, [x7], #16
        ld1             {v3.8b - v5.8b}, [x7]

        vp8_epel8_v4_y2 v1, v1, v2, v3, v4, v5

        // Both output rows come back in v1: low half first, high half second.
        st1             {v1.d}[0], [x0], x1
        st1             {v1.d}[1], [x0], x1
        subs            x4, x4, #2
        b.ne            2b

        add             sp,  sp,  #176+16
        ret
endfunc
1215cabdff1aSopenharmony_ci
// Vertical 6-tap subpel "put" for a 4-pixel-wide block.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (four are written per loop pass)
//   w6:      vertical filter selector
// Each 64-bit vector carries two 4-byte rows: lane 0 holds row n and lane 1
// holds row n + 2, so one call of the 8-wide macro yields four output rows.
function ff_put_vp8_epel4_v6_neon, export=1
        // 16-byte coefficient rows, table address biased by -16.
        movrel          x7,  subpel_filters, -16
        add             x6,  x7,  w6, uxtw #4
        ld1             {v0.8h},    [x6]

        // Rewind the source pointer by two rows.
        sub             x2,  x2,  x3,  lsl #1
1:
        // Rows 0..6 of the window (replicated; lane 1 is overwritten below).
        ld1r            {v2.2s},    [x2], x3
        ld1r            {v3.2s},    [x2], x3
        ld1r            {v4.2s},    [x2], x3
        ld1r            {v5.2s},    [x2], x3
        ld1r            {v6.2s},    [x2], x3
        ld1r            {v7.2s},    [x2], x3
        ld1r            {v28.2s},   [x2]
        // Back to row 2 of the window.
        sub             x2,  x2,  x3,  lsl #2
        // Rows 2..8 of the window into lane 1.
        ld1             {v2.s}[1],  [x2], x3
        ld1             {v3.s}[1],  [x2], x3
        ld1             {v4.s}[1],  [x2], x3
        ld1             {v5.s}[1],  [x2], x3
        ld1             {v6.s}[1],  [x2], x3
        ld1             {v7.s}[1],  [x2], x3
        ld1             {v28.s}[1], [x2]
        // Net advance over the whole pass: four rows.
        sub             x2,  x2,  x3,  lsl #2

        vp8_epel8_v6_y2 v2, v3, v2, v3, v4, v5, v6, v7, v28

        // Output order: v2 lane 0, v3 lane 0, v2 lane 1, v3 lane 1.
        subs            w4,  w4,  #4
        st1             {v2.s}[0],  [x0], x1
        st1             {v3.s}[0],  [x0], x1
        st1             {v2.s}[1],  [x0], x1
        st1             {v3.s}[1],  [x0], x1
        b.ne            1b

        ret
endfunc
1251cabdff1aSopenharmony_ci
// Horizontal 6-tap subpel "put" for a 4-pixel-wide block.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (one per loop pass)
//   w5:      horizontal filter selector
// The 8-wide filter macro is reused; only the low four result bytes are kept.
function ff_put_vp8_epel4_h6_neon, export=1
        // 16-byte coefficient rows, table address biased by -16.
        movrel          x7,  subpel_filters, -16
        add             x5,  x7,  w5, uxtw #4
        ld1             {v0.8h},       [x5]

        // Start reading two pixels to the left of the output position.
        sub             x2,  x2,  #2
1:
        ld1             {v2.8b,v3.8b}, [x2], x3
        vp8_epel8_h6    v2,  v2,  v3

        subs            w4,  w4,  #1
        st1             {v2.s}[0], [x0], x1
        b.ne            1b

        ret
endfunc
1267cabdff1aSopenharmony_ci
// Two-pass subpel "put" for a 4-pixel-wide block: horizontal 6-tap into a
// stack scratch buffer, then vertical 6-tap from the scratch buffer to dst.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (four are written per second-pass iteration)
//   w5 / w6: horizontal / vertical filter selector
// Scratch layout: 4 bytes per intermediate row, h + 5 rows. 52 bytes cover
// h = 8 (assumed to be the largest height callers pass -- confirm); the
// reservation is rounded up to 64 so sp stays 16-byte aligned while lowered.
function ff_put_vp8_epel4_h6v6_neon, export=1
        sub             x2,  x2,  x3,  lsl #1   // two rows up
        sub             x2,  x2,  #2            // two pixels left

        movrel          x7,  subpel_filters, -16
        add             x5,  x7,  w5, uxtw #4
        ld1             {v0.8h},       [x5]

        sub             sp,  sp,  #64
        add             w8,  w4,  #5            // intermediate rows = h + 5
        mov             x9,  sp                 // x9 walks the scratch buffer
1:
        // first pass (horizontal): keep the low four filtered bytes per row
        ld1             {v2.8b,v3.8b}, [x2], x3
        vp8_epel8_h6    v2,  v2,  v3
        st1             {v2.s}[0],     [x9], #4
        subs            w8,  w8,  #1
        b.ne            1b

        // second pass (vertical):
        add             x6,  x7,  w6, uxtw #4
        ld1             {v0.8h},       [x6]
        mov             x9,  sp
2:
        // Gather scratch rows 0..6 and, from 8 bytes further on, rows 2..8.
        ld1             {v2.8b,v3.8b}, [x9], #16   // rows 0,1 | 2,3
        ld1             {v6.8b},       [x9], #8    // rows 4,5
        ld1r            {v28.2s},      [x9]        // row 6
        sub             x9,  x9,  #16
        ld1             {v4.8b,v5.8b}, [x9], #16   // rows 2,3 | 4,5
        ld1             {v7.8b},       [x9], #8    // rows 6,7
        ld1             {v28.s}[1],    [x9]        // row 8
        sub             x9,  x9,  #16              // net advance: four rows
        // Re-pair so every vector holds {row n, row n + 2} in lanes 0 / 1.
        trn1            v1.2s, v2.2s, v4.2s
        trn2            v4.2s, v2.2s, v4.2s
        trn1            v2.2s, v3.2s, v5.2s
        trn2            v5.2s, v3.2s, v5.2s
        trn1            v3.2s, v6.2s, v7.2s
        trn2            v7.2s, v6.2s, v7.2s
        vp8_epel8_v6_y2 v2, v3, v1, v4, v2, v5, v3, v7, v28
        // Output order: v2 lane 0, v3 lane 0, v2 lane 1, v3 lane 1.
        st1             {v2.s}[0],  [x0], x1
        st1             {v3.s}[0],  [x0], x1
        st1             {v2.s}[1],  [x0], x1
        st1             {v3.s}[1],  [x0], x1
        subs            w4,  w4,  #4
        b.ne            2b

        add             sp,  sp,  #64
        ret
endfunc
1315cabdff1aSopenharmony_ci
// Two-pass subpel "put" for a 4-pixel-wide block: horizontal 4-tap into a
// stack scratch buffer, then vertical 6-tap from the scratch buffer to dst.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (four are written per second-pass iteration)
//   w5 / w6: horizontal / vertical filter selector
// Scratch layout: 4 bytes per intermediate row, h + 5 rows. 52 bytes cover
// h = 8 (assumed to be the largest height callers pass -- confirm); the
// reservation is rounded up to 64 so sp stays 16-byte aligned while lowered.
function ff_put_vp8_epel4_h4v6_neon, export=1
        sub             x2,  x2,  x3,  lsl #1   // two rows up
        sub             x2,  x2,  #1            // one pixel left

        movrel          x7,  subpel_filters, -16
        add             x5,  x7,  w5, uxtw #4
        ld1             {v0.8h},       [x5]

        sub             sp,  sp,  #64
        add             w8,  w4,  #5            // intermediate rows = h + 5
        mov             x9,  sp                 // x9 walks the scratch buffer
1:
        // first pass (horizontal): only 8 source bytes are loaded, and the
        // same register is passed for both macro sources; the low four
        // filtered bytes are kept.
        ld1             {v2.8b},       [x2], x3
        vp8_epel8_h4    v2,  v2,  v2
        st1             {v2.s}[0],     [x9], #4
        subs            w8,  w8,  #1
        b.ne            1b

        // second pass (vertical):
        add             x6,  x7,  w6, uxtw #4
        ld1             {v0.8h},       [x6]
        mov             x9,  sp
2:
        // Gather scratch rows 0..6 and, from 8 bytes further on, rows 2..8.
        ld1             {v2.8b,v3.8b}, [x9], #16   // rows 0,1 | 2,3
        ld1             {v6.8b},       [x9], #8    // rows 4,5
        ld1r            {v28.2s},      [x9]        // row 6
        sub             x9,  x9,  #16
        ld1             {v4.8b,v5.8b}, [x9], #16   // rows 2,3 | 4,5
        ld1             {v7.8b},       [x9], #8    // rows 6,7
        ld1             {v28.s}[1],    [x9]        // row 8
        sub             x9,  x9,  #16              // net advance: four rows
        // Re-pair so every vector holds {row n, row n + 2} in lanes 0 / 1.
        trn1            v1.2s, v2.2s, v4.2s
        trn2            v4.2s, v2.2s, v4.2s
        trn1            v2.2s, v3.2s, v5.2s
        trn2            v5.2s, v3.2s, v5.2s
        trn1            v3.2s, v6.2s, v7.2s
        trn2            v7.2s, v6.2s, v7.2s
        vp8_epel8_v6_y2 v2, v3, v1, v4, v2, v5, v3, v7, v28
        // Output order: v2 lane 0, v3 lane 0, v2 lane 1, v3 lane 1.
        st1             {v2.s}[0],  [x0], x1
        st1             {v3.s}[0],  [x0], x1
        st1             {v2.s}[1],  [x0], x1
        st1             {v3.s}[1],  [x0], x1
        subs            w4,  w4,  #4
        b.ne            2b

        add             sp,  sp,  #64
        ret
endfunc
1363cabdff1aSopenharmony_ci
// Two-pass subpel "put" for a 4-pixel-wide block: horizontal 6-tap into a
// stack scratch buffer, then vertical 4-tap from the scratch buffer to dst.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (four are written per second-pass iteration)
//   w5 / w6: horizontal / vertical filter selector
// Scratch layout: 4 bytes per intermediate row, h + 3 rows. 44 bytes cover
// h = 8 (assumed to be the largest height callers pass -- confirm); the
// reservation is rounded up to 48 so sp stays 16-byte aligned while lowered.
function ff_put_vp8_epel4_h6v4_neon, export=1
        sub             x2,  x2,  x3            // one row up
        sub             x2,  x2,  #2            // two pixels left

        movrel          x7,  subpel_filters, -16
        add             x5,  x7,  w5, uxtw #4
        ld1             {v0.8h},       [x5]

        sub             sp,  sp,  #48
        add             w8,  w4,  #3            // intermediate rows = h + 3
        mov             x9,  sp                 // x9 walks the scratch buffer
1:
        // first pass (horizontal): keep the low four filtered bytes per row
        ld1             {v2.8b,v3.8b}, [x2], x3
        vp8_epel8_h6    v2, v2, v3
        st1             {v2.s}[0],     [x9], #4
        subs            w8,  w8,  #1
        b.ne            1b

        // second pass (vertical):
        add             x6,  x7,  w6, uxtw #4
        ld1             {v0.8h},       [x6]
        mov             x9,  sp
2:
        // Gather scratch rows 0..4 and, from 8 bytes further on, rows 2..6.
        ld1             {v2.8b,v3.8b}, [x9], #16   // rows 0,1 | 2,3
        ld1r            {v6.2s},       [x9]        // row 4
        sub             x9,  x9,  #8
        ld1             {v4.8b,v5.8b}, [x9], #16   // rows 2,3 | 4,5
        ld1             {v6.s}[1],     [x9]        // row 6
        sub             x9,  x9,  #8               // net advance: four rows
        // Re-pair so every vector holds {row n, row n + 2} in lanes 0 / 1.
        trn1            v1.2s, v2.2s, v4.2s
        trn2            v4.2s, v2.2s, v4.2s
        trn1            v2.2s, v3.2s, v5.2s
        trn2            v5.2s, v3.2s, v5.2s
        vp8_epel8_v4_y2 v1, v1, v4, v2, v5, v6
        // All four rows come back in v1; they are written out in 32-bit
        // lane order 0, 2, 1, 3.
        st1             {v1.s}[0],  [x0], x1
        st1             {v1.s}[2],  [x0], x1
        st1             {v1.s}[1],  [x0], x1
        st1             {v1.s}[3],  [x0], x1
        subs            w4,  w4,  #4
        b.ne            2b

        add             sp,  sp,  #48
        ret
endfunc
1407cabdff1aSopenharmony_ci
// Horizontal 4-tap subpel "put" for a 4-pixel-wide block.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (one per loop pass)
//   w5:      horizontal filter selector
// The 8-wide filter macro is reused; only the low four result bytes are kept.
function ff_put_vp8_epel4_h4_neon, export=1
        // 16-byte coefficient rows, table address biased by -16.
        movrel          x7,  subpel_filters, -16
        add             x5,  x7,  w5, uxtw #4
        ld1             {v0.8h},    [x5]

        // Start reading one pixel to the left of the output position.
        sub             x2,  x2,  #1
1:
        // Only 8 source bytes are loaded; v2 serves as both macro sources.
        ld1             {v2.8b},    [x2], x3
        vp8_epel8_h4    v2,  v2,  v2

        subs            w4,  w4,  #1
        st1             {v2.s}[0],  [x0], x1
        b.ne            1b

        ret
endfunc
1423cabdff1aSopenharmony_ci
// Vertical 4-tap subpel "put" for a 4-pixel-wide block.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (four are written per loop pass)
//   w6:      vertical filter selector
// Each 64-bit vector carries two 4-byte rows: lane 0 holds row n and lane 1
// holds row n + 2, so one call of the 8-wide macro yields four output rows.
function ff_put_vp8_epel4_v4_neon, export=1
        // 16-byte coefficient rows, table address biased by -16.
        movrel          x7,  subpel_filters, -16
        add             x6,  x7,  w6, uxtw #4
        ld1             {v0.8h},   [x6]

        // Rewind the source pointer by one row.
        sub             x2,  x2,  x3
1:
        // Rows 0..4 of the window (replicated; lane 1 is overwritten below).
        ld1r            {v2.2s},   [x2], x3
        ld1r            {v3.2s},   [x2], x3
        ld1r            {v4.2s},   [x2], x3
        ld1r            {v5.2s},   [x2], x3
        ld1r            {v6.2s},   [x2]
        // Back to row 2 of the window.
        sub             x2,  x2,  x3,  lsl #1
        // Rows 2..6 of the window into lane 1.
        ld1             {v2.s}[1], [x2], x3
        ld1             {v3.s}[1], [x2], x3
        ld1             {v4.s}[1], [x2], x3
        ld1             {v5.s}[1], [x2], x3
        ld1             {v6.s}[1], [x2]
        // Net advance over the whole pass: four rows.
        sub             x2,  x2,  x3,  lsl #1

        vp8_epel8_v4_y2 v2, v2, v3, v4, v5, v6

        // All four rows come back in v2; they are written out in 32-bit
        // lane order 0, 2, 1, 3.
        subs            w4,  w4,  #4
        st1             {v2.s}[0], [x0], x1
        st1             {v2.s}[2], [x0], x1
        st1             {v2.s}[1], [x0], x1
        st1             {v2.s}[3], [x0], x1
        b.ne            1b

        ret
endfunc
1455cabdff1aSopenharmony_ci
// Two-pass subpel "put" for a 4-pixel-wide block: horizontal 4-tap into a
// stack scratch buffer, then vertical 4-tap from the scratch buffer to dst.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce (four are written per second-pass iteration)
//   w5 / w6: horizontal / vertical filter selector
// Scratch layout: 4 bytes per intermediate row, h + 3 rows. 44 bytes cover
// h = 8 (assumed to be the largest height callers pass -- confirm); the
// reservation is rounded up to 48 so sp stays 16-byte aligned while lowered.
function ff_put_vp8_epel4_h4v4_neon, export=1
        sub             x2,  x2,  x3            // one row up
        sub             x2,  x2,  #1            // one pixel left

        movrel          x7,  subpel_filters, -16
        add             x5,  x7,  w5, uxtw #4
        ld1             {v0.8h},       [x5]

        sub             sp,  sp,  #48
        add             w8,  w4,  #3            // intermediate rows = h + 3
        mov             x9,  sp                 // x9 walks the scratch buffer
1:
        // first pass (horizontal): only v2 is loaded, so it is passed as both
        // macro sources (as the other 4-wide h4 routines do) rather than
        // reading a register that was never written here. Only the low four
        // filtered bytes are kept.
        ld1             {v2.8b},       [x2], x3
        vp8_epel8_h4    v2,  v2,  v2
        st1             {v2.s}[0],     [x9], #4
        subs            w8,  w8,  #1
        b.ne            1b

        // second pass (vertical):
        add             x6,  x7,  w6, uxtw #4
        ld1             {v0.8h},       [x6]
        mov             x9,  sp
2:
        // Gather scratch rows 0..4 and, from 8 bytes further on, rows 2..6.
        ld1             {v2.8b,v3.8b}, [x9], #16   // rows 0,1 | 2,3
        ld1r            {v6.2s},       [x9]        // row 4
        sub             x9,  x9,  #8
        ld1             {v4.8b,v5.8b}, [x9], #16   // rows 2,3 | 4,5
        ld1             {v6.s}[1],     [x9]        // row 6
        sub             x9,  x9,  #8               // net advance: four rows
        // Re-pair so every vector holds {row n, row n + 2} in lanes 0 / 1.
        trn1            v1.2s, v2.2s, v4.2s
        trn2            v4.2s, v2.2s, v4.2s
        trn1            v2.2s, v3.2s, v5.2s
        trn2            v5.2s, v3.2s, v5.2s
        vp8_epel8_v4_y2 v1, v1, v4, v2, v5, v6
        // All four rows come back in v1; they are written out in 32-bit
        // lane order 0, 2, 1, 3.
        st1             {v1.s}[0], [x0], x1
        st1             {v1.s}[2], [x0], x1
        st1             {v1.s}[1], [x0], x1
        st1             {v1.s}[3], [x0], x1
        subs            w4,  w4,  #4
        b.ne            2b

        add             sp,  sp,  #48
        ret
endfunc
1499cabdff1aSopenharmony_ci
1500cabdff1aSopenharmony_ci/* Bilinear MC */
1501cabdff1aSopenharmony_ci
// Horizontal bilinear "put" for a 16-pixel-wide block, two rows per pass.
//   x0 / x1: destination pointer / destination stride
//   x2 / x3: source pointer / source stride
//   w4:      rows to produce
//   w5:      horizontal weight mx
// Per pixel: dst[i] = (src[i] * (8 - mx) + src[i + 1] * mx + 4) >> 3
function ff_put_vp8_bilin16_h_neon, export=1
        dup             v0.8b,  w5              // v0 = mx
        mov             w7,     #8
        sub             w5,     w7,     w5
        dup             v1.8b,  w5              // v1 = 8 - mx
1:
        // ---- first row: 24 bytes in, 16 pixels out in v4 ----
        ld1             {v2.8b,v3.8b,v4.8b},    [x2], x3
        ext             v5.8b,  v3.8b,  v4.8b,  #1      // src[9..16]
        ext             v4.8b,  v2.8b,  v3.8b,  #1      // src[1..8]
        umull           v16.8h, v2.8b,  v1.8b
        umlal           v16.8h, v4.8b,  v0.8b
        umull           v6.8h,  v3.8b,  v1.8b
        umlal           v6.8h,  v5.8b,  v0.8b
        rshrn           v4.8b,  v16.8h, #3
        rshrn2          v4.16b, v6.8h,  #3

        // ---- second row: 24 bytes in, 16 pixels out in v6 ----
        ld1             {v18.8b,v19.8b,v20.8b}, [x2], x3
        ext             v21.8b, v19.8b, v20.8b, #1
        ext             v20.8b, v18.8b, v19.8b, #1
        umull           v22.8h, v18.8b, v1.8b
        umlal           v22.8h, v20.8b, v0.8b
        umull           v24.8h, v19.8b, v1.8b
        umlal           v24.8h, v21.8b, v0.8b
        rshrn           v6.8b,  v22.8h, #3
        rshrn2          v6.16b, v24.8h, #3

        // Both source rows are read before either destination row is written.
        st1             {v4.16b}, [x0], x1
        st1             {v6.16b}, [x0], x1
        subs            w4,     w4,     #2
        b.gt            1b

        ret
endfunc
1533cabdff1aSopenharmony_ci
// Vertical bilinear interpolation, 16 pixels per row.
//
// Registers as used by this routine:
//   x0 = destination pointer, advanced by x1 after every stored row
//   x2 = source pointer, advanced by x3 after every loaded row
//   w4 = remaining row count; two rows are produced per pass and the loop
//        body always runs at least once (presumably a positive even
//        number is expected - TODO confirm against the callers)
//   w6 = weight of the row below (my); the current row is weighted by
//        8 - my
//
// Every output byte is (cur[i] * (8 - my) + below[i] * my + 4) >> 3.
// One source row more than the number of output rows is read.
function ff_put_vp8_bilin16_v_neon, export=1
        mov             w7,      #8
        dup             v30.16b, w6                     // my
        sub             w6,      w7,      w6
        dup             v31.16b, w6                     // 8 - my

        ld1             {v0.16b}, [x2], x3              // first row, carried in v0
1:
        ld1             {v1.16b}, [x2], x3              // middle row of this pass
        umull           v16.8h,  v0.8b,   v31.8b
        umull2          v17.8h,  v0.16b,  v31.16b
        ld1             {v0.16b}, [x2], x3              // bottom row, reused next pass
        subs            w4,      w4,      #2

        // First output row: carried row blended with the middle row.
        umlal           v16.8h,  v1.8b,   v30.8b
        umlal2          v17.8h,  v1.16b,  v30.16b

        // Second output row: middle row blended with the bottom row.
        umull           v18.8h,  v1.8b,   v31.8b
        umull2          v19.8h,  v1.16b,  v31.16b
        umlal           v18.8h,  v0.8b,   v30.8b
        umlal2          v19.8h,  v0.16b,  v30.16b

        rshrn           v20.8b,  v16.8h,  #3
        rshrn2          v20.16b, v17.8h,  #3
        rshrn           v21.8b,  v18.8h,  #3
        rshrn2          v21.16b, v19.8h,  #3

        st1             {v20.16b}, [x0], x1
        st1             {v21.16b}, [x0], x1
        b.gt            1b                              // flags from the subs above

        ret
endfunc
1563cabdff1aSopenharmony_ci
// Two-pass (horizontal, then vertical) bilinear interpolation, 16 pixels
// per row.
//
// Registers as used by this routine:
//   x0 = destination pointer, advanced by x1 after every stored row
//   x2 = source pointer, advanced by x3 after every loaded row
//   w4 = remaining row count; two rows are produced per pass and the loop
//        body always runs at least once (presumably a positive even
//        number is expected - TODO confirm against the callers)
//   w5 = horizontal weight mx, w6 = vertical weight my
//
// Each source row is first filtered horizontally to 8 bits,
//   t[i] = (src[i] * (8 - mx) + src[i + 1] * mx + 4) >> 3
// and two consecutive filtered rows are then blended vertically,
//   dst[i] = (t0[i] * (8 - my) + t1[i] * my + 4) >> 3
// 24 bytes are fetched from each source row (17 are used), and one source
// row more than the number of output rows is read.
function ff_put_vp8_bilin16_hv_neon, export=1
        mov             w7,      #8
        dup             v28.8b,  w5                     // mx
        dup             v30.16b, w6                     // my
        sub             w5,      w7,      w5
        sub             w6,      w7,      w6
        dup             v29.8b,  w5                     // 8 - mx
        dup             v31.16b, w6                     // 8 - my

        // Horizontally filter the first row; the result is carried in v0.
        ld1             {v1.8b,v2.8b,v3.8b},    [x2], x3
        ext             v4.8b,   v1.8b,   v2.8b,  #1
        ext             v5.8b,   v2.8b,   v3.8b,  #1
        umull           v16.8h,  v1.8b,   v29.8b
        umull           v17.8h,  v2.8b,   v29.8b
        umlal           v16.8h,  v4.8b,   v28.8b
        umlal           v17.8h,  v5.8b,   v28.8b
        rshrn           v0.8b,   v16.8h,  #3
        rshrn2          v0.16b,  v17.8h,  #3
1:
        ld1             {v1.8b,v2.8b,v3.8b},    [x2], x3        // row A
        ld1             {v5.8b,v6.8b,v7.8b},    [x2], x3        // row B
        subs            w4,      w4,      #2

        // Rows A and B shifted left by one pixel.
        ext             v16.8b,  v1.8b,   v2.8b,  #1
        ext             v17.8b,  v2.8b,   v3.8b,  #1
        ext             v18.8b,  v5.8b,   v6.8b,  #1
        ext             v19.8b,  v6.8b,   v7.8b,  #1

        // Horizontal pass for both rows.
        umull           v20.8h,  v1.8b,   v29.8b
        umlal           v20.8h,  v16.8b,  v28.8b
        umull           v21.8h,  v2.8b,   v29.8b
        umlal           v21.8h,  v17.8b,  v28.8b
        umull           v22.8h,  v5.8b,   v29.8b
        umlal           v22.8h,  v18.8b,  v28.8b
        umull           v23.8h,  v6.8b,   v29.8b
        umlal           v23.8h,  v19.8b,  v28.8b
        rshrn           v24.8b,  v20.8h,  #3
        rshrn2          v24.16b, v21.8h,  #3            // filtered row A

        // First output row: carried row blended with filtered row A.
        umull           v25.8h,  v0.8b,   v31.8b
        umlal           v25.8h,  v24.8b,  v30.8b
        umull2          v26.8h,  v0.16b,  v31.16b
        umlal2          v26.8h,  v24.16b, v30.16b

        // The carried row is no longer needed; replace it with filtered row B.
        rshrn           v0.8b,   v22.8h,  #3
        rshrn2          v0.16b,  v23.8h,  #3

        // Second output row: filtered row A blended with filtered row B.
        umull           v27.8h,  v24.8b,  v31.8b
        umlal           v27.8h,  v0.8b,   v30.8b
        umull2          v1.8h,   v24.16b, v31.16b
        umlal2          v1.8h,   v0.16b,  v30.16b

        rshrn           v25.8b,  v25.8h,  #3
        rshrn2          v25.16b, v26.8h,  #3
        rshrn           v27.8b,  v27.8h,  #3
        rshrn2          v27.16b, v1.8h,   #3

        st1             {v25.16b}, [x0], x1
        st1             {v27.16b}, [x0], x1
        b.gt            1b                              // flags from the subs above

        ret
endfunc
1621cabdff1aSopenharmony_ci
// Horizontal bilinear interpolation, 8 pixels per row.
//
// Registers as used by this routine:
//   x0 = destination pointer, advanced by x1 after every stored row
//   x2 = source pointer, advanced by x3 after every loaded row
//   w4 = remaining row count; two rows are produced per pass and the loop
//        body always runs at least once (presumably a positive even
//        number is expected - TODO confirm against the callers)
//   w5 = weight of the right-hand neighbour (mx); the pixel itself is
//        weighted by 8 - mx
//
// Every output byte is (src[i] * (8 - mx) + src[i + 1] * mx + 4) >> 3.
// 16 bytes are fetched from each source row although only the first 9
// contribute to the result.
function ff_put_vp8_bilin8_h_neon, export=1
        mov             w7,      #8
        dup             v30.8b,  w5                     // mx
        sub             w5,      w7,      w5
        dup             v31.8b,  w5                     // 8 - mx
1:
        ld1             {v0.8b,v1.8b},  [x2],  x3       // row A
        ld1             {v2.8b,v3.8b},  [x2],  x3       // row B
        subs            w4,      w4,      #2

        ext             v4.8b,   v0.8b,   v1.8b,  #1    // A[1..8]
        ext             v5.8b,   v2.8b,   v3.8b,  #1    // B[1..8]

        umull           v16.8h,  v0.8b,   v31.8b
        umull           v17.8h,  v2.8b,   v31.8b
        umlal           v16.8h,  v4.8b,   v30.8b
        umlal           v17.8h,  v5.8b,   v30.8b

        rshrn           v18.8b,  v16.8h,  #3
        rshrn           v19.8b,  v17.8h,  #3

        st1             {v18.8b}, [x0], x1
        st1             {v19.8b}, [x0], x1
        b.gt            1b                              // flags from the subs above

        ret
endfunc
1645cabdff1aSopenharmony_ci
// Vertical bilinear interpolation, 8 pixels per row.
//
// Registers as used by this routine:
//   x0 = destination pointer, advanced by x1 after every stored row
//   x2 = source pointer, advanced by x3 after every loaded row
//   w4 = remaining row count; two rows are produced per pass and the loop
//        body always runs at least once (presumably a positive even
//        number is expected - TODO confirm against the callers)
//   w6 = weight of the row below (my); the current row is weighted by
//        8 - my
//
// Every output byte is (cur[i] * (8 - my) + below[i] * my + 4) >> 3.
// One source row more than the number of output rows is read.
function ff_put_vp8_bilin8_v_neon, export=1
        mov             w7,      #8
        dup             v30.8b,  w6                     // my
        sub             w6,      w7,      w6
        dup             v31.8b,  w6                     // 8 - my

        ld1             {v0.8b}, [x2],  x3              // first row, carried in v0
1:
        ld1             {v1.8b}, [x2],  x3              // middle row of this pass
        umull           v16.8h,  v0.8b,   v31.8b
        ld1             {v0.8b}, [x2],  x3              // bottom row, reused next pass
        subs            w4,      w4,      #2

        umlal           v16.8h,  v1.8b,   v30.8b        // carried + middle
        umull           v17.8h,  v1.8b,   v31.8b
        umlal           v17.8h,  v0.8b,   v30.8b        // middle + bottom

        rshrn           v18.8b,  v16.8h,  #3
        rshrn           v19.8b,  v17.8h,  #3

        st1             {v18.8b}, [x0],  x1
        st1             {v19.8b}, [x0],  x1
        b.gt            1b                              // flags from the subs above

        ret
endfunc
1669cabdff1aSopenharmony_ci
// Two-pass (horizontal, then vertical) bilinear interpolation, 8 pixels
// per row.
//
// Registers as used by this routine:
//   x0 = destination pointer, advanced by x1 after every stored row
//   x2 = source pointer, advanced by x3 after every loaded row
//   w4 = remaining row count; two rows are produced per pass and the loop
//        body always runs at least once (presumably a positive even
//        number is expected - TODO confirm against the callers)
//   w5 = horizontal weight mx, w6 = vertical weight my
//
// Each source row is first filtered horizontally to 8 bits,
//   t[i] = (src[i] * (8 - mx) + src[i + 1] * mx + 4) >> 3
// and two consecutive filtered rows are then blended vertically,
//   dst[i] = (t0[i] * (8 - my) + t1[i] * my + 4) >> 3
// 16 bytes are fetched from each source row (9 are used), and one source
// row more than the number of output rows is read.
function ff_put_vp8_bilin8_hv_neon, export=1
        mov             w7,      #8
        dup             v28.8b,  w5                     // mx
        dup             v30.8b,  w6                     // my
        sub             w5,      w7,      w5
        sub             w6,      w7,      w6
        dup             v29.8b,  w5                     // 8 - mx
        dup             v31.8b,  w6                     // 8 - my

        // Horizontally filter the first row; the result is carried in v0.
        ld1             {v1.8b,v2.8b},  [x2],  x3
        ext             v5.8b,   v1.8b,   v2.8b,  #1
        umull           v16.8h,  v1.8b,   v29.8b
        umlal           v16.8h,  v5.8b,   v28.8b
        rshrn           v0.8b,   v16.8h,  #3
1:
        ld1             {v1.8b,v2.8b},  [x2],  x3       // row A
        ld1             {v3.8b,v4.8b},  [x2],  x3       // row B
        subs            w4,      w4,      #2

        ext             v5.8b,   v1.8b,   v2.8b,  #1    // A[1..8]
        ext             v6.8b,   v3.8b,   v4.8b,  #1    // B[1..8]

        // Horizontal pass for both rows.
        umull           v16.8h,  v1.8b,   v29.8b
        umlal           v16.8h,  v5.8b,   v28.8b
        umull           v17.8h,  v3.8b,   v29.8b
        umlal           v17.8h,  v6.8b,   v28.8b
        rshrn           v18.8b,  v16.8h,  #3            // filtered row A

        // First output row: carried row blended with filtered row A.
        umull           v19.8h,  v0.8b,   v31.8b
        umlal           v19.8h,  v18.8b,  v30.8b

        // Replace the carried row with filtered row B.
        rshrn           v0.8b,   v17.8h,  #3

        // Second output row: filtered row A blended with filtered row B.
        umull           v20.8h,  v18.8b,  v31.8b
        umlal           v20.8h,  v0.8b,   v30.8b

        rshrn           v21.8b,  v19.8h,  #3
        rshrn           v22.8b,  v20.8h,  #3

        st1             {v21.8b}, [x0], x1
        st1             {v22.8b}, [x0], x1
        b.gt            1b                              // flags from the subs above

        ret
endfunc
1708cabdff1aSopenharmony_ci
// Horizontal bilinear interpolation, 4 pixels per row.
//
// Registers as used by this routine:
//   x0 = destination pointer, advanced by x1 after every stored row
//   x2 = source pointer, advanced by x3 after every loaded row
//   w4 = remaining row count; two rows are produced per pass and the loop
//        body always runs at least once (presumably a positive even
//        number is expected - TODO confirm against the callers)
//   w5 = weight of the right-hand neighbour (mx); the pixel itself is
//        weighted by 8 - mx
//
// Every output byte is (src[i] * (8 - mx) + src[i + 1] * mx + 4) >> 3.
// 8 bytes are fetched from each source row although only the first 5
// contribute to the result. Two rows are packed into one 64-bit vector so
// that a single multiply-accumulate pair handles both.
function ff_put_vp8_bilin4_h_neon, export=1
        mov             w7,      #8
        dup             v0.8b,   w5                     // mx
        sub             w5,      w7,     w5
        dup             v1.8b,   w5                     // 8 - mx
1:
        subs            w4,      w4,     #2
        ld1             {v2.8b}, [x2],   x3             // row A
        // Rotate the row against itself to get the pixels shifted by one.
        // Only bytes 0..3 of the rotated vector survive the trn1 below, so
        // the wrapped-around byte never reaches the output; using the row
        // as both operands avoids reading a register that has not been
        // written yet.
        ext             v3.8b,   v2.8b,  v2.8b,  #1
        ld1             {v6.8b}, [x2],   x3             // row B
        ext             v7.8b,   v6.8b,  v6.8b,  #1
        trn1            v2.2s,   v2.2s,  v6.2s          // { A[0..3], B[0..3] }
        trn1            v3.2s,   v3.2s,  v7.2s          // { A[1..4], B[1..4] }
        umull           v4.8h,   v2.8b,  v1.8b
        umlal           v4.8h,   v3.8b,  v0.8b
        rshrn           v4.8b,   v4.8h,  #3
        st1             {v4.s}[0], [x0], x1             // output row A
        st1             {v4.s}[1], [x0], x1             // output row B
        b.gt            1b                              // flags from the subs above

        ret
endfunc
1731cabdff1aSopenharmony_ci
// Vertical bilinear interpolation, 4 pixels per row.
//
// Registers as used by this routine:
//   x0 = destination pointer, advanced by x1 after every stored row
//   x2 = source pointer, advanced by x3 after every loaded row
//   w4 = remaining row count; two rows are produced per pass and the loop
//        body always runs at least once (presumably a positive even
//        number is expected - TODO confirm against the callers)
//   w6 = weight of the row below (my); the current row is weighted by
//        8 - my
//
// Every output byte is (cur[i] * (8 - my) + below[i] * my + 4) >> 3.
// Exactly 4 bytes are read per source row, and one source row more than
// the number of output rows is read. Two output rows share one 64-bit
// vector: v0 holds the upper input rows, v1 the rows below them.
function ff_put_vp8_bilin4_v_neon, export=1
        mov             w7,      #8
        dup             v30.8b,  w6                     // my
        sub             w6,      w7,      w6
        dup             v31.8b,  w6                     // 8 - my

        ld1r            {v0.2s},   [x2], x3             // first row, carried in v0.s[0]
1:
        subs            w4,      w4,      #2
        ld1r            {v1.2s},   [x2]                 // middle row (pointer not advanced)
        ld1             {v0.s}[1], [x2], x3             // v0 = { carried, middle }
        ld1             {v1.s}[1], [x2], x3             // v1 = { middle,  bottom }
        umull           v16.8h,  v0.8b,   v31.8b
        umlal           v16.8h,  v1.8b,   v30.8b
        trn2            v0.2s,   v1.2s,   v0.2s         // bottom row becomes the carried row
        rshrn           v17.8b,  v16.8h,  #3
        st1             {v17.s}[0], [x0], x1
        st1             {v17.s}[1], [x0], x1
        b.gt            1b                              // flags from the subs above

        ret
endfunc
1754cabdff1aSopenharmony_ci
// Two-pass (horizontal, then vertical) bilinear interpolation, 4 pixels
// per row.
//
// Registers as used by this routine:
//   x0 = destination pointer, advanced by x1 after every stored row
//   x2 = source pointer, advanced by x3 after every loaded row
//   w4 = remaining row count; two rows are produced per pass and the loop
//        body always runs at least once (presumably a positive even
//        number is expected - TODO confirm against the callers)
//   w5 = horizontal weight mx, w6 = vertical weight my
//
// Each source row is first filtered horizontally to 8 bits,
//   t[i] = (src[i] * (8 - mx) + src[i + 1] * mx + 4) >> 3
// and two consecutive filtered rows are then blended vertically,
//   dst[i] = (t0[i] * (8 - my) + t1[i] * my + 4) >> 3
// 8 bytes are fetched from each source row (5 are used), and one source
// row more than the number of output rows is read. Two rows are packed
// into one 64-bit vector; the filtered previous row lives in v0.s[0].
function ff_put_vp8_bilin4_hv_neon, export=1
        mov             w7,      #8
        dup             v28.8b,  w5                     // mx
        dup             v30.8b,  w6                     // my
        sub             w5,      w7,      w5
        sub             w6,      w7,      w6
        dup             v29.8b,  w5                     // 8 - mx
        dup             v31.8b,  w6                     // 8 - my

        // Horizontally filter the first row; only lane s[0] of v0 is used later.
        ld1             {v1.8b}, [x2],   x3
        ext             v3.8b,   v1.8b,   v1.8b,  #1
        umull           v16.8h,  v1.8b,   v29.8b
        umlal           v16.8h,  v3.8b,   v28.8b
        rshrn           v0.8b,   v16.8h,  #3
1:
        ld1             {v1.8b}, [x2],   x3             // row A
        ld1             {v2.8b}, [x2],   x3             // row B
        subs            w4,      w4,      #2

        // Rotate each row by one byte; bytes 0..3 then hold pixels 1..4.
        ext             v3.8b,   v1.8b,   v1.8b,  #1
        ext             v4.8b,   v2.8b,   v2.8b,  #1
        trn1            v5.2s,   v1.2s,   v2.2s         // { A[0..3], B[0..3] }
        trn1            v6.2s,   v3.2s,   v4.2s         // { A[1..4], B[1..4] }

        // Horizontal pass for both rows at once.
        umull           v16.8h,  v5.8b,   v29.8b
        umlal           v16.8h,  v6.8b,   v28.8b
        rshrn           v17.8b,  v16.8h,  #3            // { A', B' }

        trn1            v18.2s,  v0.2s,   v17.2s        // { previous', A' }
        rev64           v0.2s,   v17.2s                 // B' moves to s[0] for the next pass

        // Vertical pass: upper rows * (8 - my) + lower rows * my.
        umull           v19.8h,  v18.8b,  v31.8b
        umlal           v19.8h,  v17.8b,  v30.8b
        rshrn           v20.8b,  v19.8h,  #3

        st1             {v20.s}[0], [x0], x1
        st1             {v20.s}[1], [x0], x1
        b.gt            1b                              // flags from the subs above

        ret
endfunc
1791