/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21cabdff1aSopenharmony_ci
22cabdff1aSopenharmony_ci#include "config.h"
23cabdff1aSopenharmony_ci#include "libavutil/aarch64/asm.S"
24cabdff1aSopenharmony_ci
// Convert a contiguous run of 32-bit floats to 16-bit signed integers.
//
// In:    x0 = destination (8 halfwords written per st1)
//        x1 = source      (4 floats read per ld1)
//        w2 = number of samples
// Clobb: x0, x1, x2, x12, v0-v7, flags
//
// Each float is converted to Q31 fixed point (fcvtzs #31) and narrowed to
// 16 bits with rounding and saturation (sqrshrn/sqrshrn2 #16).
//
// The code only distinguishes tails of exactly 8 or 16 samples, so the count
// is expected to be a positive multiple of 8.
// NOTE(review): confirm that every caller guarantees this.
//
// The count is handled with 32-bit (w) operations only, matching how
// swri_oldapi_conv_fltp_to_s16_nch_neon in this file reads the same argument
// before branching here; stale upper bits in x2 therefore cannot leak into
// the loop counter.
function swri_oldapi_conv_flt_to_s16_neon, export=1
// Plain label: branch target used by swri_oldapi_conv_fltp_to_s16_nch_neon.
oldapi_conv_flt_to_s16_neon:
        subs            w2,  w2,  #8            // w2 = samples left after the 8 preloaded below
        ld1             {v0.4s}, [x1],  #16
        fcvtzs          v4.4s,  v0.4s,  #31
        ld1             {v1.4s}, [x1],  #16
        fcvtzs          v5.4s,  v1.4s,  #31
        b.eq            3f                      // exactly 8 samples in total
        ands            w12, w2,  #~15          // w12 = samples covered by full 16-sample iterations
        b.eq            2f                      // none: finish with the 16-sample tail
        // Main loop: 16 samples per iteration. v4/v5 always hold 8 converted
        // samples that have not been narrowed and stored yet.
1:      subs            w12, w12, #16
        sqrshrn         v4.4h,  v4.4s,  #16
        ld1             {v2.4s}, [x1],  #16
        fcvtzs          v6.4s,  v2.4s,  #31
        sqrshrn2        v4.8h,  v5.4s,  #16
        ld1             {v3.4s}, [x1],  #16
        fcvtzs          v7.4s,  v3.4s,  #31
        sqrshrn         v6.4h,  v6.4s,  #16
        st1             {v4.8h}, [x0],  #16
        sqrshrn2        v6.8h,  v7.4s,  #16
        ld1             {v0.4s}, [x1],  #16     // preload the next 8 samples
        fcvtzs          v4.4s,  v0.4s,  #31
        ld1             {v1.4s}, [x1],  #16
        fcvtzs          v5.4s,  v1.4s,  #31
        st1             {v6.8h}, [x0],  #16
        b.ne            1b
        ands            w2,  w2,  #15           // anything beyond the 8 pending samples?
        b.eq            3f
        // Tail of 16: the 8 pending samples in v4/v5 plus 8 more from memory.
2:      ld1             {v2.4s}, [x1],  #16
        sqrshrn         v4.4h,  v4.4s,  #16
        fcvtzs          v6.4s,  v2.4s,  #31
        ld1             {v3.4s}, [x1],  #16
        sqrshrn2        v4.8h,  v5.4s,  #16
        fcvtzs          v7.4s,  v3.4s,  #31
        sqrshrn         v6.4h,  v6.4s,  #16
        st1             {v4.8h}, [x0],  #16
        sqrshrn2        v6.8h,  v7.4s,  #16
        st1             {v6.8h}, [x0]
        ret
        // Tail of 8: only the pending samples in v4/v5 remain.
3:      sqrshrn         v4.4h,  v4.4s,  #16
        sqrshrn2        v4.8h,  v5.4s,  #16
        st1             {v4.8h}, [x0]
        ret
endfunc
69cabdff1aSopenharmony_ci
// Convert two planar float channels to interleaved 16-bit signed samples.
//
// In:    x0 = destination (interleaved output)
//        x1 = pointer to two consecutive channel source pointers
//        w2 = samples per channel
//        (x3 is not read by this routine)
// Clobb: x0, x2, x4, x5, x12, v0-v7, v16-v23, flags
//
// Each float is converted to Q31 fixed point (fcvtzs #31). "sri vB, vA, #16"
// then packs one sample pair per 32-bit lane: the upper 16 bits of the first
// channel go to the low halfword, the upper 16 bits of the second channel
// stay in the high halfword. The low 16 bits of the Q31 value are dropped
// without rounding.
//
// The code only distinguishes tails of exactly 8 or 16 samples, so the count
// is expected to be a positive multiple of 8.
// NOTE(review): confirm that every caller guarantees this.
//
// The count is handled with 32-bit (w) operations only, matching how
// swri_oldapi_conv_fltp_to_s16_nch_neon in this file reads the same argument
// before branching here; stale upper bits in x2 therefore cannot leak into
// the loop counter.
function swri_oldapi_conv_fltp_to_s16_2ch_neon, export=1
// Plain label: branch target used by swri_oldapi_conv_fltp_to_s16_nch_neon.
oldapi_conv_fltp_to_s16_2ch_neon:
        ldp             x4,  x5,  [x1]          // x4 = first channel, x5 = second channel
        subs            w2,  w2,  #8            // w2 = samples left after the 8 preloaded below
        ld1             {v0.4s},  [x4], #16
        fcvtzs          v4.4s,  v0.4s,  #31
        ld1             {v1.4s},  [x4], #16
        fcvtzs          v5.4s,  v1.4s,  #31
        ld1             {v2.4s},  [x5], #16
        fcvtzs          v6.4s,  v2.4s,  #31
        ld1             {v3.4s},  [x5], #16
        fcvtzs          v7.4s,  v3.4s,  #31
        b.eq            3f                      // exactly 8 samples in total
        ands            w12, w2,  #~15          // w12 = samples covered by full 16-sample iterations
        b.eq            2f                      // none: finish with the 16-sample tail
        // Main loop: 16 samples per channel per iteration. v4/v5 (first
        // channel) and v6/v7 (second channel) always hold 8 converted samples
        // that have not been packed and stored yet.
1:      subs            w12, w12, #16
        ld1             {v16.4s}, [x4], #16
        fcvtzs          v20.4s, v16.4s, #31
        sri             v6.4s,  v4.4s,  #16
        ld1             {v17.4s}, [x4], #16
        fcvtzs          v21.4s, v17.4s, #31
        ld1             {v18.4s}, [x5], #16
        fcvtzs          v22.4s, v18.4s, #31
        ld1             {v19.4s}, [x5], #16
        sri             v7.4s,  v5.4s,  #16
        st1             {v6.4s},  [x0], #16
        fcvtzs          v23.4s, v19.4s, #31
        st1             {v7.4s},  [x0], #16
        sri             v22.4s, v20.4s, #16
        ld1             {v0.4s},  [x4], #16     // preload the next 8 samples of each channel
        sri             v23.4s, v21.4s, #16
        st1             {v22.4s}, [x0], #16
        fcvtzs          v4.4s,  v0.4s,  #31
        ld1             {v1.4s},  [x4], #16
        fcvtzs          v5.4s,  v1.4s,  #31
        ld1             {v2.4s},  [x5], #16
        fcvtzs          v6.4s,  v2.4s,  #31
        ld1             {v3.4s},  [x5], #16
        fcvtzs          v7.4s,  v3.4s,  #31
        st1             {v23.4s}, [x0], #16
        b.ne            1b
        ands            w2,  w2,  #15           // anything beyond the 8 pending samples?
        b.eq            3f
        // Tail of 16: the 8 pending samples plus 8 more per channel from memory.
2:      sri             v6.4s,  v4.4s,  #16
        ld1             {v0.4s},  [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},  [x4], #16
        fcvtzs          v1.4s,  v1.4s,  #31
        ld1             {v2.4s},  [x5], #16
        fcvtzs          v2.4s,  v2.4s,  #31
        sri             v7.4s,  v5.4s,  #16
        ld1             {v3.4s},  [x5], #16
        fcvtzs          v3.4s,  v3.4s,  #31
        sri             v2.4s,  v0.4s,  #16
        st1             {v6.4s,v7.4s},  [x0], #32
        sri             v3.4s,  v1.4s,  #16
        st1             {v2.4s,v3.4s},  [x0], #32
        ret
        // Tail of 8: only the pending samples remain.
3:      sri             v6.4s,  v4.4s,  #16
        sri             v7.4s,  v5.4s,  #16
        st1             {v6.4s,v7.4s},  [x0]
        ret
endfunc
133cabdff1aSopenharmony_ci
// Convert planar float channels to interleaved 16-bit signed samples for an
// arbitrary channel count.
//
// In:    x0 = destination (interleaved output)
//        x1 = pointer to an array of per-channel source pointers
//        w2 = samples per channel; consumed in steps of 8 (4-channel group)
//             or 8/16 (2- and 1-channel groups), so it is expected to be a
//             positive multiple of 8 -- NOTE(review): confirm against callers
//        w3 = channel count
// Clobb: x0, x1, x3-x9, x12, v0-v7, v16-v19, flags (when handled here)
//
// Dispatch:
//   channels == 2 -> branch to oldapi_conv_fltp_to_s16_2ch_neon
//   channels <  2 -> load the single channel pointer and branch to
//                    oldapi_conv_flt_to_s16_neon
//   channels >  2 -> handled here in groups of 4, then 2, then 1 channels.
//                    Every group walks the output with a stride of
//                    channels * 2 bytes (x12); x0 is advanced past the
//                    columns of each finished group.
//
// In the paths handled here every float is converted with fcvtzs #31 and only
// the upper 16 bits of the Q31 result are stored (via sri or odd halfword
// lanes), i.e. without the rounding applied by sqrshrn in
// oldapi_conv_flt_to_s16_neon.
//
// The stride is derived from w3 with a 32-bit shift: the channel count is
// only ever read as a w register elsewhere in this routine, and the w-form
// write also clears the upper half of x12.
function swri_oldapi_conv_fltp_to_s16_nch_neon, export=1
        cmp             w3,  #2
        b.eq            oldapi_conv_fltp_to_s16_2ch_neon
        b.gt            1f
        ldr             x1,  [x1]               // fewer than 2 channels: x1 = the channel buffer
        b               oldapi_conv_flt_to_s16_neon
1:
        cmp             w3,  #4
        lsl             w12, w3,  #1            // x12 = output stride in bytes (channels * 2)
        b.lt            4f                      // 3 channels: start with the 2-channel group

5:      // 4 channels
        ldp             x4, x5, [x1], #16       // x4..x7 = next four channel buffers
        ldp             x6, x7, [x1], #16
        mov             w9,  w2                 // w9 = samples left for this group
        mov             x8,  x0                 // x8 = write cursor for this group
        ld1             {v4.4s},        [x4], #16
        fcvtzs          v4.4s,  v4.4s,  #31
        ld1             {v5.4s},        [x5], #16
        fcvtzs          v5.4s,  v5.4s,  #31
        ld1             {v6.4s},        [x6], #16
        fcvtzs          v6.4s, v6.4s, #31
        ld1             {v7.4s},        [x7], #16
        fcvtzs          v7.4s, v7.4s, #31
        // Loop: 8 samples per iteration. v4-v7 hold 4 preloaded samples of
        // each channel; v0-v3 receive the following 4.
6:
        subs            w9,  w9,  #8
        ld1             {v0.4s},        [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        sri             v5.4s,  v4.4s,  #16     // v5 lanes = channel pair 0/1
        ld1             {v1.4s},        [x5], #16
        fcvtzs          v1.4s,  v1.4s,  #31
        sri             v7.4s,  v6.4s,  #16     // v7 lanes = channel pair 2/3
        ld1             {v2.4s},        [x6], #16
        fcvtzs          v2.4s,  v2.4s,  #31
        zip1            v16.4s, v5.4s,  v7.4s   // each 64-bit half = 4 channels of one sample
        ld1             {v3.4s},        [x7], #16
        fcvtzs          v3.4s,  v3.4s,  #31
        zip2            v17.4s, v5.4s,  v7.4s
        st1             {v16.d}[0],     [x8], x12
        sri             v1.4s,  v0.4s,  #16
        st1             {v16.d}[1],     [x8], x12
        sri             v3.4s,  v2.4s,  #16
        st1             {v17.d}[0],     [x8], x12
        zip1            v18.4s, v1.4s,  v3.4s
        st1             {v17.d}[1],     [x8], x12
        zip2            v19.4s, v1.4s,  v3.4s
        b.eq            7f                      // flags still from the subs above
        ld1             {v4.4s},        [x4], #16
        fcvtzs          v4.4s,  v4.4s,  #31
        st1             {v18.d}[0],     [x8], x12
        ld1             {v5.4s},        [x5], #16
        fcvtzs          v5.4s,  v5.4s,  #31
        st1             {v18.d}[1],     [x8], x12
        ld1             {v6.4s},    [x6], #16
        fcvtzs          v6.4s, v6.4s, #31
        st1             {v19.d}[0],     [x8], x12
        ld1             {v7.4s},    [x7], #16
        fcvtzs          v7.4s, v7.4s, #31
        st1             {v19.d}[1],     [x8], x12
        b               6b
        // Last 4 samples of the group, then move on to the remaining channels.
7:
        st1             {v18.d}[0],     [x8], x12
        st1             {v18.d}[1],     [x8], x12
        st1             {v19.d}[0],     [x8], x12
        st1             {v19.d}[1],     [x8], x12
        subs            w3,  w3,  #4
        b.eq            end
        cmp             w3,  #4
        add             x0,  x0,  #8            // skip the 4 columns just written
        b.ge            5b

4:      // 2 channels
        cmp             w3,  #2
        b.lt            4f                      // only one channel left
        ldp             x4,  x5,  [x1], #16     // x4, x5 = next two channel buffers
        mov             w9,  w2                 // w9 = samples left for this group
        mov             x8,  x0                 // x8 = write cursor for this group
        tst             w9,  #8                 // odd multiple of 8? (tested by b.eq below)
        ld1             {v4.4s},        [x4], #16
        fcvtzs          v4.4s,  v4.4s,  #31
        ld1             {v5.4s},        [x5], #16
        fcvtzs          v5.4s,  v5.4s,  #31
        ld1             {v6.4s},        [x4], #16
        fcvtzs          v6.4s,  v6.4s,  #31
        ld1             {v7.4s},        [x5], #16
        fcvtzs          v7.4s,  v7.4s,  #31
        b.eq            6f                      // bit 3 clear: go straight to the 16-sample loop
        subs            w9,  w9,  #8
        b.eq            7f                      // exactly 8 samples
        // Peel 8 samples so the loop below sees a multiple of 16, reloading
        // v4-v7 with the next 8 samples of each channel on the way.
        sri             v5.4s,  v4.4s,  #16
        ld1             {v4.4s},        [x4], #16
        fcvtzs          v4.4s,  v4.4s,  #31
        st1             {v5.s}[0],      [x8], x12
        sri             v7.4s,  v6.4s,  #16
        st1             {v5.s}[1],      [x8], x12
        ld1             {v6.4s},        [x4], #16
        fcvtzs          v6.4s,  v6.4s, #31
        st1             {v5.s}[2],      [x8], x12
        st1             {v5.s}[3],      [x8], x12
        st1             {v7.s}[0],      [x8], x12
        st1             {v7.s}[1],      [x8], x12
        ld1             {v5.4s},        [x5], #16
        fcvtzs          v5.4s,  v5.4s,  #31
        st1             {v7.s}[2],      [x8], x12
        st1             {v7.s}[3],      [x8], x12
        ld1             {v7.4s},        [x5], #16
        fcvtzs          v7.4s,  v7.4s,  #31
        // Loop: 16 samples per iteration. v4-v7 hold 8 preloaded samples of
        // each channel; v0-v3 receive the following 8.
6:
        subs            w9,  w9,  #16
        ld1             {v0.4s},        [x4], #16
        sri             v5.4s,  v4.4s,  #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},        [x5], #16
        sri             v7.4s,  v6.4s,  #16
        st1             {v5.s}[0],      [x8], x12
        st1             {v5.s}[1],      [x8], x12
        fcvtzs          v1.4s,  v1.4s,  #31
        st1             {v5.s}[2],      [x8], x12
        st1             {v5.s}[3],      [x8], x12
        ld1             {v2.4s},        [x4], #16
        st1             {v7.s}[0],      [x8], x12
        fcvtzs          v2.4s,  v2.4s,  #31
        st1             {v7.s}[1],      [x8], x12
        ld1             {v3.4s},        [x5], #16
        st1             {v7.s}[2],      [x8], x12
        fcvtzs          v3.4s,  v3.4s,  #31
        st1             {v7.s}[3],      [x8], x12
        sri             v1.4s,  v0.4s,  #16
        sri             v3.4s,  v2.4s,  #16
        b.eq            6f                      // no samples left: store v1/v3 in the tail
        ld1             {v4.4s},        [x4], #16
        st1             {v1.s}[0],      [x8], x12
        fcvtzs          v4.4s,  v4.4s,  #31
        st1             {v1.s}[1],      [x8], x12
        ld1             {v5.4s},        [x5], #16
        st1             {v1.s}[2],      [x8], x12
        fcvtzs          v5.4s,  v5.4s,  #31
        st1             {v1.s}[3],      [x8], x12
        ld1             {v6.4s},        [x4], #16
        st1             {v3.s}[0],      [x8], x12
        fcvtzs          v6.4s,  v6.4s,  #31
        st1             {v3.s}[1],      [x8], x12
        ld1             {v7.4s},        [x5], #16
        st1             {v3.s}[2],      [x8], x12
        fcvtzs          v7.4s,  v7.4s,  #31
        st1             {v3.s}[3],      [x8], x12
        b.gt            6b
        // Tail after the loop: the last 8 packed samples are in v1/v3.
6:
        st1             {v1.s}[0],      [x8], x12
        st1             {v1.s}[1],      [x8], x12
        st1             {v1.s}[2],      [x8], x12
        st1             {v1.s}[3],      [x8], x12
        st1             {v3.s}[0],      [x8], x12
        st1             {v3.s}[1],      [x8], x12
        st1             {v3.s}[2],      [x8], x12
        st1             {v3.s}[3],      [x8], x12
        b               8f
        // Exactly 8 samples: pack and store the preloaded v4-v7.
7:
        sri             v5.4s,  v4.4s,  #16
        sri             v7.4s,  v6.4s,  #16
        st1             {v5.s}[0],      [x8], x12
        st1             {v5.s}[1],      [x8], x12
        st1             {v5.s}[2],      [x8], x12
        st1             {v5.s}[3],      [x8], x12
        st1             {v7.s}[0],      [x8], x12
        st1             {v7.s}[1],      [x8], x12
        st1             {v7.s}[2],      [x8], x12
        st1             {v7.s}[3],      [x8], x12
8:
        subs            w3,  w3,  #2
        add             x0,  x0,  #4            // skip the 2 columns just written
        b.eq            end

4:      // 1 channel
        ldr             x4,  [x1]               // x4 = last channel buffer
        tst             w2,  #8                 // odd multiple of 8? (tested by b.ne below)
        mov             w9,  w2                 // w9 = samples left
        mov             x5,  x0                 // x5 = write cursor
        ld1             {v0.4s},        [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},        [x4], #16
        fcvtzs          v1.4s,  v1.4s,  #31
        b.ne            8f                      // peel 8 samples first
        // Loop: 16 samples per iteration. Odd halfword lanes are the upper
        // 16 bits of each 32-bit Q31 lane.
6:
        subs            w9,  w9,  #16
        ld1             {v2.4s},        [x4], #16
        fcvtzs          v2.4s,  v2.4s,  #31
        ld1             {v3.4s},        [x4], #16
        fcvtzs          v3.4s,  v3.4s,  #31
        st1             {v0.h}[1],      [x5], x12
        st1             {v0.h}[3],      [x5], x12
        st1             {v0.h}[5],      [x5], x12
        st1             {v0.h}[7],      [x5], x12
        st1             {v1.h}[1],      [x5], x12
        st1             {v1.h}[3],      [x5], x12
        st1             {v1.h}[5],      [x5], x12
        st1             {v1.h}[7],      [x5], x12
        b.eq            7f                      // last iteration: skip the preload
        ld1             {v0.4s},        [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},        [x4], #16
        fcvtzs          v1.4s,  v1.4s,  #31
7:
        st1             {v2.h}[1],      [x5], x12
        st1             {v2.h}[3],      [x5], x12
        st1             {v2.h}[5],      [x5], x12
        st1             {v2.h}[7],      [x5], x12
        st1             {v3.h}[1],      [x5], x12
        st1             {v3.h}[3],      [x5], x12
        st1             {v3.h}[5],      [x5], x12
        st1             {v3.h}[7],      [x5], x12
        b.gt            6b                      // flags still from the subs at the loop head
        ret
        // Peel 8 samples so the loop above sees a multiple of 16.
8:
        subs            w9,  w9,  #8
        st1             {v0.h}[1],      [x5], x12
        st1             {v0.h}[3],      [x5], x12
        st1             {v0.h}[5],      [x5], x12
        st1             {v0.h}[7],      [x5], x12
        st1             {v1.h}[1],      [x5], x12
        st1             {v1.h}[3],      [x5], x12
        st1             {v1.h}[5],      [x5], x12
        st1             {v1.h}[7],      [x5], x12
        b.eq            end                     // exactly 8 samples
        ld1             {v0.4s},        [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},        [x4], #16
        fcvtzs          v1.4s,  v1.4s,  #31
        b               6b
end:
        ret
endfunc
366