/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/aarch64/asm.S"

/*
 * ff_conv_flt_to_s16_neon: convert a run of packed float samples to
 * signed 16-bit samples.
 *
 * Register use on entry (as read by the code below):
 *   x0 = destination, written 8 x int16 per store
 *   x1 = source, read 4 x float per load
 *   w2 = number of samples
 *
 * Each float is converted with "fcvtzs #31" (saturating conversion to
 * fixed point with 31 fractional bits) and narrowed to 16 bits with
 * "sqrshrn #16" (rounding, saturating).
 *
 * The routine unconditionally loads 8 samples up front and only ever
 * works in groups of 8, so it assumes the sample count is a non-zero
 * multiple of 8.  NOTE(review): confirm the callers guarantee this.
 *
 * Clobbers: x0, x1 (both advanced), x2, x12, v0-v7, flags.
 */
function ff_conv_flt_to_s16_neon, export=1
        // Use the 32-bit view of the count: ff_conv_fltp_to_s16_neon treats
        // this argument as w2 and forwards x2 untouched, so the upper half
        // of x2 must not influence the result.  Writing w2 zero-extends
        // into x2, which keeps the 64-bit masks below correct.
        subs            w2,  w2,  #8            // x2 = samples not yet loaded
        ld1             {v0.4s}, [x1],  #16
        fcvtzs          v4.4s,  v0.4s,  #31
        ld1             {v1.4s}, [x1],  #16
        fcvtzs          v5.4s,  v1.4s,  #31
        // Invariant from here on: v4/v5 hold 8 converted samples that have
        // not been narrowed or stored yet.
        b.eq            3f                      // nothing left to load
        ands            x12, x2,  #~15          // x12 = samples for the 16-wide loop
        b.eq            2f                      // fewer than 16 left: tail only
        // Main loop: per iteration store the 8 pending samples, convert and
        // store 8 more, then pre-load the next 8 into v4/v5.
1:      subs            x12, x12, #16
        sqrshrn         v4.4h,  v4.4s,  #16
        ld1             {v2.4s}, [x1],  #16
        fcvtzs          v6.4s,  v2.4s,  #31
        sqrshrn2        v4.8h,  v5.4s,  #16
        ld1             {v3.4s}, [x1],  #16
        fcvtzs          v7.4s,  v3.4s,  #31
        sqrshrn         v6.4h,  v6.4s,  #16
        st1             {v4.8h}, [x0],  #16
        sqrshrn2        v6.8h,  v7.4s,  #16
        ld1             {v0.4s}, [x1],  #16
        fcvtzs          v4.4s,  v0.4s,  #31
        ld1             {v1.4s}, [x1],  #16
        fcvtzs          v5.4s,  v1.4s,  #31
        st1             {v6.8h}, [x0],  #16
        b.ne            1b                      // flags still from the subs above
        ands            x2,  x2,  #15           // leftover samples beyond the loop
        b.eq            3f
        // Tail A: load the final 8 samples, store them after the pending 8.
2:      ld1             {v2.4s}, [x1],  #16
        sqrshrn         v4.4h,  v4.4s,  #16
        fcvtzs          v6.4s,  v2.4s,  #31
        ld1             {v3.4s}, [x1],  #16
        sqrshrn2        v4.8h,  v5.4s,  #16
        fcvtzs          v7.4s,  v3.4s,  #31
        sqrshrn         v6.4h,  v6.4s,  #16
        st1             {v4.8h}, [x0],  #16
        sqrshrn2        v6.8h,  v7.4s,  #16
        st1             {v6.8h}, [x0]
        ret
        // Tail B: only the pending 8 samples remain.
3:      sqrshrn         v4.4h,  v4.4s,  #16
        sqrshrn2        v4.8h,  v5.4s,  #16
        st1             {v4.8h}, [x0]
        ret
endfunc

/*
 * ff_conv_fltp_to_s16_2ch_neon: convert two planar float channels to
 * interleaved signed 16-bit samples.
 *
 * Register use on entry (as read by the code below):
 *   x0 = destination, interleaved int16 output
 *   x1 = pointer to two source pointers (loaded into x4 and x5)
 *   w2 = number of samples per channel
 * No other argument register is read.
 *
 * Each float is converted with "fcvtzs #31" (saturating conversion to
 * fixed point with 31 fractional bits).  "sri #16" then merges the two
 * channels: every 32-bit lane keeps the upper 16 bits of the x5 channel
 * in its high half and receives the upper 16 bits of the x4 channel in
 * its low half.  There is no rounding step in this routine.
 *
 * The routine unconditionally loads 8 samples per channel up front and
 * only works in groups of 8, so it assumes the sample count is a
 * non-zero multiple of 8.  NOTE(review): confirm the callers guarantee
 * this.
 *
 * Clobbers: x0 (advanced), x2, x4, x5, x12, v0-v7, v16-v23, flags.
 */
function ff_conv_fltp_to_s16_2ch_neon, export=1
        ldp             x4,  x5,  [x1]
        // Use the 32-bit view of the count: ff_conv_fltp_to_s16_neon treats
        // this argument as w2 and forwards x2 untouched, so the upper half
        // of x2 must not influence the result.  Writing w2 zero-extends
        // into x2, which keeps the 64-bit masks below correct.
        subs            w2,  w2,  #8            // x2 = samples not yet loaded
        ld1             {v0.4s},  [x4], #16
        fcvtzs          v4.4s,  v0.4s,  #31
        ld1             {v1.4s},  [x4], #16
        fcvtzs          v5.4s,  v1.4s,  #31
        ld1             {v2.4s},  [x5], #16
        fcvtzs          v6.4s,  v2.4s,  #31
        ld1             {v3.4s},  [x5], #16
        fcvtzs          v7.4s,  v3.4s,  #31
        // Invariant from here on: v4/v5 (from x4) and v6/v7 (from x5) hold
        // 8 converted samples per channel that have not been stored yet.
        b.eq            3f                      // nothing left to load
        ands            x12, x2,  #~15          // x12 = samples for the 16-wide loop
        b.eq            2f                      // fewer than 16 left: tail only
        // Main loop: per iteration store the 8 pending frames, convert and
        // store 8 more (via v16-v23), then pre-load the next 8 into v4-v7.
1:      subs            x12, x12, #16
        ld1             {v16.4s}, [x4], #16
        fcvtzs          v20.4s, v16.4s, #31
        sri             v6.4s,  v4.4s,  #16
        ld1             {v17.4s}, [x4], #16
        fcvtzs          v21.4s, v17.4s, #31
        ld1             {v18.4s}, [x5], #16
        fcvtzs          v22.4s, v18.4s, #31
        ld1             {v19.4s}, [x5], #16
        sri             v7.4s,  v5.4s,  #16
        st1             {v6.4s},  [x0], #16
        fcvtzs          v23.4s, v19.4s, #31
        st1             {v7.4s},  [x0], #16
        sri             v22.4s, v20.4s, #16
        ld1             {v0.4s},  [x4], #16
        sri             v23.4s, v21.4s, #16
        st1             {v22.4s}, [x0], #16
        fcvtzs          v4.4s,  v0.4s,  #31
        ld1             {v1.4s},  [x4], #16
        fcvtzs          v5.4s,  v1.4s,  #31
        ld1             {v2.4s},  [x5], #16
        fcvtzs          v6.4s,  v2.4s,  #31
        ld1             {v3.4s},  [x5], #16
        fcvtzs          v7.4s,  v3.4s,  #31
        st1             {v23.4s}, [x0], #16
        b.ne            1b                      // flags still from the subs above
        ands            x2,  x2,  #15           // leftover samples beyond the loop
        b.eq            3f
        // Tail A: load the final 8 samples per channel and store them after
        // the pending 8 frames.
2:      sri             v6.4s,  v4.4s,  #16
        ld1             {v0.4s},  [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},  [x4], #16
        fcvtzs          v1.4s,  v1.4s,  #31
        ld1             {v2.4s},  [x5], #16
        fcvtzs          v2.4s,  v2.4s,  #31
        sri             v7.4s,  v5.4s,  #16
        ld1             {v3.4s},  [x5], #16
        fcvtzs          v3.4s,  v3.4s,  #31
        sri             v2.4s,  v0.4s,  #16
        st1             {v6.4s,v7.4s},  [x0], #32
        sri             v3.4s,  v1.4s,  #16
        st1             {v2.4s,v3.4s},  [x0], #32
        ret
        // Tail B: only the pending 8 frames remain.
3:      sri             v6.4s,  v4.4s,  #16
        sri             v7.4s,  v5.4s,  #16
        st1             {v6.4s,v7.4s},  [x0]
        ret
endfunc

/*
 * ff_conv_fltp_to_s16_neon: convert planar float channels to interleaved
 * signed 16-bit samples for any channel count.
 *
 * Register use on entry (as read by the code below):
 *   x0 = destination, interleaved int16 output
 *   x1 = pointer to an array of per-channel float source pointers
 *   w2 = number of samples per channel
 *   w3 = number of channels
 *
 * Dispatch:
 *   channels == 2  -> tail-branch to ff_conv_fltp_to_s16_2ch_neon
 *   channels <  2  -> load the first channel pointer and tail-branch to
 *                     ff_conv_flt_to_s16_neon
 *   channels >  2  -> handled here in passes of 4, then 2, then 1
 *                     channel(s).  Every pass walks the whole sample range
 *                     and scatters its samples into the output with a
 *                     stride of x12 = 2 * channels bytes (one output frame);
 *                     x0 is then advanced past the columns just written.
 *
 * Samples are converted with "fcvtzs #31" (saturating conversion to fixed
 * point with 31 fractional bits) and the upper 16 bits of each 32-bit
 * result are kept; there is no rounding step in the passes below.
 *
 * Every pass loads samples in groups of 8 before checking the count, so
 * the code assumes a non-zero multiple of 8 samples.  NOTE(review):
 * confirm the callers guarantee this.
 *
 * X() comes from the included asm.S; presumably it applies the platform
 * symbol prefix - confirm against that header.
 *
 * Clobbers: x0, x1, x3, x4-x9, x12, v0-v7, v16-v19, flags.
 *
 * Numeric labels 4, 6, 7 and 8 are reused below; each reference binds to
 * the nearest definition in the stated direction (f = forward,
 * b = backward).
 */
function ff_conv_fltp_to_s16_neon, export=1
        cmp             w3,  #2
        b.eq            X(ff_conv_fltp_to_s16_2ch_neon)
        b.gt            1f
        ldr             x1,  [x1]               // fewer than 2 channels: first plane only
        b               X(ff_conv_flt_to_s16_neon)
1:
        cmp             w3,  #4
        // The channel count is handled as a 32-bit value (w3) everywhere
        // else in this function; writing w12 zero-extends into x12 so the
        // upper half of x3 cannot leak into the output stride.
        lsl             w12, w3,  #1            // x12 = output frame size in bytes
        b.lt            4f                      // 3 channels: start with the 2-channel pass

5:      // 4 channels
        ldp             x4, x5, [x1], #16
        ldp             x6, x7, [x1], #16
        mov             w9,  w2                 // w9 = samples left in this pass
        mov             x8,  x0                 // x8 = write cursor for this pass
        ld1             {v4.4s},        [x4], #16
        fcvtzs          v4.4s,  v4.4s,  #31
        ld1             {v5.4s},        [x5], #16
        fcvtzs          v5.4s,  v5.4s,  #31
        ld1             {v6.4s},        [x6], #16
        fcvtzs          v6.4s,  v6.4s,  #31
        ld1             {v7.4s},        [x7], #16
        fcvtzs          v7.4s,  v7.4s,  #31
        // Loop: 8 frames per iteration.  v4-v7 hold 4 converted samples of
        // each of the 4 channels on entry.  sri packs channel pairs into
        // 32-bit lanes, zip1/zip2 put the two pairs of one frame next to
        // each other, and each 64-bit half is stored as one 4-channel group.
6:
        subs            w9,  w9,  #8
        ld1             {v0.4s},        [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        sri             v5.4s,  v4.4s,  #16
        ld1             {v1.4s},        [x5], #16
        fcvtzs          v1.4s,  v1.4s,  #31
        sri             v7.4s,  v6.4s,  #16
        ld1             {v2.4s},        [x6], #16
        fcvtzs          v2.4s,  v2.4s,  #31
        zip1            v16.4s, v5.4s,  v7.4s
        ld1             {v3.4s},        [x7], #16
        fcvtzs          v3.4s,  v3.4s,  #31
        zip2            v17.4s, v5.4s,  v7.4s
        st1             {v16.d}[0],     [x8], x12
        sri             v1.4s,  v0.4s,  #16
        st1             {v16.d}[1],     [x8], x12
        sri             v3.4s,  v2.4s,  #16
        st1             {v17.d}[0],     [x8], x12
        zip1            v18.4s, v1.4s,  v3.4s
        st1             {v17.d}[1],     [x8], x12
        zip2            v19.4s, v1.4s,  v3.4s
        b.eq            7f                      // flags from the subs above: pass done
        ld1             {v4.4s},        [x4], #16
        fcvtzs          v4.4s,  v4.4s,  #31
        st1             {v18.d}[0],     [x8], x12
        ld1             {v5.4s},        [x5], #16
        fcvtzs          v5.4s,  v5.4s,  #31
        st1             {v18.d}[1],     [x8], x12
        ld1             {v6.4s},        [x6], #16
        fcvtzs          v6.4s,  v6.4s,  #31
        st1             {v19.d}[0],     [x8], x12
        ld1             {v7.4s},        [x7], #16
        fcvtzs          v7.4s,  v7.4s,  #31
        st1             {v19.d}[1],     [x8], x12
        b               6b
7:
        // Flush the last 4 frames of this pass without loading any further.
        st1             {v18.d}[0],     [x8], x12
        st1             {v18.d}[1],     [x8], x12
        st1             {v19.d}[0],     [x8], x12
        st1             {v19.d}[1],     [x8], x12
        subs            w3,  w3,  #4
        b.eq            9f                      // all channels written
        cmp             w3,  #4
        add             x0,  x0,  #8            // skip the 4 columns just written
        b.ge            5b

4:      // 2 channels
        cmp             w3,  #2
        b.lt            4f                      // one channel left: go to the 1-channel pass
        ldp             x4,  x5,  [x1], #16
        mov             w9,  w2                 // w9 = samples left in this pass
        mov             x8,  x0                 // x8 = write cursor for this pass
        tst             w9,  #8                 // odd number of 8-sample groups?
        ld1             {v4.4s},        [x4], #16
        fcvtzs          v4.4s,  v4.4s,  #31
        ld1             {v5.4s},        [x5], #16
        fcvtzs          v5.4s,  v5.4s,  #31
        ld1             {v6.4s},        [x4], #16
        fcvtzs          v6.4s,  v6.4s,  #31
        ld1             {v7.4s},        [x5], #16
        fcvtzs          v7.4s,  v7.4s,  #31
        b.eq            6f                      // flags from tst: count is a multiple of 16
        // Odd group: emit 8 frames first so the main loop sees a multiple
        // of 16.
        subs            w9,  w9,  #8
        b.eq            7f                      // exactly 8 samples in total
        sri             v5.4s,  v4.4s,  #16
        ld1             {v4.4s},        [x4], #16
        fcvtzs          v4.4s,  v4.4s,  #31
        st1             {v5.s}[0],      [x8], x12
        sri             v7.4s,  v6.4s,  #16
        st1             {v5.s}[1],      [x8], x12
        ld1             {v6.4s},        [x4], #16
        fcvtzs          v6.4s,  v6.4s,  #31
        st1             {v5.s}[2],      [x8], x12
        st1             {v5.s}[3],      [x8], x12
        st1             {v7.s}[0],      [x8], x12
        st1             {v7.s}[1],      [x8], x12
        ld1             {v5.4s},        [x5], #16
        fcvtzs          v5.4s,  v5.4s,  #31
        st1             {v7.s}[2],      [x8], x12
        st1             {v7.s}[3],      [x8], x12
        ld1             {v7.4s},        [x5], #16
        fcvtzs          v7.4s,  v7.4s,  #31
        // Main loop: 16 frames per iteration.  v4/v6 (from x4) and v5/v7
        // (from x5) hold 8 converted samples per channel on entry; each
        // 32-bit lane stored is one 2-channel group.
6:
        subs            w9,  w9,  #16
        ld1             {v0.4s},        [x4], #16
        sri             v5.4s,  v4.4s,  #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},        [x5], #16
        sri             v7.4s,  v6.4s,  #16
        st1             {v5.s}[0],      [x8], x12
        st1             {v5.s}[1],      [x8], x12
        fcvtzs          v1.4s,  v1.4s,  #31
        st1             {v5.s}[2],      [x8], x12
        st1             {v5.s}[3],      [x8], x12
        ld1             {v2.4s},        [x4], #16
        st1             {v7.s}[0],      [x8], x12
        fcvtzs          v2.4s,  v2.4s,  #31
        st1             {v7.s}[1],      [x8], x12
        ld1             {v3.4s},        [x5], #16
        st1             {v7.s}[2],      [x8], x12
        fcvtzs          v3.4s,  v3.4s,  #31
        st1             {v7.s}[3],      [x8], x12
        sri             v1.4s,  v0.4s,  #16
        sri             v3.4s,  v2.4s,  #16
        b.eq            6f                      // flags from the subs above: flush and finish
        ld1             {v4.4s},        [x4], #16
        st1             {v1.s}[0],      [x8], x12
        fcvtzs          v4.4s,  v4.4s,  #31
        st1             {v1.s}[1],      [x8], x12
        ld1             {v5.4s},        [x5], #16
        st1             {v1.s}[2],      [x8], x12
        fcvtzs          v5.4s,  v5.4s,  #31
        st1             {v1.s}[3],      [x8], x12
        ld1             {v6.4s},        [x4], #16
        st1             {v3.s}[0],      [x8], x12
        fcvtzs          v6.4s,  v6.4s,  #31
        st1             {v3.s}[1],      [x8], x12
        ld1             {v7.4s},        [x5], #16
        st1             {v3.s}[2],      [x8], x12
        fcvtzs          v7.4s,  v7.4s,  #31
        st1             {v3.s}[3],      [x8], x12
        b.gt            6b                      // binds to the loop head above
6:
        // Flush the last 8 frames held in v1/v3.
        st1             {v1.s}[0],      [x8], x12
        st1             {v1.s}[1],      [x8], x12
        st1             {v1.s}[2],      [x8], x12
        st1             {v1.s}[3],      [x8], x12
        st1             {v3.s}[0],      [x8], x12
        st1             {v3.s}[1],      [x8], x12
        st1             {v3.s}[2],      [x8], x12
        st1             {v3.s}[3],      [x8], x12
        b               8f
7:
        // Exactly 8 samples: pack and store the frames loaded in the prologue.
        sri             v5.4s,  v4.4s,  #16
        sri             v7.4s,  v6.4s,  #16
        st1             {v5.s}[0],      [x8], x12
        st1             {v5.s}[1],      [x8], x12
        st1             {v5.s}[2],      [x8], x12
        st1             {v5.s}[3],      [x8], x12
        st1             {v7.s}[0],      [x8], x12
        st1             {v7.s}[1],      [x8], x12
        st1             {v7.s}[2],      [x8], x12
        st1             {v7.s}[3],      [x8], x12
8:
        subs            w3,  w3,  #2
        add             x0,  x0,  #4            // skip the 2 columns just written
        b.eq            9f                      // all channels written

4:      // 1 channel
        ldr             x4,  [x1]
        tst             w2,  #8                 // odd number of 8-sample groups?
        mov             w9,  w2                 // w9 = samples left in this pass
        mov             x5,  x0                 // x5 = write cursor for this pass
        ld1             {v0.4s},        [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},        [x4], #16
        fcvtzs          v1.4s,  v1.4s,  #31
        b.ne            8f                      // flags from tst: emit 8 samples first
        // Main loop: 16 samples per iteration.  The odd halfword lanes
        // (1, 3, 5, 7) are the upper 16 bits of each converted 32-bit
        // sample.
6:
        subs            w9,  w9,  #16
        ld1             {v2.4s},        [x4], #16
        fcvtzs          v2.4s,  v2.4s,  #31
        ld1             {v3.4s},        [x4], #16
        fcvtzs          v3.4s,  v3.4s,  #31
        st1             {v0.h}[1],      [x5], x12
        st1             {v0.h}[3],      [x5], x12
        st1             {v0.h}[5],      [x5], x12
        st1             {v0.h}[7],      [x5], x12
        st1             {v1.h}[1],      [x5], x12
        st1             {v1.h}[3],      [x5], x12
        st1             {v1.h}[5],      [x5], x12
        st1             {v1.h}[7],      [x5], x12
        b.eq            7f                      // last iteration: skip the pre-load
        ld1             {v0.4s},        [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},        [x4], #16
        fcvtzs          v1.4s,  v1.4s,  #31
7:
        st1             {v2.h}[1],      [x5], x12
        st1             {v2.h}[3],      [x5], x12
        st1             {v2.h}[5],      [x5], x12
        st1             {v2.h}[7],      [x5], x12
        st1             {v3.h}[1],      [x5], x12
        st1             {v3.h}[3],      [x5], x12
        st1             {v3.h}[5],      [x5], x12
        st1             {v3.h}[7],      [x5], x12
        b.gt            6b                      // flags still from the subs at the loop head
        ret
8:
        // Odd group: emit 8 samples so the main loop sees a multiple of 16.
        subs            w9,  w9,  #8
        st1             {v0.h}[1],      [x5], x12
        st1             {v0.h}[3],      [x5], x12
        st1             {v0.h}[5],      [x5], x12
        st1             {v0.h}[7],      [x5], x12
        st1             {v1.h}[1],      [x5], x12
        st1             {v1.h}[3],      [x5], x12
        st1             {v1.h}[5],      [x5], x12
        st1             {v1.h}[7],      [x5], x12
        b.eq            9f                      // exactly 8 samples in total
        ld1             {v0.4s},        [x4], #16
        fcvtzs          v0.4s,  v0.4s,  #31
        ld1             {v1.4s},        [x4], #16
        fcvtzs          v1.4s,  v1.4s,  #31
        b               6b
9:      // common exit (a numeric local label, so no named symbol is emitted)
        ret
endfunc
364