1cabdff1aSopenharmony_ci;******************************************************************************
2cabdff1aSopenharmony_ci;* Copyright (c) 2012 Michael Niedermayer
3cabdff1aSopenharmony_ci;*
4cabdff1aSopenharmony_ci;* This file is part of FFmpeg.
5cabdff1aSopenharmony_ci;*
6cabdff1aSopenharmony_ci;* FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci;* modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci;* License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci;* version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci;*
11cabdff1aSopenharmony_ci;* FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci;* Lesser General Public License for more details.
15cabdff1aSopenharmony_ci;*
16cabdff1aSopenharmony_ci;* You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci;* License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci;******************************************************************************
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci%include "libavutil/x86/x86util.asm"
22cabdff1aSopenharmony_ci
; Read-only constants shared by the conversion macros below.
; Each float table is 8 identical dwords (32 bytes).
; NOTE(review): the "32" given to SECTION_RODATA is presumably the requested
; section alignment (the macro comes from the include above) - confirm.
23cabdff1aSopenharmony_ciSECTION_RODATA 32
; 4.6566129e-10 ~= 2^-31: scale loaded by INT32_TO_FLOAT_INIT and
; INT16_TO_FLOAT_INIT (also referenced directly as m9 by PACK_8CH on x86_32).
24cabdff1aSopenharmony_ciflt2pm31: times 8 dd 4.6566129e-10
; 2^31: scale loaded by FLOAT_TO_INT32_INIT (also used as m9 by PACK_8CH on x86_32).
25cabdff1aSopenharmony_ciflt2p31 : times 8 dd 2147483648.0
; 2^15: scale loaded by FLOAT_TO_INT16_INIT.
26cabdff1aSopenharmony_ciflt2p15 : times 8 dd 32768.0
27cabdff1aSopenharmony_ci
; pshufb control loaded into m6 by UNPACK_2CH: moves the even-numbered 16-bit
; words to bytes 0-7 and the odd-numbered words to bytes 8-15.
28cabdff1aSopenharmony_ciword_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15
29cabdff1aSopenharmony_ci
30cabdff1aSopenharmony_ciSECTION .text
31cabdff1aSopenharmony_ci
32cabdff1aSopenharmony_ci
33cabdff1aSopenharmony_ci;macro args: to, from, a/u, log2_outsize, log2_insize, per-iteration convert macro, pre-loop init macro
;------------------------------------------------------------------------------
; PACK_2CH %1..%7
; Emits pack_2ch_<%2>_to_<%1>_<%3>: interleaves two planar source channels
; into one packed destination, applying the conversion macro %6 on the way.
;   %1, %2  destination / source format names (only used in the symbol name)
;   %3      a or u, appended to "mov" for every vector load/store
;   %4, %5  log2 of the output / input sample size in bytes
;   %6      conversion macro, invoked each iteration with m0..m5
;   %7      init macro, invoked once before the loop with m0..m5
; dst and src are arrays of plane pointers; dst[0], src[0], src[1] are used.
; The "a" variant tests the three pointers against mmsize-1 and, if any is
; misaligned, jumps to the *_u_int label, which is only defined by the "u"
; instantiation. The "u" variant must therefore exist for the same SUFFIX.
; NOTE(review): the loop body runs at least once and advances by a fixed
; sample count; presumably callers guarantee a suitable len - confirm.
;------------------------------------------------------------------------------
34cabdff1aSopenharmony_ci%macro PACK_2CH 5-7
35cabdff1aSopenharmony_cicglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
; Dereference the plane-pointer arrays (src2 first, before srcq is overwritten).
36cabdff1aSopenharmony_ci    mov src2q   , [srcq+gprsize]
37cabdff1aSopenharmony_ci    mov srcq    , [srcq]
38cabdff1aSopenharmony_ci    mov dstq    , [dstq]
39cabdff1aSopenharmony_ci%ifidn %3, a
; Aligned variant: fall back to the unaligned body if any pointer is misaligned.
40cabdff1aSopenharmony_ci    test dstq, mmsize-1
41cabdff1aSopenharmony_ci        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
42cabdff1aSopenharmony_ci    test srcq, mmsize-1
43cabdff1aSopenharmony_ci        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
44cabdff1aSopenharmony_ci    test src2q, mmsize-1
45cabdff1aSopenharmony_ci        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
46cabdff1aSopenharmony_ci%else
; Entry point used by the aligned variant's fallback jumps (pointers already loaded).
47cabdff1aSopenharmony_cipack_2ch_%2_to_%1_u_int %+ SUFFIX:
48cabdff1aSopenharmony_ci%endif
; Point every pointer at the end of its buffer and count lenq from -len up to 0.
49cabdff1aSopenharmony_ci    lea     srcq , [srcq  + (1<<%5)*lenq]
50cabdff1aSopenharmony_ci    lea     src2q, [src2q + (1<<%5)*lenq]
51cabdff1aSopenharmony_ci    lea     dstq , [dstq  + (2<<%4)*lenq]
52cabdff1aSopenharmony_ci    neg     lenq
; One-time setup for the conversion macro (e.g. loading a scale constant).
53cabdff1aSopenharmony_ci    %7 m0,m1,m2,m3,m4,m5
54cabdff1aSopenharmony_ci.next:
55cabdff1aSopenharmony_ci%if %4 >= %5
; Output samples are at least as wide as input: interleave first, then convert.
56cabdff1aSopenharmony_ci    mov%3     m0, [         srcq +(1<<%5)*lenq]
57cabdff1aSopenharmony_ci    mova      m1, m0
58cabdff1aSopenharmony_ci    mov%3     m2, [         src2q+(1<<%5)*lenq]
59cabdff1aSopenharmony_ci%if %5 == 1
; 2-byte input samples: interleave at word granularity.
60cabdff1aSopenharmony_ci    punpcklwd m0, m2
61cabdff1aSopenharmony_ci    punpckhwd m1, m2
62cabdff1aSopenharmony_ci%else
; Otherwise interleave at dword granularity.
63cabdff1aSopenharmony_ci    punpckldq m0, m2
64cabdff1aSopenharmony_ci    punpckhdq m1, m2
65cabdff1aSopenharmony_ci%endif
66cabdff1aSopenharmony_ci    %6 m0,m1,m2,m3,m4,m5
67cabdff1aSopenharmony_ci%else
; Output samples are narrower than input: load two registers per channel,
; convert, then interleave the converted words of the two channels.
68cabdff1aSopenharmony_ci    mov%3     m0, [         srcq +(1<<%5)*lenq]
69cabdff1aSopenharmony_ci    mov%3     m1, [mmsize + srcq +(1<<%5)*lenq]
70cabdff1aSopenharmony_ci    mov%3     m2, [         src2q+(1<<%5)*lenq]
71cabdff1aSopenharmony_ci    mov%3     m3, [mmsize + src2q+(1<<%5)*lenq]
72cabdff1aSopenharmony_ci    %6 m0,m1,m2,m3,m4,m5
73cabdff1aSopenharmony_ci    mova      m2, m0
74cabdff1aSopenharmony_ci    punpcklwd m0, m1
75cabdff1aSopenharmony_ci    punpckhwd m2, m1
; Rename so the high interleave is called m1 for the stores below.
76cabdff1aSopenharmony_ci    SWAP 1,2
77cabdff1aSopenharmony_ci%endif
78cabdff1aSopenharmony_ci    mov%3 [           dstq+(2<<%4)*lenq], m0
79cabdff1aSopenharmony_ci    mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
80cabdff1aSopenharmony_ci%if %4 > %5
; Widening conversion produced four registers of output.
81cabdff1aSopenharmony_ci    mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
82cabdff1aSopenharmony_ci    mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
; Advance by the per-channel sample count held in 4*mmsize bytes of packed output.
83cabdff1aSopenharmony_ci    add lenq, 4*mmsize/(2<<%4)
84cabdff1aSopenharmony_ci%else
; Advance by the per-channel sample count held in 2*mmsize bytes of packed output.
85cabdff1aSopenharmony_ci    add lenq, 2*mmsize/(2<<%4)
86cabdff1aSopenharmony_ci%endif
; Loop while the (negative) index has not reached zero.
87cabdff1aSopenharmony_ci        jl .next
88cabdff1aSopenharmony_ci    REP_RET
89cabdff1aSopenharmony_ci%endmacro
90cabdff1aSopenharmony_ci
;------------------------------------------------------------------------------
; UNPACK_2CH %1..%7
; Emits unpack_2ch_<%2>_to_<%1>_<%3>: splits one packed two-channel source
; into two planar destinations, applying the conversion macro %6.
;   %1, %2  destination / source format names (only used in the symbol name)
;   %3      a or u, appended to "mov" for every vector load/store
;   %4, %5  log2 of the output / input sample size in bytes
;   %6      conversion macro, invoked each iteration with m0..m5
;   %7      init macro, invoked once before the loop with m0..m5
; dst and src are arrays of plane pointers; dst[0], dst[1], src[0] are used.
; The "a" variant jumps to the *_u_int label (defined only by the "u"
; instantiation) when any of the three pointers is not mmsize-aligned.
;------------------------------------------------------------------------------
91cabdff1aSopenharmony_ci%macro UNPACK_2CH 5-7
92cabdff1aSopenharmony_cicglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
; Dereference the plane-pointer arrays (dst2 first, before dstq is overwritten).
93cabdff1aSopenharmony_ci    mov dst2q   , [dstq+gprsize]
94cabdff1aSopenharmony_ci    mov srcq    , [srcq]
95cabdff1aSopenharmony_ci    mov dstq    , [dstq]
96cabdff1aSopenharmony_ci%ifidn %3, a
; Aligned variant: fall back to the unaligned body if any pointer is misaligned.
97cabdff1aSopenharmony_ci    test dstq, mmsize-1
98cabdff1aSopenharmony_ci        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
99cabdff1aSopenharmony_ci    test srcq, mmsize-1
100cabdff1aSopenharmony_ci        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
101cabdff1aSopenharmony_ci    test dst2q, mmsize-1
102cabdff1aSopenharmony_ci        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
103cabdff1aSopenharmony_ci%else
; Entry point used by the aligned variant's fallback jumps (pointers already loaded).
104cabdff1aSopenharmony_ciunpack_2ch_%2_to_%1_u_int %+ SUFFIX:
105cabdff1aSopenharmony_ci%endif
; Point every pointer at the end of its buffer and count lenq from -len up to 0.
106cabdff1aSopenharmony_ci    lea     srcq , [srcq  + (2<<%5)*lenq]
107cabdff1aSopenharmony_ci    lea     dstq , [dstq  + (1<<%4)*lenq]
108cabdff1aSopenharmony_ci    lea     dst2q, [dst2q + (1<<%4)*lenq]
109cabdff1aSopenharmony_ci    neg     lenq
110cabdff1aSopenharmony_ci    %7 m0,m1,m2,m3,m4,m5
; The shuffle table is loaded unconditionally; only the SSSE3 word path reads m6.
111cabdff1aSopenharmony_ci    mova      m6, [word_unpack_shuf]
112cabdff1aSopenharmony_ci.next:
113cabdff1aSopenharmony_ci    mov%3     m0, [           srcq +(2<<%5)*lenq]
114cabdff1aSopenharmony_ci    mov%3     m2, [  mmsize + srcq +(2<<%5)*lenq]
115cabdff1aSopenharmony_ci%if %5 == 1
116cabdff1aSopenharmony_ci%ifidn SUFFIX, _ssse3
; SSSE3: pshufb groups even/odd words inside each register, then the qword
; unpacks gather channel 0 into m0 and channel 1 into m1.
117cabdff1aSopenharmony_ci    pshufb    m0, m6
118cabdff1aSopenharmony_ci    mova      m1, m0
119cabdff1aSopenharmony_ci    pshufb    m2, m6
120cabdff1aSopenharmony_ci    punpcklqdq m0,m2
121cabdff1aSopenharmony_ci    punpckhqdq m1,m2
122cabdff1aSopenharmony_ci%else
; Without pshufb: three rounds of word unpacks perform the same de-interleave,
; ending with channel 0 in m0 and channel 1 in m1.
123cabdff1aSopenharmony_ci    mova      m1, m0
124cabdff1aSopenharmony_ci    punpcklwd m0,m2
125cabdff1aSopenharmony_ci    punpckhwd m1,m2
126cabdff1aSopenharmony_ci
127cabdff1aSopenharmony_ci    mova      m2, m0
128cabdff1aSopenharmony_ci    punpcklwd m0,m1
129cabdff1aSopenharmony_ci    punpckhwd m2,m1
130cabdff1aSopenharmony_ci
131cabdff1aSopenharmony_ci    mova      m1, m0
132cabdff1aSopenharmony_ci    punpcklwd m0,m2
133cabdff1aSopenharmony_ci    punpckhwd m1,m2
134cabdff1aSopenharmony_ci%endif
135cabdff1aSopenharmony_ci%else
; Dword samples: 10001000b selects elements 0,2 of each operand (even samples),
; 11011101b selects elements 1,3 (odd samples).
136cabdff1aSopenharmony_ci    mova      m1, m0
137cabdff1aSopenharmony_ci    shufps    m0, m2, 10001000b
138cabdff1aSopenharmony_ci    shufps    m1, m2, 11011101b
139cabdff1aSopenharmony_ci%endif
140cabdff1aSopenharmony_ci%if %4 < %5
; Narrowing conversion: de-interleave two more source registers so that, after
; the rename, m0/m1 hold channel 0 and m2/m3 hold channel 1.
141cabdff1aSopenharmony_ci    mov%3     m2, [2*mmsize + srcq +(2<<%5)*lenq]
142cabdff1aSopenharmony_ci    mova      m3, m2
143cabdff1aSopenharmony_ci    mov%3     m4, [3*mmsize + srcq +(2<<%5)*lenq]
144cabdff1aSopenharmony_ci    shufps    m2, m4, 10001000b
145cabdff1aSopenharmony_ci    shufps    m3, m4, 11011101b
146cabdff1aSopenharmony_ci    SWAP 1,2
147cabdff1aSopenharmony_ci%endif
148cabdff1aSopenharmony_ci    %6 m0,m1,m2,m3,m4,m5
149cabdff1aSopenharmony_ci    mov%3 [           dstq+(1<<%4)*lenq], m0
150cabdff1aSopenharmony_ci%if %4 > %5
; Widening conversion: m0/m1 go to the first plane, m2/m3 to the second.
151cabdff1aSopenharmony_ci    mov%3 [          dst2q+(1<<%4)*lenq], m2
152cabdff1aSopenharmony_ci    mov%3 [ mmsize +  dstq+(1<<%4)*lenq], m1
153cabdff1aSopenharmony_ci    mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
; Two registers were written per plane.
154cabdff1aSopenharmony_ci    add lenq, 2*mmsize/(1<<%4)
155cabdff1aSopenharmony_ci%else
156cabdff1aSopenharmony_ci    mov%3 [          dst2q+(1<<%4)*lenq], m1
; One register was written per plane.
157cabdff1aSopenharmony_ci    add lenq, mmsize/(1<<%4)
158cabdff1aSopenharmony_ci%endif
; Loop while the (negative) index has not reached zero.
159cabdff1aSopenharmony_ci        jl .next
160cabdff1aSopenharmony_ci    REP_RET
161cabdff1aSopenharmony_ci%endmacro
162cabdff1aSopenharmony_ci
;------------------------------------------------------------------------------
; CONV %1..%7
; Emits <%2>_to_<%1>_<%3>: converts a single plane from one sample format to
; another using the conversion macro %6.
;   %1, %2  destination / source format names (only used in the symbol name)
;   %3      a or u, appended to "mov" for every vector load/store
;   %4, %5  log2 of the output / input sample size in bytes
;   %6      conversion macro, invoked each iteration with m0..m5
;   %7      init macro, invoked once before the loop with m0..m5
; dst and src are arrays of plane pointers; only element 0 of each is used.
; The "a" variant jumps to the *_u_int label (defined only by the "u"
; instantiation) when dst or src is not mmsize-aligned.
;------------------------------------------------------------------------------
163cabdff1aSopenharmony_ci%macro CONV 5-7
164cabdff1aSopenharmony_cicglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
165cabdff1aSopenharmony_ci    mov srcq    , [srcq]
166cabdff1aSopenharmony_ci    mov dstq    , [dstq]
167cabdff1aSopenharmony_ci%ifidn %3, a
; Aligned variant: fall back to the unaligned body if either pointer is misaligned.
168cabdff1aSopenharmony_ci    test dstq, mmsize-1
169cabdff1aSopenharmony_ci        jne %2_to_%1_u_int %+ SUFFIX
170cabdff1aSopenharmony_ci    test srcq, mmsize-1
171cabdff1aSopenharmony_ci        jne %2_to_%1_u_int %+ SUFFIX
172cabdff1aSopenharmony_ci%else
; Entry point used by the aligned variant's fallback jumps (pointers already loaded).
173cabdff1aSopenharmony_ci%2_to_%1_u_int %+ SUFFIX:
174cabdff1aSopenharmony_ci%endif
; Point both pointers at the end of their buffers and count lenq from -len up to 0.
175cabdff1aSopenharmony_ci    lea     srcq , [srcq  + (1<<%5)*lenq]
176cabdff1aSopenharmony_ci    lea     dstq , [dstq  + (1<<%4)*lenq]
177cabdff1aSopenharmony_ci    neg     lenq
178cabdff1aSopenharmony_ci    %7 m0,m1,m2,m3,m4,m5
179cabdff1aSopenharmony_ci.next:
180cabdff1aSopenharmony_ci    mov%3     m0, [           srcq +(1<<%5)*lenq]
181cabdff1aSopenharmony_ci    mov%3     m1, [  mmsize + srcq +(1<<%5)*lenq]
182cabdff1aSopenharmony_ci%if %4 < %5
; Narrowing conversion consumes four input registers per iteration.
183cabdff1aSopenharmony_ci    mov%3     m2, [2*mmsize + srcq +(1<<%5)*lenq]
184cabdff1aSopenharmony_ci    mov%3     m3, [3*mmsize + srcq +(1<<%5)*lenq]
185cabdff1aSopenharmony_ci%endif
186cabdff1aSopenharmony_ci    %6 m0,m1,m2,m3,m4,m5
187cabdff1aSopenharmony_ci    mov%3 [           dstq+(1<<%4)*lenq], m0
188cabdff1aSopenharmony_ci    mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
189cabdff1aSopenharmony_ci%if %4 > %5
; Widening conversion produces four output registers per iteration.
190cabdff1aSopenharmony_ci    mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
191cabdff1aSopenharmony_ci    mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
192cabdff1aSopenharmony_ci    add lenq, 4*mmsize/(1<<%4)
193cabdff1aSopenharmony_ci%else
194cabdff1aSopenharmony_ci    add lenq, 2*mmsize/(1<<%4)
195cabdff1aSopenharmony_ci%endif
; Loop while the (negative) index has not reached zero.
196cabdff1aSopenharmony_ci        jl .next
197cabdff1aSopenharmony_ci%if mmsize == 8
; MMX build: clear the MMX state before returning.
198cabdff1aSopenharmony_ci    emms
199cabdff1aSopenharmony_ci    RET
200cabdff1aSopenharmony_ci%else
201cabdff1aSopenharmony_ci    REP_RET
202cabdff1aSopenharmony_ci%endif
203cabdff1aSopenharmony_ci%endmacro
204cabdff1aSopenharmony_ci
;------------------------------------------------------------------------------
; PACK_6CH %1..%8
; Emits pack_6ch_<%2>_to_<%1>_<%3>: interleaves six planar source channels
; into one packed destination.
;   %1, %2  destination / source format names (only used in the symbol name)
;   %3      a or u, selects aligned/unaligned vector loads and stores
;   %4, %5  not referenced inside this macro
;   %6      vector register count handed to cglobal
;   %7      conversion macro, called as "%7 regA,regB,x,x,const,tmp"
;   %8      init macro, called once as "%8 x,x,x,x,m7,x" (m7 = constant register)
; Only dst and src are loaded as register arguments; len is taken from the
; third argument (copied from r2d on x86_64, addressed as r2m on x86_32).
; The "a" variant jumps to the *_u_int label (defined only by the "u"
; instantiation) when any of the seven pointers is not mmsize-aligned.
; The SSE path uses hard-coded 16-byte store offsets.
;------------------------------------------------------------------------------
205cabdff1aSopenharmony_ci%macro PACK_6CH 8
206cabdff1aSopenharmony_cicglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
207cabdff1aSopenharmony_ci%if ARCH_X86_64
; Copy len out of r2 before r2 is reused as src1.
208cabdff1aSopenharmony_ci    mov     lend, r2d
209cabdff1aSopenharmony_ci%else
; x86_32: operate on len directly in its argument slot.
210cabdff1aSopenharmony_ci    %define lend dword r2m
211cabdff1aSopenharmony_ci%endif
; Fetch the six plane pointers; srcq itself is overwritten last.
212cabdff1aSopenharmony_ci    mov    src1q, [srcq+1*gprsize]
213cabdff1aSopenharmony_ci    mov    src2q, [srcq+2*gprsize]
214cabdff1aSopenharmony_ci    mov    src3q, [srcq+3*gprsize]
215cabdff1aSopenharmony_ci    mov    src4q, [srcq+4*gprsize]
216cabdff1aSopenharmony_ci    mov    src5q, [srcq+5*gprsize]
217cabdff1aSopenharmony_ci    mov     srcq, [srcq]
218cabdff1aSopenharmony_ci    mov     dstq, [dstq]
219cabdff1aSopenharmony_ci%ifidn %3, a
; Aligned variant: fall back to the unaligned body if any pointer is misaligned.
220cabdff1aSopenharmony_ci    test dstq, mmsize-1
221cabdff1aSopenharmony_ci        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
222cabdff1aSopenharmony_ci    test srcq, mmsize-1
223cabdff1aSopenharmony_ci        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
224cabdff1aSopenharmony_ci    test src1q, mmsize-1
225cabdff1aSopenharmony_ci        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
226cabdff1aSopenharmony_ci    test src2q, mmsize-1
227cabdff1aSopenharmony_ci        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
228cabdff1aSopenharmony_ci    test src3q, mmsize-1
229cabdff1aSopenharmony_ci        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
230cabdff1aSopenharmony_ci    test src4q, mmsize-1
231cabdff1aSopenharmony_ci        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
232cabdff1aSopenharmony_ci    test src5q, mmsize-1
233cabdff1aSopenharmony_ci        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
234cabdff1aSopenharmony_ci%else
; Entry point used by the aligned variant's fallback jumps (pointers already loaded).
235cabdff1aSopenharmony_cipack_6ch_%2_to_%1_u_int %+ SUFFIX:
236cabdff1aSopenharmony_ci%endif
; Turn src1..src5 into offsets from srcq so that advancing srcq alone moves
; all six channel cursors.
237cabdff1aSopenharmony_ci    sub    src1q, srcq
238cabdff1aSopenharmony_ci    sub    src2q, srcq
239cabdff1aSopenharmony_ci    sub    src3q, srcq
240cabdff1aSopenharmony_ci    sub    src4q, srcq
241cabdff1aSopenharmony_ci    sub    src5q, srcq
242cabdff1aSopenharmony_ci    %8 x,x,x,x,m7,x
243cabdff1aSopenharmony_ci.loop:
; One register per channel.
244cabdff1aSopenharmony_ci    mov%3     m0, [srcq      ]
245cabdff1aSopenharmony_ci    mov%3     m1, [srcq+src1q]
246cabdff1aSopenharmony_ci    mov%3     m2, [srcq+src2q]
247cabdff1aSopenharmony_ci    mov%3     m3, [srcq+src3q]
248cabdff1aSopenharmony_ci    mov%3     m4, [srcq+src4q]
249cabdff1aSopenharmony_ci    mov%3     m5, [srcq+src5q]
250cabdff1aSopenharmony_ci%if cpuflag(sse)
; Interleave channel pairs (0,1), (2,3), (4,5); m6 is the scratch register.
251cabdff1aSopenharmony_ci    SBUTTERFLYPS 0, 1, 6
252cabdff1aSopenharmony_ci    SBUTTERFLYPS 2, 3, 6
253cabdff1aSopenharmony_ci    SBUTTERFLYPS 4, 5, 6
254cabdff1aSopenharmony_ci
255cabdff1aSopenharmony_ci%if cpuflag(avx)
; m6 = low half of m4 combined with high half of m0.
256cabdff1aSopenharmony_ci    blendps   m6, m4, m0, 1100b
257cabdff1aSopenharmony_ci%else
; Same combination without blendps: shuffle into m4, then swap names so the
; result is called m6 and the untouched copy is called m4.
258cabdff1aSopenharmony_ci    movaps    m6, m4
259cabdff1aSopenharmony_ci    shufps    m4, m0, q3210
260cabdff1aSopenharmony_ci    SWAP 4,6
261cabdff1aSopenharmony_ci%endif
262cabdff1aSopenharmony_ci    movlhps   m0, m2
263cabdff1aSopenharmony_ci    movhlps   m4, m2
264cabdff1aSopenharmony_ci%if cpuflag(avx)
; m2 = low half of m5 combined with high half of m1.
265cabdff1aSopenharmony_ci    blendps   m2, m5, m1, 1100b
266cabdff1aSopenharmony_ci%else
267cabdff1aSopenharmony_ci    movaps    m2, m5
268cabdff1aSopenharmony_ci    shufps    m5, m1, q3210
269cabdff1aSopenharmony_ci    SWAP 2,5
270cabdff1aSopenharmony_ci%endif
271cabdff1aSopenharmony_ci    movlhps   m1, m3
272cabdff1aSopenharmony_ci    movhlps   m5, m3
273cabdff1aSopenharmony_ci
; Convert the six output registers two at a time (m7 = constant, m3 = temp).
274cabdff1aSopenharmony_ci    %7 m0,m6,x,x,m7,m3
275cabdff1aSopenharmony_ci    %7 m4,m1,x,x,m7,m3
276cabdff1aSopenharmony_ci    %7 m2,m5,x,x,m7,m3
277cabdff1aSopenharmony_ci
; "mov %+ %3 %+ ps" pastes to movaps / movups depending on %3.
278cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq   ], m0
279cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+16], m6
280cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+32], m4
281cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+48], m1
282cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+64], m2
283cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+80], m5
284cabdff1aSopenharmony_ci%else ; mmx
; MMX path: dword interleave of channel pairs, no conversion macro is applied.
285cabdff1aSopenharmony_ci    SBUTTERFLY dq, 0, 1, 6
286cabdff1aSopenharmony_ci    SBUTTERFLY dq, 2, 3, 6
287cabdff1aSopenharmony_ci    SBUTTERFLY dq, 4, 5, 6
288cabdff1aSopenharmony_ci
289cabdff1aSopenharmony_ci    movq   [dstq   ], m0
290cabdff1aSopenharmony_ci    movq   [dstq+ 8], m2
291cabdff1aSopenharmony_ci    movq   [dstq+16], m4
292cabdff1aSopenharmony_ci    movq   [dstq+24], m1
293cabdff1aSopenharmony_ci    movq   [dstq+32], m3
294cabdff1aSopenharmony_ci    movq   [dstq+40], m5
295cabdff1aSopenharmony_ci%endif
; Advance: one register of input per channel, six registers of output.
296cabdff1aSopenharmony_ci    add      srcq, mmsize
297cabdff1aSopenharmony_ci    add      dstq, mmsize*6
; mmsize/4 = number of 4-byte elements in one register.
298cabdff1aSopenharmony_ci    sub      lend, mmsize/4
299cabdff1aSopenharmony_ci    jg .loop
300cabdff1aSopenharmony_ci%if mmsize == 8
; MMX build: clear the MMX state before returning.
301cabdff1aSopenharmony_ci    emms
302cabdff1aSopenharmony_ci    RET
303cabdff1aSopenharmony_ci%else
304cabdff1aSopenharmony_ci    REP_RET
305cabdff1aSopenharmony_ci%endif
306cabdff1aSopenharmony_ci%endmacro
307cabdff1aSopenharmony_ci
;------------------------------------------------------------------------------
; UNPACK_6CH %1..%8
; Emits unpack_6ch_<%2>_to_<%1>_<%3>: splits one packed six-channel source
; into six planar destinations.
;   %1, %2  destination / source format names (only used in the symbol name)
;   %3      a or u, selects aligned/unaligned vector loads and stores
;   %4, %5  not referenced inside this macro
;   %6      vector register count handed to cglobal
;   %7      conversion macro, called as "%7 regA,regB,x,x,const,tmp"
;   %8      init macro, called once as "%8 x,x,x,x,m7,x" (m7 = constant register)
; Only dst and src are loaded as register arguments; len is taken from the
; third argument (copied from r2d on x86_64, addressed as r2m on x86_32).
; The "a" variant jumps to the *_u_int label (defined only by the "u"
; instantiation) when any of the seven pointers is not mmsize-aligned.
; Source offsets are hard-coded in 16-byte steps and there is no MMX path.
;------------------------------------------------------------------------------
308cabdff1aSopenharmony_ci%macro UNPACK_6CH 8
309cabdff1aSopenharmony_cicglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
310cabdff1aSopenharmony_ci%if ARCH_X86_64
; Copy len out of r2 before r2 is reused as dst1.
311cabdff1aSopenharmony_ci    mov     lend, r2d
312cabdff1aSopenharmony_ci%else
; x86_32: operate on len directly in its argument slot.
313cabdff1aSopenharmony_ci    %define lend dword r2m
314cabdff1aSopenharmony_ci%endif
; Fetch the six plane pointers; dstq itself is overwritten last.
315cabdff1aSopenharmony_ci    mov    dst1q, [dstq+1*gprsize]
316cabdff1aSopenharmony_ci    mov    dst2q, [dstq+2*gprsize]
317cabdff1aSopenharmony_ci    mov    dst3q, [dstq+3*gprsize]
318cabdff1aSopenharmony_ci    mov    dst4q, [dstq+4*gprsize]
319cabdff1aSopenharmony_ci    mov    dst5q, [dstq+5*gprsize]
320cabdff1aSopenharmony_ci    mov     dstq, [dstq]
321cabdff1aSopenharmony_ci    mov     srcq, [srcq]
322cabdff1aSopenharmony_ci%ifidn %3, a
; Aligned variant: fall back to the unaligned body if any pointer is misaligned.
323cabdff1aSopenharmony_ci    test dstq, mmsize-1
324cabdff1aSopenharmony_ci        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
325cabdff1aSopenharmony_ci    test srcq, mmsize-1
326cabdff1aSopenharmony_ci        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
327cabdff1aSopenharmony_ci    test dst1q, mmsize-1
328cabdff1aSopenharmony_ci        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
329cabdff1aSopenharmony_ci    test dst2q, mmsize-1
330cabdff1aSopenharmony_ci        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
331cabdff1aSopenharmony_ci    test dst3q, mmsize-1
332cabdff1aSopenharmony_ci        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
333cabdff1aSopenharmony_ci    test dst4q, mmsize-1
334cabdff1aSopenharmony_ci        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
335cabdff1aSopenharmony_ci    test dst5q, mmsize-1
336cabdff1aSopenharmony_ci        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
337cabdff1aSopenharmony_ci%else
; Entry point used by the aligned variant's fallback jumps (pointers already loaded).
338cabdff1aSopenharmony_ciunpack_6ch_%2_to_%1_u_int %+ SUFFIX:
339cabdff1aSopenharmony_ci%endif
; Turn dst1..dst5 into offsets from dstq so that advancing dstq alone moves
; all six channel cursors.
340cabdff1aSopenharmony_ci    sub    dst1q, dstq
341cabdff1aSopenharmony_ci    sub    dst2q, dstq
342cabdff1aSopenharmony_ci    sub    dst3q, dstq
343cabdff1aSopenharmony_ci    sub    dst4q, dstq
344cabdff1aSopenharmony_ci    sub    dst5q, dstq
345cabdff1aSopenharmony_ci    %8 x,x,x,x,m7,x
346cabdff1aSopenharmony_ci.loop:
; Six consecutive registers of packed input.
347cabdff1aSopenharmony_ci    mov%3     m0, [srcq   ]
348cabdff1aSopenharmony_ci    mov%3     m1, [srcq+16]
349cabdff1aSopenharmony_ci    mov%3     m2, [srcq+32]
350cabdff1aSopenharmony_ci    mov%3     m3, [srcq+48]
351cabdff1aSopenharmony_ci    mov%3     m4, [srcq+64]
352cabdff1aSopenharmony_ci    mov%3     m5, [srcq+80]
353cabdff1aSopenharmony_ci
; Two butterfly stages (m6 = scratch) followed by register renames leave one
; channel per register in m0..m5, matching the store order below.
354cabdff1aSopenharmony_ci    SBUTTERFLYPS 0, 3, 6
355cabdff1aSopenharmony_ci    SBUTTERFLYPS 1, 4, 6
356cabdff1aSopenharmony_ci    SBUTTERFLYPS 2, 5, 6
357cabdff1aSopenharmony_ci    SBUTTERFLYPS 0, 4, 6
358cabdff1aSopenharmony_ci    SBUTTERFLYPS 3, 2, 6
359cabdff1aSopenharmony_ci    SBUTTERFLYPS 1, 5, 6
360cabdff1aSopenharmony_ci    SWAP 1, 4
361cabdff1aSopenharmony_ci    SWAP 2, 3
362cabdff1aSopenharmony_ci
; Convert two registers at a time (m7 = constant, m6 = temp).
363cabdff1aSopenharmony_ci    %7 m0,m1,x,x,m7,m6
364cabdff1aSopenharmony_ci    %7 m2,m3,x,x,m7,m6
365cabdff1aSopenharmony_ci    %7 m4,m5,x,x,m7,m6
366cabdff1aSopenharmony_ci
; "mov %+ %3 %+ ps" pastes to movaps / movups depending on %3.
367cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq      ], m0
368cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+dst1q], m1
369cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+dst2q], m2
370cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+dst3q], m3
371cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+dst4q], m4
372cabdff1aSopenharmony_ci    mov %+ %3 %+ ps [dstq+dst5q], m5
373cabdff1aSopenharmony_ci
; Advance: six registers of input, one register of output per channel.
374cabdff1aSopenharmony_ci    add      srcq, mmsize*6
375cabdff1aSopenharmony_ci    add      dstq, mmsize
; mmsize/4 = number of 4-byte elements in one register.
376cabdff1aSopenharmony_ci    sub      lend, mmsize/4
377cabdff1aSopenharmony_ci    jg .loop
378cabdff1aSopenharmony_ci    REP_RET
379cabdff1aSopenharmony_ci%endmacro
380cabdff1aSopenharmony_ci
; GPR count requested by PACK_8CH: 10 on x86_64; on x86_32 it is 6, plus one
; more when HAVE_ALIGNED_STACK is set.
381cabdff1aSopenharmony_ci%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)
382cabdff1aSopenharmony_ci
;------------------------------------------------------------------------------
; PACK_8CH %1..%8
; Emits pack_8ch_<%2>_to_<%1>_<%3>: interleaves eight planar source channels
; into one packed destination.
;   %1      destination format name; on x86_32 it also selects the constant
;           used as m9 (flt2p31 when %1 is int32, flt2pm31 otherwise)
;   %2      source format name (only used in the symbol name)
;   %3      a or u, appended to "mov" for every vector load/store
;   %4, %5  not referenced inside this macro
;   %6      vector register count handed to cglobal
;   %7      conversion macro, called as "%7 regA,regB,x,x,const,tmp"
;   %8      init macro, called once on x86_64 as "%8 x,x,x,x,m9,x"
; x86_32 register pressure: src1, src7 (and src4 when HAVE_ALIGNED_STACK is 0)
; are aliases of r0, which is also dst. Their values live in stack slots
; [rsp+32], [rsp+40], [rsp+36] and dst lives in dstm; each is reloaded into r0
; just before use. 48 bytes of stack are reserved on x86_32, of which
; [rsp] and [rsp+16] are passed to TRANSPOSE8x4D.
; The "a" variant jumps to the *_u_int label (defined only by the "u"
; instantiation) when any pointer is not mmsize-aligned.
; Store offsets are hard-coded in 16-byte steps.
;------------------------------------------------------------------------------
383cabdff1aSopenharmony_ci%macro PACK_8CH 8
384cabdff1aSopenharmony_cicglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
385cabdff1aSopenharmony_ci    mov     dstq, [dstq]
386cabdff1aSopenharmony_ci%if ARCH_X86_32
; Rebind the register names for the smaller x86_32 register file.
387cabdff1aSopenharmony_ci    DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
388cabdff1aSopenharmony_ci    %define lend dword r2m
389cabdff1aSopenharmony_ci    %define src1q r0q
390cabdff1aSopenharmony_ci    %define src1m dword [rsp+32]
391cabdff1aSopenharmony_ci%if HAVE_ALIGNED_STACK == 0
; One register fewer is available: src4 also becomes an r0 alias with a spill slot.
392cabdff1aSopenharmony_ci    DEFINE_ARGS dst, src, src2, src3, src5, src6
393cabdff1aSopenharmony_ci    %define src4q r0q
394cabdff1aSopenharmony_ci    %define src4m dword [rsp+36]
395cabdff1aSopenharmony_ci%endif
396cabdff1aSopenharmony_ci    %define src7q r0q
397cabdff1aSopenharmony_ci    %define src7m dword [rsp+40]
; Save the dereferenced dst pointer, since r0 is about to be reused.
398cabdff1aSopenharmony_ci    mov     dstm, dstq
399cabdff1aSopenharmony_ci%endif
; Fetch plane pointers from highest to lowest; r0 aliases are spilled right
; after loading, and src1 is loaded last so it stays live in r0.
400cabdff1aSopenharmony_ci    mov    src7q, [srcq+7*gprsize]
401cabdff1aSopenharmony_ci    mov    src6q, [srcq+6*gprsize]
402cabdff1aSopenharmony_ci%if ARCH_X86_32
403cabdff1aSopenharmony_ci    mov    src7m, src7q
404cabdff1aSopenharmony_ci%endif
405cabdff1aSopenharmony_ci    mov    src5q, [srcq+5*gprsize]
406cabdff1aSopenharmony_ci    mov    src4q, [srcq+4*gprsize]
407cabdff1aSopenharmony_ci    mov    src3q, [srcq+3*gprsize]
408cabdff1aSopenharmony_ci%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
409cabdff1aSopenharmony_ci    mov    src4m, src4q
410cabdff1aSopenharmony_ci%endif
411cabdff1aSopenharmony_ci    mov    src2q, [srcq+2*gprsize]
412cabdff1aSopenharmony_ci    mov    src1q, [srcq+1*gprsize]
413cabdff1aSopenharmony_ci    mov     srcq, [srcq]
414cabdff1aSopenharmony_ci%ifidn %3, a
; Aligned variant: test every pointer (from its spill slot where it is not in
; a register) and fall back to the unaligned body if any is misaligned.
415cabdff1aSopenharmony_ci%if ARCH_X86_32
416cabdff1aSopenharmony_ci    test dstmp, mmsize-1
417cabdff1aSopenharmony_ci%else
418cabdff1aSopenharmony_ci    test dstq, mmsize-1
419cabdff1aSopenharmony_ci%endif
420cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
421cabdff1aSopenharmony_ci    test srcq, mmsize-1
422cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
423cabdff1aSopenharmony_ci    test src1q, mmsize-1
424cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
425cabdff1aSopenharmony_ci    test src2q, mmsize-1
426cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
427cabdff1aSopenharmony_ci    test src3q, mmsize-1
428cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
429cabdff1aSopenharmony_ci%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
430cabdff1aSopenharmony_ci    test src4m, mmsize-1
431cabdff1aSopenharmony_ci%else
432cabdff1aSopenharmony_ci    test src4q, mmsize-1
433cabdff1aSopenharmony_ci%endif
434cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
435cabdff1aSopenharmony_ci    test src5q, mmsize-1
436cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
437cabdff1aSopenharmony_ci    test src6q, mmsize-1
438cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
439cabdff1aSopenharmony_ci%if ARCH_X86_32
440cabdff1aSopenharmony_ci    test src7m, mmsize-1
441cabdff1aSopenharmony_ci%else
442cabdff1aSopenharmony_ci    test src7q, mmsize-1
443cabdff1aSopenharmony_ci%endif
444cabdff1aSopenharmony_ci        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
445cabdff1aSopenharmony_ci%else
; Entry point used by the aligned variant's fallback jumps (pointers already loaded).
446cabdff1aSopenharmony_cipack_8ch_%2_to_%1_u_int %+ SUFFIX:
447cabdff1aSopenharmony_ci%endif
; Turn src1..src7 into offsets from srcq (in place in the spill slot where the
; value is not held in a register).
448cabdff1aSopenharmony_ci    sub    src1q, srcq
449cabdff1aSopenharmony_ci    sub    src2q, srcq
450cabdff1aSopenharmony_ci    sub    src3q, srcq
451cabdff1aSopenharmony_ci%if ARCH_X86_64 || HAVE_ALIGNED_STACK
452cabdff1aSopenharmony_ci    sub    src4q, srcq
453cabdff1aSopenharmony_ci%else
454cabdff1aSopenharmony_ci    sub    src4m, srcq
455cabdff1aSopenharmony_ci%endif
456cabdff1aSopenharmony_ci    sub    src5q, srcq
457cabdff1aSopenharmony_ci    sub    src6q, srcq
458cabdff1aSopenharmony_ci%if ARCH_X86_64
459cabdff1aSopenharmony_ci    sub    src7q, srcq
460cabdff1aSopenharmony_ci%else
; Spill the src1 offset so r0 can be recycled inside the loop.
461cabdff1aSopenharmony_ci    mov src1m, src1q
462cabdff1aSopenharmony_ci    sub src7m, srcq
463cabdff1aSopenharmony_ci%endif
464cabdff1aSopenharmony_ci
465cabdff1aSopenharmony_ci%if ARCH_X86_64
; x86_64: keep the conversion constant in register m9.
466cabdff1aSopenharmony_ci    %8 x,x,x,x,m9,x
467cabdff1aSopenharmony_ci%elifidn %1, int32
; x86_32: no m9 register is used; "m9" becomes a memory operand instead.
468cabdff1aSopenharmony_ci    %define m9 [flt2p31]
469cabdff1aSopenharmony_ci%else
470cabdff1aSopenharmony_ci    %define m9 [flt2pm31]
471cabdff1aSopenharmony_ci%endif
472cabdff1aSopenharmony_ci
473cabdff1aSopenharmony_ci.loop:
; One register per channel; on x86_32 r0 is reloaded with the src4/src7
; offsets right before they are needed (src1 has already been consumed).
474cabdff1aSopenharmony_ci    mov%3     m0, [srcq      ]
475cabdff1aSopenharmony_ci    mov%3     m1, [srcq+src1q]
476cabdff1aSopenharmony_ci    mov%3     m2, [srcq+src2q]
477cabdff1aSopenharmony_ci%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
478cabdff1aSopenharmony_ci    mov    src4q, src4m
479cabdff1aSopenharmony_ci%endif
480cabdff1aSopenharmony_ci    mov%3     m3, [srcq+src3q]
481cabdff1aSopenharmony_ci    mov%3     m4, [srcq+src4q]
482cabdff1aSopenharmony_ci    mov%3     m5, [srcq+src5q]
483cabdff1aSopenharmony_ci%if ARCH_X86_32
484cabdff1aSopenharmony_ci    mov    src7q, src7m
485cabdff1aSopenharmony_ci%endif
486cabdff1aSopenharmony_ci    mov%3     m6, [srcq+src6q]
487cabdff1aSopenharmony_ci    mov%3     m7, [srcq+src7q]
488cabdff1aSopenharmony_ci
489cabdff1aSopenharmony_ci%if ARCH_X86_64
; Transpose with m8 as scratch, then convert pairs (m9 = constant, m8 = temp).
490cabdff1aSopenharmony_ci    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8
491cabdff1aSopenharmony_ci
492cabdff1aSopenharmony_ci    %7 m0,m1,x,x,m9,m8
493cabdff1aSopenharmony_ci    %7 m2,m3,x,x,m9,m8
494cabdff1aSopenharmony_ci    %7 m4,m5,x,x,m9,m8
495cabdff1aSopenharmony_ci    %7 m6,m7,x,x,m9,m8
496cabdff1aSopenharmony_ci
497cabdff1aSopenharmony_ci    mov%3 [dstq], m0
498cabdff1aSopenharmony_ci%else
; r0 now takes its dst role again.
499cabdff1aSopenharmony_ci    mov     dstq, dstm
500cabdff1aSopenharmony_ci
; Transpose using stack scratch instead of a ninth register.
; NOTE(review): the reload of m2 from [rsp] below suggests this form of
; TRANSPOSE8x4D leaves row 2 in [rsp] - confirm against the macro definition.
501cabdff1aSopenharmony_ci    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1
502cabdff1aSopenharmony_ci
; m2 serves as temp for the first pair; once m0 is stored it becomes the temp.
503cabdff1aSopenharmony_ci    %7 m0,m1,x,x,m9,m2
504cabdff1aSopenharmony_ci    mova     m2, [rsp]
505cabdff1aSopenharmony_ci    mov%3   [dstq], m0
506cabdff1aSopenharmony_ci    %7 m2,m3,x,x,m9,m0
507cabdff1aSopenharmony_ci    %7 m4,m5,x,x,m9,m0
508cabdff1aSopenharmony_ci    %7 m6,m7,x,x,m9,m0
509cabdff1aSopenharmony_ci
510cabdff1aSopenharmony_ci%endif
511cabdff1aSopenharmony_ci
; Remaining seven registers of packed output (m0 was stored above).
512cabdff1aSopenharmony_ci    mov%3 [dstq+16],  m1
513cabdff1aSopenharmony_ci    mov%3 [dstq+32],  m2
514cabdff1aSopenharmony_ci    mov%3 [dstq+48],  m3
515cabdff1aSopenharmony_ci    mov%3 [dstq+64],  m4
516cabdff1aSopenharmony_ci    mov%3 [dstq+80],  m5
517cabdff1aSopenharmony_ci    mov%3 [dstq+96],  m6
518cabdff1aSopenharmony_ci    mov%3 [dstq+112], m7
519cabdff1aSopenharmony_ci
; Advance: one register of input per channel, eight registers of output.
520cabdff1aSopenharmony_ci    add      srcq, mmsize
521cabdff1aSopenharmony_ci    add      dstq, mmsize*8
522cabdff1aSopenharmony_ci%if ARCH_X86_32
; Save the advanced dst and put the src1 offset back in r0 for the next pass.
523cabdff1aSopenharmony_ci    mov      dstm, dstq
524cabdff1aSopenharmony_ci    mov      src1q, src1m
525cabdff1aSopenharmony_ci%endif
; mmsize/4 = number of 4-byte elements in one register.
526cabdff1aSopenharmony_ci    sub      lend, mmsize/4
527cabdff1aSopenharmony_ci    jg .loop
528cabdff1aSopenharmony_ci    REP_RET
529cabdff1aSopenharmony_ci%endmacro
530cabdff1aSopenharmony_ci
; INT16_TO_INT32_N: widens the 16-bit samples in m0,m1 to 32-bit in m0..m3.
; Each word is unpacked above a zero word, so every result is sample << 16.
; Output order: m0,m1 come from the input m0; m2,m3 come from the input m1.
; The six macro arguments are accepted but not referenced; registers are
; fixed, and m4 is used as a temporary.
531cabdff1aSopenharmony_ci%macro INT16_TO_INT32_N 6
532cabdff1aSopenharmony_ci    pxor      m2, m2
533cabdff1aSopenharmony_ci    pxor      m3, m3
534cabdff1aSopenharmony_ci    punpcklwd m2, m1
535cabdff1aSopenharmony_ci    punpckhwd m3, m1
; Rename: the input m0 is now called m4, freeing the name m0 for output.
536cabdff1aSopenharmony_ci    SWAP 4,0
537cabdff1aSopenharmony_ci    pxor      m0, m0
538cabdff1aSopenharmony_ci    pxor      m1, m1
539cabdff1aSopenharmony_ci    punpcklwd m0, m4
540cabdff1aSopenharmony_ci    punpckhwd m1, m4
541cabdff1aSopenharmony_ci%endmacro
542cabdff1aSopenharmony_ci
; INT32_TO_INT16_N: narrows the 32-bit samples in m0..m3 to 16-bit in m0,m1.
; Each dword is arithmetically shifted right by 16, then pairs of registers
; are packed with signed saturation (m0 <- m0,m1 and m1 <- m2,m3).
; The six macro arguments are accepted but not referenced.
543cabdff1aSopenharmony_ci%macro INT32_TO_INT16_N 6
544cabdff1aSopenharmony_ci    psrad     m0, 16
545cabdff1aSopenharmony_ci    psrad     m1, 16
546cabdff1aSopenharmony_ci    psrad     m2, 16
547cabdff1aSopenharmony_ci    psrad     m3, 16
548cabdff1aSopenharmony_ci    packssdw  m0, m1
549cabdff1aSopenharmony_ci    packssdw  m2, m3
; Rename so the second packed register is called m1.
550cabdff1aSopenharmony_ci    SWAP 1,2
551cabdff1aSopenharmony_ci%endmacro
552cabdff1aSopenharmony_ci
; INT32_TO_FLOAT_INIT: loads the 2^-31 scale (flt2pm31) into %5.
; Arguments other than %5 are not referenced.
553cabdff1aSopenharmony_ci%macro INT32_TO_FLOAT_INIT 6
554cabdff1aSopenharmony_ci    mova      %5, [flt2pm31]
555cabdff1aSopenharmony_ci%endmacro
; INT32_TO_FLOAT_N: converts the int32 samples in %1 and %2 to float in place
; and multiplies them by the scale held in %5. %3, %4, %6 are not referenced.
; NOTE(review): the three-operand mulps form presumably relies on the
; instruction wrappers from the include for non-AVX builds - confirm.
556cabdff1aSopenharmony_ci%macro INT32_TO_FLOAT_N 6
557cabdff1aSopenharmony_ci    cvtdq2ps  %1, %1
558cabdff1aSopenharmony_ci    cvtdq2ps  %2, %2
559cabdff1aSopenharmony_ci    mulps %1, %1, %5
560cabdff1aSopenharmony_ci    mulps %2, %2, %5
561cabdff1aSopenharmony_ci%endmacro
562cabdff1aSopenharmony_ci
; FLOAT_TO_INT32_INIT: loads the 2^31 scale (flt2p31) into %5.
; Arguments other than %5 are not referenced.
563cabdff1aSopenharmony_ci%macro FLOAT_TO_INT32_INIT 6
564cabdff1aSopenharmony_ci    mova      %5, [flt2p31]
565cabdff1aSopenharmony_ci%endmacro
; FLOAT_TO_INT32_N: converts the float samples in %1 and %2 to int32 in place.
;   %5 = scale register, %6 = temporary; %3 and %4 are not referenced.
; After scaling, cmpps with predicate 5 (not-less-than) writes an all-ones
; mask (-1 as an integer) into every lane whose scaled value is not below the
; scale, and paddd adds that mask to the cvtps2dq result. Per the Intel SDM,
; cvtps2dq returns 0x80000000 for out-of-range input, so positive overflow
; ends up as 0x7FFFFFFF instead of wrapping to the most negative value.
566cabdff1aSopenharmony_ci%macro FLOAT_TO_INT32_N 6
567cabdff1aSopenharmony_ci    mulps %1, %5
568cabdff1aSopenharmony_ci    mulps %2, %5
569cabdff1aSopenharmony_ci    cvtps2dq  %6, %1
570cabdff1aSopenharmony_ci    cmpps %1, %1, %5, 5
571cabdff1aSopenharmony_ci    paddd %1, %6
572cabdff1aSopenharmony_ci    cvtps2dq  %6, %2
573cabdff1aSopenharmony_ci    cmpps %2, %2, %5, 5
574cabdff1aSopenharmony_ci    paddd %2, %6
575cabdff1aSopenharmony_ci%endmacro
576cabdff1aSopenharmony_ci
; INT16_TO_FLOAT_INIT: loads the 2^-31 scale (flt2pm31) into the fixed
; register m5. The macro arguments are not referenced.
577cabdff1aSopenharmony_ci%macro INT16_TO_FLOAT_INIT 6
578cabdff1aSopenharmony_ci    mova      m5, [flt2pm31]
579cabdff1aSopenharmony_ci%endmacro
; INT16_TO_FLOAT_N: converts the int16 samples in m0,m1 to float in m0..m3.
; The samples are first widened by INT16_TO_INT32_N (giving sample << 16),
; then converted to float and multiplied by the scale in m5.
; The arguments are only forwarded to INT16_TO_INT32_N; registers are fixed.
580cabdff1aSopenharmony_ci%macro INT16_TO_FLOAT_N 6
581cabdff1aSopenharmony_ci    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
582cabdff1aSopenharmony_ci    cvtdq2ps  m0, m0
583cabdff1aSopenharmony_ci    cvtdq2ps  m1, m1
584cabdff1aSopenharmony_ci    cvtdq2ps  m2, m2
585cabdff1aSopenharmony_ci    cvtdq2ps  m3, m3
586cabdff1aSopenharmony_ci    mulps m0, m0, m5
587cabdff1aSopenharmony_ci    mulps m1, m1, m5
588cabdff1aSopenharmony_ci    mulps m2, m2, m5
589cabdff1aSopenharmony_ci    mulps m3, m3, m5
590cabdff1aSopenharmony_ci%endmacro
591cabdff1aSopenharmony_ci
; FLOAT_TO_INT16_INIT: loads the 2^15 scale (flt2p15) into the fixed
; register m5. The macro arguments are not referenced.
592cabdff1aSopenharmony_ci%macro FLOAT_TO_INT16_INIT 6
593cabdff1aSopenharmony_ci    mova      m5, [flt2p15]
594cabdff1aSopenharmony_ci%endmacro
; FLOAT_TO_INT16_N: converts the float samples in m0..m3 to int16 in m0,m1.
; All four registers are scaled by m5 and converted to int32, then packed
; pairwise with signed saturation (m0 <- m0,m1 and m1 <- m2,m3).
; The macro arguments are not referenced; registers are fixed.
595cabdff1aSopenharmony_ci%macro FLOAT_TO_INT16_N 6
596cabdff1aSopenharmony_ci    mulps m0, m5
597cabdff1aSopenharmony_ci    mulps m1, m5
598cabdff1aSopenharmony_ci    mulps m2, m5
599cabdff1aSopenharmony_ci    mulps m3, m5
600cabdff1aSopenharmony_ci    cvtps2dq  m0, m0
601cabdff1aSopenharmony_ci    cvtps2dq  m1, m1
602cabdff1aSopenharmony_ci    packssdw  m0, m1
; The converted m2 is written straight into m1, which is free after the pack.
603cabdff1aSopenharmony_ci    cvtps2dq  m1, m2
604cabdff1aSopenharmony_ci    cvtps2dq  m3, m3
605cabdff1aSopenharmony_ci    packssdw  m1, m3
606cabdff1aSopenharmony_ci%endmacro
607cabdff1aSopenharmony_ci
; NOP_N: empty macro accepting 0 to 6 arguments. Passed as the conversion
; and/or init macro when an instantiation needs no such step.
608cabdff1aSopenharmony_ci%macro NOP_N 0-6
609cabdff1aSopenharmony_ci%endmacro
610cabdff1aSopenharmony_ci
; SSE instantiations: 6-channel float pack/unpack with no format conversion
; (NOP_N for both the conversion and init macros, 7 vector registers).
; Each "u" variant precedes its "a" variant; the "a" code jumps to a label
; that only the "u" variant defines.
611cabdff1aSopenharmony_ciINIT_XMM sse
612cabdff1aSopenharmony_ciPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
613cabdff1aSopenharmony_ciPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
614cabdff1aSopenharmony_ci
615cabdff1aSopenharmony_ciUNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
616cabdff1aSopenharmony_ciUNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
617cabdff1aSopenharmony_ci
; ---- SSE2 instantiations: integer sample formats ----------------------------
; Argument order follows the note near the top of the file:
;   to, from, a/u, log2 of output sample size, log2 of input sample size
; (1 is used with int16, 2 with int32), then two macro names. The first macro
; name matches the conversion direction (e.g. INT16_TO_INT32_N); the second is
; NOP_N throughout this group.
; NOTE(review): "u"/"a" presumably select unaligned/aligned memory access --
; confirm against the macro definitions.
618cabdff1aSopenharmony_ciINIT_XMM sse2
; Format conversion between int16 and int32, both directions.
619cabdff1aSopenharmony_ciCONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
620cabdff1aSopenharmony_ciCONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
621cabdff1aSopenharmony_ciCONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
622cabdff1aSopenharmony_ciCONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
623cabdff1aSopenharmony_ci
; 2-channel pack: same-format (NOP_N) and int16 <-> int32 converting variants.
624cabdff1aSopenharmony_ciPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
625cabdff1aSopenharmony_ciPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
626cabdff1aSopenharmony_ciPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
627cabdff1aSopenharmony_ciPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
628cabdff1aSopenharmony_ciPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
629cabdff1aSopenharmony_ciPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
630cabdff1aSopenharmony_ciPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
631cabdff1aSopenharmony_ciPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
632cabdff1aSopenharmony_ci
; 2-channel unpack: the same format combinations as the pack group above.
633cabdff1aSopenharmony_ciUNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
634cabdff1aSopenharmony_ciUNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
635cabdff1aSopenharmony_ciUNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
636cabdff1aSopenharmony_ciUNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
637cabdff1aSopenharmony_ciUNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
638cabdff1aSopenharmony_ciUNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
639cabdff1aSopenharmony_ciUNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
640cabdff1aSopenharmony_ciUNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
641cabdff1aSopenharmony_ci
; ---- Float <-> integer variants of CONV / PACK_2CH / UNPACK_2CH -------------
; These follow the INIT_XMM sse2 line earlier in the file, so they are built
; for SSE2. Unlike the integer-only variants, every line here passes a
; matching *_INIT macro as the last argument in addition to the *_N macro.
; NOTE(review): the *_INIT macros presumably load scaling constants (such as
; the flt2p15 / flt2p31 / flt2pm31 tables in the rodata section) before the
; loop -- confirm against their definitions.
;
; Format conversion: float <-> int32 and float <-> int16.
642cabdff1aSopenharmony_ciCONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
643cabdff1aSopenharmony_ciCONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
644cabdff1aSopenharmony_ciCONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
645cabdff1aSopenharmony_ciCONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
646cabdff1aSopenharmony_ciCONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
647cabdff1aSopenharmony_ciCONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
648cabdff1aSopenharmony_ciCONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
649cabdff1aSopenharmony_ciCONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
650cabdff1aSopenharmony_ci
; 2-channel pack with a float <-> integer format change.
651cabdff1aSopenharmony_ciPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
652cabdff1aSopenharmony_ciPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
653cabdff1aSopenharmony_ciPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
654cabdff1aSopenharmony_ciPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
655cabdff1aSopenharmony_ciPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
656cabdff1aSopenharmony_ciPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
657cabdff1aSopenharmony_ciPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
658cabdff1aSopenharmony_ciPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
659cabdff1aSopenharmony_ci
; 2-channel unpack with a float <-> integer format change.
660cabdff1aSopenharmony_ciUNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
661cabdff1aSopenharmony_ciUNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
662cabdff1aSopenharmony_ciUNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
663cabdff1aSopenharmony_ciUNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
664cabdff1aSopenharmony_ciUNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
665cabdff1aSopenharmony_ciUNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
666cabdff1aSopenharmony_ciUNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
667cabdff1aSopenharmony_ciUNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
668cabdff1aSopenharmony_ci
; ---- 6- and 8-channel variants (still under the earlier INIT_XMM sse2) ------
; The 6CH/8CH macros take one extra numeric argument (8, 9 or 10 here) ahead
; of the two macro names.
; NOTE(review): that number is presumably the XMM register count declared by
; the function; the converting variants pass a larger value than the
; float/float ones -- confirm against the PACK_6CH / PACK_8CH definitions.
;
; 6-channel pack with float <-> int32 conversion.
669cabdff1aSopenharmony_ciPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
670cabdff1aSopenharmony_ciPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
671cabdff1aSopenharmony_ciPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
672cabdff1aSopenharmony_ciPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
673cabdff1aSopenharmony_ci
; 6-channel unpack with float <-> int32 conversion.
674cabdff1aSopenharmony_ciUNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
675cabdff1aSopenharmony_ciUNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
676cabdff1aSopenharmony_ciUNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
677cabdff1aSopenharmony_ciUNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
678cabdff1aSopenharmony_ci
; 8-channel pack, float on both sides (no conversion macro).
679cabdff1aSopenharmony_ciPACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
680cabdff1aSopenharmony_ciPACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
681cabdff1aSopenharmony_ci
; 8-channel pack with float <-> int32 conversion.
682cabdff1aSopenharmony_ciPACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
683cabdff1aSopenharmony_ciPACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
684cabdff1aSopenharmony_ciPACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
685cabdff1aSopenharmony_ciPACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
686cabdff1aSopenharmony_ci
; ---- SSSE3 instantiations ---------------------------------------------------
; Only the 2-channel unpack routines whose source format is int16 are
; instantiated again for SSSE3.
; NOTE(review): presumably UNPACK_2CH has an SSSE3-specific path for 16-bit
; input that uses the word_unpack_shuf byte-shuffle table from the rodata
; section -- confirm against the UNPACK_2CH definition.
687cabdff1aSopenharmony_ciINIT_XMM ssse3
688cabdff1aSopenharmony_ciUNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
689cabdff1aSopenharmony_ciUNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
690cabdff1aSopenharmony_ciUNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
691cabdff1aSopenharmony_ciUNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
692cabdff1aSopenharmony_ciUNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
693cabdff1aSopenharmony_ciUNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
694cabdff1aSopenharmony_ci
; ---- AVX instantiations -----------------------------------------------------
; Assembled only when the build configuration sets HAVE_AVX_EXTERNAL.
; The INIT_XMM avx part covers the 6-channel pack/unpack and 8-channel pack
; routines. The INIT_YMM avx part at the end adds only the int32 -> float CONV.
695cabdff1aSopenharmony_ci%if HAVE_AVX_EXTERNAL
696cabdff1aSopenharmony_ciINIT_XMM avx
; 6-channel pack/unpack, float on both sides. Note the numeric argument is 8
; here, whereas the SSE instantiation of the same routines passes 7.
697cabdff1aSopenharmony_ciPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
698cabdff1aSopenharmony_ciPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
699cabdff1aSopenharmony_ci
700cabdff1aSopenharmony_ciUNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
701cabdff1aSopenharmony_ciUNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
702cabdff1aSopenharmony_ci
; 6-channel pack/unpack with float <-> int32 conversion.
703cabdff1aSopenharmony_ciPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
704cabdff1aSopenharmony_ciPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
705cabdff1aSopenharmony_ciPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
706cabdff1aSopenharmony_ciPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
707cabdff1aSopenharmony_ci
708cabdff1aSopenharmony_ciUNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
709cabdff1aSopenharmony_ciUNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
710cabdff1aSopenharmony_ciUNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
711cabdff1aSopenharmony_ciUNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
712cabdff1aSopenharmony_ci
; 8-channel pack: float/float, then float <-> int32.
713cabdff1aSopenharmony_ciPACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
714cabdff1aSopenharmony_ciPACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
715cabdff1aSopenharmony_ci
716cabdff1aSopenharmony_ciPACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
717cabdff1aSopenharmony_ciPACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
718cabdff1aSopenharmony_ciPACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
719cabdff1aSopenharmony_ciPACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
720cabdff1aSopenharmony_ci
; YMM (256-bit register) build of the int32 -> float conversion only.
721cabdff1aSopenharmony_ciINIT_YMM avx
722cabdff1aSopenharmony_ciCONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
723cabdff1aSopenharmony_ciCONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
724cabdff1aSopenharmony_ci%endif
725cabdff1aSopenharmony_ci
; ---- AVX2 instantiations ----------------------------------------------------
; Assembled only when the build configuration sets HAVE_AVX2_EXTERNAL.
; YMM (256-bit register) build of the float -> int32 conversion.
; NOTE(review): this direction is built for avx2 rather than avx, presumably
; because FLOAT_TO_INT32_N needs 256-bit integer instructions -- confirm
; against the FLOAT_TO_INT32_N definition.
726cabdff1aSopenharmony_ci%if HAVE_AVX2_EXTERNAL
727cabdff1aSopenharmony_ciINIT_YMM avx2
728cabdff1aSopenharmony_ciCONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
729cabdff1aSopenharmony_ciCONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
730cabdff1aSopenharmony_ci%endif
731