1;******************************************************************************
2;* Copyright (c) 2012 Michael Niedermayer
3;*
4;* This file is part of FFmpeg.
5;*
6;* FFmpeg is free software; you can redistribute it and/or
7;* modify it under the terms of the GNU Lesser General Public
8;* License as published by the Free Software Foundation; either
9;* version 2.1 of the License, or (at your option) any later version.
10;*
11;* FFmpeg is distributed in the hope that it will be useful,
12;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14;* Lesser General Public License for more details.
15;*
16;* You should have received a copy of the GNU Lesser General Public
17;* License along with FFmpeg; if not, write to the Free Software
18;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19;******************************************************************************
20
21%include "libavutil/x86/x86util.asm"
22
23SECTION_RODATA 32
24flt2pm31: times 8 dd 4.6566129e-10
25flt2p31 : times 8 dd 2147483648.0
26flt2p15 : times 8 dd 32768.0
27
28word_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15
29
30SECTION .text
31
32
33;to, from, a/u, log2_outsize, log_intsize, const
34%macro PACK_2CH 5-7
35cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
36    mov src2q   , [srcq+gprsize]
37    mov srcq    , [srcq]
38    mov dstq    , [dstq]
39%ifidn %3, a
40    test dstq, mmsize-1
41        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
42    test srcq, mmsize-1
43        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
44    test src2q, mmsize-1
45        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
46%else
47pack_2ch_%2_to_%1_u_int %+ SUFFIX:
48%endif
49    lea     srcq , [srcq  + (1<<%5)*lenq]
50    lea     src2q, [src2q + (1<<%5)*lenq]
51    lea     dstq , [dstq  + (2<<%4)*lenq]
52    neg     lenq
53    %7 m0,m1,m2,m3,m4,m5
54.next:
55%if %4 >= %5
56    mov%3     m0, [         srcq +(1<<%5)*lenq]
57    mova      m1, m0
58    mov%3     m2, [         src2q+(1<<%5)*lenq]
59%if %5 == 1
60    punpcklwd m0, m2
61    punpckhwd m1, m2
62%else
63    punpckldq m0, m2
64    punpckhdq m1, m2
65%endif
66    %6 m0,m1,m2,m3,m4,m5
67%else
68    mov%3     m0, [         srcq +(1<<%5)*lenq]
69    mov%3     m1, [mmsize + srcq +(1<<%5)*lenq]
70    mov%3     m2, [         src2q+(1<<%5)*lenq]
71    mov%3     m3, [mmsize + src2q+(1<<%5)*lenq]
72    %6 m0,m1,m2,m3,m4,m5
73    mova      m2, m0
74    punpcklwd m0, m1
75    punpckhwd m2, m1
76    SWAP 1,2
77%endif
78    mov%3 [           dstq+(2<<%4)*lenq], m0
79    mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
80%if %4 > %5
81    mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
82    mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
83    add lenq, 4*mmsize/(2<<%4)
84%else
85    add lenq, 2*mmsize/(2<<%4)
86%endif
87        jl .next
88    REP_RET
89%endmacro
90
91%macro UNPACK_2CH 5-7
92cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
93    mov dst2q   , [dstq+gprsize]
94    mov srcq    , [srcq]
95    mov dstq    , [dstq]
96%ifidn %3, a
97    test dstq, mmsize-1
98        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
99    test srcq, mmsize-1
100        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
101    test dst2q, mmsize-1
102        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
103%else
104unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
105%endif
106    lea     srcq , [srcq  + (2<<%5)*lenq]
107    lea     dstq , [dstq  + (1<<%4)*lenq]
108    lea     dst2q, [dst2q + (1<<%4)*lenq]
109    neg     lenq
110    %7 m0,m1,m2,m3,m4,m5
111    mova      m6, [word_unpack_shuf]
112.next:
113    mov%3     m0, [           srcq +(2<<%5)*lenq]
114    mov%3     m2, [  mmsize + srcq +(2<<%5)*lenq]
115%if %5 == 1
116%ifidn SUFFIX, _ssse3
117    pshufb    m0, m6
118    mova      m1, m0
119    pshufb    m2, m6
120    punpcklqdq m0,m2
121    punpckhqdq m1,m2
122%else
123    mova      m1, m0
124    punpcklwd m0,m2
125    punpckhwd m1,m2
126
127    mova      m2, m0
128    punpcklwd m0,m1
129    punpckhwd m2,m1
130
131    mova      m1, m0
132    punpcklwd m0,m2
133    punpckhwd m1,m2
134%endif
135%else
136    mova      m1, m0
137    shufps    m0, m2, 10001000b
138    shufps    m1, m2, 11011101b
139%endif
140%if %4 < %5
141    mov%3     m2, [2*mmsize + srcq +(2<<%5)*lenq]
142    mova      m3, m2
143    mov%3     m4, [3*mmsize + srcq +(2<<%5)*lenq]
144    shufps    m2, m4, 10001000b
145    shufps    m3, m4, 11011101b
146    SWAP 1,2
147%endif
148    %6 m0,m1,m2,m3,m4,m5
149    mov%3 [           dstq+(1<<%4)*lenq], m0
150%if %4 > %5
151    mov%3 [          dst2q+(1<<%4)*lenq], m2
152    mov%3 [ mmsize +  dstq+(1<<%4)*lenq], m1
153    mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
154    add lenq, 2*mmsize/(1<<%4)
155%else
156    mov%3 [          dst2q+(1<<%4)*lenq], m1
157    add lenq, mmsize/(1<<%4)
158%endif
159        jl .next
160    REP_RET
161%endmacro
162
163%macro CONV 5-7
164cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
165    mov srcq    , [srcq]
166    mov dstq    , [dstq]
167%ifidn %3, a
168    test dstq, mmsize-1
169        jne %2_to_%1_u_int %+ SUFFIX
170    test srcq, mmsize-1
171        jne %2_to_%1_u_int %+ SUFFIX
172%else
173%2_to_%1_u_int %+ SUFFIX:
174%endif
175    lea     srcq , [srcq  + (1<<%5)*lenq]
176    lea     dstq , [dstq  + (1<<%4)*lenq]
177    neg     lenq
178    %7 m0,m1,m2,m3,m4,m5
179.next:
180    mov%3     m0, [           srcq +(1<<%5)*lenq]
181    mov%3     m1, [  mmsize + srcq +(1<<%5)*lenq]
182%if %4 < %5
183    mov%3     m2, [2*mmsize + srcq +(1<<%5)*lenq]
184    mov%3     m3, [3*mmsize + srcq +(1<<%5)*lenq]
185%endif
186    %6 m0,m1,m2,m3,m4,m5
187    mov%3 [           dstq+(1<<%4)*lenq], m0
188    mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
189%if %4 > %5
190    mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
191    mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
192    add lenq, 4*mmsize/(1<<%4)
193%else
194    add lenq, 2*mmsize/(1<<%4)
195%endif
196        jl .next
197%if mmsize == 8
198    emms
199    RET
200%else
201    REP_RET
202%endif
203%endmacro
204
205%macro PACK_6CH 8
206cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
207%if ARCH_X86_64
208    mov     lend, r2d
209%else
210    %define lend dword r2m
211%endif
212    mov    src1q, [srcq+1*gprsize]
213    mov    src2q, [srcq+2*gprsize]
214    mov    src3q, [srcq+3*gprsize]
215    mov    src4q, [srcq+4*gprsize]
216    mov    src5q, [srcq+5*gprsize]
217    mov     srcq, [srcq]
218    mov     dstq, [dstq]
219%ifidn %3, a
220    test dstq, mmsize-1
221        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
222    test srcq, mmsize-1
223        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
224    test src1q, mmsize-1
225        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
226    test src2q, mmsize-1
227        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
228    test src3q, mmsize-1
229        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
230    test src4q, mmsize-1
231        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
232    test src5q, mmsize-1
233        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
234%else
235pack_6ch_%2_to_%1_u_int %+ SUFFIX:
236%endif
237    sub    src1q, srcq
238    sub    src2q, srcq
239    sub    src3q, srcq
240    sub    src4q, srcq
241    sub    src5q, srcq
242    %8 x,x,x,x,m7,x
243.loop:
244    mov%3     m0, [srcq      ]
245    mov%3     m1, [srcq+src1q]
246    mov%3     m2, [srcq+src2q]
247    mov%3     m3, [srcq+src3q]
248    mov%3     m4, [srcq+src4q]
249    mov%3     m5, [srcq+src5q]
250%if cpuflag(sse)
251    SBUTTERFLYPS 0, 1, 6
252    SBUTTERFLYPS 2, 3, 6
253    SBUTTERFLYPS 4, 5, 6
254
255%if cpuflag(avx)
256    blendps   m6, m4, m0, 1100b
257%else
258    movaps    m6, m4
259    shufps    m4, m0, q3210
260    SWAP 4,6
261%endif
262    movlhps   m0, m2
263    movhlps   m4, m2
264%if cpuflag(avx)
265    blendps   m2, m5, m1, 1100b
266%else
267    movaps    m2, m5
268    shufps    m5, m1, q3210
269    SWAP 2,5
270%endif
271    movlhps   m1, m3
272    movhlps   m5, m3
273
274    %7 m0,m6,x,x,m7,m3
275    %7 m4,m1,x,x,m7,m3
276    %7 m2,m5,x,x,m7,m3
277
278    mov %+ %3 %+ ps [dstq   ], m0
279    mov %+ %3 %+ ps [dstq+16], m6
280    mov %+ %3 %+ ps [dstq+32], m4
281    mov %+ %3 %+ ps [dstq+48], m1
282    mov %+ %3 %+ ps [dstq+64], m2
283    mov %+ %3 %+ ps [dstq+80], m5
284%else ; mmx
285    SBUTTERFLY dq, 0, 1, 6
286    SBUTTERFLY dq, 2, 3, 6
287    SBUTTERFLY dq, 4, 5, 6
288
289    movq   [dstq   ], m0
290    movq   [dstq+ 8], m2
291    movq   [dstq+16], m4
292    movq   [dstq+24], m1
293    movq   [dstq+32], m3
294    movq   [dstq+40], m5
295%endif
296    add      srcq, mmsize
297    add      dstq, mmsize*6
298    sub      lend, mmsize/4
299    jg .loop
300%if mmsize == 8
301    emms
302    RET
303%else
304    REP_RET
305%endif
306%endmacro
307
308%macro UNPACK_6CH 8
309cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
310%if ARCH_X86_64
311    mov     lend, r2d
312%else
313    %define lend dword r2m
314%endif
315    mov    dst1q, [dstq+1*gprsize]
316    mov    dst2q, [dstq+2*gprsize]
317    mov    dst3q, [dstq+3*gprsize]
318    mov    dst4q, [dstq+4*gprsize]
319    mov    dst5q, [dstq+5*gprsize]
320    mov     dstq, [dstq]
321    mov     srcq, [srcq]
322%ifidn %3, a
323    test dstq, mmsize-1
324        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
325    test srcq, mmsize-1
326        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
327    test dst1q, mmsize-1
328        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
329    test dst2q, mmsize-1
330        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
331    test dst3q, mmsize-1
332        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
333    test dst4q, mmsize-1
334        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
335    test dst5q, mmsize-1
336        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
337%else
338unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
339%endif
340    sub    dst1q, dstq
341    sub    dst2q, dstq
342    sub    dst3q, dstq
343    sub    dst4q, dstq
344    sub    dst5q, dstq
345    %8 x,x,x,x,m7,x
346.loop:
347    mov%3     m0, [srcq   ]
348    mov%3     m1, [srcq+16]
349    mov%3     m2, [srcq+32]
350    mov%3     m3, [srcq+48]
351    mov%3     m4, [srcq+64]
352    mov%3     m5, [srcq+80]
353
354    SBUTTERFLYPS 0, 3, 6
355    SBUTTERFLYPS 1, 4, 6
356    SBUTTERFLYPS 2, 5, 6
357    SBUTTERFLYPS 0, 4, 6
358    SBUTTERFLYPS 3, 2, 6
359    SBUTTERFLYPS 1, 5, 6
360    SWAP 1, 4
361    SWAP 2, 3
362
363    %7 m0,m1,x,x,m7,m6
364    %7 m2,m3,x,x,m7,m6
365    %7 m4,m5,x,x,m7,m6
366
367    mov %+ %3 %+ ps [dstq      ], m0
368    mov %+ %3 %+ ps [dstq+dst1q], m1
369    mov %+ %3 %+ ps [dstq+dst2q], m2
370    mov %+ %3 %+ ps [dstq+dst3q], m3
371    mov %+ %3 %+ ps [dstq+dst4q], m4
372    mov %+ %3 %+ ps [dstq+dst5q], m5
373
374    add      srcq, mmsize*6
375    add      dstq, mmsize
376    sub      lend, mmsize/4
377    jg .loop
378    REP_RET
379%endmacro
380
381%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)
382
383%macro PACK_8CH 8
384cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
385    mov     dstq, [dstq]
386%if ARCH_X86_32
387    DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
388    %define lend dword r2m
389    %define src1q r0q
390    %define src1m dword [rsp+32]
391%if HAVE_ALIGNED_STACK == 0
392    DEFINE_ARGS dst, src, src2, src3, src5, src6
393    %define src4q r0q
394    %define src4m dword [rsp+36]
395%endif
396    %define src7q r0q
397    %define src7m dword [rsp+40]
398    mov     dstm, dstq
399%endif
400    mov    src7q, [srcq+7*gprsize]
401    mov    src6q, [srcq+6*gprsize]
402%if ARCH_X86_32
403    mov    src7m, src7q
404%endif
405    mov    src5q, [srcq+5*gprsize]
406    mov    src4q, [srcq+4*gprsize]
407    mov    src3q, [srcq+3*gprsize]
408%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
409    mov    src4m, src4q
410%endif
411    mov    src2q, [srcq+2*gprsize]
412    mov    src1q, [srcq+1*gprsize]
413    mov     srcq, [srcq]
414%ifidn %3, a
415%if ARCH_X86_32
416    test dstmp, mmsize-1
417%else
418    test dstq, mmsize-1
419%endif
420        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
421    test srcq, mmsize-1
422        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
423    test src1q, mmsize-1
424        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
425    test src2q, mmsize-1
426        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
427    test src3q, mmsize-1
428        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
429%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
430    test src4m, mmsize-1
431%else
432    test src4q, mmsize-1
433%endif
434        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
435    test src5q, mmsize-1
436        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
437    test src6q, mmsize-1
438        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
439%if ARCH_X86_32
440    test src7m, mmsize-1
441%else
442    test src7q, mmsize-1
443%endif
444        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
445%else
446pack_8ch_%2_to_%1_u_int %+ SUFFIX:
447%endif
448    sub    src1q, srcq
449    sub    src2q, srcq
450    sub    src3q, srcq
451%if ARCH_X86_64 || HAVE_ALIGNED_STACK
452    sub    src4q, srcq
453%else
454    sub    src4m, srcq
455%endif
456    sub    src5q, srcq
457    sub    src6q, srcq
458%if ARCH_X86_64
459    sub    src7q, srcq
460%else
461    mov src1m, src1q
462    sub src7m, srcq
463%endif
464
465%if ARCH_X86_64
466    %8 x,x,x,x,m9,x
467%elifidn %1, int32
468    %define m9 [flt2p31]
469%else
470    %define m9 [flt2pm31]
471%endif
472
473.loop:
474    mov%3     m0, [srcq      ]
475    mov%3     m1, [srcq+src1q]
476    mov%3     m2, [srcq+src2q]
477%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
478    mov    src4q, src4m
479%endif
480    mov%3     m3, [srcq+src3q]
481    mov%3     m4, [srcq+src4q]
482    mov%3     m5, [srcq+src5q]
483%if ARCH_X86_32
484    mov    src7q, src7m
485%endif
486    mov%3     m6, [srcq+src6q]
487    mov%3     m7, [srcq+src7q]
488
489%if ARCH_X86_64
490    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8
491
492    %7 m0,m1,x,x,m9,m8
493    %7 m2,m3,x,x,m9,m8
494    %7 m4,m5,x,x,m9,m8
495    %7 m6,m7,x,x,m9,m8
496
497    mov%3 [dstq], m0
498%else
499    mov     dstq, dstm
500
501    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1
502
503    %7 m0,m1,x,x,m9,m2
504    mova     m2, [rsp]
505    mov%3   [dstq], m0
506    %7 m2,m3,x,x,m9,m0
507    %7 m4,m5,x,x,m9,m0
508    %7 m6,m7,x,x,m9,m0
509
510%endif
511
512    mov%3 [dstq+16],  m1
513    mov%3 [dstq+32],  m2
514    mov%3 [dstq+48],  m3
515    mov%3 [dstq+64],  m4
516    mov%3 [dstq+80],  m5
517    mov%3 [dstq+96],  m6
518    mov%3 [dstq+112], m7
519
520    add      srcq, mmsize
521    add      dstq, mmsize*8
522%if ARCH_X86_32
523    mov      dstm, dstq
524    mov      src1q, src1m
525%endif
526    sub      lend, mmsize/4
527    jg .loop
528    REP_RET
529%endmacro
530
531%macro INT16_TO_INT32_N 6
532    pxor      m2, m2
533    pxor      m3, m3
534    punpcklwd m2, m1
535    punpckhwd m3, m1
536    SWAP 4,0
537    pxor      m0, m0
538    pxor      m1, m1
539    punpcklwd m0, m4
540    punpckhwd m1, m4
541%endmacro
542
543%macro INT32_TO_INT16_N 6
544    psrad     m0, 16
545    psrad     m1, 16
546    psrad     m2, 16
547    psrad     m3, 16
548    packssdw  m0, m1
549    packssdw  m2, m3
550    SWAP 1,2
551%endmacro
552
553%macro INT32_TO_FLOAT_INIT 6
554    mova      %5, [flt2pm31]
555%endmacro
556%macro INT32_TO_FLOAT_N 6
557    cvtdq2ps  %1, %1
558    cvtdq2ps  %2, %2
559    mulps %1, %1, %5
560    mulps %2, %2, %5
561%endmacro
562
563%macro FLOAT_TO_INT32_INIT 6
564    mova      %5, [flt2p31]
565%endmacro
566%macro FLOAT_TO_INT32_N 6
567    mulps %1, %5
568    mulps %2, %5
569    cvtps2dq  %6, %1
570    cmpps %1, %1, %5, 5
571    paddd %1, %6
572    cvtps2dq  %6, %2
573    cmpps %2, %2, %5, 5
574    paddd %2, %6
575%endmacro
576
577%macro INT16_TO_FLOAT_INIT 6
578    mova      m5, [flt2pm31]
579%endmacro
580%macro INT16_TO_FLOAT_N 6
581    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
582    cvtdq2ps  m0, m0
583    cvtdq2ps  m1, m1
584    cvtdq2ps  m2, m2
585    cvtdq2ps  m3, m3
586    mulps m0, m0, m5
587    mulps m1, m1, m5
588    mulps m2, m2, m5
589    mulps m3, m3, m5
590%endmacro
591
592%macro FLOAT_TO_INT16_INIT 6
593    mova      m5, [flt2p15]
594%endmacro
595%macro FLOAT_TO_INT16_N 6
596    mulps m0, m5
597    mulps m1, m5
598    mulps m2, m5
599    mulps m3, m5
600    cvtps2dq  m0, m0
601    cvtps2dq  m1, m1
602    packssdw  m0, m1
603    cvtps2dq  m1, m2
604    cvtps2dq  m3, m3
605    packssdw  m1, m3
606%endmacro
607
608%macro NOP_N 0-6
609%endmacro
610
611INIT_XMM sse
612PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
613PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
614
615UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
616UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
617
618INIT_XMM sse2
619CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
620CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
621CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
622CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
623
624PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
625PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
626PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
627PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
628PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
629PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
630PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
631PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
632
633UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
634UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
635UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
636UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
637UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
638UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
639UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
640UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
641
642CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
643CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
644CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
645CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
646CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
647CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
648CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
649CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
650
651PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
652PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
653PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
654PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
655PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
656PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
657PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
658PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
659
660UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
661UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
662UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
663UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
664UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
665UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
666UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
667UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
668
669PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
670PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
671PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
672PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
673
674UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
675UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
676UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
677UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
678
679PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
680PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
681
682PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
683PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
684PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
685PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
686
687INIT_XMM ssse3
688UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
689UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
690UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
691UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
692UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
693UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
694
695%if HAVE_AVX_EXTERNAL
696INIT_XMM avx
697PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
698PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
699
700UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
701UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
702
703PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
704PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
705PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
706PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
707
708UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
709UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
710UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
711UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
712
713PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
714PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
715
716PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
717PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
718PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
719PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
720
721INIT_YMM avx
722CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
723CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
724%endif
725
726%if HAVE_AVX2_EXTERNAL
727INIT_YMM avx2
728CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
729CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
730%endif
731