;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA 32
; Scale factors for the float <-> integer helpers further down.
flt2pm31: times 8 dd 4.6566129e-10      ; approximately 2^-31
flt2p31 : times 8 dd 2147483648.0       ; 2^31
flt2p15 : times 8 dd 32768.0            ; 2^15

; pshufb control: gathers the even-indexed words into the low 8 bytes and the
; odd-indexed words into the high 8 bytes of a register.
word_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15

SECTION .text

;------------------------------------------------------------------------------
; Conventions shared by every function-generating macro in this file
;
;   * The dst and src arguments are arrays of pointers: the code dereferences
;     them ([dstq], [srcq], [srcq+N*gprsize]) to obtain the actual buffers.
;   * Each macro emits one function whose name ends in _a or _u (arg3).
;     The _u flavour uses unaligned loads/stores and defines an extra label
;     <name>_u_int<SUFFIX> right after its pointer setup.  The _a flavour
;     tests every buffer pointer against mmsize-1 and, if any is misaligned,
;     jumps to that label; the _u flavour must therefore be instantiated for
;     the same cpu suffix.
;   * The loops consume a fixed number of samples per iteration and have no
;     scalar tail.  NOTE(review): callers presumably pass a len that is a
;     positive multiple of the per-iteration count -- confirm at the call
;     sites.
;------------------------------------------------------------------------------

;------------------------------------------------------------------------------
; PACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits pack_2ch_<from>_to_<to>_<a/u>(dst, src, len): interleaves the two
; source planes src[0] and src[1] into dst[0], converting the sample format
; on the way.
;
;   arg1  destination sample format (used in the symbol name only)
;   arg2  source sample format      (used in the symbol name only)
;   arg3  a = aligned moves, u = unaligned moves
;   arg4  log2 of the output sample size in bytes
;   arg5  log2 of the input sample size in bytes
;   arg6  per-iteration conversion macro, invoked with m0..m5
;   arg7  one-time init macro (constant loading), invoked with m0..m5
;
; len is a per-channel sample count; it is turned into a negative index that
; counts up towards zero.
;------------------------------------------------------------------------------
%macro PACK_2CH 5-7
cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
    mov      src2q, [srcq+gprsize]
    mov       srcq, [srcq]
    mov       dstq, [dstq]
%ifidn %3, a
    test      dstq, mmsize-1
    jne       pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test      srcq, mmsize-1
    jne       pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test     src2q, mmsize-1
    jne       pack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; point past the end of every buffer and index with a negative counter
    lea       srcq, [srcq  + (1<<%5)*lenq]
    lea      src2q, [src2q + (1<<%5)*lenq]
    lea       dstq, [dstq  + (2<<%4)*lenq]
    neg       lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
%if %4 >= %5
    ; output at least as wide as input: interleave first, then convert
    mov%3       m0, [         srcq +(1<<%5)*lenq]
    mova        m1, m0
    mov%3       m2, [         src2q+(1<<%5)*lenq]
%if %5 == 1
    punpcklwd   m0, m2
    punpckhwd   m1, m2
%else
    punpckldq   m0, m2
    punpckhdq   m1, m2
%endif
    %6 m0,m1,m2,m3,m4,m5
%else
    ; output narrower than input: convert both planes, then interleave words
    mov%3       m0, [         srcq +(1<<%5)*lenq]
    mov%3       m1, [mmsize + srcq +(1<<%5)*lenq]
    mov%3       m2, [         src2q+(1<<%5)*lenq]
    mov%3       m3, [mmsize + src2q+(1<<%5)*lenq]
    %6 m0,m1,m2,m3,m4,m5
    mova        m2, m0
    punpcklwd   m0, m1
    punpckhwd   m2, m1
    SWAP 1,2
%endif
    mov%3 [           dstq+(2<<%4)*lenq], m0
    mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
%if %4 > %5
    mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
    mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
    add       lenq, 4*mmsize/(2<<%4)
%else
    add       lenq, 2*mmsize/(2<<%4)
%endif
    jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; UNPACK_2CH to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits unpack_2ch_<from>_to_<to>_<a/u>(dst, src, len): splits the
; interleaved stereo buffer src[0] into the planes dst[0] and dst[1],
; converting the sample format on the way.  Arguments are as for PACK_2CH.
;
; m6 holds the word_unpack_shuf control, but only in builds that use it
; (16-bit source samples with the _ssse3 suffix).
;------------------------------------------------------------------------------
%macro UNPACK_2CH 5-7
cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
    mov      dst2q, [dstq+gprsize]
    mov       srcq, [srcq]
    mov       dstq, [dstq]
%ifidn %3, a
    test      dstq, mmsize-1
    jne       unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test      srcq, mmsize-1
    jne       unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test     dst2q, mmsize-1
    jne       unpack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    lea       srcq, [srcq  + (2<<%5)*lenq]
    lea       dstq, [dstq  + (1<<%4)*lenq]
    lea      dst2q, [dst2q + (1<<%4)*lenq]
    neg       lenq
    %7 m0,m1,m2,m3,m4,m5
%if %5 == 1
%ifidn SUFFIX, _ssse3
    ; the shuffle control is only read by the pshufb path below
    mova        m6, [word_unpack_shuf]
%endif
%endif
.next:
    mov%3       m0, [           srcq +(2<<%5)*lenq]
    mov%3       m2, [  mmsize + srcq +(2<<%5)*lenq]
%if %5 == 1
%ifidn SUFFIX, _ssse3
    pshufb      m0, m6
    mova        m1, m0
    pshufb      m2, m6
    punpcklqdq  m0, m2
    punpckhqdq  m1, m2
%else
    ; three rounds of word interleaving separate even and odd words
    mova        m1, m0
    punpcklwd   m0, m2
    punpckhwd   m1, m2

    mova        m2, m0
    punpcklwd   m0, m1
    punpckhwd   m2, m1

    mova        m1, m0
    punpcklwd   m0, m2
    punpckhwd   m1, m2
%endif
%else
    mova        m1, m0
    shufps      m0, m2, 10001000b
    shufps      m1, m2, 11011101b
%endif
%if %4 < %5
    ; narrowing conversion: fetch a second pair so each output is full
    mov%3       m2, [2*mmsize + srcq +(2<<%5)*lenq]
    mova        m3, m2
    mov%3       m4, [3*mmsize + srcq +(2<<%5)*lenq]
    shufps      m2, m4, 10001000b
    shufps      m3, m4, 11011101b
    SWAP 1,2
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3 [           dstq+(1<<%4)*lenq], m0
%if %4 > %5
    mov%3 [          dst2q+(1<<%4)*lenq], m2
    mov%3 [ mmsize +  dstq+(1<<%4)*lenq], m1
    mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
    add       lenq, 2*mmsize/(1<<%4)
%else
    mov%3 [          dst2q+(1<<%4)*lenq], m1
    add       lenq, mmsize/(1<<%4)
%endif
    jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; CONV to, from, a/u, log2_outsize, log2_insize, conv, init
;
; Emits <from>_to_<to>_<a/u>(dst, src, len): converts the single plane
; src[0] into dst[0].  Arguments are as for PACK_2CH.  Two input registers
; are loaded per iteration, or four when the output samples are narrower
; than the input samples.
;------------------------------------------------------------------------------
%macro CONV 5-7
cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
    mov       srcq, [srcq]
    mov       dstq, [dstq]
%ifidn %3, a
    test      dstq, mmsize-1
    jne       %2_to_%1_u_int %+ SUFFIX
    test      srcq, mmsize-1
    jne       %2_to_%1_u_int %+ SUFFIX
%else
%2_to_%1_u_int %+ SUFFIX:
%endif
    lea       srcq, [srcq  + (1<<%5)*lenq]
    lea       dstq, [dstq  + (1<<%4)*lenq]
    neg       lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
    mov%3       m0, [           srcq +(1<<%5)*lenq]
    mov%3       m1, [  mmsize + srcq +(1<<%5)*lenq]
%if %4 < %5
    mov%3       m2, [2*mmsize + srcq +(1<<%5)*lenq]
    mov%3       m3, [3*mmsize + srcq +(1<<%5)*lenq]
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3 [           dstq+(1<<%4)*lenq], m0
    mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
%if %4 > %5
    mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
    mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
    add       lenq, 4*mmsize/(1<<%4)
%else
    add       lenq, 2*mmsize/(1<<%4)
%endif
    jl .next
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; PACK_6CH to, from, a/u, log2_outsize, log2_insize, nregs, conv, init
;
; Emits pack_6ch_<from>_to_<to>_<a/u>(dst, src, len): interleaves the six
; source planes src[0..5] into dst[0].
;
;   arg1..arg3  as for PACK_2CH
;   arg4, arg5  not referenced by the macro body
;   arg6        vector register count handed to cglobal
;   arg7        conversion macro, invoked once per register pair
;   arg8        init macro; the constant it loads is kept in m7
;
; src1..src5 are rewritten as byte offsets relative to src so that only srcq
; has to advance inside the loop.  len is read as a 32-bit value (register
; on x86-64, stack slot on x86-32).
;------------------------------------------------------------------------------
%macro PACK_6CH 8
cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov       lend, r2d
%else
    %define lend dword r2m
%endif
    mov      src1q, [srcq+1*gprsize]
    mov      src2q, [srcq+2*gprsize]
    mov      src3q, [srcq+3*gprsize]
    mov      src4q, [srcq+4*gprsize]
    mov      src5q, [srcq+5*gprsize]
    mov       srcq, [srcq]
    mov       dstq, [dstq]
%ifidn %3, a
    test      dstq, mmsize-1
    jne       pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test      srcq, mmsize-1
    jne       pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     src1q, mmsize-1
    jne       pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     src2q, mmsize-1
    jne       pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     src3q, mmsize-1
    jne       pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     src4q, mmsize-1
    jne       pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     src5q, mmsize-1
    jne       pack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; turn the plane pointers into offsets from plane 0
    sub      src1q, srcq
    sub      src2q, srcq
    sub      src3q, srcq
    sub      src4q, srcq
    sub      src5q, srcq
    %8 x,x,x,x,m7,x
.loop:
    mov%3       m0, [srcq      ]
    mov%3       m1, [srcq+src1q]
    mov%3       m2, [srcq+src2q]
    mov%3       m3, [srcq+src3q]
    mov%3       m4, [srcq+src4q]
    mov%3       m5, [srcq+src5q]
%if cpuflag(sse)
    SBUTTERFLYPS 0, 1, 6
    SBUTTERFLYPS 2, 3, 6
    SBUTTERFLYPS 4, 5, 6

%if cpuflag(avx)
    blendps     m6, m4, m0, 1100b
%else
    movaps      m6, m4
    shufps      m4, m0, q3210
    SWAP 4,6
%endif
    movlhps     m0, m2
    movhlps     m4, m2
%if cpuflag(avx)
    blendps     m2, m5, m1, 1100b
%else
    movaps      m2, m5
    shufps      m5, m1, q3210
    SWAP 2,5
%endif
    movlhps     m1, m3
    movhlps     m5, m3

    %7 m0,m6,x,x,m7,m3
    %7 m4,m1,x,x,m7,m3
    %7 m2,m5,x,x,m7,m3

    mov %+ %3 %+ ps [dstq   ], m0
    mov %+ %3 %+ ps [dstq+16], m6
    mov %+ %3 %+ ps [dstq+32], m4
    mov %+ %3 %+ ps [dstq+48], m1
    mov %+ %3 %+ ps [dstq+64], m2
    mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
    SBUTTERFLY dq, 0, 1, 6
    SBUTTERFLY dq, 2, 3, 6
    SBUTTERFLY dq, 4, 5, 6

    movq [dstq   ], m0
    movq [dstq+ 8], m2
    movq [dstq+16], m4
    movq [dstq+24], m1
    movq [dstq+32], m3
    movq [dstq+40], m5
%endif
    add       srcq, mmsize
    add       dstq, mmsize*6
    sub       lend, mmsize/4
    jg .loop
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; UNPACK_6CH to, from, a/u, log2_outsize, log2_insize, nregs, conv, init
;
; Emits unpack_6ch_<from>_to_<to>_<a/u>(dst, src, len): splits the
; interleaved buffer src[0] into the six planes dst[0..5].  Arguments are as
; for PACK_6CH.  dst1..dst5 are rewritten as byte offsets relative to dst.
;------------------------------------------------------------------------------
%macro UNPACK_6CH 8
cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
%if ARCH_X86_64
    mov       lend, r2d
%else
    %define lend dword r2m
%endif
    mov      dst1q, [dstq+1*gprsize]
    mov      dst2q, [dstq+2*gprsize]
    mov      dst3q, [dstq+3*gprsize]
    mov      dst4q, [dstq+4*gprsize]
    mov      dst5q, [dstq+5*gprsize]
    mov       dstq, [dstq]
    mov       srcq, [srcq]
%ifidn %3, a
    test      dstq, mmsize-1
    jne       unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test      srcq, mmsize-1
    jne       unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     dst1q, mmsize-1
    jne       unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     dst2q, mmsize-1
    jne       unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     dst3q, mmsize-1
    jne       unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     dst4q, mmsize-1
    jne       unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test     dst5q, mmsize-1
    jne       unpack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; turn the plane pointers into offsets from plane 0
    sub      dst1q, dstq
    sub      dst2q, dstq
    sub      dst3q, dstq
    sub      dst4q, dstq
    sub      dst5q, dstq
    %8 x,x,x,x,m7,x
.loop:
    mov%3       m0, [srcq   ]
    mov%3       m1, [srcq+16]
    mov%3       m2, [srcq+32]
    mov%3       m3, [srcq+48]
    mov%3       m4, [srcq+64]
    mov%3       m5, [srcq+80]

    SBUTTERFLYPS 0, 3, 6
    SBUTTERFLYPS 1, 4, 6
    SBUTTERFLYPS 2, 5, 6
    SBUTTERFLYPS 0, 4, 6
    SBUTTERFLYPS 3, 2, 6
    SBUTTERFLYPS 1, 5, 6
    SWAP 1, 4
    SWAP 2, 3

    %7 m0,m1,x,x,m7,m6
    %7 m2,m3,x,x,m7,m6
    %7 m4,m5,x,x,m7,m6

    mov %+ %3 %+ ps [dstq      ], m0
    mov %+ %3 %+ ps [dstq+dst1q], m1
    mov %+ %3 %+ ps [dstq+dst2q], m2
    mov %+ %3 %+ ps [dstq+dst3q], m3
    mov %+ %3 %+ ps [dstq+dst4q], m4
    mov %+ %3 %+ ps [dstq+dst5q], m5

    add       srcq, mmsize*6
    add       dstq, mmsize
    sub       lend, mmsize/4
    jg .loop
    REP_RET
%endmacro

; General purpose register count requested by PACK_8CH: 10 on x86-64,
; 6 or 7 on x86-32 depending on HAVE_ALIGNED_STACK.
%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)

;------------------------------------------------------------------------------
; PACK_8CH to, from, a/u, log2_outsize, log2_insize, nregs, conv, init
;
; Emits pack_8ch_<from>_to_<to>_<a/u>(dst, src, len): interleaves the eight
; source planes src[0..7] into dst[0].  Arguments are as for PACK_6CH.
;
; x86-64: all plane offsets live in registers, the conversion constant is
;         loaded into m9 by the init macro and m8 is scratch.
; x86-32: 48 bytes of stack are reserved.  [rsp] and [rsp+16] are scratch
;         for the transpose; [rsp+32], [rsp+36] and [rsp+40] hold the src1,
;         src4 (only without an aligned stack) and src7 offsets, which share
;         r0 with dst and are reloaded as needed.  m9 is defined as a memory
;         operand instead of a register.
;------------------------------------------------------------------------------
%macro PACK_8CH 8
cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
    mov       dstq, [dstq]
%if ARCH_X86_32
    DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
    %define lend dword r2m
    %define src1q r0q
    %define src1m dword [rsp+32]
%if HAVE_ALIGNED_STACK == 0
    DEFINE_ARGS dst, src, src2, src3, src5, src6
    %define src4q r0q
    %define src4m dword [rsp+36]
%endif
    %define src7q r0q
    %define src7m dword [rsp+40]
    mov       dstm, dstq
%endif
    mov      src7q, [srcq+7*gprsize]
    mov      src6q, [srcq+6*gprsize]
%if ARCH_X86_32
    mov      src7m, src7q
%endif
    mov      src5q, [srcq+5*gprsize]
    mov      src4q, [srcq+4*gprsize]
    mov      src3q, [srcq+3*gprsize]
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    mov      src4m, src4q
%endif
    mov      src2q, [srcq+2*gprsize]
    mov      src1q, [srcq+1*gprsize]
    mov       srcq, [srcq]
%ifidn %3, a
%if ARCH_X86_32
    test     dstmp, mmsize-1
%else
    test      dstq, mmsize-1
%endif
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test      srcq, mmsize-1
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test     src1q, mmsize-1
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test     src2q, mmsize-1
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test     src3q, mmsize-1
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    test     src4m, mmsize-1
%else
    test     src4q, mmsize-1
%endif
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test     src5q, mmsize-1
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test     src6q, mmsize-1
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
%if ARCH_X86_32
    test     src7m, mmsize-1
%else
    test     src7q, mmsize-1
%endif
    jne       pack_8ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_8ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; turn the plane pointers into offsets from plane 0
    sub      src1q, srcq
    sub      src2q, srcq
    sub      src3q, srcq
%if ARCH_X86_64 || HAVE_ALIGNED_STACK
    sub      src4q, srcq
%else
    sub      src4m, srcq
%endif
    sub      src5q, srcq
    sub      src6q, srcq
%if ARCH_X86_64
    sub      src7q, srcq
%else
    mov      src1m, src1q
    sub      src7m, srcq
%endif

%if ARCH_X86_64
    %8 x,x,x,x,m9,x
%elifidn %1, int32
    %define m9 [flt2p31]
%else
    %define m9 [flt2pm31]
%endif

.loop:
    mov%3       m0, [srcq      ]
    mov%3       m1, [srcq+src1q]
    mov%3       m2, [srcq+src2q]
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    mov      src4q, src4m
%endif
    mov%3       m3, [srcq+src3q]
    mov%3       m4, [srcq+src4q]
    mov%3       m5, [srcq+src5q]
%if ARCH_X86_32
    mov      src7q, src7m
%endif
    mov%3       m6, [srcq+src6q]
    mov%3       m7, [srcq+src7q]

%if ARCH_X86_64
    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8

    %7 m0,m1,x,x,m9,m8
    %7 m2,m3,x,x,m9,m8
    %7 m4,m5,x,x,m9,m8
    %7 m6,m7,x,x,m9,m8

    mov%3 [dstq], m0
%else
    mov       dstq, dstm

    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1

    %7 m0,m1,x,x,m9,m2
    mova        m2, [rsp]
    mov%3 [dstq], m0
    %7 m2,m3,x,x,m9,m0
    %7 m4,m5,x,x,m9,m0
    %7 m6,m7,x,x,m9,m0

%endif

    mov%3 [dstq+16],  m1
    mov%3 [dstq+32],  m2
    mov%3 [dstq+48],  m3
    mov%3 [dstq+64],  m4
    mov%3 [dstq+80],  m5
    mov%3 [dstq+96],  m6
    mov%3 [dstq+112], m7

    add       srcq, mmsize
    add       dstq, mmsize*8
%if ARCH_X86_32
    mov       dstm, dstq
    mov      src1q, src1m
%endif
    sub       lend, mmsize/4
    jg .loop
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; Conversion helpers.  All of them accept six arguments so that they can be
; passed interchangeably as the conv / init argument of the macros above;
; several ignore the arguments and operate on fixed registers instead.
;------------------------------------------------------------------------------

; int16 in m0,m1 -> int32 in m0..m3.  Each word becomes the upper half of a
; dword whose lower half is zero (a left shift by 16).  Clobbers m4.
%macro INT16_TO_INT32_N 6
    pxor        m2, m2
    pxor        m3, m3
    punpcklwd   m2, m1
    punpckhwd   m3, m1
    SWAP 4,0
    pxor        m0, m0
    pxor        m1, m1
    punpcklwd   m0, m4
    punpckhwd   m1, m4
%endmacro

; int32 in m0..m3 -> int16 in m0,m1: keep the upper 16 bits of every dword
; and pack with signed saturation.
%macro INT32_TO_INT16_N 6
    psrad       m0, 16
    psrad       m1, 16
    psrad       m2, 16
    psrad       m3, 16
    packssdw    m0, m1
    packssdw    m2, m3
    SWAP 1,2
%endmacro

; Loads the 2^-31 scale into the register given as the fifth argument.
%macro INT32_TO_FLOAT_INIT 6
    mova        %5, [flt2pm31]
%endmacro
; int32 -> float, in place on the first two arguments, scaled by the fifth.
%macro INT32_TO_FLOAT_N 6
    cvtdq2ps    %1, %1
    cvtdq2ps    %2, %2
    mulps       %1, %1, %5
    mulps       %2, %2, %5
%endmacro

; Loads the 2^31 scale into the register given as the fifth argument.
%macro FLOAT_TO_INT32_INIT 6
    mova        %5, [flt2p31]
%endmacro
; float -> int32, in place on the first two arguments; the sixth is scratch.
; Lanes whose scaled value is not below 2^31 get an all-ones compare mask
; (predicate 5), which is added to the converted integer.
%macro FLOAT_TO_INT32_N 6
    mulps       %1, %5
    mulps       %2, %5
    cvtps2dq    %6, %1
    cmpps       %1, %1, %5, 5
    paddd       %1, %6
    cvtps2dq    %6, %2
    cmpps       %2, %2, %5, 5
    paddd       %2, %6
%endmacro

; Loads the 2^-31 scale into m5 (the int16 input is widened by a 16-bit left
; shift first, see INT16_TO_INT32_N).
%macro INT16_TO_FLOAT_INIT 6
    mova        m5, [flt2pm31]
%endmacro
; int16 in m0,m1 -> float in m0..m3.  Expects the scale in m5.
%macro INT16_TO_FLOAT_N 6
    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
    cvtdq2ps    m0, m0
    cvtdq2ps    m1, m1
    cvtdq2ps    m2, m2
    cvtdq2ps    m3, m3
    mulps       m0, m0, m5
    mulps       m1, m1, m5
    mulps       m2, m2, m5
    mulps       m3, m3, m5
%endmacro

; Loads the 2^15 scale into m5.
%macro FLOAT_TO_INT16_INIT 6
    mova        m5, [flt2p15]
%endmacro
; float in m0..m3 -> int16 in m0,m1 with signed saturation.  Expects the
; scale in m5.
%macro FLOAT_TO_INT16_N 6
    mulps       m0, m5
    mulps       m1, m5
    mulps       m2, m5
    mulps       m3, m5
    cvtps2dq    m0, m0
    cvtps2dq    m1, m1
    packssdw    m0, m1
    cvtps2dq    m1, m2
    cvtps2dq    m3, m3
    packssdw    m1, m3
%endmacro

; Placeholder for conversions / inits that have nothing to do.
%macro NOP_N 0-6
%endmacro

;------------------------------------------------------------------------------
; Instantiations.  Every _a function is preceded by its _u counterpart for
; the same instruction set because it jumps into it on misaligned input.
;------------------------------------------------------------------------------

INIT_XMM sse
PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N

UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N

INIT_XMM sse2
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N

PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

INIT_XMM ssse3
UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N

UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N

PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N

PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

INIT_YMM avx
CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
%endif

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
%endif