;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA 32
; Scale factors used by the float <-> integer conversion macros below.
flt2pm31: times 8 dd 4.6566129e-10      ; 2^-31
flt2p31 : times 8 dd 2147483648.0       ; 2^31
flt2p15 : times 8 dd 32768.0            ; 2^15

; pshufb control: even-numbered 16-bit words go to the low 8 bytes, odd-numbered
; words to the high 8 bytes (used to split two interleaved int16 channels).
word_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15

SECTION .text

;------------------------------------------------------------------------------
; Shared conventions for PACK_2CH / UNPACK_2CH / CONV
;   %1 = destination format name  (only used to build the symbol name)
;   %2 = source format name       (only used to build the symbol name)
;   %3 = a or u: selects mova/movu through "mov%3"; the "a" variant tests the
;        data pointers against mmsize-1 on entry and, if any is misaligned,
;        jumps to the "<name>_u_int<SUFFIX>" label that only the matching "u"
;        variant emits (so a "u" instantiation must exist for each "a" one)
;   %4 = log2 of the output sample size in bytes
;   %5 = log2 of the input sample size in bytes
;   %6 = conversion macro, expanded once per loop iteration
;   %7 = init macro, expanded once before the loop
; dst and src arrive as arrays of plane pointers; the data pointers are loaded
; from them. All pointers are advanced to the end of the data and len is
; negated, so the loop indexes with a negative offset that counts up to zero.
;------------------------------------------------------------------------------

; Interleave two source planes into one destination plane.
%macro PACK_2CH 5-7
cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
    mov     src2q   , [srcq+gprsize]
    mov     srcq    , [srcq]
    mov     dstq    , [dstq]
%ifidn %3, a
    test    dstq, mmsize-1
    jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
    jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
    jne     pack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    lea     srcq , [srcq  + (1<<%5)*lenq]
    lea     src2q, [src2q + (1<<%5)*lenq]
    lea     dstq , [dstq  + (2<<%4)*lenq]   ; two output samples per input index
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
%if %4 >= %5
    ; output samples are at least as wide as input: interleave first, then convert
    mov%3     m0, [         srcq +(1<<%5)*lenq]
    mova      m1, m0
    mov%3     m2, [         src2q+(1<<%5)*lenq]
%if %5 == 1
    punpcklwd m0, m2
    punpckhwd m1, m2
%else
    punpckldq m0, m2
    punpckhdq m1, m2
%endif
    %6 m0,m1,m2,m3,m4,m5
%else
    ; output samples are narrower: convert two vectors per plane, then interleave words
    mov%3     m0, [         srcq +(1<<%5)*lenq]
    mov%3     m1, [mmsize + srcq +(1<<%5)*lenq]
    mov%3     m2, [         src2q+(1<<%5)*lenq]
    mov%3     m3, [mmsize + src2q+(1<<%5)*lenq]
    %6 m0,m1,m2,m3,m4,m5
    mova      m2, m0
    punpcklwd m0, m1
    punpckhwd m2, m1
    SWAP 1,2
%endif
    mov%3 [           dstq+(2<<%4)*lenq], m0
    mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
%if %4 > %5
    mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
    mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
    add     lenq, 4*mmsize/(2<<%4)
%else
    add     lenq, 2*mmsize/(2<<%4)
%endif
    jl      .next
    REP_RET
%endmacro

; Split one interleaved source plane into two destination planes.
%macro UNPACK_2CH 5-7
cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
    mov     dst2q   , [dstq+gprsize]
    mov     srcq    , [srcq]
    mov     dstq    , [dstq]
%ifidn %3, a
    test    dstq, mmsize-1
    jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
    jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
    test    dst2q, mmsize-1
    jne     unpack_2ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    lea     srcq , [srcq  + (2<<%5)*lenq]   ; two input samples per output index
    lea     dstq , [dstq  + (1<<%4)*lenq]
    lea     dst2q, [dst2q + (1<<%4)*lenq]
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
    mova      m6, [word_unpack_shuf]        ; only read by the ssse3 int16 path below
.next:
    mov%3     m0, [           srcq +(2<<%5)*lenq]
    mov%3     m2, [  mmsize + srcq +(2<<%5)*lenq]
%if %5 == 1
%ifidn SUFFIX, _ssse3
    ; group even/odd words inside each vector, then join the matching halves
    pshufb    m0, m6
    mova      m1, m0
    pshufb    m2, m6
    punpcklqdq m0,m2
    punpckhqdq m1,m2
%else
    ; no pshufb available: three rounds of word interleaving
    mova      m1, m0
    punpcklwd m0,m2
    punpckhwd m1,m2

    mova      m2, m0
    punpcklwd m0,m1
    punpckhwd m2,m1

    mova      m1, m0
    punpcklwd m0,m2
    punpckhwd m1,m2
%endif
%else
    ; 32-bit samples: select dwords 0,2 and dwords 1,3 of each source vector
    mova      m1, m0
    shufps    m0, m2, 10001000b
    shufps    m1, m2, 11011101b
%endif
%if %4 < %5
    ; narrowing conversion consumes two more source vectors per iteration
    mov%3     m2, [2*mmsize + srcq +(2<<%5)*lenq]
    mova      m3, m2
    mov%3     m4, [3*mmsize + srcq +(2<<%5)*lenq]
    shufps    m2, m4, 10001000b
    shufps    m3, m4, 11011101b
    SWAP 1,2
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3 [           dstq+(1<<%4)*lenq], m0
%if %4 > %5
    mov%3 [          dst2q+(1<<%4)*lenq], m2
    mov%3 [ mmsize +  dstq+(1<<%4)*lenq], m1
    mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
    add     lenq, 2*mmsize/(1<<%4)
%else
    mov%3 [          dst2q+(1<<%4)*lenq], m1
    add     lenq, mmsize/(1<<%4)
%endif
    jl      .next
    REP_RET
%endmacro

; Convert a single plane from one sample format to another.
%macro CONV 5-7
cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
    mov     srcq    , [srcq]
    mov     dstq    , [dstq]
%ifidn %3, a
    test    dstq, mmsize-1
    jne     %2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
    jne     %2_to_%1_u_int %+ SUFFIX
%else
%2_to_%1_u_int %+ SUFFIX:
%endif
    lea     srcq , [srcq  + (1<<%5)*lenq]
    lea     dstq , [dstq  + (1<<%4)*lenq]
    neg     lenq
    %7 m0,m1,m2,m3,m4,m5
.next:
    mov%3     m0, [           srcq +(1<<%5)*lenq]
    mov%3     m1, [  mmsize + srcq +(1<<%5)*lenq]
%if %4 < %5
    ; narrowing: four source vectors produce two destination vectors
    mov%3     m2, [2*mmsize + srcq +(1<<%5)*lenq]
    mov%3     m3, [3*mmsize + srcq +(1<<%5)*lenq]
%endif
    %6 m0,m1,m2,m3,m4,m5
    mov%3 [           dstq+(1<<%4)*lenq], m0
    mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
%if %4 > %5
    ; widening: two source vectors produce four destination vectors
    mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
    mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
    add     lenq, 4*mmsize/(1<<%4)
%else
    add     lenq, 2*mmsize/(1<<%4)
%endif
    jl      .next
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; Shared conventions for PACK_6CH / UNPACK_6CH / PACK_8CH
;   %1, %2, %3 as above; %4 and %5 are accepted but not referenced in the bodies
;   %6 = number of vector registers requested from cglobal
;   %7 = conversion macro, expanded per register pair inside the loop
;   %8 = init macro, expanded once before the loop
; The per-channel plane pointers are turned into offsets relative to the first
; plane, so only the first pointer has to be advanced in the loop. len is read
; as a 32-bit value (from r2m on x86-32) and counts down; each iteration
; handles mmsize/4 samples per channel.
;------------------------------------------------------------------------------

; Interleave six source planes of 32-bit samples into one destination plane.
%macro PACK_6CH 8
cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64
    mov     lend, r2d
%else
    %define lend dword r2m
%endif
    mov     src1q, [srcq+1*gprsize]
    mov     src2q, [srcq+2*gprsize]
    mov     src3q, [srcq+3*gprsize]
    mov     src4q, [srcq+4*gprsize]
    mov     src5q, [srcq+5*gprsize]
    mov     srcq, [srcq]
    mov     dstq, [dstq]
%ifidn %3, a
    test    dstq, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src1q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src3q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src4q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    src5q, mmsize-1
    jne     pack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; plane pointers -> offsets from plane 0
    sub     src1q, srcq
    sub     src2q, srcq
    sub     src3q, srcq
    sub     src4q, srcq
    sub     src5q, srcq
    %8 x,x,x,x,m7,x
.loop:
    mov%3     m0, [srcq      ]
    mov%3     m1, [srcq+src1q]
    mov%3     m2, [srcq+src2q]
    mov%3     m3, [srcq+src3q]
    mov%3     m4, [srcq+src4q]
    mov%3     m5, [srcq+src5q]
%if cpuflag(sse)
    ; 6x4 -> 4x6 reordering; output order is m0, m6, m4, m1, m2, m5
    SBUTTERFLYPS 0, 1, 6
    SBUTTERFLYPS 2, 3, 6
    SBUTTERFLYPS 4, 5, 6

%if cpuflag(avx)
    blendps   m6, m4, m0, 1100b
%else
    movaps    m6, m4
    shufps    m4, m0, q3210
    SWAP 4,6
%endif
    movlhps   m0, m2
    movhlps   m4, m2
%if cpuflag(avx)
    blendps   m2, m5, m1, 1100b
%else
    movaps    m2, m5
    shufps    m5, m1, q3210
    SWAP 2,5
%endif
    movlhps   m1, m3
    movhlps   m5, m3

    %7 m0,m6,x,x,m7,m3
    %7 m4,m1,x,x,m7,m3
    %7 m2,m5,x,x,m7,m3

    mov %+ %3 %+ ps [dstq   ], m0
    mov %+ %3 %+ ps [dstq+16], m6
    mov %+ %3 %+ ps [dstq+32], m4
    mov %+ %3 %+ ps [dstq+48], m1
    mov %+ %3 %+ ps [dstq+64], m2
    mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
    SBUTTERFLY dq, 0, 1, 6
    SBUTTERFLY dq, 2, 3, 6
    SBUTTERFLY dq, 4, 5, 6

    movq   [dstq   ], m0
    movq   [dstq+ 8], m2
    movq   [dstq+16], m4
    movq   [dstq+24], m1
    movq   [dstq+32], m3
    movq   [dstq+40], m5
%endif
    add     srcq, mmsize
    add     dstq, mmsize*6
    sub     lend, mmsize/4
    jg      .loop
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

; Split one interleaved six-channel source plane into six destination planes.
%macro UNPACK_6CH 8
cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
%if ARCH_X86_64
    mov     lend, r2d
%else
    %define lend dword r2m
%endif
    mov     dst1q, [dstq+1*gprsize]
    mov     dst2q, [dstq+2*gprsize]
    mov     dst3q, [dstq+3*gprsize]
    mov     dst4q, [dstq+4*gprsize]
    mov     dst5q, [dstq+5*gprsize]
    mov     dstq, [dstq]
    mov     srcq, [srcq]
%ifidn %3, a
    test    dstq, mmsize-1
    jne     unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
    jne     unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst1q, mmsize-1
    jne     unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst2q, mmsize-1
    jne     unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst3q, mmsize-1
    jne     unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst4q, mmsize-1
    jne     unpack_6ch_%2_to_%1_u_int %+ SUFFIX
    test    dst5q, mmsize-1
    jne     unpack_6ch_%2_to_%1_u_int %+ SUFFIX
%else
unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; plane pointers -> offsets from plane 0
    sub     dst1q, dstq
    sub     dst2q, dstq
    sub     dst3q, dstq
    sub     dst4q, dstq
    sub     dst5q, dstq
    %8 x,x,x,x,m7,x
.loop:
    mov%3     m0, [srcq   ]
    mov%3     m1, [srcq+16]
    mov%3     m2, [srcq+32]
    mov%3     m3, [srcq+48]
    mov%3     m4, [srcq+64]
    mov%3     m5, [srcq+80]

    ; 4x6 -> 6x4 reordering; after the SWAPs m0..m5 hold channels 0..5
    SBUTTERFLYPS 0, 3, 6
    SBUTTERFLYPS 1, 4, 6
    SBUTTERFLYPS 2, 5, 6
    SBUTTERFLYPS 0, 4, 6
    SBUTTERFLYPS 3, 2, 6
    SBUTTERFLYPS 1, 5, 6
    SWAP 1, 4
    SWAP 2, 3

    %7 m0,m1,x,x,m7,m6
    %7 m2,m3,x,x,m7,m6
    %7 m4,m5,x,x,m7,m6

    mov %+ %3 %+ ps [dstq      ], m0
    mov %+ %3 %+ ps [dstq+dst1q], m1
    mov %+ %3 %+ ps [dstq+dst2q], m2
    mov %+ %3 %+ ps [dstq+dst3q], m3
    mov %+ %3 %+ ps [dstq+dst4q], m4
    mov %+ %3 %+ ps [dstq+dst5q], m5

    add     srcq, mmsize*6
    add     dstq, mmsize
    sub     lend, mmsize/4
    jg      .loop
    REP_RET
%endmacro

; GPR count requested by PACK_8CH: 10 on x86-64; 6 or 7 on x86-32 depending on
; HAVE_ALIGNED_STACK.
%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)

; Interleave eight source planes of 32-bit samples into one destination plane.
; On x86-32 there are not enough GPRs for all pointers: dst, src1, src7 (and
; src4 when HAVE_ALIGNED_STACK == 0) share r0 and are kept in dstm / stack slots
; at [rsp+32..40], being reloaded around their uses. [rsp] and [rsp+16] are
; handed to TRANSPOSE8x4D as scratch. On x86-32 the init macro %8 is not
; expanded; m9 is instead defined as a memory operand chosen from %1.
%macro PACK_8CH 8
cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
    mov     dstq, [dstq]
%if ARCH_X86_32
    DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
    %define lend dword r2m
    %define src1q r0q
    %define src1m dword [rsp+32]
%if HAVE_ALIGNED_STACK == 0
    DEFINE_ARGS dst, src, src2, src3, src5, src6
    %define src4q r0q
    %define src4m dword [rsp+36]
%endif
    %define src7q r0q
    %define src7m dword [rsp+40]
    mov     dstm, dstq
%endif
    mov     src7q, [srcq+7*gprsize]
    mov     src6q, [srcq+6*gprsize]
%if ARCH_X86_32
    mov     src7m, src7q
%endif
    mov     src5q, [srcq+5*gprsize]
    mov     src4q, [srcq+4*gprsize]
    mov     src3q, [srcq+3*gprsize]
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    mov     src4m, src4q
%endif
    mov     src2q, [srcq+2*gprsize]
    mov     src1q, [srcq+1*gprsize]
    mov     srcq, [srcq]
%ifidn %3, a
%if ARCH_X86_32
    test    dstmp, mmsize-1
%else
    test    dstq, mmsize-1
%endif
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    srcq, mmsize-1
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src1q, mmsize-1
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src2q, mmsize-1
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src3q, mmsize-1
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    test    src4m, mmsize-1
%else
    test    src4q, mmsize-1
%endif
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src5q, mmsize-1
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
    test    src6q, mmsize-1
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
%if ARCH_X86_32
    test    src7m, mmsize-1
%else
    test    src7q, mmsize-1
%endif
    jne     pack_8ch_%2_to_%1_u_int %+ SUFFIX
%else
pack_8ch_%2_to_%1_u_int %+ SUFFIX:
%endif
    ; plane pointers -> offsets from plane 0
    sub     src1q, srcq
    sub     src2q, srcq
    sub     src3q, srcq
%if ARCH_X86_64 || HAVE_ALIGNED_STACK
    sub     src4q, srcq
%else
    sub     src4m, srcq
%endif
    sub     src5q, srcq
    sub     src6q, srcq
%if ARCH_X86_64
    sub     src7q, srcq
%else
    mov     src1m, src1q
    sub     src7m, srcq
%endif

%if ARCH_X86_64
    %8 x,x,x,x,m9,x
%elifidn %1, int32
    %define m9 [flt2p31]
%else
    %define m9 [flt2pm31]
%endif

.loop:
    mov%3     m0, [srcq      ]
    mov%3     m1, [srcq+src1q]
    mov%3     m2, [srcq+src2q]
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
    mov     src4q, src4m
%endif
    mov%3     m3, [srcq+src3q]
    mov%3     m4, [srcq+src4q]
    mov%3     m5, [srcq+src5q]
%if ARCH_X86_32
    mov     src7q, src7m
%endif
    mov%3     m6, [srcq+src6q]
    mov%3     m7, [srcq+src7q]

%if ARCH_X86_64
    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8

    %7 m0,m1,x,x,m9,m8
    %7 m2,m3,x,x,m9,m8
    %7 m4,m5,x,x,m9,m8
    %7 m6,m7,x,x,m9,m8

    mov%3 [dstq], m0
%else
    mov     dstq, dstm

    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1

    ; m2 is lent out as the temporary for the first pair, then reloaded from
    ; [rsp]; m0 is stored early so it can serve as the temporary afterwards
    %7 m0,m1,x,x,m9,m2
    mova      m2, [rsp]
    mov%3 [dstq], m0
    %7 m2,m3,x,x,m9,m0
    %7 m4,m5,x,x,m9,m0
    %7 m6,m7,x,x,m9,m0

%endif

    mov%3 [dstq+16],  m1
    mov%3 [dstq+32],  m2
    mov%3 [dstq+48],  m3
    mov%3 [dstq+64],  m4
    mov%3 [dstq+80],  m5
    mov%3 [dstq+96],  m6
    mov%3 [dstq+112], m7

    add     srcq, mmsize
    add     dstq, mmsize*8
%if ARCH_X86_32
    mov     dstm, dstq
    mov     src1q, src1m
%endif
    sub     lend, mmsize/4
    jg      .loop
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; Conversion / init macros passed to the loops above. All take six arguments;
; several ignore them and operate on fixed registers m0..m5 instead.
;------------------------------------------------------------------------------

; in: m0,m1 = int16 vectors; out: m0..m3 = int32 vectors with each sample in the
; upper 16 bits of its lane and zero in the lower 16 bits. Uses m4.
%macro INT16_TO_INT32_N 6
    pxor      m2, m2
    pxor      m3, m3
    punpcklwd m2, m1
    punpckhwd m3, m1
    SWAP 4,0
    pxor      m0, m0
    pxor      m1, m1
    punpcklwd m0, m4
    punpckhwd m1, m4
%endmacro

; in: m0..m3 = int32 vectors; out: m0,m1 = int16 vectors holding the upper
; 16 bits of each input sample.
%macro INT32_TO_INT16_N 6
    psrad     m0, 16
    psrad     m1, 16
    psrad     m2, 16
    psrad     m3, 16
    packssdw  m0, m1
    packssdw  m2, m3
    SWAP 1,2
%endmacro

; %5 = register that receives the 2^-31 scale.
%macro INT32_TO_FLOAT_INIT 6
    mova      %5, [flt2pm31]
%endmacro
; %1,%2 = int32 vectors converted in place to float and multiplied by %5.
%macro INT32_TO_FLOAT_N 6
    cvtdq2ps  %1, %1
    cvtdq2ps  %2, %2
    mulps     %1, %1, %5
    mulps     %2, %2, %5
%endmacro

; %5 = register that receives the 2^31 scale.
%macro FLOAT_TO_INT32_INIT 6
    mova      %5, [flt2p31]
%endmacro
; %1,%2 = float vectors converted in place to int32; %5 = scale, %6 = temporary.
; cmpps with predicate 5 (not-less-than) yields all-ones (-1) in lanes where the
; scaled value is >= %5; adding that mask to the cvtps2dq result lowers those
; lanes by one (with %5 = 2^31 this turns the 0x80000000 overflow result into
; 0x7fffffff).
%macro FLOAT_TO_INT32_N 6
    mulps     %1, %5
    mulps     %2, %5
    cvtps2dq  %6, %1
    cmpps     %1, %1, %5, 5
    paddd     %1, %6
    cvtps2dq  %6, %2
    cmpps     %2, %2, %5, 5
    paddd     %2, %6
%endmacro

; m5 = 2^-31: matches INT16_TO_INT32_N, which leaves samples in the upper half
; of each 32-bit lane.
%macro INT16_TO_FLOAT_INIT 6
    mova      m5, [flt2pm31]
%endmacro
; in: m0,m1 = int16 vectors; out: m0..m3 = float vectors. Uses m4, m5.
%macro INT16_TO_FLOAT_N 6
    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
    cvtdq2ps  m0, m0
    cvtdq2ps  m1, m1
    cvtdq2ps  m2, m2
    cvtdq2ps  m3, m3
    mulps     m0, m0, m5
    mulps     m1, m1, m5
    mulps     m2, m2, m5
    mulps     m3, m3, m5
%endmacro

; m5 = 2^15.
%macro FLOAT_TO_INT16_INIT 6
    mova      m5, [flt2p15]
%endmacro
; in: m0..m3 = float vectors; out: m0,m1 = int16 vectors (packssdw saturates).
%macro FLOAT_TO_INT16_N 6
    mulps     m0, m5
    mulps     m1, m5
    mulps     m2, m5
    mulps     m3, m5
    cvtps2dq  m0, m0
    cvtps2dq  m1, m1
    packssdw  m0, m1
    cvtps2dq  m1, m2
    cvtps2dq  m3, m3
    packssdw  m1, m3
%endmacro

; Placeholder for "no conversion" / "no init".
%macro NOP_N 0-6
%endmacro

;------------------------------------------------------------------------------
; Instantiations. Each "u" variant precedes its "a" variant because the "a"
; entry points jump to the label emitted by the "u" one.
;------------------------------------------------------------------------------

INIT_XMM sse
PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N

UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N

INIT_XMM sse2
CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N

CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT

PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N

PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

INIT_XMM ssse3
UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N

UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N

PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT

PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N

PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
; NOTE(review): the invocation below ends at a trailing comma in the text this
; was restored from; confirm its remaining arguments (and the %endif closing
; HAVE_AVX_EXTERNAL) against whatever follows.
PACK_8CH int32, float, a, 2, 2, 10,
FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT 720cabdff1aSopenharmony_ci 721cabdff1aSopenharmony_ciINIT_YMM avx 722cabdff1aSopenharmony_ciCONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT 723cabdff1aSopenharmony_ciCONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT 724cabdff1aSopenharmony_ci%endif 725cabdff1aSopenharmony_ci 726cabdff1aSopenharmony_ci%if HAVE_AVX2_EXTERNAL 727cabdff1aSopenharmony_ciINIT_YMM avx2 728cabdff1aSopenharmony_ciCONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT 729cabdff1aSopenharmony_ciCONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT 730cabdff1aSopenharmony_ci%endif 731