Lines Matching refs:T0
420 my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
887 vmovdqu 16*2($inp),$T0
891 vpsrldq \$6,$T0,$T2 # splat input
893 vpunpckhqdq $T1,$T0,$T4 # 4
894 vpunpcklqdq $T1,$T0,$T0 # 0:1
898 vpsrlq \$26,$T0,$T1
899 vpand $MASK,$T0,$T0 # 0
980 vpmuludq $T0,$D4,$D0 # d0 = h0*r0
1001 vpmuludq $T0,$H2,$H2 # h0*r1
1008 vpmuludq $T0,$H3,$H3 # h0*r2
1020 vpmuludq $T0,$H2,$H2 # h0*r3
1040 vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4
1041 vpmuludq $T1,$H4,$T0 # h1*s4
1045 vpaddq $T0,$D0,$D0 # d0 += h1*s4
1082 vpmuludq $H0,$T4,$T0 # h0*r0
1084 vpaddq $T0,$D0,$D0
1087 vpmuludq $H2,$T4,$T0 # h2*r0
1089 vpaddq $T0,$D2,$D2
1092 vpmuludq -0x70(%r11),$H4,$T0 # h4*s1
1095 vpaddq $T0,$D0,$D0 # d0 += h4*s1
1097 vpmuludq $H3,$T2,$T0 # h3*r1
1100 vpaddq $T0,$D4,$D4 # d4 += h3*r1
1107 vpmuludq $H2,$T3,$T0 # h2*r2
1109 vpaddq $T0,$D4,$D4 # d4 += h2*r2
1113 vpmuludq $H4,$T4,$T0 # h4*s2
1115 vpaddq $T0,$D1,$D1 # d1 += h4*s2
1124 vpmuludq $H4,$T3,$T0 # h4*s3
1126 vpaddq $T0,$D2,$D2 # d2 += h4*s3
1127 vmovdqu 16*2($inp),$T0 # load input
1136 vpsrldq \$6,$T0,$T2 # splat input
1144 vpunpckhqdq $T1,$T0,$T4 # 4
1148 vpunpcklqdq $T1,$T0,$T0 # 0:1
1153 vpsrlq \$26,$T0,$T1
1155 vpand $MASK,$T0,$T0 # 0
1210 vpaddq $H0,$T0,$T0
1229 vpmuludq $T0,$D4,$D0 # d0 = h0*r0
1243 vpmuludq $T0,$H2,$H2 # h0*r1
1254 vpmuludq $T0,$H4,$H4 # h0*r2
1264 vpmuludq $T0,$H3,$H3 # h0*r3
1275 vpmuludq $T0,$H2,$H2 # h0*r4
1317 vpmuludq $H0,$T4,$T0 # h0*r0
1318 vpaddq $T0,$D0,$D0 # d0 += h0*r0
1321 vpmuludq $H2,$T4,$T0 # h2*r0
1322 vpaddq $T0,$D2,$D2 # d2 += h2*r0
1329 vpmuludq $H3,$T2,$T0 # h3*r1
1330 vpaddq $T0,$D4,$D4 # d4 += h3*r1
1335 vpmuludq $H1,$T2,$T0 # h1*r1
1336 vpaddq $T0,$D2,$D2 # d2 += h1*r1
1345 vpmuludq $H1,$T4,$T0 # h1*r2
1346 vpaddq $T0,$D3,$D3 # d3 += h1*r2
1356 vpmuludq $H1,$T3,$T0 # h1*r3
1357 vpaddq $T0,$D4,$D4 # d4 += h1*r3
1364 vpmuludq $H3,$T4,$T0 # h3*s3
1365 vpaddq $T0,$D1,$D1 # d1 += h3*s3
1373 vpmuludq $H3,$T3,$T0 # h3*s4
1374 vpaddq $T0,$D2,$D2 # d2 += h3*s4
1387 vpsrldq \$8,$D0,$T0
1391 vpaddq $T0,$D0,$D0
1517 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
1832 vmovdqa 96(%rcx),$T0 # .Lpermd_avx2
1844 vpermd $T2,$T0,$T2 # 00003412 -> 14243444
1846 vpermd $T3,$T0,$T3
1848 vpermd $T4,$T0,$T4
1850 vpermd $D0,$T0,$D0
1852 vpermd $D1,$T0,$D1
1854 vpermd $D2,$T0,$D2
1856 vpermd $D3,$T0,$D3
1858 vpermd $D4,$T0,$D4
1860 vpermd $MASK,$T0,$MASK
1868 vmovdqu 16*0($inp),%x#$T0
1870 vinserti128 \$1,16*2($inp),$T0,$T0
1874 vpsrldq \$6,$T0,$T2 # splat input
1876 vpunpckhqdq $T1,$T0,$T4 # 4
1878 vpunpcklqdq $T1,$T0,$T0 # 0:1
1882 vpsrlq \$26,$T0,$T1
1885 vpand $MASK,$T0,$T0 # 0
1905 vpaddq $H0,$T0,$H0
1906 vmovdqa `32*0`(%rsp),$T0 # r0^4
1930 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
1946 vpmuludq $H0,$T0,$T4 # h0*r0
1947 vpmuludq $H1,$T0,$H2 # h1*r0
1950 vpmuludq $H3,$T0,$T4 # h3*r0
1951 vpmuludq $H4,$T0,$H2 # h4*r0
1952 vmovdqu 16*0($inp),%x#$T0 # load input
1955 vinserti128 \$1,16*2($inp),$T0,$T0
1972 vpsrldq \$6,$T0,$T2 # splat input
1980 vpunpckhqdq $T1,$T0,$T4 # 4
1984 vpunpcklqdq $T1,$T0,$T0 # 0:1
2019 vpsrlq \$26,$T0,$T1
2038 vpand $MASK,$T0,$T0 # 0
2055 vpaddq $H0,$T0,$H0
2056 vmovdqu `32*0+4`(%rsp),$T0 # r0^4
2065 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
2080 vpmuludq $H0,$T0,$T4 # h0*r0
2081 vpmuludq $H1,$T0,$H2 # h1*r0
2085 vpmuludq $H3,$T0,$T4 # h3*r0
2086 vpmuludq $H4,$T0,$H2 # h4*r0
2126 vpsrldq \$8,$H0,$T0
2131 vpaddq $T0,$H0,$H0
2135 vpermq \$0x2,$H0,$T0
2140 vpaddq $T0,$H0,$H0
2212 map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain
2253 vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1}
2263 vpermd $T0,$T2,$S1
2266 vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304
2291 vpmuludq $T0,$R0,$D0 # d0 = r0'*r0
2292 vpmuludq $T0,$R1,$D1 # d1 = r0'*r1
2293 vpmuludq $T0,$R2,$D2 # d2 = r0'*r2
2294 vpmuludq $T0,$R3,$D3 # d3 = r0'*r3
2295 vpmuludq $T0,$R4,$D4 # d4 = r0'*r4
2388 vpunpcklqdq $T4,$T3,$T0 # transpose input
2423 vpsrlq \$52,$T0,$T2 # splat input
2426 vpsrlq \$26,$T0,$T1
2430 vpandq $MASK,$T0,$T0 # 0
2471 vpaddq $H0,$T0,$H0
2504 vpunpcklqdq $T4,$T3,$T0 # transpose input
2537 vpsrlq \$52,$T0,$T2 # splat input
2564 vpsrlq \$26,$T0,$T1
2582 vpandq $MASK,$T0,$T0 # 0
2611 vpaddq $H0,$T0,$H0
2625 vmovdqu 16*0($inp),%x#$T0
2645 vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0
2712 map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
2720 vpsrldq \$6,$T0,$T2 # splat input
2722 vpunpckhqdq $T1,$T0,$T4 # 4
2728 vpunpcklqdq $T1,$T0,$T0 # 0:1
2742 vpsrlq \$26,$T0,$T1
2749 vpand $MASK,$T0,$T0 # 0
2901 my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21));
2954 vmovdqu32 0($inp),%x#$T0 # load input as ----3210
2957 vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110
2958 vpsrlvq $inp_shift,$T0,$T0
2959 vpandq $reduc_mask,$T0,$T0
2960 vporq $PAD,$T0,$T0
2962 vpaddq $T0,$Dlo,$Dlo # accumulate input
2980 vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword
2984 vpaddq $T0,$Dhi,$Dhi
2990 vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word
2993 vpermq \$0b10010011,$T0,$T0
2995 vpaddq $T0,$Dlo,$Dlo
2997 vpermq \$0b10010011,$Dlo,${T0}{%k1}{z}
2999 vpaddq $T0,$Dlo,$Dlo
3000 vpsllq \$2,$T0,$T0
3002 vpaddq $T0,$Dlo,$Dlo
3025 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3081 vpandq $mask44,$T1,$T0
3251 vpandq $mask44,$T1,$T0
3263 vpaddq $T0,$H0,$H0
3314 vpandq $mask44,$T1,$T0
3352 vpaddq $T0,$H0,$H0
3387 vpsrldq \$8,$D0lo,$T0
3391 vpaddq $T0,$D0lo,$D0lo
3397 vpermq \$0x2,$D0lo,$T0
3404 vpaddq $T0,$D0lo,${D0lo}{%k1}{z}
3466 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3570 vpunpcklqdq $R0,$RR0,$T0
3578 map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3583 vshufi64x2 \$0x44,$R0,$T0,$RR0
3615 vpandq $mask44,$T1,$T0
3628 vpaddq $T0,$H0,$H0
3679 vpandq $mask44,$T1,$T0
3707 vpaddq $T0,$H0,$H0
3742 vpsrldq \$8,$D0lo,$T0
3746 vpaddq $T0,$D0lo,$D0lo
3752 vpermq \$0x2,$D0lo,$T0
3759 vpaddq $T0,$D0lo,$D0lo
3765 vextracti64x4 \$1,$D0lo,%y#$T0
3778 map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3781 vpaddq $T0,$D0lo,${D0lo}{%k1}{z}