Lines Matching refs:T0

344 my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
811 vmovdqu 16*2($inp),$T0
815 vpsrldq \$6,$T0,$T2 # splat input
817 vpunpckhqdq $T1,$T0,$T4 # 4
818 vpunpcklqdq $T1,$T0,$T0 # 0:1
822 vpsrlq \$26,$T0,$T1
823 vpand $MASK,$T0,$T0 # 0
904 vpmuludq $T0,$D4,$D0 # d0 = h0*r0
925 vpmuludq $T0,$H2,$H2 # h0*r1
932 vpmuludq $T0,$H3,$H3 # h0*r2
944 vpmuludq $T0,$H2,$H2 # h0*r3
964 vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4
965 vpmuludq $T1,$H4,$T0 # h1*s4
969 vpaddq $T0,$D0,$D0 # d0 += h1*s4
1006 vpmuludq $H0,$T4,$T0 # h0*r0
1008 vpaddq $T0,$D0,$D0
1011 vpmuludq $H2,$T4,$T0 # h2*r0
1013 vpaddq $T0,$D2,$D2
1016 vpmuludq -0x70(%r11),$H4,$T0 # h4*s1
1019 vpaddq $T0,$D0,$D0 # d0 += h4*s1
1021 vpmuludq $H3,$T2,$T0 # h3*r1
1024 vpaddq $T0,$D4,$D4 # d4 += h3*r1
1031 vpmuludq $H2,$T3,$T0 # h2*r2
1033 vpaddq $T0,$D4,$D4 # d4 += h2*r2
1037 vpmuludq $H4,$T4,$T0 # h4*s2
1039 vpaddq $T0,$D1,$D1 # d1 += h4*s2
1048 vpmuludq $H4,$T3,$T0 # h4*s3
1050 vpaddq $T0,$D2,$D2 # d2 += h4*s3
1051 vmovdqu 16*2($inp),$T0 # load input
1060 vpsrldq \$6,$T0,$T2 # splat input
1068 vpunpckhqdq $T1,$T0,$T4 # 4
1072 vpunpcklqdq $T1,$T0,$T0 # 0:1
1077 vpsrlq \$26,$T0,$T1
1079 vpand $MASK,$T0,$T0 # 0
1134 vpaddq $H0,$T0,$T0
1153 vpmuludq $T0,$D4,$D0 # d0 = h0*r0
1167 vpmuludq $T0,$H2,$H2 # h0*r1
1178 vpmuludq $T0,$H4,$H4 # h0*r2
1188 vpmuludq $T0,$H3,$H3 # h0*r3
1199 vpmuludq $T0,$H2,$H2 # h0*r4
1241 vpmuludq $H0,$T4,$T0 # h0*r0
1242 vpaddq $T0,$D0,$D0 # d0 += h0*r0
1245 vpmuludq $H2,$T4,$T0 # h2*r0
1246 vpaddq $T0,$D2,$D2 # d2 += h2*r0
1253 vpmuludq $H3,$T2,$T0 # h3*r1
1254 vpaddq $T0,$D4,$D4 # d4 += h3*r1
1259 vpmuludq $H1,$T2,$T0 # h1*r1
1260 vpaddq $T0,$D2,$D2 # d2 += h1*r1
1269 vpmuludq $H1,$T4,$T0 # h1*r2
1270 vpaddq $T0,$D3,$D3 # d3 += h1*r2
1280 vpmuludq $H1,$T3,$T0 # h1*r3
1281 vpaddq $T0,$D4,$D4 # d4 += h1*r3
1288 vpmuludq $H3,$T4,$T0 # h3*s3
1289 vpaddq $T0,$D1,$D1 # d1 += h3*s3
1297 vpmuludq $H3,$T3,$T0 # h3*s4
1298 vpaddq $T0,$D2,$D2 # d2 += h3*s4
1311 vpsrldq \$8,$D0,$T0
1315 vpaddq $T0,$D0,$D0
1442 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
1749 vmovdqa 96(%rcx),$T0 # .Lpermd_avx2
1761 vpermd $T2,$T0,$T2 # 00003412 -> 14243444
1763 vpermd $T3,$T0,$T3
1765 vpermd $T4,$T0,$T4
1767 vpermd $D0,$T0,$D0
1769 vpermd $D1,$T0,$D1
1771 vpermd $D2,$T0,$D2
1773 vpermd $D3,$T0,$D3
1775 vpermd $D4,$T0,$D4
1777 vpermd $MASK,$T0,$MASK
1785 vmovdqu 16*0($inp),%x#$T0
1787 vinserti128 \$1,16*2($inp),$T0,$T0
1791 vpsrldq \$6,$T0,$T2 # splat input
1793 vpunpckhqdq $T1,$T0,$T4 # 4
1795 vpunpcklqdq $T1,$T0,$T0 # 0:1
1799 vpsrlq \$26,$T0,$T1
1802 vpand $MASK,$T0,$T0 # 0
1822 vpaddq $H0,$T0,$H0
1823 vmovdqa `32*0`(%rsp),$T0 # r0^4
1847 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
1863 vpmuludq $H0,$T0,$T4 # h0*r0
1864 vpmuludq $H1,$T0,$H2 # h1*r0
1867 vpmuludq $H3,$T0,$T4 # h3*r0
1868 vpmuludq $H4,$T0,$H2 # h4*r0
1869 vmovdqu 16*0($inp),%x#$T0 # load input
1872 vinserti128 \$1,16*2($inp),$T0,$T0
1889 vpsrldq \$6,$T0,$T2 # splat input
1897 vpunpckhqdq $T1,$T0,$T4 # 4
1901 vpunpcklqdq $T1,$T0,$T0 # 0:1
1936 vpsrlq \$26,$T0,$T1
1955 vpand $MASK,$T0,$T0 # 0
1972 vpaddq $H0,$T0,$H0
1973 vmovdqu `32*0+4`(%rsp),$T0 # r0^4
1982 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
1997 vpmuludq $H0,$T0,$T4 # h0*r0
1998 vpmuludq $H1,$T0,$H2 # h1*r0
2002 vpmuludq $H3,$T0,$T4 # h3*r0
2003 vpmuludq $H4,$T0,$H2 # h4*r0
2043 vpsrldq \$8,$H0,$T0
2048 vpaddq $T0,$H0,$H0
2052 vpermq \$0x2,$H0,$T0
2057 vpaddq $T0,$H0,$H0
2137 map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain
2181 vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1}
2191 vpermd $T0,$T2,$S1
2194 vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304
2219 vpmuludq $T0,$R0,$D0 # d0 = r0'*r0
2220 vpmuludq $T0,$R1,$D1 # d1 = r0'*r1
2221 vpmuludq $T0,$R2,$D2 # d2 = r0'*r2
2222 vpmuludq $T0,$R3,$D3 # d3 = r0'*r3
2223 vpmuludq $T0,$R4,$D4 # d4 = r0'*r4
2316 vpunpcklqdq $T4,$T3,$T0 # transpose input
2351 vpsrlq \$52,$T0,$T2 # splat input
2354 vpsrlq \$26,$T0,$T1
2358 vpandq $MASK,$T0,$T0 # 0
2399 vpaddq $H0,$T0,$H0
2432 vpunpcklqdq $T4,$T3,$T0 # transpose input
2465 vpsrlq \$52,$T0,$T2 # splat input
2492 vpsrlq \$26,$T0,$T1
2510 vpandq $MASK,$T0,$T0 # 0
2539 vpaddq $H0,$T0,$H0
2553 vmovdqu 16*0($inp),%x#$T0
2573 vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0
2640 map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
2648 vpsrldq \$6,$T0,$T2 # splat input
2650 vpunpckhqdq $T1,$T0,$T4 # 4
2656 vpunpcklqdq $T1,$T0,$T0 # 0:1
2670 vpsrlq \$26,$T0,$T1
2677 vpand $MASK,$T0,$T0 # 0
2803 my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21));
2858 vmovdqu32 0($inp),%x#$T0 # load input as ----3210
2861 vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110
2862 vpsrlvq $inp_shift,$T0,$T0
2863 vpandq $reduc_mask,$T0,$T0
2864 vporq $PAD,$T0,$T0
2866 vpaddq $T0,$Dlo,$Dlo # accumulate input
2884 vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword
2888 vpaddq $T0,$Dhi,$Dhi
2894 vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word
2897 vpermq \$0b10010011,$T0,$T0
2899 vpaddq $T0,$Dlo,$Dlo
2901 vpermq \$0b10010011,$Dlo,${T0}{%k1}{z}
2903 vpaddq $T0,$Dlo,$Dlo
2904 vpsllq \$2,$T0,$T0
2906 vpaddq $T0,$Dlo,$Dlo
2930 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
2987 vpandq $mask44,$T1,$T0
3157 vpandq $mask44,$T1,$T0
3169 vpaddq $T0,$H0,$H0
3220 vpandq $mask44,$T1,$T0
3258 vpaddq $T0,$H0,$H0
3293 vpsrldq \$8,$D0lo,$T0
3297 vpaddq $T0,$D0lo,$D0lo
3303 vpermq \$0x2,$D0lo,$T0
3310 vpaddq $T0,$D0lo,${D0lo}{%k1}{z}
3373 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3478 vpunpcklqdq $R0,$RR0,$T0
3486 map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3491 vshufi64x2 \$0x44,$R0,$T0,$RR0
3523 vpandq $mask44,$T1,$T0
3536 vpaddq $T0,$H0,$H0
3587 vpandq $mask44,$T1,$T0
3615 vpaddq $T0,$H0,$H0
3650 vpsrldq \$8,$D0lo,$T0
3654 vpaddq $T0,$D0lo,$D0lo
3660 vpermq \$0x2,$D0lo,$T0
3667 vpaddq $T0,$D0lo,$D0lo
3673 vextracti64x4 \$1,$D0lo,%y#$T0
3686 map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3689 vpaddq $T0,$D0lo,${D0lo}{%k1}{z}