Lines Matching refs:T2
419 my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
890 vpsrldq \$6,$T0,$T2 # splat input
894 vpunpcklqdq $T3,$T2,$T3 # 2:3
899 vpsrlq \$4,$T3,$T2
902 vpand $MASK,$T2,$T2 # 2
982 vpmuludq $T2,$D4,$D2 # d2 = h2*r0
994 vpmuludq $T2,$H2,$H0 # h2*r1
1001 vpmuludq $T2,$H3,$H0 # h2*r2
1027 vpmuludq $T2,$H3,$H3 # h2*s3
1028 vpmuludq $T2,$H4,$T2 # h2*s4
1032 vpaddq $T2,$D1,$D1 # d1 += h2*s4
1085 vmovdqa -0x80(%r11),$T2 # r1^4
1095 vpmuludq $H2,$T2,$T1 # h2*r1
1096 vpmuludq $H3,$T2,$T0 # h3*r1
1100 vpmuludq $H1,$T2,$T1 # h1*r1
1101 vpmuludq $H0,$T2,$T2 # h0*r1
1103 vpaddq $T2,$D1,$D1 # d1 += h0*r1
1110 vmovdqa -0x40(%r11),$T2 # r3^4
1117 vpmuludq $H1,$T2,$T1 # h1*r3
1122 vpmuludq $H0,$T2,$T2 # h0*r3
1124 vpaddq $T2,$D3,$D3 # d3 += h0*r3
1127 vpmuludq $H3,$T3,$T2 # h3*s3
1129 vpaddq $T2,$D1,$D1 # d1 += h3*s3
1135 vpsrldq \$6,$T0,$T2 # splat input
1148 vpunpcklqdq $T3,$T2,$T3 # 2:3
1155 vpsrlq \$4,$T3,$T2
1159 vpand $MASK,$T2,$T2 # 2
1208 vpaddq $H2,$T2,$T2
1227 vpmuludq $T2,$D4,$D2 # d2 = h2*r0
1237 vpmuludq $T2,$H2,$H1 # h2*r1
1248 vpmuludq $T2,$H4,$H1 # h2*r2
1271 vpmuludq $T2,$H4,$H4 # h2*s3
1280 vpmuludq $T2,$H3,$H1 # h2*s4
1322 vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n
1328 vpmuludq $H3,$T2,$T0 # h3*r1
1331 vpmuludq $H2,$T2,$T1 # h2*r1
1334 vpmuludq $H1,$T2,$T0 # h1*r1
1336 vpmuludq $H0,$T2,$T2 # h0*r1
1337 vpaddq $T2,$D1,$D1 # d1 += h0*r1
1341 vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2
1349 vpmuludq $H4,$T2,$T1 # h4*s2
1352 vpmuludq $H3,$T2,$T2 # h3*s2
1353 vpaddq $T2,$D0,$D0 # d0 += h3*s2
1359 vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4
1368 vpmuludq $H0,$T2,$T2 # h0*r4
1369 vpaddq $T2,$D4,$D4 # d4 += h0*r4
1387 vpsrldq \$8,$D2,$T2
1392 vpaddq $T2,$D2,$D2
1516 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
1834 vmovdqu `16*0-64`($ctx),%x#$T2
1843 vpermd $T2,$T0,$T2 # 00003412 -> 14243444
1848 vmovdqa $T2,0x00(%rsp)
1873 vpsrldq \$6,$T0,$T2 # splat input
1876 vpunpcklqdq $T3,$T2,$T2 # 2:3
1879 vpsrlq \$30,$T2,$T3
1880 vpsrlq \$4,$T2,$T2
1883 vpand $MASK,$T2,$T2 # 2
1889 vpaddq $H2,$T2,$H2 # accumulate input
1903 #vpaddq $H2,$T2,$H2 # accumulate input
1909 vmovdqa `32*3`(%rsp),$T2 # r2^4
1931 vpmuludq $H2,$T2,$D4 # d4 = h2*r2
1962 vpmuludq $H1,$T2,$T4 # h1*r2
1963 vpmuludq $H0,$T2,$T2 # h0*r2
1965 vpaddq $T2,$D2,$D2 # d2 += h0*r2
1971 vpsrldq \$6,$T0,$T2 # splat input
1986 vpunpcklqdq $T3,$T2,$T3 # 2:3
2007 vpsrlq \$4,$T3,$T2
2017 vpand $MASK,$T2,$T2 # 2
2024 vpaddq $T2,$H2,$H2 # modulo-scheduled
2053 #vpaddq $H2,$T2,$H2 # accumulate input
2059 vmovdqu `32*3+4`(%rsp),$T2 # r2^4
2066 vpmuludq $H2,$T2,$D4 # d4 = h2*r2
2094 vpmuludq $H1,$T2,$T4 # h1*r2
2095 vpmuludq $H0,$T2,$T2 # h0*r2
2097 vpaddq $T2,$D2,$D2 # d2 += h0*r2
2122 vpsrldq \$8,$H2,$T2
2127 vpaddq $T2,$H2,$H2
2136 vpermq \$0x2,$H2,$T2
2141 vpaddq $T2,$H2,$H2
2211 map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain
2245 vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2
2259 vpermd $D0,$T2,$R0 # 00003412 -> 14243444
2261 vpermd $D1,$T2,$R1
2262 vpermd $T0,$T2,$S1
2263 vpermd $D2,$T2,$R2
2266 vpermd $T1,$T2,$S2
2269 vpermd $D3,$T2,$R3
2271 vpermd $T3,$T2,$S3
2272 vpermd $D4,$T2,$R4
2274 vpermd $T4,$T2,$S4
2295 vpsrlq \$32,$R2,$T2
2309 vpmuludq $T2,$S3,$M0
2310 vpmuludq $T2,$S4,$M1
2311 vpmuludq $T2,$R1,$M3
2312 vpmuludq $T2,$R2,$M4
2313 vpmuludq $T2,$R0,$M2
2422 vpsrlq \$52,$T0,$T2 # splat input
2424 vporq $T3,$T2,$T2
2428 vpandq $MASK,$T2,$T2 # 2
2434 vpaddq $H2,$T2,$H2 # accumulate input
2452 #vpaddq $H2,$T2,$H2 # accumulate input
2536 vpsrlq \$52,$T0,$T2 # splat input
2543 vporq $T3,$T2,$T2
2549 vpandq $MASK,$T2,$T2 # 2
2562 vpaddq $T2,$H2,$H2 # modulo-scheduled
2609 #vpaddq $H2,$T2,$H2 # accumulate input
2711 map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
2719 vpsrldq \$6,$T0,$T2 # splat input
2726 vpunpcklqdq $T3,$T2,$T2 # 2:3
2735 vpsrlq \$30,$T2,$T3
2736 vpsrlq \$4,$T2,$T2
2747 vpand $MASK,$T2,$T2 # 2
2753 vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2
2767 vpsubq $T2,$H2,$H2 # undo input accumulation
3024 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3068 vmovdqu64 16*0($inp),$T2 # load data
3072 vpunpcklqdq $T3,$T2,$T1 # transpose data
3073 vpunpckhqdq $T3,$T2,$T3
3077 vpsrlq \$24,$T3,$T2 # splat the data
3078 vporq $PAD,$T2,$T2
3079 vpaddq $T2,$H2,$H2 # accumulate input
3238 vmovdqu64 16*0($inp),$T2 # load data
3242 vpunpcklqdq $T3,$T2,$T1 # transpose data
3243 vpunpckhqdq $T3,$T2,$T3
3247 vpsrlq \$24,$T3,$T2 # splat the data
3248 vporq $PAD,$T2,$T2
3249 vpaddq $T2,$H2,$H2 # accumulate input
3261 #vpaddq $T2,$H2,$H2 # accumulate input
3278 vmovdqu64 16*0($inp),$T2 # load data
3288 vpunpcklqdq $T3,$T2,$T1 # transpose data
3289 vpunpckhqdq $T3,$T2,$T3
3304 vpsrlq \$24,$T3,$T2
3305 vporq $PAD,$T2,$T2
3323 vpaddq $T2,$H2,$H2 # accumulate input
3350 #vpaddq $T2,$H2,$H2 # accumulate input
3392 vpsrldq \$8,$D2lo,$T2
3398 vpaddq $T2,$D2lo,$D2lo
3405 vpermq \$0x2,$D2lo,$T2
3409 vpaddq $T2,$D2lo,${D2lo}{%k1}{z}
3465 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3567 vpunpcklqdq $R2,$RR2,$T2 # 3748
3577 map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3581 vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748
3585 vmovdqu64 16*0($inp),$T2 # load data
3606 vpunpcklqdq $T3,$T2,$T1 # transpose data
3607 vpunpckhqdq $T3,$T2,$T3
3611 vpsrlq \$24,$T3,$T2 # splat the data
3612 vporq $PAD,$T2,$T2
3613 vpaddq $T2,$H2,$H2 # accumulate input
3626 #vpaddq $T2,$H2,$H2 # accumulate input
3643 vmovdqu64 16*0($inp),$T2 # load data
3653 vpunpcklqdq $T3,$T2,$T1 # transpose data
3654 vpunpckhqdq $T3,$T2,$T3
3669 vpsrlq \$24,$T3,$T2
3670 vporq $PAD,$T2,$T2
3688 vpaddq $T2,$H2,$H2 # accumulate input
3705 #vpaddq $T2,$H2,$H2 # accumulate input
3747 vpsrldq \$8,$D2lo,$T2
3753 vpaddq $T2,$D2lo,$D2lo
3760 vpermq \$0x2,$D2lo,$T2
3766 vpaddq $T2,$D2lo,$D2lo
3771 vextracti64x4 \$1,$D2lo,%y#$T2
3777 map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3784 vpaddq $T2,$D2lo,${D2lo}{%k1}{z}