Lines Matching refs:T2
420 my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
891 vpsrldq \$6,$T0,$T2 # splat input
895 vpunpcklqdq $T3,$T2,$T3 # 2:3
900 vpsrlq \$4,$T3,$T2
903 vpand $MASK,$T2,$T2 # 2
983 vpmuludq $T2,$D4,$D2 # d2 = h2*r0
995 vpmuludq $T2,$H2,$H0 # h2*r1
1002 vpmuludq $T2,$H3,$H0 # h2*r2
1028 vpmuludq $T2,$H3,$H3 # h2*s3
1029 vpmuludq $T2,$H4,$T2 # h2*s4
1033 vpaddq $T2,$D1,$D1 # d1 += h2*s4
1086 vmovdqa -0x80(%r11),$T2 # r1^4
1096 vpmuludq $H2,$T2,$T1 # h2*r1
1097 vpmuludq $H3,$T2,$T0 # h3*r1
1101 vpmuludq $H1,$T2,$T1 # h1*r1
1102 vpmuludq $H0,$T2,$T2 # h0*r1
1104 vpaddq $T2,$D1,$D1 # d1 += h0*r1
1111 vmovdqa -0x40(%r11),$T2 # r3^4
1118 vpmuludq $H1,$T2,$T1 # h1*r3
1123 vpmuludq $H0,$T2,$T2 # h0*r3
1125 vpaddq $T2,$D3,$D3 # d3 += h0*r3
1128 vpmuludq $H3,$T3,$T2 # h3*s3
1130 vpaddq $T2,$D1,$D1 # d1 += h3*s3
1136 vpsrldq \$6,$T0,$T2 # splat input
1149 vpunpcklqdq $T3,$T2,$T3 # 2:3
1156 vpsrlq \$4,$T3,$T2
1160 vpand $MASK,$T2,$T2 # 2
1209 vpaddq $H2,$T2,$T2
1228 vpmuludq $T2,$D4,$D2 # d2 = h2*r0
1238 vpmuludq $T2,$H2,$H1 # h2*r1
1249 vpmuludq $T2,$H4,$H1 # h2*r2
1272 vpmuludq $T2,$H4,$H4 # h2*s3
1281 vpmuludq $T2,$H3,$H1 # h2*s4
1323 vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n
1329 vpmuludq $H3,$T2,$T0 # h3*r1
1332 vpmuludq $H2,$T2,$T1 # h2*r1
1335 vpmuludq $H1,$T2,$T0 # h1*r1
1337 vpmuludq $H0,$T2,$T2 # h0*r1
1338 vpaddq $T2,$D1,$D1 # d1 += h0*r1
1342 vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2
1350 vpmuludq $H4,$T2,$T1 # h4*s2
1353 vpmuludq $H3,$T2,$T2 # h3*s2
1354 vpaddq $T2,$D0,$D0 # d0 += h3*s2
1360 vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4
1369 vpmuludq $H0,$T2,$T2 # h0*r4
1370 vpaddq $T2,$D4,$D4 # d4 += h0*r4
1388 vpsrldq \$8,$D2,$T2
1393 vpaddq $T2,$D2,$D2
1517 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
1835 vmovdqu `16*0-64`($ctx),%x#$T2
1844 vpermd $T2,$T0,$T2 # 00003412 -> 14243444
1849 vmovdqa $T2,0x00(%rsp)
1874 vpsrldq \$6,$T0,$T2 # splat input
1877 vpunpcklqdq $T3,$T2,$T2 # 2:3
1880 vpsrlq \$30,$T2,$T3
1881 vpsrlq \$4,$T2,$T2
1884 vpand $MASK,$T2,$T2 # 2
1890 vpaddq $H2,$T2,$H2 # accumulate input
1904 #vpaddq $H2,$T2,$H2 # accumulate input
1910 vmovdqa `32*3`(%rsp),$T2 # r2^4
1932 vpmuludq $H2,$T2,$D4 # d4 = h2*r2
1963 vpmuludq $H1,$T2,$T4 # h1*r2
1964 vpmuludq $H0,$T2,$T2 # h0*r2
1966 vpaddq $T2,$D2,$D2 # d2 += h0*r2
1972 vpsrldq \$6,$T0,$T2 # splat input
1987 vpunpcklqdq $T3,$T2,$T3 # 2:3
2008 vpsrlq \$4,$T3,$T2
2018 vpand $MASK,$T2,$T2 # 2
2025 vpaddq $T2,$H2,$H2 # modulo-scheduled
2054 #vpaddq $H2,$T2,$H2 # accumulate input
2060 vmovdqu `32*3+4`(%rsp),$T2 # r2^4
2067 vpmuludq $H2,$T2,$D4 # d4 = h2*r2
2095 vpmuludq $H1,$T2,$T4 # h1*r2
2096 vpmuludq $H0,$T2,$T2 # h0*r2
2098 vpaddq $T2,$D2,$D2 # d2 += h0*r2
2123 vpsrldq \$8,$H2,$T2
2128 vpaddq $T2,$H2,$H2
2137 vpermq \$0x2,$H2,$T2
2142 vpaddq $T2,$H2,$H2
2212 map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain
2246 vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2
2260 vpermd $D0,$T2,$R0 # 00003412 -> 14243444
2262 vpermd $D1,$T2,$R1
2263 vpermd $T0,$T2,$S1
2264 vpermd $D2,$T2,$R2
2267 vpermd $T1,$T2,$S2
2270 vpermd $D3,$T2,$R3
2272 vpermd $T3,$T2,$S3
2273 vpermd $D4,$T2,$R4
2275 vpermd $T4,$T2,$S4
2296 vpsrlq \$32,$R2,$T2
2310 vpmuludq $T2,$S3,$M0
2311 vpmuludq $T2,$S4,$M1
2312 vpmuludq $T2,$R1,$M3
2313 vpmuludq $T2,$R2,$M4
2314 vpmuludq $T2,$R0,$M2
2423 vpsrlq \$52,$T0,$T2 # splat input
2425 vporq $T3,$T2,$T2
2429 vpandq $MASK,$T2,$T2 # 2
2435 vpaddq $H2,$T2,$H2 # accumulate input
2453 #vpaddq $H2,$T2,$H2 # accumulate input
2537 vpsrlq \$52,$T0,$T2 # splat input
2544 vporq $T3,$T2,$T2
2550 vpandq $MASK,$T2,$T2 # 2
2563 vpaddq $T2,$H2,$H2 # modulo-scheduled
2610 #vpaddq $H2,$T2,$H2 # accumulate input
2712 map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
2720 vpsrldq \$6,$T0,$T2 # splat input
2727 vpunpcklqdq $T3,$T2,$T2 # 2:3
2736 vpsrlq \$30,$T2,$T3
2737 vpsrlq \$4,$T2,$T2
2748 vpand $MASK,$T2,$T2 # 2
2754 vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2
2768 vpsubq $T2,$H2,$H2 # undo input accumulation
3025 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3069 vmovdqu64 16*0($inp),$T2 # load data
3073 vpunpcklqdq $T3,$T2,$T1 # transpose data
3074 vpunpckhqdq $T3,$T2,$T3
3078 vpsrlq \$24,$T3,$T2 # splat the data
3079 vporq $PAD,$T2,$T2
3080 vpaddq $T2,$H2,$H2 # accumulate input
3239 vmovdqu64 16*0($inp),$T2 # load data
3243 vpunpcklqdq $T3,$T2,$T1 # transpose data
3244 vpunpckhqdq $T3,$T2,$T3
3248 vpsrlq \$24,$T3,$T2 # splat the data
3249 vporq $PAD,$T2,$T2
3250 vpaddq $T2,$H2,$H2 # accumulate input
3262 #vpaddq $T2,$H2,$H2 # accumulate input
3279 vmovdqu64 16*0($inp),$T2 # load data
3289 vpunpcklqdq $T3,$T2,$T1 # transpose data
3290 vpunpckhqdq $T3,$T2,$T3
3305 vpsrlq \$24,$T3,$T2
3306 vporq $PAD,$T2,$T2
3324 vpaddq $T2,$H2,$H2 # accumulate input
3351 #vpaddq $T2,$H2,$H2 # accumulate input
3393 vpsrldq \$8,$D2lo,$T2
3399 vpaddq $T2,$D2lo,$D2lo
3406 vpermq \$0x2,$D2lo,$T2
3410 vpaddq $T2,$D2lo,${D2lo}{%k1}{z}
3466 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3568 vpunpcklqdq $R2,$RR2,$T2 # 3748
3578 map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3582 vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748
3586 vmovdqu64 16*0($inp),$T2 # load data
3607 vpunpcklqdq $T3,$T2,$T1 # transpose data
3608 vpunpckhqdq $T3,$T2,$T3
3612 vpsrlq \$24,$T3,$T2 # splat the data
3613 vporq $PAD,$T2,$T2
3614 vpaddq $T2,$H2,$H2 # accumulate input
3627 #vpaddq $T2,$H2,$H2 # accumulate input
3644 vmovdqu64 16*0($inp),$T2 # load data
3654 vpunpcklqdq $T3,$T2,$T1 # transpose data
3655 vpunpckhqdq $T3,$T2,$T3
3670 vpsrlq \$24,$T3,$T2
3671 vporq $PAD,$T2,$T2
3689 vpaddq $T2,$H2,$H2 # accumulate input
3706 #vpaddq $T2,$H2,$H2 # accumulate input
3748 vpsrldq \$8,$D2lo,$T2
3754 vpaddq $T2,$D2lo,$D2lo
3761 vpermq \$0x2,$D2lo,$T2
3767 vpaddq $T2,$D2lo,$D2lo
3772 vextracti64x4 \$1,$D2lo,%y#$T2
3778 map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3785 vpaddq $T2,$D2lo,${D2lo}{%k1}{z}