Lines Matching refs:T2

392 my ($D0,$D1,$D2,$D3,$D4,$T0,$T1,$T2)=map("xmm$_",(0..7));
393 my $MASK=$T2; # borrow and keep in mind
477 # As for choice to "rotate" $T0-$T2 in order to move paddq
484 &movdqa ($T2,$T1);
487 &movdqa ($T0,$T2);
488 &pmuludq ($T2,&QWP(16*1,$base)); # r1*h1
492 &paddq ($D2,$T2);
494 &$load ($T2,2); # r2^n
497 &movdqa ($T0,$T2);
498 &pmuludq ($T2,&QWP(16*2,$base)); # r2*h2
502 &paddq ($D4,$T2);
503 &$load ($T2,6); # s2^n
506 &movdqa ($T0,$T2);
507 &pmuludq ($T2,&QWP(16*4,$base)); # s2*h4
511 &paddq ($D1,$T2);
513 &movdqa ($T2,$T1);
517 &pmuludq ($T2,&QWP(16*0,$base)); # r3*h0
521 &paddq ($D3,$T2);
522 &movdqa ($T2,$T1);
525 &pmuludq ($T2,&QWP(16*2,$base)); # s3*h2
531 &paddq ($D0,$T2);
532 &movdqa ($T2,$T1);
535 &movdqa ($T0,$T2);
536 &pmuludq ($T2,&QWP(16*1,$base)); # s4*h1
540 &paddq ($D0,$T2);
891 &movdqa ($T2,&QWP(16*(0-9),"edx")); # r0^2
905 &pmuludq ($T0,$T2); # h0*r0
907 &pmuludq ($T1,$T2); # h1*r0
908 &pmuludq ($D2,$T2); # h2*r0
909 &pmuludq ($D3,$T2); # h3*r0
910 &pmuludq ($D4,$T2); # h4*r0
916 &movdqa ($T2,$D1);
919 &movdqa ($T0,$T2);
920 &pmuludq ($T2,&$addr(2)); # h0*r2
924 &paddq ($D2,$T2);
925 &movdqa ($T2,&QWP(16*1,"eax")); # pull h1
929 &movdqa ($T0,$T2);
930 &pmuludq ($T2,&$addr(1)); # h1*r1
934 &paddq ($D2,$T2);
935 &movdqa ($T2,&QWP(16*2,"eax")); # pull h2
938 &movdqa ($T0,$T2);
939 &pmuludq ($T2,&$addr(7)); # h2*s3
943 &paddq ($D0,$T2);
945 &movdqa ($T2,$T1);
949 &pmuludq ($T2,&$addr(2)); # h2*r2
953 &paddq ($D4,$T2);
954 &movdqa ($T2,$T1);
957 &movdqa ($T0,$T2);
958 &pmuludq ($T2,&$addr(8)); # h3*s4
963 &paddq ($D2,$T2);
964 &movdqa ($T2,$T1);
967 &movdqa ($T0,$T2);
968 &pmuludq ($T2,&$addr(5)); # h4*s1
972 &paddq ($D0,$T2);
993 &movdqa ($T2,&QWP(16*0,"edx")); # r0^4
1008 &pmuludq ($T0,$T2); # h0*r0
1011 &pmuludq ($T1,$T2); # h1*r0
1012 &pmuludq ($D2,$T2); # h2*r0
1013 &pmuludq ($D3,$T2); # h3*r0
1014 &pmuludq ($D4,$T2); # h4*r0
1033 &pshufd ($T2,&QWP(16*(0-9),"edx"),0x10);# r0^n
1058 &pmuludq ($T0,$T2); # h0*r0
1059 &pmuludq ($T1,$T2); # h1*r0
1060 &pmuludq ($D2,$T2); # h2*r0
1063 &pmuludq ($D3,$T2); # h3*r0
1065 &pmuludq ($D4,$T2); # h4*r0
1075 &pshufd ($T2,&QWP(16*0,"edx"),0x10); # r0^n
1086 &pmuludq ($T0,$T2); # h0*r0
1088 &pmuludq ($T1,$T2); # h1*r0
1091 &pmuludq ($D2,$T2); # h2*r0
1094 &pmuludq ($D3,$T2); # h3*r0
1101 &pmuludq ($D4,$T2); # h4*r0
1306 &vpmuludq ($T2,$T1,&QWP(16*2,"edx")); # r1*h2
1307 &vpaddq ($D3,$D3,$T2);
1310 &vmovdqa ($T2,&QWP(16*5,"esp")); # s1
1314 &vpmuludq ($T2,$T2,&QWP(16*4,"edx")); # s1*h4
1315 &vpaddq ($D0,$D0,$T2);
1319 &vpmuludq ($T2,$T0,&QWP(16*1,"edx")); # r2*h1
1320 &vpaddq ($D3,$D3,$T2);
1324 &vpmuludq ($T2,$T1,&QWP(16*4,"edx")); # s2*h4
1325 &vpaddq ($D1,$D1,$T2);
1330 &vpmuludq ($T2,$T0,&QWP(16*1,"edx")); # r3*h1
1331 &vpaddq ($D4,$D4,$T2);
1335 &vpmuludq ($T2,$T1,&QWP(16*4,"edx")); # s3*h4
1336 &vpaddq ($D2,$D2,$T2);
1339 &vmovdqa ($T2,&QWP(16*4,"esp")); # r4
1344 &vpmuludq ($T2,$T2,&QWP(16*0,"edx")); # r4*h0
1345 &vpaddq ($D4,$D4,$T2);
1348 &vpmuludq ($T2,$T0,&QWP(16*1,"edx")); # s4*h1
1349 &vpaddq ($D0,$D0,$T2);
1437 my ($D0,$D1,$D2,$D3,$D4,$T0,$T1,$T2)=map("ymm$_",(0..7));
1438 my $MASK=$T2;
1658 &vpmuludq ($T2,$T0,&$addr(3)); # h0*r3
1659 &vpaddq ($D3,$D3,$T2); # d3 += h0*r3
1662 &vpmuludq ($T2,$T0,&$addr(0)); # h0*r0
1663 &vpaddq ($D0,$D0,$T2); # d0 + h0*r0
1664 &vmovdqa ($T2,&QWP(32*1,"esp")); # h1
1670 &vpmuludq ($T1,$T2,&$addr(2)); # h1*r2
1672 &vpmuludq ($T0,$T2,&$addr(3)); # h1*r3
1674 &vpmuludq ($T1,$T2,&$addr(8)); # h1*s4
1677 &vpmuludq ($T0,$T2,&$addr(0)); # h1*r0
1679 &vpmuludq ($T2,$T2,&$addr(1)); # h1*r1
1680 &vpaddq ($D2,$D2,$T2); # d2 += h1*r1
1684 &vpmuludq ($T2,$T1,&$addr(1)); # h3*r1
1685 &vpaddq ($D4,$D4,$T2); # d4 += h3*r1
1689 &vpmuludq ($T2,$T1,&$addr(7)); # h3*s3
1690 &vpaddq ($D1,$D1,$T2); # d1+= h3*s3
1694 &vpmuludq ($T2,$T0,&$addr(8)); # h4*s4
1695 &vpaddq ($D3,$D3,$T2); # d3 += h4*s4
1698 &vpmuludq ($T2,$T0,&$addr(0)); # h4*r0
1699 &vpaddq ($D4,$D4,$T2); # d4 += h4*r0