Lines matching refs: np

84 my $np="%rdx";	# const BN_ULONG *np,
126 $np="%r13"; # reassigned argument
169 mov %rdx, $np # reassigned argument
171 mov $np, $tmp
174 sub \$-128, $np
176 and \$4095, $tmp # see if $np crosses page
183 # cause >2x performance degradation here, so if $np does
187 vmovdqu 32*0-128($np), $ACC0
189 vmovdqu 32*1-128($np), $ACC1
190 vmovdqu 32*2-128($np), $ACC2
191 vmovdqu 32*3-128($np), $ACC3
192 vmovdqu 32*4-128($np), $ACC4
193 vmovdqu 32*5-128($np), $ACC5
194 vmovdqu 32*6-128($np), $ACC6
195 vmovdqu 32*7-128($np), $ACC7
196 vmovdqu 32*8-128($np), $ACC8
197 lea $FrameSize+128(%rsp),$np
198 vmovdqu $ACC0, 32*0-128($np)
199 vmovdqu $ACC1, 32*1-128($np)
200 vmovdqu $ACC2, 32*2-128($np)
201 vmovdqu $ACC3, 32*3-128($np)
202 vmovdqu $ACC4, 32*4-128($np)
203 vmovdqu $ACC5, 32*5-128($np)
204 vmovdqu $ACC6, 32*6-128($np)
205 vmovdqu $ACC7, 32*7-128($np)
206 vmovdqu $ACC8, 32*8-128($np)
207 vmovdqu $ACC9, 32*9-128($np) # $ACC9 is zero
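
The group above (source lines 169-207) checks whether the modulus buffer sits close enough to the end of a 4 KiB page that one of the unaligned 256-bit loads would straddle it (the "see if $np crosses page" and ">2x performance degradation" comments) and, if so, copies the nine vectors it loads into the stack frame and repoints $np there. A minimal C sketch of that decision, assuming the 9*32-byte span implied by the nine vmovdqu loads; the exact margin used by the real check is not visible in this listing:

    #include <stdint.h>
    #include <string.h>

    /* Mirror of "and $4095, $tmp  # see if $np crosses page": conservative
     * test for the 288-byte modulus image reaching into the next page. */
    int np_crosses_page(const void *np)
    {
        uintptr_t off = (uintptr_t)np & 4095;   /* offset within the 4 KiB page */
        return off + 9 * 32 > 4096;
    }

    /* Mirror of the vmovdqu load/store pairs plus "lea ...(%rsp), $np":
     * if a straddle is possible, work from a copy inside the stack frame. */
    const void *pin_np(const void *np, void *stack_area)
    {
        if (!np_crosses_page(np))
            return np;
        memcpy(stack_area, np, 9 * 32);
        return stack_area;
    }
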
468 imulq -128($np), %rax
472 imulq 8-128($np), %rax
476 imulq 16-128($np), %rax
479 imulq 24-128($np), %rdx
494 vpmuludq 32*1-128($np), $Y1, $TEMP0
496 imulq -128($np), %rax
499 vpmuludq 32*2-128($np), $Y1, $TEMP1
501 imulq 8-128($np), %rax
503 vpmuludq 32*3-128($np), $Y1, $TEMP2
508 imulq 16-128($np), %rax
511 vpmuludq 32*4-128($np), $Y1, $TEMP0
515 vpmuludq 32*5-128($np), $Y1, $TEMP1
519 vpmuludq 32*6-128($np), $Y1, $TEMP2
522 vpmuludq 32*7-128($np), $Y1, $TEMP0
524 vpmuludq 32*8-128($np), $Y1, $TEMP1
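
The imulq/vpmuludq group above is one word step of Montgomery reduction: a multiplier derived from the lowest limb is multiplied by every limb of the modulus and accumulated, so the low limb of the running sum becomes divisible by the limb base and can be retired; the scalar imulq lines appear to carry a parallel scalar strand of the same computation alongside the vector unit. A scalar C sketch of that step, assuming the 29-bit redundant limbs this code is generally described as using; n0inv and the array names are illustrative, not taken from the source:

    #include <stdint.h>
    #include <stddef.h>

    #define LIMB_BITS 29
    #define LIMB_MASK ((1u << LIMB_BITS) - 1)

    /* One Montgomery word step, the role of the vpmuludq 32*k-128($np),$Y1
     * chain above: with n0inv == -n[0]^-1 mod 2^29, adding y*n clears the
     * low 29 bits of acc[0].  Carry propagation is left to the caller. */
    void mont_reduce_word(uint64_t acc[], const uint32_t n[], size_t limbs,
                          uint32_t n0inv)
    {
        uint32_t y = (uint32_t)(acc[0] * n0inv) & LIMB_MASK;
        for (size_t k = 0; k < limbs; k++)
            acc[k] += (uint64_t)y * n[k];   /* vpmuludq + vpaddq, 4 limbs per vector */
        /* acc[0] is now a multiple of 2^29 and can be shifted out. */
    }
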
526 #vmovdqu 32*1-8-128($np), $TEMP2 # moved below
528 #vmovdqu 32*2-8-128($np), $TEMP0 # moved below
531 vpmuludq 32*1-8-128($np), $Y2, $TEMP2 # see above
532 vmovdqu 32*3-8-128($np), $TEMP1
534 imulq -128($np), %rax
536 vpmuludq 32*2-8-128($np), $Y2, $TEMP0 # see above
537 vmovdqu 32*4-8-128($np), $TEMP2
540 imulq 8-128($np), %rax
545 vmovdqu 32*5-8-128($np), $TEMP0
549 vmovdqu 32*6-8-128($np), $TEMP1
555 .byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 # vmovdqu 32*7-8-128($np), $TEMP2
559 vmovdqu 32*8-8-128($np), $TEMP0
562 vmovdqu 32*9-8-128($np), $ACC9
564 imulq -128($np), %rax
567 vmovdqu 32*1-16-128($np), $TEMP1
571 vmovdqu 32*2-16-128($np), $TEMP2
577 vmovdqu 32*1-24-128($np), $ACC0
579 vmovdqu 32*3-16-128($np), $TEMP0
583 .byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff # vmovdqu 32*4-16-128($np), $TEMP1
587 vmovdqu 32*5-16-128($np), $TEMP2
593 vmovdqu 32*6-16-128($np), $TEMP0
596 vmovdqu 32*7-16-128($np), $TEMP1
599 vmovdqu 32*8-16-128($np), $TEMP2
603 vmovdqu 32*9-16-128($np), $TEMP0
607 #vmovdqu 32*2-24-128($np), $TEMP1 # moved below
614 vmovdqu 32*3-24-128($np), $TEMP2
619 vpmuludq 32*2-24-128($np), $Y2, $TEMP1 # see above
620 vmovdqu 32*4-24-128($np), $TEMP0
622 imulq -128($np), %rax
626 vmovdqu 32*5-24-128($np), $TEMP1
629 imulq 8-128($np), %rax
635 vmovdqu 32*6-24-128($np), $TEMP2
638 imulq 16-128($np), %rax
641 vmovdqu 32*7-24-128($np), $TEMP0
642 imulq 24-128($np), %rdx # future $r3
647 vmovdqu 32*8-24-128($np), $TEMP1
652 vmovdqu 32*9-24-128($np), $TEMP2
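
The -8, -16 and -24 variants of the same addresses (32*j-8-128($np) and so on) are not separate tables: each re-reads a modulus vector displaced by one, two or three 64-bit lanes, which appears to be how limb i of np is lined up with different accumulator columns of the four-wide SIMD multiply. What such an unaligned 32-byte load returns, assuming one limb per 64-bit lane:

    #include <stdint.h>
    #include <string.h>

    /* Model of "vmovdqu 32*j - 8*s - 128($np)" after the +128 bias: starting
     * 8*s bytes before vector j yields the last s lanes of vector j-1 followed
     * by the first 4-s lanes of vector j (s = 1, 2 or 3; j >= 1). */
    void load_np_shifted(uint64_t out[4], const uint64_t *np_limbs,
                         size_t j, unsigned s)
    {
        memcpy(out, (const uint8_t *)(np_limbs + 4 * j) - 8 * s, 32);
    }
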
842 my $np="%rcx"; # const BN_ULONG *np,
938 mov $np, $tmp
940 sub \$-128,$np
943 and \$4095, $tmp # see if $np crosses page
950 # cause severe performance degradation here, so if $np does
954 vmovdqu 32*0-128($np), $ACC0
956 vmovdqu 32*1-128($np), $ACC1
957 vmovdqu 32*2-128($np), $ACC2
958 vmovdqu 32*3-128($np), $ACC3
959 vmovdqu 32*4-128($np), $ACC4
960 vmovdqu 32*5-128($np), $ACC5
961 vmovdqu 32*6-128($np), $ACC6
962 vmovdqu 32*7-128($np), $ACC7
963 vmovdqu 32*8-128($np), $ACC8
964 lea 64+128(%rsp),$np
965 vmovdqu $ACC0, 32*0-128($np)
967 vmovdqu $ACC1, 32*1-128($np)
969 vmovdqu $ACC2, 32*2-128($np)
971 vmovdqu $ACC3, 32*3-128($np)
973 vmovdqu $ACC4, 32*4-128($np)
975 vmovdqu $ACC5, 32*5-128($np)
977 vmovdqu $ACC6, 32*6-128($np)
979 vmovdqu $ACC7, 32*7-128($np)
981 vmovdqu $ACC8, 32*8-128($np)
983 vmovdqu $ACC9, 32*9-128($np) # $ACC9 is zero after vzeroall
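
Both routines execute "sub \$-128, $np" before touching the data (source lines 174 and 940): subtracting -128 adds 128, and the -128 immediate itself still fits in a sign-extended byte, which an add of +128 would not. The point of the bias is that every later reference becomes 32*k-128($np); for k = 0..7 that displacement lies in [-128, 96] and encodes as a single signed byte (disp8) rather than four bytes, keeping these very frequent instructions short. The stack copies are installed with the bias already applied ("lea $FrameSize+128(%rsp),$np", "lea 64+128(%rsp),$np"), so the rest of the code never distinguishes the two cases. The same arithmetic in C terms:

    #include <stdint.h>

    /* Equivalent of "sub $-128, $np": bias the pointer by +128 so vector k is
     * addressed as biased_np + (32*k - 128), a disp8-friendly displacement
     * for k = 0..7. */
    static inline const uint8_t *bias_np(const uint8_t *np)
    {
        return np + 128;
    }

    static inline const uint8_t *np_vector(const uint8_t *biased_np, int k)
    {
        return biased_np + (32 * k - 128);   /* 32*k-128($np) in the listing */
    }
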
1045 imulq -128($np),%rax
1048 imulq 8-128($np),%rax
1051 imulq 16-128($np),%rax
1054 imulq 24-128($np),%rdx
1058 vpmuludq 32*1-128($np),$Yi,$TEMP2
1061 vpmuludq 32*2-128($np),$Yi,$TEMP0
1063 vpmuludq 32*3-128($np),$Yi,$TEMP1
1065 vpmuludq 32*4-128($np),$Yi,$TEMP2
1067 vpmuludq 32*5-128($np),$Yi,$TEMP0
1069 vpmuludq 32*6-128($np),$Yi,$TEMP1
1071 vpmuludq 32*7-128($np),$Yi,$TEMP2
1074 vpmuludq 32*8-128($np),$Yi,$TEMP0
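
The multiplication routine runs the same reduction with $Yi as the broadcast multiplier: each vpmuludq above forms the four 64-bit products of a modulus vector's 32-bit limbs with $Yi, and instructions outside this $np-only listing fold them into the accumulators. A one-step intrinsics sketch of that multiply-accumulate link; the vpaddq half is inferred from context rather than shown here:

    #include <immintrin.h>

    /* One link of the "vpmuludq 32*k-128($np),$Yi,$TEMP" chain: vpmuludq
     * (_mm256_mul_epu32) multiplies the low 32 bits of each 64-bit lane, and
     * the products are then added into the 64-bit accumulator lanes. */
    __m256i madd_np(__m256i acc, __m256i np_vec, __m256i yi)
    {
        return _mm256_add_epi64(acc, _mm256_mul_epu32(np_vec, yi));
    }
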
1124 imulq -128($np),%rax
1126 vmovdqu -8+32*1-128($np),$TEMP0
1128 imulq 8-128($np),%rax
1130 vmovdqu -8+32*2-128($np),$TEMP1
1132 imulq 16-128($np),%rdx
1138 vmovdqu -8+32*3-128($np),$TEMP2
1141 vmovdqu -8+32*4-128($np),$TEMP0
1144 vmovdqu -8+32*5-128($np),$TEMP1
1147 vmovdqu -8+32*6-128($np),$TEMP2
1150 vmovdqu -8+32*7-128($np),$TEMP0
1153 vmovdqu -8+32*8-128($np),$TEMP1
1156 vmovdqu -8+32*9-128($np),$TEMP2
1204 vmovdqu -16+32*1-128($np),$TEMP0
1206 imulq -128($np),%rax
1208 vmovdqu -16+32*2-128($np),$TEMP1
1209 imulq 8-128($np),%rdx
1215 vmovdqu -16+32*3-128($np),$TEMP2
1218 vmovdqu -16+32*4-128($np),$TEMP0
1221 vmovdqu -16+32*5-128($np),$TEMP1
1224 vmovdqu -16+32*6-128($np),$TEMP2
1227 vmovdqu -16+32*7-128($np),$TEMP0
1230 vmovdqu -16+32*8-128($np),$TEMP1
1233 vmovdqu -16+32*9-128($np),$TEMP2
1280 vmovdqu -24+32*1-128($np),$TEMP0
1281 imulq -128($np),%rax
1285 vmovdqu -24+32*2-128($np),$TEMP1
1288 vmovdqu -24+32*3-128($np),$TEMP2
1293 vmovdqu -24+32*4-128($np),$TEMP0
1295 vmovdqu -24+32*5-128($np),$TEMP1
1298 vmovdqu -24+32*6-128($np),$TEMP2
1301 vmovdqu -24+32*7-128($np),$TEMP0
1304 vmovdqu -24+32*8-128($np),$TEMP1
1307 vmovdqu -24+32*9-128($np),$TEMP2
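
Taken together, the offsets describe the data layout: ten 32-byte vectors per operand, 32*0-128($np) through 32*9-128($np), four 64-bit lanes each, with the last vector zero ("$ACC9 is zero"). That is consistent with the redundant base-2^29 representation this AVX2 code is generally described as using: 1024 bits split into 36 limbs of 29 bits, one limb per lane, padded out to 40 lanes. A hypothetical conversion sketch (the real code converts with SIMD shifts and masks, and the limb width is an assumption here):

    #include <stdint.h>

    static uint64_t word_at(const uint64_t w[16], unsigned i)
    {
        return i < 16 ? w[i] : 0;
    }

    /* Split a 1024-bit little-endian value (16 x 64-bit words) into 40 lanes
     * of 29-bit limbs, the layout the 32*0..32*9-128($np) references appear
     * to assume.  Only the first 36 limbs can be non-zero. */
    void to_limbs29(uint64_t limbs[40], const uint64_t words[16])
    {
        for (unsigned i = 0; i < 40; i++) {
            unsigned bit = 29 * i, w = bit >> 6, b = bit & 63;
            uint64_t v = word_at(words, w) >> b;
            if (b > 64 - 29)                       /* limb straddles two words */
                v |= word_at(words, w + 1) << (64 - b);
            limbs[i] = v & ((1u << 29) - 1);
        }
    }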