Lines Matching refs:tptr
1029 lea ($tp,$num),%rbx # tptr in .sqr4x_sub
1096 my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
1328 lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
1330 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
1338 mov $A0[0],-24($tptr,$i) # t[1]
1344 mov $A0[1],-16($tptr,$i) # t[2]
1362 mov $A0[0],-8($tptr,$j) # t[3]
1387 mov $A0[1],($tptr,$j) # t[4]
1403 mov $A0[0],8($tptr,$j) # t[5]
1420 mov $A0[1],16($tptr,$j) # t[6]
1432 mov $A0[0],-8($tptr,$j) # t[7]
1444 mov $A1[1],($tptr) # t[8]
1446 mov %rdx,8($tptr) # t[9]
1452 lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
1454 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
1459 mov -24($tptr,$i),$A0[0] # t[1]
1463 mov $A0[0],-24($tptr,$i) # t[1]
1470 add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2]
1473 mov $A0[1],-16($tptr,$i) # t[2]
1482 add -8($tptr,$i),$A1[0]
1493 mov $A0[0],-8($tptr,$i) # t[3]
1506 add ($tptr,$j),$A1[1]
1521 mov $A0[1],($tptr,$j) # t[4]
1525 add 8($tptr,$j),$A1[0]
1536 mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below
1548 mov $A1[1],($tptr) # t[6], "preloaded t[2]" below
1550 mov %rdx,8($tptr) # t[7], "preloaded t[3]" below
1557 lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
1559 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
1572 mov $A0[0],-24($tptr) # t[1]
1582 mov $A0[1],-16($tptr) # t[2]
1593 mov $A0[0],-8($tptr) # t[3]
1602 mov $A1[1],($tptr) # t[4]
1604 mov %rdx,8($tptr) # t[5]
1619 mov %rax,8($tptr) # t[5]
1620 mov %rdx,16($tptr) # t[6]
1621 mov $carry,24($tptr) # t[7]
1624 lea 48+8(%rsp),$tptr
1626 mov 8($tptr),$A0[1] # t[1]
1633 mov 16($tptr),$A0[0] # t[2*i+2] # prefetch
1637 mov 24($tptr),$A0[1] # t[2*i+2+1] # prefetch
1640 mov $S[0],($tptr)
1644 mov $S[1],8($tptr)
1650 mov 32($tptr),$A0[0] # t[2*i+2] # prefetch
1654 mov 40($tptr),$A0[1] # t[2*i+2+1] # prefetch
1657 mov $S[2],16($tptr)
1660 mov $S[3],24($tptr)
1662 lea 64($tptr),$tptr
1672 mov -16($tptr),$A0[0] # t[2*i+2] # prefetch
1676 mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1679 mov $S[0],-32($tptr)
1683 mov $S[1],-24($tptr)
1689 mov 0($tptr),$A0[0] # t[2*i+2] # prefetch
1693 mov 8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1696 mov $S[2],-16($tptr)
1700 mov $S[3],-8($tptr)
1706 mov 16($tptr),$A0[0] # t[2*i+2] # prefetch
1710 mov 24($tptr),$A0[1] # t[2*i+2+1] # prefetch
1713 mov $S[0],0($tptr)
1717 mov $S[1],8($tptr)
1723 mov 32($tptr),$A0[0] # t[2*i+2] # prefetch
1727 mov 40($tptr),$A0[1] # t[2*i+2+1] # prefetch
1730 mov $S[2],16($tptr)
1732 mov $S[3],24($tptr)
1734 lea 64($tptr),$tptr
1744 mov -16($tptr),$A0[0] # t[2*i+2] # prefetch
1748 mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1751 mov $S[0],-32($tptr)
1755 mov $S[1],-24($tptr)
1765 mov $S[2],-16($tptr)
1766 mov $S[3],-8($tptr)
1777 my ($nptr,$tptr,$carry,$m0)=("%rbp","%rdi","%rsi","%rbx");
1786 lea 48+8(%rsp,$num),$tptr # end of initial t[] window
1793 lea ($tptr,$num),$tptr # start of current t[] window
1795 mov 8*0($tptr),$m0
1796 mov 8*1($tptr),%r9
1797 mov 8*2($tptr),%r10
1798 mov 8*3($tptr),%r11
1799 mov 8*4($tptr),%r12
1800 mov 8*5($tptr),%r13
1801 mov 8*6($tptr),%r14
1802 mov 8*7($tptr),%r15
1804 lea 8*8($tptr),$tptr
1891 add 8*0($tptr),%r8
1892 adc 8*1($tptr),%r9
1893 adc 8*2($tptr),%r10
1894 adc 8*3($tptr),%r11
1895 adc 8*4($tptr),%r12
1896 adc 8*5($tptr),%r13
1897 adc 8*6($tptr),%r14
1898 adc 8*7($tptr),%r15
1911 mov %r8,($tptr) # save result
1920 lea 8($tptr),$tptr # $tptr++
1984 adc 8*0($tptr),%r8
1985 adc 8*1($tptr),%r9
1986 adc 8*2($tptr),%r10
1987 adc 8*3($tptr),%r11
1988 adc 8*4($tptr),%r12
1989 adc 8*5($tptr),%r13
1990 adc 8*6($tptr),%r14
1991 adc 8*7($tptr),%r15
2012 adc 8*0($tptr),%r8
2013 adc 8*1($tptr),%r9
2014 adc 8*2($tptr),%r10
2015 adc 8*3($tptr),%r11
2016 adc 8*4($tptr),%r12
2017 adc 8*5($tptr),%r13
2018 adc 8*6($tptr),%r14
2019 adc 8*7($tptr),%r15
2026 mov %r8,8*0($tptr) # store top 512 bits
2027 mov %r9,8*1($tptr)
2029 mov %r10,8*2($tptr)
2030 mov %r11,8*3($tptr)
2031 mov %r12,8*4($tptr)
2032 mov %r13,8*5($tptr)
2033 mov %r14,8*6($tptr)
2034 mov %r15,8*7($tptr)
2035 lea 8*8($tptr),$tptr
2037 cmp %rdx,$tptr # end of t[]?
2049 my ($tptr,$nptr)=("%rbx","%rbp");
2056 lea (%rdi,$num),$tptr # %rdi was $tptr above
2087 adc 8*0($tptr),%r12
2088 adc 8*1($tptr),%r13
2089 adc 8*2($tptr),%r14
2090 adc 8*3($tptr),%r15
2092 lea 8*4($tptr),$tptr
2245 my ($aptr, $bptr, $nptr, $tptr, $mi, $bi, $zero, $num)=
2336 lea 64+8*4+8(%rsp),$tptr
2366 mov %r10,-8*4($tptr)
2371 mov %r11,-8*3($tptr)
2375 mov %r12,-8*2($tptr)
2393 lea 4*8($tptr),$tptr
2403 mov %r10,-5*8($tptr)
2405 mov %r11,-4*8($tptr)
2409 mov %r12,-3*8($tptr)
2413 mov %r13,-2*8($tptr)
2424 mov %r14,-1*8($tptr)
2429 lea 16-256($tptr),%r10 # where 256-byte mask is (+density control)
2457 mov $zero,($tptr) # save top-most carry
2458 lea 4*8($tptr,$num),$tptr # rewind $tptr
2463 adox -4*8($tptr),$mi # +t[0]
2466 adox -3*8($tptr),%r11
2469 adox -2*8($tptr),%r12
2473 adox -1*8($tptr),%r13
2496 mov %r10,-8*4($tptr)
2498 mov %r11,-8*3($tptr)
2500 mov %r12,-8*2($tptr)
2510 adcx 0*8($tptr),%r10
2513 adcx 1*8($tptr),%r11
2517 adcx 2*8($tptr),%r12
2519 adcx 3*8($tptr),%r13
2522 lea 4*8($tptr),$tptr
2533 mov %r10,-5*8($tptr)
2536 mov %r11,-4*8($tptr)
2540 mov %r12,-3*8($tptr)
2543 mov %r13,-2*8($tptr)
2550 sub 0*8($tptr),$bptr # pull top-most carry to %cf
2556 mov %r14,-1*8($tptr)
2566 lea ($tptr,$num),%rdi # rewind $tptr
2596 my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
2789 lea 48+8(%rsp),$tptr
2799 movdqa %xmm0,0*8($tptr)
2800 movdqa %xmm0,2*8($tptr)
2801 movdqa %xmm0,4*8($tptr)
2802 movdqa %xmm0,6*8($tptr)
2804 movdqa %xmm0,8*8($tptr)
2805 movdqa %xmm0,10*8($tptr)
2806 movdqa %xmm0,12*8($tptr)
2807 movdqa %xmm0,14*8($tptr)
2808 lea 16*8($tptr),$tptr
2820 lea 48+8(%rsp),$tptr
2848 adc 8*8($tptr),%r15
2849 mov %r8,1*8($tptr) # t[1]
2850 mov %r9,2*8($tptr) # t[2]
2876 mov %r8,3*8($tptr) # t[3]
2877 mov %r9,4*8($tptr) # t[4]
2895 mov %r8,5*8($tptr) # t[5]
2896 mov %r9,6*8($tptr) # t[6]
2917 mov %r8,7*8($tptr) # t[7]
2918 mov %r9,8*8($tptr) # t[8]
2953 mov 8*8($tptr),%r8
2954 adcx 9*8($tptr),%r9 # +=t[9]
2955 adcx 10*8($tptr),%r10 # ...
2956 adcx 11*8($tptr),%r11
2957 adc 12*8($tptr),%r12
2958 adc 13*8($tptr),%r13
2959 adc 14*8($tptr),%r14
2960 adc 15*8($tptr),%r15
2962 lea 2*64($tptr),$tptr
2967 mov $tptr,24+8(%rsp)
2969 #lea 8*8($tptr),$tptr # see 2*8*8($tptr) above
3001 mov %rbx,($tptr,%rcx,8) # store t[8+i]
3024 adcx 0*8($tptr),%r8
3025 adcx 1*8($tptr),%r9
3026 adc 2*8($tptr),%r10
3027 adc 3*8($tptr),%r11
3028 adc 4*8($tptr),%r12
3029 adc 5*8($tptr),%r13
3030 adc 6*8($tptr),%r14
3031 adc 7*8($tptr),%r15
3032 lea 8*8($tptr),$tptr
3044 mov 24+8(%rsp),$carry # initial $tptr, borrow $carry
3048 mov %r8,0*8($tptr)
3054 cmp $carry,$tptr # cf=0, of=0
3057 mov %r9,1*8($tptr)
3059 mov %r10,2*8($tptr)
3061 mov %r11,3*8($tptr)
3063 mov %r12,4*8($tptr)
3065 mov %r13,5*8($tptr)
3067 mov %r14,6*8($tptr)
3069 mov %r15,7*8($tptr)
3071 mov $carry,$tptr
3076 mov %r9,9*8($tptr) # t[9]
3078 mov %r10,10*8($tptr) # ...
3079 mov %r11,11*8($tptr)
3080 mov %r12,12*8($tptr)
3081 mov %r13,13*8($tptr)
3082 mov %r14,14*8($tptr)
3088 lea 48+8(%rsp),$tptr
3091 mov 8($tptr),$A0[1] # t[1]
3095 mov 16($tptr),$A1[0] # t[2] # prefetch
3096 mov 24($tptr),$A1[1] # t[3] # prefetch
3105 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 # mov 32($tptr),$A0[0] # t[2*i+4] # prefetch
3108 mov 40($tptr),$A0[1] # t[2*i+4+1] # prefetch
3109 mov %rax,0($tptr)
3110 mov %rbx,8($tptr)
3116 mov 48($tptr),$A1[0] # t[2*i+6] # prefetch
3119 mov 56($tptr),$A1[1] # t[2*i+6+1] # prefetch
3120 mov %rax,16($tptr)
3121 mov %rbx,24($tptr)
3128 mov 64($tptr),$A0[0] # t[2*i+8] # prefetch
3131 mov 72($tptr),$A0[1] # t[2*i+8+1] # prefetch
3132 mov %rax,32($tptr)
3133 mov %rbx,40($tptr)
3142 mov 80($tptr),$A1[0] # t[2*i+10] # prefetch
3143 mov 88($tptr),$A1[1] # t[2*i+10+1] # prefetch
3144 mov %rax,48($tptr)
3145 mov %rbx,56($tptr)
3146 lea 64($tptr),$tptr
3153 mov %rax,48($tptr)
3154 mov %rbx,56($tptr)
3155 lea 64($tptr),$tptr # end of t[] buffer
3173 mov 48+8(%rsp),%rdx # "%r8", 8*0($tptr)
3175 #lea 48+8(%rsp,$num,2),$tptr # end of t[] buffer
3177 mov $tptr,8+8(%rsp) # save end of t[]
3179 lea 48+8(%rsp),$tptr # initial t[] window
3184 mov 8*1($tptr),%r9
3185 mov 8*2($tptr),%r10
3186 mov 8*3($tptr),%r11
3187 mov 8*4($tptr),%r12
3190 mov 8*5($tptr),%r13
3191 mov 8*6($tptr),%r14
3192 mov 8*7($tptr),%r15
3195 lea 8*8($tptr),$tptr
3252 add 8*0($tptr),%r8
3255 adcx 8*1($tptr),%r9
3256 adcx 8*2($tptr),%r10
3257 adc 8*3($tptr),%r11
3258 adc 8*4($tptr),%r12
3259 adc 8*5($tptr),%r13
3260 adc 8*6($tptr),%r14
3261 adc 8*7($tptr),%r15
3262 lea 8*8($tptr),$tptr
3304 mov %rbx,($tptr,%rcx,8) # save result
3317 adc 8*0($tptr),%r8
3318 adc 8*1($tptr),%r9
3319 adc 8*2($tptr),%r10
3320 adc 8*3($tptr),%r11
3321 adc 8*4($tptr),%r12
3322 adc 8*5($tptr),%r13
3323 adc 8*6($tptr),%r14
3324 adc 8*7($tptr),%r15
3325 lea 8*8($tptr),$tptr
3348 adc 8*0($tptr),%r8
3350 adc 8*1($tptr),%r9
3353 adc 8*2($tptr),%r10
3354 adc 8*3($tptr),%r11
3355 adc 8*4($tptr),%r12
3356 adc 8*5($tptr),%r13
3357 adc 8*6($tptr),%r14
3358 adc 8*7($tptr),%r15
3362 mov 8*8($tptr,%rcx),%rdx # modulo-scheduled "%r8"
3364 mov %r8,8*0($tptr) # store top 512 bits
3365 lea 8*8($tptr),%r8 # borrow %r8
3366 mov %r9,8*1($tptr)
3367 mov %r10,8*2($tptr)
3368 mov %r11,8*3($tptr)
3369 mov %r12,8*4($tptr)
3370 mov %r13,8*5($tptr)
3371 mov %r14,8*6($tptr)
3372 mov %r15,8*7($tptr)
3374 lea 8*8($tptr,%rcx),$tptr # start of current t[] window
3397 #lea 48+8(%rsp,%r9),$tptr
3421 adc 8*0($tptr),%r12
3422 adc 8*1($tptr),%r13
3423 adc 8*2($tptr),%r14
3424 adc 8*3($tptr),%r15
3426 lea 8*4($tptr),$tptr