#! /usr/bin/env perl
# Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# December 2005
#
# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
# for undertaking the effort are multiple. First of all, UltraSPARC is
# not the whole SPARCv9 universe and other VIS-free implementations
# deserve optimized code as much. Secondly, the newly introduced
# UltraSPARC T1, a.k.a. Niagara, has a shared FPU, and concurrent
# FPU-intensive paths, such as sparcv9a-mont, will simply sink it. Yes,
# T1 is equipped with several integrated RSA/DSA accelerator circuits
# accessible through a kernel driver [only(*)], but having a decent
# user-land software implementation is important too. Finally, there
# are reasons like the desire to experiment with a dedicated squaring
# procedure. Yes, this module implements one, because it was easiest to
# draft it in SPARCv9 instructions...

# (*)	Engine accessing the driver in question is on my TODO list.
#	For reference, the accelerator is estimated to give a 6 to 10
#	times improvement on single-threaded RSA sign. It should be
#	noted that a 6-10x improvement coefficient does not actually
#	mean something extraordinary in terms of absolute
#	[single-threaded] performance, as the SPARCv9 instruction set
#	is by all means the least suitable for high performance crypto
#	among 64-bit platforms. The 6-10x factor simply places T1 in
#	the same performance domain as, say, AMD64 and IA-64. The
#	improvement of RSA verify doesn't appear impressive at all, but
#	it's the sign operation which is far more critical/interesting.

# You might notice that inner loops are modulo-scheduled:-) This has
# essentially negligible impact on UltraSPARC performance, it's
# Fujitsu SPARC64 V users who should notice and hopefully appreciate
# the advantage... Currently this module surpasses sparcv9a-mont.pl
# by ~20% on UltraSPARC-III and later cores, but recall that the
# sparcv9a module still has hidden potential [see TODO list there],
# which is estimated to be larger than 20%...

# --------------------------------------------------------------------
# Overview of what this script emits (derived from the templates below)
#
# Usage: perl sparcv9-mont.pl [output-file]
#
# The script prints one assembly routine, bn_mul_mont_int(rp, ap, bp,
# np, n0, num).  All vector accesses are 32-bit ld/st with a 4-byte
# index step, and num is converted to a byte count on entry (num*=4).
#
#   * Entry: returns 0 straight away when num < 4 ("128 bits minimum").
#     The check reads %o5 because it runs before `save` opens the new
#     register window; after `save` the same argument is $num (%i5).
#   * A scratch vector tp is carved out of the stack: the biased stack
#     pointer is lowered by num bytes and rounded down to a 1024-byte
#     boundary; tp starts at %sp+STACK_BIAS+STACK_FRAME.
#   * If ap == bp the dedicated squaring path .Lbn_sqr_mont is taken,
#     otherwise the generic path (.L1st, then .Louter/.Linner).
#   * Both paths finish in .Ltail: .Lsub stores tp-np into rp with
#     borrow propagation, the top carry bit ($car2) is folded into the
#     borrow, and .Lcopy overwrites rp with tp when the subtraction
#     borrowed.  .Lcopy also zeroes tp.  The routine then returns 1.
#
# STACK_FRAME, STACK_BIAS and SIZE_T_CC are not defined here; they are
# expected to come from the included "crypto/sparc_arch.h".
# --------------------------------------------------------------------

# The output file, if any, is the last command-line argument.  Without
# one the assembly goes to the inherited STDOUT.  A failure to open the
# file is fatal, so that the output cannot silently end up elsewhere.
$output = pop;
if ($output) {
    open STDOUT, ">", $output
        or die "can't open $output: $!";
}

# Incoming arguments as seen after `save` (i.e. in the %i registers).
# int bn_mul_mont(
$rp="%i0";	# BN_ULONG *rp,
$ap="%i1";	# const BN_ULONG *ap,
$bp="%i2";	# const BN_ULONG *bp,
$np="%i3";	# const BN_ULONG *np,
$n0="%i4";	# const BN_ULONG *n0,
$num="%i5";	# int num);

# Symbolic names substituted verbatim into the assembly text.
$frame="STACK_FRAME";
$bias="STACK_BIAS";

# Carry chains and accumulators.
$car0="%o0";
$car1="%o1";
$car2="%o2";	# 1 bit
$acc0="%o3";
$acc1="%o4";
$mask="%g1";	# 32 bits, what a waste...
$tmp0="%g4";
$tmp1="%g5";

# Loop counters (byte offsets), multipliers and current vector elements.
$i="%l0";
$j="%l1";
$mul0="%l2";
$mul1="%l3";
$tp="%l4";
$apj="%l5";
$npj="%l6";
$tpj="%l7";

$fname="bn_mul_mont_int";

# Generic multiplication path plus the shared .Ltail (final subtraction,
# conditional copy into rp, and wiping of the scratch vector).
$code=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

.section	".text",#alloc,#execinstr

.global	$fname
.align	32
$fname:
	cmp	%o5,4			! 128 bits minimum
	bge,pt	%icc,.Lenter
	sethi	%hi(0xffffffff),$mask
	retl
	clr	%o0
.align	32
.Lenter:
	save	%sp,-$frame,%sp
	sll	$num,2,$num		! num*=4
	or	$mask,%lo(0xffffffff),$mask
	ld	[$n0],$n0
	cmp	$ap,$bp
	and	$num,$mask,$num
	ld	[$bp],$mul0		! bp[0]
	nop

	add	%sp,$bias,%o7		! real top of stack
	ld	[$ap],$car0		! ap[0] ! redundant in squaring context
	sub	%o7,$num,%o7
	ld	[$ap+4],$apj		! ap[1]
	and	%o7,-1024,%o7
	ld	[$np],$car1		! np[0]
	sub	%o7,$bias,%sp		! alloca
	ld	[$np+4],$npj		! np[1]
	be,pt	SIZE_T_CC,.Lbn_sqr_mont
	mov	12,$j

	mulx	$car0,$mul0,$car0	! ap[0]*bp[0]
	mulx	$apj,$mul0,$tmp0	!prologue! ap[1]*bp[0]
	and	$car0,$mask,$acc0
	add	%sp,$bias+$frame,$tp
	ld	[$ap+8],$apj		!prologue!

	mulx	$n0,$acc0,$mul1		! "t[0]"*n0
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1	! np[0]*"t[0]"*n0
	mulx	$npj,$mul1,$acc1	!prologue! np[1]*"t[0]"*n0
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	ld	[$np+8],$npj		!prologue!
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0		!prologue!

.L1st:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0
	ld	[$ap+$j],$apj		! ap[j]
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj		! np[j]
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	add	$j,4,$j			! j++
	mov	$tmp0,$acc0
	st	$car1,[$tp]
	cmp	$j,$num
	mov	$tmp1,$acc1
	srlx	$car1,32,$car1
	bl	%icc,.L1st
	add	$tp,4,$tp		! tp++
!.L1st

	mulx	$apj,$mul0,$tmp0	!epilogue!
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]
	srlx	$car1,32,$car1

	add	$tmp0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]
	srlx	$car1,32,$car1

	add	$car0,$car1,$car1
	st	$car1,[$tp+8]
	srlx	$car1,32,$car2

	mov	4,$i			! i++
	ld	[$bp+4],$mul0		! bp[1]
.Louter:
	add	%sp,$bias+$frame,$tp
	ld	[$ap],$car0		! ap[0]
	ld	[$ap+4],$apj		! ap[1]
	ld	[$np],$car1		! np[0]
	ld	[$np+4],$npj		! np[1]
	ld	[$tp],$tmp1		! tp[0]
	ld	[$tp+4],$tpj		! tp[1]
	mov	12,$j

	mulx	$car0,$mul0,$car0
	mulx	$apj,$mul0,$tmp0	!prologue!
	add	$tmp1,$car0,$car0
	ld	[$ap+8],$apj		!prologue!
	and	$car0,$mask,$acc0

	mulx	$n0,$acc0,$mul1
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1
	mulx	$npj,$mul1,$acc1	!prologue!
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	ld	[$np+8],$npj		!prologue!
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0		!prologue!

.Linner:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$tpj,$car0,$car0
	ld	[$ap+$j],$apj		! ap[j]
	add	$acc0,$car0,$car0
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj		! np[j]
	and	$car0,$mask,$acc0
	ld	[$tp+8],$tpj		! tp[j]
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	add	$j,4,$j			! j++
	mov	$tmp0,$acc0
	st	$car1,[$tp]		! tp[j-1]
	srlx	$car1,32,$car1
	mov	$tmp1,$acc1
	cmp	$j,$num
	bl	%icc,.Linner
	add	$tp,4,$tp		! tp++
!.Linner

	mulx	$apj,$mul0,$tmp0	!epilogue!
	mulx	$npj,$mul1,$tmp1
	add	$tpj,$car0,$car0
	add	$acc0,$car0,$car0
	ld	[$tp+8],$tpj		! tp[j]
	and	$car0,$mask,$acc0
	add	$acc1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]		! tp[j-1]
	srlx	$car1,32,$car1

	add	$tpj,$car0,$car0
	add	$tmp0,$car0,$car0
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]		! tp[j-1]
	srlx	$car0,32,$car0
	add	$i,4,$i			! i++
	srlx	$car1,32,$car1

	add	$car0,$car1,$car1
	cmp	$i,$num
	add	$car2,$car1,$car1
	st	$car1,[$tp+8]

	srlx	$car1,32,$car2
	bl,a	%icc,.Louter
	ld	[$bp+$i],$mul0		! bp[i]
!.Louter

	add	$tp,12,$tp

.Ltail:
	add	$np,$num,$np
	add	$rp,$num,$rp
	sub	%g0,$num,%o7		! k=-num
	ba	.Lsub
	subcc	%g0,%g0,%g0		! clear %icc.c
.align	16
.Lsub:
	ld	[$tp+%o7],%o0
	ld	[$np+%o7],%o1
	subccc	%o0,%o1,%o1		! tp[j]-np[j]
	add	$rp,%o7,$i
	add	%o7,4,%o7
	brnz	%o7,.Lsub
	st	%o1,[$i]
	subccc	$car2,0,$car2		! handle upmost overflow bit
	sub	%g0,$num,%o7

.Lcopy:
	ld	[$tp+%o7],%o1		! conditional copy
	ld	[$rp+%o7],%o0
	st	%g0,[$tp+%o7]		! zap tp
	movcs	%icc,%o1,%o0
	st	%o0,[$rp+%o7]
	add	%o7,4,%o7
	brnz	%o7,.Lcopy
	nop
	mov	1,%i0
	ret
	restore
___

########
######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
######## code without following dedicated squaring procedure.
########
# Extra register used only by the squaring path.  In the instructions
# below it receives bit 0 of the high half of ap[i]*ap[i] and the bits
# shifted out above bit 31 when a partial product is doubled.
$sbit="%o5";

# Squaring path; it rejoins the generic code at .Ltail.
$code.=<<___;
.align	32
.Lbn_sqr_mont:
	mulx	$mul0,$mul0,$car0		! ap[0]*ap[0]
	mulx	$apj,$mul0,$tmp0		!prologue!
	and	$car0,$mask,$acc0
	add	%sp,$bias+$frame,$tp
	ld	[$ap+8],$apj			!prologue!

	mulx	$n0,$acc0,$mul1			! "t[0]"*n0
	srlx	$car0,32,$car0
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1		! np[0]*"t[0]"*n0
	mulx	$npj,$mul1,$acc1		!prologue!
	and	$car0,1,$sbit
	ld	[$np+8],$npj			!prologue!
	srlx	$car0,1,$car0
	add	$acc0,$car1,$car1
	srlx	$car1,32,$car1
	mov	$tmp0,$acc0			!prologue!

.Lsqr_1st:
	mulx	$apj,$mul0,$tmp0
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0		! ap[j]*a0+c0
	add	$acc1,$car1,$car1
	ld	[$ap+$j],$apj			! ap[j]
	and	$car0,$mask,$acc0
	ld	[$np+$j],$npj			! np[j]
	srlx	$car0,32,$car0
	add	$acc0,$acc0,$acc0
	or	$sbit,$acc0,$acc0
	mov	$tmp1,$acc1
	srlx	$acc0,32,$sbit
	add	$j,4,$j				! j++
	and	$acc0,$mask,$acc0
	cmp	$j,$num
	add	$acc0,$car1,$car1
	st	$car1,[$tp]
	mov	$tmp0,$acc0
	srlx	$car1,32,$car1
	bl	%icc,.Lsqr_1st
	add	$tp,4,$tp			! tp++
!.Lsqr_1st

	mulx	$apj,$mul0,$tmp0		! epilogue
	mulx	$npj,$mul1,$tmp1
	add	$acc0,$car0,$car0		! ap[j]*a0+c0
	add	$acc1,$car1,$car1
	and	$car0,$mask,$acc0
	srlx	$car0,32,$car0
	add	$acc0,$acc0,$acc0
	or	$sbit,$acc0,$acc0
	srlx	$acc0,32,$sbit
	and	$acc0,$mask,$acc0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]
	srlx	$car1,32,$car1

	add	$tmp0,$car0,$car0		! ap[j]*a0+c0
	add	$tmp1,$car1,$car1
	and	$car0,$mask,$acc0
	srlx	$car0,32,$car0
	add	$acc0,$acc0,$acc0
	or	$sbit,$acc0,$acc0
	srlx	$acc0,32,$sbit
	and	$acc0,$mask,$acc0
	add	$acc0,$car1,$car1
	st	$car1,[$tp+4]
	srlx	$car1,32,$car1

	add	$car0,$car0,$car0
	or	$sbit,$car0,$car0
	add	$car0,$car1,$car1
	st	$car1,[$tp+8]
	srlx	$car1,32,$car2

	ld	[%sp+$bias+$frame],$tmp0	! tp[0]
	ld	[%sp+$bias+$frame+4],$tmp1	! tp[1]
	ld	[%sp+$bias+$frame+8],$tpj	! tp[2]
	ld	[$ap+4],$mul0			! ap[1]
	ld	[$ap+8],$apj			! ap[2]
	ld	[$np],$car1			! np[0]
	ld	[$np+4],$npj			! np[1]
	mulx	$n0,$tmp0,$mul1

	mulx	$mul0,$mul0,$car0
	and	$mul1,$mask,$mul1

	mulx	$car1,$mul1,$car1
	mulx	$npj,$mul1,$acc1
	add	$tmp0,$car1,$car1
	and	$car0,$mask,$acc0
	ld	[$np+8],$npj			! np[2]
	srlx	$car1,32,$car1
	add	$tmp1,$car1,$car1
	srlx	$car0,32,$car0
	add	$acc0,$car1,$car1
	and	$car0,1,$sbit
	add	$acc1,$car1,$car1
	srlx	$car0,1,$car0
	mov	12,$j
	st	$car1,[%sp+$bias+$frame]	! tp[0]=
	srlx	$car1,32,$car1
	add	%sp,$bias+$frame+4,$tp

.Lsqr_2nd:
	mulx	$apj,$mul0,$acc0
	mulx	$npj,$mul1,$acc1
	add	$acc0,$car0,$car0
	add	$tpj,$sbit,$sbit
	ld	[$ap+$j],$apj			! ap[j]
	and	$car0,$mask,$acc0
	ld	[$np+$j],$npj			! np[j]
	srlx	$car0,32,$car0
	add	$acc1,$car1,$car1
	ld	[$tp+8],$tpj			! tp[j]
	add	$acc0,$acc0,$acc0
	add	$j,4,$j				! j++
	add	$sbit,$acc0,$acc0
	srlx	$acc0,32,$sbit
	and	$acc0,$mask,$acc0
	cmp	$j,$num
	add	$acc0,$car1,$car1
	st	$car1,[$tp]			! tp[j-1]
	srlx	$car1,32,$car1
	bl	%icc,.Lsqr_2nd
	add	$tp,4,$tp			! tp++
!.Lsqr_2nd

	mulx	$apj,$mul0,$acc0
	mulx	$npj,$mul1,$acc1
	add	$acc0,$car0,$car0
	add	$tpj,$sbit,$sbit
	and	$car0,$mask,$acc0
	srlx	$car0,32,$car0
	add	$acc1,$car1,$car1
	add	$acc0,$acc0,$acc0
	add	$sbit,$acc0,$acc0
	srlx	$acc0,32,$sbit
	and	$acc0,$mask,$acc0
	add	$acc0,$car1,$car1
	st	$car1,[$tp]			! tp[j-1]
	srlx	$car1,32,$car1

	add	$car0,$car0,$car0
	add	$sbit,$car0,$car0
	add	$car0,$car1,$car1
	add	$car2,$car1,$car1
	st	$car1,[$tp+4]
	srlx	$car1,32,$car2

	ld	[%sp+$bias+$frame],$tmp1	! tp[0]
	ld	[%sp+$bias+$frame+4],$tpj	! tp[1]
	ld	[$ap+8],$mul0			! ap[2]
	ld	[$np],$car1			! np[0]
	ld	[$np+4],$npj			! np[1]
	mulx	$n0,$tmp1,$mul1
	and	$mul1,$mask,$mul1
	mov	8,$i

	mulx	$mul0,$mul0,$car0
	mulx	$car1,$mul1,$car1
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	srlx	$car0,32,$car0
	add	%sp,$bias+$frame,$tp
	srlx	$car1,32,$car1
	and	$car0,1,$sbit
	srlx	$car0,1,$car0
	mov	4,$j

.Lsqr_outer:
.Lsqr_inner1:
	mulx	$npj,$mul1,$acc1
	add	$tpj,$car1,$car1
	add	$j,4,$j
	ld	[$tp+8],$tpj
	cmp	$j,$i
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj
	st	$car1,[$tp]
	srlx	$car1,32,$car1
	bl	%icc,.Lsqr_inner1
	add	$tp,4,$tp
!.Lsqr_inner1

	add	$j,4,$j
	ld	[$ap+$j],$apj			! ap[j]
	mulx	$npj,$mul1,$acc1
	add	$tpj,$car1,$car1
	ld	[$np+$j],$npj			! np[j]
	srlx	$car1,32,$tmp0
	and	$car1,$mask,$car1
	add	$tmp0,$sbit,$sbit
	add	$acc0,$car1,$car1
	ld	[$tp+8],$tpj			! tp[j]
	add	$acc1,$car1,$car1
	st	$car1,[$tp]
	srlx	$car1,32,$car1

	add	$j,4,$j
	cmp	$j,$num
	be,pn	%icc,.Lsqr_no_inner2
	add	$tp,4,$tp

.Lsqr_inner2:
	mulx	$apj,$mul0,$acc0
	mulx	$npj,$mul1,$acc1
	add	$tpj,$sbit,$sbit
	add	$acc0,$car0,$car0
	ld	[$ap+$j],$apj			! ap[j]
	and	$car0,$mask,$acc0
	ld	[$np+$j],$npj			! np[j]
	srlx	$car0,32,$car0
	add	$acc0,$acc0,$acc0
	ld	[$tp+8],$tpj			! tp[j]
	add	$sbit,$acc0,$acc0
	add	$j,4,$j				! j++
	srlx	$acc0,32,$sbit
	and	$acc0,$mask,$acc0
	cmp	$j,$num
	add	$acc0,$car1,$car1
	add	$acc1,$car1,$car1
	st	$car1,[$tp]			! tp[j-1]
	srlx	$car1,32,$car1
	bl	%icc,.Lsqr_inner2
	add	$tp,4,$tp			! tp++

.Lsqr_no_inner2:
	mulx	$apj,$mul0,$acc0
	mulx	$npj,$mul1,$acc1
	add	$tpj,$sbit,$sbit
	add	$acc0,$car0,$car0
	and	$car0,$mask,$acc0
	srlx	$car0,32,$car0
	add	$acc0,$acc0,$acc0
	add	$sbit,$acc0,$acc0
	srlx	$acc0,32,$sbit
	and	$acc0,$mask,$acc0
	add	$acc0,$car1,$car1
	add	$acc1,$car1,$car1
	st	$car1,[$tp]			! tp[j-1]
	srlx	$car1,32,$car1

	add	$car0,$car0,$car0
	add	$sbit,$car0,$car0
	add	$car0,$car1,$car1
	add	$car2,$car1,$car1
	st	$car1,[$tp+4]
	srlx	$car1,32,$car2

	add	$i,4,$i				! i++
	ld	[%sp+$bias+$frame],$tmp1	! tp[0]
	ld	[%sp+$bias+$frame+4],$tpj	! tp[1]
	ld	[$ap+$i],$mul0			! ap[j]
	ld	[$np],$car1			! np[0]
	ld	[$np+4],$npj			! np[1]
	mulx	$n0,$tmp1,$mul1
	and	$mul1,$mask,$mul1
	add	$i,4,$tmp0

	mulx	$mul0,$mul0,$car0
	mulx	$car1,$mul1,$car1
	and	$car0,$mask,$acc0
	add	$tmp1,$car1,$car1
	srlx	$car0,32,$car0
	add	%sp,$bias+$frame,$tp
	srlx	$car1,32,$car1
	and	$car0,1,$sbit
	srlx	$car0,1,$car0

	cmp	$tmp0,$num			! i<num-1
	bl	%icc,.Lsqr_outer
	mov	4,$j

.Lsqr_last:
	mulx	$npj,$mul1,$acc1
	add	$tpj,$car1,$car1
	add	$j,4,$j
	ld	[$tp+8],$tpj
	cmp	$j,$i
	add	$acc1,$car1,$car1
	ld	[$np+$j],$npj
	st	$car1,[$tp]
	srlx	$car1,32,$car1
	bl	%icc,.Lsqr_last
	add	$tp,4,$tp
!.Lsqr_last

	mulx	$npj,$mul1,$acc1
	add	$tpj,$acc0,$acc0
	srlx	$acc0,32,$tmp0
	and	$acc0,$mask,$acc0
	add	$tmp0,$sbit,$sbit
	add	$acc0,$car1,$car1
	add	$acc1,$car1,$car1
	st	$car1,[$tp]
	srlx	$car1,32,$car1

	add	$car0,$car0,$car0		! recover $car0
	add	$sbit,$car0,$car0
	add	$car0,$car1,$car1
	add	$car2,$car1,$car1
	st	$car1,[$tp+4]
	srlx	$car1,32,$car2

	ba	.Ltail
	add	$tp,8,$tp
.type	$fname,#function
.size	$fname,(.-$fname)
.asciz	"Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	32
___

# Replace every `...` span in the template with the result of evaluating
# its contents as Perl.  The templates above contain no such spans.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
# Closing STDOUT flushes it; a failed close means the output is incomplete.
close STDOUT or die "error closing STDOUT: $!";