#! /usr/bin/env perl
# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# This module doesn't present direct interest for OpenSSL, because it
# doesn't provide better performance for longer keys, at least not on
# in-order-execution cores. While 512-bit RSA sign operations can be
# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
# verify:-( All comparisons are against bn_mul_mont-free assembler.
# The module might be of interest to embedded system developers, as
# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
# code.

# Usage (as implemented by the argument parsing below):
#
#   perl <this script> [flavour] [output-file]
#
# The script builds the assembly source for bn_mul_mont in $code and
# prints it to STDOUT, which is redirected to output-file when one is
# given.

use strict;
use warnings;

######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
my ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
my ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
my ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : "o32";

# Pointer arithmetic and register save/restore mnemonics, plus the size
# in bytes of one saved register, selected by flavour.
my ($PTR_ADD,$PTR_SUB,$REG_S,$REG_L,$SZREG);
if ($flavour =~ /64|n32/i) {
    $PTR_ADD="daddu";	# incidentally works even on n32
    $PTR_SUB="dsubu";	# incidentally works even on n32
    $REG_S="sd";
    $REG_L="ld";
    $SZREG=8;
} else {
    $PTR_ADD="addu";
    $PTR_SUB="subu";
    $REG_S="sw";
    $REG_L="lw";
    $SZREG=4;
}
# Bit mask of saved registers for the .mask directive: registers 12-23
# ($s0-$s11) on NUBI, where the prologue also saves $s0-$s3, otherwise
# registers 16-23 ($s4-$s11).
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
#
# <appro@openssl.org>
#
######################################################################

# Redirect STDOUT to the requested file.  Three-argument open keeps the
# file name from being interpreted as a mode, and a failure to open is
# fatal instead of silently leaving the output on the inherited STDOUT.
if (defined $output) {
    open(STDOUT, '>', $output)
	or die "can't open $output for writing: $!";
}

# Load/store/multiply/add/subtract mnemonics for one BN_ULONG word and
# the size of that word in bytes, selected by flavour.
my ($LD,$ST,$MULTU,$ADDU,$SUBU,$BNSZ);
if ($flavour =~ /64|n32/i) {
    $LD="ld";
    $ST="sd";
    $MULTU="dmultu";
    $ADDU="daddu";
    $SUBU="dsubu";
    $BNSZ=8;
} else {
    $LD="lw";
    $ST="sw";
    $MULTU="multu";
    $ADDU="addu";
    $SUBU="subu";
    $BNSZ=4;
}

# int bn_mul_mont(
my $rp=$a0;	# BN_ULONG *rp,
my $ap=$a1;	# const BN_ULONG *ap,
my $bp=$a2;	# const BN_ULONG *bp,
my $np=$a3;	# const BN_ULONG *np,
my $n0=$a4;	# const BN_ULONG *n0,
my $num=$a5;	# int num);

# Working registers used by the generated code.
my $lo0=$a6;
my $hi0=$a7;
my $lo1=$t1;
my $hi1=$t2;
my $aj=$s0;
my $bi=$s1;
my $nj=$s2;
# From here on $tp no longer names the "thread pointer" register; it is
# rebound to $s3 and used as the running pointer into the temporary
# vector kept on the stack.
$tp=$s3;
my $alo=$s4;
my $ahi=$s5;
my $nlo=$s6;
my $nhi=$s7;
my $tj=$s8;
my $i=$s9;
my $j=$s10;
my $m1=$s11;

# Stack frame size of bn_mul_mont_internal, in units of $SZREG.
my $FRAMESIZE=14;

# Public entry point.  It returns 0 (in both $t0 and $a0) without doing
# any work when num < 4 or num >= 17, otherwise it branches to
# bn_mul_mont_internal.
my $code=<<___;
#include "mips_arch.h"

.text

.set	noat
.set	noreorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
___
# On O32 the fifth and sixth arguments are fetched from the stack.
$code.=<<___ if ($flavour =~ /o32/i);
	lw	$n0,16($sp)
	lw	$num,20($sp)
___
$code.=<<___;
	slt	$at,$num,4
	bnez	$at,1f
	li	$t0,0
	slt	$at,$num,17	# on in-order CPU
	bnez	$at,bn_mul_mont_internal
	nop
1:	jr	$ra
	li	$a0,0
.end	bn_mul_mont

.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$fp,$FRAMESIZE*$SZREG,$ra
	.mask	0x40000000|$SAVED_REGS_MASK,-$SZREG
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$fp,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-9)*$SZREG($sp)
___
# NUBI flavours additionally save $s0-$s3 (matching $SAVED_REGS_MASK).
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_S	$s3,($FRAMESIZE-10)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-13)*$SZREG($sp)
___
# Main body: the first pass (.L1st) handles bp[0], the outer loop
# (.Louter/.Linner) handles the remaining words of bp, then .Lsub
# subtracts np from the temporary vector into rp and .Lcopy selects the
# final result while zeroing the temporary vector.  The sequence between
# backticks is replaced by its evaluated value before printing.
$code.=<<___;
	move	$fp,$sp

	.set	reorder
	$LD	$n0,0($n0)
	$LD	$bi,0($bp)	# bp[0]
	$LD	$aj,0($ap)	# ap[0]
	$LD	$nj,0($np)	# np[0]

	$PTR_SUB $sp,2*$BNSZ	# place for two extra words
	sll	$num,`log($BNSZ)/log(2)`
	li	$at,-4096
	$PTR_SUB $sp,$num
	and	$sp,$at

	$MULTU	($aj,$bi)
	$LD	$ahi,$BNSZ($ap)
	$LD	$nhi,$BNSZ($np)
	mflo	($lo0,$aj,$bi)
	mfhi	($hi0,$aj,$bi)
	$MULTU	($lo0,$n0)
	mflo	($m1,$lo0,$n0)

	$MULTU	($ahi,$bi)
	mflo	($alo,$ahi,$bi)
	mfhi	($ahi,$ahi,$bi)

	$MULTU	($nj,$m1)
	mflo	($lo1,$nj,$m1)
	mfhi	($hi1,$nj,$m1)
	$MULTU	($nhi,$m1)
	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$ADDU	$hi1,$at
	mflo	($nlo,$nhi,$m1)
	mfhi	($nhi,$nhi,$m1)

	move	$tp,$sp
	li	$j,2*$BNSZ
.align	4
.L1st:
	.set	noreorder
	$PTR_ADD $aj,$ap,$j
	$PTR_ADD $nj,$np,$j
	$LD	$aj,($aj)
	$LD	$nj,($nj)

	$MULTU	($aj,$bi)
	$ADDU	$lo0,$alo,$hi0
	$ADDU	$lo1,$nlo,$hi1
	sltu	$at,$lo0,$hi0
	sltu	$t0,$lo1,$hi1
	$ADDU	$hi0,$ahi,$at
	$ADDU	$hi1,$nhi,$t0
	mflo	($alo,$aj,$bi)
	mfhi	($ahi,$aj,$bi)

	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$MULTU	($nj,$m1)
	$ADDU	$hi1,$at
	addu	$j,$BNSZ
	$ST	$lo1,($tp)
	sltu	$t0,$j,$num
	mflo	($nlo,$nj,$m1)
	mfhi	($nhi,$nj,$m1)

	bnez	$t0,.L1st
	$PTR_ADD $tp,$BNSZ
	.set	reorder

	$ADDU	$lo0,$alo,$hi0
	sltu	$at,$lo0,$hi0
	$ADDU	$hi0,$ahi,$at

	$ADDU	$lo1,$nlo,$hi1
	sltu	$t0,$lo1,$hi1
	$ADDU	$hi1,$nhi,$t0
	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$ADDU	$hi1,$at

	$ST	$lo1,($tp)

	$ADDU	$hi1,$hi0
	sltu	$at,$hi1,$hi0
	$ST	$hi1,$BNSZ($tp)
	$ST	$at,2*$BNSZ($tp)

	li	$i,$BNSZ
.align	4
.Louter:
	$PTR_ADD $bi,$bp,$i
	$LD	$bi,($bi)
	$LD	$aj,($ap)
	$LD	$ahi,$BNSZ($ap)
	$LD	$tj,($sp)

	$MULTU	($aj,$bi)
	$LD	$nj,($np)
	$LD	$nhi,$BNSZ($np)
	mflo	($lo0,$aj,$bi)
	mfhi	($hi0,$aj,$bi)
	$ADDU	$lo0,$tj
	$MULTU	($lo0,$n0)
	sltu	$at,$lo0,$tj
	$ADDU	$hi0,$at
	mflo	($m1,$lo0,$n0)

	$MULTU	($ahi,$bi)
	mflo	($alo,$ahi,$bi)
	mfhi	($ahi,$ahi,$bi)

	$MULTU	($nj,$m1)
	mflo	($lo1,$nj,$m1)
	mfhi	($hi1,$nj,$m1)

	$MULTU	($nhi,$m1)
	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$ADDU	$hi1,$at
	mflo	($nlo,$nhi,$m1)
	mfhi	($nhi,$nhi,$m1)

	move	$tp,$sp
	li	$j,2*$BNSZ
	$LD	$tj,$BNSZ($tp)
.align	4
.Linner:
	.set	noreorder
	$PTR_ADD $aj,$ap,$j
	$PTR_ADD $nj,$np,$j
	$LD	$aj,($aj)
	$LD	$nj,($nj)

	$MULTU	($aj,$bi)
	$ADDU	$lo0,$alo,$hi0
	$ADDU	$lo1,$nlo,$hi1
	sltu	$at,$lo0,$hi0
	sltu	$t0,$lo1,$hi1
	$ADDU	$hi0,$ahi,$at
	$ADDU	$hi1,$nhi,$t0
	mflo	($alo,$aj,$bi)
	mfhi	($ahi,$aj,$bi)

	$ADDU	$lo0,$tj
	addu	$j,$BNSZ
	$MULTU	($nj,$m1)
	sltu	$at,$lo0,$tj
	$ADDU	$lo1,$lo0
	$ADDU	$hi0,$at
	sltu	$t0,$lo1,$lo0
	$LD	$tj,2*$BNSZ($tp)
	$ADDU	$hi1,$t0
	sltu	$at,$j,$num
	mflo	($nlo,$nj,$m1)
	mfhi	($nhi,$nj,$m1)
	$ST	$lo1,($tp)
	bnez	$at,.Linner
	$PTR_ADD $tp,$BNSZ
	.set	reorder

	$ADDU	$lo0,$alo,$hi0
	sltu	$at,$lo0,$hi0
	$ADDU	$hi0,$ahi,$at
	$ADDU	$lo0,$tj
	sltu	$t0,$lo0,$tj
	$ADDU	$hi0,$t0

	$LD	$tj,2*$BNSZ($tp)
	$ADDU	$lo1,$nlo,$hi1
	sltu	$at,$lo1,$hi1
	$ADDU	$hi1,$nhi,$at
	$ADDU	$lo1,$lo0
	sltu	$t0,$lo1,$lo0
	$ADDU	$hi1,$t0
	$ST	$lo1,($tp)

	$ADDU	$lo1,$hi1,$hi0
	sltu	$hi1,$lo1,$hi0
	$ADDU	$lo1,$tj
	sltu	$at,$lo1,$tj
	$ADDU	$hi1,$at
	$ST	$lo1,$BNSZ($tp)
	$ST	$hi1,2*$BNSZ($tp)

	addu	$i,$BNSZ
	sltu	$t0,$i,$num
	bnez	$t0,.Louter

	.set	noreorder
	$PTR_ADD $tj,$sp,$num	# &tp[num]
	move	$tp,$sp
	move	$ap,$sp
	li	$hi0,0		# clear borrow bit

.align	4
.Lsub:	$LD	$lo0,($tp)
	$LD	$lo1,($np)
	$PTR_ADD $tp,$BNSZ
	$PTR_ADD $np,$BNSZ
	$SUBU	$lo1,$lo0,$lo1	# tp[i]-np[i]
	sgtu	$at,$lo1,$lo0
	$SUBU	$lo0,$lo1,$hi0
	sgtu	$hi0,$lo0,$lo1
	$ST	$lo0,($rp)
	or	$hi0,$at
	sltu	$at,$tp,$tj
	bnez	$at,.Lsub
	$PTR_ADD $rp,$BNSZ

	$SUBU	$hi0,$hi1,$hi0	# handle upmost overflow bit
	move	$tp,$sp
	$PTR_SUB $rp,$num	# restore rp
	not	$hi1,$hi0

.Lcopy:	$LD	$nj,($tp)	# conditional move
	$LD	$aj,($rp)
	$ST	$zero,($tp)
	$PTR_ADD $tp,$BNSZ
	and	$nj,$hi0
	and	$aj,$hi1
	or	$aj,$nj
	sltu	$at,$tp,$tj
	$ST	$aj,($rp)
	bnez	$at,.Lcopy
	$PTR_ADD $rp,$BNSZ

	li	$a0,1
	li	$t0,1

	.set	noreorder
	move	$sp,$fp
	$REG_L	$fp,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-9)*$SZREG($sp)
___
# NUBI flavours restore the additional $s0-$s3 saved in the prologue.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-10)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-13)*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace every `expression` in the generated text with the result of
# evaluating it as Perl (used above for the log2 of the word size).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

print $code;
# Buffered write errors only surface at close, so a failure here is fatal.
close STDOUT or die "error closing STDOUT: $!";