#! /usr/bin/env perl
# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Origin: /third_party/openssl/crypto/bn/asm/mips-mont.pl (revision e1051a39)

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# This module doesn't present direct interest for OpenSSL, because it
# doesn't provide better performance for longer keys, at least not on
# in-order-execution cores. While 512-bit RSA sign operations can be
# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
# verify:-( All comparisons are against bn_mul_mont-free assembler.
# The module might be of interest to embedded system developers, as
# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
# code.

use strict;
use warnings;

######################################################################
# There are a number of MIPS ABIs in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
my ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
my ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
my ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = ($#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$|) ? pop(@ARGV) : undef;
# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
my $flavour = ($#ARGV >= 0 && $ARGV[0] !~ m|\.|) ? shift(@ARGV) : "o32";

# Pointer arithmetic and register save/restore mnemonics, plus the size
# in bytes of one saved register, chosen by flavour.
my ($PTR_ADD,$PTR_SUB,$REG_S,$REG_L,$SZREG);
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="daddu";	# incidentally works even on n32
	$PTR_SUB="dsubu";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$SZREG=8;
} else {
	$PTR_ADD="addu";
	$PTR_SUB="subu";
	$REG_S="sw";
	$REG_L="lw";
	$SZREG=4;
}
# Bit N set means register $N is saved in the prologue: $12..$23 for the
# NUBI flavours, $16..$23 otherwise (see the conditional save blocks below).
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
#
# <appro@openssl.org>
#
######################################################################

# Redirect STDOUT to the requested output file.  Three-argument open keeps
# the filename from being interpreted as a mode, and failure is fatal
# instead of silently writing the assembly to the inherited STDOUT.
if (defined $output) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}

# Mnemonics operating on one BN_ULONG limb, and the limb size in bytes.
my ($LD,$ST,$MULTU,$ADDU,$SUBU,$BNSZ);
if ($flavour =~ /64|n32/i) {
	$LD="ld";
	$ST="sd";
	$MULTU="dmultu";
	$ADDU="daddu";
	$SUBU="dsubu";
	$BNSZ=8;
} else {
	$LD="lw";
	$ST="sw";
	$MULTU="multu";
	$ADDU="addu";
	$SUBU="subu";
	$BNSZ=4;
}

# int bn_mul_mont(
my $rp=$a0;	# BN_ULONG *rp,
my $ap=$a1;	# const BN_ULONG *ap,
my $bp=$a2;	# const BN_ULONG *bp,
my $np=$a3;	# const BN_ULONG *np,
my $n0=$a4;	# const BN_ULONG *n0,
my $num=$a5;	# int num);

# Symbolic names for the working registers used by the template below.
my $lo0=$a6;
my $hi0=$a7;
my $lo1=$t1;
my $hi1=$t2;
my $aj=$s0;
my $bi=$s1;
my $nj=$s2;
# NB: $tp is deliberately re-bound here.  From this point on it names the
# register that walks the tp[] scratch vector on the stack ($s3), not the
# thread-pointer register assigned in the layout above; the thread pointer
# is therefore never referenced by the generated code.
$tp=$s3;
my $alo=$s4;
my $ahi=$s5;
my $nlo=$s6;
my $nhi=$s7;
my $tj=$s8;
my $i=$s9;
my $j=$s10;
my $m1=$s11;

# Stack frame size of bn_mul_mont_internal, in $SZREG-sized slots.
my $FRAMESIZE=14;

# Assembly template.  The text inside the here-documents is emitted as-is
# after variable interpolation; `...` spans are evaluated as Perl at the end.
my $code=<<___;
#include "mips_arch.h"

.text

.set	noat
.set	noreorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
___
# O32 only: fetch the fifth and sixth arguments (n0, num) from the stack.
$code.=<<___ if ($flavour =~ /o32/i);
	lw	$n0,16($sp)
	lw	$num,20($sp)
___
# Entry stub: returns 0 when num < 4 or num >= 17, otherwise branches to
# bn_mul_mont_internal, whose prologue saves $fp and $s4..$s11.
$code.=<<___;
	slt	$at,$num,4
	bnez	$at,1f
	li	$t0,0
	slt	$at,$num,17	# on in-order CPU
	bnez	$at,bn_mul_mont_internal
	nop
1:	jr	$ra
	li	$a0,0
.end	bn_mul_mont

.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$fp,$FRAMESIZE*$SZREG,$ra
	.mask	0x40000000|$SAVED_REGS_MASK,-$SZREG
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$fp,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-9)*$SZREG($sp)
___
# NUBI flavours additionally save $s0..$s3 (matches $SAVED_REGS_MASK).
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_S	$s3,($FRAMESIZE-10)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-13)*$SZREG($sp)
___
# Main body: first pass (.L1st), outer/inner loops (.Louter/.Linner),
# final subtraction of np (.Lsub), masked copy into rp while zeroing the
# scratch area (.Lcopy), then the common part of the epilogue.
$code.=<<___;
	move	$fp,$sp

	.set	reorder
	$LD	$n0,0($n0)
	$LD	$bi,0($bp)	# bp[0]
	$LD	$aj,0($ap)	# ap[0]
	$LD	$nj,0($np)	# np[0]

	$PTR_SUB $sp,2*$BNSZ	# place for two extra words
	sll	$num,`log($BNSZ)/log(2)`
	li	$at,-4096
	$PTR_SUB $sp,$num
	and	$sp,$at

	$MULTU	($aj,$bi)
	$LD	$ahi,$BNSZ($ap)
	$LD	$nhi,$BNSZ($np)
	mflo	($lo0,$aj,$bi)
	mfhi	($hi0,$aj,$bi)
	$MULTU	($lo0,$n0)
	mflo	($m1,$lo0,$n0)

	$MULTU	($ahi,$bi)
	mflo	($alo,$ahi,$bi)
	mfhi	($ahi,$ahi,$bi)

	$MULTU	($nj,$m1)
	mflo	($lo1,$nj,$m1)
	mfhi	($hi1,$nj,$m1)
	$MULTU	($nhi,$m1)
	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$ADDU	$hi1,$at
	mflo	($nlo,$nhi,$m1)
	mfhi	($nhi,$nhi,$m1)

	move	$tp,$sp
	li	$j,2*$BNSZ
.align	4
.L1st:
	.set	noreorder
	$PTR_ADD $aj,$ap,$j
	$PTR_ADD $nj,$np,$j
	$LD	$aj,($aj)
	$LD	$nj,($nj)

	$MULTU	($aj,$bi)
	$ADDU	$lo0,$alo,$hi0
	$ADDU	$lo1,$nlo,$hi1
	sltu	$at,$lo0,$hi0
	sltu	$t0,$lo1,$hi1
	$ADDU	$hi0,$ahi,$at
	$ADDU	$hi1,$nhi,$t0
	mflo	($alo,$aj,$bi)
	mfhi	($ahi,$aj,$bi)

	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$MULTU	($nj,$m1)
	$ADDU	$hi1,$at
	addu	$j,$BNSZ
	$ST	$lo1,($tp)
	sltu	$t0,$j,$num
	mflo	($nlo,$nj,$m1)
	mfhi	($nhi,$nj,$m1)

	bnez	$t0,.L1st
	$PTR_ADD $tp,$BNSZ
	.set	reorder

	$ADDU	$lo0,$alo,$hi0
	sltu	$at,$lo0,$hi0
	$ADDU	$hi0,$ahi,$at

	$ADDU	$lo1,$nlo,$hi1
	sltu	$t0,$lo1,$hi1
	$ADDU	$hi1,$nhi,$t0
	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$ADDU	$hi1,$at

	$ST	$lo1,($tp)

	$ADDU	$hi1,$hi0
	sltu	$at,$hi1,$hi0
	$ST	$hi1,$BNSZ($tp)
	$ST	$at,2*$BNSZ($tp)

	li	$i,$BNSZ
.align	4
.Louter:
	$PTR_ADD $bi,$bp,$i
	$LD	$bi,($bi)
	$LD	$aj,($ap)
	$LD	$ahi,$BNSZ($ap)
	$LD	$tj,($sp)

	$MULTU	($aj,$bi)
	$LD	$nj,($np)
	$LD	$nhi,$BNSZ($np)
	mflo	($lo0,$aj,$bi)
	mfhi	($hi0,$aj,$bi)
	$ADDU	$lo0,$tj
	$MULTU	($lo0,$n0)
	sltu	$at,$lo0,$tj
	$ADDU	$hi0,$at
	mflo	($m1,$lo0,$n0)

	$MULTU	($ahi,$bi)
	mflo	($alo,$ahi,$bi)
	mfhi	($ahi,$ahi,$bi)

	$MULTU	($nj,$m1)
	mflo	($lo1,$nj,$m1)
	mfhi	($hi1,$nj,$m1)

	$MULTU	($nhi,$m1)
	$ADDU	$lo1,$lo0
	sltu	$at,$lo1,$lo0
	$ADDU	$hi1,$at
	mflo	($nlo,$nhi,$m1)
	mfhi	($nhi,$nhi,$m1)

	move	$tp,$sp
	li	$j,2*$BNSZ
	$LD	$tj,$BNSZ($tp)
.align	4
.Linner:
	.set	noreorder
	$PTR_ADD $aj,$ap,$j
	$PTR_ADD $nj,$np,$j
	$LD	$aj,($aj)
	$LD	$nj,($nj)

	$MULTU	($aj,$bi)
	$ADDU	$lo0,$alo,$hi0
	$ADDU	$lo1,$nlo,$hi1
	sltu	$at,$lo0,$hi0
	sltu	$t0,$lo1,$hi1
	$ADDU	$hi0,$ahi,$at
	$ADDU	$hi1,$nhi,$t0
	mflo	($alo,$aj,$bi)
	mfhi	($ahi,$aj,$bi)

	$ADDU	$lo0,$tj
	addu	$j,$BNSZ
	$MULTU	($nj,$m1)
	sltu	$at,$lo0,$tj
	$ADDU	$lo1,$lo0
	$ADDU	$hi0,$at
	sltu	$t0,$lo1,$lo0
	$LD	$tj,2*$BNSZ($tp)
	$ADDU	$hi1,$t0
	sltu	$at,$j,$num
	mflo	($nlo,$nj,$m1)
	mfhi	($nhi,$nj,$m1)
	$ST	$lo1,($tp)
	bnez	$at,.Linner
	$PTR_ADD $tp,$BNSZ
	.set	reorder

	$ADDU	$lo0,$alo,$hi0
	sltu	$at,$lo0,$hi0
	$ADDU	$hi0,$ahi,$at
	$ADDU	$lo0,$tj
	sltu	$t0,$lo0,$tj
	$ADDU	$hi0,$t0

	$LD	$tj,2*$BNSZ($tp)
	$ADDU	$lo1,$nlo,$hi1
	sltu	$at,$lo1,$hi1
	$ADDU	$hi1,$nhi,$at
	$ADDU	$lo1,$lo0
	sltu	$t0,$lo1,$lo0
	$ADDU	$hi1,$t0
	$ST	$lo1,($tp)

	$ADDU	$lo1,$hi1,$hi0
	sltu	$hi1,$lo1,$hi0
	$ADDU	$lo1,$tj
	sltu	$at,$lo1,$tj
	$ADDU	$hi1,$at
	$ST	$lo1,$BNSZ($tp)
	$ST	$hi1,2*$BNSZ($tp)

	addu	$i,$BNSZ
	sltu	$t0,$i,$num
	bnez	$t0,.Louter

	.set	noreorder
	$PTR_ADD $tj,$sp,$num	# &tp[num]
	move	$tp,$sp
	move	$ap,$sp
	li	$hi0,0		# clear borrow bit

.align	4
.Lsub:	$LD	$lo0,($tp)
	$LD	$lo1,($np)
	$PTR_ADD $tp,$BNSZ
	$PTR_ADD $np,$BNSZ
	$SUBU	$lo1,$lo0,$lo1	# tp[i]-np[i]
	sgtu	$at,$lo1,$lo0
	$SUBU	$lo0,$lo1,$hi0
	sgtu	$hi0,$lo0,$lo1
	$ST	$lo0,($rp)
	or	$hi0,$at
	sltu	$at,$tp,$tj
	bnez	$at,.Lsub
	$PTR_ADD $rp,$BNSZ

	$SUBU	$hi0,$hi1,$hi0	# handle upmost overflow bit
	move	$tp,$sp
	$PTR_SUB $rp,$num	# restore rp
	not	$hi1,$hi0

.Lcopy:	$LD	$nj,($tp)	# conditional move
	$LD	$aj,($rp)
	$ST	$zero,($tp)
	$PTR_ADD $tp,$BNSZ
	and	$nj,$hi0
	and	$aj,$hi1
	or	$aj,$nj
	sltu	$at,$tp,$tj
	$ST	$aj,($rp)
	bnez	$at,.Lcopy
	$PTR_ADD $rp,$BNSZ

	li	$a0,1
	li	$t0,1

	.set	noreorder
	move	$sp,$fp
	$REG_L	$fp,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-9)*$SZREG($sp)
___
# NUBI flavours restore the additional registers saved in the prologue.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-10)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-13)*$SZREG($sp)
___
# Return (frame released in the branch delay slot) and the ID string.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace every `expr` span in the template with the value of the Perl
# expression it contains.  The evaluated text comes only from the template
# above, never from external input.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

print $code;
close STDOUT or die "error closing STDOUT: $!";