#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy an lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code a MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

# September 2012.
#
# Add MIPS32r2 code (>25% fewer instructions).

######################################################################
# There are a number of MIPS ABIs in use, O32 and N32/64 being the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
# Every variable below holds the assembler spelling of one register,
# i.e. the string "\$N" for register number N.
($zero,$at,$t0,$t1,$t2)=map { "\$$_" } (0..2,24,25);
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map { "\$$_" } (4..11);
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map { "\$$_" } (12..23);
($gp,$tp,$sp,$fp,$ra)=map { "\$$_" } (3,28..31);
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));

# Command line is "[flavour] [output-file]":
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop : undef;
# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
$flavour = (@ARGV && $ARGV[0] !~ m|\.|) ? shift : "o32";

# Pointer arithmetic and register save/restore mnemonics, together with
# the size in bytes of one saved-register slot, for the selected flavour.
# The 64-bit forms incidentally work even on n32.
($PTR_ADD,$PTR_SUB,$PTR_SLL,$REG_S,$REG_L,$SZREG) =
	($flavour =~ /64|n32/i)	? ("daddu","dsubu","dsll","sd","ld",8)
				: ("addu", "subu", "sll", "sw","lw",4);
#
# <appro@openssl.org>
#
######################################################################

# Target endianness.  When CC is set, the token MIPSEB is piped through the
# compiler's preprocessor: if it comes back unchanged the target is taken
# to be little-endian, otherwise big-endian (presumably because big-endian
# toolchains predefine MIPSEB as a macro -- confirm against the compiler).
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

# Redirect STDOUT to the requested output file.  Use the three-argument
# form so the file name is never interpreted as a mode, and stop right
# away if the file cannot be created instead of silently writing the
# generated assembly to the inherited STDOUT.
if ($output) {
	open(STDOUT,'>',$output) or die "can't open $output: $!";
}

# Without CC fall back to the byte order of the machine running this
# script: a 32-bit big-endian 1 reads back as native 1 only on a
# big-endian host.
if (!defined($big_endian))
            {   $big_endian=(unpack('L',pack('N',1))==1);   }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

# Sixteen registers holding the sliding window of message schedule words.
@X=map { "\$$_" } (8..23);	# a4-a7,s0-s11

# Function arguments.
($ctx,$inp,$num)=($a0,$a1,$a2);

# The five SHA-1 working variables; @V lists them in a..e order and is
# rotated by the round generator after every round.
($A,$B,$C,$D,$E)=map { "\$$_" } (1,2,3,7,24);
@V=($A,$B,$C,$D,$E);

# Scratch registers and the round constant.
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
$K="\$31";	# ra

sub BODY_00_14 {
# Append the code for SHA-1 round $i (0..14) to $code.
#
#   $i           round number
#   $a,$b,...$e  registers currently playing the roles a..e
#
# The emitted round computes e += K + rol(a,5) + X[i] + (((c^d)&b)^d)
# and b = rol(b,30), and interleaves the load of the next input word
# X[i+1]: a plain lw on MIPS R6, an lwl/lwr pair otherwise.  On
# little-endian targets X[i] is byte-swapped first.
my $i=shift;
my ($a,$b,$c,$d,$e)=@_;
my $j=$i+1;
my ($Xi,$Xj)=@X[$i,$j];		# current and next schedule word

if (!$big_endian) {
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	$Xi,$Xi	# byte swap($i)
	rotr	$Xi,$Xi,16
#else
	srl	$t0,$Xi,24	# byte swap($i)
	srl	$t1,$Xi,8
	andi	$t2,$Xi,0xFF00
	sll	$Xi,$Xi,24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	$Xi,$t0
	or	$t1,$t2
	or	$Xi,$t1
#endif
___
}

$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	and	$t0,$b
	addu	$e,$t1
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	 lw	$Xj,$j*4($inp)
#else
	 lwl	$Xj,$j*4+$MSB($inp)
	 lwr	$Xj,$j*4+$LSB($inp)
#endif
	xor	$t0,$d
	addu	$e,$Xi
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 lwl	$Xj,$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lwr	$Xj,$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,$Xi
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

sub BODY_15_19 {
# Append the code for SHA-1 round $i (15..19) to $code.
#
#   $i           round number
#   $a,$b,...$e  registers currently playing the roles a..e
#
# The round function is the same as in rounds 0..14,
# e += K + rol(a,5) + X[i] + (((c^d)&b)^d), b = rol(b,30), but instead
# of loading input the next schedule word is updated in place:
# X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
# Round 15 additionally byte-swaps X[15] on little-endian targets.
my $i=shift;
my ($a,$b,$c,$d,$e)=@_;
my $j=$i+1;
# Schedule words: current, next, and the three mixed into the next one.
my ($Xi,$Xj,$Xj2,$Xj8,$Xj13)=@X[map { $_%16 } ($i,$j,$j+2,$j+8,$j+13)];

if (!$big_endian && $i==15) {
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	$Xi,$Xi	# byte swap($i)
	rotr	$Xi,$Xi,16
#else
	srl	$t0,$Xi,24	# byte swap($i)
	srl	$t1,$Xi,8
	andi	$t2,$Xi,0xFF00
	sll	$Xi,$Xi,24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	$Xi,$t0
	or	$Xi,$t1
	or	$Xi,$t2
#endif
___
}

$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	 xor	$Xj,$Xj2
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	 xor	$Xj,$Xj8
	and	$t0,$b
	addu	$e,$t1
	 xor	$Xj,$Xj13
	xor	$t0,$d
	addu	$e,$Xi
	 rotr	$Xj,$Xj,31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 xor	$Xj,$Xj2
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	$Xj,$Xj8
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	$Xj,$Xj13
	sll	$t2,$b,30
	and	$t0,$b
	 srl	$t1,$Xj,31
	 addu	$Xj,$Xj
	srl	$b,$b,2
	xor	$t0,$d
	 or	$Xj,$t1
	addu	$e,$Xi
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

sub BODY_20_39 {
# Append the code for one parity round to $code.
#
#   $i           round number
#   $a,$b,...$e  registers currently playing the roles a..e
#
# The emitted round computes e += K + rol(a,5) + X[i] + (b^c^d) and
# b = rol(b,30).  For $i below 79 the next schedule word is updated in
# place, X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1) with indices
# modulo 16.  For $i equal to 79 there is no next word; the five words
# at 0..16($ctx) are loaded into X[0]..X[4] instead.
my $i=shift;
my ($a,$b,$c,$d,$e)=@_;
my $j=$i+1;
# Schedule words: current, next, and the three mixed into the next one.
my ($Xi,$Xj,$Xj2,$Xj8,$Xj13)=@X[map { $_%16 } ($i,$j,$j+2,$j+8,$j+13)];

if ($i<79) {
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	 xor	$Xj,$Xj2
	addu	$e,$K		# $i
	rotr	$t1,$a,27
	 xor	$Xj,$Xj8
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	$Xj,$Xj13
	xor	$t0,$b
	addu	$e,$Xi
	 rotr	$Xj,$Xj,31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 xor	$Xj,$Xj2
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	$Xj,$Xj8
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	$Xj,$Xj13
	sll	$t2,$b,30
	xor	$t0,$b
	 srl	$t1,$Xj,31
	 addu	$Xj,$Xj
	srl	$b,$b,2
	addu	$e,$Xi
	 or	$Xj,$t1
	or	$b,$t2
	addu	$e,$t0
#endif
___
}
elsif ($i==79) {
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	 lw	$X[0],0($ctx)
	addu	$e,$K		# $i
	 lw	$X[1],4($ctx)
	rotr	$t1,$a,27
	 lw	$X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	 lw	$X[3],12($ctx)
	xor	$t0,$b
	addu	$e,$Xi
	 lw	$X[4],16($ctx)
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 lw	$X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lw	$X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	 lw	$X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	 lw	$X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	 lw	$X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,$Xi
	or	$b,$t2
	addu	$e,$t0
#endif
___
}
}

sub BODY_40_59 {
# Append the code for SHA-1 round $i (40..59) to $code.
#
#   $i           round number
#   $a,$b,...$e  registers currently playing the roles a..e
#
# The emitted round computes
# e += K + rol(a,5) + X[i] + (c&d) + ((c^d)&b) and b = rol(b,30), and
# updates the next schedule word in place,
# X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
# Nothing is emitted for $i of 79 or above.
my $i=shift;
my ($a,$b,$c,$d,$e)=@_;
my $j=$i+1;
# Schedule words: current, next, and the three mixed into the next one.
my ($Xi,$Xj,$Xj2,$Xj8,$Xj13)=@X[map { $_%16 } ($i,$j,$j+2,$j+8,$j+13)];

if ($i<79) {
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K		# $i
	and	$t0,$c,$d
	 xor	$Xj,$Xj2
	rotr	$t1,$a,27
	addu	$e,$t0
	 xor	$Xj,$Xj8
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	$Xj,$Xj13
	and	$t0,$b
	addu	$e,$Xi
	 rotr	$Xj,$Xj,31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	 xor	$Xj,$Xj2
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	$Xj,$Xj8
	and	$t0,$c,$d
	addu	$e,$t1
	 xor	$Xj,$Xj13
	sll	$t2,$b,30
	addu	$e,$t0
	 srl	$t1,$Xj,31
	xor	$t0,$c,$d
	 addu	$Xj,$Xj
	and	$t0,$b
	srl	$b,$b,2
	 or	$Xj,$t1
	addu	$e,$Xi
	or	$b,$t2
	addu	$e,$t0
#endif
___
}
}

$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers

# Bitmap for the .mask directive, one bit per register number stored in
# the prologue: 16-23, 30 and 31 always, plus 3 and 12-15 for NUBI.
if ($flavour =~ /nubi/i)	{ $SAVED_REGS_MASK="0xc0fff008"; }
else				{ $SAVED_REGS_MASK="0xc0ff0000"; }

# Function entry: allocate the frame and save the registers that are
# clobbered by the code below in every flavour.
$code=<<___;
#include "mips_arch.h"

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	$sp,$FRAMESIZE*$SZREG,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
___

# optimize non-nubi prologue: these are saved for NUBI flavours only
if ($flavour =~ /nubi/i) {
$code.=<<___;
	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
}

# Turn the block count into an end-of-input pointer (inp + num*64) and
# park it at 0($sp), load the five state words from the context, then
# start the per-block loop by loading the first input word and K_00_19.
$code.=<<___;
	$PTR_SLL $num,6
	$PTR_ADD $num,$inp
	$REG_S	$num,0($sp)
	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lui	$K,0x5a82
	lw	@X[0],($inp)
	ori	$K,0x7999	# K_00_19
#else
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
#endif
___
# Unroll all 80 rounds.  After each round the register list @V is rotated
# right by one position, so the next round receives the registers with
# their a..e roles shifted and the generated code needs no register moves.
# $i is the shared round counter; every call continues where the previous
# one stopped and emits rounds up to, but not including, $limit.
$i=0;
my $rounds_upto=sub {
	my ($limit,$body)=@_;
	while ($i<$limit) {
		$body->($i,@V);
		unshift(@V,pop(@V));
		$i++;
	}
};

$rounds_upto->(15,\&BODY_00_14);
$rounds_upto->(20,\&BODY_15_19);
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
$rounds_upto->(40,\&BODY_20_39);
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
$rounds_upto->(60,\&BODY_40_59);
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
$rounds_upto->(80,\&BODY_20_39);
# End of the per-block loop: advance the input pointer, reload the end
# pointer parked at 0($sp), add the previous state (loaded into
# X[0]..X[4] by round 79) to the working variables and store the result
# back to the context.  Then restore the registers saved in the prologue.
$code.=<<___;
	$PTR_ADD $inp,64
	$REG_L	$num,0($sp)

	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bne	$inp,$num,.Loop
	nop

	.set	noreorder
	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
___

# Counterpart of the NUBI-only saves in the prologue.
if ($flavour =~ /nubi/i) {
$code.=<<___;
	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
}

# Return; the frame is released in the branch delay slot.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	sha1_block_data_order
.rdata
.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___

# Write out the generated assembly; buffered write errors surface at close.
print $code;
close(STDOUT) or die "error closing STDOUT: $!";