1e1051a39Sopenharmony_ci#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci
9e1051a39Sopenharmony_ci
10e1051a39Sopenharmony_ci# ====================================================================
11e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
13e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
14e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
15e1051a39Sopenharmony_ci# ====================================================================
16e1051a39Sopenharmony_ci
17e1051a39Sopenharmony_ci# SHA2 block procedures for MIPS.
18e1051a39Sopenharmony_ci
19e1051a39Sopenharmony_ci# October 2010.
20e1051a39Sopenharmony_ci#
21e1051a39Sopenharmony_ci# SHA256 performance improvement on MIPS R5000 CPU is ~27% over gcc-
22e1051a39Sopenharmony_ci# generated code in o32 build and ~55% in n32/64 build. SHA512 [which
23e1051a39Sopenharmony_ci# for now can only be compiled for MIPS64 ISA] improvement is modest
24e1051a39Sopenharmony_ci# ~17%, but it comes for free, because it's same instruction sequence.
25e1051a39Sopenharmony_ci# Improvement coefficients are for aligned input.
26e1051a39Sopenharmony_ci
27e1051a39Sopenharmony_ci# September 2012.
28e1051a39Sopenharmony_ci#
# Add MIPS[32|64]R2 code (>25% fewer instructions).
30e1051a39Sopenharmony_ci
31e1051a39Sopenharmony_ci######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
36e1051a39Sopenharmony_ci#
# Each symbolic NUBI register name is bound to its "$<number>" assembler
# operand: temporaries, arguments, callee-saved and special registers.
($zero,$at,$t0,$t1,$t2) = map(sprintf('$%d',$_), 0, 1, 2, 24, 25);
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7) = map(sprintf('$%d',$_), 4 .. 11);
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11) = map(sprintf('$%d',$_), 12 .. 23);
($gp,$tp,$sp,$fp,$ra) = map(sprintf('$%d',$_), 3, 28, 29, 30, 31);
41e1051a39Sopenharmony_ci#
42e1051a39Sopenharmony_ci# The return value is placed in $a0. Following coding rules facilitate
43e1051a39Sopenharmony_ci# interoperability:
44e1051a39Sopenharmony_ci#
45e1051a39Sopenharmony_ci# - never ever touch $tp, "thread pointer", former $gp [o32 can be
46e1051a39Sopenharmony_ci#   excluded from the rule, because it's specified volatile];
47e1051a39Sopenharmony_ci# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
48e1051a39Sopenharmony_ci#   old code];
49e1051a39Sopenharmony_ci# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
50e1051a39Sopenharmony_ci#
51e1051a39Sopenharmony_ci# For reference here is register layout for N32/64 MIPS ABIs:
52e1051a39Sopenharmony_ci#
53e1051a39Sopenharmony_ci# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
54e1051a39Sopenharmony_ci# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
55e1051a39Sopenharmony_ci# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
56e1051a39Sopenharmony_ci# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
57e1051a39Sopenharmony_ci# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
58e1051a39Sopenharmony_ci#
59e1051a39Sopenharmony_ci# if $output doesn't have an extension, it's not an output file
60e1051a39Sopenharmony_ci# so use it for $flavour.
61e1051a39Sopenharmony_ci
62e1051a39Sopenharmony_ci# $output is the last argument if it looks like a file (it has an extension)
63e1051a39Sopenharmony_ci# $flavour is the first argument if it doesn't look like a file
# Output file: the trailing argument, taken only if it ends in ".<word>".
$output = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop(@ARGV) : undef;
# Flavour: the leading argument, taken only if it contains no dot.
# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
$flavour = (@ARGV && $ARGV[0] !~ m|\.|) ? shift(@ARGV) : "o32";

# Pointer- and register-width dependent mnemonics plus the register size
# in bytes.  The 64-bit pointer forms incidentally work even on n32.
($PTR_LA,$PTR_ADD,$PTR_SUB,$PTR_SLL,$REG_S,$REG_L,$SZREG) =
	($flavour =~ /64|n32/i)
	? ("dla","daddu","dsubu","dsll","sd","ld",8)
	: ("la", "addu", "subu", "sll", "sw","lw",4);

# Register operand handed to the .cpload/.cpsetup directives further down.
if ($flavour =~ /nubi/i)	{ $pf = $t0; }
else				{ $pf = $t2; }
86e1051a39Sopenharmony_ci#
87e1051a39Sopenharmony_ci# <appro@openssl.org>
88e1051a39Sopenharmony_ci#
89e1051a39Sopenharmony_ci######################################################################
90e1051a39Sopenharmony_ci
# Endianness probe, used when a compiler is named in $ENV{CC}: the literal
# token MIPSEB is piped through the preprocessor.  If it comes back
# unchanged the flag is set to 0; if the preprocessor replaced it (i.e. it
# is a predefined macro) the flag is set to 1.
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

# Redirect STDOUT only when an output file was actually named on the
# command line ($output is undef otherwise, in which case the generated
# code goes to the inherited STDOUT).  A named file that cannot be opened
# is a hard error: continuing would leave the requested file missing
# while the script still appeared to succeed.
if (defined($output) && $output ne "") {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}

# No compiler to ask: fall back to the byte order of the machine that is
# running this script.
if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); }
96e1051a39Sopenharmony_ci
# Pick the SHA-512 or SHA-256 parameter set.  The decision is driven solely
# by whether the output file name contains "512".
#   $label   suffix used in the emitted symbol names (sha<label>_..., K<label>)
#   $SZ      size of one word in bytes
#   $rounds  number of rounds, i.e. number of K entries
#   $lastK   value the round loop compares against, after masking with
#            0xfff, to decide when to stop
if ($output =~ /512/) {
	($label,$SZ,$rounds,$lastK) = ("512",8,80,0x817);
	# load, store, shift left/right logical, add and rotate mnemonics
	($LD,$ST,$SLL,$SRL,$ADDU,$ROTR) = ("ld","sd","dsll","dsrl","daddu","drotr");
	@Sigma0 = (28,34,39);
	@Sigma1 = (14,18,41);
	@sigma0 = ( 7, 1, 8);	# right shift first
	@sigma1 = ( 6,19,61);	# right shift first
} else {
	($label,$SZ,$rounds,$lastK) = ("256",4,64,0x8f2);
	# load, store, shift left/right logical, add and rotate mnemonics
	($LD,$ST,$SLL,$SRL,$ADDU,$ROTR) = ("lw","sw","sll","srl","addu","rotr");
	@Sigma0 = ( 2,13,22);
	@Sigma1 = ( 6,11,25);
	@sigma0 = ( 3, 7,18);	# right shift first
	@sigma1 = (10,17,19);	# right shift first
}
128e1051a39Sopenharmony_ci
# Byte offsets, within one word, used by the left/right unaligned load
# pairs emitted below.
if ($big_endian)	{ $MSB = 0; }
else			{ $MSB = $SZ-1; }
$LSB = ($SZ-1) & ~$MSB;

# Registers for the eight working variables, and the 16-entry register
# window for the message schedule.
@V = map(sprintf('$%d',$_), 1, 2, 3, 7, 24, 25, 30, 31);
($A,$B,$C,$D,$E,$F,$G,$H) = @V;
@X = map(sprintf('$%d',$_), 8 .. 23);

# Function arguments; the register holding $len doubles as $Ktbl.
($ctx,$inp,$len) = ($a0,$a1,$a2);
$Ktbl = $len;
# BODY_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for one round to $code.
#   $i       round index
#   $a..$h   register names holding the eight working variables, in the
#            rotation that applies to this round
# The message word for the round is taken from @X[0]; @X[4..7] serve as
# scratch registers ($T1 and $tmp0..$tmp2).  Nothing is returned; the sub
# works purely by side effect on $code.
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]);

# For rounds 0..14 load the next input word ($i+1) into @X[1]: a single
# load on R6, otherwise a left/right load pair at the $MSB/$LSB offsets.
$code.=<<___ if ($i<15);
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	${LD}	@X[1],`($i+1)*$SZ`($inp)
#else
	${LD}l	@X[1],`($i+1)*$SZ+$MSB`($inp)
	${LD}r	@X[1],`($i+1)*$SZ+$LSB`($inp)
#endif
___
# Little-endian targets, 32-bit words, rounds 0..15: byte-swap @X[0],
# using wsbh/rotr on R2 and a shift/mask sequence otherwise.
$code.=<<___	if (!$big_endian && $i<16 && $SZ==4);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[0],@X[0]		# byte swap($i)
	rotr	@X[0],@X[0],16
#else
	srl	$tmp0,@X[0],24		# byte swap($i)
	srl	$tmp1,@X[0],8
	andi	$tmp2,@X[0],0xFF00
	sll	@X[0],@X[0],24
	andi	$tmp1,0xFF00
	sll	$tmp2,$tmp2,8
	or	@X[0],$tmp0
	or	$tmp1,$tmp2
	or	@X[0],$tmp1
#endif
___
# Little-endian targets, 64-bit words, rounds 0..15: byte-swap @X[0],
# using dsbh/dshd on MIPS64R2 and a shift/mask sequence otherwise.
$code.=<<___	if (!$big_endian && $i<16 && $SZ==8);
#if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	@X[0],@X[0]		# byte swap($i)
	dshd	@X[0],@X[0]
#else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF
	and	$tmp1,@X[0],$tmp0	# byte swap($i)
	dsrl	$tmp2,@X[0],24
	dsll	$tmp1,24
	and	$tmp2,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	and	$tmp2,@X[0],$tmp0
	dsrl	@X[0],8
	dsll	$tmp2,8
	and	@X[0],$tmp0
	or	$tmp1,$tmp2
	or	@X[0],$tmp1
	dsrl	$tmp1,@X[0],32
	dsll	@X[0],32
	or	@X[0],$tmp1
#endif
___
# The round proper, emitted in two assemble-time variants: rotate
# instructions on R2, shift pairs otherwise.  In both variants
#   T1 = X[0] + h + Sigma1(e) + Ch(e,f,g) + K[i]
#   d += T1
#   h  = Sigma0(a) + Maj(a,b,c) + T1
# with Ch computed as ((f^g)&e)^g and Maj as (b&c) + ((b^c)&a).  X[0] is
# also written to slot $i%16 of the ring buffer at the bottom of the frame.
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	xor	$tmp2,$f,$g			# $i
	$ROTR	$tmp0,$e,@Sigma1[0]
	$ADDU	$T1,$X[0],$h
	$ROTR	$tmp1,$e,@Sigma1[1]
	and	$tmp2,$e
	$ROTR	$h,$e,@Sigma1[2]
	xor	$tmp0,$tmp1
	$ROTR	$tmp1,$a,@Sigma0[0]
	xor	$tmp2,$g			# Ch(e,f,g)
	xor	$tmp0,$h			# Sigma1(e)

	$ROTR	$h,$a,@Sigma0[1]
	$ADDU	$T1,$tmp2
	$LD	$tmp2,`$i*$SZ`($Ktbl)		# K[$i]
	xor	$h,$tmp1
	$ROTR	$tmp1,$a,@Sigma0[2]
	$ADDU	$T1,$tmp0
	and	$tmp0,$b,$c
	xor	$h,$tmp1			# Sigma0(a)
	xor	$tmp1,$b,$c
#else
	$ADDU	$T1,$X[0],$h			# $i
	$SRL	$h,$e,@Sigma1[0]
	xor	$tmp2,$f,$g
	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[2]`
	and	$tmp2,$e
	$SRL	$tmp0,$e,@Sigma1[1]
	xor	$h,$tmp1
	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[1]`
	xor	$h,$tmp0
	$SRL	$tmp0,$e,@Sigma1[2]
	xor	$h,$tmp1
	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[0]`
	xor	$h,$tmp0
	xor	$tmp2,$g			# Ch(e,f,g)
	xor	$tmp0,$tmp1,$h			# Sigma1(e)

	$SRL	$h,$a,@Sigma0[0]
	$ADDU	$T1,$tmp2
	$LD	$tmp2,`$i*$SZ`($Ktbl)		# K[$i]
	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[2]`
	$ADDU	$T1,$tmp0
	$SRL	$tmp0,$a,@Sigma0[1]
	xor	$h,$tmp1
	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[1]`
	xor	$h,$tmp0
	$SRL	$tmp0,$a,@Sigma0[2]
	xor	$h,$tmp1
	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[0]`
	xor	$h,$tmp0
	and	$tmp0,$b,$c
	xor	$h,$tmp1			# Sigma0(a)
	xor	$tmp1,$b,$c
#endif
	$ST	@X[0],`($i%16)*$SZ`($sp)	# offload to ring buffer
	$ADDU	$h,$tmp0
	and	$tmp1,$a
	$ADDU	$T1,$tmp2			# +=K[$i]
	$ADDU	$h,$tmp1			# +=Maj(a,b,c)
	$ADDU	$d,$T1
	$ADDU	$h,$T1
___
# From round 13 on, reload @X[3] from ring-buffer slot ($i+3)%16.
$code.=<<___ if ($i>=13);
	$LD	@X[3],`(($i+3)%16)*$SZ`($sp)	# prefetch from ring buffer
___
}
260e1051a39Sopenharmony_ci
# BODY_16_XX($i,$a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for a round with message-schedule expansion to
# $code.  It first emits the update
#   X[0] += X[9] + sigma0(X[1]) + sigma1(X[14])
# (rotate-based on R2, shift-based otherwise) and then delegates the round
# itself to BODY_00_15 with the unchanged argument list.  @X[4..7] serve
# as scratch registers; $i itself is only used in the emitted comment.
sub BODY_16_XX {
my $i=@_[0];
my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);

$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	$SRL	$tmp2,@X[1],@sigma0[0]		# Xupdate($i)
	$ROTR	$tmp0,@X[1],@sigma0[1]
	$ADDU	@X[0],@X[9]			# +=X[i+9]
	xor	$tmp2,$tmp0
	$ROTR	$tmp0,@X[1],@sigma0[2]

	$SRL	$tmp3,@X[14],@sigma1[0]
	$ROTR	$tmp1,@X[14],@sigma1[1]
	xor	$tmp2,$tmp0			# sigma0(X[i+1])
	$ROTR	$tmp0,@X[14],@sigma1[2]
	xor	$tmp3,$tmp1
	$ADDU	@X[0],$tmp2
#else
	$SRL	$tmp2,@X[1],@sigma0[0]		# Xupdate($i)
	$ADDU	@X[0],@X[9]			# +=X[i+9]
	$SLL	$tmp1,@X[1],`$SZ*8-@sigma0[2]`
	$SRL	$tmp0,@X[1],@sigma0[1]
	xor	$tmp2,$tmp1
	$SLL	$tmp1,`@sigma0[2]-@sigma0[1]`
	xor	$tmp2,$tmp0
	$SRL	$tmp0,@X[1],@sigma0[2]
	xor	$tmp2,$tmp1

	$SRL	$tmp3,@X[14],@sigma1[0]
	xor	$tmp2,$tmp0			# sigma0(X[i+1])
	$SLL	$tmp1,@X[14],`$SZ*8-@sigma1[2]`
	$ADDU	@X[0],$tmp2
	$SRL	$tmp0,@X[14],@sigma1[1]
	xor	$tmp3,$tmp1
	$SLL	$tmp1,`@sigma1[2]-@sigma1[1]`
	xor	$tmp3,$tmp0
	$SRL	$tmp0,@X[14],@sigma1[2]
	xor	$tmp3,$tmp1
#endif
	xor	$tmp3,$tmp0			# sigma1(X[i+14])
	$ADDU	@X[0],$tmp3
___
	# With $i >= 16 the input-load and byte-swap parts of BODY_00_15 are
	# skipped, so only the round computation is emitted.
	&BODY_00_15(@_);
}
306e1051a39Sopenharmony_ci
# Stack frame: 16 words of $SZ bytes at the bottom (the message ring
# buffer) followed by 16 register-sized slots.  The lowest of those slots,
# at 16*$SZ($sp), keeps the end-of-input pointer; saved registers are
# stored downwards from the top of the frame.
$FRAMESIZE=16*$SZ+16*$SZREG;
# .mask operand: both variants cover $16-$23, $30 and $31; the nubi variant
# additionally covers $12-$15 and $3, the extra registers it saves below.
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";

# Function header and frame description.
$code.=<<___;
#include "mips_arch.h"

.text
.set	noat
#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option	pic2
#endif

.align	5
.globl	sha${label}_block_data_order
.ent	sha${label}_block_data_order
sha${label}_block_data_order:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# Allocate the frame and save the registers common to all flavours.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-11*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-12*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-13*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-14*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Scale $len by 16*$SZ (a left shift by log2(16*$SZ)) into @X[15].  This
# has to happen before $Ktbl, which lives in the same register as $len,
# is loaded below.
$code.=<<___;
	$PTR_SLL @X[15],$len,`log(16*$SZ)/log(2)`
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Ktbl
	.cpsetup	$pf,$zero,sha${label}_block_data_order
___
# Load the table address and the eight context words, turn @X[15] into the
# end-of-input pointer and park it in the frame, then enter the per-block
# loop, which starts by loading the first input word into @X[0].
$code.=<<___;
	.set	reorder
	$PTR_LA	$Ktbl,K${label}		# PIC-ified 'load address'

	$LD	$A,0*$SZ($ctx)		# load context
	$LD	$B,1*$SZ($ctx)
	$LD	$C,2*$SZ($ctx)
	$LD	$D,3*$SZ($ctx)
	$LD	$E,4*$SZ($ctx)
	$LD	$F,5*$SZ($ctx)
	$LD	$G,6*$SZ($ctx)
	$LD	$H,7*$SZ($ctx)

	$PTR_ADD @X[15],$inp		# pointer to the end of input
	$REG_S	@X[15],16*$SZ($sp)
	b	.Loop

.align	5
.Loop:
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	${LD}	@X[0],($inp)
#else
	${LD}l	@X[0],$MSB($inp)
	${LD}r	@X[0],$LSB($inp)
#endif
___
# Rounds 0..15.  After each generated round the working-variable list @V
# is rotated right by one and the @X register window left by one, so the
# same sub emits every round with shifted register roles.
for ($i=0;$i<16;$i++)
{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
$code.=<<___;
	b	.L16_xx
.align	4
.L16_xx:
___
# $i deliberately continues from 16: exactly 16 schedule-expanding rounds
# are generated, and the emitted .L16_xx loop re-runs them at run time.
for (;$i<32;$i++)
{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
# Loop control and block finalisation:
#  - .L16_xx repeats until the low 12 bits of @X[6] equal $lastK; $Ktbl is
#    advanced by 16 entries on every pass (the add follows the branch
#    under .set noreorder);
#  - the working variables are added to the context words and stored back;
#  - .Loop repeats until $inp reaches the saved end-of-input pointer, and
#    $Ktbl is rewound by ($rounds-16) entries after that branch;
#  - finally the registers common to all flavours are restored.
$code.=<<___;
	and	@X[6],0xfff
	li	@X[7],$lastK
	.set	noreorder
	bne	@X[6],@X[7],.L16_xx
	$PTR_ADD $Ktbl,16*$SZ		# Ktbl+=16

	$REG_L	@X[15],16*$SZ($sp)	# restore pointer to the end of input
	$LD	@X[0],0*$SZ($ctx)
	$LD	@X[1],1*$SZ($ctx)
	$LD	@X[2],2*$SZ($ctx)
	$PTR_ADD $inp,16*$SZ
	$LD	@X[3],3*$SZ($ctx)
	$ADDU	$A,@X[0]
	$LD	@X[4],4*$SZ($ctx)
	$ADDU	$B,@X[1]
	$LD	@X[5],5*$SZ($ctx)
	$ADDU	$C,@X[2]
	$LD	@X[6],6*$SZ($ctx)
	$ADDU	$D,@X[3]
	$LD	@X[7],7*$SZ($ctx)
	$ADDU	$E,@X[4]
	$ST	$A,0*$SZ($ctx)
	$ADDU	$F,@X[5]
	$ST	$B,1*$SZ($ctx)
	$ADDU	$G,@X[6]
	$ST	$C,2*$SZ($ctx)
	$ADDU	$H,@X[7]
	$ST	$D,3*$SZ($ctx)
	$ST	$E,4*$SZ($ctx)
	$ST	$F,5*$SZ($ctx)
	$ST	$G,6*$SZ($ctx)
	$ST	$H,7*$SZ($ctx)

	bne	$inp,@X[15],.Loop
	$PTR_SUB $Ktbl,`($rounds-16)*$SZ`	# rewind $Ktbl

	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
___
# The nubi flavours saved five more registers in the prologue.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-11*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-12*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-13*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-14*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
___
# Return (the frame is released by the instruction following the jump,
# still under .set noreorder) and open the read-only data section that
# holds the K table.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	sha${label}_block_data_order

.rdata
.align	5
K${label}:
___
# Round-constant table placed at the K${label} label: 64 32-bit words when
# $SZ is 4, otherwise 80 64-bit words.  The low 12 bits of the last entry
# of each table equal the $lastK value chosen for that word size, which is
# what the generated loop tests to detect the end of the table.
if ($SZ==4) {
$code.=<<___;
	.word	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.word	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.word	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.word	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.word	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.word	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.word	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.word	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.word	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.word	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.word	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.word	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.word	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.word	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.word	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.word	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
___
} else {
$code.=<<___;
	.dword	0x428a2f98d728ae22, 0x7137449123ef65cd
	.dword	0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
	.dword	0x3956c25bf348b538, 0x59f111f1b605d019
	.dword	0x923f82a4af194f9b, 0xab1c5ed5da6d8118
	.dword	0xd807aa98a3030242, 0x12835b0145706fbe
	.dword	0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
	.dword	0x72be5d74f27b896f, 0x80deb1fe3b1696b1
	.dword	0x9bdc06a725c71235, 0xc19bf174cf692694
	.dword	0xe49b69c19ef14ad2, 0xefbe4786384f25e3
	.dword	0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
	.dword	0x2de92c6f592b0275, 0x4a7484aa6ea6e483
	.dword	0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
	.dword	0x983e5152ee66dfab, 0xa831c66d2db43210
	.dword	0xb00327c898fb213f, 0xbf597fc7beef0ee4
	.dword	0xc6e00bf33da88fc2, 0xd5a79147930aa725
	.dword	0x06ca6351e003826f, 0x142929670a0e6e70
	.dword	0x27b70a8546d22ffc, 0x2e1b21385c26c926
	.dword	0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
	.dword	0x650a73548baf63de, 0x766a0abb3c77b2a8
	.dword	0x81c2c92e47edaee6, 0x92722c851482353b
	.dword	0xa2bfe8a14cf10364, 0xa81a664bbc423001
	.dword	0xc24b8b70d0f89791, 0xc76c51a30654be30
	.dword	0xd192e819d6ef5218, 0xd69906245565a910
	.dword	0xf40e35855771202a, 0x106aa07032bbd1b8
	.dword	0x19a4c116b8d2d0c8, 0x1e376c085141ab53
	.dword	0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
	.dword	0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
	.dword	0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
	.dword	0x748f82ee5defb2fc, 0x78a5636f43172f60
	.dword	0x84c87814a1f0ab72, 0x8cc702081a6439ec
	.dword	0x90befffa23631e28, 0xa4506cebde82bde9
	.dword	0xbef9a3f7b2c67915, 0xc67178f2e372532b
	.dword	0xca273eceea26619c, 0xd186b8c721c0c207
	.dword	0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
	.dword	0x06f067aa72176fba, 0x0a637dc5a2c898a6
	.dword	0x113f9804bef90dae, 0x1b710b35131c471b
	.dword	0x28db77f523047d84, 0x32caab7b40c72493
	.dword	0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
	.dword	0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
	.dword	0x5fcb6fab3ad6faec, 0x6c44198c4a475817
___
}
# Identification string and final alignment directive.
$code.=<<___;
.asciiz	"SHA${label} for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
.align	5

___
524e1051a39Sopenharmony_ci
# Replace every `...` span in the generated text with the result of
# evaluating it as a Perl expression (this is how the offset arithmetic
# embedded in the templates gets resolved), then write the result out.
$code =~ s{`([^`]*)`}{eval $1}gem;
print $code;
# Buffered write errors only surface at close time, so check it.
close(STDOUT) or die "error closing STDOUT: $!";
528