1e1051a39Sopenharmony_ci#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci
9e1051a39Sopenharmony_ci
10e1051a39Sopenharmony_ci# ====================================================================
11e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
13e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
14e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
15e1051a39Sopenharmony_ci# ====================================================================
16e1051a39Sopenharmony_ci
17e1051a39Sopenharmony_ci# SHA256/512 block procedure for PA-RISC.
18e1051a39Sopenharmony_ci
19e1051a39Sopenharmony_ci# June 2009.
20e1051a39Sopenharmony_ci#
21e1051a39Sopenharmony_ci# SHA256 performance is >75% better than gcc 3.2 generated code on
22e1051a39Sopenharmony_ci# PA-7100LC. Compared to code generated by vendor compiler this
23e1051a39Sopenharmony_ci# implementation is almost 70% faster in 64-bit build, but delivers
24e1051a39Sopenharmony_ci# virtually same performance in 32-bit build on PA-8600.
25e1051a39Sopenharmony_ci#
26e1051a39Sopenharmony_ci# SHA512 performance is >2.9x better than gcc 3.2 generated code on
27e1051a39Sopenharmony_ci# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the
28e1051a39Sopenharmony_ci# code is executed on PA-RISC 2.0 processor and switches to 64-bit
29e1051a39Sopenharmony_ci# code path delivering adequate performance even in "blended" 32-bit
30e1051a39Sopenharmony_ci# build. Though 64-bit code is not any faster than code generated by
31e1051a39Sopenharmony_ci# vendor compiler on PA-8600...
32e1051a39Sopenharmony_ci#
33e1051a39Sopenharmony_ci# Special thanks to polarhome.com for providing HP-UX account.
34e1051a39Sopenharmony_ci
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Send the generated assembly to $output when one was given.  The
# three-argument form keeps the file name from being parsed as an open
# mode, and a failed open aborts the run instead of silently producing
# no output.
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
41e1051a39Sopenharmony_ci
# ABI selection.  A flavour containing "64" picks the first row, anything
# else (including no flavour at all) the second one.  The columns are:
# assembler .LEVEL, pointer size in bytes, frame marker size, offset below
# %sp at which %r2 is saved, and the four mnemonics the prologue/epilogue
# use to save and restore registers.
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
    $flavour =~ /64/
	? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
	: ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
61e1051a39Sopenharmony_ci
# Hash variant selection: an output file name containing "512" generates
# SHA-512, anything else SHA-256.  $SZ is the word size in bytes,
# $LAST10BITS the low ten bits of the final round constant (the generated
# code compares against it to notice the end of the constant table), and
# $LD/$LDM/$ST the load, load-and-modify and store mnemonics for one word.
($func, $SZ, $rounds, $LAST10BITS, $LD, $LDM, $ST) =
    $output =~ /512/
	? ("sha512_block_data_order", 8, 80, 0x017, "ldd", "ldd,ma", "std")
	: ("sha256_block_data_order", 4, 64, 0x0f2, "ldw", "ldwm",   "stw");

# Rotate/shift amounts of the Sigma0/Sigma1/sigma0/sigma1 functions.
if ($SZ == 8) {
	@Sigma0 = (28, 34, 39);
	@Sigma1 = (14, 18, 41);
	@sigma0 = ( 1,  8,  7);
	@sigma1 = (19, 61,  6);
} else {
	@Sigma0 = ( 2, 13, 22);
	@Sigma1 = ( 6, 11, 25);
	@sigma0 = ( 7, 18,  3);
	@sigma1 = (17, 19, 10);
}
87e1051a39Sopenharmony_ci
# Stack frame layout.
$XOFF  = 16*$SZ + 32;				# local variables
$FRAME = 16*$SIZE_T + $FRAME_MARKER + $XOFF;	# 16 saved regs + frame marker
						#   [+ argument transfer] + locals
$XOFF += $FRAME_MARKER;		# distance between %sp and local variables

# Incoming arguments.  Their registers are reused as scratch below, which
# is why the generated code spills them to the frame.
($ctx, $inp, $num) = ("%r26", "%r25", "%r24");	# zapped by $a0, $a1, $t0

# Scratch registers and the constant table pointer.
($a0, $a1, $t0, $t1, $Tbl) = ("%r26", "%r25", "%r24", "%r29", "%r31");

# Working variables a..h.
@V = ($A,$B,$C,$D,$E,$F,$G,$H) = map { "%r$_" } (17..23, 28);

# Message schedule X[0..15], plus $inp as a 17th slot.
@X = ((map { "%r$_" } (1..16)), $inp);
108e1051a39Sopenharmony_ci
# ROUND_00_15($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for one compression round to $code.  $i is the round
# index (selects the message word @X[$i%16]); $a..$h are the register names
# that hold the eight working variables for this round.
#
# The emitted code expects the round constant to be in $t1 already: the
# first "addl $t1,$h,$h" consumes it before $t1 is reused for Ch().  For
# $i<15 the round also carries the load of the next constant from $Tbl.
# _ror and the `...` expressions are not plain assembly; they are presumably
# expanded by a post-processing pass that is outside this view.
109e1051a39Sopenharmony_cisub ROUND_00_15 {
110e1051a39Sopenharmony_cimy ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
# h += K + X[i] + Ch(e,f,g) + Sigma1(e); d += h; h += Sigma0(a) + Maj(a,b,c)
# (the two halves are interleaved, see the inline assembly comments).
111e1051a39Sopenharmony_ci$code.=<<___;
112e1051a39Sopenharmony_ci	_ror	$e,$Sigma1[0],$a0
113e1051a39Sopenharmony_ci	and	$f,$e,$t0
114e1051a39Sopenharmony_ci	_ror	$e,$Sigma1[1],$a1
115e1051a39Sopenharmony_ci	addl	$t1,$h,$h
116e1051a39Sopenharmony_ci	andcm	$g,$e,$t1
117e1051a39Sopenharmony_ci	xor	$a1,$a0,$a0
118e1051a39Sopenharmony_ci	_ror	$a1,`$Sigma1[2]-$Sigma1[1]`,$a1
119e1051a39Sopenharmony_ci	or	$t0,$t1,$t1		; Ch(e,f,g)
120e1051a39Sopenharmony_ci	addl	@X[$i%16],$h,$h
121e1051a39Sopenharmony_ci	xor	$a0,$a1,$a1		; Sigma1(e)
122e1051a39Sopenharmony_ci	addl	$t1,$h,$h
123e1051a39Sopenharmony_ci	_ror	$a,$Sigma0[0],$a0
124e1051a39Sopenharmony_ci	addl	$a1,$h,$h
125e1051a39Sopenharmony_ci
126e1051a39Sopenharmony_ci	_ror	$a,$Sigma0[1],$a1
127e1051a39Sopenharmony_ci	and	$a,$b,$t0
128e1051a39Sopenharmony_ci	and	$a,$c,$t1
129e1051a39Sopenharmony_ci	xor	$a1,$a0,$a0
130e1051a39Sopenharmony_ci	_ror	$a1,`$Sigma0[2]-$Sigma0[1]`,$a1
131e1051a39Sopenharmony_ci	xor	$t1,$t0,$t0
132e1051a39Sopenharmony_ci	and	$b,$c,$t1
133e1051a39Sopenharmony_ci	xor	$a0,$a1,$a1		; Sigma0(a)
134e1051a39Sopenharmony_ci	addl	$h,$d,$d
135e1051a39Sopenharmony_ci	xor	$t1,$t0,$t0		; Maj(a,b,c)
136e1051a39Sopenharmony_ci	`"$LDM	$SZ($Tbl),$t1" if ($i<15)`
137e1051a39Sopenharmony_ci	addl	$a1,$h,$h
138e1051a39Sopenharmony_ci	addl	$t0,$h,$h
139e1051a39Sopenharmony_ci
140e1051a39Sopenharmony_ci___
141e1051a39Sopenharmony_ci}
142e1051a39Sopenharmony_ci
# ROUND_16_xx($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for one round with message schedule expansion to
# $code.  Called with $i in 16..31; the index is reduced by 16 so that it
# addresses the 16-entry circular schedule held in @X.  The emitted code
# updates X[i] in place, loads the next round constant into $t1 and then
# reuses ROUND_00_15 for the compression step itself.
143e1051a39Sopenharmony_cisub ROUND_16_xx {
144e1051a39Sopenharmony_cimy ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
145e1051a39Sopenharmony_ci$i-=16;
# X[i] += X[(i+9)%16] + sigma0(X[(i+1)%16]) + sigma1(X[(i+14)%16])
146e1051a39Sopenharmony_ci$code.=<<___;
147e1051a39Sopenharmony_ci	_ror	@X[($i+1)%16],$sigma0[0],$a0
148e1051a39Sopenharmony_ci	_ror	@X[($i+1)%16],$sigma0[1],$a1
149e1051a39Sopenharmony_ci	addl	@X[($i+9)%16],@X[$i],@X[$i]
150e1051a39Sopenharmony_ci	_ror	@X[($i+14)%16],$sigma1[0],$t0
151e1051a39Sopenharmony_ci	_ror	@X[($i+14)%16],$sigma1[1],$t1
152e1051a39Sopenharmony_ci	xor	$a1,$a0,$a0
153e1051a39Sopenharmony_ci	_shr	@X[($i+1)%16],$sigma0[2],$a1
154e1051a39Sopenharmony_ci	xor	$t1,$t0,$t0
155e1051a39Sopenharmony_ci	_shr	@X[($i+14)%16],$sigma1[2],$t1
156e1051a39Sopenharmony_ci	xor	$a1,$a0,$a0		; sigma0(X[(i+1)&0x0f])
157e1051a39Sopenharmony_ci	xor	$t1,$t0,$t0		; sigma1(X[(i+14)&0x0f])
158e1051a39Sopenharmony_ci	$LDM	$SZ($Tbl),$t1
159e1051a39Sopenharmony_ci	addl	$a0,@X[$i],@X[$i]
160e1051a39Sopenharmony_ci	addl	$t0,@X[$i],@X[$i]
161e1051a39Sopenharmony_ci___
# Only in the last round of the 16-round loop body: compare the low ten
# bits of the constant just loaded with $LAST10BITS and, on a match, add 1
# to $Tbl.  The odd pointer is what the "bb" after the loop body tests.
162e1051a39Sopenharmony_ci$code.=<<___ if ($i==15);
163e1051a39Sopenharmony_ci	extru	$t1,31,10,$a1
164e1051a39Sopenharmony_ci	comiclr,<> $LAST10BITS,$a1,%r0
165e1051a39Sopenharmony_ci	ldo	1($Tbl),$Tbl		; signal end of $Tbl
166e1051a39Sopenharmony_ci___
# $i+16 >= 16 here, so ROUND_00_15 does not emit a second constant load.
167e1051a39Sopenharmony_ci&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
168e1051a39Sopenharmony_ci}
169e1051a39Sopenharmony_ci
# Start the generated assembly: architecture level, code subspace, and the
# 64-byte aligned L$table label that the round constants follow.
170e1051a39Sopenharmony_ci$code=<<___;
171e1051a39Sopenharmony_ci	.LEVEL	$LEVEL
172e1051a39Sopenharmony_ci	.SPACE	\$TEXT\$
173e1051a39Sopenharmony_ci	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
174e1051a39Sopenharmony_ci
175e1051a39Sopenharmony_ci	.ALIGN	64
176e1051a39Sopenharmony_ciL\$table
177e1051a39Sopenharmony_ci___
# SHA-512 round constants (emitted only when $SZ==8).  Each 64-bit constant
# is written as two consecutive 32-bit .WORDs, two constants per line.
178e1051a39Sopenharmony_ci$code.=<<___ if ($SZ==8);
179e1051a39Sopenharmony_ci	.WORD	0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
180e1051a39Sopenharmony_ci	.WORD	0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
181e1051a39Sopenharmony_ci	.WORD	0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
182e1051a39Sopenharmony_ci	.WORD	0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
183e1051a39Sopenharmony_ci	.WORD	0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
184e1051a39Sopenharmony_ci	.WORD	0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
185e1051a39Sopenharmony_ci	.WORD	0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
186e1051a39Sopenharmony_ci	.WORD	0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
187e1051a39Sopenharmony_ci	.WORD	0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
188e1051a39Sopenharmony_ci	.WORD	0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
189e1051a39Sopenharmony_ci	.WORD	0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
190e1051a39Sopenharmony_ci	.WORD	0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
191e1051a39Sopenharmony_ci	.WORD	0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
192e1051a39Sopenharmony_ci	.WORD	0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
193e1051a39Sopenharmony_ci	.WORD	0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
194e1051a39Sopenharmony_ci	.WORD	0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
195e1051a39Sopenharmony_ci	.WORD	0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
196e1051a39Sopenharmony_ci	.WORD	0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
197e1051a39Sopenharmony_ci	.WORD	0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
198e1051a39Sopenharmony_ci	.WORD	0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
199e1051a39Sopenharmony_ci	.WORD	0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
200e1051a39Sopenharmony_ci	.WORD	0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
201e1051a39Sopenharmony_ci	.WORD	0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
202e1051a39Sopenharmony_ci	.WORD	0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
203e1051a39Sopenharmony_ci	.WORD	0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
204e1051a39Sopenharmony_ci	.WORD	0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
205e1051a39Sopenharmony_ci	.WORD	0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
206e1051a39Sopenharmony_ci	.WORD	0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
207e1051a39Sopenharmony_ci	.WORD	0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
208e1051a39Sopenharmony_ci	.WORD	0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
209e1051a39Sopenharmony_ci	.WORD	0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
210e1051a39Sopenharmony_ci	.WORD	0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
211e1051a39Sopenharmony_ci	.WORD	0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
212e1051a39Sopenharmony_ci	.WORD	0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
213e1051a39Sopenharmony_ci	.WORD	0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
214e1051a39Sopenharmony_ci	.WORD	0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
215e1051a39Sopenharmony_ci	.WORD	0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
216e1051a39Sopenharmony_ci	.WORD	0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
217e1051a39Sopenharmony_ci	.WORD	0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
218e1051a39Sopenharmony_ci	.WORD	0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
219e1051a39Sopenharmony_ci___
# SHA-256 round constants (emitted only when $SZ==4), one 32-bit .WORD
# each, four per line.  The low ten bits of the last entry equal the
# $LAST10BITS value used for SHA-256.
220e1051a39Sopenharmony_ci$code.=<<___ if ($SZ==4);
221e1051a39Sopenharmony_ci	.WORD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
222e1051a39Sopenharmony_ci	.WORD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
223e1051a39Sopenharmony_ci	.WORD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
224e1051a39Sopenharmony_ci	.WORD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
225e1051a39Sopenharmony_ci	.WORD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
226e1051a39Sopenharmony_ci	.WORD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
227e1051a39Sopenharmony_ci	.WORD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
228e1051a39Sopenharmony_ci	.WORD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
229e1051a39Sopenharmony_ci	.WORD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
230e1051a39Sopenharmony_ci	.WORD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
231e1051a39Sopenharmony_ci	.WORD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
232e1051a39Sopenharmony_ci	.WORD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
233e1051a39Sopenharmony_ci	.WORD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
234e1051a39Sopenharmony_ci	.WORD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
235e1051a39Sopenharmony_ci	.WORD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
236e1051a39Sopenharmony_ci	.WORD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
237e1051a39Sopenharmony_ci___
# Function entry for $func(ctx, inp, num).  The emitted prologue saves %r2
# and %r3..%r18, turns the block count in $num into a pointer to the end of
# the input (count << log2(16*$SZ), plus $inp), spills the three arguments
# to the frame because their registers are reused as scratch, and computes
# the address of L$table relative to L$pic.
238e1051a39Sopenharmony_ci$code.=<<___;
239e1051a39Sopenharmony_ci
240e1051a39Sopenharmony_ci	.EXPORT	$func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
241e1051a39Sopenharmony_ci	.ALIGN	64
242e1051a39Sopenharmony_ci$func
243e1051a39Sopenharmony_ci	.PROC
244e1051a39Sopenharmony_ci	.CALLINFO	FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
245e1051a39Sopenharmony_ci	.ENTRY
246e1051a39Sopenharmony_ci	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
247e1051a39Sopenharmony_ci	$PUSHMA	%r3,$FRAME(%sp)
248e1051a39Sopenharmony_ci	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
249e1051a39Sopenharmony_ci	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
250e1051a39Sopenharmony_ci	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
251e1051a39Sopenharmony_ci	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
252e1051a39Sopenharmony_ci	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
253e1051a39Sopenharmony_ci	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
254e1051a39Sopenharmony_ci	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
255e1051a39Sopenharmony_ci	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
256e1051a39Sopenharmony_ci	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
257e1051a39Sopenharmony_ci	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
258e1051a39Sopenharmony_ci	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
259e1051a39Sopenharmony_ci	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
260e1051a39Sopenharmony_ci	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)
261e1051a39Sopenharmony_ci	$PUSH	%r17,`-$FRAME+14*$SIZE_T`(%sp)
262e1051a39Sopenharmony_ci	$PUSH	%r18,`-$FRAME+15*$SIZE_T`(%sp)
263e1051a39Sopenharmony_ci
264e1051a39Sopenharmony_ci	_shl	$num,`log(16*$SZ)/log(2)`,$num
265e1051a39Sopenharmony_ci	addl	$inp,$num,$num		; $num to point at the end of $inp
266e1051a39Sopenharmony_ci
267e1051a39Sopenharmony_ci	$PUSH	$num,`-$FRAME_MARKER-4*$SIZE_T`(%sp)	; save arguments
268e1051a39Sopenharmony_ci	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
269e1051a39Sopenharmony_ci	$PUSH	$ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
270e1051a39Sopenharmony_ci
271e1051a39Sopenharmony_ci	blr	%r0,$Tbl
272e1051a39Sopenharmony_ci	ldi	3,$t1
273e1051a39Sopenharmony_ciL\$pic
274e1051a39Sopenharmony_ci	andcm	$Tbl,$t1,$Tbl		; wipe privilege level
275e1051a39Sopenharmony_ci	ldo	L\$table-L\$pic($Tbl),$Tbl
276e1051a39Sopenharmony_ci___
# SHA-512 in a 32-bit build only: run-time dispatch between the 64-bit code
# that follows and the 32-bit-only code at L$parisc1 (see the file header
# about detecting a PA-RISC 2.0 processor).
# NOTE(review): presumably the extrd,u,*= nullifies the branch on a 2.0
# CPU and is harmless on 1.x, so only 1.x reaches L$parisc1 -- confirm
# against the architecture manual.
277e1051a39Sopenharmony_ci$code.=<<___ if ($SZ==8 && $SIZE_T==4);
278e1051a39Sopenharmony_ci	ldi	31,$t1
279e1051a39Sopenharmony_ci	mtctl	$t1,%cr11
280e1051a39Sopenharmony_ci	extrd,u,*= $t1,%sar,1,$t1	; executes on PA-RISC 1.0
281e1051a39Sopenharmony_ci	b	L\$parisc1
282e1051a39Sopenharmony_ci	nop
283e1051a39Sopenharmony_ci___
# Load the eight state words into $A..$H, then set %sar (%cr11) to
# 8*$SZ - 8*($inp mod $SZ), the bit count used to realign misaligned input.
# L$oop is the per-block loop head: it preloads the first round constant
# into $t1 and leaves the $SZ-aligned input address in $t0.
284e1051a39Sopenharmony_ci$code.=<<___;
285e1051a39Sopenharmony_ci	$LD	`0*$SZ`($ctx),$A	; load context
286e1051a39Sopenharmony_ci	$LD	`1*$SZ`($ctx),$B
287e1051a39Sopenharmony_ci	$LD	`2*$SZ`($ctx),$C
288e1051a39Sopenharmony_ci	$LD	`3*$SZ`($ctx),$D
289e1051a39Sopenharmony_ci	$LD	`4*$SZ`($ctx),$E
290e1051a39Sopenharmony_ci	$LD	`5*$SZ`($ctx),$F
291e1051a39Sopenharmony_ci	$LD	`6*$SZ`($ctx),$G
292e1051a39Sopenharmony_ci	$LD	`7*$SZ`($ctx),$H
293e1051a39Sopenharmony_ci
294e1051a39Sopenharmony_ci	extru	$inp,31,`log($SZ)/log(2)`,$t0
295e1051a39Sopenharmony_ci	sh3addl	$t0,%r0,$t0
296e1051a39Sopenharmony_ci	subi	`8*$SZ`,$t0,$t0
297e1051a39Sopenharmony_ci	mtctl	$t0,%cr11		; load %sar with align factor
298e1051a39Sopenharmony_ci
299e1051a39Sopenharmony_ciL\$oop
300e1051a39Sopenharmony_ci	ldi	`$SZ-1`,$t0
301e1051a39Sopenharmony_ci	$LDM	$SZ($Tbl),$t1
302e1051a39Sopenharmony_ci	andcm	$inp,$t0,$t0		; align $inp
303e1051a39Sopenharmony_ci___
# Load the 16-word input block from the aligned address in $t0 into
# @X[0..15].  When $inp equals the aligned address the code branches to
# L$aligned; otherwise a 17th word is loaded into @X[16] (the $inp register)
# and 16 _align operations combine each pair of neighbouring words.
# NOTE(review): the load of @X[15] follows the cmpb and is presumably
# executed in its delay slot on both paths -- confirm.
304e1051a39Sopenharmony_ci	for ($i=0;$i<15;$i++) {		# load input block
305e1051a39Sopenharmony_ci	$code.="\t$LD	`$SZ*$i`($t0),@X[$i]\n";		}
306e1051a39Sopenharmony_ci$code.=<<___;
307e1051a39Sopenharmony_ci	cmpb,*=	$inp,$t0,L\$aligned
308e1051a39Sopenharmony_ci	$LD	`$SZ*15`($t0),@X[15]
309e1051a39Sopenharmony_ci	$LD	`$SZ*16`($t0),@X[16]
310e1051a39Sopenharmony_ci___
311e1051a39Sopenharmony_ci	for ($i=0;$i<16;$i++) {		# align data
312e1051a39Sopenharmony_ci	$code.="\t_align	@X[$i],@X[$i+1],@X[$i]\n";	}
313e1051a39Sopenharmony_ci$code.=<<___;
314e1051a39Sopenharmony_ciL\$aligned
315e1051a39Sopenharmony_ci	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
316e1051a39Sopenharmony_ci___
317e1051a39Sopenharmony_ci
# Emit rounds 0..15 straight-line, then a 16-round body with schedule
# expansion under the L$rounds label (the generated code branches back to
# L$rounds until the end of the constant table is signalled).  After each
# round @V is rotated by one, so the register that held h becomes the next
# round's a and no data has to be moved between registers.
318e1051a39Sopenharmony_cifor($i=0;$i<16;$i++)	{ &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
319e1051a39Sopenharmony_ci$code.=<<___;
320e1051a39Sopenharmony_ciL\$rounds
321e1051a39Sopenharmony_ci	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
322e1051a39Sopenharmony_ci___
# $i carries over from the loop above and continues from 16 to 31.
323e1051a39Sopenharmony_cifor(;$i<32;$i++)	{ &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
# Tail of the per-block loop: repeat the 16-round body until $Tbl carries
# the end-of-table mark, reload the spilled arguments, rewind $Tbl (the
# extra -1 removes the mark added by ROUND_16_xx), add the previous state
# from $ctx to $A..$H and store it back, advance $inp by one block and loop
# to L$oop while $inp != $num.  The updated $inp is spilled again because
# its register is reused as scratch during the rounds.
324e1051a39Sopenharmony_ci$code.=<<___;
325e1051a39Sopenharmony_ci	bb,>=	$Tbl,31,L\$rounds	; end of $Tbl signalled?
326e1051a39Sopenharmony_ci	nop
327e1051a39Sopenharmony_ci
328e1051a39Sopenharmony_ci	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
329e1051a39Sopenharmony_ci	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
330e1051a39Sopenharmony_ci	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
331e1051a39Sopenharmony_ci	ldo	`-$rounds*$SZ-1`($Tbl),$Tbl		; rewind $Tbl
332e1051a39Sopenharmony_ci
333e1051a39Sopenharmony_ci	$LD	`0*$SZ`($ctx),@X[0]	; load context
334e1051a39Sopenharmony_ci	$LD	`1*$SZ`($ctx),@X[1]
335e1051a39Sopenharmony_ci	$LD	`2*$SZ`($ctx),@X[2]
336e1051a39Sopenharmony_ci	$LD	`3*$SZ`($ctx),@X[3]
337e1051a39Sopenharmony_ci	$LD	`4*$SZ`($ctx),@X[4]
338e1051a39Sopenharmony_ci	$LD	`5*$SZ`($ctx),@X[5]
339e1051a39Sopenharmony_ci	addl	@X[0],$A,$A
340e1051a39Sopenharmony_ci	$LD	`6*$SZ`($ctx),@X[6]
341e1051a39Sopenharmony_ci	addl	@X[1],$B,$B
342e1051a39Sopenharmony_ci	$LD	`7*$SZ`($ctx),@X[7]
343e1051a39Sopenharmony_ci	ldo	`16*$SZ`($inp),$inp	; advance $inp
344e1051a39Sopenharmony_ci
345e1051a39Sopenharmony_ci	$ST	$A,`0*$SZ`($ctx)	; save context
346e1051a39Sopenharmony_ci	addl	@X[2],$C,$C
347e1051a39Sopenharmony_ci	$ST	$B,`1*$SZ`($ctx)
348e1051a39Sopenharmony_ci	addl	@X[3],$D,$D
349e1051a39Sopenharmony_ci	$ST	$C,`2*$SZ`($ctx)
350e1051a39Sopenharmony_ci	addl	@X[4],$E,$E
351e1051a39Sopenharmony_ci	$ST	$D,`3*$SZ`($ctx)
352e1051a39Sopenharmony_ci	addl	@X[5],$F,$F
353e1051a39Sopenharmony_ci	$ST	$E,`4*$SZ`($ctx)
354e1051a39Sopenharmony_ci	addl	@X[6],$G,$G
355e1051a39Sopenharmony_ci	$ST	$F,`5*$SZ`($ctx)
356e1051a39Sopenharmony_ci	addl	@X[7],$H,$H
357e1051a39Sopenharmony_ci	$ST	$G,`6*$SZ`($ctx)
358e1051a39Sopenharmony_ci	$ST	$H,`7*$SZ`($ctx)
359e1051a39Sopenharmony_ci
360e1051a39Sopenharmony_ci	cmpb,*<>,n $inp,$num,L\$oop
361e1051a39Sopenharmony_ci	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
362e1051a39Sopenharmony_ci___
363e1051a39Sopenharmony_ciif ($SZ==8 && $SIZE_T==4)	# SHA512 for 32-bit PA-RISC 1.0
364e1051a39Sopenharmony_ci{{
# End of the 64-bit path: jump over the 32-bit code to L$done (defined
# later in the file, outside this view), then open the 64-byte aligned
# L$parisc1 entry that the run-time CPU dispatch branches to.
365e1051a39Sopenharmony_ci$code.=<<___;
366e1051a39Sopenharmony_ci	b	L\$done
367e1051a39Sopenharmony_ci	nop
368e1051a39Sopenharmony_ci
369e1051a39Sopenharmony_ci	.ALIGN	64
370e1051a39Sopenharmony_ciL\$parisc1
371e1051a39Sopenharmony_ci___
372e1051a39Sopenharmony_ci
# Register map for the 32-bit SHA-512 path.  Every 64-bit working variable
# occupies a hi/lo register pair, in the order Ahi,Alo,Bhi,Blo,...,Hhi,Hlo.
@V = ($Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
      $Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) = map { "%r$_" } (1..16);

# Scratch registers and the constant table pointer.
($a0, $a1, $a2, $a3) = map { "%r$_" } (17..20);
($t0, $t1, $t2, $t3) = ("%r21", "%r22", "%r28", "%r29");
$Tbl = "%r31";

# Two hi/lo pairs for the current and the next message word.
@X = map { "%r$_" } (23..26);		# zaps $num,$inp,$ctx
388e1051a39Sopenharmony_ci
# ROUND_00_15_pa1($i, $ahi,$alo, ..., $hhi,$hlo, $flag)
#
# Appends one SHA-512 round for the 32-bit-only path to $code.  Each 64-bit
# quantity lives in a hi/lo register pair and 64-bit additions are emitted
# as add/addc pairs.  $i is the round index within the 16-entry schedule
# kept on the stack at -$XOFF(%sp).  $flag is set when called from
# ROUND_16_xx_pa1, which has already loaded X[i+1] itself.
#
# Side effect: rotates the file-level @X by two entries, so the pair that
# received X[i+1] in this round is the current word of the next one.
# shd with a shift count above 31 is presumably rewritten by a
# post-processing pass outside this view -- TODO confirm.
389e1051a39Sopenharmony_cisub ROUND_00_15_pa1 {
390e1051a39Sopenharmony_cimy ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
391e1051a39Sopenharmony_ci       $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
# Current word pair and the pair that receives the next word.
392e1051a39Sopenharmony_cimy ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
393e1051a39Sopenharmony_ci
# Prefetch X[i+1] from the stack unless the caller already did.
394e1051a39Sopenharmony_ci$code.=<<___ if (!$flag);
395e1051a39Sopenharmony_ci	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
396e1051a39Sopenharmony_ci	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
397e1051a39Sopenharmony_ci___
# Round body.  Once X[i] has been added to h, the $Xhi/$Xlo pair is reused
# to hold the round constant K[i] loaded from $Tbl.
398e1051a39Sopenharmony_ci$code.=<<___;
399e1051a39Sopenharmony_ci	shd	$ehi,$elo,$Sigma1[0],$t0
400e1051a39Sopenharmony_ci	 add	$Xlo,$hlo,$hlo
401e1051a39Sopenharmony_ci	shd	$elo,$ehi,$Sigma1[0],$t1
402e1051a39Sopenharmony_ci	 addc	$Xhi,$hhi,$hhi		; h += X[i]
403e1051a39Sopenharmony_ci	shd	$ehi,$elo,$Sigma1[1],$t2
404e1051a39Sopenharmony_ci	 ldwm	8($Tbl),$Xhi
405e1051a39Sopenharmony_ci	shd	$elo,$ehi,$Sigma1[1],$t3
406e1051a39Sopenharmony_ci	 ldw	-4($Tbl),$Xlo		; load K[i]
407e1051a39Sopenharmony_ci	xor	$t2,$t0,$t0
408e1051a39Sopenharmony_ci	xor	$t3,$t1,$t1
409e1051a39Sopenharmony_ci	 and	$flo,$elo,$a0
410e1051a39Sopenharmony_ci	 and	$fhi,$ehi,$a1
411e1051a39Sopenharmony_ci	shd	$ehi,$elo,$Sigma1[2],$t2
412e1051a39Sopenharmony_ci	 andcm	$glo,$elo,$a2
413e1051a39Sopenharmony_ci	shd	$elo,$ehi,$Sigma1[2],$t3
414e1051a39Sopenharmony_ci	 andcm	$ghi,$ehi,$a3
415e1051a39Sopenharmony_ci	xor	$t2,$t0,$t0
416e1051a39Sopenharmony_ci	xor	$t3,$t1,$t1		; Sigma1(e)
417e1051a39Sopenharmony_ci	add	$Xlo,$hlo,$hlo
418e1051a39Sopenharmony_ci	 xor	$a2,$a0,$a0
419e1051a39Sopenharmony_ci	addc	$Xhi,$hhi,$hhi		; h += K[i]
420e1051a39Sopenharmony_ci	 xor	$a3,$a1,$a1		; Ch(e,f,g)
421e1051a39Sopenharmony_ci
422e1051a39Sopenharmony_ci	 add	$t0,$hlo,$hlo
423e1051a39Sopenharmony_ci	shd	$ahi,$alo,$Sigma0[0],$t0
424e1051a39Sopenharmony_ci	 addc	$t1,$hhi,$hhi		; h += Sigma1(e)
425e1051a39Sopenharmony_ci	shd	$alo,$ahi,$Sigma0[0],$t1
426e1051a39Sopenharmony_ci	 add	$a0,$hlo,$hlo
427e1051a39Sopenharmony_ci	shd	$ahi,$alo,$Sigma0[1],$t2
428e1051a39Sopenharmony_ci	 addc	$a1,$hhi,$hhi		; h += Ch(e,f,g)
429e1051a39Sopenharmony_ci	shd	$alo,$ahi,$Sigma0[1],$t3
430e1051a39Sopenharmony_ci
431e1051a39Sopenharmony_ci	xor	$t2,$t0,$t0
432e1051a39Sopenharmony_ci	xor	$t3,$t1,$t1
433e1051a39Sopenharmony_ci	shd	$ahi,$alo,$Sigma0[2],$t2
434e1051a39Sopenharmony_ci	and	$alo,$blo,$a0
435e1051a39Sopenharmony_ci	shd	$alo,$ahi,$Sigma0[2],$t3
436e1051a39Sopenharmony_ci	and	$ahi,$bhi,$a1
437e1051a39Sopenharmony_ci	xor	$t2,$t0,$t0
438e1051a39Sopenharmony_ci	xor	$t3,$t1,$t1		; Sigma0(a)
439e1051a39Sopenharmony_ci
440e1051a39Sopenharmony_ci	and	$alo,$clo,$a2
441e1051a39Sopenharmony_ci	and	$ahi,$chi,$a3
442e1051a39Sopenharmony_ci	xor	$a2,$a0,$a0
443e1051a39Sopenharmony_ci	 add	$hlo,$dlo,$dlo
444e1051a39Sopenharmony_ci	xor	$a3,$a1,$a1
445e1051a39Sopenharmony_ci	 addc	$hhi,$dhi,$dhi		; d += h
446e1051a39Sopenharmony_ci	and	$blo,$clo,$a2
447e1051a39Sopenharmony_ci	 add	$t0,$hlo,$hlo
448e1051a39Sopenharmony_ci	and	$bhi,$chi,$a3
449e1051a39Sopenharmony_ci	 addc	$t1,$hhi,$hhi		; h += Sigma0(a)
450e1051a39Sopenharmony_ci	xor	$a2,$a0,$a0
451e1051a39Sopenharmony_ci	 add	$a0,$hlo,$hlo
452e1051a39Sopenharmony_ci	xor	$a3,$a1,$a1		; Maj(a,b,c)
453e1051a39Sopenharmony_ci	 addc	$a1,$hhi,$hhi		; h += Maj(a,b,c)
454e1051a39Sopenharmony_ci
455e1051a39Sopenharmony_ci___
# Last round of the looped body only: compare the low ten bits of the low
# word of K[i] (still in $Xlo) with $LAST10BITS and branch back to
# L$rounds_pa1 unless they match, i.e. until the last constant was used.
456e1051a39Sopenharmony_ci$code.=<<___ if ($i==15 && $flag);
457e1051a39Sopenharmony_ci	extru	$Xlo,31,10,$Xlo
458e1051a39Sopenharmony_ci	comiclr,= $LAST10BITS,$Xlo,%r0
459e1051a39Sopenharmony_ci	b	L\$rounds_pa1
460e1051a39Sopenharmony_ci	nop
461e1051a39Sopenharmony_ci___
# Swap the roles of the two register pairs for the next round.
462e1051a39Sopenharmony_cipush(@X,shift(@X)); push(@X,shift(@X));
463e1051a39Sopenharmony_ci}
464e1051a39Sopenharmony_ci
# ROUND_16_xx_pa1($i, $ahi,$alo, ..., $hhi,$hlo)
#
# Appends one SHA-512 round with message schedule expansion for the
# 32-bit-only path to $code.  Called with $i in 16..31; the index is reduced
# by 16 to address the 16-entry schedule on the stack.  The emitted code
# loads X[i+1], X[i+9] and X[i+14], updates X[i] in the $Xhi/$Xlo pair,
# writes it back to the stack and then performs the compression step via
# ROUND_00_15_pa1 with $flag=1.
465e1051a39Sopenharmony_cisub ROUND_16_xx_pa1 {
466e1051a39Sopenharmony_cimy ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
# Take $i off @_; what remains in @_ is the register list passed on below.
467e1051a39Sopenharmony_cimy ($i)=shift;
468e1051a39Sopenharmony_ci$i-=16;
# X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14]), indices modulo 16.
# The extru instructions supply the plain right shift of the high word.
469e1051a39Sopenharmony_ci$code.=<<___;
470e1051a39Sopenharmony_ci	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
471e1051a39Sopenharmony_ci	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
472e1051a39Sopenharmony_ci	ldw	`-$XOFF+8*(($i+9)%16)`(%sp),$a1
473e1051a39Sopenharmony_ci	ldw	`-$XOFF+8*(($i+9)%16)+4`(%sp),$a0	; load X[i+9]
474e1051a39Sopenharmony_ci	ldw	`-$XOFF+8*(($i+14)%16)`(%sp),$a3
475e1051a39Sopenharmony_ci	ldw	`-$XOFF+8*(($i+14)%16)+4`(%sp),$a2	; load X[i+14]
476e1051a39Sopenharmony_ci	shd	$Xnhi,$Xnlo,$sigma0[0],$t0
477e1051a39Sopenharmony_ci	shd	$Xnlo,$Xnhi,$sigma0[0],$t1
478e1051a39Sopenharmony_ci	 add	$a0,$Xlo,$Xlo
479e1051a39Sopenharmony_ci	shd	$Xnhi,$Xnlo,$sigma0[1],$t2
480e1051a39Sopenharmony_ci	 addc	$a1,$Xhi,$Xhi
481e1051a39Sopenharmony_ci	shd	$Xnlo,$Xnhi,$sigma0[1],$t3
482e1051a39Sopenharmony_ci	xor	$t2,$t0,$t0
483e1051a39Sopenharmony_ci	shd	$Xnhi,$Xnlo,$sigma0[2],$t2
484e1051a39Sopenharmony_ci	xor	$t3,$t1,$t1
485e1051a39Sopenharmony_ci	extru	$Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
486e1051a39Sopenharmony_ci	xor	$t2,$t0,$t0
487e1051a39Sopenharmony_ci	 shd	$a3,$a2,$sigma1[0],$a0
488e1051a39Sopenharmony_ci	xor	$t3,$t1,$t1		; sigma0(X[i+1)&0x0f])
489e1051a39Sopenharmony_ci	 shd	$a2,$a3,$sigma1[0],$a1
490e1051a39Sopenharmony_ci	add	$t0,$Xlo,$Xlo
491e1051a39Sopenharmony_ci	 shd	$a3,$a2,$sigma1[1],$t2
492e1051a39Sopenharmony_ci	addc	$t1,$Xhi,$Xhi
493e1051a39Sopenharmony_ci	 shd	$a2,$a3,$sigma1[1],$t3
494e1051a39Sopenharmony_ci	xor	$t2,$a0,$a0
495e1051a39Sopenharmony_ci	shd	$a3,$a2,$sigma1[2],$t2
496e1051a39Sopenharmony_ci	xor	$t3,$a1,$a1
497e1051a39Sopenharmony_ci	extru	$a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
498e1051a39Sopenharmony_ci	xor	$t2,$a0,$a0
499e1051a39Sopenharmony_ci	xor	$t3,$a1,$a1		; sigma0(X[i+14)&0x0f])
500e1051a39Sopenharmony_ci	add	$a0,$Xlo,$Xlo
501e1051a39Sopenharmony_ci	addc	$a1,$Xhi,$Xhi
502e1051a39Sopenharmony_ci
503e1051a39Sopenharmony_ci	stw	$Xhi,`-$XOFF+8*($i%16)`(%sp)
504e1051a39Sopenharmony_ci	stw	$Xlo,`-$XOFF+8*($i%16)+4`(%sp)
505e1051a39Sopenharmony_ci___
# $flag=1: X[i+1] is already loaded, and with the reduced $i==15 the callee
# appends the end-of-table test and loop branch.
506e1051a39Sopenharmony_ci&ROUND_00_15_pa1($i,@_,1);
507e1051a39Sopenharmony_ci}
# 32-bit path setup: load the state as sixteen 32-bit words (hi word first)
# and set %sar to 32 - 8*($inp mod 4).  L$oop_pa1 is the per-block loop
# head; word-aligned input goes to L$aligned_pa1, otherwise $inp is rounded
# down to a word boundary and the block is gathered with vshd, merging each
# pair of neighbouring words.  This heredoc emits the first ten loads and
# the first two stores into the schedule area at -$XOFF(%sp); the loop that
# follows emits the rest.
# NOTE(review): the "sub" after comib is presumably executed in the branch
# delay slot, where it is a no-op for aligned input ($a3 == 0) -- confirm.
508e1051a39Sopenharmony_ci$code.=<<___;
509e1051a39Sopenharmony_ci	ldw	`0*4`($ctx),$Ahi		; load context
510e1051a39Sopenharmony_ci	ldw	`1*4`($ctx),$Alo
511e1051a39Sopenharmony_ci	ldw	`2*4`($ctx),$Bhi
512e1051a39Sopenharmony_ci	ldw	`3*4`($ctx),$Blo
513e1051a39Sopenharmony_ci	ldw	`4*4`($ctx),$Chi
514e1051a39Sopenharmony_ci	ldw	`5*4`($ctx),$Clo
515e1051a39Sopenharmony_ci	ldw	`6*4`($ctx),$Dhi
516e1051a39Sopenharmony_ci	ldw	`7*4`($ctx),$Dlo
517e1051a39Sopenharmony_ci	ldw	`8*4`($ctx),$Ehi
518e1051a39Sopenharmony_ci	ldw	`9*4`($ctx),$Elo
519e1051a39Sopenharmony_ci	ldw	`10*4`($ctx),$Fhi
520e1051a39Sopenharmony_ci	ldw	`11*4`($ctx),$Flo
521e1051a39Sopenharmony_ci	ldw	`12*4`($ctx),$Ghi
522e1051a39Sopenharmony_ci	ldw	`13*4`($ctx),$Glo
523e1051a39Sopenharmony_ci	ldw	`14*4`($ctx),$Hhi
524e1051a39Sopenharmony_ci	ldw	`15*4`($ctx),$Hlo
525e1051a39Sopenharmony_ci
526e1051a39Sopenharmony_ci	extru	$inp,31,2,$t0
527e1051a39Sopenharmony_ci	sh3addl	$t0,%r0,$t0
528e1051a39Sopenharmony_ci	subi	32,$t0,$t0
529e1051a39Sopenharmony_ci	mtctl	$t0,%cr11		; load %sar with align factor
530e1051a39Sopenharmony_ci
531e1051a39Sopenharmony_ciL\$oop_pa1
532e1051a39Sopenharmony_ci	extru	$inp,31,2,$a3
533e1051a39Sopenharmony_ci	comib,=	0,$a3,L\$aligned_pa1
534e1051a39Sopenharmony_ci	sub	$inp,$a3,$inp
535e1051a39Sopenharmony_ci
536e1051a39Sopenharmony_ci	ldw	`0*4`($inp),$X[0]
537e1051a39Sopenharmony_ci	ldw	`1*4`($inp),$X[1]
538e1051a39Sopenharmony_ci	ldw	`2*4`($inp),$t2
539e1051a39Sopenharmony_ci	ldw	`3*4`($inp),$t3
540e1051a39Sopenharmony_ci	ldw	`4*4`($inp),$a0
541e1051a39Sopenharmony_ci	ldw	`5*4`($inp),$a1
542e1051a39Sopenharmony_ci	ldw	`6*4`($inp),$a2
543e1051a39Sopenharmony_ci	ldw	`7*4`($inp),$a3
544e1051a39Sopenharmony_ci	vshd	$X[0],$X[1],$X[0]
545e1051a39Sopenharmony_ci	vshd	$X[1],$t2,$X[1]
546e1051a39Sopenharmony_ci	stw	$X[0],`-$XOFF+0*4`(%sp)
547e1051a39Sopenharmony_ci	ldw	`8*4`($inp),$t0
548e1051a39Sopenharmony_ci	vshd	$t2,$t3,$t2
549e1051a39Sopenharmony_ci	stw	$X[1],`-$XOFF+1*4`(%sp)
550e1051a39Sopenharmony_ci	ldw	`9*4`($inp),$t1
551e1051a39Sopenharmony_ci	vshd	$t3,$a0,$t3
552e1051a39Sopenharmony_ci___
# Unaligned input, continued: a software pipeline over eight temporaries.
# @t is rotated after every step, so $t[0] is always the oldest, already
# merged word.  The first loop stores words 2..24 while loading input words
# 10..32 (the misaligned block spans 33 words) and merging the next pair;
# the second loop drains words 25..30 without further loads; the final
# store of word 31 is emitted right after the branch to L$collected_pa1.
553e1051a39Sopenharmony_ci{
554e1051a39Sopenharmony_cimy @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
555e1051a39Sopenharmony_cifor ($i=2;$i<=(128/4-8);$i++) {
556e1051a39Sopenharmony_ci$code.=<<___;
557e1051a39Sopenharmony_ci	stw	$t[0],`-$XOFF+$i*4`(%sp)
558e1051a39Sopenharmony_ci	ldw	`(8+$i)*4`($inp),$t[0]
559e1051a39Sopenharmony_ci	vshd	$t[1],$t[2],$t[1]
560e1051a39Sopenharmony_ci___
561e1051a39Sopenharmony_cipush(@t,shift(@t));
562e1051a39Sopenharmony_ci}
563e1051a39Sopenharmony_cifor (;$i<(128/4-1);$i++) {
564e1051a39Sopenharmony_ci$code.=<<___;
565e1051a39Sopenharmony_ci	stw	$t[0],`-$XOFF+$i*4`(%sp)
566e1051a39Sopenharmony_ci	vshd	$t[1],$t[2],$t[1]
567e1051a39Sopenharmony_ci___
568e1051a39Sopenharmony_cipush(@t,shift(@t));
569e1051a39Sopenharmony_ci}
570e1051a39Sopenharmony_ci$code.=<<___;
571e1051a39Sopenharmony_ci	b	L\$collected_pa1
572e1051a39Sopenharmony_ci	stw	$t[0],`-$XOFF+$i*4`(%sp)
573e1051a39Sopenharmony_ci
574e1051a39Sopenharmony_ci___
575e1051a39Sopenharmony_ci}
# Word-aligned input: same copy into the schedule area at -$XOFF(%sp), but
# without the vshd merging.  This heredoc emits the first ten loads and the
# first two stores; the loop that follows emits the rest.
576e1051a39Sopenharmony_ci$code.=<<___;
577e1051a39Sopenharmony_ciL\$aligned_pa1
578e1051a39Sopenharmony_ci	ldw	`0*4`($inp),$X[0]
579e1051a39Sopenharmony_ci	ldw	`1*4`($inp),$X[1]
580e1051a39Sopenharmony_ci	ldw	`2*4`($inp),$t2
581e1051a39Sopenharmony_ci	ldw	`3*4`($inp),$t3
582e1051a39Sopenharmony_ci	ldw	`4*4`($inp),$a0
583e1051a39Sopenharmony_ci	ldw	`5*4`($inp),$a1
584e1051a39Sopenharmony_ci	ldw	`6*4`($inp),$a2
585e1051a39Sopenharmony_ci	ldw	`7*4`($inp),$a3
586e1051a39Sopenharmony_ci	stw	$X[0],`-$XOFF+0*4`(%sp)
587e1051a39Sopenharmony_ci	ldw	`8*4`($inp),$t0
588e1051a39Sopenharmony_ci	stw	$X[1],`-$XOFF+1*4`(%sp)
589e1051a39Sopenharmony_ci	ldw	`9*4`($inp),$t1
590e1051a39Sopenharmony_ci___
# Aligned input, continued: store words 2..23 while loading input words
# 10..31 into the temporary just freed, then drain the remaining eight
# temporaries (words 24..31).  @t is rotated after every step so $t[0] is
# always the next word to store.  Both input paths meet at L$collected_pa1.
591e1051a39Sopenharmony_ci{
592e1051a39Sopenharmony_cimy @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
593e1051a39Sopenharmony_cifor ($i=2;$i<(128/4-8);$i++) {
594e1051a39Sopenharmony_ci$code.=<<___;
595e1051a39Sopenharmony_ci	stw	$t[0],`-$XOFF+$i*4`(%sp)
596e1051a39Sopenharmony_ci	ldw	`(8+$i)*4`($inp),$t[0]
597e1051a39Sopenharmony_ci___
598e1051a39Sopenharmony_cipush(@t,shift(@t));
599e1051a39Sopenharmony_ci}
600e1051a39Sopenharmony_cifor (;$i<128/4;$i++) {
601e1051a39Sopenharmony_ci$code.=<<___;
602e1051a39Sopenharmony_ci	stw	$t[0],`-$XOFF+$i*4`(%sp)
603e1051a39Sopenharmony_ci___
604e1051a39Sopenharmony_cipush(@t,shift(@t));
605e1051a39Sopenharmony_ci}
606e1051a39Sopenharmony_ci$code.="L\$collected_pa1\n";
607e1051a39Sopenharmony_ci}
608e1051a39Sopenharmony_ci
# Emit rounds 0..15 straight-line, then the 16-round looped body under
# L$rounds_pa1.  @V holds hi/lo register pairs, so it is rotated by two
# entries after each round: the pair that held h becomes the next round's a.
# $i carries over from the first loop and continues from 16 to 31.
609e1051a39Sopenharmony_cifor($i=0;$i<16;$i++)	{ &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
610e1051a39Sopenharmony_ci$code.="L\$rounds_pa1\n";
611e1051a39Sopenharmony_cifor(;$i<32;$i++)	{ &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
612e1051a39Sopenharmony_ci
# Tail of the per-block loop: reload the saved $ctx/$inp/$num from the
# frame, step $Tbl back by $rounds*$SZ bytes, then add the sixteen 32-bit
# context words to the working variables as eight hi:lo pairs ("add" on the
# low word, "addc" on the high word) and store the sums back to $ctx.
# Finally $inp is advanced by 16*$SZ bytes; the code falls out at L$done
# when $inp equals $num and otherwise saves $inp and branches back to
# L$oop_pa1.
$code.=<<___;
	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo	`-$rounds*$SZ`($Tbl),$Tbl		; rewind $Tbl

	ldw	`0*4`($ctx),$t1		; update context
	ldw	`1*4`($ctx),$t0
	ldw	`2*4`($ctx),$t3
	ldw	`3*4`($ctx),$t2
	ldw	`4*4`($ctx),$a1
	ldw	`5*4`($ctx),$a0
	ldw	`6*4`($ctx),$a3
	add	$t0,$Alo,$Alo
	ldw	`7*4`($ctx),$a2
	addc	$t1,$Ahi,$Ahi
	ldw	`8*4`($ctx),$t1
	add	$t2,$Blo,$Blo
	ldw	`9*4`($ctx),$t0
	addc	$t3,$Bhi,$Bhi
	ldw	`10*4`($ctx),$t3
	add	$a0,$Clo,$Clo
	ldw	`11*4`($ctx),$t2
	addc	$a1,$Chi,$Chi
	ldw	`12*4`($ctx),$a1
	add	$a2,$Dlo,$Dlo
	ldw	`13*4`($ctx),$a0
	addc	$a3,$Dhi,$Dhi
	ldw	`14*4`($ctx),$a3
	add	$t0,$Elo,$Elo
	ldw	`15*4`($ctx),$a2
	addc	$t1,$Ehi,$Ehi
	stw	$Ahi,`0*4`($ctx)
	add	$t2,$Flo,$Flo
	stw	$Alo,`1*4`($ctx)
	addc	$t3,$Fhi,$Fhi
	stw	$Bhi,`2*4`($ctx)
	add	$a0,$Glo,$Glo
	stw	$Blo,`3*4`($ctx)
	addc	$a1,$Ghi,$Ghi
	stw	$Chi,`4*4`($ctx)
	add	$a2,$Hlo,$Hlo
	stw	$Clo,`5*4`($ctx)
	addc	$a3,$Hhi,$Hhi
	stw	$Dhi,`6*4`($ctx)
	ldo	`16*$SZ`($inp),$inp	; advance $inp
	stw	$Dlo,`7*4`($ctx)
	stw	$Ehi,`8*4`($ctx)
	stw	$Elo,`9*4`($ctx)
	stw	$Fhi,`10*4`($ctx)
	stw	$Flo,`11*4`($ctx)
	stw	$Ghi,`12*4`($ctx)
	stw	$Glo,`13*4`($ctx)
	stw	$Hhi,`14*4`($ctx)
	comb,=	$inp,$num,L\$done
	stw	$Hlo,`15*4`($ctx)
	b	L\$oop_pa1
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
L\$done
___
673e1051a39Sopenharmony_ci}}
# Function epilogue: reload the return pointer (%r2) and %r4..%r18 from
# the frame, return through %r2 and reload %r3 with $POPMB from
# -$FRAME(%sp).  The trailing .STRINGZ embeds an identification string
# whose "SHA<n>" part is computed from $SZ.
$code.=<<___;
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	$POP	`-$FRAME+14*$SIZE_T`(%sp),%r17
	$POP	`-$FRAME+15*$SIZE_T`(%sp),%r18
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# directive...

# Encoder for "ldd".  Called as $ldd->($mod,$args) with the completer
# (e.g. "" or ",mb") and the operand string.  When the operands contain
# "disp(%rB),%rT" the result is a ".WORD 0x..." line holding the hand-built
# opcode, with the original instruction kept as a trailing comment; any
# other operand form is returned as plain text with a leading tab.
my $ldd = sub {
  my ($mod,$args) = @_;
  my $orig = "ldd$mod\t$args";

    # format 3 suffices
    return "\t".$orig
	unless ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/);

    my ($disp,$base,$target) = ($1,$2,$3);
    # Displacement bits 3..12 land in opcode bits 4..13; displacement
    # bit 13 lands in opcode bit 0.
    my $opcode=(0x14<<26)|($base<<21)|($target<<16)
	      |(($disp&0x1FF8)<<1)|(($disp>>13)&1);
    $opcode|=(1<<3) if ($mod =~ /^,m/);		# any completer starting ",m"
    $opcode|=(1<<2) if ($mod =~ /^,mb/);	# additionally for ",mb"
    return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
};

# Encoder for "std".  Called as $std->($mod,$args).  When the operands
# contain "%rS,disp(%rB)" the result is a ".WORD 0x..." line holding the
# hand-built opcode, with the original instruction kept as a trailing
# comment; any other operand form is returned as plain text with a leading
# tab.  $mod only appears in the echoed text, it does not alter the opcode.
my $std = sub {
  my ($mod,$args) = @_;
  my $orig = "std$mod\t$args";

    # format 3 suffices
    return "\t".$orig
	unless ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/);

    my ($source,$disp,$base) = ($1,$2,$3);
    # Displacement bits 3..12 land in opcode bits 4..13; displacement
    # bit 13 lands in opcode bit 0.
    my $opcode=(0x1c<<26)|($base<<21)|($source<<16)
	      |(($disp&0x1FF8)<<1)|(($disp>>13)&1);
    return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
};

# Encoder for "extrd".  Called as $extrd->($mod,$args).  Two operand forms
# are hand-encoded into a ".WORD 0x..." line (original text kept as a
# trailing comment):
#   %rS,pos,len,%rT   - fixed position (format 15)
#   %rS,%sar,len,%rT  - position taken from %sar (format 12); a completer
#                       matching /,\**=/ additionally sets opcode bit 13
# Anything else is returned as plain text with a leading tab.
my $extrd = sub {
  my ($mod,$args) = @_;
  my $orig = "extrd$mod\t$args";

    # I only have ",u" completer, it's implicitly encoded...
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)	# format 15
    {	my ($src,$pos,$width,$dst) = ($1,$2,$3,$4);
	my $len=32-$width;	# the length field holds 32-len, split in 1+5 bits
	my $opcode=(0x36<<26)|($src<<21)|($dst<<16);
	$opcode |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);		# encode pos
	$opcode |= (($len&0x20)<<7)|($len&0x1f);		# encode len
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    if ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)	# format 12
    {	my ($src,$width,$dst) = ($1,$2,$3);
	my $len=32-$width;
	my $opcode=(0x34<<26)|($src<<21)|($dst<<16)|(2<<11)|(1<<9);
	$opcode |= (($len&0x20)<<3)|($len&0x1f);		# encode len
	$opcode |= (1<<13) if ($mod =~ /,\**=/);
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    return "\t".$orig;
};

# Encoder for "shrpd".  Called as $shrpd->($mod,$args).  Two operand forms
# are hand-encoded into a ".WORD 0x..." line (original text kept as a
# trailing comment):
#   %rA,%rB,sa,%rT    - fixed shift amount (format 14)
#   %rA,%rB,%sar,%rT  - shift amount taken from %sar (format 11)
# Anything else is returned as plain text with a leading tab.  $mod only
# appears in the echoed text, it does not alter the opcode.
my $shrpd = sub {
  my ($mod,$args) = @_;
  my $orig = "shrpd$mod\t$args";

    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)	# format 14
    {	my ($first,$second,$shift,$dst) = ($1,$2,$3,$4);
	my $cpos=63-$shift;	# the field holds 63-sa, split in 1+5 bits
	my $opcode=(0x34<<26)|($second<<21)|($first<<16)|(1<<10)|$dst;
	$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode sa
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }
    if ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)	# format 11
    {	my ($first,$second,$dst) = ($1,$2,$3);
	return sprintf "\t.WORD\t0x%08x\t; %s",
		(0x34<<26)|($second<<21)|($first<<16)|(1<<9)|$dst,$orig;
    }
    return "\t".$orig;
};

# Route one instruction through its hand-encoder, if there is one.
#   $mnemonic - instruction name, $mod - completer text, $args - operands.
# Looks up a variable named after the mnemonic; when it holds a code
# reference that encoder is called with ($mod,$args) and its result is
# returned, otherwise the instruction is returned as "\tmnemonic+mod\targs".
sub assemble {
  my ($mnemonic,$mod,$args)=@_;
  # NOTE(review): string eval on the mnemonic.  The only caller in this
  # file passes a [a-z]+ capture, so nothing but a plain variable name can
  # reach it; keep it that way if new callers are added.
  my $handler = eval("\$$mnemonic");

    return $handler->($mod,$args) if (ref($handler) eq 'CODE');
    return "\t$mnemonic$mod\t$args";
}

# Probe the assembler behind $ENV{CC}: assemble /dev/null with -Wa,-v and
# look for "GNU assembler" in the combined output.  A hit sets $gnuas,
# which the output loop below consults for its 64-bit directive fix-ups.
# The probe is skipped when CC is unset or empty, because the shell would
# otherwise be handed a command line that begins with "-Wa,-v".
if (defined($ENV{CC}) && length($ENV{CC}) &&
    `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
	=~ /GNU assembler/) {
    $gnuas = 1;
}

# Post-process the accumulated $code line by line and print the result.
foreach (split("\n",$code)) {
	# Evaluate every `...` span as a Perl expression and splice in its value.
	s/\`([^\`]*)\`/eval $1/ge;

	# The rewrites below are chained with "or", so at most one of them is
	# applied to a given line:
	#   shd a,b,n     n>31: swap a and b and use n-32; else left as is
	#   _ror r,       -> shd r,r,            or  shrpd r,r,
	#   _shr r,n,     -> extru r,31-n,32-n,  or  extrd,u r,63-n,64-n,
	#   _align a,b,   -> vshd a,b,           or  shrpd a,b,%sar,
	#   _shl r,n,     -> zdep r,31-n,32-n,   or  depd,z r,63-n,64-n,
	# _ror/_shr/_align pick the first form when $SZ==4; _shl is keyed on
	# $SIZE_T==4 instead.
	s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
		$3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)	# rotation for >=32
		:       sprintf("shd\t%$1,%$2,%d",$3)/e			or
	# translate made up instructions: _ror, _shr, _align, _shl
	s/_ror(\s+)(%r[0-9]+),/
		($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e			or

	s/_shr(\s+%r[0-9]+),([0-9]+),/
		$SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
		:        sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e	or

	s/_align(\s+%r[0-9]+,%r[0-9]+),/
		($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e		or

	s/_shl(\s+%r[0-9]+),([0-9]+),/
		$SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
		:            sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

	# 32-bit build: give assemble() the chance to replace the instruction
	# with a hand-encoded .WORD.
	s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);

	# Directive fix-ups applied only for GNU as in the 64-bit build.
	s/(\.LEVEL\s+2\.0)W/$1w/	if ($gnuas && $SIZE_T==8);
	s/\.SPACE\s+\$TEXT\$/.text/	if ($gnuas && $SIZE_T==8);
	s/\.SUBSPA.*//			if ($gnuas && $SIZE_T==8);
	# Mnemonic selection by pointer size.
	s/cmpb,\*/comb,/ 		if ($SIZE_T==4);
	s/\bbv\b/bve/    		if ($SIZE_T==8);

	print $_,"\n";
}

# Fail loudly if the generated output could not be flushed.
close STDOUT or die "error closing STDOUT: $!";