#!/usr/bin/env perl
# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Keccak-1600 for s390x.
#
# June 2017.
#
# The code below is a [lane complementing] KECCAK_2X implementation (see
# sha/keccak1600.c) with C[5] and D[5] held in the register bank. Though
# instead of actually unrolling the loop pair-wise I simply flip the
# pointers to T[][] and A[][] at the end of each round. Since the number
# of rounds is even, the last round writes to A[][] and everything works
# out. In a nutshell it's a transliteration of the x86_64 module, because
# both architectures have similar capabilities/limitations. Performance
# measurement is problematic as I don't have access to an idle system.
# It looks like z13 processes one byte [out of a long message] in ~14
# cycles. At least the result is consistent with an estimate based on
# the number of instructions and the assumed instruction issue rate.
# It's ~2.5x faster than compiler-generated code.

# Command line: [flavour] [output-file]
#
# $output is the last argument if it looks like a file (it has an extension).
# $flavour is the first remaining argument if it doesn't look like a file
# (it contains no dot).  Either one is left undefined when absent.  The
# output name is removed from @ARGV first, so a lone file name is never
# mistaken for a flavour.
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop(@ARGV)   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift(@ARGV) : undef;

# ABI selection.  A flavour containing "31" or "32" selects 4-byte
# pointer-sized slots and the unsuffixed instruction forms (st, l, stm,
# lm, ...); anything else, including no flavour at all, selects 8-byte
# slots and the "g"-suffixed forms (stg, lg, stmg, lmg, ...).
# $g is spliced into mnemonics as ${g}; $SIZE_T scales save-area offsets.
($SIZE_T, $g) = (defined $flavour && $flavour =~ /3[12]/) ? (4, "")
                                                          : (8, "g");

# Send the generated assembly to the requested file.  Three-argument open
# keeps the file name from being interpreted as a mode, and a failure is
# fatal instead of silently leaving STDOUT pointing at the old destination.
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Byte offsets of the 25 64-bit state lanes: $A[row][col] == 8*(5*row+col).
my @A = map { my $base = $_; [ map { 8 * ($base + $_) } 0 .. 4 ] }
            (0, 5, 10, 15, 20);

# Register allocation used by every routine below.
my @C = map { "%r$_" } (0, 1, 5 .. 7);          # column parities C[0..4]
my @D = map { "%r$_" } (8 .. 12);               # D[0..4]
my @T = map { "%r$_" } (13 .. 14);              # two temporaries
my ($src, $dst, $iotas) = map { "%r$_" } (2 .. 4);  # state in, state out,
                                                    # round constant pointer
my $sp = "%r15";                                # stack pointer

# Stack frame layout.  $stdframe is 16 pointer-sized slots plus 4*8 bytes
# (presumably the standard s390x register save area -- confirm against the
# ABI).  $frame adds 25*8 bytes on top of that: the routines below point
# $dst at $stdframe($sp), so this is the scratch copy of the 25-lane state
# that alternates with the caller's state from round to round.
$stdframe = 16*$SIZE_T + 4*8;
$frame    = $stdframe + 25*8;

# Left-rotation amounts applied by the rllg instructions in the round
# body, indexed the same way as @A: $rhotates[row][col] belongs to lane
# A[row][col].
my @rhotates = ([  0,  1, 62, 28, 27 ],
                [ 36, 44,  6, 55, 20 ],
                [  3, 10, 43, 25, 39 ],
                [ 41, 45, 15, 21,  8 ],
                [ 18,  2, 61, 56, 14 ]);

# __KeccakF1600: the round loop (internal entry point, reached with
# "bras %r14,__KeccakF1600" from the routines further down).
#
# $src points at the state that is read, $dst at the state that is
# written and $iotas at the current round constant.  One pass through
# .Loop computes one round from $src into $dst and then exchanges the two
# pointers with a three-xgr swap, so consecutive rounds ping-pong between
# the two buffers.  The loop ends as soon as the low eight bits of $iotas
# are zero ("tmll $iotas,255"); the iotas table at the end of this file is
# laid out so that this happens right after its last entry.
#
# On entry @C[0..4] are loaded with row 4 of the state.  The tail of the
# loop arranges for row 4 of the freshly computed state to sit in those
# same registers, which is what the top of the loop expects.
#
# The Perl-level register lists are permuted twice while the text is
# being generated (see below), hence the private copies.
{ my @C = @C;	# copy, because we mess them up...
  my @D = @D;

$code.=<<___;
.text

.type	__KeccakF1600,\@function
.align	32
__KeccakF1600:
	st${g}	%r14,$SIZE_T*14($sp)
	lg	@C[0],$A[4][0]($src)
	lg	@C[1],$A[4][1]($src)
	lg	@C[2],$A[4][2]($src)
	lg	@C[3],$A[4][3]($src)
	lg	@C[4],$A[4][4]($src)
	larl	$iotas,iotas
	j	.Loop

.align	16
.Loop:
	lg	@D[0],$A[0][0]($src)
	lg	@D[1],$A[1][1]($src)
	lg	@D[2],$A[2][2]($src)
	lg	@D[3],$A[3][3]($src)

	xgr	@C[0],@D[0]
	xg	@C[1],$A[0][1]($src)
	xg	@C[2],$A[0][2]($src)
	xg	@C[3],$A[0][3]($src)
	lgr	@D[4],@C[4]
	xg	@C[4],$A[0][4]($src)

	xg	@C[0],$A[1][0]($src)
	xgr	@C[1],@D[1]
	xg	@C[2],$A[1][2]($src)
	xg	@C[3],$A[1][3]($src)
	xg	@C[4],$A[1][4]($src)

	xg	@C[0],$A[2][0]($src)
	xg	@C[1],$A[2][1]($src)
	xgr	@C[2],@D[2]
	xg	@C[3],$A[2][3]($src)
	xg	@C[4],$A[2][4]($src)

	xg	@C[0],$A[3][0]($src)
	xg	@C[1],$A[3][1]($src)
	xg	@C[2],$A[3][2]($src)
	xgr	@C[3],@D[3]
	xg	@C[4],$A[3][4]($src)

	lgr	@T[0],@C[2]
	rllg	@C[2],@C[2],1
	xgr	@C[2],@C[0]		# D[1] = ROL64(C[2], 1) ^ C[0]

	rllg	@C[0],@C[0],1
	xgr	@C[0],@C[3]		# D[4] = ROL64(C[0], 1) ^ C[3]

	rllg	@C[3],@C[3],1
	xgr	@C[3],@C[1]		# D[2] = ROL64(C[3], 1) ^ C[1]

	rllg	@C[1],@C[1],1
	xgr	@C[1],@C[4]		# D[0] = ROL64(C[1], 1) ^ C[4]

	rllg	@C[4],@C[4],1
	xgr	@C[4],@T[0]		# D[3] = ROL64(C[4], 1) ^ C[2]
___
	# Rename registers instead of moving data.  The code above left
	# D[0] in the old C[1], D[1] in the old C[2], D[2] in the old C[3],
	# D[3] in the old C[4] and D[4] in the old C[0].  The old D registers
	# still hold the diagonal lanes A[0][0], A[1][1], A[2][2], A[3][3]
	# and a copy of A[4][4]; from here on they are addressed as C[0..4].
	(@D[0..4], @C) = (@C[1..4,0], @D);
$code.=<<___;
	xgr	@C[1],@D[1]
	xgr	@C[2],@D[2]
	xgr	@C[3],@D[3]
	 rllg	@C[1],@C[1],$rhotates[1][1]
	xgr	@C[4],@D[4]
	 rllg	@C[2],@C[2],$rhotates[2][2]
	xgr	@C[0],@D[0]

	lgr	@T[0],@C[1]
	ogr	@C[1],@C[2]
	 rllg	@C[3],@C[3],$rhotates[3][3]
	xgr	@C[1],@C[0]		#	    C[0] ^ ( C[1] | C[2])
	 rllg	@C[4],@C[4],$rhotates[4][4]
	xg	@C[1],0($iotas)
	la	$iotas,8($iotas)
	stg	@C[1],$A[0][0]($dst)	# R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i]

	lgr	@T[1],@C[4]
	ngr	@C[4],@C[3]
	 lghi	@C[1],-1		# no 'not' instruction :-(
	xgr	@C[4],@C[2]		#	    C[2] ^ ( C[4] & C[3])
	 xgr	@C[2],@C[1]		# not	@C[2]
	stg	@C[4],$A[0][2]($dst)	# R[0][2] = C[2] ^ ( C[4] & C[3])
	 ogr	@C[2],@C[3]
	 xgr	@C[2],@T[0]		#	    C[1] ^ (~C[2] | C[3])

	ngr	@T[0],@C[0]
	 stg	@C[2],$A[0][1]($dst)	# R[0][1] = C[1] ^ (~C[2] | C[3])
	xgr	@T[0],@T[1]		#	    C[4] ^ ( C[1] & C[0])
	 ogr	@T[1],@C[0]
	stg	@T[0],$A[0][4]($dst)	# R[0][4] = C[4] ^ ( C[1] & C[0])
	 xgr	@T[1],@C[3]		#	    C[3] ^ ( C[4] | C[0])
	 stg	@T[1],$A[0][3]($dst)	# R[0][3] = C[3] ^ ( C[4] | C[0])


	lg	@C[0],$A[0][3]($src)
	lg	@C[4],$A[4][2]($src)
	lg	@C[3],$A[3][1]($src)
	lg	@C[1],$A[1][4]($src)
	lg	@C[2],$A[2][0]($src)

	xgr	@C[0],@D[3]
	xgr	@C[4],@D[2]
	 rllg	@C[0],@C[0],$rhotates[0][3]
	xgr	@C[3],@D[1]
	 rllg	@C[4],@C[4],$rhotates[4][2]
	xgr	@C[1],@D[4]
	 rllg	@C[3],@C[3],$rhotates[3][1]
	xgr	@C[2],@D[0]

	lgr	@T[0],@C[0]
	ogr	@C[0],@C[4]
	 rllg	@C[1],@C[1],$rhotates[1][4]
	xgr	@C[0],@C[3]		#	    C[3] ^ (C[0] |  C[4])
	 rllg	@C[2],@C[2],$rhotates[2][0]
	stg	@C[0],$A[1][3]($dst)	# R[1][3] = C[3] ^ (C[0] |  C[4])

	lgr	@T[1],@C[1]
	ngr	@C[1],@T[0]
	 lghi	@C[0],-1		# no 'not' instruction :-(
	xgr	@C[1],@C[4]		#	    C[4] ^ (C[1] &  C[0])
	 xgr	@C[4],@C[0]		# not	@C[4]
	stg	@C[1],$A[1][4]($dst)	# R[1][4] = C[4] ^ (C[1] &  C[0])

	 ogr	@C[4],@C[3]
	 xgr	@C[4],@C[2]		#	    C[2] ^ (~C[4] | C[3])

	ngr	@C[3],@C[2]
	 stg	@C[4],$A[1][2]($dst)	# R[1][2] = C[2] ^ (~C[4] | C[3])
	xgr	@C[3],@T[1]		#	    C[1] ^ (C[3] &  C[2])
	 ogr	@T[1],@C[2]
	stg	@C[3],$A[1][1]($dst)	# R[1][1] = C[1] ^ (C[3] &  C[2])
	 xgr	@T[1],@T[0]		#	    C[0] ^ (C[1] |  C[2])
	 stg	@T[1],$A[1][0]($dst)	# R[1][0] = C[0] ^ (C[1] |  C[2])


	lg	@C[2],$A[2][3]($src)
	lg	@C[3],$A[3][4]($src)
	lg	@C[1],$A[1][2]($src)
	lg	@C[4],$A[4][0]($src)
	lg	@C[0],$A[0][1]($src)

	xgr	@C[2],@D[3]
	xgr	@C[3],@D[4]
	 rllg	@C[2],@C[2],$rhotates[2][3]
	xgr	@C[1],@D[2]
	 rllg	@C[3],@C[3],$rhotates[3][4]
	xgr	@C[4],@D[0]
	 rllg	@C[1],@C[1],$rhotates[1][2]
	xgr	@C[0],@D[1]

	lgr	@T[0],@C[2]
	ngr	@C[2],@C[3]
	 rllg	@C[4],@C[4],$rhotates[4][0]
	xgr	@C[2],@C[1]		#	     C[1] ^ ( C[2] & C[3])
	lghi	@T[1],-1		# no 'not' instruction :-(
	stg	@C[2],$A[2][1]($dst)	# R[2][1] =  C[1] ^ ( C[2] & C[3])

	xgr	@C[3],@T[1]		# not	@C[3]
	lgr	@T[1],@C[4]
	ngr	@C[4],@C[3]
	 rllg	@C[0],@C[0],$rhotates[0][1]
	xgr	@C[4],@T[0]		#	     C[2] ^ ( C[4] & ~C[3])
	 ogr	@T[0],@C[1]
	stg	@C[4],$A[2][2]($dst)	# R[2][2] =  C[2] ^ ( C[4] & ~C[3])
	 xgr	@T[0],@C[0]		#	     C[0] ^ ( C[2] | C[1])

	ngr	@C[1],@C[0]
	 stg	@T[0],$A[2][0]($dst)	# R[2][0] =  C[0] ^ ( C[2] | C[1])
	xgr	@C[1],@T[1]		#	     C[4] ^ ( C[1] & C[0])
	 ogr	@C[0],@T[1]
	stg	@C[1],$A[2][4]($dst)	# R[2][4] =  C[4] ^ ( C[1] & C[0])
	 xgr	@C[0],@C[3]		#	    ~C[3] ^ ( C[0] | C[4])
	 stg	@C[0],$A[2][3]($dst)	# R[2][3] = ~C[3] ^ ( C[0] | C[4])


	lg	@C[2],$A[2][1]($src)
	lg	@C[3],$A[3][2]($src)
	lg	@C[1],$A[1][0]($src)
	lg	@C[4],$A[4][3]($src)
	lg	@C[0],$A[0][4]($src)

	xgr	@C[2],@D[1]
	xgr	@C[3],@D[2]
	 rllg	@C[2],@C[2],$rhotates[2][1]
	xgr	@C[1],@D[0]
	 rllg	@C[3],@C[3],$rhotates[3][2]
	xgr	@C[4],@D[3]
	 rllg	@C[1],@C[1],$rhotates[1][0]
	xgr	@C[0],@D[4]
	 rllg	@C[4],@C[4],$rhotates[4][3]

	lgr	@T[0],@C[2]
	ogr	@C[2],@C[3]
	lghi	@T[1],-1		# no 'not' instruction :-(
	xgr	@C[2],@C[1]		#	     C[1] ^ ( C[2] | C[3])
	xgr	@C[3],@T[1]		# not	@C[3]
	stg	@C[2],$A[3][1]($dst)	# R[3][1] =  C[1] ^ ( C[2] | C[3])

	lgr	@T[1],@C[4]
	ogr	@C[4],@C[3]
	 rllg	@C[0],@C[0],$rhotates[0][4]
	xgr	@C[4],@T[0]		#	     C[2] ^ ( C[4] | ~C[3])
	 ngr	@T[0],@C[1]
	stg	@C[4],$A[3][2]($dst)	# R[3][2] =  C[2] ^ ( C[4] | ~C[3])
	 xgr	@T[0],@C[0]		#	     C[0] ^ ( C[2] & C[1])

	ogr	@C[1],@C[0]
	 stg	@T[0],$A[3][0]($dst)	# R[3][0] =  C[0] ^ ( C[2] & C[1])
	xgr	@C[1],@T[1]		#	     C[4] ^ ( C[1] | C[0])
	 ngr	@C[0],@T[1]
	stg	@C[1],$A[3][4]($dst)	# R[3][4] =  C[4] ^ ( C[1] | C[0])
	 xgr	@C[0],@C[3]		#	    ~C[3] ^ ( C[0] & C[4])
	 stg	@C[0],$A[3][3]($dst)	# R[3][3] = ~C[3] ^ ( C[0] & C[4])


	xg	@D[2],$A[0][2]($src)
	xg	@D[3],$A[1][3]($src)
	xg	@D[1],$A[4][1]($src)
	xg	@D[4],$A[2][4]($src)
	xgr	$dst,$src		# xchg	$dst,$src
	 rllg	@D[2],@D[2],$rhotates[0][2]
	xg	@D[0],$A[3][0]($src)
	 rllg	@D[3],@D[3],$rhotates[1][3]
	xgr	$src,$dst
	 rllg	@D[1],@D[1],$rhotates[4][1]
	xgr	$dst,$src
	 rllg	@D[4],@D[4],$rhotates[2][4]
___
	# The last row is computed in place in the D registers: the lanes
	# were XORed straight into them above, so they are simply renamed to
	# C[0..4] in the order the final row needs.  $src and $dst have
	# already been exchanged at this point, which is why this row is
	# stored through $src.
	@C = @D[2..4,0,1];
$code.=<<___;
	lgr	@T[0],@C[0]
	ngr	@C[0],@C[1]
	lghi	@T[1],-1		# no 'not' instruction :-(
	xgr	@C[0],@C[4]		#	     C[4] ^ ( C[0] & C[1])
	xgr	@C[1],@T[1]		# not	@C[1]
	stg	@C[0],$A[4][4]($src)	# R[4][4] =  C[4] ^ ( C[0] & C[1])

	lgr	@T[1],@C[2]
	ngr	@C[2],@C[1]
	 rllg	@D[0],@D[0],$rhotates[3][0]
	xgr	@C[2],@T[0]		#	     C[0] ^ ( C[2] & ~C[1])
	 ogr	@T[0],@C[4]
	stg	@C[2],$A[4][0]($src)	# R[4][0] =  C[0] ^ ( C[2] & ~C[1])
	 xgr	@T[0],@C[3]		#	     C[3] ^ ( C[0] | C[4])

	ngr	@C[4],@C[3]
	 stg	@T[0],$A[4][3]($src)	# R[4][3] =  C[3] ^ ( C[0] | C[4])
	xgr	@C[4],@T[1]		#	     C[2] ^ ( C[4] & C[3])
	 ogr	@C[3],@T[1]
	stg	@C[4],$A[4][2]($src)	# R[4][2] =  C[2] ^ ( C[4] & C[3])
	 xgr	@C[3],@C[1]		#	    ~C[1] ^ ( C[2] | C[3])

	lgr	@C[1],@C[0]		# harmonize with the loop top
	lgr	@C[0],@T[0]
	 stg	@C[3],$A[4][1]($src)	# R[4][1] = ~C[1] ^ ( C[2] | C[3])

	tmll	$iotas,255
	jnz	.Loop

	l${g}	%r14,$SIZE_T*14($sp)
	br	%r14
.size	__KeccakF1600,.-__KeccakF1600
___
}
# KeccakF1600: the permutation on a state in its ordinary (non-complemented)
# form.  It sets up a stack frame of $frame bytes, complements six lanes,
# points $dst at the scratch state inside the frame, runs the round loop
# and complements the same six lanes again before returning.  The symbol
# is not exported; SHA3_squeeze reaches it through the .LKeccakF1600 label.
{
# Complements lanes A[0][1], A[0][2], A[1][3], A[2][2], A[3][2] and A[4][0]
# of the state at $src in place, by XORing each with an all-ones register.
# Emitting the sequence twice therefore restores the original lanes.
# Clobbers @D[0..4] and @T[0].
my $flip_lanes = <<___;
	lghi	@D[0],-1		# no 'not' instruction :-(
	lghi	@D[1],-1
	lghi	@D[2],-1
	lghi	@D[3],-1
	lghi	@D[4],-1
	lghi	@T[0],-1
	xg	@D[0],$A[0][1]($src)
	xg	@D[1],$A[0][2]($src)
	xg	@D[2],$A[1][3]($src)
	xg	@D[3],$A[2][2]($src)
	xg	@D[4],$A[3][2]($src)
	xg	@T[0],$A[4][0]($src)
	stmg	@D[0],@D[1],$A[0][1]($src)
	stg	@D[2],$A[1][3]($src)
	stg	@D[3],$A[2][2]($src)
	stg	@D[4],$A[3][2]($src)
	stg	@T[0],$A[4][0]($src)
___

# A line holding only $flip_lanes expands to the sequence followed by one
# empty line, so the emitted text is the same as writing it out in full.
$code.=<<___;
.type	KeccakF1600,\@function
.align	32
KeccakF1600:
.LKeccakF1600:
	lghi	%r1,-$frame
	stm${g}	%r6,%r15,$SIZE_T*6($sp)
	lgr	%r0,$sp
	la	$sp,0(%r1,$sp)
	st${g}	%r0,0($sp)

$flip_lanes
	la	$dst,$stdframe($sp)

	bras	%r14,__KeccakF1600

$flip_lanes
	lm${g}	%r6,%r15,$frame+6*$SIZE_T($sp)
	br	%r14
.size	KeccakF1600,.-KeccakF1600
___
}
# SHA3_absorb: register arguments are %r2 = state ($A_flat, the same
# register as $src), %r3 = input pointer, %r4 = input length in bytes and
# %r5 = block size in bytes.
#
# While at least one whole block of input remains, the block is XORed
# into the state eight bytes at a time (each word is loaded byte-reversed
# with lrvg) and the round loop is run.  The six complemented lanes are
# flipped once on entry and once on exit, so the state stays in
# complemented form across all blocks.  The number of bytes left
# unprocessed is returned in %r2.
{ my ($A_flat,$inp,$len,$bsz) = map("%r$_",(2..5));

# Complements lanes A[0][1], A[0][2], A[1][3], A[2][2], A[3][2] and A[4][0]
# of the state at $src in place, by XORing each with an all-ones register.
# Emitting the sequence twice therefore restores the original lanes.
# Clobbers @D[0..4] and @T[0].
my $flip_lanes = <<___;
	lghi	@D[0],-1		# no 'not' instruction :-(
	lghi	@D[1],-1
	lghi	@D[2],-1
	lghi	@D[3],-1
	lghi	@D[4],-1
	lghi	@T[0],-1
	xg	@D[0],$A[0][1]($src)
	xg	@D[1],$A[0][2]($src)
	xg	@D[2],$A[1][3]($src)
	xg	@D[3],$A[2][2]($src)
	xg	@D[4],$A[3][2]($src)
	xg	@T[0],$A[4][0]($src)
	stmg	@D[0],@D[1],$A[0][1]($src)
	stg	@D[2],$A[1][3]($src)
	stg	@D[3],$A[2][2]($src)
	stg	@D[4],$A[3][2]($src)
	stg	@T[0],$A[4][0]($src)
___

# A line holding only $flip_lanes expands to the sequence followed by one
# empty line, so the emitted text is the same as writing it out in full.
#
# The entry stm saves %r5 (the block size) along with the callee-saved
# registers; the inner loop counts $bsz down to zero, and the lm after the
# call to __KeccakF1600 reloads $inp and $len from the slots written just
# before the call and $bsz from the slot written on entry.
$code.=<<___;
.globl	SHA3_absorb
.type	SHA3_absorb,\@function
.align	32
SHA3_absorb:
	lghi	%r1,-$frame
	stm${g}	%r5,%r15,$SIZE_T*5($sp)
	lgr	%r0,$sp
	la	$sp,0(%r1,$sp)
	st${g}	%r0,0($sp)

$flip_lanes
.Loop_absorb:
	cl${g}r	$len,$bsz
	jl	.Ldone_absorb

	srl${g}	$bsz,3
	la	%r1,0($A_flat)

.Lblock_absorb:
	lrvg	%r0,0($inp)
	la	$inp,8($inp)
	xg	%r0,0(%r1)
	a${g}hi	$len,-8
	stg	%r0,0(%r1)
	la	%r1,8(%r1)
	brct	$bsz,.Lblock_absorb

	stm${g}	$inp,$len,$frame+3*$SIZE_T($sp)
	la	$dst,$stdframe($sp)
	bras	%r14,__KeccakF1600
	lm${g}	$inp,$bsz,$frame+3*$SIZE_T($sp)
	j	.Loop_absorb

.align	16
.Ldone_absorb:
$flip_lanes
	lgr	%r2,$len		# return value

	lm${g}	%r6,%r15,$frame+6*$SIZE_T($sp)
	br	%r14
.size	SHA3_absorb,.-SHA3_absorb
___
}
# SHA3_squeeze: register arguments are %r2 = state ($A_flat), %r3 = output
# pointer, %r4 = number of bytes wanted and %r5 = block size in bytes.
#
# The block size is converted to a count of 8-byte lanes and parked,
# together with %r14, in the caller-provided save area.  Whole lanes are
# copied out byte-reversed (lrvg); once a full block has been emitted and
# more output is still wanted, the state is permuted via .LKeccakF1600 and
# copying restarts at the first lane.  A final request of fewer than eight
# bytes is served one byte at a time, lowest-order byte of the lane first.
#
# The tail uses the two-operand spelling "srlg %r0,8"; it is expanded to
# the three-operand form by the substitution applied to $code just before
# it is printed.
#
# NOTE(review): a request for zero bytes is not special-cased.  It falls
# into .Ltail_squeeze, where brct decrements a zero count, so callers
# presumably never pass 0 -- confirm.
{ my ($A_flat,$out,$len,$bsz) = map("%r$_",(2..5));

$code.=<<___;
.globl	SHA3_squeeze
.type	SHA3_squeeze,\@function
.align	32
SHA3_squeeze:
	srl${g}	$bsz,3
	st${g}	%r14,2*$SIZE_T($sp)
	lghi	%r14,8
	st${g}	$bsz,5*$SIZE_T($sp)
	la	%r1,0($A_flat)

	j	.Loop_squeeze

.align	16
.Loop_squeeze:
	cl${g}r $len,%r14
	jl	.Ltail_squeeze

	lrvg	%r0,0(%r1)
	la	%r1,8(%r1)
	stg	%r0,0($out)
	la	$out,8($out)
	a${g}hi	$len,-8			# len -= 8
	jz	.Ldone_squeeze

	brct	$bsz,.Loop_squeeze	# bsz--

	stm${g}	$out,$len,3*$SIZE_T($sp)
	bras	%r14,.LKeccakF1600
	lm${g}	$out,$bsz,3*$SIZE_T($sp)
	lghi	%r14,8
	la	%r1,0($A_flat)
	j	.Loop_squeeze

.Ltail_squeeze:
	lg	%r0,0(%r1)
.Loop_tail_squeeze:
	stc	%r0,0($out)
	la	$out,1($out)
	srlg	%r0,8
	brct	$len,.Loop_tail_squeeze

.Ldone_squeeze:
	l${g}	%r14,2*$SIZE_T($sp)
	br	%r14
.size	SHA3_squeeze,.-SHA3_squeeze
___
}
# Round constants, one 64-bit word per round (24 in total), consumed in
# order through the $iotas pointer.
#
# The placement matters: the table starts 64 bytes past a 256-byte
# boundary (eight zero words of padding), so its 192 bytes end exactly on
# the next 256-byte boundary.  The round loop relies on this when it
# tests the low eight bits of the pointer to detect that the last
# constant has been used.
$code.=<<___;
.align	256
	.quad	0,0,0,0,0,0,0,0
.type	iotas,\@object
iotas:
	.quad	0x0000000000000001
	.quad	0x0000000000008082
	.quad	0x800000000000808a
	.quad	0x8000000080008000
	.quad	0x000000000000808b
	.quad	0x0000000080000001
	.quad	0x8000000080008081
	.quad	0x8000000000008009
	.quad	0x000000000000008a
	.quad	0x0000000000000088
	.quad	0x0000000080008009
	.quad	0x000000008000000a
	.quad	0x000000008000808b
	.quad	0x800000000000008b
	.quad	0x8000000000008089
	.quad	0x8000000000008003
	.quad	0x8000000000008002
	.quad	0x8000000000000080
	.quad	0x000000000000800a
	.quad	0x800000008000000a
	.quad	0x8000000080008081
	.quad	0x8000000000008080
	.quad	0x0000000080000001
	.quad	0x8000000080008008
.size	iotas,.-iotas
.asciz	"Keccak-1600 absorb and squeeze for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

# Unlike the 32-bit shift, the 64-bit one takes three operands.  The
# templates above write "srlg REG,N"; repeat the register so that the
# assembler sees "srlg REG,REG,N".
$code =~ s/(srlg\s+)(%r[0-9]+),/$1$2,$2,/gm;

# Emit the finished assembly.  Buffered write errors only surface when the
# handle is closed, hence the check on close.
print $code;
close STDOUT or die "error closing STDOUT: $!";
563