1e1051a39Sopenharmony_ci#!/usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci#
9e1051a39Sopenharmony_ci# ====================================================================
10e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
12e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
13e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
14e1051a39Sopenharmony_ci# ====================================================================
15e1051a39Sopenharmony_ci#
16e1051a39Sopenharmony_ci# Keccak-1600 for PPC64.
17e1051a39Sopenharmony_ci#
18e1051a39Sopenharmony_ci# June 2017.
19e1051a39Sopenharmony_ci#
20e1051a39Sopenharmony_ci# This is a straightforward KECCAK_1X_ALT implementation that works on
21e1051a39Sopenharmony_ci# *any* PPC64. PowerISA 2.07 adds a 2x64-bit vector rotate, and with
22e1051a39Sopenharmony_ci# it it's possible to achieve better performance than below, but that
23e1051a39Sopenharmony_ci# is naturally an option only for POWER8 and its successors...
24e1051a39Sopenharmony_ci#
25e1051a39Sopenharmony_ci######################################################################
26e1051a39Sopenharmony_ci# Numbers are cycles per processed byte.
27e1051a39Sopenharmony_ci#
28e1051a39Sopenharmony_ci#		r=1088(*)
29e1051a39Sopenharmony_ci#
30e1051a39Sopenharmony_ci# PPC970/G5	14.0/+130%
31e1051a39Sopenharmony_ci# POWER7	9.7/+110%
32e1051a39Sopenharmony_ci# POWER8	10.6/+100%
33e1051a39Sopenharmony_ci# POWER9	8.2/+66%
34e1051a39Sopenharmony_ci#
35e1051a39Sopenharmony_ci# (*)	Corresponds to SHA3-256. The percentage after the slash is the
36e1051a39Sopenharmony_ci#	improvement over gcc-4.x-generated KECCAK_1X_ALT code. Newer
37e1051a39Sopenharmony_ci#	compilers do much better (but watch out for them generating code
38e1051a39Sopenharmony_ci#	specific to the processor they execute on).
39e1051a39Sopenharmony_ci
40e1051a39Sopenharmony_ci# Command line is "[flavour] [output]"; both are optional and each is taken off @ARGV when recognised.
41e1051a39Sopenharmony_ci# $output is the last argument if it ends in an extension; $flavour is the first argument if it contains no ".".
42e1051a39Sopenharmony_ci$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;	# pop removes it from @ARGV; undef when no such argument
43e1051a39Sopenharmony_ci$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;	# evaluated after $output was popped; undef when absent
44e1051a39Sopenharmony_ci
45e1051a39Sopenharmony_ciif ($flavour =~ /64/) {	# only flavours whose name contains "64" are supported
46e1051a39Sopenharmony_ci	$SIZE_T	=8;	# bytes per stack slot / saved register
47e1051a39Sopenharmony_ci	$LRSAVE	=2*$SIZE_T;	# offset from the caller's stack pointer at which the link register is saved
48e1051a39Sopenharmony_ci	$UCMP	="cmpld";	# unsigned doubleword compare, used for the len/bsz test in SHA3_absorb
49e1051a39Sopenharmony_ci	$STU	="stdu";	# store-with-update, used to push the stack frame
50e1051a39Sopenharmony_ci	$POP	="ld";	# load one register-sized slot
51e1051a39Sopenharmony_ci	$PUSH	="std";	# store one register-sized slot
52e1051a39Sopenharmony_ci} else { die "nonsense $flavour"; }	# any other (or missing) flavour is rejected
53e1051a39Sopenharmony_ci
54e1051a39Sopenharmony_ci$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# $dir = directory part of this script's path, trailing separator included
55e1051a39Sopenharmony_ci( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or	# first choice: translator next to this script
56e1051a39Sopenharmony_ci( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or	# otherwise the perlasm directory two levels up
57e1051a39Sopenharmony_cidie "can't locate ppc-xlate.pl";	# neither candidate is a plain file
58e1051a39Sopenharmony_ci
59e1051a39Sopenharmony_ciopen STDOUT,"| $^X $xlate $flavour \"$output\""	# from here on STDOUT is a pipe into ppc-xlate.pl, run with the current perl
60e1051a39Sopenharmony_ci    or die "can't call $xlate: $!";
61e1051a39Sopenharmony_ci
62e1051a39Sopenharmony_ci$FRAME=24*$SIZE_T+6*$SIZE_T+32;	# stack frame size in bytes used by KeccakF1600 and SHA3_absorb
63e1051a39Sopenharmony_ci$LOCALS=6*$SIZE_T;	# offset of the local slots: 0=A[][] pointer, 1=inp, 2=len, 3=bsz, 4=Iota table pointer
64e1051a39Sopenharmony_ci$TEMP=$LOCALS+6*$SIZE_T;	# offset of the scratch area where Theta parks A[0..3][4]
65e1051a39Sopenharmony_ci
66e1051a39Sopenharmony_cimy $sp ="r1";	# stack pointer register
67e1051a39Sopenharmony_ci
68e1051a39Sopenharmony_cimy @A = map([ "r$_", "r".($_+1), "r".($_+2), "r".($_+3), "r".($_+4) ],	# 5x5 state kept in registers, five consecutive registers per row
69e1051a39Sopenharmony_ci            (7, 12, 17, 22, 27));	# first register of each row, i.e. r7..r31 overall
70e1051a39Sopenharmony_ci   $A[1][1] = "r6"; # r13 is reserved, so A[1][1] lives in r6 instead
71e1051a39Sopenharmony_ci
72e1051a39Sopenharmony_cimy @C = map("r$_", (0,3,4,5));	# scratch registers C[0..3]; C[4..7] are aliased to state registers while Theta runs
73e1051a39Sopenharmony_ci
74e1051a39Sopenharmony_cimy @rhotates = ([  0,  1, 62, 28, 27 ],	# left-rotate amounts for Rho+Pi, indexed by the source lane's [row][column]
75e1051a39Sopenharmony_ci                [ 36, 44,  6, 55, 20 ],
76e1051a39Sopenharmony_ci                [  3, 10, 43, 25, 39 ],
77e1051a39Sopenharmony_ci                [ 41, 45, 15, 21,  8 ],
78e1051a39Sopenharmony_ci                [ 18,  2, 61, 56, 14 ]);
79e1051a39Sopenharmony_ci
80e1051a39Sopenharmony_ci$code.=<<___;	# KeccakF1600_int: set CTR to 24 rounds; Theta begins by spilling A[0..3][4] to the TEMP area
81e1051a39Sopenharmony_ci.text
82e1051a39Sopenharmony_ci
83e1051a39Sopenharmony_ci.type	KeccakF1600_int,\@function
84e1051a39Sopenharmony_ci.align	5
85e1051a39Sopenharmony_ciKeccakF1600_int:
86e1051a39Sopenharmony_ci	li	r0,24
87e1051a39Sopenharmony_ci	mtctr	r0
88e1051a39Sopenharmony_ci	b	.Loop
89e1051a39Sopenharmony_ci.align	4
90e1051a39Sopenharmony_ci.Loop:
91e1051a39Sopenharmony_ci	xor	$C[0],$A[0][0],$A[1][0]		; Theta
92e1051a39Sopenharmony_ci	std	$A[0][4],`$TEMP+0`($sp)
93e1051a39Sopenharmony_ci	xor	$C[1],$A[0][1],$A[1][1]
94e1051a39Sopenharmony_ci	std	$A[1][4],`$TEMP+8`($sp)
95e1051a39Sopenharmony_ci	xor	$C[2],$A[0][2],$A[1][2]
96e1051a39Sopenharmony_ci	std	$A[2][4],`$TEMP+16`($sp)
97e1051a39Sopenharmony_ci	xor	$C[3],$A[0][3],$A[1][3]
98e1051a39Sopenharmony_ci	std	$A[3][4],`$TEMP+24`($sp)
99e1051a39Sopenharmony_ci___
100e1051a39Sopenharmony_ci	$C[4]=$A[0][4];	# A[0..3][4] were just stored to the TEMP area, so their registers double as scratch C[4..7]
101e1051a39Sopenharmony_ci	$C[5]=$A[1][4];	# generator-side aliasing only: no instruction is emitted here
102e1051a39Sopenharmony_ci	$C[6]=$A[2][4];
103e1051a39Sopenharmony_ci	$C[7]=$A[3][4];
104e1051a39Sopenharmony_ci$code.=<<___;	# Theta: finish the five column parities, then fold the D values into columns 1, 2 and 0 (new A[0][2] is parked in C[1])
105e1051a39Sopenharmony_ci	xor	$C[4],$A[0][4],$A[1][4]
106e1051a39Sopenharmony_ci	xor	$C[0],$C[0],$A[2][0]
107e1051a39Sopenharmony_ci	xor	$C[1],$C[1],$A[2][1]
108e1051a39Sopenharmony_ci	xor	$C[2],$C[2],$A[2][2]
109e1051a39Sopenharmony_ci	xor	$C[3],$C[3],$A[2][3]
110e1051a39Sopenharmony_ci	xor	$C[4],$C[4],$A[2][4]
111e1051a39Sopenharmony_ci	xor	$C[0],$C[0],$A[3][0]
112e1051a39Sopenharmony_ci	xor	$C[1],$C[1],$A[3][1]
113e1051a39Sopenharmony_ci	xor	$C[2],$C[2],$A[3][2]
114e1051a39Sopenharmony_ci	xor	$C[3],$C[3],$A[3][3]
115e1051a39Sopenharmony_ci	xor	$C[4],$C[4],$A[3][4]
116e1051a39Sopenharmony_ci	xor	$C[0],$C[0],$A[4][0]
117e1051a39Sopenharmony_ci	xor	$C[2],$C[2],$A[4][2]
118e1051a39Sopenharmony_ci	xor	$C[1],$C[1],$A[4][1]
119e1051a39Sopenharmony_ci	xor	$C[3],$C[3],$A[4][3]
120e1051a39Sopenharmony_ci	rotldi	$C[5],$C[2],1
121e1051a39Sopenharmony_ci	xor	$C[4],$C[4],$A[4][4]
122e1051a39Sopenharmony_ci	rotldi	$C[6],$C[3],1
123e1051a39Sopenharmony_ci	xor	$C[5],$C[5],$C[0]
124e1051a39Sopenharmony_ci	rotldi	$C[7],$C[4],1
125e1051a39Sopenharmony_ci
126e1051a39Sopenharmony_ci	xor	$A[0][1],$A[0][1],$C[5]
127e1051a39Sopenharmony_ci	xor	$A[1][1],$A[1][1],$C[5]
128e1051a39Sopenharmony_ci	xor	$A[2][1],$A[2][1],$C[5]
129e1051a39Sopenharmony_ci	xor	$A[3][1],$A[3][1],$C[5]
130e1051a39Sopenharmony_ci	xor	$A[4][1],$A[4][1],$C[5]
131e1051a39Sopenharmony_ci
132e1051a39Sopenharmony_ci	rotldi	$C[5],$C[0],1
133e1051a39Sopenharmony_ci	xor	$C[6],$C[6],$C[1]
134e1051a39Sopenharmony_ci	xor	$C[2],$C[2],$C[7]
135e1051a39Sopenharmony_ci	rotldi	$C[7],$C[1],1
136e1051a39Sopenharmony_ci	xor	$C[3],$C[3],$C[5]
137e1051a39Sopenharmony_ci	xor	$C[4],$C[4],$C[7]
138e1051a39Sopenharmony_ci
139e1051a39Sopenharmony_ci	xor	$C[1],   $A[0][2],$C[6]			;mr	$C[1],$A[0][2]
140e1051a39Sopenharmony_ci	xor	$A[1][2],$A[1][2],$C[6]
141e1051a39Sopenharmony_ci	xor	$A[2][2],$A[2][2],$C[6]
142e1051a39Sopenharmony_ci	xor	$A[3][2],$A[3][2],$C[6]
143e1051a39Sopenharmony_ci	xor	$A[4][2],$A[4][2],$C[6]
144e1051a39Sopenharmony_ci
145e1051a39Sopenharmony_ci	xor	$A[0][0],$A[0][0],$C[4]
146e1051a39Sopenharmony_ci	xor	$A[1][0],$A[1][0],$C[4]
147e1051a39Sopenharmony_ci	xor	$A[2][0],$A[2][0],$C[4]
148e1051a39Sopenharmony_ci	xor	$A[3][0],$A[3][0],$C[4]
149e1051a39Sopenharmony_ci	xor	$A[4][0],$A[4][0],$C[4]
150e1051a39Sopenharmony_ci___
151e1051a39Sopenharmony_ci	$C[4]=undef;	# drop the aliases: the next emitted instructions reload A[0..3][4] into these registers
152e1051a39Sopenharmony_ci	$C[5]=undef;	# any later use of C[4..7] would now interpolate as an empty string
153e1051a39Sopenharmony_ci	$C[6]=undef;
154e1051a39Sopenharmony_ci	$C[7]=undef;
155e1051a39Sopenharmony_ci$code.=<<___;	# reload the spilled lanes, finish Theta (columns 3, 4), Rho+Pi, Chi+Iota; then KeccakF1600, dword_le_load and SHA3_absorb
156e1051a39Sopenharmony_ci	ld	$A[0][4],`$TEMP+0`($sp)
157e1051a39Sopenharmony_ci	xor	$C[0],   $A[0][3],$C[2]			;mr	$C[0],$A[0][3]
158e1051a39Sopenharmony_ci	ld	$A[1][4],`$TEMP+8`($sp)
159e1051a39Sopenharmony_ci	xor	$A[1][3],$A[1][3],$C[2]
160e1051a39Sopenharmony_ci	ld	$A[2][4],`$TEMP+16`($sp)
161e1051a39Sopenharmony_ci	xor	$A[2][3],$A[2][3],$C[2]
162e1051a39Sopenharmony_ci	ld	$A[3][4],`$TEMP+24`($sp)
163e1051a39Sopenharmony_ci	xor	$A[3][3],$A[3][3],$C[2]
164e1051a39Sopenharmony_ci	xor	$A[4][3],$A[4][3],$C[2]
165e1051a39Sopenharmony_ci
166e1051a39Sopenharmony_ci	xor	$C[2],   $A[0][4],$C[3]			;mr	$C[2],$A[0][4]
167e1051a39Sopenharmony_ci	xor	$A[1][4],$A[1][4],$C[3]
168e1051a39Sopenharmony_ci	xor	$A[2][4],$A[2][4],$C[3]
169e1051a39Sopenharmony_ci	xor	$A[3][4],$A[3][4],$C[3]
170e1051a39Sopenharmony_ci	xor	$A[4][4],$A[4][4],$C[3]
171e1051a39Sopenharmony_ci
172e1051a39Sopenharmony_ci	mr	$C[3],$A[0][1]				; Rho+Pi
173e1051a39Sopenharmony_ci	rotldi	$A[0][1],$A[1][1],$rhotates[1][1]
174e1051a39Sopenharmony_ci	;mr	$C[1],$A[0][2]
175e1051a39Sopenharmony_ci	rotldi	$A[0][2],$A[2][2],$rhotates[2][2]
176e1051a39Sopenharmony_ci	;mr	$C[0],$A[0][3]
177e1051a39Sopenharmony_ci	rotldi	$A[0][3],$A[3][3],$rhotates[3][3]
178e1051a39Sopenharmony_ci	;mr	$C[2],$A[0][4]
179e1051a39Sopenharmony_ci	rotldi	$A[0][4],$A[4][4],$rhotates[4][4]
180e1051a39Sopenharmony_ci
181e1051a39Sopenharmony_ci	rotldi	$A[1][1],$A[1][4],$rhotates[1][4]
182e1051a39Sopenharmony_ci	rotldi	$A[2][2],$A[2][3],$rhotates[2][3]
183e1051a39Sopenharmony_ci	rotldi	$A[3][3],$A[3][2],$rhotates[3][2]
184e1051a39Sopenharmony_ci	rotldi	$A[4][4],$A[4][1],$rhotates[4][1]
185e1051a39Sopenharmony_ci
186e1051a39Sopenharmony_ci	rotldi	$A[1][4],$A[4][2],$rhotates[4][2]
187e1051a39Sopenharmony_ci	rotldi	$A[2][3],$A[3][4],$rhotates[3][4]
188e1051a39Sopenharmony_ci	rotldi	$A[3][2],$A[2][1],$rhotates[2][1]
189e1051a39Sopenharmony_ci	rotldi	$A[4][1],$A[1][3],$rhotates[1][3]
190e1051a39Sopenharmony_ci
191e1051a39Sopenharmony_ci	rotldi	$A[4][2],$A[2][4],$rhotates[2][4]
192e1051a39Sopenharmony_ci	rotldi	$A[3][4],$A[4][3],$rhotates[4][3]
193e1051a39Sopenharmony_ci	rotldi	$A[2][1],$A[1][2],$rhotates[1][2]
194e1051a39Sopenharmony_ci	rotldi	$A[1][3],$A[3][1],$rhotates[3][1]
195e1051a39Sopenharmony_ci
196e1051a39Sopenharmony_ci	rotldi	$A[2][4],$A[4][0],$rhotates[4][0]
197e1051a39Sopenharmony_ci	rotldi	$A[4][3],$A[3][0],$rhotates[3][0]
198e1051a39Sopenharmony_ci	rotldi	$A[1][2],$A[2][0],$rhotates[2][0]
199e1051a39Sopenharmony_ci	rotldi	$A[3][1],$A[1][0],$rhotates[1][0]
200e1051a39Sopenharmony_ci
201e1051a39Sopenharmony_ci	rotldi	$A[1][0],$C[0],$rhotates[0][3]
202e1051a39Sopenharmony_ci	rotldi	$A[2][0],$C[3],$rhotates[0][1]
203e1051a39Sopenharmony_ci	rotldi	$A[3][0],$C[2],$rhotates[0][4]
204e1051a39Sopenharmony_ci	rotldi	$A[4][0],$C[1],$rhotates[0][2]
205e1051a39Sopenharmony_ci
206e1051a39Sopenharmony_ci	andc	$C[0],$A[0][2],$A[0][1]			; Chi+Iota
207e1051a39Sopenharmony_ci	andc	$C[1],$A[0][3],$A[0][2]
208e1051a39Sopenharmony_ci	andc	$C[2],$A[0][0],$A[0][4]
209e1051a39Sopenharmony_ci	andc	$C[3],$A[0][1],$A[0][0]
210e1051a39Sopenharmony_ci	xor	$A[0][0],$A[0][0],$C[0]
211e1051a39Sopenharmony_ci	andc	$C[0],$A[0][4],$A[0][3]
212e1051a39Sopenharmony_ci	xor	$A[0][1],$A[0][1],$C[1]
213e1051a39Sopenharmony_ci	 ld	$C[1],`$LOCALS+4*$SIZE_T`($sp)
214e1051a39Sopenharmony_ci	xor	$A[0][3],$A[0][3],$C[2]
215e1051a39Sopenharmony_ci	xor	$A[0][4],$A[0][4],$C[3]
216e1051a39Sopenharmony_ci	xor	$A[0][2],$A[0][2],$C[0]
217e1051a39Sopenharmony_ci	 ldu	$C[3],8($C[1])				; Iota[i++]
218e1051a39Sopenharmony_ci
219e1051a39Sopenharmony_ci	andc	$C[0],$A[1][2],$A[1][1]
220e1051a39Sopenharmony_ci	 std	$C[1],`$LOCALS+4*$SIZE_T`($sp)
221e1051a39Sopenharmony_ci	andc	$C[1],$A[1][3],$A[1][2]
222e1051a39Sopenharmony_ci	andc	$C[2],$A[1][0],$A[1][4]
223e1051a39Sopenharmony_ci	 xor	$A[0][0],$A[0][0],$C[3]			; A[0][0] ^= Iota
224e1051a39Sopenharmony_ci	andc	$C[3],$A[1][1],$A[1][0]
225e1051a39Sopenharmony_ci	xor	$A[1][0],$A[1][0],$C[0]
226e1051a39Sopenharmony_ci	andc	$C[0],$A[1][4],$A[1][3]
227e1051a39Sopenharmony_ci	xor	$A[1][1],$A[1][1],$C[1]
228e1051a39Sopenharmony_ci	xor	$A[1][3],$A[1][3],$C[2]
229e1051a39Sopenharmony_ci	xor	$A[1][4],$A[1][4],$C[3]
230e1051a39Sopenharmony_ci	xor	$A[1][2],$A[1][2],$C[0]
231e1051a39Sopenharmony_ci
232e1051a39Sopenharmony_ci	andc	$C[0],$A[2][2],$A[2][1]
233e1051a39Sopenharmony_ci	andc	$C[1],$A[2][3],$A[2][2]
234e1051a39Sopenharmony_ci	andc	$C[2],$A[2][0],$A[2][4]
235e1051a39Sopenharmony_ci	andc	$C[3],$A[2][1],$A[2][0]
236e1051a39Sopenharmony_ci	xor	$A[2][0],$A[2][0],$C[0]
237e1051a39Sopenharmony_ci	andc	$C[0],$A[2][4],$A[2][3]
238e1051a39Sopenharmony_ci	xor	$A[2][1],$A[2][1],$C[1]
239e1051a39Sopenharmony_ci	xor	$A[2][3],$A[2][3],$C[2]
240e1051a39Sopenharmony_ci	xor	$A[2][4],$A[2][4],$C[3]
241e1051a39Sopenharmony_ci	xor	$A[2][2],$A[2][2],$C[0]
242e1051a39Sopenharmony_ci
243e1051a39Sopenharmony_ci	andc	$C[0],$A[3][2],$A[3][1]
244e1051a39Sopenharmony_ci	andc	$C[1],$A[3][3],$A[3][2]
245e1051a39Sopenharmony_ci	andc	$C[2],$A[3][0],$A[3][4]
246e1051a39Sopenharmony_ci	andc	$C[3],$A[3][1],$A[3][0]
247e1051a39Sopenharmony_ci	xor	$A[3][0],$A[3][0],$C[0]
248e1051a39Sopenharmony_ci	andc	$C[0],$A[3][4],$A[3][3]
249e1051a39Sopenharmony_ci	xor	$A[3][1],$A[3][1],$C[1]
250e1051a39Sopenharmony_ci	xor	$A[3][3],$A[3][3],$C[2]
251e1051a39Sopenharmony_ci	xor	$A[3][4],$A[3][4],$C[3]
252e1051a39Sopenharmony_ci	xor	$A[3][2],$A[3][2],$C[0]
253e1051a39Sopenharmony_ci
254e1051a39Sopenharmony_ci	andc	$C[0],$A[4][2],$A[4][1]
255e1051a39Sopenharmony_ci	andc	$C[1],$A[4][3],$A[4][2]
256e1051a39Sopenharmony_ci	andc	$C[2],$A[4][0],$A[4][4]
257e1051a39Sopenharmony_ci	andc	$C[3],$A[4][1],$A[4][0]
258e1051a39Sopenharmony_ci	xor	$A[4][0],$A[4][0],$C[0]
259e1051a39Sopenharmony_ci	andc	$C[0],$A[4][4],$A[4][3]
260e1051a39Sopenharmony_ci	xor	$A[4][1],$A[4][1],$C[1]
261e1051a39Sopenharmony_ci	xor	$A[4][3],$A[4][3],$C[2]
262e1051a39Sopenharmony_ci	xor	$A[4][4],$A[4][4],$C[3]
263e1051a39Sopenharmony_ci	xor	$A[4][2],$A[4][2],$C[0]
264e1051a39Sopenharmony_ci
265e1051a39Sopenharmony_ci	bdnz	.Loop
266e1051a39Sopenharmony_ci
267e1051a39Sopenharmony_ci	blr
268e1051a39Sopenharmony_ci	.long	0
269e1051a39Sopenharmony_ci	.byte	0,12,0x14,0,0,0,0,0
270e1051a39Sopenharmony_ci.size	KeccakF1600_int,.-KeccakF1600_int
271e1051a39Sopenharmony_ci
272e1051a39Sopenharmony_ci.type	KeccakF1600,\@function
273e1051a39Sopenharmony_ci.align	5
274e1051a39Sopenharmony_ciKeccakF1600:
275e1051a39Sopenharmony_ci	$STU	$sp,-$FRAME($sp)
276e1051a39Sopenharmony_ci	mflr	r0
277e1051a39Sopenharmony_ci	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
278e1051a39Sopenharmony_ci	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
279e1051a39Sopenharmony_ci	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
280e1051a39Sopenharmony_ci	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
281e1051a39Sopenharmony_ci	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
282e1051a39Sopenharmony_ci	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
283e1051a39Sopenharmony_ci	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
284e1051a39Sopenharmony_ci	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
285e1051a39Sopenharmony_ci	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
286e1051a39Sopenharmony_ci	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
287e1051a39Sopenharmony_ci	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
288e1051a39Sopenharmony_ci	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
289e1051a39Sopenharmony_ci	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
290e1051a39Sopenharmony_ci	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
291e1051a39Sopenharmony_ci	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
292e1051a39Sopenharmony_ci	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
293e1051a39Sopenharmony_ci	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
294e1051a39Sopenharmony_ci	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
295e1051a39Sopenharmony_ci	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
296e1051a39Sopenharmony_ci
297e1051a39Sopenharmony_ci	bl	PICmeup
298e1051a39Sopenharmony_ci	subi	r12,r12,8			; prepare for ldu
299e1051a39Sopenharmony_ci
300e1051a39Sopenharmony_ci	$PUSH	r3,`$LOCALS+0*$SIZE_T`($sp)
301e1051a39Sopenharmony_ci	;$PUSH	r4,`$LOCALS+1*$SIZE_T`($sp)
302e1051a39Sopenharmony_ci	;$PUSH	r5,`$LOCALS+2*$SIZE_T`($sp)
303e1051a39Sopenharmony_ci	;$PUSH	r6,`$LOCALS+3*$SIZE_T`($sp)
304e1051a39Sopenharmony_ci	$PUSH	r12,`$LOCALS+4*$SIZE_T`($sp)
305e1051a39Sopenharmony_ci
306e1051a39Sopenharmony_ci	ld	$A[0][0],`8*0`(r3)		; load A[5][5]
307e1051a39Sopenharmony_ci	ld	$A[0][1],`8*1`(r3)
308e1051a39Sopenharmony_ci	ld	$A[0][2],`8*2`(r3)
309e1051a39Sopenharmony_ci	ld	$A[0][3],`8*3`(r3)
310e1051a39Sopenharmony_ci	ld	$A[0][4],`8*4`(r3)
311e1051a39Sopenharmony_ci	ld	$A[1][0],`8*5`(r3)
312e1051a39Sopenharmony_ci	ld	$A[1][1],`8*6`(r3)
313e1051a39Sopenharmony_ci	ld	$A[1][2],`8*7`(r3)
314e1051a39Sopenharmony_ci	ld	$A[1][3],`8*8`(r3)
315e1051a39Sopenharmony_ci	ld	$A[1][4],`8*9`(r3)
316e1051a39Sopenharmony_ci	ld	$A[2][0],`8*10`(r3)
317e1051a39Sopenharmony_ci	ld	$A[2][1],`8*11`(r3)
318e1051a39Sopenharmony_ci	ld	$A[2][2],`8*12`(r3)
319e1051a39Sopenharmony_ci	ld	$A[2][3],`8*13`(r3)
320e1051a39Sopenharmony_ci	ld	$A[2][4],`8*14`(r3)
321e1051a39Sopenharmony_ci	ld	$A[3][0],`8*15`(r3)
322e1051a39Sopenharmony_ci	ld	$A[3][1],`8*16`(r3)
323e1051a39Sopenharmony_ci	ld	$A[3][2],`8*17`(r3)
324e1051a39Sopenharmony_ci	ld	$A[3][3],`8*18`(r3)
325e1051a39Sopenharmony_ci	ld	$A[3][4],`8*19`(r3)
326e1051a39Sopenharmony_ci	ld	$A[4][0],`8*20`(r3)
327e1051a39Sopenharmony_ci	ld	$A[4][1],`8*21`(r3)
328e1051a39Sopenharmony_ci	ld	$A[4][2],`8*22`(r3)
329e1051a39Sopenharmony_ci	ld	$A[4][3],`8*23`(r3)
330e1051a39Sopenharmony_ci	ld	$A[4][4],`8*24`(r3)
331e1051a39Sopenharmony_ci
332e1051a39Sopenharmony_ci	bl	KeccakF1600_int
333e1051a39Sopenharmony_ci
334e1051a39Sopenharmony_ci	$POP	r3,`$LOCALS+0*$SIZE_T`($sp)
335e1051a39Sopenharmony_ci	std	$A[0][0],`8*0`(r3)		; return A[5][5]
336e1051a39Sopenharmony_ci	std	$A[0][1],`8*1`(r3)
337e1051a39Sopenharmony_ci	std	$A[0][2],`8*2`(r3)
338e1051a39Sopenharmony_ci	std	$A[0][3],`8*3`(r3)
339e1051a39Sopenharmony_ci	std	$A[0][4],`8*4`(r3)
340e1051a39Sopenharmony_ci	std	$A[1][0],`8*5`(r3)
341e1051a39Sopenharmony_ci	std	$A[1][1],`8*6`(r3)
342e1051a39Sopenharmony_ci	std	$A[1][2],`8*7`(r3)
343e1051a39Sopenharmony_ci	std	$A[1][3],`8*8`(r3)
344e1051a39Sopenharmony_ci	std	$A[1][4],`8*9`(r3)
345e1051a39Sopenharmony_ci	std	$A[2][0],`8*10`(r3)
346e1051a39Sopenharmony_ci	std	$A[2][1],`8*11`(r3)
347e1051a39Sopenharmony_ci	std	$A[2][2],`8*12`(r3)
348e1051a39Sopenharmony_ci	std	$A[2][3],`8*13`(r3)
349e1051a39Sopenharmony_ci	std	$A[2][4],`8*14`(r3)
350e1051a39Sopenharmony_ci	std	$A[3][0],`8*15`(r3)
351e1051a39Sopenharmony_ci	std	$A[3][1],`8*16`(r3)
352e1051a39Sopenharmony_ci	std	$A[3][2],`8*17`(r3)
353e1051a39Sopenharmony_ci	std	$A[3][3],`8*18`(r3)
354e1051a39Sopenharmony_ci	std	$A[3][4],`8*19`(r3)
355e1051a39Sopenharmony_ci	std	$A[4][0],`8*20`(r3)
356e1051a39Sopenharmony_ci	std	$A[4][1],`8*21`(r3)
357e1051a39Sopenharmony_ci	std	$A[4][2],`8*22`(r3)
358e1051a39Sopenharmony_ci	std	$A[4][3],`8*23`(r3)
359e1051a39Sopenharmony_ci	std	$A[4][4],`8*24`(r3)
360e1051a39Sopenharmony_ci
361e1051a39Sopenharmony_ci	$POP	r0,`$FRAME+$LRSAVE`($sp)
362e1051a39Sopenharmony_ci	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
363e1051a39Sopenharmony_ci	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
364e1051a39Sopenharmony_ci	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
365e1051a39Sopenharmony_ci	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
366e1051a39Sopenharmony_ci	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
367e1051a39Sopenharmony_ci	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
368e1051a39Sopenharmony_ci	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
369e1051a39Sopenharmony_ci	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
370e1051a39Sopenharmony_ci	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
371e1051a39Sopenharmony_ci	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
372e1051a39Sopenharmony_ci	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
373e1051a39Sopenharmony_ci	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
374e1051a39Sopenharmony_ci	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
375e1051a39Sopenharmony_ci	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
376e1051a39Sopenharmony_ci	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
377e1051a39Sopenharmony_ci	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
378e1051a39Sopenharmony_ci	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
379e1051a39Sopenharmony_ci	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
380e1051a39Sopenharmony_ci	mtlr	r0
381e1051a39Sopenharmony_ci	addi	$sp,$sp,$FRAME
382e1051a39Sopenharmony_ci	blr
383e1051a39Sopenharmony_ci	.long	0
384e1051a39Sopenharmony_ci	.byte	0,12,4,1,0x80,18,1,0
385e1051a39Sopenharmony_ci	.long	0
386e1051a39Sopenharmony_ci.size	KeccakF1600,.-KeccakF1600
387e1051a39Sopenharmony_ci
388e1051a39Sopenharmony_ci.type	dword_le_load,\@function
389e1051a39Sopenharmony_ci.align	5
390e1051a39Sopenharmony_cidword_le_load:
391e1051a39Sopenharmony_ci	lbz	r0,1(r3)
392e1051a39Sopenharmony_ci	lbz	r4,2(r3)
393e1051a39Sopenharmony_ci	lbz	r5,3(r3)
394e1051a39Sopenharmony_ci	insrdi	r0,r4,8,48
395e1051a39Sopenharmony_ci	lbz	r4,4(r3)
396e1051a39Sopenharmony_ci	insrdi	r0,r5,8,40
397e1051a39Sopenharmony_ci	lbz	r5,5(r3)
398e1051a39Sopenharmony_ci	insrdi	r0,r4,8,32
399e1051a39Sopenharmony_ci	lbz	r4,6(r3)
400e1051a39Sopenharmony_ci	insrdi	r0,r5,8,24
401e1051a39Sopenharmony_ci	lbz	r5,7(r3)
402e1051a39Sopenharmony_ci	insrdi	r0,r4,8,16
403e1051a39Sopenharmony_ci	lbzu	r4,8(r3)
404e1051a39Sopenharmony_ci	insrdi	r0,r5,8,8
405e1051a39Sopenharmony_ci	insrdi	r0,r4,8,0
406e1051a39Sopenharmony_ci	blr
407e1051a39Sopenharmony_ci	.long	0
408e1051a39Sopenharmony_ci	.byte	0,12,0x14,0,0,0,1,0
409e1051a39Sopenharmony_ci	.long	0
410e1051a39Sopenharmony_ci.size	dword_le_load,.-dword_le_load
411e1051a39Sopenharmony_ci
412e1051a39Sopenharmony_ci.globl	SHA3_absorb
413e1051a39Sopenharmony_ci.type	SHA3_absorb,\@function
414e1051a39Sopenharmony_ci.align	5
415e1051a39Sopenharmony_ciSHA3_absorb:
416e1051a39Sopenharmony_ci	$STU	$sp,-$FRAME($sp)
417e1051a39Sopenharmony_ci	mflr	r0
418e1051a39Sopenharmony_ci	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
419e1051a39Sopenharmony_ci	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
420e1051a39Sopenharmony_ci	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
421e1051a39Sopenharmony_ci	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
422e1051a39Sopenharmony_ci	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
423e1051a39Sopenharmony_ci	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
424e1051a39Sopenharmony_ci	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
425e1051a39Sopenharmony_ci	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
426e1051a39Sopenharmony_ci	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
427e1051a39Sopenharmony_ci	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
428e1051a39Sopenharmony_ci	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
429e1051a39Sopenharmony_ci	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
430e1051a39Sopenharmony_ci	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
431e1051a39Sopenharmony_ci	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
432e1051a39Sopenharmony_ci	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
433e1051a39Sopenharmony_ci	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
434e1051a39Sopenharmony_ci	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
435e1051a39Sopenharmony_ci	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
436e1051a39Sopenharmony_ci	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
437e1051a39Sopenharmony_ci
438e1051a39Sopenharmony_ci	bl	PICmeup
439e1051a39Sopenharmony_ci	subi	r4,r4,1				; prepare for lbzu
440e1051a39Sopenharmony_ci	subi	r12,r12,8			; prepare for ldu
441e1051a39Sopenharmony_ci
442e1051a39Sopenharmony_ci	$PUSH	r3,`$LOCALS+0*$SIZE_T`($sp)	; save A[][]
443e1051a39Sopenharmony_ci	$PUSH	r4,`$LOCALS+1*$SIZE_T`($sp)	; save inp
444e1051a39Sopenharmony_ci	$PUSH	r5,`$LOCALS+2*$SIZE_T`($sp)	; save len
445e1051a39Sopenharmony_ci	$PUSH	r6,`$LOCALS+3*$SIZE_T`($sp)	; save bsz
446e1051a39Sopenharmony_ci	mr	r0,r6
447e1051a39Sopenharmony_ci	$PUSH	r12,`$LOCALS+4*$SIZE_T`($sp)
448e1051a39Sopenharmony_ci
449e1051a39Sopenharmony_ci	ld	$A[0][0],`8*0`(r3)		; load A[5][5]
450e1051a39Sopenharmony_ci	ld	$A[0][1],`8*1`(r3)
451e1051a39Sopenharmony_ci	ld	$A[0][2],`8*2`(r3)
452e1051a39Sopenharmony_ci	ld	$A[0][3],`8*3`(r3)
453e1051a39Sopenharmony_ci	ld	$A[0][4],`8*4`(r3)
454e1051a39Sopenharmony_ci	ld	$A[1][0],`8*5`(r3)
455e1051a39Sopenharmony_ci	ld	$A[1][1],`8*6`(r3)
456e1051a39Sopenharmony_ci	ld	$A[1][2],`8*7`(r3)
457e1051a39Sopenharmony_ci	ld	$A[1][3],`8*8`(r3)
458e1051a39Sopenharmony_ci	ld	$A[1][4],`8*9`(r3)
459e1051a39Sopenharmony_ci	ld	$A[2][0],`8*10`(r3)
460e1051a39Sopenharmony_ci	ld	$A[2][1],`8*11`(r3)
461e1051a39Sopenharmony_ci	ld	$A[2][2],`8*12`(r3)
462e1051a39Sopenharmony_ci	ld	$A[2][3],`8*13`(r3)
463e1051a39Sopenharmony_ci	ld	$A[2][4],`8*14`(r3)
464e1051a39Sopenharmony_ci	ld	$A[3][0],`8*15`(r3)
465e1051a39Sopenharmony_ci	ld	$A[3][1],`8*16`(r3)
466e1051a39Sopenharmony_ci	ld	$A[3][2],`8*17`(r3)
467e1051a39Sopenharmony_ci	ld	$A[3][3],`8*18`(r3)
468e1051a39Sopenharmony_ci	ld	$A[3][4],`8*19`(r3)
469e1051a39Sopenharmony_ci	ld	$A[4][0],`8*20`(r3)
470e1051a39Sopenharmony_ci	ld	$A[4][1],`8*21`(r3)
471e1051a39Sopenharmony_ci	ld	$A[4][2],`8*22`(r3)
472e1051a39Sopenharmony_ci	ld	$A[4][3],`8*23`(r3)
473e1051a39Sopenharmony_ci	ld	$A[4][4],`8*24`(r3)
474e1051a39Sopenharmony_ci
475e1051a39Sopenharmony_ci	mr	r3,r4
476e1051a39Sopenharmony_ci	mr	r4,r5
477e1051a39Sopenharmony_ci	mr	r5,r0
478e1051a39Sopenharmony_ci
479e1051a39Sopenharmony_ci	b	.Loop_absorb
480e1051a39Sopenharmony_ci
481e1051a39Sopenharmony_ci.align	4
482e1051a39Sopenharmony_ci.Loop_absorb:
483e1051a39Sopenharmony_ci	$UCMP	r4,r5				; len < bsz?
484e1051a39Sopenharmony_ci	blt	.Labsorbed
485e1051a39Sopenharmony_ci
486e1051a39Sopenharmony_ci	sub	r4,r4,r5			; len -= bsz
487e1051a39Sopenharmony_ci	srwi	r5,r5,3
488e1051a39Sopenharmony_ci	$PUSH	r4,`$LOCALS+2*$SIZE_T`($sp)	; save len
489e1051a39Sopenharmony_ci	mtctr	r5
490e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
491e1051a39Sopenharmony_ci	xor	$A[0][0],$A[0][0],r0
492e1051a39Sopenharmony_ci	bdz	.Lprocess_block
493e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
494e1051a39Sopenharmony_ci	xor	$A[0][1],$A[0][1],r0
495e1051a39Sopenharmony_ci	bdz	.Lprocess_block
496e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
497e1051a39Sopenharmony_ci	xor	$A[0][2],$A[0][2],r0
498e1051a39Sopenharmony_ci	bdz	.Lprocess_block
499e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
500e1051a39Sopenharmony_ci	xor	$A[0][3],$A[0][3],r0
501e1051a39Sopenharmony_ci	bdz	.Lprocess_block
502e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
503e1051a39Sopenharmony_ci	xor	$A[0][4],$A[0][4],r0
504e1051a39Sopenharmony_ci	bdz	.Lprocess_block
505e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
506e1051a39Sopenharmony_ci	xor	$A[1][0],$A[1][0],r0
507e1051a39Sopenharmony_ci	bdz	.Lprocess_block
508e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
509e1051a39Sopenharmony_ci	xor	$A[1][1],$A[1][1],r0
510e1051a39Sopenharmony_ci	bdz	.Lprocess_block
511e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
512e1051a39Sopenharmony_ci	xor	$A[1][2],$A[1][2],r0
513e1051a39Sopenharmony_ci	bdz	.Lprocess_block
514e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
515e1051a39Sopenharmony_ci	xor	$A[1][3],$A[1][3],r0
516e1051a39Sopenharmony_ci	bdz	.Lprocess_block
517e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
518e1051a39Sopenharmony_ci	xor	$A[1][4],$A[1][4],r0
519e1051a39Sopenharmony_ci	bdz	.Lprocess_block
520e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
521e1051a39Sopenharmony_ci	xor	$A[2][0],$A[2][0],r0
522e1051a39Sopenharmony_ci	bdz	.Lprocess_block
523e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
524e1051a39Sopenharmony_ci	xor	$A[2][1],$A[2][1],r0
525e1051a39Sopenharmony_ci	bdz	.Lprocess_block
526e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
527e1051a39Sopenharmony_ci	xor	$A[2][2],$A[2][2],r0
528e1051a39Sopenharmony_ci	bdz	.Lprocess_block
529e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
530e1051a39Sopenharmony_ci	xor	$A[2][3],$A[2][3],r0
531e1051a39Sopenharmony_ci	bdz	.Lprocess_block
532e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
533e1051a39Sopenharmony_ci	xor	$A[2][4],$A[2][4],r0
534e1051a39Sopenharmony_ci	bdz	.Lprocess_block
535e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
536e1051a39Sopenharmony_ci	xor	$A[3][0],$A[3][0],r0
537e1051a39Sopenharmony_ci	bdz	.Lprocess_block
538e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
539e1051a39Sopenharmony_ci	xor	$A[3][1],$A[3][1],r0
540e1051a39Sopenharmony_ci	bdz	.Lprocess_block
541e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
542e1051a39Sopenharmony_ci	xor	$A[3][2],$A[3][2],r0
543e1051a39Sopenharmony_ci	bdz	.Lprocess_block
544e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
545e1051a39Sopenharmony_ci	xor	$A[3][3],$A[3][3],r0
546e1051a39Sopenharmony_ci	bdz	.Lprocess_block
547e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
548e1051a39Sopenharmony_ci	xor	$A[3][4],$A[3][4],r0
549e1051a39Sopenharmony_ci	bdz	.Lprocess_block
550e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
551e1051a39Sopenharmony_ci	xor	$A[4][0],$A[4][0],r0
552e1051a39Sopenharmony_ci	bdz	.Lprocess_block
553e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
554e1051a39Sopenharmony_ci	xor	$A[4][1],$A[4][1],r0
555e1051a39Sopenharmony_ci	bdz	.Lprocess_block
556e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
557e1051a39Sopenharmony_ci	xor	$A[4][2],$A[4][2],r0
558e1051a39Sopenharmony_ci	bdz	.Lprocess_block
559e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
560e1051a39Sopenharmony_ci	xor	$A[4][3],$A[4][3],r0
561e1051a39Sopenharmony_ci	bdz	.Lprocess_block
562e1051a39Sopenharmony_ci	bl	dword_le_load			; *inp++
563e1051a39Sopenharmony_ci	xor	$A[4][4],$A[4][4],r0
564e1051a39Sopenharmony_ci
565e1051a39Sopenharmony_ci.Lprocess_block:
566e1051a39Sopenharmony_ci	$PUSH	r3,`$LOCALS+1*$SIZE_T`($sp)	; save inp
567e1051a39Sopenharmony_ci
568e1051a39Sopenharmony_ci	bl	KeccakF1600_int
569e1051a39Sopenharmony_ci
570e1051a39Sopenharmony_ci	$POP	r0,`$LOCALS+4*$SIZE_T`($sp)	; pull iotas[24]
571e1051a39Sopenharmony_ci	$POP	r5,`$LOCALS+3*$SIZE_T`($sp)	; restore bsz
572e1051a39Sopenharmony_ci	$POP	r4,`$LOCALS+2*$SIZE_T`($sp)	; restore len
573e1051a39Sopenharmony_ci	$POP	r3,`$LOCALS+1*$SIZE_T`($sp)	; restore inp
574e1051a39Sopenharmony_ci	addic	r0,r0,`-8*24`			; rewind iotas
575e1051a39Sopenharmony_ci	$PUSH	r0,`$LOCALS+4*$SIZE_T`($sp)
576e1051a39Sopenharmony_ci
577e1051a39Sopenharmony_ci	b	.Loop_absorb
578e1051a39Sopenharmony_ci
579e1051a39Sopenharmony_ci.align	4
580e1051a39Sopenharmony_ci.Labsorbed:
581e1051a39Sopenharmony_ci	$POP	r3,`$LOCALS+0*$SIZE_T`($sp)
582e1051a39Sopenharmony_ci	std	$A[0][0],`8*0`(r3)		; return A[5][5]
583e1051a39Sopenharmony_ci	std	$A[0][1],`8*1`(r3)
584e1051a39Sopenharmony_ci	std	$A[0][2],`8*2`(r3)
585e1051a39Sopenharmony_ci	std	$A[0][3],`8*3`(r3)
586e1051a39Sopenharmony_ci	std	$A[0][4],`8*4`(r3)
587e1051a39Sopenharmony_ci	std	$A[1][0],`8*5`(r3)
588e1051a39Sopenharmony_ci	std	$A[1][1],`8*6`(r3)
589e1051a39Sopenharmony_ci	std	$A[1][2],`8*7`(r3)
590e1051a39Sopenharmony_ci	std	$A[1][3],`8*8`(r3)
591e1051a39Sopenharmony_ci	std	$A[1][4],`8*9`(r3)
592e1051a39Sopenharmony_ci	std	$A[2][0],`8*10`(r3)
593e1051a39Sopenharmony_ci	std	$A[2][1],`8*11`(r3)
594e1051a39Sopenharmony_ci	std	$A[2][2],`8*12`(r3)
595e1051a39Sopenharmony_ci	std	$A[2][3],`8*13`(r3)
596e1051a39Sopenharmony_ci	std	$A[2][4],`8*14`(r3)
597e1051a39Sopenharmony_ci	std	$A[3][0],`8*15`(r3)
598e1051a39Sopenharmony_ci	std	$A[3][1],`8*16`(r3)
599e1051a39Sopenharmony_ci	std	$A[3][2],`8*17`(r3)
600e1051a39Sopenharmony_ci	std	$A[3][3],`8*18`(r3)
601e1051a39Sopenharmony_ci	std	$A[3][4],`8*19`(r3)
602e1051a39Sopenharmony_ci	std	$A[4][0],`8*20`(r3)
603e1051a39Sopenharmony_ci	std	$A[4][1],`8*21`(r3)
604e1051a39Sopenharmony_ci	std	$A[4][2],`8*22`(r3)
605e1051a39Sopenharmony_ci	std	$A[4][3],`8*23`(r3)
606e1051a39Sopenharmony_ci	std	$A[4][4],`8*24`(r3)
607e1051a39Sopenharmony_ci
608e1051a39Sopenharmony_ci	mr	r3,r4				; return value
609e1051a39Sopenharmony_ci	$POP	r0,`$FRAME+$LRSAVE`($sp)
610e1051a39Sopenharmony_ci	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
611e1051a39Sopenharmony_ci	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
612e1051a39Sopenharmony_ci	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
613e1051a39Sopenharmony_ci	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
614e1051a39Sopenharmony_ci	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
615e1051a39Sopenharmony_ci	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
616e1051a39Sopenharmony_ci	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
617e1051a39Sopenharmony_ci	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
618e1051a39Sopenharmony_ci	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
619e1051a39Sopenharmony_ci	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
620e1051a39Sopenharmony_ci	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
621e1051a39Sopenharmony_ci	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
622e1051a39Sopenharmony_ci	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
623e1051a39Sopenharmony_ci	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
624e1051a39Sopenharmony_ci	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
625e1051a39Sopenharmony_ci	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
626e1051a39Sopenharmony_ci	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
627e1051a39Sopenharmony_ci	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
628e1051a39Sopenharmony_ci	mtlr	r0
629e1051a39Sopenharmony_ci	addi	$sp,$sp,$FRAME
630e1051a39Sopenharmony_ci	blr
631e1051a39Sopenharmony_ci	.long	0
632e1051a39Sopenharmony_ci	.byte	0,12,4,1,0x80,18,4,0
633e1051a39Sopenharmony_ci	.long	0
634e1051a39Sopenharmony_ci.size	SHA3_absorb,.-SHA3_absorb
635e1051a39Sopenharmony_ci___
636e1051a39Sopenharmony_ci{
# SHA3_squeeze: copy bytes out of the Keccak state into an output buffer,
# calling KeccakF1600 each time a full block of the state has been emitted.
#
# Arguments, as consumed by the code below:
#   r3 - address of the state, read as consecutive 64-bit lanes
#   r4 - address of the output buffer
#   r5 - number of output bytes still to produce (counted down to zero)
#   r6 - number of bytes to emit between KeccakF1600 calls; presumably the
#        sponge rate in bytes - TODO confirm against the C prototype
#
# Frame: 10*$SIZE_T bytes are allocated; r28-r31 are saved in slots 6..9
# and the link register at 10*$SIZE_T+$LRSAVE, and all are restored at
# .Lsqueeze_done.
#
# Flow:
#   .Loop_squeeze  - loads the next lane with ldu (r3 is pre-biased by -8).
#                    If fewer than 8 bytes are wanted it branches to the
#                    tail; otherwise it stores the lane one byte at a time,
#                    low-order byte first (stb followed by srdi by 8), i.e.
#                    in little-endian order, with the final stbu advancing
#                    $out by 8.  $len is then reduced by 8 (done if zero)
#                    and r6 is reduced by 8; once r6 is no longer positive,
#                    KeccakF1600 is called on $A_flat and r3/r6 are reset
#                    from $A_flat/$bsz.
#   .Lsqueeze_tail - emits the remaining $len (< 8) bytes of the lane that
#                    was already loaded, using CTR as the byte counter.
#
# NOTE(review): with r5 == 0 on entry the code reaches .Lsqueeze_tail and
# executes mtctr with 0; since bdnz decrements before testing, this looks
# like it would not stop after zero bytes.  Presumably callers never pass a
# zero length - confirm.

# Working copies of the arguments live in r28-r31, presumably so that they
# survive the call to KeccakF1600 (defined elsewhere in this file) - confirm
# that it preserves these registers.
637e1051a39Sopenharmony_cimy ($A_flat,$out,$len,$bsz) = map("r$_",(28..31));
638e1051a39Sopenharmony_ci$code.=<<___;
639e1051a39Sopenharmony_ci.globl	SHA3_squeeze
640e1051a39Sopenharmony_ci.type	SHA3_squeeze,\@function
641e1051a39Sopenharmony_ci.align	5
642e1051a39Sopenharmony_ciSHA3_squeeze:
643e1051a39Sopenharmony_ci	$STU	$sp,`-10*$SIZE_T`($sp)
644e1051a39Sopenharmony_ci	mflr	r0
645e1051a39Sopenharmony_ci	$PUSH	r28,`6*$SIZE_T`($sp)
646e1051a39Sopenharmony_ci	$PUSH	r29,`7*$SIZE_T`($sp)
647e1051a39Sopenharmony_ci	$PUSH	r30,`8*$SIZE_T`($sp)
648e1051a39Sopenharmony_ci	$PUSH	r31,`9*$SIZE_T`($sp)
649e1051a39Sopenharmony_ci	$PUSH	r0,`10*$SIZE_T+$LRSAVE`($sp)
650e1051a39Sopenharmony_ci
651e1051a39Sopenharmony_ci	mr	$A_flat,r3
652e1051a39Sopenharmony_ci	subi	r3,r3,8			; prepare for ldu
653e1051a39Sopenharmony_ci	subi	$out,r4,1		; prepare for stbu
654e1051a39Sopenharmony_ci	mr	$len,r5
655e1051a39Sopenharmony_ci	mr	$bsz,r6
656e1051a39Sopenharmony_ci	b	.Loop_squeeze
657e1051a39Sopenharmony_ci
658e1051a39Sopenharmony_ci.align	4
659e1051a39Sopenharmony_ci.Loop_squeeze:
660e1051a39Sopenharmony_ci	ldu	r0,8(r3)
661e1051a39Sopenharmony_ci	${UCMP}i $len,8
662e1051a39Sopenharmony_ci	blt	.Lsqueeze_tail
663e1051a39Sopenharmony_ci
664e1051a39Sopenharmony_ci	stb	r0,1($out)
665e1051a39Sopenharmony_ci	srdi	r0,r0,8
666e1051a39Sopenharmony_ci	stb	r0,2($out)
667e1051a39Sopenharmony_ci	srdi	r0,r0,8
668e1051a39Sopenharmony_ci	stb	r0,3($out)
669e1051a39Sopenharmony_ci	srdi	r0,r0,8
670e1051a39Sopenharmony_ci	stb	r0,4($out)
671e1051a39Sopenharmony_ci	srdi	r0,r0,8
672e1051a39Sopenharmony_ci	stb	r0,5($out)
673e1051a39Sopenharmony_ci	srdi	r0,r0,8
674e1051a39Sopenharmony_ci	stb	r0,6($out)
675e1051a39Sopenharmony_ci	srdi	r0,r0,8
676e1051a39Sopenharmony_ci	stb	r0,7($out)
677e1051a39Sopenharmony_ci	srdi	r0,r0,8
678e1051a39Sopenharmony_ci	stbu	r0,8($out)
679e1051a39Sopenharmony_ci
680e1051a39Sopenharmony_ci	subic.	$len,$len,8
681e1051a39Sopenharmony_ci	beq	.Lsqueeze_done
682e1051a39Sopenharmony_ci
683e1051a39Sopenharmony_ci	subic.	r6,r6,8
684e1051a39Sopenharmony_ci	bgt	.Loop_squeeze
685e1051a39Sopenharmony_ci
686e1051a39Sopenharmony_ci	mr	r3,$A_flat
687e1051a39Sopenharmony_ci	bl	KeccakF1600
688e1051a39Sopenharmony_ci	subi	r3,$A_flat,8		; prepare for ldu
689e1051a39Sopenharmony_ci	mr	r6,$bsz
690e1051a39Sopenharmony_ci	b	.Loop_squeeze
691e1051a39Sopenharmony_ci
692e1051a39Sopenharmony_ci.align	4
693e1051a39Sopenharmony_ci.Lsqueeze_tail:
694e1051a39Sopenharmony_ci	mtctr	$len
695e1051a39Sopenharmony_ci.Loop_tail:
696e1051a39Sopenharmony_ci	stbu	r0,1($out)
697e1051a39Sopenharmony_ci	srdi	r0,r0,8
698e1051a39Sopenharmony_ci	bdnz	.Loop_tail
699e1051a39Sopenharmony_ci
700e1051a39Sopenharmony_ci.Lsqueeze_done:
701e1051a39Sopenharmony_ci	$POP	r0,`10*$SIZE_T+$LRSAVE`($sp)
702e1051a39Sopenharmony_ci	$POP	r28,`6*$SIZE_T`($sp)
703e1051a39Sopenharmony_ci	$POP	r29,`7*$SIZE_T`($sp)
704e1051a39Sopenharmony_ci	$POP	r30,`8*$SIZE_T`($sp)
705e1051a39Sopenharmony_ci	$POP	r31,`9*$SIZE_T`($sp)
706e1051a39Sopenharmony_ci	mtlr	r0
707e1051a39Sopenharmony_ci	addi	$sp,$sp,`10*$SIZE_T`
708e1051a39Sopenharmony_ci	blr
709e1051a39Sopenharmony_ci	.long	0
710e1051a39Sopenharmony_ci	.byte	0,12,4,1,0x80,4,4,0
711e1051a39Sopenharmony_ci	.long	0
712e1051a39Sopenharmony_ci.size	SHA3_squeeze,.-SHA3_squeeze
713e1051a39Sopenharmony_ci___
714e1051a39Sopenharmony_ci}
715e1051a39Sopenharmony_ci
716e1051a39Sopenharmony_ci# Ugly hack here, because PPC assembler syntax seems to vary too
717e1051a39Sopenharmony_ci# much from platform to platform...
#
# PICmeup leaves the run-time address of the iotas table in r12.  It never
# names the iotas symbol; the address is derived from the program counter:
#
#   - the caller's link register is parked in r0 and restored before blr;
#   - "bcl 20,31,$+4" branches to the very next instruction and sets LR to
#     that instruction's address, which "mflr r12" then reads back, so r12
#     holds PICmeup+8 (two 4-byte instructions in);
#   - adding 64-8 therefore yields PICmeup+64.
#
# The table really does start at PICmeup+64: six 4-byte instructions, the
# .long (4 bytes) and the eight .byte values add up to 9*4 = 36 bytes, and
# .space 64-9*4 pads the rest.  Both expressions must be kept in step if an
# instruction is ever added to or removed from PICmeup.
#
# iotas holds 24 64-bit constants; the values are the Keccak-f[1600] round
# constants used by the iota step.
#
# "\$" and "\@" are escaped so that the interpolating here-document emits a
# literal "$" / "@" instead of expanding a Perl variable.
718e1051a39Sopenharmony_ci$code.=<<___;
719e1051a39Sopenharmony_ci.align	6
720e1051a39Sopenharmony_ciPICmeup:
721e1051a39Sopenharmony_ci	mflr	r0
722e1051a39Sopenharmony_ci	bcl	20,31,\$+4
723e1051a39Sopenharmony_ci	mflr	r12   ; vvvvvv "distance" between . and 1st data entry
724e1051a39Sopenharmony_ci	addi	r12,r12,`64-8`
725e1051a39Sopenharmony_ci	mtlr	r0
726e1051a39Sopenharmony_ci	blr
727e1051a39Sopenharmony_ci	.long	0
728e1051a39Sopenharmony_ci	.byte	0,12,0x14,0,0,0,0,0
729e1051a39Sopenharmony_ci	.space	`64-9*4`
730e1051a39Sopenharmony_ci.type	iotas,\@object
731e1051a39Sopenharmony_ciiotas:
732e1051a39Sopenharmony_ci	.quad	0x0000000000000001
733e1051a39Sopenharmony_ci	.quad	0x0000000000008082
734e1051a39Sopenharmony_ci	.quad	0x800000000000808a
735e1051a39Sopenharmony_ci	.quad	0x8000000080008000
736e1051a39Sopenharmony_ci	.quad	0x000000000000808b
737e1051a39Sopenharmony_ci	.quad	0x0000000080000001
738e1051a39Sopenharmony_ci	.quad	0x8000000080008081
739e1051a39Sopenharmony_ci	.quad	0x8000000000008009
740e1051a39Sopenharmony_ci	.quad	0x000000000000008a
741e1051a39Sopenharmony_ci	.quad	0x0000000000000088
742e1051a39Sopenharmony_ci	.quad	0x0000000080008009
743e1051a39Sopenharmony_ci	.quad	0x000000008000000a
744e1051a39Sopenharmony_ci	.quad	0x000000008000808b
745e1051a39Sopenharmony_ci	.quad	0x800000000000008b
746e1051a39Sopenharmony_ci	.quad	0x8000000000008089
747e1051a39Sopenharmony_ci	.quad	0x8000000000008003
748e1051a39Sopenharmony_ci	.quad	0x8000000000008002
749e1051a39Sopenharmony_ci	.quad	0x8000000000000080
750e1051a39Sopenharmony_ci	.quad	0x000000000000800a
751e1051a39Sopenharmony_ci	.quad	0x800000008000000a
752e1051a39Sopenharmony_ci	.quad	0x8000000080008081
753e1051a39Sopenharmony_ci	.quad	0x8000000000008080
754e1051a39Sopenharmony_ci	.quad	0x0000000080000001
755e1051a39Sopenharmony_ci	.quad	0x8000000080008008
756e1051a39Sopenharmony_ci.size	iotas,.-iotas
757e1051a39Sopenharmony_ci.asciz	"Keccak-1600 absorb and squeeze for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
758e1051a39Sopenharmony_ci___
759e1051a39Sopenharmony_ci
# Final pass over the accumulated assembly text.  Every span enclosed in
# backticks is replaced by the result of evaluating its contents as a Perl
# expression (the /e modifier runs "eval $1" for each match, /g repeats it
# for all matches).  Variables inside the backticks were already expanded
# when the here-documents were built, so the expressions are plain
# arithmetic by the time they are evaluated, e.g. `64-8`.
760e1051a39Sopenharmony_ci$code =~ s/\`([^\`]*)\`/eval $1/gem;
# Write the finished text to the currently selected output handle.
761e1051a39Sopenharmony_ciprint $code;
# The close is checked so that a failed or incomplete write is reported
# instead of silently producing a truncated output.
762e1051a39Sopenharmony_ciclose STDOUT or die "error closing STDOUT: $!";
763