11cb0ef41Sopenharmony_ci#!/usr/bin/env perl
21cb0ef41Sopenharmony_ci# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
31cb0ef41Sopenharmony_ci#
41cb0ef41Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
51cb0ef41Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
61cb0ef41Sopenharmony_ci# in the file LICENSE in the source distribution or at
71cb0ef41Sopenharmony_ci# https://www.openssl.org/source/license.html
81cb0ef41Sopenharmony_ci#
91cb0ef41Sopenharmony_ci# ====================================================================
101cb0ef41Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
111cb0ef41Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
121cb0ef41Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
131cb0ef41Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
141cb0ef41Sopenharmony_ci# ====================================================================
151cb0ef41Sopenharmony_ci#
161cb0ef41Sopenharmony_ci# Keccak-1600 for PPC64.
171cb0ef41Sopenharmony_ci#
181cb0ef41Sopenharmony_ci# June 2017.
191cb0ef41Sopenharmony_ci#
201cb0ef41Sopenharmony_ci# This is straightforward KECCAK_1X_ALT implementation that works on
211cb0ef41Sopenharmony_ci# *any* PPC64. Then PowerISA 2.07 adds 2x64-bit vector rotate, and
221cb0ef41Sopenharmony_ci# it's possible to achieve performance better than below, but that is
231cb0ef41Sopenharmony_ci# naturally option only for POWER8 and successors...
241cb0ef41Sopenharmony_ci#
251cb0ef41Sopenharmony_ci######################################################################
261cb0ef41Sopenharmony_ci# Numbers are cycles per processed byte.
271cb0ef41Sopenharmony_ci#
281cb0ef41Sopenharmony_ci#		r=1088(*)
291cb0ef41Sopenharmony_ci#
301cb0ef41Sopenharmony_ci# PPC970/G5	14.0/+130%
311cb0ef41Sopenharmony_ci# POWER7	9.7/+110%
321cb0ef41Sopenharmony_ci# POWER8	10.6/+100%
331cb0ef41Sopenharmony_ci# POWER9	8.2/+66%
341cb0ef41Sopenharmony_ci#
351cb0ef41Sopenharmony_ci# (*)	Corresponds to SHA3-256. Percentage after slash is improvement
361cb0ef41Sopenharmony_ci#	over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
371cb0ef41Sopenharmony_ci#	much better (but watch out for them generating code specific
381cb0ef41Sopenharmony_ci#	to processor they execute on).
391cb0ef41Sopenharmony_ci
401cb0ef41Sopenharmony_ci# $output is the last argument if it looks like a file (it has an extension)
411cb0ef41Sopenharmony_ci# $flavour is the first argument if it doesn't look like a file
421cb0ef41Sopenharmony_ci$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
431cb0ef41Sopenharmony_ci$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
441cb0ef41Sopenharmony_ci
451cb0ef41Sopenharmony_ciif ($flavour =~ /64/) {
461cb0ef41Sopenharmony_ci	$SIZE_T	=8;
471cb0ef41Sopenharmony_ci	$LRSAVE	=2*$SIZE_T;
481cb0ef41Sopenharmony_ci	$UCMP	="cmpld";
491cb0ef41Sopenharmony_ci	$STU	="stdu";
501cb0ef41Sopenharmony_ci	$POP	="ld";
511cb0ef41Sopenharmony_ci	$PUSH	="std";
521cb0ef41Sopenharmony_ci} else { die "nonsense $flavour"; }
531cb0ef41Sopenharmony_ci
541cb0ef41Sopenharmony_ci$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
551cb0ef41Sopenharmony_ci( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
561cb0ef41Sopenharmony_ci( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
571cb0ef41Sopenharmony_cidie "can't locate ppc-xlate.pl";
581cb0ef41Sopenharmony_ci
591cb0ef41Sopenharmony_ciopen STDOUT,"| $^X $xlate $flavour \"$output\""
601cb0ef41Sopenharmony_ci    or die "can't call $xlate: $!";
611cb0ef41Sopenharmony_ci
621cb0ef41Sopenharmony_ci$FRAME=24*$SIZE_T+6*$SIZE_T+32;
631cb0ef41Sopenharmony_ci$LOCALS=6*$SIZE_T;
641cb0ef41Sopenharmony_ci$TEMP=$LOCALS+6*$SIZE_T;
651cb0ef41Sopenharmony_ci
661cb0ef41Sopenharmony_cimy $sp ="r1";
671cb0ef41Sopenharmony_ci
681cb0ef41Sopenharmony_cimy @A = map([ "r$_", "r".($_+1), "r".($_+2), "r".($_+3), "r".($_+4) ],
691cb0ef41Sopenharmony_ci            (7, 12, 17, 22, 27));
701cb0ef41Sopenharmony_ci   $A[1][1] = "r6"; # r13 is reserved
711cb0ef41Sopenharmony_ci
721cb0ef41Sopenharmony_cimy @C = map("r$_", (0,3,4,5));
731cb0ef41Sopenharmony_ci
741cb0ef41Sopenharmony_cimy @rhotates = ([  0,  1, 62, 28, 27 ],
751cb0ef41Sopenharmony_ci                [ 36, 44,  6, 55, 20 ],
761cb0ef41Sopenharmony_ci                [  3, 10, 43, 25, 39 ],
771cb0ef41Sopenharmony_ci                [ 41, 45, 15, 21,  8 ],
781cb0ef41Sopenharmony_ci                [ 18,  2, 61, 56, 14 ]);
791cb0ef41Sopenharmony_ci
801cb0ef41Sopenharmony_ci$code.=<<___;
811cb0ef41Sopenharmony_ci.text
821cb0ef41Sopenharmony_ci
831cb0ef41Sopenharmony_ci.type	KeccakF1600_int,\@function
841cb0ef41Sopenharmony_ci.align	5
851cb0ef41Sopenharmony_ciKeccakF1600_int:
861cb0ef41Sopenharmony_ci	li	r0,24
871cb0ef41Sopenharmony_ci	mtctr	r0
881cb0ef41Sopenharmony_ci	b	.Loop
891cb0ef41Sopenharmony_ci.align	4
901cb0ef41Sopenharmony_ci.Loop:
911cb0ef41Sopenharmony_ci	xor	$C[0],$A[0][0],$A[1][0]		; Theta
921cb0ef41Sopenharmony_ci	std	$A[0][4],`$TEMP+0`($sp)
931cb0ef41Sopenharmony_ci	xor	$C[1],$A[0][1],$A[1][1]
941cb0ef41Sopenharmony_ci	std	$A[1][4],`$TEMP+8`($sp)
951cb0ef41Sopenharmony_ci	xor	$C[2],$A[0][2],$A[1][2]
961cb0ef41Sopenharmony_ci	std	$A[2][4],`$TEMP+16`($sp)
971cb0ef41Sopenharmony_ci	xor	$C[3],$A[0][3],$A[1][3]
981cb0ef41Sopenharmony_ci	std	$A[3][4],`$TEMP+24`($sp)
991cb0ef41Sopenharmony_ci___
1001cb0ef41Sopenharmony_ci	$C[4]=$A[0][4];
1011cb0ef41Sopenharmony_ci	$C[5]=$A[1][4];
1021cb0ef41Sopenharmony_ci	$C[6]=$A[2][4];
1031cb0ef41Sopenharmony_ci	$C[7]=$A[3][4];
1041cb0ef41Sopenharmony_ci$code.=<<___;
1051cb0ef41Sopenharmony_ci	xor	$C[4],$A[0][4],$A[1][4]
1061cb0ef41Sopenharmony_ci	xor	$C[0],$C[0],$A[2][0]
1071cb0ef41Sopenharmony_ci	xor	$C[1],$C[1],$A[2][1]
1081cb0ef41Sopenharmony_ci	xor	$C[2],$C[2],$A[2][2]
1091cb0ef41Sopenharmony_ci	xor	$C[3],$C[3],$A[2][3]
1101cb0ef41Sopenharmony_ci	xor	$C[4],$C[4],$A[2][4]
1111cb0ef41Sopenharmony_ci	xor	$C[0],$C[0],$A[3][0]
1121cb0ef41Sopenharmony_ci	xor	$C[1],$C[1],$A[3][1]
1131cb0ef41Sopenharmony_ci	xor	$C[2],$C[2],$A[3][2]
1141cb0ef41Sopenharmony_ci	xor	$C[3],$C[3],$A[3][3]
1151cb0ef41Sopenharmony_ci	xor	$C[4],$C[4],$A[3][4]
1161cb0ef41Sopenharmony_ci	xor	$C[0],$C[0],$A[4][0]
1171cb0ef41Sopenharmony_ci	xor	$C[2],$C[2],$A[4][2]
1181cb0ef41Sopenharmony_ci	xor	$C[1],$C[1],$A[4][1]
1191cb0ef41Sopenharmony_ci	xor	$C[3],$C[3],$A[4][3]
1201cb0ef41Sopenharmony_ci	rotldi	$C[5],$C[2],1
1211cb0ef41Sopenharmony_ci	xor	$C[4],$C[4],$A[4][4]
1221cb0ef41Sopenharmony_ci	rotldi	$C[6],$C[3],1
1231cb0ef41Sopenharmony_ci	xor	$C[5],$C[5],$C[0]
1241cb0ef41Sopenharmony_ci	rotldi	$C[7],$C[4],1
1251cb0ef41Sopenharmony_ci
1261cb0ef41Sopenharmony_ci	xor	$A[0][1],$A[0][1],$C[5]
1271cb0ef41Sopenharmony_ci	xor	$A[1][1],$A[1][1],$C[5]
1281cb0ef41Sopenharmony_ci	xor	$A[2][1],$A[2][1],$C[5]
1291cb0ef41Sopenharmony_ci	xor	$A[3][1],$A[3][1],$C[5]
1301cb0ef41Sopenharmony_ci	xor	$A[4][1],$A[4][1],$C[5]
1311cb0ef41Sopenharmony_ci
1321cb0ef41Sopenharmony_ci	rotldi	$C[5],$C[0],1
1331cb0ef41Sopenharmony_ci	xor	$C[6],$C[6],$C[1]
1341cb0ef41Sopenharmony_ci	xor	$C[2],$C[2],$C[7]
1351cb0ef41Sopenharmony_ci	rotldi	$C[7],$C[1],1
1361cb0ef41Sopenharmony_ci	xor	$C[3],$C[3],$C[5]
1371cb0ef41Sopenharmony_ci	xor	$C[4],$C[4],$C[7]
1381cb0ef41Sopenharmony_ci
1391cb0ef41Sopenharmony_ci	xor	$C[1],   $A[0][2],$C[6]			;mr	$C[1],$A[0][2]
1401cb0ef41Sopenharmony_ci	xor	$A[1][2],$A[1][2],$C[6]
1411cb0ef41Sopenharmony_ci	xor	$A[2][2],$A[2][2],$C[6]
1421cb0ef41Sopenharmony_ci	xor	$A[3][2],$A[3][2],$C[6]
1431cb0ef41Sopenharmony_ci	xor	$A[4][2],$A[4][2],$C[6]
1441cb0ef41Sopenharmony_ci
1451cb0ef41Sopenharmony_ci	xor	$A[0][0],$A[0][0],$C[4]
1461cb0ef41Sopenharmony_ci	xor	$A[1][0],$A[1][0],$C[4]
1471cb0ef41Sopenharmony_ci	xor	$A[2][0],$A[2][0],$C[4]
1481cb0ef41Sopenharmony_ci	xor	$A[3][0],$A[3][0],$C[4]
1491cb0ef41Sopenharmony_ci	xor	$A[4][0],$A[4][0],$C[4]
1501cb0ef41Sopenharmony_ci___
1511cb0ef41Sopenharmony_ci	$C[4]=undef;
1521cb0ef41Sopenharmony_ci	$C[5]=undef;
1531cb0ef41Sopenharmony_ci	$C[6]=undef;
1541cb0ef41Sopenharmony_ci	$C[7]=undef;
1551cb0ef41Sopenharmony_ci$code.=<<___;
1561cb0ef41Sopenharmony_ci	ld	$A[0][4],`$TEMP+0`($sp)
1571cb0ef41Sopenharmony_ci	xor	$C[0],   $A[0][3],$C[2]			;mr	$C[0],$A[0][3]
1581cb0ef41Sopenharmony_ci	ld	$A[1][4],`$TEMP+8`($sp)
1591cb0ef41Sopenharmony_ci	xor	$A[1][3],$A[1][3],$C[2]
1601cb0ef41Sopenharmony_ci	ld	$A[2][4],`$TEMP+16`($sp)
1611cb0ef41Sopenharmony_ci	xor	$A[2][3],$A[2][3],$C[2]
1621cb0ef41Sopenharmony_ci	ld	$A[3][4],`$TEMP+24`($sp)
1631cb0ef41Sopenharmony_ci	xor	$A[3][3],$A[3][3],$C[2]
1641cb0ef41Sopenharmony_ci	xor	$A[4][3],$A[4][3],$C[2]
1651cb0ef41Sopenharmony_ci
1661cb0ef41Sopenharmony_ci	xor	$C[2],   $A[0][4],$C[3]			;mr	$C[2],$A[0][4]
1671cb0ef41Sopenharmony_ci	xor	$A[1][4],$A[1][4],$C[3]
1681cb0ef41Sopenharmony_ci	xor	$A[2][4],$A[2][4],$C[3]
1691cb0ef41Sopenharmony_ci	xor	$A[3][4],$A[3][4],$C[3]
1701cb0ef41Sopenharmony_ci	xor	$A[4][4],$A[4][4],$C[3]
1711cb0ef41Sopenharmony_ci
1721cb0ef41Sopenharmony_ci	mr	$C[3],$A[0][1]				; Rho+Pi
1731cb0ef41Sopenharmony_ci	rotldi	$A[0][1],$A[1][1],$rhotates[1][1]
1741cb0ef41Sopenharmony_ci	;mr	$C[1],$A[0][2]
1751cb0ef41Sopenharmony_ci	rotldi	$A[0][2],$A[2][2],$rhotates[2][2]
1761cb0ef41Sopenharmony_ci	;mr	$C[0],$A[0][3]
1771cb0ef41Sopenharmony_ci	rotldi	$A[0][3],$A[3][3],$rhotates[3][3]
1781cb0ef41Sopenharmony_ci	;mr	$C[2],$A[0][4]
1791cb0ef41Sopenharmony_ci	rotldi	$A[0][4],$A[4][4],$rhotates[4][4]
1801cb0ef41Sopenharmony_ci
1811cb0ef41Sopenharmony_ci	rotldi	$A[1][1],$A[1][4],$rhotates[1][4]
1821cb0ef41Sopenharmony_ci	rotldi	$A[2][2],$A[2][3],$rhotates[2][3]
1831cb0ef41Sopenharmony_ci	rotldi	$A[3][3],$A[3][2],$rhotates[3][2]
1841cb0ef41Sopenharmony_ci	rotldi	$A[4][4],$A[4][1],$rhotates[4][1]
1851cb0ef41Sopenharmony_ci
1861cb0ef41Sopenharmony_ci	rotldi	$A[1][4],$A[4][2],$rhotates[4][2]
1871cb0ef41Sopenharmony_ci	rotldi	$A[2][3],$A[3][4],$rhotates[3][4]
1881cb0ef41Sopenharmony_ci	rotldi	$A[3][2],$A[2][1],$rhotates[2][1]
1891cb0ef41Sopenharmony_ci	rotldi	$A[4][1],$A[1][3],$rhotates[1][3]
1901cb0ef41Sopenharmony_ci
1911cb0ef41Sopenharmony_ci	rotldi	$A[4][2],$A[2][4],$rhotates[2][4]
1921cb0ef41Sopenharmony_ci	rotldi	$A[3][4],$A[4][3],$rhotates[4][3]
1931cb0ef41Sopenharmony_ci	rotldi	$A[2][1],$A[1][2],$rhotates[1][2]
1941cb0ef41Sopenharmony_ci	rotldi	$A[1][3],$A[3][1],$rhotates[3][1]
1951cb0ef41Sopenharmony_ci
1961cb0ef41Sopenharmony_ci	rotldi	$A[2][4],$A[4][0],$rhotates[4][0]
1971cb0ef41Sopenharmony_ci	rotldi	$A[4][3],$A[3][0],$rhotates[3][0]
1981cb0ef41Sopenharmony_ci	rotldi	$A[1][2],$A[2][0],$rhotates[2][0]
1991cb0ef41Sopenharmony_ci	rotldi	$A[3][1],$A[1][0],$rhotates[1][0]
2001cb0ef41Sopenharmony_ci
2011cb0ef41Sopenharmony_ci	rotldi	$A[1][0],$C[0],$rhotates[0][3]
2021cb0ef41Sopenharmony_ci	rotldi	$A[2][0],$C[3],$rhotates[0][1]
2031cb0ef41Sopenharmony_ci	rotldi	$A[3][0],$C[2],$rhotates[0][4]
2041cb0ef41Sopenharmony_ci	rotldi	$A[4][0],$C[1],$rhotates[0][2]
2051cb0ef41Sopenharmony_ci
2061cb0ef41Sopenharmony_ci	andc	$C[0],$A[0][2],$A[0][1]			; Chi+Iota
2071cb0ef41Sopenharmony_ci	andc	$C[1],$A[0][3],$A[0][2]
2081cb0ef41Sopenharmony_ci	andc	$C[2],$A[0][0],$A[0][4]
2091cb0ef41Sopenharmony_ci	andc	$C[3],$A[0][1],$A[0][0]
2101cb0ef41Sopenharmony_ci	xor	$A[0][0],$A[0][0],$C[0]
2111cb0ef41Sopenharmony_ci	andc	$C[0],$A[0][4],$A[0][3]
2121cb0ef41Sopenharmony_ci	xor	$A[0][1],$A[0][1],$C[1]
2131cb0ef41Sopenharmony_ci	 ld	$C[1],`$LOCALS+4*$SIZE_T`($sp)
2141cb0ef41Sopenharmony_ci	xor	$A[0][3],$A[0][3],$C[2]
2151cb0ef41Sopenharmony_ci	xor	$A[0][4],$A[0][4],$C[3]
2161cb0ef41Sopenharmony_ci	xor	$A[0][2],$A[0][2],$C[0]
2171cb0ef41Sopenharmony_ci	 ldu	$C[3],8($C[1])				; Iota[i++]
2181cb0ef41Sopenharmony_ci
2191cb0ef41Sopenharmony_ci	andc	$C[0],$A[1][2],$A[1][1]
2201cb0ef41Sopenharmony_ci	 std	$C[1],`$LOCALS+4*$SIZE_T`($sp)
2211cb0ef41Sopenharmony_ci	andc	$C[1],$A[1][3],$A[1][2]
2221cb0ef41Sopenharmony_ci	andc	$C[2],$A[1][0],$A[1][4]
2231cb0ef41Sopenharmony_ci	 xor	$A[0][0],$A[0][0],$C[3]			; A[0][0] ^= Iota
2241cb0ef41Sopenharmony_ci	andc	$C[3],$A[1][1],$A[1][0]
2251cb0ef41Sopenharmony_ci	xor	$A[1][0],$A[1][0],$C[0]
2261cb0ef41Sopenharmony_ci	andc	$C[0],$A[1][4],$A[1][3]
2271cb0ef41Sopenharmony_ci	xor	$A[1][1],$A[1][1],$C[1]
2281cb0ef41Sopenharmony_ci	xor	$A[1][3],$A[1][3],$C[2]
2291cb0ef41Sopenharmony_ci	xor	$A[1][4],$A[1][4],$C[3]
2301cb0ef41Sopenharmony_ci	xor	$A[1][2],$A[1][2],$C[0]
2311cb0ef41Sopenharmony_ci
2321cb0ef41Sopenharmony_ci	andc	$C[0],$A[2][2],$A[2][1]
2331cb0ef41Sopenharmony_ci	andc	$C[1],$A[2][3],$A[2][2]
2341cb0ef41Sopenharmony_ci	andc	$C[2],$A[2][0],$A[2][4]
2351cb0ef41Sopenharmony_ci	andc	$C[3],$A[2][1],$A[2][0]
2361cb0ef41Sopenharmony_ci	xor	$A[2][0],$A[2][0],$C[0]
2371cb0ef41Sopenharmony_ci	andc	$C[0],$A[2][4],$A[2][3]
2381cb0ef41Sopenharmony_ci	xor	$A[2][1],$A[2][1],$C[1]
2391cb0ef41Sopenharmony_ci	xor	$A[2][3],$A[2][3],$C[2]
2401cb0ef41Sopenharmony_ci	xor	$A[2][4],$A[2][4],$C[3]
2411cb0ef41Sopenharmony_ci	xor	$A[2][2],$A[2][2],$C[0]
2421cb0ef41Sopenharmony_ci
2431cb0ef41Sopenharmony_ci	andc	$C[0],$A[3][2],$A[3][1]
2441cb0ef41Sopenharmony_ci	andc	$C[1],$A[3][3],$A[3][2]
2451cb0ef41Sopenharmony_ci	andc	$C[2],$A[3][0],$A[3][4]
2461cb0ef41Sopenharmony_ci	andc	$C[3],$A[3][1],$A[3][0]
2471cb0ef41Sopenharmony_ci	xor	$A[3][0],$A[3][0],$C[0]
2481cb0ef41Sopenharmony_ci	andc	$C[0],$A[3][4],$A[3][3]
2491cb0ef41Sopenharmony_ci	xor	$A[3][1],$A[3][1],$C[1]
2501cb0ef41Sopenharmony_ci	xor	$A[3][3],$A[3][3],$C[2]
2511cb0ef41Sopenharmony_ci	xor	$A[3][4],$A[3][4],$C[3]
2521cb0ef41Sopenharmony_ci	xor	$A[3][2],$A[3][2],$C[0]
2531cb0ef41Sopenharmony_ci
2541cb0ef41Sopenharmony_ci	andc	$C[0],$A[4][2],$A[4][1]
2551cb0ef41Sopenharmony_ci	andc	$C[1],$A[4][3],$A[4][2]
2561cb0ef41Sopenharmony_ci	andc	$C[2],$A[4][0],$A[4][4]
2571cb0ef41Sopenharmony_ci	andc	$C[3],$A[4][1],$A[4][0]
2581cb0ef41Sopenharmony_ci	xor	$A[4][0],$A[4][0],$C[0]
2591cb0ef41Sopenharmony_ci	andc	$C[0],$A[4][4],$A[4][3]
2601cb0ef41Sopenharmony_ci	xor	$A[4][1],$A[4][1],$C[1]
2611cb0ef41Sopenharmony_ci	xor	$A[4][3],$A[4][3],$C[2]
2621cb0ef41Sopenharmony_ci	xor	$A[4][4],$A[4][4],$C[3]
2631cb0ef41Sopenharmony_ci	xor	$A[4][2],$A[4][2],$C[0]
2641cb0ef41Sopenharmony_ci
2651cb0ef41Sopenharmony_ci	bdnz	.Loop
2661cb0ef41Sopenharmony_ci
2671cb0ef41Sopenharmony_ci	blr
2681cb0ef41Sopenharmony_ci	.long	0
2691cb0ef41Sopenharmony_ci	.byte	0,12,0x14,0,0,0,0,0
2701cb0ef41Sopenharmony_ci.size	KeccakF1600_int,.-KeccakF1600_int
2711cb0ef41Sopenharmony_ci
2721cb0ef41Sopenharmony_ci.type	KeccakF1600,\@function
2731cb0ef41Sopenharmony_ci.align	5
2741cb0ef41Sopenharmony_ciKeccakF1600:
2751cb0ef41Sopenharmony_ci	$STU	$sp,-$FRAME($sp)
2761cb0ef41Sopenharmony_ci	mflr	r0
2771cb0ef41Sopenharmony_ci	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
2781cb0ef41Sopenharmony_ci	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
2791cb0ef41Sopenharmony_ci	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
2801cb0ef41Sopenharmony_ci	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
2811cb0ef41Sopenharmony_ci	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
2821cb0ef41Sopenharmony_ci	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
2831cb0ef41Sopenharmony_ci	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
2841cb0ef41Sopenharmony_ci	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
2851cb0ef41Sopenharmony_ci	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
2861cb0ef41Sopenharmony_ci	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
2871cb0ef41Sopenharmony_ci	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
2881cb0ef41Sopenharmony_ci	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
2891cb0ef41Sopenharmony_ci	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
2901cb0ef41Sopenharmony_ci	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
2911cb0ef41Sopenharmony_ci	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
2921cb0ef41Sopenharmony_ci	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
2931cb0ef41Sopenharmony_ci	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
2941cb0ef41Sopenharmony_ci	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
2951cb0ef41Sopenharmony_ci	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
2961cb0ef41Sopenharmony_ci
2971cb0ef41Sopenharmony_ci	bl	PICmeup
2981cb0ef41Sopenharmony_ci	subi	r12,r12,8			; prepare for ldu
2991cb0ef41Sopenharmony_ci
3001cb0ef41Sopenharmony_ci	$PUSH	r3,`$LOCALS+0*$SIZE_T`($sp)
3011cb0ef41Sopenharmony_ci	;$PUSH	r4,`$LOCALS+1*$SIZE_T`($sp)
3021cb0ef41Sopenharmony_ci	;$PUSH	r5,`$LOCALS+2*$SIZE_T`($sp)
3031cb0ef41Sopenharmony_ci	;$PUSH	r6,`$LOCALS+3*$SIZE_T`($sp)
3041cb0ef41Sopenharmony_ci	$PUSH	r12,`$LOCALS+4*$SIZE_T`($sp)
3051cb0ef41Sopenharmony_ci
3061cb0ef41Sopenharmony_ci	ld	$A[0][0],`8*0`(r3)		; load A[5][5]
3071cb0ef41Sopenharmony_ci	ld	$A[0][1],`8*1`(r3)
3081cb0ef41Sopenharmony_ci	ld	$A[0][2],`8*2`(r3)
3091cb0ef41Sopenharmony_ci	ld	$A[0][3],`8*3`(r3)
3101cb0ef41Sopenharmony_ci	ld	$A[0][4],`8*4`(r3)
3111cb0ef41Sopenharmony_ci	ld	$A[1][0],`8*5`(r3)
3121cb0ef41Sopenharmony_ci	ld	$A[1][1],`8*6`(r3)
3131cb0ef41Sopenharmony_ci	ld	$A[1][2],`8*7`(r3)
3141cb0ef41Sopenharmony_ci	ld	$A[1][3],`8*8`(r3)
3151cb0ef41Sopenharmony_ci	ld	$A[1][4],`8*9`(r3)
3161cb0ef41Sopenharmony_ci	ld	$A[2][0],`8*10`(r3)
3171cb0ef41Sopenharmony_ci	ld	$A[2][1],`8*11`(r3)
3181cb0ef41Sopenharmony_ci	ld	$A[2][2],`8*12`(r3)
3191cb0ef41Sopenharmony_ci	ld	$A[2][3],`8*13`(r3)
3201cb0ef41Sopenharmony_ci	ld	$A[2][4],`8*14`(r3)
3211cb0ef41Sopenharmony_ci	ld	$A[3][0],`8*15`(r3)
3221cb0ef41Sopenharmony_ci	ld	$A[3][1],`8*16`(r3)
3231cb0ef41Sopenharmony_ci	ld	$A[3][2],`8*17`(r3)
3241cb0ef41Sopenharmony_ci	ld	$A[3][3],`8*18`(r3)
3251cb0ef41Sopenharmony_ci	ld	$A[3][4],`8*19`(r3)
3261cb0ef41Sopenharmony_ci	ld	$A[4][0],`8*20`(r3)
3271cb0ef41Sopenharmony_ci	ld	$A[4][1],`8*21`(r3)
3281cb0ef41Sopenharmony_ci	ld	$A[4][2],`8*22`(r3)
3291cb0ef41Sopenharmony_ci	ld	$A[4][3],`8*23`(r3)
3301cb0ef41Sopenharmony_ci	ld	$A[4][4],`8*24`(r3)
3311cb0ef41Sopenharmony_ci
3321cb0ef41Sopenharmony_ci	bl	KeccakF1600_int
3331cb0ef41Sopenharmony_ci
3341cb0ef41Sopenharmony_ci	$POP	r3,`$LOCALS+0*$SIZE_T`($sp)
3351cb0ef41Sopenharmony_ci	std	$A[0][0],`8*0`(r3)		; return A[5][5]
3361cb0ef41Sopenharmony_ci	std	$A[0][1],`8*1`(r3)
3371cb0ef41Sopenharmony_ci	std	$A[0][2],`8*2`(r3)
3381cb0ef41Sopenharmony_ci	std	$A[0][3],`8*3`(r3)
3391cb0ef41Sopenharmony_ci	std	$A[0][4],`8*4`(r3)
3401cb0ef41Sopenharmony_ci	std	$A[1][0],`8*5`(r3)
3411cb0ef41Sopenharmony_ci	std	$A[1][1],`8*6`(r3)
3421cb0ef41Sopenharmony_ci	std	$A[1][2],`8*7`(r3)
3431cb0ef41Sopenharmony_ci	std	$A[1][3],`8*8`(r3)
3441cb0ef41Sopenharmony_ci	std	$A[1][4],`8*9`(r3)
3451cb0ef41Sopenharmony_ci	std	$A[2][0],`8*10`(r3)
3461cb0ef41Sopenharmony_ci	std	$A[2][1],`8*11`(r3)
3471cb0ef41Sopenharmony_ci	std	$A[2][2],`8*12`(r3)
3481cb0ef41Sopenharmony_ci	std	$A[2][3],`8*13`(r3)
3491cb0ef41Sopenharmony_ci	std	$A[2][4],`8*14`(r3)
3501cb0ef41Sopenharmony_ci	std	$A[3][0],`8*15`(r3)
3511cb0ef41Sopenharmony_ci	std	$A[3][1],`8*16`(r3)
3521cb0ef41Sopenharmony_ci	std	$A[3][2],`8*17`(r3)
3531cb0ef41Sopenharmony_ci	std	$A[3][3],`8*18`(r3)
3541cb0ef41Sopenharmony_ci	std	$A[3][4],`8*19`(r3)
3551cb0ef41Sopenharmony_ci	std	$A[4][0],`8*20`(r3)
3561cb0ef41Sopenharmony_ci	std	$A[4][1],`8*21`(r3)
3571cb0ef41Sopenharmony_ci	std	$A[4][2],`8*22`(r3)
3581cb0ef41Sopenharmony_ci	std	$A[4][3],`8*23`(r3)
3591cb0ef41Sopenharmony_ci	std	$A[4][4],`8*24`(r3)
3601cb0ef41Sopenharmony_ci
3611cb0ef41Sopenharmony_ci	$POP	r0,`$FRAME+$LRSAVE`($sp)
3621cb0ef41Sopenharmony_ci	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
3631cb0ef41Sopenharmony_ci	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
3641cb0ef41Sopenharmony_ci	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
3651cb0ef41Sopenharmony_ci	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
3661cb0ef41Sopenharmony_ci	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
3671cb0ef41Sopenharmony_ci	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
3681cb0ef41Sopenharmony_ci	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
3691cb0ef41Sopenharmony_ci	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
3701cb0ef41Sopenharmony_ci	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
3711cb0ef41Sopenharmony_ci	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
3721cb0ef41Sopenharmony_ci	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
3731cb0ef41Sopenharmony_ci	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
3741cb0ef41Sopenharmony_ci	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
3751cb0ef41Sopenharmony_ci	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
3761cb0ef41Sopenharmony_ci	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
3771cb0ef41Sopenharmony_ci	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
3781cb0ef41Sopenharmony_ci	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
3791cb0ef41Sopenharmony_ci	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
3801cb0ef41Sopenharmony_ci	mtlr	r0
3811cb0ef41Sopenharmony_ci	addi	$sp,$sp,$FRAME
3821cb0ef41Sopenharmony_ci	blr
3831cb0ef41Sopenharmony_ci	.long	0
3841cb0ef41Sopenharmony_ci	.byte	0,12,4,1,0x80,18,1,0
3851cb0ef41Sopenharmony_ci	.long	0
3861cb0ef41Sopenharmony_ci.size	KeccakF1600,.-KeccakF1600
3871cb0ef41Sopenharmony_ci
3881cb0ef41Sopenharmony_ci.type	dword_le_load,\@function
3891cb0ef41Sopenharmony_ci.align	5
3901cb0ef41Sopenharmony_cidword_le_load:
3911cb0ef41Sopenharmony_ci	lbz	r0,1(r3)
3921cb0ef41Sopenharmony_ci	lbz	r4,2(r3)
3931cb0ef41Sopenharmony_ci	lbz	r5,3(r3)
3941cb0ef41Sopenharmony_ci	insrdi	r0,r4,8,48
3951cb0ef41Sopenharmony_ci	lbz	r4,4(r3)
3961cb0ef41Sopenharmony_ci	insrdi	r0,r5,8,40
3971cb0ef41Sopenharmony_ci	lbz	r5,5(r3)
3981cb0ef41Sopenharmony_ci	insrdi	r0,r4,8,32
3991cb0ef41Sopenharmony_ci	lbz	r4,6(r3)
4001cb0ef41Sopenharmony_ci	insrdi	r0,r5,8,24
4011cb0ef41Sopenharmony_ci	lbz	r5,7(r3)
4021cb0ef41Sopenharmony_ci	insrdi	r0,r4,8,16
4031cb0ef41Sopenharmony_ci	lbzu	r4,8(r3)
4041cb0ef41Sopenharmony_ci	insrdi	r0,r5,8,8
4051cb0ef41Sopenharmony_ci	insrdi	r0,r4,8,0
4061cb0ef41Sopenharmony_ci	blr
4071cb0ef41Sopenharmony_ci	.long	0
4081cb0ef41Sopenharmony_ci	.byte	0,12,0x14,0,0,0,1,0
4091cb0ef41Sopenharmony_ci	.long	0
4101cb0ef41Sopenharmony_ci.size	dword_le_load,.-dword_le_load
4111cb0ef41Sopenharmony_ci
4121cb0ef41Sopenharmony_ci.globl	SHA3_absorb
4131cb0ef41Sopenharmony_ci.type	SHA3_absorb,\@function
4141cb0ef41Sopenharmony_ci.align	5
4151cb0ef41Sopenharmony_ciSHA3_absorb:
4161cb0ef41Sopenharmony_ci	$STU	$sp,-$FRAME($sp)
4171cb0ef41Sopenharmony_ci	mflr	r0
4181cb0ef41Sopenharmony_ci	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
4191cb0ef41Sopenharmony_ci	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
4201cb0ef41Sopenharmony_ci	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
4211cb0ef41Sopenharmony_ci	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
4221cb0ef41Sopenharmony_ci	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
4231cb0ef41Sopenharmony_ci	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
4241cb0ef41Sopenharmony_ci	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
4251cb0ef41Sopenharmony_ci	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
4261cb0ef41Sopenharmony_ci	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
4271cb0ef41Sopenharmony_ci	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
4281cb0ef41Sopenharmony_ci	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
4291cb0ef41Sopenharmony_ci	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
4301cb0ef41Sopenharmony_ci	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
4311cb0ef41Sopenharmony_ci	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
4321cb0ef41Sopenharmony_ci	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
4331cb0ef41Sopenharmony_ci	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
4341cb0ef41Sopenharmony_ci	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
4351cb0ef41Sopenharmony_ci	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
4361cb0ef41Sopenharmony_ci	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
4371cb0ef41Sopenharmony_ci
4381cb0ef41Sopenharmony_ci	bl	PICmeup
4391cb0ef41Sopenharmony_ci	subi	r4,r4,1				; prepare for lbzu
4401cb0ef41Sopenharmony_ci	subi	r12,r12,8			; prepare for ldu
4411cb0ef41Sopenharmony_ci
4421cb0ef41Sopenharmony_ci	$PUSH	r3,`$LOCALS+0*$SIZE_T`($sp)	; save A[][]
4431cb0ef41Sopenharmony_ci	$PUSH	r4,`$LOCALS+1*$SIZE_T`($sp)	; save inp
4441cb0ef41Sopenharmony_ci	$PUSH	r5,`$LOCALS+2*$SIZE_T`($sp)	; save len
4451cb0ef41Sopenharmony_ci	$PUSH	r6,`$LOCALS+3*$SIZE_T`($sp)	; save bsz
4461cb0ef41Sopenharmony_ci	mr	r0,r6
4471cb0ef41Sopenharmony_ci	$PUSH	r12,`$LOCALS+4*$SIZE_T`($sp)
4481cb0ef41Sopenharmony_ci
4491cb0ef41Sopenharmony_ci	ld	$A[0][0],`8*0`(r3)		; load A[5][5]
4501cb0ef41Sopenharmony_ci	ld	$A[0][1],`8*1`(r3)
4511cb0ef41Sopenharmony_ci	ld	$A[0][2],`8*2`(r3)
4521cb0ef41Sopenharmony_ci	ld	$A[0][3],`8*3`(r3)
4531cb0ef41Sopenharmony_ci	ld	$A[0][4],`8*4`(r3)
4541cb0ef41Sopenharmony_ci	ld	$A[1][0],`8*5`(r3)
4551cb0ef41Sopenharmony_ci	ld	$A[1][1],`8*6`(r3)
4561cb0ef41Sopenharmony_ci	ld	$A[1][2],`8*7`(r3)
4571cb0ef41Sopenharmony_ci	ld	$A[1][3],`8*8`(r3)
4581cb0ef41Sopenharmony_ci	ld	$A[1][4],`8*9`(r3)
4591cb0ef41Sopenharmony_ci	ld	$A[2][0],`8*10`(r3)
4601cb0ef41Sopenharmony_ci	ld	$A[2][1],`8*11`(r3)
4611cb0ef41Sopenharmony_ci	ld	$A[2][2],`8*12`(r3)
4621cb0ef41Sopenharmony_ci	ld	$A[2][3],`8*13`(r3)
4631cb0ef41Sopenharmony_ci	ld	$A[2][4],`8*14`(r3)
4641cb0ef41Sopenharmony_ci	ld	$A[3][0],`8*15`(r3)
4651cb0ef41Sopenharmony_ci	ld	$A[3][1],`8*16`(r3)
4661cb0ef41Sopenharmony_ci	ld	$A[3][2],`8*17`(r3)
4671cb0ef41Sopenharmony_ci	ld	$A[3][3],`8*18`(r3)
4681cb0ef41Sopenharmony_ci	ld	$A[3][4],`8*19`(r3)
4691cb0ef41Sopenharmony_ci	ld	$A[4][0],`8*20`(r3)
4701cb0ef41Sopenharmony_ci	ld	$A[4][1],`8*21`(r3)
4711cb0ef41Sopenharmony_ci	ld	$A[4][2],`8*22`(r3)
4721cb0ef41Sopenharmony_ci	ld	$A[4][3],`8*23`(r3)
4731cb0ef41Sopenharmony_ci	ld	$A[4][4],`8*24`(r3)
4741cb0ef41Sopenharmony_ci
4751cb0ef41Sopenharmony_ci	mr	r3,r4
4761cb0ef41Sopenharmony_ci	mr	r4,r5
4771cb0ef41Sopenharmony_ci	mr	r5,r0
4781cb0ef41Sopenharmony_ci
4791cb0ef41Sopenharmony_ci	b	.Loop_absorb
4801cb0ef41Sopenharmony_ci
4811cb0ef41Sopenharmony_ci.align	4
4821cb0ef41Sopenharmony_ci.Loop_absorb:
4831cb0ef41Sopenharmony_ci	$UCMP	r4,r5				; len < bsz?
4841cb0ef41Sopenharmony_ci	blt	.Labsorbed
4851cb0ef41Sopenharmony_ci
4861cb0ef41Sopenharmony_ci	sub	r4,r4,r5			; len -= bsz
4871cb0ef41Sopenharmony_ci	srwi	r5,r5,3
4881cb0ef41Sopenharmony_ci	$PUSH	r4,`$LOCALS+2*$SIZE_T`($sp)	; save len
4891cb0ef41Sopenharmony_ci	mtctr	r5
4901cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
4911cb0ef41Sopenharmony_ci	xor	$A[0][0],$A[0][0],r0
4921cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
4931cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
4941cb0ef41Sopenharmony_ci	xor	$A[0][1],$A[0][1],r0
4951cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
4961cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
4971cb0ef41Sopenharmony_ci	xor	$A[0][2],$A[0][2],r0
4981cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
4991cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5001cb0ef41Sopenharmony_ci	xor	$A[0][3],$A[0][3],r0
5011cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5021cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5031cb0ef41Sopenharmony_ci	xor	$A[0][4],$A[0][4],r0
5041cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5051cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5061cb0ef41Sopenharmony_ci	xor	$A[1][0],$A[1][0],r0
5071cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5081cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5091cb0ef41Sopenharmony_ci	xor	$A[1][1],$A[1][1],r0
5101cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5111cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5121cb0ef41Sopenharmony_ci	xor	$A[1][2],$A[1][2],r0
5131cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5141cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5151cb0ef41Sopenharmony_ci	xor	$A[1][3],$A[1][3],r0
5161cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5171cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5181cb0ef41Sopenharmony_ci	xor	$A[1][4],$A[1][4],r0
5191cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5201cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5211cb0ef41Sopenharmony_ci	xor	$A[2][0],$A[2][0],r0
5221cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5231cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5241cb0ef41Sopenharmony_ci	xor	$A[2][1],$A[2][1],r0
5251cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5261cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5271cb0ef41Sopenharmony_ci	xor	$A[2][2],$A[2][2],r0
5281cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5291cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5301cb0ef41Sopenharmony_ci	xor	$A[2][3],$A[2][3],r0
5311cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5321cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5331cb0ef41Sopenharmony_ci	xor	$A[2][4],$A[2][4],r0
5341cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5351cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5361cb0ef41Sopenharmony_ci	xor	$A[3][0],$A[3][0],r0
5371cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5381cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5391cb0ef41Sopenharmony_ci	xor	$A[3][1],$A[3][1],r0
5401cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5411cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5421cb0ef41Sopenharmony_ci	xor	$A[3][2],$A[3][2],r0
5431cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5441cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5451cb0ef41Sopenharmony_ci	xor	$A[3][3],$A[3][3],r0
5461cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5471cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5481cb0ef41Sopenharmony_ci	xor	$A[3][4],$A[3][4],r0
5491cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5501cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5511cb0ef41Sopenharmony_ci	xor	$A[4][0],$A[4][0],r0
5521cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5531cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5541cb0ef41Sopenharmony_ci	xor	$A[4][1],$A[4][1],r0
5551cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5561cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5571cb0ef41Sopenharmony_ci	xor	$A[4][2],$A[4][2],r0
5581cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5591cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5601cb0ef41Sopenharmony_ci	xor	$A[4][3],$A[4][3],r0
5611cb0ef41Sopenharmony_ci	bdz	.Lprocess_block
5621cb0ef41Sopenharmony_ci	bl	dword_le_load			; *inp++
5631cb0ef41Sopenharmony_ci	xor	$A[4][4],$A[4][4],r0
5641cb0ef41Sopenharmony_ci
5651cb0ef41Sopenharmony_ci.Lprocess_block:
5661cb0ef41Sopenharmony_ci	$PUSH	r3,`$LOCALS+1*$SIZE_T`($sp)	; save inp
5671cb0ef41Sopenharmony_ci
5681cb0ef41Sopenharmony_ci	bl	KeccakF1600_int
5691cb0ef41Sopenharmony_ci
5701cb0ef41Sopenharmony_ci	$POP	r0,`$LOCALS+4*$SIZE_T`($sp)	; pull iotas[24]
5711cb0ef41Sopenharmony_ci	$POP	r5,`$LOCALS+3*$SIZE_T`($sp)	; restore bsz
5721cb0ef41Sopenharmony_ci	$POP	r4,`$LOCALS+2*$SIZE_T`($sp)	; restore len
5731cb0ef41Sopenharmony_ci	$POP	r3,`$LOCALS+1*$SIZE_T`($sp)	; restore inp
5741cb0ef41Sopenharmony_ci	addic	r0,r0,`-8*24`			; rewind iotas
5751cb0ef41Sopenharmony_ci	$PUSH	r0,`$LOCALS+4*$SIZE_T`($sp)
5761cb0ef41Sopenharmony_ci
5771cb0ef41Sopenharmony_ci	b	.Loop_absorb
5781cb0ef41Sopenharmony_ci
5791cb0ef41Sopenharmony_ci.align	4
5801cb0ef41Sopenharmony_ci.Labsorbed:
5811cb0ef41Sopenharmony_ci	$POP	r3,`$LOCALS+0*$SIZE_T`($sp)
5821cb0ef41Sopenharmony_ci	std	$A[0][0],`8*0`(r3)		; return A[5][5]
5831cb0ef41Sopenharmony_ci	std	$A[0][1],`8*1`(r3)
5841cb0ef41Sopenharmony_ci	std	$A[0][2],`8*2`(r3)
5851cb0ef41Sopenharmony_ci	std	$A[0][3],`8*3`(r3)
5861cb0ef41Sopenharmony_ci	std	$A[0][4],`8*4`(r3)
5871cb0ef41Sopenharmony_ci	std	$A[1][0],`8*5`(r3)
5881cb0ef41Sopenharmony_ci	std	$A[1][1],`8*6`(r3)
5891cb0ef41Sopenharmony_ci	std	$A[1][2],`8*7`(r3)
5901cb0ef41Sopenharmony_ci	std	$A[1][3],`8*8`(r3)
5911cb0ef41Sopenharmony_ci	std	$A[1][4],`8*9`(r3)
5921cb0ef41Sopenharmony_ci	std	$A[2][0],`8*10`(r3)
5931cb0ef41Sopenharmony_ci	std	$A[2][1],`8*11`(r3)
5941cb0ef41Sopenharmony_ci	std	$A[2][2],`8*12`(r3)
5951cb0ef41Sopenharmony_ci	std	$A[2][3],`8*13`(r3)
5961cb0ef41Sopenharmony_ci	std	$A[2][4],`8*14`(r3)
5971cb0ef41Sopenharmony_ci	std	$A[3][0],`8*15`(r3)
5981cb0ef41Sopenharmony_ci	std	$A[3][1],`8*16`(r3)
5991cb0ef41Sopenharmony_ci	std	$A[3][2],`8*17`(r3)
6001cb0ef41Sopenharmony_ci	std	$A[3][3],`8*18`(r3)
6011cb0ef41Sopenharmony_ci	std	$A[3][4],`8*19`(r3)
6021cb0ef41Sopenharmony_ci	std	$A[4][0],`8*20`(r3)
6031cb0ef41Sopenharmony_ci	std	$A[4][1],`8*21`(r3)
6041cb0ef41Sopenharmony_ci	std	$A[4][2],`8*22`(r3)
6051cb0ef41Sopenharmony_ci	std	$A[4][3],`8*23`(r3)
6061cb0ef41Sopenharmony_ci	std	$A[4][4],`8*24`(r3)
6071cb0ef41Sopenharmony_ci
6081cb0ef41Sopenharmony_ci	mr	r3,r4				; return value
6091cb0ef41Sopenharmony_ci	$POP	r0,`$FRAME+$LRSAVE`($sp)
6101cb0ef41Sopenharmony_ci	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
6111cb0ef41Sopenharmony_ci	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
6121cb0ef41Sopenharmony_ci	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
6131cb0ef41Sopenharmony_ci	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
6141cb0ef41Sopenharmony_ci	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
6151cb0ef41Sopenharmony_ci	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
6161cb0ef41Sopenharmony_ci	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
6171cb0ef41Sopenharmony_ci	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
6181cb0ef41Sopenharmony_ci	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
6191cb0ef41Sopenharmony_ci	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
6201cb0ef41Sopenharmony_ci	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
6211cb0ef41Sopenharmony_ci	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
6221cb0ef41Sopenharmony_ci	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
6231cb0ef41Sopenharmony_ci	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
6241cb0ef41Sopenharmony_ci	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
6251cb0ef41Sopenharmony_ci	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
6261cb0ef41Sopenharmony_ci	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
6271cb0ef41Sopenharmony_ci	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
6281cb0ef41Sopenharmony_ci	mtlr	r0
6291cb0ef41Sopenharmony_ci	addi	$sp,$sp,$FRAME
6301cb0ef41Sopenharmony_ci	blr
6311cb0ef41Sopenharmony_ci	.long	0
6321cb0ef41Sopenharmony_ci	.byte	0,12,4,1,0x80,18,4,0
6331cb0ef41Sopenharmony_ci	.long	0
6341cb0ef41Sopenharmony_ci.size	SHA3_absorb,.-SHA3_absorb
6351cb0ef41Sopenharmony_ci___
6361cb0ef41Sopenharmony_ci{
6371cb0ef41Sopenharmony_cimy ($A_flat,$out,$len,$bsz) = map("r$_",(28..31));
6381cb0ef41Sopenharmony_ci$code.=<<___;
6391cb0ef41Sopenharmony_ci.globl	SHA3_squeeze
6401cb0ef41Sopenharmony_ci.type	SHA3_squeeze,\@function
6411cb0ef41Sopenharmony_ci.align	5
6421cb0ef41Sopenharmony_ciSHA3_squeeze:
6431cb0ef41Sopenharmony_ci	$STU	$sp,`-10*$SIZE_T`($sp)
6441cb0ef41Sopenharmony_ci	mflr	r0
6451cb0ef41Sopenharmony_ci	$PUSH	r28,`6*$SIZE_T`($sp)
6461cb0ef41Sopenharmony_ci	$PUSH	r29,`7*$SIZE_T`($sp)
6471cb0ef41Sopenharmony_ci	$PUSH	r30,`8*$SIZE_T`($sp)
6481cb0ef41Sopenharmony_ci	$PUSH	r31,`9*$SIZE_T`($sp)
6491cb0ef41Sopenharmony_ci	$PUSH	r0,`10*$SIZE_T+$LRSAVE`($sp)
6501cb0ef41Sopenharmony_ci
6511cb0ef41Sopenharmony_ci	mr	$A_flat,r3
6521cb0ef41Sopenharmony_ci	subi	r3,r3,8			; prepare for ldu
6531cb0ef41Sopenharmony_ci	subi	$out,r4,1		; prepare for stbu
6541cb0ef41Sopenharmony_ci	mr	$len,r5
6551cb0ef41Sopenharmony_ci	mr	$bsz,r6
6561cb0ef41Sopenharmony_ci	b	.Loop_squeeze
6571cb0ef41Sopenharmony_ci
6581cb0ef41Sopenharmony_ci.align	4
6591cb0ef41Sopenharmony_ci.Loop_squeeze:
6601cb0ef41Sopenharmony_ci	ldu	r0,8(r3)
6611cb0ef41Sopenharmony_ci	${UCMP}i $len,8
6621cb0ef41Sopenharmony_ci	blt	.Lsqueeze_tail
6631cb0ef41Sopenharmony_ci
6641cb0ef41Sopenharmony_ci	stb	r0,1($out)
6651cb0ef41Sopenharmony_ci	srdi	r0,r0,8
6661cb0ef41Sopenharmony_ci	stb	r0,2($out)
6671cb0ef41Sopenharmony_ci	srdi	r0,r0,8
6681cb0ef41Sopenharmony_ci	stb	r0,3($out)
6691cb0ef41Sopenharmony_ci	srdi	r0,r0,8
6701cb0ef41Sopenharmony_ci	stb	r0,4($out)
6711cb0ef41Sopenharmony_ci	srdi	r0,r0,8
6721cb0ef41Sopenharmony_ci	stb	r0,5($out)
6731cb0ef41Sopenharmony_ci	srdi	r0,r0,8
6741cb0ef41Sopenharmony_ci	stb	r0,6($out)
6751cb0ef41Sopenharmony_ci	srdi	r0,r0,8
6761cb0ef41Sopenharmony_ci	stb	r0,7($out)
6771cb0ef41Sopenharmony_ci	srdi	r0,r0,8
6781cb0ef41Sopenharmony_ci	stbu	r0,8($out)
6791cb0ef41Sopenharmony_ci
6801cb0ef41Sopenharmony_ci	subic.	$len,$len,8
6811cb0ef41Sopenharmony_ci	beq	.Lsqueeze_done
6821cb0ef41Sopenharmony_ci
6831cb0ef41Sopenharmony_ci	subic.	r6,r6,8
6841cb0ef41Sopenharmony_ci	bgt	.Loop_squeeze
6851cb0ef41Sopenharmony_ci
6861cb0ef41Sopenharmony_ci	mr	r3,$A_flat
6871cb0ef41Sopenharmony_ci	bl	KeccakF1600
6881cb0ef41Sopenharmony_ci	subi	r3,$A_flat,8		; prepare for ldu
6891cb0ef41Sopenharmony_ci	mr	r6,$bsz
6901cb0ef41Sopenharmony_ci	b	.Loop_squeeze
6911cb0ef41Sopenharmony_ci
6921cb0ef41Sopenharmony_ci.align	4
6931cb0ef41Sopenharmony_ci.Lsqueeze_tail:
6941cb0ef41Sopenharmony_ci	mtctr	$len
6951cb0ef41Sopenharmony_ci.Loop_tail:
6961cb0ef41Sopenharmony_ci	stbu	r0,1($out)
6971cb0ef41Sopenharmony_ci	srdi	r0,r0,8
6981cb0ef41Sopenharmony_ci	bdnz	.Loop_tail
6991cb0ef41Sopenharmony_ci
7001cb0ef41Sopenharmony_ci.Lsqueeze_done:
7011cb0ef41Sopenharmony_ci	$POP	r0,`10*$SIZE_T+$LRSAVE`($sp)
7021cb0ef41Sopenharmony_ci	$POP	r28,`6*$SIZE_T`($sp)
7031cb0ef41Sopenharmony_ci	$POP	r29,`7*$SIZE_T`($sp)
7041cb0ef41Sopenharmony_ci	$POP	r30,`8*$SIZE_T`($sp)
7051cb0ef41Sopenharmony_ci	$POP	r31,`9*$SIZE_T`($sp)
7061cb0ef41Sopenharmony_ci	mtlr	r0
7071cb0ef41Sopenharmony_ci	addi	$sp,$sp,`10*$SIZE_T`
7081cb0ef41Sopenharmony_ci	blr
7091cb0ef41Sopenharmony_ci	.long	0
7101cb0ef41Sopenharmony_ci	.byte	0,12,4,1,0x80,4,4,0
7111cb0ef41Sopenharmony_ci	.long	0
7121cb0ef41Sopenharmony_ci.size	SHA3_squeeze,.-SHA3_squeeze
7131cb0ef41Sopenharmony_ci___
7141cb0ef41Sopenharmony_ci}
7151cb0ef41Sopenharmony_ci
7161cb0ef41Sopenharmony_ci# Ugly hack here, because PPC assembler syntax seem to vary too
7171cb0ef41Sopenharmony_ci# much from platforms to platform...
7181cb0ef41Sopenharmony_ci$code.=<<___;
7191cb0ef41Sopenharmony_ci.align	6
7201cb0ef41Sopenharmony_ciPICmeup:
7211cb0ef41Sopenharmony_ci	mflr	r0
7221cb0ef41Sopenharmony_ci	bcl	20,31,\$+4
7231cb0ef41Sopenharmony_ci	mflr	r12   ; vvvvvv "distance" between . and 1st data entry
7241cb0ef41Sopenharmony_ci	addi	r12,r12,`64-8`
7251cb0ef41Sopenharmony_ci	mtlr	r0
7261cb0ef41Sopenharmony_ci	blr
7271cb0ef41Sopenharmony_ci	.long	0
7281cb0ef41Sopenharmony_ci	.byte	0,12,0x14,0,0,0,0,0
7291cb0ef41Sopenharmony_ci	.space	`64-9*4`
7301cb0ef41Sopenharmony_ci.type	iotas,\@object
7311cb0ef41Sopenharmony_ciiotas:
7321cb0ef41Sopenharmony_ci	.quad	0x0000000000000001
7331cb0ef41Sopenharmony_ci	.quad	0x0000000000008082
7341cb0ef41Sopenharmony_ci	.quad	0x800000000000808a
7351cb0ef41Sopenharmony_ci	.quad	0x8000000080008000
7361cb0ef41Sopenharmony_ci	.quad	0x000000000000808b
7371cb0ef41Sopenharmony_ci	.quad	0x0000000080000001
7381cb0ef41Sopenharmony_ci	.quad	0x8000000080008081
7391cb0ef41Sopenharmony_ci	.quad	0x8000000000008009
7401cb0ef41Sopenharmony_ci	.quad	0x000000000000008a
7411cb0ef41Sopenharmony_ci	.quad	0x0000000000000088
7421cb0ef41Sopenharmony_ci	.quad	0x0000000080008009
7431cb0ef41Sopenharmony_ci	.quad	0x000000008000000a
7441cb0ef41Sopenharmony_ci	.quad	0x000000008000808b
7451cb0ef41Sopenharmony_ci	.quad	0x800000000000008b
7461cb0ef41Sopenharmony_ci	.quad	0x8000000000008089
7471cb0ef41Sopenharmony_ci	.quad	0x8000000000008003
7481cb0ef41Sopenharmony_ci	.quad	0x8000000000008002
7491cb0ef41Sopenharmony_ci	.quad	0x8000000000000080
7501cb0ef41Sopenharmony_ci	.quad	0x000000000000800a
7511cb0ef41Sopenharmony_ci	.quad	0x800000008000000a
7521cb0ef41Sopenharmony_ci	.quad	0x8000000080008081
7531cb0ef41Sopenharmony_ci	.quad	0x8000000000008080
7541cb0ef41Sopenharmony_ci	.quad	0x0000000080000001
7551cb0ef41Sopenharmony_ci	.quad	0x8000000080008008
7561cb0ef41Sopenharmony_ci.size	iotas,.-iotas
7571cb0ef41Sopenharmony_ci.asciz	"Keccak-1600 absorb and squeeze for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
7581cb0ef41Sopenharmony_ci___
7591cb0ef41Sopenharmony_ci
7601cb0ef41Sopenharmony_ci$code =~ s/\`([^\`]*)\`/eval $1/gem;
7611cb0ef41Sopenharmony_ciprint $code;
7621cb0ef41Sopenharmony_ciclose STDOUT or die "error closing STDOUT: $!";
763