1e1051a39Sopenharmony_ci#!/usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci#
9e1051a39Sopenharmony_ci# ====================================================================
10e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
12e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
13e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
14e1051a39Sopenharmony_ci# ====================================================================
15e1051a39Sopenharmony_ci#
16e1051a39Sopenharmony_ci# Keccak-1600 for PowerISA 2.07.
17e1051a39Sopenharmony_ci#
18e1051a39Sopenharmony_ci# June 2017.
19e1051a39Sopenharmony_ci#
20e1051a39Sopenharmony_ci# This is straightforward KECCAK_1X_ALT SIMD implementation, but with
21e1051a39Sopenharmony_ci# disjoint Rho and Pi. The module is ABI-bitness- and endian-neutral.
22e1051a39Sopenharmony_ci# POWER8 processor spends 9.8 cycles to process byte out of large
23e1051a39Sopenharmony_ci# buffer for r=1088, which matches SHA3-256. This is 17% better than
24e1051a39Sopenharmony_ci# scalar PPC64 code. It probably should be noted that if POWER8's
25e1051a39Sopenharmony_ci# successor can achieve higher scalar instruction issue rate, then
26e1051a39Sopenharmony_ci# this module will lose... And it does on POWER9 with 12.0 vs. 9.4.
27e1051a39Sopenharmony_ci
28e1051a39Sopenharmony_ci# $output is the last argument if it looks like a file (it has an extension)
29e1051a39Sopenharmony_ci# $flavour is the first argument if it doesn't look like a file
30e1051a39Sopenharmony_ci$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
31e1051a39Sopenharmony_ci$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
32e1051a39Sopenharmony_ci
# Pick the ABI-dependent parameters from the flavour string: pointer size
# ($SIZE_T), offset of the link-register save slot relative to the caller's
# frame ($LRSAVE), and the mnemonics substituted into the assembly templates
# for unsigned compare ($UCMP), store-with-update ($STU), load ($POP) and
# store ($PUSH).  A flavour matching neither /64/ nor /32/ (including an
# undefined one) is fatal.
33e1051a39Sopenharmony_ciif ($flavour =~ /64/) {
34e1051a39Sopenharmony_ci	$SIZE_T	=8;
35e1051a39Sopenharmony_ci	$LRSAVE	=2*$SIZE_T;
36e1051a39Sopenharmony_ci	$UCMP	="cmpld";
37e1051a39Sopenharmony_ci	$STU	="stdu";
38e1051a39Sopenharmony_ci	$POP	="ld";
39e1051a39Sopenharmony_ci	$PUSH	="std";
40e1051a39Sopenharmony_ci} elsif ($flavour =~ /32/) {
41e1051a39Sopenharmony_ci	$SIZE_T	=4;
42e1051a39Sopenharmony_ci	$LRSAVE	=$SIZE_T;
43e1051a39Sopenharmony_ci	$STU	="stwu";
44e1051a39Sopenharmony_ci	$POP	="lwz";
45e1051a39Sopenharmony_ci	$PUSH	="stw";
46e1051a39Sopenharmony_ci	$UCMP	="cmplw";
47e1051a39Sopenharmony_ci} else { die "nonsense $flavour"; }
48e1051a39Sopenharmony_ci
# Locate the translator script: take the directory part of $0 (everything up
# to the last "/" or "\") and look for ppc-xlate.pl first in that directory,
# then in ../../perlasm/ relative to it.
49e1051a39Sopenharmony_ci$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
50e1051a39Sopenharmony_ci( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
51e1051a39Sopenharmony_ci( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
52e1051a39Sopenharmony_cidie "can't locate ppc-xlate.pl";
53e1051a39Sopenharmony_ci
# From here on STDOUT is a pipe into ppc-xlate.pl, run with the same perl
# binary ($^X) and given the flavour and the quoted output file name.
54e1051a39Sopenharmony_ciopen STDOUT,"| $^X $xlate $flavour \"$output\""
55e1051a39Sopenharmony_ci    or die "can't call $xlate: $!";
56e1051a39Sopenharmony_ci
# Stack frame size shared by the routines below: six pointer-sized slots
# followed by 13*16 bytes.  v20-v31 are twelve 16-byte registers and are
# saved starting at offset 15+6*$SIZE_T; the VRSAVE word is stored at
# $FRAME-4.
# NOTE(review): the thirteenth 16-byte unit presumably covers the alignment
# slack of the stvx/lvx save area plus that VRSAVE word - confirm.
57e1051a39Sopenharmony_ci$FRAME=6*$SIZE_T+13*16;	# 13*16 is for v20-v31 offload
58e1051a39Sopenharmony_ci
# Symbolic register names interpolated into the assembly templates:
# r1 is used as the stack pointer, r12 as the base of the iotas table.
59e1051a39Sopenharmony_cimy $sp ="r1";
60e1051a39Sopenharmony_ci
61e1051a39Sopenharmony_cimy $iotas = "r12";
62e1051a39Sopenharmony_ci
63e1051a39Sopenharmony_ci########################################################################
64e1051a39Sopenharmony_ci# Register layout:
65e1051a39Sopenharmony_ci#
66e1051a39Sopenharmony_ci# v0		A[0][0] A[1][0]
67e1051a39Sopenharmony_ci# v1		A[0][1] A[1][1]
68e1051a39Sopenharmony_ci# v2		A[0][2] A[1][2]
69e1051a39Sopenharmony_ci# v3		A[0][3] A[1][3]
70e1051a39Sopenharmony_ci# v4		A[0][4] A[1][4]
71e1051a39Sopenharmony_ci#
72e1051a39Sopenharmony_ci# v5		A[2][0] A[3][0]
73e1051a39Sopenharmony_ci# v6		A[2][1] A[3][1]
74e1051a39Sopenharmony_ci# v7		A[2][2] A[3][2]
75e1051a39Sopenharmony_ci# v8		A[2][3] A[3][3]
76e1051a39Sopenharmony_ci# v9		A[2][4] A[3][4]
77e1051a39Sopenharmony_ci#
78e1051a39Sopenharmony_ci# v10		A[4][0] A[4][1]
79e1051a39Sopenharmony_ci# v11		A[4][2] A[4][3]
80e1051a39Sopenharmony_ci# v12		A[4][4] A[4][4]
81e1051a39Sopenharmony_ci#
82e1051a39Sopenharmony_ci# v13..25	rhotates[][]
83e1051a39Sopenharmony_ci# v26..31	volatile
84e1051a39Sopenharmony_ci#
########################################################################
# The template appended to $code below emits two routines.  Neither has a
# .globl directive in this template.
#
# KeccakF1600_int
#   Runs 24 iterations (the count is loaded into CTR) of the Theta, Rho,
#   Pi and Chi + Iota sequence on the state held in v0..v12, using the
#   register layout described above.
#   Expects: v13..v25 to hold the rotate counts consumed by vrld in the
#            Rho step, and $iotas (r12) to point at a table from which
#            one 16-byte vector is fetched per iteration.
#   Uses:    r0 as the byte offset into that table (starts at 0 and is
#            advanced by 16 each iteration), CTR as the loop counter and
#            v26..v31 as scratch registers.
#   On exit the final vpermdi replicates one doubleword of v12 into both
#   halves ("broadcast A[4][4]").
#
# KeccakF1600
#   Wrapper taking the state pointer in r3.  It
#     - allocates $FRAME bytes of stack, saves v20..v31, the VRSAVE value
#       (SPR 256, also kept in r7) and the link register (also kept in
#       r8), then sets VRSAVE to all ones;
#     - loads the state from r3: v0..v11 from byte offsets 0,16,..,176
#       with lvx_4w and v12 from offset 192 with lvx_splt;
#     - calls PICmeup (not defined in this part of the file) and then
#       reads thirteen 16-byte vectors at offsets 0,16,..,192 from r12
#       into v13..v25; NOTE(review): this presumes PICmeup returns the
#       table address in r12 - confirm against its definition;
#     - advances r12 by 16*16 bytes so that it "points at iotas" and
#       calls KeccakF1600_int;
#     - stores v0..v12 back to the same offsets from r3, restores
#       v20..v31, VRSAVE and LR, and releases the frame.
#   LR and VRSAVE are restored from r8 and r7 rather than reloaded from
#   the stack, so the code relies on the called routines leaving r7 and
#   r8 intact.
#
# NOTE(review): lvx_4w, lvx_u, lvx_splt, stvx_4w, stvdx_u and the
# three-operand vpermdi are not plain ISA mnemonics; presumably they are
# rewritten by code later in this file or by ppc-xlate.pl - confirm.
# Text between backticks in the template is an arithmetic expression;
# NOTE(review): presumably evaluated when $code is post-processed.
85e1051a39Sopenharmony_ci$code.=<<___;
86e1051a39Sopenharmony_ci.machine	"any"
87e1051a39Sopenharmony_ci.text
88e1051a39Sopenharmony_ci
89e1051a39Sopenharmony_ci.type	KeccakF1600_int,\@function
90e1051a39Sopenharmony_ci.align	5
91e1051a39Sopenharmony_ciKeccakF1600_int:
92e1051a39Sopenharmony_ci	li	r0,24
93e1051a39Sopenharmony_ci	mtctr	r0
94e1051a39Sopenharmony_ci	li	r0,0
95e1051a39Sopenharmony_ci	b	.Loop
96e1051a39Sopenharmony_ci
97e1051a39Sopenharmony_ci.align	4
98e1051a39Sopenharmony_ci.Loop:
99e1051a39Sopenharmony_ci	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Theta
100e1051a39Sopenharmony_ci	vxor	v26,v0, v5		; A[0..1][0]^A[2..3][0]
101e1051a39Sopenharmony_ci	vxor	v27,v1, v6		; A[0..1][1]^A[2..3][1]
102e1051a39Sopenharmony_ci	vxor	v28,v2, v7		; A[0..1][2]^A[2..3][2]
103e1051a39Sopenharmony_ci	vxor	v29,v3, v8		; A[0..1][3]^A[2..3][3]
104e1051a39Sopenharmony_ci	vxor	v30,v4, v9		; A[0..1][4]^A[2..3][4]
105e1051a39Sopenharmony_ci	vpermdi	v31,v26,v27,0b00	; A[0][0..1]^A[2][0..1]
106e1051a39Sopenharmony_ci	vpermdi	v26,v26,v27,0b11	; A[1][0..1]^A[3][0..1]
107e1051a39Sopenharmony_ci	vpermdi	v27,v28,v29,0b00	; A[0][2..3]^A[2][2..3]
108e1051a39Sopenharmony_ci	vpermdi	v28,v28,v29,0b11	; A[1][2..3]^A[3][2..3]
109e1051a39Sopenharmony_ci	vpermdi	v29,v30,v30,0b10	; A[1..0][4]^A[3..2][4]
110e1051a39Sopenharmony_ci	vxor	v26,v26,v31		; C[0..1]
111e1051a39Sopenharmony_ci	vxor	v27,v27,v28		; C[2..3]
112e1051a39Sopenharmony_ci	vxor	v28,v29,v30		; C[4..4]
113e1051a39Sopenharmony_ci	vspltisb v31,1
114e1051a39Sopenharmony_ci	vxor	v26,v26,v10		; C[0..1] ^= A[4][0..1]
115e1051a39Sopenharmony_ci	vxor	v27,v27,v11		; C[2..3] ^= A[4][2..3]
116e1051a39Sopenharmony_ci	vxor	v28,v28,v12		; C[4..4] ^= A[4][4..4], low!
117e1051a39Sopenharmony_ci
118e1051a39Sopenharmony_ci	vrld	v29,v26,v31		; ROL64(C[0..1],1)
119e1051a39Sopenharmony_ci	vrld	v30,v27,v31		; ROL64(C[2..3],1)
120e1051a39Sopenharmony_ci	vrld	v31,v28,v31		; ROL64(C[4..4],1)
121e1051a39Sopenharmony_ci	vpermdi	v31,v31,v29,0b10
122e1051a39Sopenharmony_ci	vxor	v26,v26,v30		; C[0..1] ^= ROL64(C[2..3],1)
123e1051a39Sopenharmony_ci	vxor	v27,v27,v31		; C[2..3] ^= ROL64(C[4..0],1)
124e1051a39Sopenharmony_ci	vxor	v28,v28,v29		; C[4..4] ^= ROL64(C[0..1],1), low!
125e1051a39Sopenharmony_ci
126e1051a39Sopenharmony_ci	vpermdi	v29,v26,v26,0b00	; C[0..0]
127e1051a39Sopenharmony_ci	vpermdi	v30,v28,v26,0b10	; C[4..0]
128e1051a39Sopenharmony_ci	vpermdi	v31,v28,v28,0b11	; C[4..4]
129e1051a39Sopenharmony_ci	vxor	v1, v1, v29		; A[0..1][1] ^= C[0..0]
130e1051a39Sopenharmony_ci	vxor	v6, v6, v29		; A[2..3][1] ^= C[0..0]
131e1051a39Sopenharmony_ci	vxor	v10,v10,v30		; A[4][0..1] ^= C[4..0]
132e1051a39Sopenharmony_ci	vxor	v0, v0, v31		; A[0..1][0] ^= C[4..4]
133e1051a39Sopenharmony_ci	vxor	v5, v5, v31		; A[2..3][0] ^= C[4..4]
134e1051a39Sopenharmony_ci
135e1051a39Sopenharmony_ci	vpermdi	v29,v27,v27,0b00	; C[2..2]
136e1051a39Sopenharmony_ci	vpermdi	v30,v26,v26,0b11	; C[1..1]
137e1051a39Sopenharmony_ci	vpermdi	v31,v26,v27,0b10	; C[1..2]
138e1051a39Sopenharmony_ci	vxor	v3, v3, v29		; A[0..1][3] ^= C[2..2]
139e1051a39Sopenharmony_ci	vxor	v8, v8, v29		; A[2..3][3] ^= C[2..2]
140e1051a39Sopenharmony_ci	vxor	v2, v2, v30		; A[0..1][2] ^= C[1..1]
141e1051a39Sopenharmony_ci	vxor	v7, v7, v30		; A[2..3][2] ^= C[1..1]
142e1051a39Sopenharmony_ci	vxor	v11,v11,v31		; A[4][2..3] ^= C[1..2]
143e1051a39Sopenharmony_ci
144e1051a39Sopenharmony_ci	vpermdi	v29,v27,v27,0b11	; C[3..3]
145e1051a39Sopenharmony_ci	vxor	v4, v4, v29		; A[0..1][4] ^= C[3..3]
146e1051a39Sopenharmony_ci	vxor	v9, v9, v29		; A[2..3][4] ^= C[3..3]
147e1051a39Sopenharmony_ci	vxor	v12,v12,v29		; A[4..4][4] ^= C[3..3]
148e1051a39Sopenharmony_ci
149e1051a39Sopenharmony_ci	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Rho
150e1051a39Sopenharmony_ci	vrld	v26,v0, v13		; v0
151e1051a39Sopenharmony_ci	vrld	v1, v1, v14
152e1051a39Sopenharmony_ci	vrld	v27,v2, v15		; v2
153e1051a39Sopenharmony_ci	vrld	v28,v3, v16		; v3
154e1051a39Sopenharmony_ci	vrld	v4, v4, v17
155e1051a39Sopenharmony_ci	vrld	v5, v5, v18
156e1051a39Sopenharmony_ci	vrld	v6, v6, v19
157e1051a39Sopenharmony_ci	vrld	v29,v7, v20		; v7
158e1051a39Sopenharmony_ci	vrld	v8, v8, v21
159e1051a39Sopenharmony_ci	vrld	v9, v9, v22
160e1051a39Sopenharmony_ci	vrld	v10,v10,v23
161e1051a39Sopenharmony_ci	vrld	v30,v11,v24		; v11
162e1051a39Sopenharmony_ci	vrld	v12,v12,v25
163e1051a39Sopenharmony_ci
164e1051a39Sopenharmony_ci	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Pi
165e1051a39Sopenharmony_ci	vpermdi	v0, v26,v28,0b00	; [0][0] [1][0] < [0][0] [0][3]
166e1051a39Sopenharmony_ci	vpermdi	v2, v29,v5, 0b00	; [0][2] [1][2] < [2][2] [2][0]
167e1051a39Sopenharmony_ci	vpermdi	v11,v9, v5, 0b01	; [4][2] [4][3] < [2][4] [3][0]
168e1051a39Sopenharmony_ci	vpermdi	v5, v1, v4, 0b00	; [2][0] [3][0] < [0][1] [0][4]
169e1051a39Sopenharmony_ci	vpermdi	v1, v1, v4, 0b11	; [0][1] [1][1] < [1][1] [1][4]
170e1051a39Sopenharmony_ci	vpermdi	v3, v8, v6, 0b11	; [0][3] [1][3] < [3][3] [3][1]
171e1051a39Sopenharmony_ci	vpermdi	v4, v12,v30,0b10	; [0][4] [1][4] < [4][4] [4][2]
172e1051a39Sopenharmony_ci	vpermdi	v7, v8, v6, 0b00	; [2][2] [3][2] < [2][3] [2][1]
173e1051a39Sopenharmony_ci	vpermdi	v6, v27,v26,0b11	; [2][1] [3][1] < [1][2] [1][0]
174e1051a39Sopenharmony_ci	vpermdi	v8, v9, v29,0b11	; [2][3] [3][3] < [3][4] [3][2]
175e1051a39Sopenharmony_ci	vpermdi	v12,v10,v10,0b11	; [4][4] [4][4] < [4][1] [4][1]
176e1051a39Sopenharmony_ci	vpermdi	v9, v10,v30,0b01	; [2][4] [3][4] < [4][0] [4][3]
177e1051a39Sopenharmony_ci	vpermdi	v10,v27,v28,0b01	; [4][0] [4][1] < [0][2] [1][3]
178e1051a39Sopenharmony_ci
179e1051a39Sopenharmony_ci	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Chi + Iota
180e1051a39Sopenharmony_ci	lvx_u	v31,$iotas,r0		; iotas[index]
181e1051a39Sopenharmony_ci	addic	r0,r0,16		; index++
182e1051a39Sopenharmony_ci
183e1051a39Sopenharmony_ci	vandc	v26,v2, v1		; (~A[0..1][1] & A[0..1][2])
184e1051a39Sopenharmony_ci	vandc	v27,v3, v2		; (~A[0..1][2] & A[0..1][3])
185e1051a39Sopenharmony_ci	vandc	v28,v4, v3		; (~A[0..1][3] & A[0..1][4])
186e1051a39Sopenharmony_ci	vandc	v29,v0, v4		; (~A[0..1][4] & A[0..1][0])
187e1051a39Sopenharmony_ci	vandc	v30,v1, v0		; (~A[0..1][0] & A[0..1][1])
188e1051a39Sopenharmony_ci	vxor	v0, v0, v26		; A[0..1][0] ^= (~A[0..1][1] & A[0..1][2])
189e1051a39Sopenharmony_ci	vxor	v1, v1, v27		; A[0..1][1] ^= (~A[0..1][2] & A[0..1][3])
190e1051a39Sopenharmony_ci	vxor	v2, v2, v28		; A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
191e1051a39Sopenharmony_ci	vxor	v3, v3, v29		; A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
192e1051a39Sopenharmony_ci	vxor	v4, v4, v30		; A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
193e1051a39Sopenharmony_ci
194e1051a39Sopenharmony_ci	vandc	v26,v7, v6		; (~A[2..3][1] & A[2..3][2])
195e1051a39Sopenharmony_ci	vandc	v27,v8, v7		; (~A[2..3][2] & A[2..3][3])
196e1051a39Sopenharmony_ci	vandc	v28,v9, v8		; (~A[2..3][3] & A[2..3][4])
197e1051a39Sopenharmony_ci	vandc	v29,v5, v9		; (~A[2..3][4] & A[2..3][0])
198e1051a39Sopenharmony_ci	vandc	v30,v6, v5		; (~A[2..3][0] & A[2..3][1])
199e1051a39Sopenharmony_ci	vxor	v5, v5, v26		; A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
200e1051a39Sopenharmony_ci	vxor	v6, v6, v27		; A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
201e1051a39Sopenharmony_ci	vxor	v7, v7, v28		; A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
202e1051a39Sopenharmony_ci	vxor	v8, v8, v29		; A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
203e1051a39Sopenharmony_ci	vxor	v9, v9, v30		; A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
204e1051a39Sopenharmony_ci
205e1051a39Sopenharmony_ci	vxor	v0, v0, v31		; A[0][0] ^= iotas[index++]
206e1051a39Sopenharmony_ci
207e1051a39Sopenharmony_ci	vpermdi	v26,v10,v11,0b10	; A[4][1..2]
208e1051a39Sopenharmony_ci	vpermdi	v27,v12,v10,0b00	; A[4][4..0]
209e1051a39Sopenharmony_ci	vpermdi	v28,v11,v12,0b10	; A[4][3..4]
210e1051a39Sopenharmony_ci	vpermdi	v29,v10,v10,0b10	; A[4][1..0]
211e1051a39Sopenharmony_ci	vandc	v26,v11,v26		; (~A[4][1..2] & A[4][2..3])
212e1051a39Sopenharmony_ci	vandc	v27,v27,v28		; (~A[4][3..4] & A[4][4..0])
213e1051a39Sopenharmony_ci	vandc	v28,v10,v29		; (~A[4][1..0] & A[4][0..1])
214e1051a39Sopenharmony_ci	vxor	v10,v10,v26		; A[4][0..1] ^= (~A[4][1..2] & A[4][2..3])
215e1051a39Sopenharmony_ci	vxor	v11,v11,v27		; A[4][2..3] ^= (~A[4][3..4] & A[4][4..0])
216e1051a39Sopenharmony_ci	vxor	v12,v12,v28		; A[4][4..4] ^= (~A[4][0..1] & A[4][1..0])
217e1051a39Sopenharmony_ci
218e1051a39Sopenharmony_ci	bdnz	.Loop
219e1051a39Sopenharmony_ci
220e1051a39Sopenharmony_ci	vpermdi	v12,v12,v12,0b11	; broadcast A[4][4]
221e1051a39Sopenharmony_ci	blr
222e1051a39Sopenharmony_ci	.long	0
223e1051a39Sopenharmony_ci	.byte	0,12,0x14,0,0,0,0,0
224e1051a39Sopenharmony_ci.size	KeccakF1600_int,.-KeccakF1600_int
225e1051a39Sopenharmony_ci
226e1051a39Sopenharmony_ci.type	KeccakF1600,\@function
227e1051a39Sopenharmony_ci.align	5
228e1051a39Sopenharmony_ciKeccakF1600:
229e1051a39Sopenharmony_ci	$STU	$sp,-$FRAME($sp)
230e1051a39Sopenharmony_ci	li	r10,`15+6*$SIZE_T`
231e1051a39Sopenharmony_ci	li	r11,`31+6*$SIZE_T`
232e1051a39Sopenharmony_ci	mflr	r8
233e1051a39Sopenharmony_ci	mfspr	r7, 256			; save vrsave
234e1051a39Sopenharmony_ci	stvx	v20,r10,$sp
235e1051a39Sopenharmony_ci	addi	r10,r10,32
236e1051a39Sopenharmony_ci	stvx	v21,r11,$sp
237e1051a39Sopenharmony_ci	addi	r11,r11,32
238e1051a39Sopenharmony_ci	stvx	v22,r10,$sp
239e1051a39Sopenharmony_ci	addi	r10,r10,32
240e1051a39Sopenharmony_ci	stvx	v23,r11,$sp
241e1051a39Sopenharmony_ci	addi	r11,r11,32
242e1051a39Sopenharmony_ci	stvx	v24,r10,$sp
243e1051a39Sopenharmony_ci	addi	r10,r10,32
244e1051a39Sopenharmony_ci	stvx	v25,r11,$sp
245e1051a39Sopenharmony_ci	addi	r11,r11,32
246e1051a39Sopenharmony_ci	stvx	v26,r10,$sp
247e1051a39Sopenharmony_ci	addi	r10,r10,32
248e1051a39Sopenharmony_ci	stvx	v27,r11,$sp
249e1051a39Sopenharmony_ci	addi	r11,r11,32
250e1051a39Sopenharmony_ci	stvx	v28,r10,$sp
251e1051a39Sopenharmony_ci	addi	r10,r10,32
252e1051a39Sopenharmony_ci	stvx	v29,r11,$sp
253e1051a39Sopenharmony_ci	addi	r11,r11,32
254e1051a39Sopenharmony_ci	stvx	v30,r10,$sp
255e1051a39Sopenharmony_ci	stvx	v31,r11,$sp
256e1051a39Sopenharmony_ci	stw	r7,`$FRAME-4`($sp)	; save vrsave
257e1051a39Sopenharmony_ci	li	r0, -1
258e1051a39Sopenharmony_ci	$PUSH	r8,`$FRAME+$LRSAVE`($sp)
259e1051a39Sopenharmony_ci	mtspr	256, r0			; preserve all AltiVec registers
260e1051a39Sopenharmony_ci
261e1051a39Sopenharmony_ci	li	r11,16
262e1051a39Sopenharmony_ci	lvx_4w	v0,0,r3			; load A[5][5]
263e1051a39Sopenharmony_ci	li	r10,32
264e1051a39Sopenharmony_ci	lvx_4w	v1,r11,r3
265e1051a39Sopenharmony_ci	addi	r11,r11,32
266e1051a39Sopenharmony_ci	lvx_4w	v2,r10,r3
267e1051a39Sopenharmony_ci	addi	r10,r10,32
268e1051a39Sopenharmony_ci	lvx_4w	v3,r11,r3
269e1051a39Sopenharmony_ci	addi	r11,r11,32
270e1051a39Sopenharmony_ci	lvx_4w	v4,r10,r3
271e1051a39Sopenharmony_ci	addi	r10,r10,32
272e1051a39Sopenharmony_ci	lvx_4w	v5,r11,r3
273e1051a39Sopenharmony_ci	addi	r11,r11,32
274e1051a39Sopenharmony_ci	lvx_4w	v6,r10,r3
275e1051a39Sopenharmony_ci	addi	r10,r10,32
276e1051a39Sopenharmony_ci	lvx_4w	v7,r11,r3
277e1051a39Sopenharmony_ci	addi	r11,r11,32
278e1051a39Sopenharmony_ci	lvx_4w	v8,r10,r3
279e1051a39Sopenharmony_ci	addi	r10,r10,32
280e1051a39Sopenharmony_ci	lvx_4w	v9,r11,r3
281e1051a39Sopenharmony_ci	addi	r11,r11,32
282e1051a39Sopenharmony_ci	lvx_4w	v10,r10,r3
283e1051a39Sopenharmony_ci	addi	r10,r10,32
284e1051a39Sopenharmony_ci	lvx_4w	v11,r11,r3
285e1051a39Sopenharmony_ci	lvx_splt v12,r10,r3
286e1051a39Sopenharmony_ci
287e1051a39Sopenharmony_ci	bl	PICmeup
288e1051a39Sopenharmony_ci
289e1051a39Sopenharmony_ci	li	r11,16
290e1051a39Sopenharmony_ci	lvx_u	v13,0,r12		; load rhotates
291e1051a39Sopenharmony_ci	li	r10,32
292e1051a39Sopenharmony_ci	lvx_u	v14,r11,r12
293e1051a39Sopenharmony_ci	addi	r11,r11,32
294e1051a39Sopenharmony_ci	lvx_u	v15,r10,r12
295e1051a39Sopenharmony_ci	addi	r10,r10,32
296e1051a39Sopenharmony_ci	lvx_u	v16,r11,r12
297e1051a39Sopenharmony_ci	addi	r11,r11,32
298e1051a39Sopenharmony_ci	lvx_u	v17,r10,r12
299e1051a39Sopenharmony_ci	addi	r10,r10,32
300e1051a39Sopenharmony_ci	lvx_u	v18,r11,r12
301e1051a39Sopenharmony_ci	addi	r11,r11,32
302e1051a39Sopenharmony_ci	lvx_u	v19,r10,r12
303e1051a39Sopenharmony_ci	addi	r10,r10,32
304e1051a39Sopenharmony_ci	lvx_u	v20,r11,r12
305e1051a39Sopenharmony_ci	addi	r11,r11,32
306e1051a39Sopenharmony_ci	lvx_u	v21,r10,r12
307e1051a39Sopenharmony_ci	addi	r10,r10,32
308e1051a39Sopenharmony_ci	lvx_u	v22,r11,r12
309e1051a39Sopenharmony_ci	addi	r11,r11,32
310e1051a39Sopenharmony_ci	lvx_u	v23,r10,r12
311e1051a39Sopenharmony_ci	addi	r10,r10,32
312e1051a39Sopenharmony_ci	lvx_u	v24,r11,r12
313e1051a39Sopenharmony_ci	lvx_u	v25,r10,r12
314e1051a39Sopenharmony_ci	addi	r12,r12,`16*16`		; points at iotas
315e1051a39Sopenharmony_ci
316e1051a39Sopenharmony_ci	bl	KeccakF1600_int
317e1051a39Sopenharmony_ci
318e1051a39Sopenharmony_ci	li	r11,16
319e1051a39Sopenharmony_ci	stvx_4w	v0,0,r3			; return A[5][5]
320e1051a39Sopenharmony_ci	li	r10,32
321e1051a39Sopenharmony_ci	stvx_4w	v1,r11,r3
322e1051a39Sopenharmony_ci	addi	r11,r11,32
323e1051a39Sopenharmony_ci	stvx_4w	v2,r10,r3
324e1051a39Sopenharmony_ci	addi	r10,r10,32
325e1051a39Sopenharmony_ci	stvx_4w	v3,r11,r3
326e1051a39Sopenharmony_ci	addi	r11,r11,32
327e1051a39Sopenharmony_ci	stvx_4w	v4,r10,r3
328e1051a39Sopenharmony_ci	addi	r10,r10,32
329e1051a39Sopenharmony_ci	stvx_4w	v5,r11,r3
330e1051a39Sopenharmony_ci	addi	r11,r11,32
331e1051a39Sopenharmony_ci	stvx_4w	v6,r10,r3
332e1051a39Sopenharmony_ci	addi	r10,r10,32
333e1051a39Sopenharmony_ci	stvx_4w	v7,r11,r3
334e1051a39Sopenharmony_ci	addi	r11,r11,32
335e1051a39Sopenharmony_ci	stvx_4w	v8,r10,r3
336e1051a39Sopenharmony_ci	addi	r10,r10,32
337e1051a39Sopenharmony_ci	stvx_4w	v9,r11,r3
338e1051a39Sopenharmony_ci	addi	r11,r11,32
339e1051a39Sopenharmony_ci	stvx_4w	v10,r10,r3
340e1051a39Sopenharmony_ci	addi	r10,r10,32
341e1051a39Sopenharmony_ci	stvx_4w	v11,r11,r3
342e1051a39Sopenharmony_ci	stvdx_u v12,r10,r3
343e1051a39Sopenharmony_ci
344e1051a39Sopenharmony_ci	li	r10,`15+6*$SIZE_T`
345e1051a39Sopenharmony_ci	li	r11,`31+6*$SIZE_T`
346e1051a39Sopenharmony_ci	mtlr	r8
347e1051a39Sopenharmony_ci	mtspr	256, r7			; restore vrsave
348e1051a39Sopenharmony_ci	lvx	v20,r10,$sp
349e1051a39Sopenharmony_ci	addi	r10,r10,32
350e1051a39Sopenharmony_ci	lvx	v21,r11,$sp
351e1051a39Sopenharmony_ci	addi	r11,r11,32
352e1051a39Sopenharmony_ci	lvx	v22,r10,$sp
353e1051a39Sopenharmony_ci	addi	r10,r10,32
354e1051a39Sopenharmony_ci	lvx	v23,r11,$sp
355e1051a39Sopenharmony_ci	addi	r11,r11,32
356e1051a39Sopenharmony_ci	lvx	v24,r10,$sp
357e1051a39Sopenharmony_ci	addi	r10,r10,32
358e1051a39Sopenharmony_ci	lvx	v25,r11,$sp
359e1051a39Sopenharmony_ci	addi	r11,r11,32
360e1051a39Sopenharmony_ci	lvx	v26,r10,$sp
361e1051a39Sopenharmony_ci	addi	r10,r10,32
362e1051a39Sopenharmony_ci	lvx	v27,r11,$sp
363e1051a39Sopenharmony_ci	addi	r11,r11,32
364e1051a39Sopenharmony_ci	lvx	v28,r10,$sp
365e1051a39Sopenharmony_ci	addi	r10,r10,32
366e1051a39Sopenharmony_ci	lvx	v29,r11,$sp
367e1051a39Sopenharmony_ci	addi	r11,r11,32
368e1051a39Sopenharmony_ci	lvx	v30,r10,$sp
369e1051a39Sopenharmony_ci	lvx	v31,r11,$sp
370e1051a39Sopenharmony_ci	addi	$sp,$sp,$FRAME
371e1051a39Sopenharmony_ci	blr
372e1051a39Sopenharmony_ci	.long	0
373e1051a39Sopenharmony_ci	.byte	0,12,0x04,1,0x80,0,1,0
374e1051a39Sopenharmony_ci	.long	0
375e1051a39Sopenharmony_ci.size	KeccakF1600,.-KeccakF1600
376e1051a39Sopenharmony_ci___
########################################################################
# SHA3_absorb (exported with .globl)
#
# Register arguments, as named by the map() below:
#   r3 = $A_jagged  state buffer; loaded on entry, stored back on exit
#   r4 = $inp       input pointer, advanced by 8 for every lane absorbed
#   r5 = $len       number of input bytes available
#   r6 = $bsz       block size in bytes
# Return value (r3): what is left of $len once it is smaller than $bsz.
#
# Outline of the emitted code:
#   - Prologue identical to KeccakF1600: allocate $FRAME, save v20..v31,
#     VRSAVE (kept in r7) and LR (kept in r8), set VRSAVE to all ones.
#   - Load the state into v0..v12 and, after calling PICmeup, the rotate
#     counts into v13..v25 from r12; then advance r12 by 16*16 bytes so
#     it points at iotas.  r10/r11 are set to -32/-16 so that
#     "lvx_u ...,r10,r12" and "lvx_u ...,r11,r12" fetch the two 16-byte
#     permutation masks stored immediately before iotas.
#   - .Loop_absorb: leave the loop when $len is below $bsz (unsigned
#     compare via $UCMP); otherwise subtract $bsz from $len and load
#     $bsz>>3 into CTR as the number of 8-byte lanes to absorb.  The
#     masks are fetched into v30/v31 on every pass because
#     KeccakF1600_int uses v26..v31 as scratch.  The three lines
#     starting with "?" adjust the masks for byte swapping on
#     big-endian, as their comments say; NOTE(review): the "?" prefix
#     is presumably resolved per endianness when $code is
#     post-processed - confirm.
#   - Each lane step loads 8 bytes from $inp with lvdx_u, advances $inp,
#     permutes the value against a zero vector (v27) with mask v30 or
#     v31, XORs the result into one state register and executes bdz,
#     which jumps to .Lprocess_block once CTR reaches zero.  Target
#     registers are visited as v0..v4 (v30), v0..v4 (v31), v5..v9 (v30),
#     v5..v9 (v31), v10 (v30), v10 (v31), v11 (v30), v11 (v31) and
#     v12 (v31): 25 lanes in total.  NOTE(review): v30 apparently
#     routes a lane to the first element named in the register layout
#     and v31 to the second - confirm against the mask table.
#   - .Lprocess_block calls KeccakF1600_int and loops.
#   - .Labsorbed stores v0..v12 back to $A_jagged, copies $len to r3 and
#     runs the epilogue (restore v20..v31, VRSAVE, LR; pop the frame).
#
# NOTE(review): the lane sequence assumes $bsz is a non-zero multiple of
# 8 and at most 200; a zero CTR would never satisfy bdz, and $bsz == 0
# would never let the loop terminate - verify that callers guarantee it.
377e1051a39Sopenharmony_ci{
378e1051a39Sopenharmony_cimy ($A_jagged,$inp,$len,$bsz) = map("r$_",(3..6));
379e1051a39Sopenharmony_ci
380e1051a39Sopenharmony_ci$code.=<<___;
381e1051a39Sopenharmony_ci.globl	SHA3_absorb
382e1051a39Sopenharmony_ci.type	SHA3_absorb,\@function
383e1051a39Sopenharmony_ci.align	5
384e1051a39Sopenharmony_ciSHA3_absorb:
385e1051a39Sopenharmony_ci	$STU	$sp,-$FRAME($sp)
386e1051a39Sopenharmony_ci	li	r10,`15+6*$SIZE_T`
387e1051a39Sopenharmony_ci	li	r11,`31+6*$SIZE_T`
388e1051a39Sopenharmony_ci	mflr	r8
389e1051a39Sopenharmony_ci	mfspr	r7, 256			; save vrsave
390e1051a39Sopenharmony_ci	stvx	v20,r10,$sp
391e1051a39Sopenharmony_ci	addi	r10,r10,32
392e1051a39Sopenharmony_ci	stvx	v21,r11,$sp
393e1051a39Sopenharmony_ci	addi	r11,r11,32
394e1051a39Sopenharmony_ci	stvx	v22,r10,$sp
395e1051a39Sopenharmony_ci	addi	r10,r10,32
396e1051a39Sopenharmony_ci	stvx	v23,r11,$sp
397e1051a39Sopenharmony_ci	addi	r11,r11,32
398e1051a39Sopenharmony_ci	stvx	v24,r10,$sp
399e1051a39Sopenharmony_ci	addi	r10,r10,32
400e1051a39Sopenharmony_ci	stvx	v25,r11,$sp
401e1051a39Sopenharmony_ci	addi	r11,r11,32
402e1051a39Sopenharmony_ci	stvx	v26,r10,$sp
403e1051a39Sopenharmony_ci	addi	r10,r10,32
404e1051a39Sopenharmony_ci	stvx	v27,r11,$sp
405e1051a39Sopenharmony_ci	addi	r11,r11,32
406e1051a39Sopenharmony_ci	stvx	v28,r10,$sp
407e1051a39Sopenharmony_ci	addi	r10,r10,32
408e1051a39Sopenharmony_ci	stvx	v29,r11,$sp
409e1051a39Sopenharmony_ci	addi	r11,r11,32
410e1051a39Sopenharmony_ci	stvx	v30,r10,$sp
411e1051a39Sopenharmony_ci	stvx	v31,r11,$sp
412e1051a39Sopenharmony_ci	stw	r7,`$FRAME-4`($sp)	; save vrsave
413e1051a39Sopenharmony_ci	li	r0, -1
414e1051a39Sopenharmony_ci	$PUSH	r8,`$FRAME+$LRSAVE`($sp)
415e1051a39Sopenharmony_ci	mtspr	256, r0			; preserve all AltiVec registers
416e1051a39Sopenharmony_ci
417e1051a39Sopenharmony_ci	li	r11,16
418e1051a39Sopenharmony_ci	lvx_4w	v0,0,$A_jagged		; load A[5][5]
419e1051a39Sopenharmony_ci	li	r10,32
420e1051a39Sopenharmony_ci	lvx_4w	v1,r11,$A_jagged
421e1051a39Sopenharmony_ci	addi	r11,r11,32
422e1051a39Sopenharmony_ci	lvx_4w	v2,r10,$A_jagged
423e1051a39Sopenharmony_ci	addi	r10,r10,32
424e1051a39Sopenharmony_ci	lvx_4w	v3,r11,$A_jagged
425e1051a39Sopenharmony_ci	addi	r11,r11,32
426e1051a39Sopenharmony_ci	lvx_4w	v4,r10,$A_jagged
427e1051a39Sopenharmony_ci	addi	r10,r10,32
428e1051a39Sopenharmony_ci	lvx_4w	v5,r11,$A_jagged
429e1051a39Sopenharmony_ci	addi	r11,r11,32
430e1051a39Sopenharmony_ci	lvx_4w	v6,r10,$A_jagged
431e1051a39Sopenharmony_ci	addi	r10,r10,32
432e1051a39Sopenharmony_ci	lvx_4w	v7,r11,$A_jagged
433e1051a39Sopenharmony_ci	addi	r11,r11,32
434e1051a39Sopenharmony_ci	lvx_4w	v8,r10,$A_jagged
435e1051a39Sopenharmony_ci	addi	r10,r10,32
436e1051a39Sopenharmony_ci	lvx_4w	v9,r11,$A_jagged
437e1051a39Sopenharmony_ci	addi	r11,r11,32
438e1051a39Sopenharmony_ci	lvx_4w	v10,r10,$A_jagged
439e1051a39Sopenharmony_ci	addi	r10,r10,32
440e1051a39Sopenharmony_ci	lvx_4w	v11,r11,$A_jagged
441e1051a39Sopenharmony_ci	lvx_splt v12,r10,$A_jagged
442e1051a39Sopenharmony_ci
443e1051a39Sopenharmony_ci	bl	PICmeup
444e1051a39Sopenharmony_ci
445e1051a39Sopenharmony_ci	li	r11,16
446e1051a39Sopenharmony_ci	lvx_u	v13,0,r12		; load rhotates
447e1051a39Sopenharmony_ci	li	r10,32
448e1051a39Sopenharmony_ci	lvx_u	v14,r11,r12
449e1051a39Sopenharmony_ci	addi	r11,r11,32
450e1051a39Sopenharmony_ci	lvx_u	v15,r10,r12
451e1051a39Sopenharmony_ci	addi	r10,r10,32
452e1051a39Sopenharmony_ci	lvx_u	v16,r11,r12
453e1051a39Sopenharmony_ci	addi	r11,r11,32
454e1051a39Sopenharmony_ci	lvx_u	v17,r10,r12
455e1051a39Sopenharmony_ci	addi	r10,r10,32
456e1051a39Sopenharmony_ci	lvx_u	v18,r11,r12
457e1051a39Sopenharmony_ci	addi	r11,r11,32
458e1051a39Sopenharmony_ci	lvx_u	v19,r10,r12
459e1051a39Sopenharmony_ci	addi	r10,r10,32
460e1051a39Sopenharmony_ci	lvx_u	v20,r11,r12
461e1051a39Sopenharmony_ci	addi	r11,r11,32
462e1051a39Sopenharmony_ci	lvx_u	v21,r10,r12
463e1051a39Sopenharmony_ci	addi	r10,r10,32
464e1051a39Sopenharmony_ci	lvx_u	v22,r11,r12
465e1051a39Sopenharmony_ci	addi	r11,r11,32
466e1051a39Sopenharmony_ci	lvx_u	v23,r10,r12
467e1051a39Sopenharmony_ci	addi	r10,r10,32
468e1051a39Sopenharmony_ci	lvx_u	v24,r11,r12
469e1051a39Sopenharmony_ci	lvx_u	v25,r10,r12
470e1051a39Sopenharmony_ci	li	r10,-32
471e1051a39Sopenharmony_ci	li	r11,-16
472e1051a39Sopenharmony_ci	addi	r12,r12,`16*16`		; points at iotas
473e1051a39Sopenharmony_ci	b	.Loop_absorb
474e1051a39Sopenharmony_ci
475e1051a39Sopenharmony_ci.align	4
476e1051a39Sopenharmony_ci.Loop_absorb:
477e1051a39Sopenharmony_ci	$UCMP	$len,$bsz		; len < bsz?
478e1051a39Sopenharmony_ci	blt	.Labsorbed
479e1051a39Sopenharmony_ci
480e1051a39Sopenharmony_ci	sub	$len,$len,$bsz		; len -= bsz
481e1051a39Sopenharmony_ci	srwi	r0,$bsz,3
482e1051a39Sopenharmony_ci	mtctr	r0
483e1051a39Sopenharmony_ci
484e1051a39Sopenharmony_ci	lvx_u	v30,r10,r12		; permutation masks
485e1051a39Sopenharmony_ci	lvx_u	v31,r11,r12
486e1051a39Sopenharmony_ci	?vspltisb v27,7			; prepare masks for byte swap
487e1051a39Sopenharmony_ci	?vxor	v30,v30,v27		; on big-endian
488e1051a39Sopenharmony_ci	?vxor	v31,v31,v27
489e1051a39Sopenharmony_ci
490e1051a39Sopenharmony_ci	vxor	v27,v27,v27		; zero
491e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
492e1051a39Sopenharmony_ci	addi	$inp,$inp,8
493e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
494e1051a39Sopenharmony_ci	vxor	v0, v0, v26
495e1051a39Sopenharmony_ci	bdz	.Lprocess_block
496e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
497e1051a39Sopenharmony_ci	addi	$inp,$inp,8
498e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
499e1051a39Sopenharmony_ci	vxor	v1, v1, v26
500e1051a39Sopenharmony_ci	bdz	.Lprocess_block
501e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
502e1051a39Sopenharmony_ci	addi	$inp,$inp,8
503e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
504e1051a39Sopenharmony_ci	vxor	v2, v2, v26
505e1051a39Sopenharmony_ci	bdz	.Lprocess_block
506e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
507e1051a39Sopenharmony_ci	addi	$inp,$inp,8
508e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
509e1051a39Sopenharmony_ci	vxor	v3, v3, v26
510e1051a39Sopenharmony_ci	bdz	.Lprocess_block
511e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
512e1051a39Sopenharmony_ci	addi	$inp,$inp,8
513e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
514e1051a39Sopenharmony_ci	vxor	v4, v4, v26
515e1051a39Sopenharmony_ci	bdz	.Lprocess_block
516e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
517e1051a39Sopenharmony_ci	addi	$inp,$inp,8
518e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
519e1051a39Sopenharmony_ci	vxor	v0, v0, v26
520e1051a39Sopenharmony_ci	bdz	.Lprocess_block
521e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
522e1051a39Sopenharmony_ci	addi	$inp,$inp,8
523e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
524e1051a39Sopenharmony_ci	vxor	v1, v1, v26
525e1051a39Sopenharmony_ci	bdz	.Lprocess_block
526e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
527e1051a39Sopenharmony_ci	addi	$inp,$inp,8
528e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
529e1051a39Sopenharmony_ci	vxor	v2, v2, v26
530e1051a39Sopenharmony_ci	bdz	.Lprocess_block
531e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
532e1051a39Sopenharmony_ci	addi	$inp,$inp,8
533e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
534e1051a39Sopenharmony_ci	vxor	v3, v3, v26
535e1051a39Sopenharmony_ci	bdz	.Lprocess_block
536e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
537e1051a39Sopenharmony_ci	addi	$inp,$inp,8
538e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
539e1051a39Sopenharmony_ci	vxor	v4, v4, v26
540e1051a39Sopenharmony_ci	bdz	.Lprocess_block
541e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
542e1051a39Sopenharmony_ci	addi	$inp,$inp,8
543e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
544e1051a39Sopenharmony_ci	vxor	v5, v5, v26
545e1051a39Sopenharmony_ci	bdz	.Lprocess_block
546e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
547e1051a39Sopenharmony_ci	addi	$inp,$inp,8
548e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
549e1051a39Sopenharmony_ci	vxor	v6, v6, v26
550e1051a39Sopenharmony_ci	bdz	.Lprocess_block
551e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
552e1051a39Sopenharmony_ci	addi	$inp,$inp,8
553e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
554e1051a39Sopenharmony_ci	vxor	v7, v7, v26
555e1051a39Sopenharmony_ci	bdz	.Lprocess_block
556e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
557e1051a39Sopenharmony_ci	addi	$inp,$inp,8
558e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
559e1051a39Sopenharmony_ci	vxor	v8, v8, v26
560e1051a39Sopenharmony_ci	bdz	.Lprocess_block
561e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
562e1051a39Sopenharmony_ci	addi	$inp,$inp,8
563e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
564e1051a39Sopenharmony_ci	vxor	v9, v9, v26
565e1051a39Sopenharmony_ci	bdz	.Lprocess_block
566e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
567e1051a39Sopenharmony_ci	addi	$inp,$inp,8
568e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
569e1051a39Sopenharmony_ci	vxor	v5, v5, v26
570e1051a39Sopenharmony_ci	bdz	.Lprocess_block
571e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
572e1051a39Sopenharmony_ci	addi	$inp,$inp,8
573e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
574e1051a39Sopenharmony_ci	vxor	v6, v6, v26
575e1051a39Sopenharmony_ci	bdz	.Lprocess_block
576e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
577e1051a39Sopenharmony_ci	addi	$inp,$inp,8
578e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
579e1051a39Sopenharmony_ci	vxor	v7, v7, v26
580e1051a39Sopenharmony_ci	bdz	.Lprocess_block
581e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
582e1051a39Sopenharmony_ci	addi	$inp,$inp,8
583e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
584e1051a39Sopenharmony_ci	vxor	v8, v8, v26
585e1051a39Sopenharmony_ci	bdz	.Lprocess_block
586e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
587e1051a39Sopenharmony_ci	addi	$inp,$inp,8
588e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
589e1051a39Sopenharmony_ci	vxor	v9, v9, v26
590e1051a39Sopenharmony_ci	bdz	.Lprocess_block
591e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
592e1051a39Sopenharmony_ci	addi	$inp,$inp,8
593e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
594e1051a39Sopenharmony_ci	vxor	v10, v10, v26
595e1051a39Sopenharmony_ci	bdz	.Lprocess_block
596e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
597e1051a39Sopenharmony_ci	addi	$inp,$inp,8
598e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
599e1051a39Sopenharmony_ci	vxor	v10, v10, v26
600e1051a39Sopenharmony_ci	bdz	.Lprocess_block
601e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
602e1051a39Sopenharmony_ci	addi	$inp,$inp,8
603e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v30
604e1051a39Sopenharmony_ci	vxor	v11, v11, v26
605e1051a39Sopenharmony_ci	bdz	.Lprocess_block
606e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
607e1051a39Sopenharmony_ci	addi	$inp,$inp,8
608e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
609e1051a39Sopenharmony_ci	vxor	v11, v11, v26
610e1051a39Sopenharmony_ci	bdz	.Lprocess_block
611e1051a39Sopenharmony_ci	lvdx_u	v26,0,$inp
612e1051a39Sopenharmony_ci	addi	$inp,$inp,8
613e1051a39Sopenharmony_ci	vperm	v26,v26,v27,v31
614e1051a39Sopenharmony_ci	vxor	v12, v12, v26
615e1051a39Sopenharmony_ci
616e1051a39Sopenharmony_ci.Lprocess_block:
617e1051a39Sopenharmony_ci	bl	KeccakF1600_int
618e1051a39Sopenharmony_ci
619e1051a39Sopenharmony_ci	b	.Loop_absorb
620e1051a39Sopenharmony_ci
621e1051a39Sopenharmony_ci.align	4
622e1051a39Sopenharmony_ci.Labsorbed:
623e1051a39Sopenharmony_ci	li	r11,16
624e1051a39Sopenharmony_ci	stvx_4w	v0,0,$A_jagged		; return A[5][5]
625e1051a39Sopenharmony_ci	li	r10,32
626e1051a39Sopenharmony_ci	stvx_4w	v1,r11,$A_jagged
627e1051a39Sopenharmony_ci	addi	r11,r11,32
628e1051a39Sopenharmony_ci	stvx_4w	v2,r10,$A_jagged
629e1051a39Sopenharmony_ci	addi	r10,r10,32
630e1051a39Sopenharmony_ci	stvx_4w	v3,r11,$A_jagged
631e1051a39Sopenharmony_ci	addi	r11,r11,32
632e1051a39Sopenharmony_ci	stvx_4w	v4,r10,$A_jagged
633e1051a39Sopenharmony_ci	addi	r10,r10,32
634e1051a39Sopenharmony_ci	stvx_4w	v5,r11,$A_jagged
635e1051a39Sopenharmony_ci	addi	r11,r11,32
636e1051a39Sopenharmony_ci	stvx_4w	v6,r10,$A_jagged
637e1051a39Sopenharmony_ci	addi	r10,r10,32
638e1051a39Sopenharmony_ci	stvx_4w	v7,r11,$A_jagged
639e1051a39Sopenharmony_ci	addi	r11,r11,32
640e1051a39Sopenharmony_ci	stvx_4w	v8,r10,$A_jagged
641e1051a39Sopenharmony_ci	addi	r10,r10,32
642e1051a39Sopenharmony_ci	stvx_4w	v9,r11,$A_jagged
643e1051a39Sopenharmony_ci	addi	r11,r11,32
644e1051a39Sopenharmony_ci	stvx_4w	v10,r10,$A_jagged
645e1051a39Sopenharmony_ci	addi	r10,r10,32
646e1051a39Sopenharmony_ci	stvx_4w	v11,r11,$A_jagged
647e1051a39Sopenharmony_ci	stvdx_u v12,r10,$A_jagged
648e1051a39Sopenharmony_ci
649e1051a39Sopenharmony_ci	mr	r3,$len			; return value
650e1051a39Sopenharmony_ci	li	r10,`15+6*$SIZE_T`
651e1051a39Sopenharmony_ci	li	r11,`31+6*$SIZE_T`
652e1051a39Sopenharmony_ci	mtlr	r8
653e1051a39Sopenharmony_ci	mtspr	256, r7			; restore vrsave
654e1051a39Sopenharmony_ci	lvx	v20,r10,$sp
655e1051a39Sopenharmony_ci	addi	r10,r10,32
656e1051a39Sopenharmony_ci	lvx	v21,r11,$sp
657e1051a39Sopenharmony_ci	addi	r11,r11,32
658e1051a39Sopenharmony_ci	lvx	v22,r10,$sp
659e1051a39Sopenharmony_ci	addi	r10,r10,32
660e1051a39Sopenharmony_ci	lvx	v23,r11,$sp
661e1051a39Sopenharmony_ci	addi	r11,r11,32
662e1051a39Sopenharmony_ci	lvx	v24,r10,$sp
663e1051a39Sopenharmony_ci	addi	r10,r10,32
664e1051a39Sopenharmony_ci	lvx	v25,r11,$sp
665e1051a39Sopenharmony_ci	addi	r11,r11,32
666e1051a39Sopenharmony_ci	lvx	v26,r10,$sp
667e1051a39Sopenharmony_ci	addi	r10,r10,32
668e1051a39Sopenharmony_ci	lvx	v27,r11,$sp
669e1051a39Sopenharmony_ci	addi	r11,r11,32
670e1051a39Sopenharmony_ci	lvx	v28,r10,$sp
671e1051a39Sopenharmony_ci	addi	r10,r10,32
672e1051a39Sopenharmony_ci	lvx	v29,r11,$sp
673e1051a39Sopenharmony_ci	addi	r11,r11,32
674e1051a39Sopenharmony_ci	lvx	v30,r10,$sp
675e1051a39Sopenharmony_ci	lvx	v31,r11,$sp
676e1051a39Sopenharmony_ci	addi	$sp,$sp,$FRAME
677e1051a39Sopenharmony_ci	blr
678e1051a39Sopenharmony_ci	.long	0
679e1051a39Sopenharmony_ci	.byte	0,12,0x04,1,0x80,0,4,0
680e1051a39Sopenharmony_ci	.long	0
681e1051a39Sopenharmony_ci.size	SHA3_absorb,.-SHA3_absorb
682e1051a39Sopenharmony_ci___
683e1051a39Sopenharmony_ci}
{
# SHA3_squeeze(A_jagged, out, len, bsz): the four arguments arrive in r3..r6.
#
# Writes len bytes taken from the state at A_jagged to out.  Each 64-bit
# lane is fetched as two 32-bit words (the word at offset +4 is treated as
# the low half, the word at +0 as the high half) and emitted one byte at a
# time, least-significant byte first.  r10 counts down from bsz; once bsz
# bytes have been produced and more are still wanted, KeccakF1600 is called
# and reading restarts at lane 0.
#
# Lane order ("jagged index" in r11), as produced by the compare/adjust
# ladder in the loop:
#     0, 16, 32, 48, 64,          8, 24, 40, 56, 72,
#    80, 96,112,128,144,         88,104,120,136,152,
#   160,168,176,184,192
# i.e. two groups of five lanes interleaved at 16-byte stride in bytes
# 0..79, two more in bytes 80..159, and the last five lanes contiguous.
#
# Register use inside the routine: r9 keeps the caller's LR across the
# KeccakF1600 call; r7/r0 hold the low/high words of the current lane;
# r8, r10 and r11 are re-initialised after every KeccakF1600 call.
#
# A zero len returns immediately.  Without that guard the tail code would
# load CTR with 0 and bdnz would wrap the counter, storing far beyond out.
# The tail loops shift with srwi (like the full-lane path) because only the
# low 32 bits loaded by lwzx are meaningful.
my ($A_jagged,$out,$len,$bsz) = map("r$_",(3..6));

$code.=<<___;
.globl	SHA3_squeeze
.type	SHA3_squeeze,\@function
.align	5
SHA3_squeeze:
	${UCMP}i $len,0			; zero-length request,
	beqlr				; nothing to write
	mflr	r9			; r9 is not touched by KeccakF1600
	subi	$out,$out,1		; prepare for stbu
	addi	r8,$A_jagged,4		; prepare volatiles
	mr	r10,$bsz
	li	r11,0
	b	.Loop_squeeze
.align	4
.Loop_squeeze:
	lwzx	r7,r11,r8		; lo
	lwzx	r0,r11,$A_jagged	; hi
	${UCMP}i $len,8
	blt	.Lsqueeze_tail

	stbu	r7,1($out)		; write lo
	srwi	r7,r7,8
	stbu	r7,1($out)
	srwi	r7,r7,8
	stbu	r7,1($out)
	srwi	r7,r7,8
	stbu	r7,1($out)
	stbu	r0,1($out)		; write hi
	srwi	r0,r0,8
	stbu	r0,1($out)
	srwi	r0,r0,8
	stbu	r0,1($out)
	srwi	r0,r0,8
	stbu	r0,1($out)

	subic.	$len,$len,8
	beqlr				; return if done

	subic.	r10,r10,8
	ble	.Loutput_expand

	addi	r11,r11,16		; calculate jagged index
	cmplwi	r11,`16*5`
	blt	.Loop_squeeze
	subi	r11,r11,72
	beq	.Loop_squeeze
	addi	r11,r11,72
	cmplwi	r11,`16*5+8`
	subi	r11,r11,8
	beq	.Loop_squeeze
	addi	r11,r11,8
	cmplwi	r11,`16*10`
	subi	r11,r11,72
	beq	.Loop_squeeze
	addi	r11,r11,72
	blt	.Loop_squeeze
	subi	r11,r11,8
	b	.Loop_squeeze

.align	4
.Loutput_expand:
	bl	KeccakF1600
	mtlr	r9

	addi	r8,$A_jagged,4		; restore volatiles
	mr	r10,$bsz
	li	r11,0
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	mtctr	$len
	subic.	$len,$len,4
	ble	.Loop_tail_lo
	li	r8,4
	mtctr	r8
.Loop_tail_lo:
	stbu	r7,1($out)
	srwi	r7,r7,8
	bdnz	.Loop_tail_lo
	ble	.Lsqueeze_done
	mtctr	$len
.Loop_tail_hi:
	stbu	r0,1($out)
	srwi	r0,r0,8
	bdnz	.Loop_tail_hi

.Lsqueeze_done:
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,4,0
	.long	0
.size	SHA3_squeeze,.-SHA3_squeeze
___
}
# Position-independent constant pool.
#
# PICmeup returns in r12 the run-time address of the first table.  The
# bcl 20,31,$+4 puts the address of the following instruction (byte 8 of
# the 64-byte-aligned routine) into LR; adding 64-8 therefore yields the
# start of the next 64-byte unit.  The routine occupies 9 words (six
# instructions plus the 12-byte trailer), and .space 64-9*4 pads it to
# exactly 64 bytes so that rhotates begins there.  The caller's LR is
# carried through r0 and restored before returning.
#
# Data layout that follows, 16 bytes per entry:
#   rhotates  13 pairs of rotation counts.  They are the Keccak rho offsets
#             taken two lanes at a time (the last offset, 14, is
#             duplicated to fill its pair).
#   (unnamed) one all-zero entry and two byte-index vectors.
#             NOTE(review): presumably the vperm control vectors used by
#             the absorb code -- confirm against the loads earlier in the
#             file.
#   iotas     the 24 Keccak-f[1600] round constants, each followed by a
#             zero quadword.
$code.=<<___;
.align	6
PICmeup:
	mflr	r0
	bcl	20,31,\$+4
	mflr	r12   ; vvvvvv "distance" between . and 1st data entry
	addi	r12,r12,`64-8`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
.type	rhotates,\@object
.align	6
rhotates:
	.quad	0,  36
	.quad	1,  44
	.quad	62,  6
	.quad	28, 55
	.quad	27, 20
	.quad	3,  41
	.quad	10, 45
	.quad	43, 15
	.quad	25, 21
	.quad	39,  8
	.quad	18,  2
	.quad	61, 56
	.quad	14, 14
.size	rhotates,.-rhotates
	.quad	0,0
	.quad	0x0001020304050607,0x1011121314151617
	.quad	0x1011121314151617,0x0001020304050607
.type	iotas,\@object
iotas:
	.quad	0x0000000000000001,0
	.quad	0x0000000000008082,0
	.quad	0x800000000000808a,0
	.quad	0x8000000080008000,0
	.quad	0x000000000000808b,0
	.quad	0x0000000080000001,0
	.quad	0x8000000080008081,0
	.quad	0x8000000000008009,0
	.quad	0x000000000000008a,0
	.quad	0x0000000000000088,0
	.quad	0x0000000080008009,0
	.quad	0x000000008000000a,0
	.quad	0x000000008000808b,0
	.quad	0x800000000000008b,0
	.quad	0x8000000000008089,0
	.quad	0x8000000000008003,0
	.quad	0x8000000000008002,0
	.quad	0x8000000000000080,0
	.quad	0x000000000000800a,0
	.quad	0x800000008000000a,0
	.quad	0x8000000080008081,0
	.quad	0x8000000000008080,0
	.quad	0x0000000080000001,0
	.quad	0x8000000080008008,0
.size	iotas,.-iotas
.asciz	"Keccak-1600 absorb and squeeze for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
___
841e1051a39Sopenharmony_ci
# Post-process the assembly text accumulated in $code and print it line by
# line:
#   1. every span enclosed in backticks is replaced by the result of
#      evaluating its contents as Perl (the text uses this for constant
#      arithmetic such as 16*5);
#   2. the first "?mnemonic" marker on a line loses its "?" for big-endian
#      flavours and has the "?" replaced by ";" for little-endian ones.
# NOTE(review): ";" starts the trailing comments in the assembly text, so
# the little-endian branch presumably comments the instruction out --
# confirm against the translator that consumes this output.
my $little_endian = ($flavour =~ /le$/);	# loop-invariant, test once

foreach  (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/ge;

	if ($little_endian) {		# little-endian
	    s/\?([a-z]+)/;$1/;
	} else {			# big-endian
	    s/\?([a-z]+)/$1/;
	}

	print $_,"\n";
}

# Buffered write errors only surface when the handle is closed, so a failed
# close is fatal rather than silently leaving truncated output.
close STDOUT or die "error closing STDOUT: $!";
855