#! /usr/bin/env perl
# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# March 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses a 256-byte per-key table [+128-byte shared table]. Even though
# loops are aggressively modulo-scheduled with respect to references to
# Htbl and Z.hi updates for 8 cycles per byte, measured performance is
# ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic
# scheduling "glitch," because uprofile(1) indicates uniform sample
# distribution, as if all instruction bundles execute in 1.5 cycles.
# Meaning that it could have been even faster, yet 12 cycles is ~60%
# better than gcc-generated code and ~80% better than code generated by
# the vendor compiler.

# Symbolic register assignment for the generated assembly.  Each Perl
# variable holds the software name of one Alpha register and is
# interpolated into the heredocs that build $code.  The "# $n" notes give
# the hardware register number of the first register in each run.  They
# are deliberately package globals: the heredocs in loop() and in the
# function bodies further down read them by name.

# Scratch registers.
$cnt="v0";	# $0, byte index / loop counter
$t0="t0";
$t1="t1";
$t2="t2";
$Thi0="t3";	# $4, table entry (even step), high half
$Tlo0="t4";	#     table entry (even step), low half
$Thi1="t5";	#     table entry (odd step), high half
$Tlo1="t6";	#     table entry (odd step), low half
$rem="t7";	# $8, value fetched from the rem_4bit table
#################
$Xi="a0";	# $16, input argument block
$Htbl="a1";
$inp="a2";
$len="a3";
$nlo="a4";	# $20, Htbl offset derived from the low nibble of a byte
$nhi="a5";	#      Htbl offset derived from the high nibble of a byte
$Zhi="t8";	# accumulator, high quadword
$Zlo="t9";	# accumulator, low quadword
$Xhi="t10";	# $24, operand, quadword at 0(Xi)
$Xlo="t11";	#      operand, quadword at 8(Xi)
$remp="t12";	# address of the selected rem_4bit entry
$rem_4bit="AT";	# $28, base address of the rem_4bit table

# loop() appends one copy of the table-driven multiplication sequence to
# $code.  It takes no arguments and its return value is not used.
#
# Registers read by the emitted code:
#   $Xhi:$Xlo   operand (the quadwords at 0(Xi) and 8(Xi))
#   $Htbl       base of the per-key table, indexed in 16-byte steps
#   $rem_4bit   base of the rem_4bit table, indexed in 8-byte steps
#               (s8addq); the callers obtain it through picmeup
# Result is left in $Zhi:$Zlo.  Clobbered: $cnt, $t0, $nlo, $nhi, $remp,
# $rem, $Thi0, $Tlo0, $Thi1, $Tlo1.
#
# Shape of the emitted code: bytes 7..0 of $Xlo are consumed first
# (.Looplo), then bytes 7..0 of $Xhi (.Loophi).  Every loop iteration
# handles both nibbles of one byte; the table loads for the next step are
# issued while the current step is still being folded into Z, which is
# why a prologue precedes each loop and an epilogue follows the last one.
#
# $N counts the expansions so that the .Looplo/.Loophi labels stay unique
# when the sequence is emitted more than once into the same file.  The
# sub is declared without a prototype; it is called as a plain function.
{ my $N;
  sub loop {

	$N++;
$code.=<<___;
.align	4
	extbl	$Xlo,7,$nlo
	and	$nlo,0xf0,$nhi
	sll	$nlo,4,$nlo
	and	$nlo,0xf0,$nlo

	addq	$nlo,$Htbl,$nlo
	ldq	$Zlo,8($nlo)
	addq	$nhi,$Htbl,$nhi
	ldq	$Zhi,0($nlo)

	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	lda	$cnt,6(zero)
	extbl	$Xlo,6,$nlo

	ldq	$Tlo1,8($nhi)
	s8addq	$remp,$rem_4bit,$remp
	ldq	$Thi1,0($nhi)
	srl	$Zlo,4,$Zlo

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo
	and	$nlo,0xf0,$nhi

	xor	$Tlo1,$Zlo,$Zlo
	sll	$nlo,4,$nlo
	xor	$Thi1,$Zhi,$Zhi
	and	$nlo,0xf0,$nlo

	addq	$nlo,$Htbl,$nlo
	ldq	$Tlo0,8($nlo)
	addq	$nhi,$Htbl,$nhi
	ldq	$Thi0,0($nlo)

.Looplo$N:
	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	subq	$cnt,1,$cnt
	srl	$Zlo,4,$Zlo

	ldq	$Tlo1,8($nhi)
	xor	$rem,$Zhi,$Zhi
	ldq	$Thi1,0($nhi)
	s8addq	$remp,$rem_4bit,$remp

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo
	extbl	$Xlo,$cnt,$nlo

	and	$nlo,0xf0,$nhi
	xor	$Thi0,$Zhi,$Zhi
	xor	$Tlo0,$Zlo,$Zlo
	sll	$nlo,4,$nlo


	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	and	$nlo,0xf0,$nlo
	srl	$Zlo,4,$Zlo

	s8addq	$remp,$rem_4bit,$remp
	xor	$rem,$Zhi,$Zhi
	addq	$nlo,$Htbl,$nlo
	addq	$nhi,$Htbl,$nhi

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	ldq	$Tlo0,8($nlo)
	xor	$t0,$Zlo,$Zlo

	xor	$Tlo1,$Zlo,$Zlo
	xor	$Thi1,$Zhi,$Zhi
	ldq	$Thi0,0($nlo)
	bne	$cnt,.Looplo$N


	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	lda	$cnt,7(zero)
	srl	$Zlo,4,$Zlo

	ldq	$Tlo1,8($nhi)
	xor	$rem,$Zhi,$Zhi
	ldq	$Thi1,0($nhi)
	s8addq	$remp,$rem_4bit,$remp

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo
	extbl	$Xhi,$cnt,$nlo

	and	$nlo,0xf0,$nhi
	xor	$Thi0,$Zhi,$Zhi
	xor	$Tlo0,$Zlo,$Zlo
	sll	$nlo,4,$nlo

	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	and	$nlo,0xf0,$nlo
	srl	$Zlo,4,$Zlo

	s8addq	$remp,$rem_4bit,$remp
	xor	$rem,$Zhi,$Zhi
	addq	$nlo,$Htbl,$nlo
	addq	$nhi,$Htbl,$nhi

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	ldq	$Tlo0,8($nlo)
	xor	$t0,$Zlo,$Zlo

	xor	$Tlo1,$Zlo,$Zlo
	xor	$Thi1,$Zhi,$Zhi
	ldq	$Thi0,0($nlo)
	unop


.Loophi$N:
	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	subq	$cnt,1,$cnt
	srl	$Zlo,4,$Zlo

	ldq	$Tlo1,8($nhi)
	xor	$rem,$Zhi,$Zhi
	ldq	$Thi1,0($nhi)
	s8addq	$remp,$rem_4bit,$remp

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo
	extbl	$Xhi,$cnt,$nlo

	and	$nlo,0xf0,$nhi
	xor	$Thi0,$Zhi,$Zhi
	xor	$Tlo0,$Zlo,$Zlo
	sll	$nlo,4,$nlo


	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	and	$nlo,0xf0,$nlo
	srl	$Zlo,4,$Zlo

	s8addq	$remp,$rem_4bit,$remp
	xor	$rem,$Zhi,$Zhi
	addq	$nlo,$Htbl,$nlo
	addq	$nhi,$Htbl,$nhi

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	ldq	$Tlo0,8($nlo)
	xor	$t0,$Zlo,$Zlo

	xor	$Tlo1,$Zlo,$Zlo
	xor	$Thi1,$Zhi,$Zhi
	ldq	$Thi0,0($nlo)
	bne	$cnt,.Loophi$N


	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	srl	$Zlo,4,$Zlo

	ldq	$Tlo1,8($nhi)
	xor	$rem,$Zhi,$Zhi
	ldq	$Thi1,0($nhi)
	s8addq	$remp,$rem_4bit,$remp

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo

	xor	$Tlo0,$Zlo,$Zlo
	xor	$Thi0,$Zhi,$Zhi

	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	srl	$Zlo,4,$Zlo

	s8addq	$remp,$rem_4bit,$remp
	xor	$rem,$Zhi,$Zhi

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$Tlo1,$Zlo,$Zlo
	xor	$Thi1,$Zhi,$Zhi
	xor	$t0,$Zlo,$Zlo
	xor	$rem,$Zhi,$Zhi
___
}}

# Start of the generated file.  This is a plain assignment to $code (not
# an append), so it has to run before any other emission.
#
# The text opens with the preprocessor includes that supply the symbolic
# register names, then defines gcm_gmult_4bit.  The emitted function reads
# two arguments, Xi ($Xi) and Htbl ($Htbl):
#   1. load the two quadwords of Xi into $Xhi/$Xlo;
#   2. call picmeup (link register $t0) to get the address of the
#      rem_4bit table into $rem_4bit;
#   3. run one multiplication (emitted by loop());
#   4. byte-swap $Zhi/$Zlo and store them back to 0(Xi)/8(Xi).
# The function is a leaf with no stack frame (.frame sp,0,ra).
$code=<<___;
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set	noat
.set	noreorder
.globl	gcm_gmult_4bit
.align	4
.ent	gcm_gmult_4bit
gcm_gmult_4bit:
	.frame	sp,0,ra
	.prologue 0

	ldq	$Xlo,8($Xi)
	ldq	$Xhi,0($Xi)

	bsr	$t0,picmeup
	nop
___

	loop();

# Byte swap of the result: bytes are first reversed inside each 32-bit
# half (shift + zapnot masks), then the two halves are exchanged with the
# 32-bit shifts.  The $Zlo and $Zhi sequences are interleaved.
$code.=<<___;
	srl	$Zlo,24,$t0	# byte swap
	srl	$Zlo,8,$t1

	sll	$Zlo,8,$t2
	sll	$Zlo,24,$Zlo
	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1

	zapnot	$Zlo,0x88,$Zlo
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2

	or	$Zlo,$t0,$Zlo
	srl	$Zhi,24,$t0
	srl	$Zhi,8,$t1

	or	$Zlo,$t2,$Zlo
	sll	$Zhi,8,$t2
	sll	$Zhi,24,$Zhi

	srl	$Zlo,32,$Xlo
	sll	$Zlo,32,$Zlo

	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1
	or	$Zlo,$Xlo,$Xlo

	zapnot	$Zhi,0x88,$Zhi
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2

	or	$Zhi,$t0,$Zhi
	or	$Zhi,$t2,$Zhi

	srl	$Zhi,32,$Xhi
	sll	$Zhi,32,$Zhi

	or	$Zhi,$Xhi,$Xhi
	stq	$Xlo,8($Xi)
	stq	$Xhi,0($Xi)

	ret	(ra)
.end	gcm_gmult_4bit
___

# gcm_ghash_4bit keeps the current 16-byte input block in two
# callee-saved registers; the emitted prologue/epilogue saves and
# restores them.
$inhi="s0";
$inlo="s1";

# Emit gcm_ghash_4bit.  The emitted function reads four arguments:
# Xi ($Xi), Htbl ($Htbl), inp ($inp) and len ($len).
#
# Prologue: a 32-byte frame holding ra, s0 and s1 (matching the .mask
# value), the ldq_u loads for the first input block, the load of Xi, and
# the picmeup call that sets $rem_4bit.
#
# .Louter, once per 16-byte block: the ldq_u results are combined with
# extql/extqh, so inp is read with unaligned-safe loads; inp is advanced
# by 16 and len reduced by 16; the block is XORed into $Xhi/$Xlo; then
# one multiplication follows (emitted by loop()).
# NOTE(review): the exit test is "beq $len" after the subtraction, so len
# is presumably required to be a non-zero multiple of 16 - confirm with
# the callers.
$code.=<<___;
.globl	gcm_ghash_4bit
.align	4
.ent	gcm_ghash_4bit
gcm_ghash_4bit:
	lda	sp,-32(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	.mask	0x04000600,-32
	.frame	sp,32,ra
	.prologue 0

	ldq_u	$inhi,0($inp)
	ldq_u	$Thi0,7($inp)
	ldq_u	$inlo,8($inp)
	ldq_u	$Tlo0,15($inp)
	ldq	$Xhi,0($Xi)
	ldq	$Xlo,8($Xi)

	bsr	$t0,picmeup
	nop

.Louter:
	extql	$inhi,$inp,$inhi
	extqh	$Thi0,$inp,$Thi0
	or	$inhi,$Thi0,$inhi
	lda	$inp,16($inp)

	extql	$inlo,$inp,$inlo
	extqh	$Tlo0,$inp,$Tlo0
	or	$inlo,$Tlo0,$inlo
	subq	$len,16,$len

	xor	$Xlo,$inlo,$Xlo
	xor	$Xhi,$inhi,$Xhi
___

	loop();

# Tail of the outer loop and the rest of the generated file:
#  * the same byte swap as in gcm_gmult_4bit, split in two variants: the
#    fall-through path interleaves the ldq_u loads for the next block and
#    branches back to .Louter, while .Ldone finishes the swap, stores Xi,
#    restores s0/s1 and returns.  ra is saved but not reloaded (the load
#    is commented out in the assembly); the only calls made are
#    "bsr $t0,picmeup", which link through $t0.
#  * picmeup: "br" leaves the address of .Lpic in $rem_4bit, and adding
#    12 steps over the three instruction words that follow (lda, ret, nop)
#    so that $rem_4bit ends up pointing at the rem_4bit table.
#  * rem_4bit: 16 entries of 8 bytes each, written as pairs of .long.
$code.=<<___;
	srl	$Zlo,24,$t0	# byte swap
	srl	$Zlo,8,$t1

	sll	$Zlo,8,$t2
	sll	$Zlo,24,$Zlo
	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1

	zapnot	$Zlo,0x88,$Zlo
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2

	or	$Zlo,$t0,$Zlo
	srl	$Zhi,24,$t0
	srl	$Zhi,8,$t1

	or	$Zlo,$t2,$Zlo
	sll	$Zhi,8,$t2
	sll	$Zhi,24,$Zhi

	srl	$Zlo,32,$Xlo
	sll	$Zlo,32,$Zlo
	beq	$len,.Ldone

	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1
	or	$Zlo,$Xlo,$Xlo
	ldq_u	$inhi,0($inp)

	zapnot	$Zhi,0x88,$Zhi
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2
	ldq_u	$Thi0,7($inp)

	or	$Zhi,$t0,$Zhi
	or	$Zhi,$t2,$Zhi
	ldq_u	$inlo,8($inp)
	ldq_u	$Tlo0,15($inp)

	srl	$Zhi,32,$Xhi
	sll	$Zhi,32,$Zhi

	or	$Zhi,$Xhi,$Xhi
	br	zero,.Louter

.Ldone:
	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1
	or	$Zlo,$Xlo,$Xlo

	zapnot	$Zhi,0x88,$Zhi
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2

	or	$Zhi,$t0,$Zhi
	or	$Zhi,$t2,$Zhi

	srl	$Zhi,32,$Xhi
	sll	$Zhi,32,$Zhi

	or	$Zhi,$Xhi,$Xhi

	stq	$Xlo,8($Xi)
	stq	$Xhi,0($Xi)

	.set	noreorder
	/*ldq	ra,0(sp)*/
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	lda	sp,32(sp)
	ret	(ra)
.end	gcm_ghash_4bit

.align	4
.ent	picmeup
picmeup:
	.frame	sp,0,$t0
	.prologue 0
	br	$rem_4bit,.Lpic
.Lpic:	lda	$rem_4bit,12($rem_4bit)
	ret	($t0)
.end	picmeup
	nop
rem_4bit:
	.long	0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16
	.long	0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16
	.long	0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16
	.long	0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16
.ascii	"GHASH for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	4

___
# Write the generated assembly.  The last command-line argument, when
# present and true, names the output file and STDOUT is redirected to it;
# otherwise the text goes to the inherited STDOUT.
#
# The redirect uses three-argument open so the file name can never be
# read as part of the mode, and a failed open is fatal instead of
# silently sending the assembly to the wrong stream.
$output=pop;
if ($output) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}
print $code;
# Buffered write errors only surface at close time, so check it.
close STDOUT or die "error closing STDOUT: $!";