1e1051a39Sopenharmony_ci#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci
9e1051a39Sopenharmony_ci#
10e1051a39Sopenharmony_ci# ====================================================================
11e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
13e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
14e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
15e1051a39Sopenharmony_ci# ====================================================================
16e1051a39Sopenharmony_ci
17e1051a39Sopenharmony_ci# March 2016
18e1051a39Sopenharmony_ci#
19e1051a39Sopenharmony_ci# Initial support for Fujitsu SPARC64 X/X+ comprises minimally
20e1051a39Sopenharmony_ci# required key setup and single-block procedures.
21e1051a39Sopenharmony_ci#
22e1051a39Sopenharmony_ci# April 2016
23e1051a39Sopenharmony_ci#
24e1051a39Sopenharmony_ci# Add "teaser" CBC and CTR mode-specific subroutines. "Teaser" means
25e1051a39Sopenharmony_ci# that the parallelizable nature of CBC decrypt and CTR is not utilized
26e1051a39Sopenharmony_ci# yet. CBC encrypt, on the other hand, is as good as it can possibly
27e1051a39Sopenharmony_ci# get, processing one byte in 4.1 cycles with a 128-bit key on SPARC64 X.
28e1051a39Sopenharmony_ci# This is ~6x faster than a pure software implementation...
29e1051a39Sopenharmony_ci#
30e1051a39Sopenharmony_ci# July 2016
31e1051a39Sopenharmony_ci#
32e1051a39Sopenharmony_ci# Switch from faligndata to fshiftorx, which allows omitting alignaddr
33e1051a39Sopenharmony_ci# instructions and improves single-block and short-input performance
34e1051a39Sopenharmony_ci# with misaligned data.
35e1051a39Sopenharmony_ci
# Take the last command-line argument as the output file name and, when it
# is a true value, reopen STDOUT onto that file.  `and` binds more loosely
# than `=`, so the open is skipped if pop yields nothing (no arguments).
# NOTE(review): the return value of open is not checked here, and the
# two-argument form is used; confirm that silent failure is acceptable to
# whatever invokes this script.
36e1051a39Sopenharmony_ci$output = pop and open STDOUT,">$output";
37e1051a39Sopenharmony_ci
# ----------------------------------------------------------------------
# Single-block routines: aes_fx_encrypt and aes_fx_decrypt.
#
# The heredoc below is appended to $code.  Besides the two routines it
# also carries the preprocessor preamble for the generated assembly (the
# sparc_arch.h include and the LOCALS definition) and the .text directive.
#
# Register aliases produced by the map() below:
#   $inp=%o0  $out=%o1  $key=%o2  $rounds=%o3  $tmp=%o4  $mask=%o5
#
# The two routines are line-for-line parallel; they differ only in using
# faesdecx/faesdeclx instead of faesencx/faesenclx and in label names.
# Each one proceeds as follows:
#
#  * The round count is read as a word from [$key + 240].
#  * `call .+8` is used only to get the current PC into %o7 so that the
#    .Linp_align table can be addressed relative to it.  The caller's %o7
#    is parked in %g1 first and put back in the delay slot of the final
#    conditional branch, before either retl.
#    (.Linp_align itself is defined outside this section of the file.)
#  * $inp is rounded down to an 8-byte boundary.  If the low three address
#    bits were non-zero, a third doubleword at [$inp + 16] is loaded as
#    well and two fshiftorx instructions reassemble the 16 input bytes,
#    using a parameter doubleword at .Linp_align + misalignment*8.
#  * The block is XORed with round key 0.  $rounds is pre-decremented by 4;
#    the loop then applies two rounds per pass, advancing $key by 32 bytes,
#    and the `sub $rounds, 2` sitting in the `brnz,a` delay slot executes
#    only when the branch is taken.  Two more rounds follow the loop, the
#    last one using the "l" (last-round) instruction form.
#  * If $out is 8-byte aligned, the two result doublewords are stored
#    directly.  Otherwise the result is shifted with fshiftorx (parameters
#    at %o7 + 64 + misalignment*8, labelled "shift right params" below) and
#    written as partial store / full doubleword / partial store, with
#    $mask = 0xff >> misalignment for the head and its complement for the
#    tail.  On that path $inp is reused to hold $out + 16 and $tmp to hold
#    the complemented mask.
# ----------------------------------------------------------------------
38e1051a39Sopenharmony_ci{
39e1051a39Sopenharmony_cimy ($inp,$out,$key,$rounds,$tmp,$mask) = map("%o$_",(0..5));
40e1051a39Sopenharmony_ci
41e1051a39Sopenharmony_ci$code.=<<___;
42e1051a39Sopenharmony_ci#ifndef __ASSEMBLER__
43e1051a39Sopenharmony_ci# define __ASSEMBLER__ 1
44e1051a39Sopenharmony_ci#endif
45e1051a39Sopenharmony_ci#include "crypto/sparc_arch.h"
46e1051a39Sopenharmony_ci
47e1051a39Sopenharmony_ci#define LOCALS (STACK_BIAS+STACK_FRAME)
48e1051a39Sopenharmony_ci
49e1051a39Sopenharmony_ci.text
50e1051a39Sopenharmony_ci
51e1051a39Sopenharmony_ci.globl	aes_fx_encrypt
52e1051a39Sopenharmony_ci.align	32
53e1051a39Sopenharmony_ciaes_fx_encrypt:
54e1051a39Sopenharmony_ci	and		$inp, 7, $tmp		! is input aligned?
55e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
56e1051a39Sopenharmony_ci	ldd		[$key +  0], %f6	! round[0]
57e1051a39Sopenharmony_ci	ldd		[$key +  8], %f8
58e1051a39Sopenharmony_ci	mov		%o7, %g1
59e1051a39Sopenharmony_ci	ld		[$key + 240], $rounds
60e1051a39Sopenharmony_ci
61e1051a39Sopenharmony_ci1:	call		.+8
62e1051a39Sopenharmony_ci	add		%o7, .Linp_align-1b, %o7
63e1051a39Sopenharmony_ci
64e1051a39Sopenharmony_ci	sll		$tmp, 3, $tmp
65e1051a39Sopenharmony_ci	ldd		[$inp + 0], %f0		! load input
66e1051a39Sopenharmony_ci	brz,pt		$tmp, .Lenc_inp_aligned
67e1051a39Sopenharmony_ci	ldd		[$inp + 8], %f2
68e1051a39Sopenharmony_ci
69e1051a39Sopenharmony_ci	ldd		[%o7 + $tmp], %f14	! shift left params
70e1051a39Sopenharmony_ci	ldd		[$inp + 16], %f4
71e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, %f14, %f0
72e1051a39Sopenharmony_ci	fshiftorx	%f2, %f4, %f14, %f2
73e1051a39Sopenharmony_ci
74e1051a39Sopenharmony_ci.Lenc_inp_aligned:
75e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
76e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
77e1051a39Sopenharmony_ci
78e1051a39Sopenharmony_ci	fxor		%f0, %f6, %f0		! ^=round[0]
79e1051a39Sopenharmony_ci	fxor		%f2, %f8, %f2
80e1051a39Sopenharmony_ci	ldd		[$key + 32], %f6	! round[2]
81e1051a39Sopenharmony_ci	ldd		[$key + 40], %f8
82e1051a39Sopenharmony_ci	add		$key, 32, $key
83e1051a39Sopenharmony_ci	sub		$rounds, 4, $rounds
84e1051a39Sopenharmony_ci
85e1051a39Sopenharmony_ci.Loop_enc:
86e1051a39Sopenharmony_ci	fmovd		%f0, %f4
87e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
88e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
89e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10
90e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
91e1051a39Sopenharmony_ci	add		$key, 32, $key
92e1051a39Sopenharmony_ci
93e1051a39Sopenharmony_ci	fmovd		%f0, %f4
94e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
95e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
96e1051a39Sopenharmony_ci	ldd		[$key +  0], %f6
97e1051a39Sopenharmony_ci	ldd		[$key +  8], %f8
98e1051a39Sopenharmony_ci
99e1051a39Sopenharmony_ci	brnz,a		$rounds, .Loop_enc
100e1051a39Sopenharmony_ci	sub		$rounds, 2, $rounds
101e1051a39Sopenharmony_ci
102e1051a39Sopenharmony_ci	andcc		$out, 7, $tmp		! is output aligned?
103e1051a39Sopenharmony_ci	andn		$out, 7, $out
104e1051a39Sopenharmony_ci	mov		0xff, $mask
105e1051a39Sopenharmony_ci	srl		$mask, $tmp, $mask
106e1051a39Sopenharmony_ci	add		%o7, 64, %o7
107e1051a39Sopenharmony_ci	sll		$tmp, 3, $tmp
108e1051a39Sopenharmony_ci
109e1051a39Sopenharmony_ci	fmovd		%f0, %f4
110e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
111e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
112e1051a39Sopenharmony_ci	ldd		[%o7 + $tmp], %f14	! shift right params
113e1051a39Sopenharmony_ci
114e1051a39Sopenharmony_ci	fmovd		%f0, %f4
115e1051a39Sopenharmony_ci	faesenclx	%f2, %f6, %f0
116e1051a39Sopenharmony_ci	faesenclx	%f4, %f8, %f2
117e1051a39Sopenharmony_ci
118e1051a39Sopenharmony_ci	bnz,pn		%icc, .Lenc_out_unaligned
119e1051a39Sopenharmony_ci	mov		%g1, %o7
120e1051a39Sopenharmony_ci
121e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
122e1051a39Sopenharmony_ci	retl
123e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
124e1051a39Sopenharmony_ci
125e1051a39Sopenharmony_ci.align	16
126e1051a39Sopenharmony_ci.Lenc_out_unaligned:
127e1051a39Sopenharmony_ci	add		$out, 16, $inp
128e1051a39Sopenharmony_ci	orn		%g0, $mask, $tmp
129e1051a39Sopenharmony_ci	fshiftorx	%f0, %f0, %f14, %f4
130e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, %f14, %f6
131e1051a39Sopenharmony_ci	fshiftorx	%f2, %f2, %f14, %f8
132e1051a39Sopenharmony_ci
133e1051a39Sopenharmony_ci	stda		%f4, [$out + $mask]0xc0	! partial store
134e1051a39Sopenharmony_ci	std		%f6, [$out + 8]
135e1051a39Sopenharmony_ci	stda		%f8, [$inp + $tmp]0xc0	! partial store
136e1051a39Sopenharmony_ci	retl
137e1051a39Sopenharmony_ci	nop
138e1051a39Sopenharmony_ci.type	aes_fx_encrypt,#function
139e1051a39Sopenharmony_ci.size	aes_fx_encrypt,.-aes_fx_encrypt
140e1051a39Sopenharmony_ci
141e1051a39Sopenharmony_ci.globl	aes_fx_decrypt
142e1051a39Sopenharmony_ci.align	32
143e1051a39Sopenharmony_ciaes_fx_decrypt:
144e1051a39Sopenharmony_ci	and		$inp, 7, $tmp		! is input aligned?
145e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
146e1051a39Sopenharmony_ci	ldd		[$key +  0], %f6	! round[0]
147e1051a39Sopenharmony_ci	ldd		[$key +  8], %f8
148e1051a39Sopenharmony_ci	mov		%o7, %g1
149e1051a39Sopenharmony_ci	ld		[$key + 240], $rounds
150e1051a39Sopenharmony_ci
151e1051a39Sopenharmony_ci1:	call		.+8
152e1051a39Sopenharmony_ci	add		%o7, .Linp_align-1b, %o7
153e1051a39Sopenharmony_ci
154e1051a39Sopenharmony_ci	sll		$tmp, 3, $tmp
155e1051a39Sopenharmony_ci	ldd		[$inp + 0], %f0		! load input
156e1051a39Sopenharmony_ci	brz,pt		$tmp, .Ldec_inp_aligned
157e1051a39Sopenharmony_ci	ldd		[$inp + 8], %f2
158e1051a39Sopenharmony_ci
159e1051a39Sopenharmony_ci	ldd		[%o7 + $tmp], %f14	! shift left params
160e1051a39Sopenharmony_ci	ldd		[$inp + 16], %f4
161e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, %f14, %f0
162e1051a39Sopenharmony_ci	fshiftorx	%f2, %f4, %f14, %f2
163e1051a39Sopenharmony_ci
164e1051a39Sopenharmony_ci.Ldec_inp_aligned:
165e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
166e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
167e1051a39Sopenharmony_ci
168e1051a39Sopenharmony_ci	fxor		%f0, %f6, %f0		! ^=round[0]
169e1051a39Sopenharmony_ci	fxor		%f2, %f8, %f2
170e1051a39Sopenharmony_ci	ldd		[$key + 32], %f6	! round[2]
171e1051a39Sopenharmony_ci	ldd		[$key + 40], %f8
172e1051a39Sopenharmony_ci	add		$key, 32, $key
173e1051a39Sopenharmony_ci	sub		$rounds, 4, $rounds
174e1051a39Sopenharmony_ci
175e1051a39Sopenharmony_ci.Loop_dec:
176e1051a39Sopenharmony_ci	fmovd		%f0, %f4
177e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
178e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
179e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10
180e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
181e1051a39Sopenharmony_ci	add		$key, 32, $key
182e1051a39Sopenharmony_ci
183e1051a39Sopenharmony_ci	fmovd		%f0, %f4
184e1051a39Sopenharmony_ci	faesdecx	%f2, %f6, %f0
185e1051a39Sopenharmony_ci	faesdecx	%f4, %f8, %f2
186e1051a39Sopenharmony_ci	ldd		[$key +  0], %f6
187e1051a39Sopenharmony_ci	ldd		[$key +  8], %f8
188e1051a39Sopenharmony_ci
189e1051a39Sopenharmony_ci	brnz,a		$rounds, .Loop_dec
190e1051a39Sopenharmony_ci	sub		$rounds, 2, $rounds
191e1051a39Sopenharmony_ci
192e1051a39Sopenharmony_ci	andcc		$out, 7, $tmp		! is output aligned?
193e1051a39Sopenharmony_ci	andn		$out, 7, $out
194e1051a39Sopenharmony_ci	mov		0xff, $mask
195e1051a39Sopenharmony_ci	srl		$mask, $tmp, $mask
196e1051a39Sopenharmony_ci	add		%o7, 64, %o7
197e1051a39Sopenharmony_ci	sll		$tmp, 3, $tmp
198e1051a39Sopenharmony_ci
199e1051a39Sopenharmony_ci	fmovd		%f0, %f4
200e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
201e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
202e1051a39Sopenharmony_ci	ldd		[%o7 + $tmp], %f14	! shift right params
203e1051a39Sopenharmony_ci
204e1051a39Sopenharmony_ci	fmovd		%f0, %f4
205e1051a39Sopenharmony_ci	faesdeclx	%f2, %f6, %f0
206e1051a39Sopenharmony_ci	faesdeclx	%f4, %f8, %f2
207e1051a39Sopenharmony_ci
208e1051a39Sopenharmony_ci	bnz,pn		%icc, .Ldec_out_unaligned
209e1051a39Sopenharmony_ci	mov		%g1, %o7
210e1051a39Sopenharmony_ci
211e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
212e1051a39Sopenharmony_ci	retl
213e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
214e1051a39Sopenharmony_ci
215e1051a39Sopenharmony_ci.align	16
216e1051a39Sopenharmony_ci.Ldec_out_unaligned:
217e1051a39Sopenharmony_ci	add		$out, 16, $inp
218e1051a39Sopenharmony_ci	orn		%g0, $mask, $tmp
219e1051a39Sopenharmony_ci	fshiftorx	%f0, %f0, %f14, %f4
220e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, %f14, %f6
221e1051a39Sopenharmony_ci	fshiftorx	%f2, %f2, %f14, %f8
222e1051a39Sopenharmony_ci
223e1051a39Sopenharmony_ci	stda		%f4, [$out + $mask]0xc0	! partial store
224e1051a39Sopenharmony_ci	std		%f6, [$out + 8]
225e1051a39Sopenharmony_ci	stda		%f8, [$inp + $tmp]0xc0	! partial store
226e1051a39Sopenharmony_ci	retl
227e1051a39Sopenharmony_ci	nop
228e1051a39Sopenharmony_ci.type	aes_fx_decrypt,#function
229e1051a39Sopenharmony_ci.size	aes_fx_decrypt,.-aes_fx_decrypt
230e1051a39Sopenharmony_ci___
231e1051a39Sopenharmony_ci}
# ----------------------------------------------------------------------
# Key setup: aes_fx_set_encrypt_key and aes_fx_set_decrypt_key.
#
# Register aliases produced by the map() below:
#   $inp=%o0  $bits=%o1  $out=%o2  $tmp=%o3  $inc=%o4
# (map() yields six names; the sixth, %o5, is not bound to a variable.)
#
# Both entry points share one body at .Lset_encrypt_key and differ only in
# $inc: +1 for the encrypt entry, -1 for the decrypt entry (set in the
# delay slot of its unconditional branch; the retl/nop after that branch
# is never reached).
#
# Shared body:
#  * $inp is rounded down to 8 bytes and the fshiftorx parameter doubleword
#    at .Linp_align + misalignment*8 is loaded into %f10 (also when the
#    input is aligned).  .Linp_align is defined outside this section.
#    %o7 is saved in %g1 around the `call .+8` PC fetch and restored
#    immediately afterwards.
#  * $bits is compared with 192: less-than goes to .L128, equal goes to
#    .L192, anything else falls through to the 256-bit path.  No other
#    validation of $bits is performed here.
#  * For misaligned input each path loads one extra doubleword past the
#    key material and realigns with fshiftorx.
#  * Each path stores its round count (14, 12 or 10) at [$out + 240]
#    before $out is adjusted.  `and $inc, N*16, $tmp` gives 0 when
#    $inc is 1 and N*16 when $inc is -1, so $out begins at the start of
#    the schedule for encryption and at its last 16-byte slot for
#    decryption; $inc is then scaled to +16 or -16 and added to $out after
#    every 16 bytes written.
#  * Every path returns 0 in %o0.
#
# The backquoted expressions inside the heredocs (e.g. `14*16`,
# `0x10+$i`) are presumably evaluated by a post-processing pass elsewhere
# in this script - that pass is not visible in this section.
# ----------------------------------------------------------------------
232e1051a39Sopenharmony_ci{
233e1051a39Sopenharmony_cimy ($inp,$bits,$out,$tmp,$inc) = map("%o$_",(0..5));
234e1051a39Sopenharmony_ci$code.=<<___;
235e1051a39Sopenharmony_ci.globl	aes_fx_set_decrypt_key
236e1051a39Sopenharmony_ci.align	32
237e1051a39Sopenharmony_ciaes_fx_set_decrypt_key:
238e1051a39Sopenharmony_ci	b		.Lset_encrypt_key
239e1051a39Sopenharmony_ci	mov		-1, $inc
240e1051a39Sopenharmony_ci	retl
241e1051a39Sopenharmony_ci	nop
242e1051a39Sopenharmony_ci.type	aes_fx_set_decrypt_key,#function
243e1051a39Sopenharmony_ci.size	aes_fx_set_decrypt_key,.-aes_fx_set_decrypt_key
244e1051a39Sopenharmony_ci
245e1051a39Sopenharmony_ci.globl	aes_fx_set_encrypt_key
246e1051a39Sopenharmony_ci.align	32
247e1051a39Sopenharmony_ciaes_fx_set_encrypt_key:
248e1051a39Sopenharmony_ci	mov		1, $inc
249e1051a39Sopenharmony_ci	nop
250e1051a39Sopenharmony_ci.Lset_encrypt_key:
251e1051a39Sopenharmony_ci	and		$inp, 7, $tmp
252e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
253e1051a39Sopenharmony_ci	sll		$tmp, 3, $tmp
254e1051a39Sopenharmony_ci	mov		%o7, %g1
255e1051a39Sopenharmony_ci
256e1051a39Sopenharmony_ci1:	call		.+8
257e1051a39Sopenharmony_ci	add		%o7, .Linp_align-1b, %o7
258e1051a39Sopenharmony_ci
259e1051a39Sopenharmony_ci	ldd		[%o7 + $tmp], %f10	! shift left params
260e1051a39Sopenharmony_ci	mov		%g1, %o7
261e1051a39Sopenharmony_ci
262e1051a39Sopenharmony_ci	cmp		$bits, 192
263e1051a39Sopenharmony_ci	ldd		[$inp + 0], %f0
264e1051a39Sopenharmony_ci	bl,pt		%icc, .L128
265e1051a39Sopenharmony_ci	ldd		[$inp + 8], %f2
266e1051a39Sopenharmony_ci
267e1051a39Sopenharmony_ci	be,pt		%icc, .L192
268e1051a39Sopenharmony_ci	ldd		[$inp + 16], %f4
269e1051a39Sopenharmony_ci	brz,pt		$tmp, .L256aligned
270e1051a39Sopenharmony_ci	ldd		[$inp + 24], %f6
271e1051a39Sopenharmony_ci
272e1051a39Sopenharmony_ci	ldd		[$inp + 32], %f8
273e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, %f10, %f0
274e1051a39Sopenharmony_ci	fshiftorx	%f2, %f4, %f10, %f2
275e1051a39Sopenharmony_ci	fshiftorx	%f4, %f6, %f10, %f4
276e1051a39Sopenharmony_ci	fshiftorx	%f6, %f8, %f10, %f6
277e1051a39Sopenharmony_ci
278e1051a39Sopenharmony_ci.L256aligned:
279e1051a39Sopenharmony_ci	mov		14, $bits
280e1051a39Sopenharmony_ci	and		$inc, `14*16`, $tmp
281e1051a39Sopenharmony_ci	st		$bits, [$out + 240]	! store rounds
282e1051a39Sopenharmony_ci	add		$out, $tmp, $out	! start or end of key schedule
283e1051a39Sopenharmony_ci	sllx		$inc, 4, $inc		! 16 or -16
284e1051a39Sopenharmony_ci___
# 256-bit schedule: the body below is emitted six times.  Each copy stores
# two 16-byte slots and derives the next four doublewords with faeskeyx,
# passing 0x10+$i as the immediate of the first faeskeyx of the copy.
# $i is a package variable (no `my`), so it is still 6 after the loop and
# the tail heredoc that follows uses it in `0x10+$i`.
# Slots written in total: 6*2 in the loop + 3 in the tail = 15.
285e1051a39Sopenharmony_cifor ($i=0; $i<6; $i++) {
286e1051a39Sopenharmony_ci    $code.=<<___;
287e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
288e1051a39Sopenharmony_ci	faeskeyx	%f6, `0x10+$i`, %f0
289e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
290e1051a39Sopenharmony_ci	add		$out, $inc, $out
291e1051a39Sopenharmony_ci	faeskeyx	%f0, 0x00, %f2
292e1051a39Sopenharmony_ci	std		%f4, [$out + 0]
293e1051a39Sopenharmony_ci	faeskeyx	%f2, 0x01, %f4
294e1051a39Sopenharmony_ci	std		%f6, [$out + 8]
295e1051a39Sopenharmony_ci	add		$out, $inc, $out
296e1051a39Sopenharmony_ci	faeskeyx	%f4, 0x00, %f6
297e1051a39Sopenharmony_ci___
298e1051a39Sopenharmony_ci}
# Tail of the 256-bit path (uses $i == 6 left over from the loop above),
# followed by the entry of the 192-bit path.
299e1051a39Sopenharmony_ci$code.=<<___;
300e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
301e1051a39Sopenharmony_ci	faeskeyx	%f6, `0x10+$i`, %f0
302e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
303e1051a39Sopenharmony_ci	add		$out, $inc, $out
304e1051a39Sopenharmony_ci	faeskeyx	%f0, 0x00, %f2
305e1051a39Sopenharmony_ci	std		%f4,[$out + 0]
306e1051a39Sopenharmony_ci	std		%f6,[$out + 8]
307e1051a39Sopenharmony_ci	add		$out, $inc, $out
308e1051a39Sopenharmony_ci	std		%f0,[$out + 0]
309e1051a39Sopenharmony_ci	std		%f2,[$out + 8]
310e1051a39Sopenharmony_ci	retl
311e1051a39Sopenharmony_ci	xor		%o0, %o0, %o0		! return 0
312e1051a39Sopenharmony_ci
313e1051a39Sopenharmony_ci.align	16
314e1051a39Sopenharmony_ci.L192:
315e1051a39Sopenharmony_ci	brz,pt		$tmp, .L192aligned
316e1051a39Sopenharmony_ci	nop
317e1051a39Sopenharmony_ci
318e1051a39Sopenharmony_ci	ldd		[$inp + 24], %f6
319e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, %f10, %f0
320e1051a39Sopenharmony_ci	fshiftorx	%f2, %f4, %f10, %f2
321e1051a39Sopenharmony_ci	fshiftorx	%f4, %f6, %f10, %f4
322e1051a39Sopenharmony_ci
323e1051a39Sopenharmony_ci.L192aligned:
324e1051a39Sopenharmony_ci	mov		12, $bits
325e1051a39Sopenharmony_ci	and		$inc, `12*16`, $tmp
326e1051a39Sopenharmony_ci	st		$bits, [$out + 240]	! store rounds
327e1051a39Sopenharmony_ci	add		$out, $tmp, $out	! start or end of key schedule
328e1051a39Sopenharmony_ci	sllx		$inc, 4, $inc		! 16 or -16
329e1051a39Sopenharmony_ci___
# 192-bit schedule: four copies ($i = 0, 2, 4, 6), each storing three
# 16-byte slots and using two consecutive immediates, 0x10+$i and
# 0x10+$i+1.  The trailing faeskeyx that prepares %f4 for the next copy is
# emitted only while $i < 6, i.e. it is left out of the last copy.
# Slots written in total: 4*3 in the loop + 1 after it = 13.
330e1051a39Sopenharmony_cifor ($i=0; $i<8; $i+=2) {
331e1051a39Sopenharmony_ci    $code.=<<___;
332e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
333e1051a39Sopenharmony_ci	faeskeyx	%f4, `0x10+$i`, %f0
334e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
335e1051a39Sopenharmony_ci	add		$out, $inc, $out
336e1051a39Sopenharmony_ci	faeskeyx	%f0, 0x00, %f2
337e1051a39Sopenharmony_ci	std		%f4, [$out + 0]
338e1051a39Sopenharmony_ci	faeskeyx	%f2, 0x00, %f4
339e1051a39Sopenharmony_ci	std		%f0, [$out + 8]
340e1051a39Sopenharmony_ci	add		$out, $inc, $out
341e1051a39Sopenharmony_ci	faeskeyx	%f4, `0x10+$i+1`, %f0
342e1051a39Sopenharmony_ci	std		%f2, [$out + 0]
343e1051a39Sopenharmony_ci	faeskeyx	%f0, 0x00, %f2
344e1051a39Sopenharmony_ci	std		%f4, [$out + 8]
345e1051a39Sopenharmony_ci	add		$out, $inc, $out
346e1051a39Sopenharmony_ci___
347e1051a39Sopenharmony_ci$code.=<<___		if ($i<6);
348e1051a39Sopenharmony_ci	faeskeyx	%f2, 0x00, %f4
349e1051a39Sopenharmony_ci___
350e1051a39Sopenharmony_ci}
# Final slot of the 192-bit path, followed by the entry of the 128-bit path.
351e1051a39Sopenharmony_ci$code.=<<___;
352e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
353e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
354e1051a39Sopenharmony_ci	retl
355e1051a39Sopenharmony_ci	xor		%o0, %o0, %o0		! return 0
356e1051a39Sopenharmony_ci
357e1051a39Sopenharmony_ci.align	16
358e1051a39Sopenharmony_ci.L128:
359e1051a39Sopenharmony_ci	brz,pt		$tmp, .L128aligned
360e1051a39Sopenharmony_ci	nop
361e1051a39Sopenharmony_ci
362e1051a39Sopenharmony_ci	ldd		[$inp + 16], %f4
363e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, %f10, %f0
364e1051a39Sopenharmony_ci	fshiftorx	%f2, %f4, %f10, %f2
365e1051a39Sopenharmony_ci
366e1051a39Sopenharmony_ci.L128aligned:
367e1051a39Sopenharmony_ci	mov		10, $bits
368e1051a39Sopenharmony_ci	and		$inc, `10*16`, $tmp
369e1051a39Sopenharmony_ci	st		$bits, [$out + 240]	! store rounds
370e1051a39Sopenharmony_ci	add		$out, $tmp, $out	! start or end of key schedule
371e1051a39Sopenharmony_ci	sllx		$inc, 4, $inc		! 16 or -16
372e1051a39Sopenharmony_ci___
# 128-bit schedule: ten copies, each storing one 16-byte slot and deriving
# the next one with two faeskeyx instructions (immediate 0x10+$i on the
# first).  Slots written in total: 10 in the loop + 1 after it = 11.
373e1051a39Sopenharmony_cifor ($i=0; $i<10; $i++) {
374e1051a39Sopenharmony_ci    $code.=<<___;
375e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
376e1051a39Sopenharmony_ci	faeskeyx	%f2, `0x10+$i`, %f0
377e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
378e1051a39Sopenharmony_ci	add		$out, $inc, $out
379e1051a39Sopenharmony_ci	faeskeyx	%f0, 0x00, %f2
380e1051a39Sopenharmony_ci___
381e1051a39Sopenharmony_ci}
# Final slot of the 128-bit path and the symbol type/size directives, which
# cover everything from aes_fx_set_encrypt_key through all three paths.
382e1051a39Sopenharmony_ci$code.=<<___;
383e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
384e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
385e1051a39Sopenharmony_ci	retl
386e1051a39Sopenharmony_ci	xor		%o0, %o0, %o0		! return 0
387e1051a39Sopenharmony_ci.type	aes_fx_set_encrypt_key,#function
388e1051a39Sopenharmony_ci.size	aes_fx_set_encrypt_key,.-aes_fx_set_encrypt_key
389e1051a39Sopenharmony_ci___
390e1051a39Sopenharmony_ci}
391e1051a39Sopenharmony_ci{
392e1051a39Sopenharmony_cimy ($inp,$out,$len,$key,$ivp,$dir) = map("%i$_",(0..5));
393e1051a39Sopenharmony_cimy ($rounds,$inner,$end,$inc,$ialign,$oalign,$mask) = map("%l$_",(0..7));
394e1051a39Sopenharmony_cimy ($iv0,$iv1,$r0hi,$r0lo,$rlhi,$rllo,$in0,$in1,$intail,$outhead,$fshift)
395e1051a39Sopenharmony_ci   = map("%f$_",grep { !($_ & 1) } (16 .. 62));
396e1051a39Sopenharmony_cimy ($ileft,$iright) = ($ialign,$oalign);
397e1051a39Sopenharmony_ci
398e1051a39Sopenharmony_ci$code.=<<___;
399e1051a39Sopenharmony_ci.globl	aes_fx_cbc_encrypt
400e1051a39Sopenharmony_ci.align	32
401e1051a39Sopenharmony_ciaes_fx_cbc_encrypt:
402e1051a39Sopenharmony_ci	save		%sp, -STACK_FRAME-16, %sp
403e1051a39Sopenharmony_ci	srln		$len, 4, $len
404e1051a39Sopenharmony_ci	and		$inp, 7, $ialign
405e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
406e1051a39Sopenharmony_ci	brz,pn		$len, .Lcbc_no_data
407e1051a39Sopenharmony_ci	sll		$ialign, 3, $ileft
408e1051a39Sopenharmony_ci
409e1051a39Sopenharmony_ci1:	call		.+8
410e1051a39Sopenharmony_ci	add		%o7, .Linp_align-1b, %o7
411e1051a39Sopenharmony_ci
412e1051a39Sopenharmony_ci	ld		[$key + 240], $rounds
413e1051a39Sopenharmony_ci	and		$out, 7, $oalign
414e1051a39Sopenharmony_ci	ld		[$ivp + 0], %f0		! load ivec
415e1051a39Sopenharmony_ci	andn		$out, 7, $out
416e1051a39Sopenharmony_ci	ld		[$ivp + 4], %f1
417e1051a39Sopenharmony_ci	sll		$oalign, 3, $mask
418e1051a39Sopenharmony_ci	ld		[$ivp + 8], %f2
419e1051a39Sopenharmony_ci	ld		[$ivp + 12], %f3
420e1051a39Sopenharmony_ci
421e1051a39Sopenharmony_ci	sll		$rounds, 4, $rounds
422e1051a39Sopenharmony_ci	add		$rounds, $key, $end
423e1051a39Sopenharmony_ci	ldd		[$key + 0], $r0hi	! round[0]
424e1051a39Sopenharmony_ci	ldd		[$key + 8], $r0lo
425e1051a39Sopenharmony_ci
426e1051a39Sopenharmony_ci	add		$inp, 16, $inp
427e1051a39Sopenharmony_ci	sub		$len,  1, $len
428e1051a39Sopenharmony_ci	ldd		[$end + 0], $rlhi	! round[last]
429e1051a39Sopenharmony_ci	ldd		[$end + 8], $rllo
430e1051a39Sopenharmony_ci
431e1051a39Sopenharmony_ci	mov		16, $inc
432e1051a39Sopenharmony_ci	movrz		$len, 0, $inc
433e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
434e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
435e1051a39Sopenharmony_ci
436e1051a39Sopenharmony_ci	ldd		[%o7 + $ileft], $fshift	! shift left params
437e1051a39Sopenharmony_ci	add		%o7, 64, %o7
438e1051a39Sopenharmony_ci	ldd		[$inp - 16], $in0	! load input
439e1051a39Sopenharmony_ci	ldd		[$inp -  8], $in1
440e1051a39Sopenharmony_ci	ldda		[$inp]0x82, $intail	! non-faulting load
441e1051a39Sopenharmony_ci	brz		$dir, .Lcbc_decrypt
442e1051a39Sopenharmony_ci	add		$inp, $inc, $inp	! inp+=16
443e1051a39Sopenharmony_ci
444e1051a39Sopenharmony_ci	fxor		$r0hi, %f0, %f0		! ivec^=round[0]
445e1051a39Sopenharmony_ci	fxor		$r0lo, %f2, %f2
446e1051a39Sopenharmony_ci	fshiftorx	$in0, $in1, $fshift, $in0
447e1051a39Sopenharmony_ci	fshiftorx	$in1, $intail, $fshift, $in1
448e1051a39Sopenharmony_ci	nop
449e1051a39Sopenharmony_ci
450e1051a39Sopenharmony_ci.Loop_cbc_enc:
451e1051a39Sopenharmony_ci	fxor		$in0, %f0, %f0		! inp^ivec^round[0]
452e1051a39Sopenharmony_ci	fxor		$in1, %f2, %f2
453e1051a39Sopenharmony_ci	ldd		[$key + 32], %f6	! round[2]
454e1051a39Sopenharmony_ci	ldd		[$key + 40], %f8
455e1051a39Sopenharmony_ci	add		$key, 32, $end
456e1051a39Sopenharmony_ci	sub		$rounds, 16*6, $inner
457e1051a39Sopenharmony_ci
458e1051a39Sopenharmony_ci.Lcbc_enc:
459e1051a39Sopenharmony_ci	fmovd		%f0, %f4
460e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
461e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
462e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10
463e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
464e1051a39Sopenharmony_ci	add		$end, 32, $end
465e1051a39Sopenharmony_ci
466e1051a39Sopenharmony_ci	fmovd		%f0, %f4
467e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
468e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
469e1051a39Sopenharmony_ci	ldd		[$end + 0], %f6
470e1051a39Sopenharmony_ci	ldd		[$end + 8], %f8
471e1051a39Sopenharmony_ci
472e1051a39Sopenharmony_ci	brnz,a		$inner, .Lcbc_enc
473e1051a39Sopenharmony_ci	sub		$inner, 16*2, $inner
474e1051a39Sopenharmony_ci
475e1051a39Sopenharmony_ci	fmovd		%f0, %f4
476e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
477e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
478e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10	! round[last-1]
479e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
480e1051a39Sopenharmony_ci
481e1051a39Sopenharmony_ci	movrz		$len, 0, $inc
482e1051a39Sopenharmony_ci	fmovd		$intail, $in0
483e1051a39Sopenharmony_ci	ldd		[$inp - 8], $in1	! load next input block
484e1051a39Sopenharmony_ci	ldda		[$inp]0x82, $intail	! non-faulting load
485e1051a39Sopenharmony_ci	add		$inp, $inc, $inp	! inp+=16
486e1051a39Sopenharmony_ci
487e1051a39Sopenharmony_ci	fmovd		%f0, %f4
488e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
489e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
490e1051a39Sopenharmony_ci
491e1051a39Sopenharmony_ci	fshiftorx	$in0, $in1, $fshift, $in0
492e1051a39Sopenharmony_ci	fshiftorx	$in1, $intail, $fshift, $in1
493e1051a39Sopenharmony_ci
494e1051a39Sopenharmony_ci	fmovd		%f0, %f4
495e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
496e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
497e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
498e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
499e1051a39Sopenharmony_ci
500e1051a39Sopenharmony_ci	fxor		$r0hi, $in0, $in0	! inp^=round[0]
501e1051a39Sopenharmony_ci	fxor		$r0lo, $in1, $in1
502e1051a39Sopenharmony_ci
503e1051a39Sopenharmony_ci	fmovd		%f0, %f4
504e1051a39Sopenharmony_ci	faesenclx	%f2, $rlhi, %f0
505e1051a39Sopenharmony_ci	faesenclx	%f4, $rllo, %f2
506e1051a39Sopenharmony_ci
507e1051a39Sopenharmony_ci	brnz,pn		$oalign, .Lcbc_enc_unaligned_out
508e1051a39Sopenharmony_ci	nop
509e1051a39Sopenharmony_ci
510e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
511e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
512e1051a39Sopenharmony_ci	add		$out, 16, $out
513e1051a39Sopenharmony_ci
514e1051a39Sopenharmony_ci	brnz,a		$len, .Loop_cbc_enc
515e1051a39Sopenharmony_ci	sub		$len, 1, $len
516e1051a39Sopenharmony_ci
517e1051a39Sopenharmony_ci	st		%f0, [$ivp + 0]		! output ivec
518e1051a39Sopenharmony_ci	st		%f1, [$ivp + 4]
519e1051a39Sopenharmony_ci	st		%f2, [$ivp + 8]
520e1051a39Sopenharmony_ci	st		%f3, [$ivp + 12]
521e1051a39Sopenharmony_ci
522e1051a39Sopenharmony_ci.Lcbc_no_data:
523e1051a39Sopenharmony_ci	ret
524e1051a39Sopenharmony_ci	restore
525e1051a39Sopenharmony_ci
526e1051a39Sopenharmony_ci.align	32
527e1051a39Sopenharmony_ci.Lcbc_enc_unaligned_out:
528e1051a39Sopenharmony_ci	ldd		[%o7 + $mask], $fshift	! shift right params
529e1051a39Sopenharmony_ci	mov		0xff, $mask
530e1051a39Sopenharmony_ci	srl		$mask, $oalign, $mask
531e1051a39Sopenharmony_ci	sub		%g0, $ileft, $iright
532e1051a39Sopenharmony_ci
533e1051a39Sopenharmony_ci	fshiftorx	%f0, %f0, $fshift, %f6
534e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, $fshift, %f8
535e1051a39Sopenharmony_ci
536e1051a39Sopenharmony_ci	stda		%f6, [$out + $mask]0xc0	! partial store
537e1051a39Sopenharmony_ci	orn		%g0, $mask, $mask
538e1051a39Sopenharmony_ci	std		%f8, [$out + 8]
539e1051a39Sopenharmony_ci	add		$out, 16, $out
540e1051a39Sopenharmony_ci	brz		$len, .Lcbc_enc_unaligned_out_done
541e1051a39Sopenharmony_ci	sub		$len, 1, $len
542e1051a39Sopenharmony_ci	b		.Loop_cbc_enc_unaligned_out
543e1051a39Sopenharmony_ci	nop
544e1051a39Sopenharmony_ci
545e1051a39Sopenharmony_ci.align	32
546e1051a39Sopenharmony_ci.Loop_cbc_enc_unaligned_out:
547e1051a39Sopenharmony_ci	fmovd		%f2, $outhead
548e1051a39Sopenharmony_ci	fxor		$in0, %f0, %f0		! inp^ivec^round[0]
549e1051a39Sopenharmony_ci	fxor		$in1, %f2, %f2
550e1051a39Sopenharmony_ci	ldd		[$key + 32], %f6	! round[2]
551e1051a39Sopenharmony_ci	ldd		[$key + 40], %f8
552e1051a39Sopenharmony_ci
553e1051a39Sopenharmony_ci	fmovd		%f0, %f4
554e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
555e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
556e1051a39Sopenharmony_ci	ldd		[$key + 48], %f10	! round[3]
557e1051a39Sopenharmony_ci	ldd		[$key + 56], %f12
558e1051a39Sopenharmony_ci
559e1051a39Sopenharmony_ci	ldx		[$inp - 16], %o0
560e1051a39Sopenharmony_ci	ldx		[$inp -  8], %o1
561e1051a39Sopenharmony_ci	brz		$ileft, .Lcbc_enc_aligned_inp
562e1051a39Sopenharmony_ci	movrz		$len, 0, $inc
563e1051a39Sopenharmony_ci
564e1051a39Sopenharmony_ci	ldx		[$inp], %o2
565e1051a39Sopenharmony_ci	sllx		%o0, $ileft, %o0
566e1051a39Sopenharmony_ci	srlx		%o1, $iright, %g1
567e1051a39Sopenharmony_ci	sllx		%o1, $ileft, %o1
568e1051a39Sopenharmony_ci	or		%g1, %o0, %o0
569e1051a39Sopenharmony_ci	srlx		%o2, $iright, %o2
570e1051a39Sopenharmony_ci	or		%o2, %o1, %o1
571e1051a39Sopenharmony_ci
572e1051a39Sopenharmony_ci.Lcbc_enc_aligned_inp:
573e1051a39Sopenharmony_ci	fmovd		%f0, %f4
574e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
575e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
576e1051a39Sopenharmony_ci	ldd		[$key + 64], %f6	! round[4]
577e1051a39Sopenharmony_ci	ldd		[$key + 72], %f8
578e1051a39Sopenharmony_ci	add		$key, 64, $end
579e1051a39Sopenharmony_ci	sub		$rounds, 16*8, $inner
580e1051a39Sopenharmony_ci
581e1051a39Sopenharmony_ci	stx		%o0, [%sp + LOCALS + 0]
582e1051a39Sopenharmony_ci	stx		%o1, [%sp + LOCALS + 8]
583e1051a39Sopenharmony_ci	add		$inp, $inc, $inp	! inp+=16
584e1051a39Sopenharmony_ci	nop
585e1051a39Sopenharmony_ci
586e1051a39Sopenharmony_ci.Lcbc_enc_unaligned:
587e1051a39Sopenharmony_ci	fmovd		%f0, %f4
588e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
589e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
590e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10
591e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
592e1051a39Sopenharmony_ci	add		$end, 32, $end
593e1051a39Sopenharmony_ci
594e1051a39Sopenharmony_ci	fmovd		%f0, %f4
595e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
596e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
597e1051a39Sopenharmony_ci	ldd		[$end + 0], %f6
598e1051a39Sopenharmony_ci	ldd		[$end + 8], %f8
599e1051a39Sopenharmony_ci
600e1051a39Sopenharmony_ci	brnz,a		$inner, .Lcbc_enc_unaligned
601e1051a39Sopenharmony_ci	sub		$inner, 16*2, $inner
602e1051a39Sopenharmony_ci
603e1051a39Sopenharmony_ci	fmovd		%f0, %f4
604e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
605e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
606e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10	! round[last-1]
607e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
608e1051a39Sopenharmony_ci
609e1051a39Sopenharmony_ci	fmovd		%f0, %f4
610e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
611e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
612e1051a39Sopenharmony_ci
613e1051a39Sopenharmony_ci	ldd		[%sp + LOCALS + 0], $in0
614e1051a39Sopenharmony_ci	ldd		[%sp + LOCALS + 8], $in1
615e1051a39Sopenharmony_ci
616e1051a39Sopenharmony_ci	fmovd		%f0, %f4
617e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
618e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
619e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
620e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
621e1051a39Sopenharmony_ci
622e1051a39Sopenharmony_ci	fxor		$r0hi, $in0, $in0	! inp^=round[0]
623e1051a39Sopenharmony_ci	fxor		$r0lo, $in1, $in1
624e1051a39Sopenharmony_ci
625e1051a39Sopenharmony_ci	fmovd		%f0, %f4
626e1051a39Sopenharmony_ci	faesenclx	%f2, $rlhi, %f0
627e1051a39Sopenharmony_ci	faesenclx	%f4, $rllo, %f2
628e1051a39Sopenharmony_ci
629e1051a39Sopenharmony_ci	fshiftorx	$outhead, %f0, $fshift, %f6
630e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, $fshift, %f8
631e1051a39Sopenharmony_ci	std		%f6, [$out + 0]
632e1051a39Sopenharmony_ci	std		%f8, [$out + 8]
633e1051a39Sopenharmony_ci	add		$out, 16, $out
634e1051a39Sopenharmony_ci
635e1051a39Sopenharmony_ci	brnz,a		$len, .Loop_cbc_enc_unaligned_out
636e1051a39Sopenharmony_ci	sub		$len, 1, $len
637e1051a39Sopenharmony_ci
638e1051a39Sopenharmony_ci.Lcbc_enc_unaligned_out_done:
639e1051a39Sopenharmony_ci	fshiftorx	%f2, %f2, $fshift, %f8
640e1051a39Sopenharmony_ci	stda		%f8, [$out + $mask]0xc0	! partial store
641e1051a39Sopenharmony_ci
642e1051a39Sopenharmony_ci	st		%f0, [$ivp + 0]		! output ivec
643e1051a39Sopenharmony_ci	st		%f1, [$ivp + 4]
644e1051a39Sopenharmony_ci	st		%f2, [$ivp + 8]
645e1051a39Sopenharmony_ci	st		%f3, [$ivp + 12]
646e1051a39Sopenharmony_ci
647e1051a39Sopenharmony_ci	ret
648e1051a39Sopenharmony_ci	restore
649e1051a39Sopenharmony_ci
650e1051a39Sopenharmony_ci.align	32
651e1051a39Sopenharmony_ci.Lcbc_decrypt:
652e1051a39Sopenharmony_ci	fshiftorx	$in0, $in1, $fshift, $in0
653e1051a39Sopenharmony_ci	fshiftorx	$in1, $intail, $fshift, $in1
654e1051a39Sopenharmony_ci	fmovd		%f0, $iv0
655e1051a39Sopenharmony_ci	fmovd		%f2, $iv1
656e1051a39Sopenharmony_ci
657e1051a39Sopenharmony_ci.Loop_cbc_dec:
658e1051a39Sopenharmony_ci	fxor		$in0, $r0hi, %f0	! inp^round[0]
659e1051a39Sopenharmony_ci	fxor		$in1, $r0lo, %f2
660e1051a39Sopenharmony_ci	ldd		[$key + 32], %f6	! round[2]
661e1051a39Sopenharmony_ci	ldd		[$key + 40], %f8
662e1051a39Sopenharmony_ci	add		$key, 32, $end
663e1051a39Sopenharmony_ci	sub		$rounds, 16*6, $inner
664e1051a39Sopenharmony_ci
665e1051a39Sopenharmony_ci.Lcbc_dec:
666e1051a39Sopenharmony_ci	fmovd		%f0, %f4
667e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
668e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
669e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10
670e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
671e1051a39Sopenharmony_ci	add		$end, 32, $end
672e1051a39Sopenharmony_ci
673e1051a39Sopenharmony_ci	fmovd		%f0, %f4
674e1051a39Sopenharmony_ci	faesdecx	%f2, %f6, %f0
675e1051a39Sopenharmony_ci	faesdecx	%f4, %f8, %f2
676e1051a39Sopenharmony_ci	ldd		[$end + 0], %f6
677e1051a39Sopenharmony_ci	ldd		[$end + 8], %f8
678e1051a39Sopenharmony_ci
679e1051a39Sopenharmony_ci	brnz,a		$inner, .Lcbc_dec
680e1051a39Sopenharmony_ci	sub		$inner, 16*2, $inner
681e1051a39Sopenharmony_ci
682e1051a39Sopenharmony_ci	fmovd		%f0, %f4
683e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
684e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
685e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10	! round[last-1]
686e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
687e1051a39Sopenharmony_ci
688e1051a39Sopenharmony_ci	fmovd		%f0, %f4
689e1051a39Sopenharmony_ci	faesdecx	%f2, %f6, %f0
690e1051a39Sopenharmony_ci	faesdecx	%f4, %f8, %f2
691e1051a39Sopenharmony_ci	fxor		$iv0, $rlhi, %f6	! ivec^round[last]
692e1051a39Sopenharmony_ci	fxor		$iv1, $rllo, %f8
693e1051a39Sopenharmony_ci	fmovd		$in0, $iv0
694e1051a39Sopenharmony_ci	fmovd		$in1, $iv1
695e1051a39Sopenharmony_ci
696e1051a39Sopenharmony_ci	movrz		$len, 0, $inc
697e1051a39Sopenharmony_ci	fmovd		$intail, $in0
698e1051a39Sopenharmony_ci	ldd		[$inp - 8], $in1	! load next input block
699e1051a39Sopenharmony_ci	ldda		[$inp]0x82, $intail	! non-faulting load
700e1051a39Sopenharmony_ci	add		$inp, $inc, $inp	! inp+=16
701e1051a39Sopenharmony_ci
702e1051a39Sopenharmony_ci	fmovd		%f0, %f4
703e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
704e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
705e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
706e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
707e1051a39Sopenharmony_ci
708e1051a39Sopenharmony_ci	fshiftorx	$in0, $in1, $fshift, $in0
709e1051a39Sopenharmony_ci	fshiftorx	$in1, $intail, $fshift, $in1
710e1051a39Sopenharmony_ci
711e1051a39Sopenharmony_ci	fmovd		%f0, %f4
712e1051a39Sopenharmony_ci	faesdeclx	%f2, %f6, %f0
713e1051a39Sopenharmony_ci	faesdeclx	%f4, %f8, %f2
714e1051a39Sopenharmony_ci
715e1051a39Sopenharmony_ci	brnz,pn		$oalign, .Lcbc_dec_unaligned_out
716e1051a39Sopenharmony_ci	nop
717e1051a39Sopenharmony_ci
718e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
719e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
720e1051a39Sopenharmony_ci	add		$out, 16, $out
721e1051a39Sopenharmony_ci
722e1051a39Sopenharmony_ci	brnz,a		$len, .Loop_cbc_dec
723e1051a39Sopenharmony_ci	sub		$len, 1, $len
724e1051a39Sopenharmony_ci
725e1051a39Sopenharmony_ci	st		$iv0,    [$ivp + 0]	! output ivec
726e1051a39Sopenharmony_ci	st		$iv0#lo, [$ivp + 4]
727e1051a39Sopenharmony_ci	st		$iv1,    [$ivp + 8]
728e1051a39Sopenharmony_ci	st		$iv1#lo, [$ivp + 12]
729e1051a39Sopenharmony_ci
730e1051a39Sopenharmony_ci	ret
731e1051a39Sopenharmony_ci	restore
732e1051a39Sopenharmony_ci
733e1051a39Sopenharmony_ci.align	32
734e1051a39Sopenharmony_ci.Lcbc_dec_unaligned_out:
735e1051a39Sopenharmony_ci	ldd		[%o7 + $mask], $fshift	! shift right params
736e1051a39Sopenharmony_ci	mov		0xff, $mask
737e1051a39Sopenharmony_ci	srl		$mask, $oalign, $mask
738e1051a39Sopenharmony_ci	sub		%g0, $ileft, $iright
739e1051a39Sopenharmony_ci
740e1051a39Sopenharmony_ci	fshiftorx	%f0, %f0, $fshift, %f6
741e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, $fshift, %f8
742e1051a39Sopenharmony_ci
743e1051a39Sopenharmony_ci	stda		%f6, [$out + $mask]0xc0	! partial store
744e1051a39Sopenharmony_ci	orn		%g0, $mask, $mask
745e1051a39Sopenharmony_ci	std		%f8, [$out + 8]
746e1051a39Sopenharmony_ci	add		$out, 16, $out
747e1051a39Sopenharmony_ci	brz		$len, .Lcbc_dec_unaligned_out_done
748e1051a39Sopenharmony_ci	sub		$len, 1, $len
749e1051a39Sopenharmony_ci	b		.Loop_cbc_dec_unaligned_out
750e1051a39Sopenharmony_ci	nop
751e1051a39Sopenharmony_ci
752e1051a39Sopenharmony_ci.align	32
753e1051a39Sopenharmony_ci.Loop_cbc_dec_unaligned_out:
754e1051a39Sopenharmony_ci	fmovd		%f2, $outhead
755e1051a39Sopenharmony_ci	fxor		$in0, $r0hi, %f0	! inp^round[0]
756e1051a39Sopenharmony_ci	fxor		$in1, $r0lo, %f2
757e1051a39Sopenharmony_ci	ldd		[$key + 32], %f6	! round[2]
758e1051a39Sopenharmony_ci	ldd		[$key + 40], %f8
759e1051a39Sopenharmony_ci
760e1051a39Sopenharmony_ci	fmovd		%f0, %f4
761e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
762e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
763e1051a39Sopenharmony_ci	ldd		[$key + 48], %f10	! round[3]
764e1051a39Sopenharmony_ci	ldd		[$key + 56], %f12
765e1051a39Sopenharmony_ci
766e1051a39Sopenharmony_ci	ldx		[$inp - 16], %o0
767e1051a39Sopenharmony_ci	ldx		[$inp - 8], %o1
768e1051a39Sopenharmony_ci	brz		$ileft, .Lcbc_dec_aligned_inp
769e1051a39Sopenharmony_ci	movrz		$len, 0, $inc
770e1051a39Sopenharmony_ci
771e1051a39Sopenharmony_ci	ldx		[$inp], %o2
772e1051a39Sopenharmony_ci	sllx		%o0, $ileft, %o0
773e1051a39Sopenharmony_ci	srlx		%o1, $iright, %g1
774e1051a39Sopenharmony_ci	sllx		%o1, $ileft, %o1
775e1051a39Sopenharmony_ci	or		%g1, %o0, %o0
776e1051a39Sopenharmony_ci	srlx		%o2, $iright, %o2
777e1051a39Sopenharmony_ci	or		%o2, %o1, %o1
778e1051a39Sopenharmony_ci
779e1051a39Sopenharmony_ci.Lcbc_dec_aligned_inp:
780e1051a39Sopenharmony_ci	fmovd		%f0, %f4
781e1051a39Sopenharmony_ci	faesdecx	%f2, %f6, %f0
782e1051a39Sopenharmony_ci	faesdecx	%f4, %f8, %f2
783e1051a39Sopenharmony_ci	ldd		[$key + 64], %f6	! round[4]
784e1051a39Sopenharmony_ci	ldd		[$key + 72], %f8
785e1051a39Sopenharmony_ci	add		$key, 64, $end
786e1051a39Sopenharmony_ci	sub		$rounds, 16*8, $inner
787e1051a39Sopenharmony_ci
788e1051a39Sopenharmony_ci	stx		%o0, [%sp + LOCALS + 0]
789e1051a39Sopenharmony_ci	stx		%o1, [%sp + LOCALS + 8]
790e1051a39Sopenharmony_ci	add		$inp, $inc, $inp	! inp+=16
791e1051a39Sopenharmony_ci	nop
792e1051a39Sopenharmony_ci
793e1051a39Sopenharmony_ci.Lcbc_dec_unaligned:
794e1051a39Sopenharmony_ci	fmovd		%f0, %f4
795e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
796e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
797e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10
798e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
799e1051a39Sopenharmony_ci	add		$end, 32, $end
800e1051a39Sopenharmony_ci
801e1051a39Sopenharmony_ci	fmovd		%f0, %f4
802e1051a39Sopenharmony_ci	faesdecx	%f2, %f6, %f0
803e1051a39Sopenharmony_ci	faesdecx	%f4, %f8, %f2
804e1051a39Sopenharmony_ci	ldd		[$end + 0], %f6
805e1051a39Sopenharmony_ci	ldd		[$end + 8], %f8
806e1051a39Sopenharmony_ci
807e1051a39Sopenharmony_ci	brnz,a		$inner, .Lcbc_dec_unaligned
808e1051a39Sopenharmony_ci	sub		$inner, 16*2, $inner
809e1051a39Sopenharmony_ci
810e1051a39Sopenharmony_ci	fmovd		%f0, %f4
811e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
812e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
813e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10	! round[last-1]
814e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
815e1051a39Sopenharmony_ci
816e1051a39Sopenharmony_ci	fmovd		%f0, %f4
817e1051a39Sopenharmony_ci	faesdecx	%f2, %f6, %f0
818e1051a39Sopenharmony_ci	faesdecx	%f4, %f8, %f2
819e1051a39Sopenharmony_ci
820e1051a39Sopenharmony_ci	fxor		$iv0, $rlhi, %f6	! ivec^round[last]
821e1051a39Sopenharmony_ci	fxor		$iv1, $rllo, %f8
822e1051a39Sopenharmony_ci	fmovd		$in0, $iv0
823e1051a39Sopenharmony_ci	fmovd		$in1, $iv1
824e1051a39Sopenharmony_ci	ldd		[%sp + LOCALS + 0], $in0
825e1051a39Sopenharmony_ci	ldd		[%sp + LOCALS + 8], $in1
826e1051a39Sopenharmony_ci
827e1051a39Sopenharmony_ci	fmovd		%f0, %f4
828e1051a39Sopenharmony_ci	faesdecx	%f2, %f10, %f0
829e1051a39Sopenharmony_ci	faesdecx	%f4, %f12, %f2
830e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
831e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
832e1051a39Sopenharmony_ci
833e1051a39Sopenharmony_ci	fmovd		%f0, %f4
834e1051a39Sopenharmony_ci	faesdeclx	%f2, %f6, %f0
835e1051a39Sopenharmony_ci	faesdeclx	%f4, %f8, %f2
836e1051a39Sopenharmony_ci
837e1051a39Sopenharmony_ci	fshiftorx	$outhead, %f0, $fshift, %f6
838e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, $fshift, %f8
839e1051a39Sopenharmony_ci	std		%f6, [$out + 0]
840e1051a39Sopenharmony_ci	std		%f8, [$out + 8]
841e1051a39Sopenharmony_ci	add		$out, 16, $out
842e1051a39Sopenharmony_ci
843e1051a39Sopenharmony_ci	brnz,a		$len, .Loop_cbc_dec_unaligned_out
844e1051a39Sopenharmony_ci	sub		$len, 1, $len
845e1051a39Sopenharmony_ci
846e1051a39Sopenharmony_ci.Lcbc_dec_unaligned_out_done:
847e1051a39Sopenharmony_ci	fshiftorx	%f2, %f2, $fshift, %f8
848e1051a39Sopenharmony_ci	stda		%f8, [$out + $mask]0xc0	! partial store
849e1051a39Sopenharmony_ci
850e1051a39Sopenharmony_ci	st		$iv0,    [$ivp + 0]	! output ivec
851e1051a39Sopenharmony_ci	st		$iv0#lo, [$ivp + 4]
852e1051a39Sopenharmony_ci	st		$iv1,    [$ivp + 8]
853e1051a39Sopenharmony_ci	st		$iv1#lo, [$ivp + 12]
854e1051a39Sopenharmony_ci
855e1051a39Sopenharmony_ci	ret
856e1051a39Sopenharmony_ci	restore
857e1051a39Sopenharmony_ci.type	aes_fx_cbc_encrypt,#function
858e1051a39Sopenharmony_ci.size	aes_fx_cbc_encrypt,.-aes_fx_cbc_encrypt
859e1051a39Sopenharmony_ci___
860e1051a39Sopenharmony_ci}
861e1051a39Sopenharmony_ci{
# ---------------------------------------------------------------------
# Emits aes_fx_ctr32_encrypt_blocks together with the constant tables
# (.Linp_align, .Lout_align, .Lone) and the identification string that
# follow it in the same here-document.
#
# Integer registers:
#   %i0..%i4  $inp, $out, $len, $key, $ivp   (incoming arguments)
#   %l0..%l6  $rounds, $inner, $end, $inc, $ialign, $oalign, $mask
# Both map() ranges below produce one name more than there are
# variables; the surplus name is dropped by the list assignment.
#
# Floating-point registers (even-numbered names only):
#   %f16 $ctr0    %f18 $ctr1    counter block, loaded from [$ivp]
#   %f20 $r0hi    %f22 $r0lo    round[0] key
#   %f24 $rlhi    %f26 $rllo    round[last] key
#   %f28 $in0     %f30 $in1     current input block
#   %f32 $intail                doubleword that follows the input block
#   %f34 $outhead               second half of the previous result, kept
#                               for the unaligned-output path
#   %f36 $fshift                control operand for fshiftorx
#   %f14 $one                   loaded from [%o7 + 128]; %o7 holds
#                               .Linp_align at that point and the two
#                               64-byte tables put .Lone at that offset
# %f0-%f12 are referenced by number in the code: %f0/%f2 carry the
# state, %f4 a copy of %f0, %f6-%f12 the round keys being streamed in.
#
# $ileft/$iright share registers with $ialign/$oalign: $ileft replaces
# the byte misalignment by the same value in bits (sll by 3), and
# $iright (0 - $ileft) is written only on the unaligned-output path,
# after the last read of $oalign.
#
# Outline of the generated routine:
#  * $len is first passed through srln (defined outside this block);
#    a zero count returns immediately via .Lctr32_no_data.
#  * $inp and $out are rounded down to 8-byte boundaries.  The input
#    misalignment selects a row of .Linp_align and the output
#    misalignment a row of .Lout_align (rows are 8 bytes wide, hence
#    the shifts by 3).
#  * $len is decremented ahead of each loop test, so inside the loop it
#    counts the blocks still to come; once it reaches zero, movrz forces
#    $inc to 0 and $inp stops advancing.
#  * .Loop_ctr32 serves 8-byte-aligned output.  When $oalign is non-zero
#    the first block leaves through .Lctr32_unaligned_out, which reloads
#    $fshift from .Lout_align, sets $mask to 0xff >> $oalign for the
#    leading partial store (stda ... 0xc0), inverts it for the trailing
#    one, and continues in .Loop_ctr32_unaligned_out.  That loop
#    realigns input with integer shifts and hands it to the FP side
#    through the 16 bytes at [%sp + LOCALS].
#  * fpadd32 adds $one to $ctr1 once per block; nothing in this routine
#    stores the counter back through $ivp.
# STACK_FRAME and LOCALS are not defined in this block -- presumably
# assembler-level constants supplied elsewhere (TODO confirm).
# ---------------------------------------------------------------------
862e1051a39Sopenharmony_cimy ($inp,$out,$len,$key,$ivp) = map("%i$_",(0..5));
863e1051a39Sopenharmony_cimy ($rounds,$inner,$end,$inc,$ialign,$oalign,$mask) = map("%l$_",(0..7));
864e1051a39Sopenharmony_cimy ($ctr0,$ctr1,$r0hi,$r0lo,$rlhi,$rllo,$in0,$in1,$intail,$outhead,$fshift)
865e1051a39Sopenharmony_ci   = map("%f$_",grep { !($_ & 1) } (16 .. 62));
866e1051a39Sopenharmony_cimy ($ileft,$iright) = ($ialign, $oalign);
867e1051a39Sopenharmony_cimy $one = "%f14";
868e1051a39Sopenharmony_ci
869e1051a39Sopenharmony_ci$code.=<<___;
870e1051a39Sopenharmony_ci.globl	aes_fx_ctr32_encrypt_blocks
871e1051a39Sopenharmony_ci.align	32
872e1051a39Sopenharmony_ciaes_fx_ctr32_encrypt_blocks:
873e1051a39Sopenharmony_ci	save		%sp, -STACK_FRAME-16, %sp
874e1051a39Sopenharmony_ci	srln		$len, 0, $len
875e1051a39Sopenharmony_ci	and		$inp, 7, $ialign
876e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
877e1051a39Sopenharmony_ci	brz,pn		$len, .Lctr32_no_data
878e1051a39Sopenharmony_ci	sll		$ialign, 3, $ileft
879e1051a39Sopenharmony_ci
880e1051a39Sopenharmony_ci.Lpic:	call		.+8
881e1051a39Sopenharmony_ci	add		%o7, .Linp_align - .Lpic, %o7
882e1051a39Sopenharmony_ci
883e1051a39Sopenharmony_ci	ld		[$key + 240], $rounds
884e1051a39Sopenharmony_ci	and		$out, 7, $oalign
885e1051a39Sopenharmony_ci	ld		[$ivp +  0], $ctr0	! load counter
886e1051a39Sopenharmony_ci	andn		$out, 7, $out
887e1051a39Sopenharmony_ci	ld		[$ivp +  4], $ctr0#lo
888e1051a39Sopenharmony_ci	sll		$oalign, 3, $mask
889e1051a39Sopenharmony_ci	ld		[$ivp +  8], $ctr1
890e1051a39Sopenharmony_ci	ld		[$ivp + 12], $ctr1#lo
891e1051a39Sopenharmony_ci	ldd		[%o7 + 128], $one
892e1051a39Sopenharmony_ci
893e1051a39Sopenharmony_ci	sll		$rounds, 4, $rounds
894e1051a39Sopenharmony_ci	add		$rounds, $key, $end
895e1051a39Sopenharmony_ci	ldd		[$key + 0], $r0hi	! round[0]
896e1051a39Sopenharmony_ci	ldd		[$key + 8], $r0lo
897e1051a39Sopenharmony_ci
898e1051a39Sopenharmony_ci	add		$inp, 16, $inp
899e1051a39Sopenharmony_ci	sub		$len, 1, $len
900e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
901e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
902e1051a39Sopenharmony_ci
903e1051a39Sopenharmony_ci	mov		16, $inc
904e1051a39Sopenharmony_ci	movrz		$len, 0, $inc
905e1051a39Sopenharmony_ci	ldd		[$end + 0], $rlhi	! round[last]
906e1051a39Sopenharmony_ci	ldd		[$end + 8], $rllo
907e1051a39Sopenharmony_ci
908e1051a39Sopenharmony_ci	ldd		[%o7 + $ileft], $fshift	! shiftleft params
909e1051a39Sopenharmony_ci	add		%o7, 64, %o7
910e1051a39Sopenharmony_ci	ldd		[$inp - 16], $in0	! load input
911e1051a39Sopenharmony_ci	ldd		[$inp -  8], $in1
912e1051a39Sopenharmony_ci	ldda		[$inp]0x82, $intail	! non-faulting load
913e1051a39Sopenharmony_ci	add		$inp, $inc, $inp	! inp+=16
914e1051a39Sopenharmony_ci
915e1051a39Sopenharmony_ci	fshiftorx	$in0, $in1, $fshift, $in0
916e1051a39Sopenharmony_ci	fshiftorx	$in1, $intail, $fshift, $in1
917e1051a39Sopenharmony_ci
918e1051a39Sopenharmony_ci.Loop_ctr32:
919e1051a39Sopenharmony_ci	fxor		$ctr0, $r0hi, %f0	! counter^round[0]
920e1051a39Sopenharmony_ci	fxor		$ctr1, $r0lo, %f2
921e1051a39Sopenharmony_ci	ldd		[$key + 32], %f6	! round[2]
922e1051a39Sopenharmony_ci	ldd		[$key + 40], %f8
923e1051a39Sopenharmony_ci	add		$key, 32, $end
924e1051a39Sopenharmony_ci	sub		$rounds, 16*6, $inner
925e1051a39Sopenharmony_ci
926e1051a39Sopenharmony_ci.Lctr32_enc:
927e1051a39Sopenharmony_ci	fmovd		%f0, %f4
928e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
929e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
930e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10
931e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
932e1051a39Sopenharmony_ci	add		$end, 32, $end
933e1051a39Sopenharmony_ci
934e1051a39Sopenharmony_ci	fmovd		%f0, %f4
935e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
936e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
937e1051a39Sopenharmony_ci	ldd		[$end + 0], %f6
938e1051a39Sopenharmony_ci	ldd		[$end + 8], %f8
939e1051a39Sopenharmony_ci
940e1051a39Sopenharmony_ci	brnz,a		$inner, .Lctr32_enc
941e1051a39Sopenharmony_ci	sub		$inner, 16*2, $inner
942e1051a39Sopenharmony_ci
943e1051a39Sopenharmony_ci	fmovd		%f0, %f4
944e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
945e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
946e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10	! round[last-1]
947e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
948e1051a39Sopenharmony_ci
949e1051a39Sopenharmony_ci	fmovd		%f0, %f4
950e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
951e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
952e1051a39Sopenharmony_ci	fxor		$in0, $rlhi, %f6	! inp^round[last]
953e1051a39Sopenharmony_ci	fxor		$in1, $rllo, %f8
954e1051a39Sopenharmony_ci
955e1051a39Sopenharmony_ci	movrz		$len, 0, $inc
956e1051a39Sopenharmony_ci	fmovd		$intail, $in0
957e1051a39Sopenharmony_ci	ldd		[$inp - 8], $in1	! load next input block
958e1051a39Sopenharmony_ci	ldda		[$inp]0x82, $intail	! non-faulting load
959e1051a39Sopenharmony_ci	add		$inp, $inc, $inp	! inp+=16
960e1051a39Sopenharmony_ci
961e1051a39Sopenharmony_ci	fmovd		%f0, %f4
962e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
963e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
964e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
965e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
966e1051a39Sopenharmony_ci
967e1051a39Sopenharmony_ci	fshiftorx	$in0, $in1, $fshift, $in0
968e1051a39Sopenharmony_ci	fshiftorx	$in1, $intail, $fshift, $in1
969e1051a39Sopenharmony_ci	fpadd32		$ctr1, $one, $ctr1	! increment counter
970e1051a39Sopenharmony_ci
971e1051a39Sopenharmony_ci	fmovd		%f0, %f4
972e1051a39Sopenharmony_ci	faesenclx	%f2, %f6, %f0
973e1051a39Sopenharmony_ci	faesenclx	%f4, %f8, %f2
974e1051a39Sopenharmony_ci
975e1051a39Sopenharmony_ci	brnz,pn		$oalign, .Lctr32_unaligned_out
976e1051a39Sopenharmony_ci	nop
977e1051a39Sopenharmony_ci
978e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
979e1051a39Sopenharmony_ci	std		%f2, [$out + 8]
980e1051a39Sopenharmony_ci	add		$out, 16, $out
981e1051a39Sopenharmony_ci
982e1051a39Sopenharmony_ci	brnz,a		$len, .Loop_ctr32
983e1051a39Sopenharmony_ci	sub		$len, 1, $len
984e1051a39Sopenharmony_ci
985e1051a39Sopenharmony_ci.Lctr32_no_data:
986e1051a39Sopenharmony_ci	ret
987e1051a39Sopenharmony_ci	restore
988e1051a39Sopenharmony_ci
989e1051a39Sopenharmony_ci.align	32
990e1051a39Sopenharmony_ci.Lctr32_unaligned_out:
991e1051a39Sopenharmony_ci	ldd		[%o7 + $mask], $fshift	! shift right params
992e1051a39Sopenharmony_ci	mov		0xff, $mask
993e1051a39Sopenharmony_ci	srl		$mask, $oalign, $mask
994e1051a39Sopenharmony_ci	sub		%g0, $ileft, $iright
995e1051a39Sopenharmony_ci
996e1051a39Sopenharmony_ci	fshiftorx	%f0, %f0, $fshift, %f6
997e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, $fshift, %f8
998e1051a39Sopenharmony_ci
999e1051a39Sopenharmony_ci	stda		%f6, [$out + $mask]0xc0	! partial store
1000e1051a39Sopenharmony_ci	orn		%g0, $mask, $mask
1001e1051a39Sopenharmony_ci	std		%f8, [$out + 8]
1002e1051a39Sopenharmony_ci	add		$out, 16, $out
1003e1051a39Sopenharmony_ci	brz		$len, .Lctr32_unaligned_out_done
1004e1051a39Sopenharmony_ci	sub		$len, 1, $len
1005e1051a39Sopenharmony_ci	b		.Loop_ctr32_unaligned_out
1006e1051a39Sopenharmony_ci	nop
1007e1051a39Sopenharmony_ci
1008e1051a39Sopenharmony_ci.align	32
1009e1051a39Sopenharmony_ci.Loop_ctr32_unaligned_out:
1010e1051a39Sopenharmony_ci	fmovd		%f2, $outhead
1011e1051a39Sopenharmony_ci	fxor		$ctr0, $r0hi, %f0	! counter^round[0]
1012e1051a39Sopenharmony_ci	fxor		$ctr1, $r0lo, %f2
1013e1051a39Sopenharmony_ci	ldd		[$key + 32], %f6	! round[2]
1014e1051a39Sopenharmony_ci	ldd		[$key + 40], %f8
1015e1051a39Sopenharmony_ci
1016e1051a39Sopenharmony_ci	fmovd		%f0, %f4
1017e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
1018e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
1019e1051a39Sopenharmony_ci	ldd		[$key + 48], %f10	! round[3]
1020e1051a39Sopenharmony_ci	ldd		[$key + 56], %f12
1021e1051a39Sopenharmony_ci
1022e1051a39Sopenharmony_ci	ldx		[$inp - 16], %o0
1023e1051a39Sopenharmony_ci	ldx		[$inp -  8], %o1
1024e1051a39Sopenharmony_ci	brz		$ileft, .Lctr32_aligned_inp
1025e1051a39Sopenharmony_ci	movrz		$len, 0, $inc
1026e1051a39Sopenharmony_ci
1027e1051a39Sopenharmony_ci	ldx		[$inp], %o2
1028e1051a39Sopenharmony_ci	sllx		%o0, $ileft, %o0
1029e1051a39Sopenharmony_ci	srlx		%o1, $iright, %g1
1030e1051a39Sopenharmony_ci	sllx		%o1, $ileft, %o1
1031e1051a39Sopenharmony_ci	or		%g1, %o0, %o0
1032e1051a39Sopenharmony_ci	srlx		%o2, $iright, %o2
1033e1051a39Sopenharmony_ci	or		%o2, %o1, %o1
1034e1051a39Sopenharmony_ci
1035e1051a39Sopenharmony_ci.Lctr32_aligned_inp:
1036e1051a39Sopenharmony_ci	fmovd		%f0, %f4
1037e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
1038e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
1039e1051a39Sopenharmony_ci	ldd		[$key + 64], %f6	! round[4]
1040e1051a39Sopenharmony_ci	ldd		[$key + 72], %f8
1041e1051a39Sopenharmony_ci	add		$key, 64, $end
1042e1051a39Sopenharmony_ci	sub		$rounds, 16*8, $inner
1043e1051a39Sopenharmony_ci
1044e1051a39Sopenharmony_ci	stx		%o0, [%sp + LOCALS + 0]
1045e1051a39Sopenharmony_ci	stx		%o1, [%sp + LOCALS + 8]
1046e1051a39Sopenharmony_ci	add		$inp, $inc, $inp	! inp+=16
1047e1051a39Sopenharmony_ci	nop
1048e1051a39Sopenharmony_ci
1049e1051a39Sopenharmony_ci.Lctr32_enc_unaligned:
1050e1051a39Sopenharmony_ci	fmovd		%f0, %f4
1051e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
1052e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
1053e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10
1054e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
1055e1051a39Sopenharmony_ci	add		$end, 32, $end
1056e1051a39Sopenharmony_ci
1057e1051a39Sopenharmony_ci	fmovd		%f0, %f4
1058e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
1059e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
1060e1051a39Sopenharmony_ci	ldd		[$end + 0], %f6
1061e1051a39Sopenharmony_ci	ldd		[$end + 8], %f8
1062e1051a39Sopenharmony_ci
1063e1051a39Sopenharmony_ci	brnz,a		$inner, .Lctr32_enc_unaligned
1064e1051a39Sopenharmony_ci	sub		$inner, 16*2, $inner
1065e1051a39Sopenharmony_ci
1066e1051a39Sopenharmony_ci	fmovd		%f0, %f4
1067e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
1068e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
1069e1051a39Sopenharmony_ci	ldd		[$end + 16], %f10	! round[last-1]
1070e1051a39Sopenharmony_ci	ldd		[$end + 24], %f12
1071e1051a39Sopenharmony_ci	fpadd32		$ctr1, $one, $ctr1	! increment counter
1072e1051a39Sopenharmony_ci
1073e1051a39Sopenharmony_ci	fmovd		%f0, %f4
1074e1051a39Sopenharmony_ci	faesencx	%f2, %f6, %f0
1075e1051a39Sopenharmony_ci	faesencx	%f4, %f8, %f2
1076e1051a39Sopenharmony_ci	fxor		$in0, $rlhi, %f6	! inp^round[last]
1077e1051a39Sopenharmony_ci	fxor		$in1, $rllo, %f8
1078e1051a39Sopenharmony_ci	ldd		[%sp + LOCALS + 0], $in0
1079e1051a39Sopenharmony_ci	ldd		[%sp + LOCALS + 8], $in1
1080e1051a39Sopenharmony_ci
1081e1051a39Sopenharmony_ci	fmovd		%f0, %f4
1082e1051a39Sopenharmony_ci	faesencx	%f2, %f10, %f0
1083e1051a39Sopenharmony_ci	faesencx	%f4, %f12, %f2
1084e1051a39Sopenharmony_ci	ldd		[$key + 16], %f10	! round[1]
1085e1051a39Sopenharmony_ci	ldd		[$key + 24], %f12
1086e1051a39Sopenharmony_ci
1087e1051a39Sopenharmony_ci	fmovd		%f0, %f4
1088e1051a39Sopenharmony_ci	faesenclx	%f2, %f6, %f0
1089e1051a39Sopenharmony_ci	faesenclx	%f4, %f8, %f2
1090e1051a39Sopenharmony_ci
1091e1051a39Sopenharmony_ci	fshiftorx	$outhead, %f0, $fshift, %f6
1092e1051a39Sopenharmony_ci	fshiftorx	%f0, %f2, $fshift, %f8
1093e1051a39Sopenharmony_ci	std		%f6, [$out + 0]
1094e1051a39Sopenharmony_ci	std		%f8, [$out + 8]
1095e1051a39Sopenharmony_ci	add		$out, 16, $out
1096e1051a39Sopenharmony_ci
1097e1051a39Sopenharmony_ci	brnz,a		$len, .Loop_ctr32_unaligned_out
1098e1051a39Sopenharmony_ci	sub		$len, 1, $len
1099e1051a39Sopenharmony_ci
1100e1051a39Sopenharmony_ci.Lctr32_unaligned_out_done:
1101e1051a39Sopenharmony_ci	fshiftorx	%f2, %f2, $fshift, %f8
1102e1051a39Sopenharmony_ci	stda		%f8, [$out + $mask]0xc0	! partial store
1103e1051a39Sopenharmony_ci
1104e1051a39Sopenharmony_ci	ret
1105e1051a39Sopenharmony_ci	restore
1106e1051a39Sopenharmony_ci.type	aes_fx_ctr32_encrypt_blocks,#function
1107e1051a39Sopenharmony_ci.size	aes_fx_ctr32_encrypt_blocks,.-aes_fx_ctr32_encrypt_blocks
1108e1051a39Sopenharmony_ci
1109e1051a39Sopenharmony_ci.align	32
1110e1051a39Sopenharmony_ci.Linp_align:		! fshiftorx parameters for left shift toward %rs1
1111e1051a39Sopenharmony_ci	.byte	0, 0, 64,  0,	0, 64,  0, -64
1112e1051a39Sopenharmony_ci	.byte	0, 0, 56,  8,	0, 56,  8, -56
1113e1051a39Sopenharmony_ci	.byte	0, 0, 48, 16,	0, 48, 16, -48
1114e1051a39Sopenharmony_ci	.byte	0, 0, 40, 24,	0, 40, 24, -40
1115e1051a39Sopenharmony_ci	.byte	0, 0, 32, 32,	0, 32, 32, -32
1116e1051a39Sopenharmony_ci	.byte	0, 0, 24, 40,	0, 24, 40, -24
1117e1051a39Sopenharmony_ci	.byte	0, 0, 16, 48,	0, 16, 48, -16
1118e1051a39Sopenharmony_ci	.byte	0, 0,  8, 56,	0,  8, 56, -8
1119e1051a39Sopenharmony_ci.Lout_align:		! fshiftorx parameters for right shift toward %rs2
1120e1051a39Sopenharmony_ci	.byte	0, 0,  0, 64,	0,  0, 64,   0
1121e1051a39Sopenharmony_ci	.byte	0, 0,  8, 56,	0,  8, 56,  -8
1122e1051a39Sopenharmony_ci	.byte	0, 0, 16, 48,	0, 16, 48, -16
1123e1051a39Sopenharmony_ci	.byte	0, 0, 24, 40,	0, 24, 40, -24
1124e1051a39Sopenharmony_ci	.byte	0, 0, 32, 32,	0, 32, 32, -32
1125e1051a39Sopenharmony_ci	.byte	0, 0, 40, 24,	0, 40, 24, -40
1126e1051a39Sopenharmony_ci	.byte	0, 0, 48, 16,	0, 48, 16, -48
1127e1051a39Sopenharmony_ci	.byte	0, 0, 56,  8,	0, 56,  8, -56
1128e1051a39Sopenharmony_ci.Lone:
1129e1051a39Sopenharmony_ci	.word	0, 1
1130e1051a39Sopenharmony_ci.asciz	"AES for Fujitsu SPARC64 X, CRYPTOGAMS by <appro\@openssl.org>"
1131e1051a39Sopenharmony_ci.align	4
1132e1051a39Sopenharmony_ci___
1133e1051a39Sopenharmony_ci}
# The purpose of these subroutines is to encode VIS instructions
# explicitly, so that the module can be compiled without specifying VIS
# extensions on the compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# The idea is to keep open the option of producing a "universal" binary
# and let the programmer detect at run-time whether the CPU is VIS-capable.
# unvis(MNEMONIC, RS1, RS2, RD)
#
# Turns a three-operand VIS instruction on floating-point registers into
# a ".word" directive carrying its machine encoding, with the textual
# form appended as an assembler comment.
#
# Returns the plain "MNEMONIC\tRS1,RS2,RD" text unchanged when
#   - MNEMONIC is not in the opcode table below,
#   - an operand does not contain a %fN register name, or
#   - an operand names an odd-numbered register >= 32.
sub unvis {
    my ($mnemonic, $rs1, $rs2, $rd) = @_;
    my %opf_of = (
        "faligndata" => 0x048,
        "bshuffle"   => 0x04c,
        "fpadd32"    => 0x052,
        "fxor"       => 0x06c,
        "fsrc2"      => 0x078,
    );

    # Textual fallback; also used as the trailing comment of the .word.
    my $asm = "$mnemonic\t$rs1,$rs2,$rd";

    my $opf = $opf_of{$mnemonic};
    return $asm unless $opf;

    my @regno;
    for my $operand ($rs1, $rs2, $rd) {
        return $asm unless $operand =~ /%f([0-9]{1,2})/;
        my $n = $1;
        if ($n >= 32) {
            return $asm if $n & 1;
            # upper double registers: fold bit 5 of the number into bit 0
            $n = ($n | $n >> 5) & 31;
        }
        push @regno, $n;
    }
    my ($src1, $src2, $dst) = @regno;

    return sprintf ".word\t0x%08x !%s",
        0x81b00000 | $dst << 25 | $src1 << 14 | $opf << 5 | $src2,
        $asm;
}
1168e1051a39Sopenharmony_ci
# unvis3(MNEMONIC, RS1, RS2, RD)
#
# Same job as the floating-point encoder, but for the instructions in
# the table below, which take integer registers.  A register name
# %g/%o/%l/%i followed by a digit is mapped to bank base + digit
# (g=0, o=8, l=16, i=24).
#
# Returns the plain "MNEMONIC\tRS1,RS2,RD" text unchanged when the
# mnemonic is not in the table or an operand holds no such register.
sub unvis3 {
    my ($mnemonic, $rs1, $rs2, $rd) = @_;
    my %bank_base = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
    my %opf_of = (
        "alignaddr"  => 0x018,
        "bmask"      => 0x019,
        "alignaddrl" => 0x01a,
    );

    # Textual fallback; also used as the trailing comment of the .word.
    my $asm = "$mnemonic\t$rs1,$rs2,$rd";

    my $opf = $opf_of{$mnemonic};
    return $asm unless $opf;

    my @regno;
    for my $operand ($rs1, $rs2, $rd) {
        return $asm unless $operand =~ /%([goli])([0-9])/;
        push @regno, $bank_base{$1} + $2;
    }
    my ($src1, $src2, $dst) = @regno;

    return sprintf ".word\t0x%08x !%s",
        0x81b00000 | $dst << 25 | $src1 << 14 | $opf << 5 | $src2,
        $asm;
}
1192e1051a39Sopenharmony_ci
# unfx(MNEMONIC, RS1, RS2, RD)
#
# Encodes one of the AES instructions listed below as a ".word"
# directive, with the textual form appended as an assembler comment.
#
# RS2 is handled separately from RS1/RD: an even-numbered %fN name is
# converted to its 5-bit field value, and anything that then starts
# with "0" (e.g. a "0x.." literal) is converted with oct().  RS1 and RD
# must contain %fN names; odd-numbered registers >= 32 are rejected.
#
# Returns the plain "MNEMONIC\tRS1,RS2,RD" text unchanged when the
# mnemonic is unknown or RS1/RD cannot be encoded.
sub unfx {
    my ($mnemonic, $rs1, $rs2, $rd) = @_;
    my %opf_of = (
        "faesencx"  => 0x90,
        "faesdecx"  => 0x91,
        "faesenclx" => 0x92,
        "faesdeclx" => 0x93,
        "faeskeyx"  => 0x94,
    );

    # Textual fallback; also used as the trailing comment of the .word.
    my $asm = "$mnemonic\t$rs1,$rs2,$rd";

    my $opf = $opf_of{$mnemonic};
    return $asm unless defined $opf;

    # Second source: register name first, numeric literal second.
    my $src2 = $rs2;
    if ($src2 =~ /%f([0-6]*[02468])/) {
        $src2 = ($1 | $1 >> 5) & 31;
    }
    $src2 = oct($src2) if $src2 =~ /^0/;

    my @regno;
    for my $operand ($rs1, $rd) {
        return $asm unless $operand =~ /%f([0-9]{1,2})/;
        my $n = $1;
        if ($n >= 32) {
            return $asm if $n & 1;
            # upper double registers: fold bit 5 of the number into bit 0
            $n = ($n | $n >> 5) & 31;
        }
        push @regno, $n;
    }
    my ($src1, $dst) = @regno;

    return sprintf ".word\t0x%08x !%s",
        2 << 30 | $dst << 25 | 0x36 << 19 | $src1 << 14 | $opf << 5 | $src2,
        $asm;
}
1225e1051a39Sopenharmony_ci
# unfx3src(MNEMONIC, RS1, RS2, RS3, RD)
#
# Encodes a three-source floating-point instruction (only fshiftorx is
# listed) as a ".word" directive, with the textual form appended as an
# assembler comment.  All four operands must contain %fN names;
# odd-numbered registers >= 32 are rejected.
#
# Returns the plain "MNEMONIC\tRS1,RS2,RS3,RD" text unchanged when the
# mnemonic is unknown or an operand cannot be encoded.
sub unfx3src {
    my ($mnemonic, $rs1, $rs2, $rs3, $rd) = @_;
    my %opf_of = ( "fshiftorx" => 0x0b );

    # Textual fallback; also used as the trailing comment of the .word.
    my $asm = "$mnemonic\t$rs1,$rs2,$rs3,$rd";

    my $opf = $opf_of{$mnemonic};
    return $asm unless defined $opf;

    my @regno;
    for my $operand ($rs1, $rs2, $rs3, $rd) {
        return $asm unless $operand =~ /%f([0-9]{1,2})/;
        my $n = $1;
        if ($n >= 32) {
            return $asm if $n & 1;
            # upper double registers: fold bit 5 of the number into bit 0
            $n = ($n | $n >> 5) & 31;
        }
        push @regno, $n;
    }
    my ($src1, $src2, $src3, $dst) = @regno;

    return sprintf ".word\t0x%08x !%s",
        2 << 30 | $dst << 25 | 0x37 << 19 | $src1 << 14 | $src3 << 9 | $opf << 5 | $src2,
        $asm;
}
1251e1051a39Sopenharmony_ci
# Final pass: walk the accumulated assembly text line by line, rewrite
# what has to be rewritten and print the result.
for (split("\n",$code)) {
    # Replace each `...` span with the result of evaluating it as Perl.
    s/\`([^\`]*)\`/eval $1/ge;

    # "%fN#lo" stands for the register numbered one above %fN.
    s/%f([0-9]+)#lo/sprintf "%%f%d",$1+1/ge;

    # Offer the line to the encoders in a fixed order; as soon as one
    # pattern has substituted something, the remaining ones are skipped.
    my $encoded =
        s/\b(faes[^x]{3,4}x)\s+(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/&unfx($1,$2,$3,$4)/ge;
    $encoded ||=
        s/\b([f][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/&unfx3src($1,$2,$3,$4,$5)/ge;
    $encoded ||=
        s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/&unvis($1,$2,$3,$4)/ge;
    $encoded ||=
        s/\b(alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/&unvis3($1,$2,$3,$4)/ge;

    print $_, "\n";
}

# Surface write errors that only show up when the stream is flushed.
close STDOUT or die "error closing STDOUT: $!";
1273