1e1051a39Sopenharmony_ci#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2013-2021 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci
9e1051a39Sopenharmony_ci
10e1051a39Sopenharmony_ci# ====================================================================
11e1051a39Sopenharmony_ci# Written by David S. Miller and Andy Polyakov.
12e1051a39Sopenharmony_ci# The module is licensed under 2-clause BSD
13e1051a39Sopenharmony_ci# license. March 2013. All rights reserved.
14e1051a39Sopenharmony_ci# ====================================================================
15e1051a39Sopenharmony_ci
16e1051a39Sopenharmony_ci######################################################################
17e1051a39Sopenharmony_ci# DES for SPARC T4.
18e1051a39Sopenharmony_ci#
19e1051a39Sopenharmony_ci# As with other hardware-assisted ciphers CBC encrypt results [for
20e1051a39Sopenharmony_ci# aligned data] are virtually identical to critical path lengths:
21e1051a39Sopenharmony_ci#
22e1051a39Sopenharmony_ci#		DES		Triple-DES
23e1051a39Sopenharmony_ci# CBC encrypt	4.14/4.15(*)	11.7/11.7
24e1051a39Sopenharmony_ci# CBC decrypt	1.77/4.11(**)	6.42/7.47
25e1051a39Sopenharmony_ci#
26e1051a39Sopenharmony_ci#			 (*)	numbers after slash are for
27e1051a39Sopenharmony_ci#				misaligned data;
28e1051a39Sopenharmony_ci#			 (**)	this is result for largest
29e1051a39Sopenharmony_ci#				block size, unlike all other
30e1051a39Sopenharmony_ci#				cases smaller blocks results
31e1051a39Sopenharmony_ci#				are better[?];
32e1051a39Sopenharmony_ci
# Locate the directory this script was started from so that the shared
# perlasm helper can be found either next to the script or two levels up
# in perlasm/.  When $0 carries no directory component the prefix is the
# empty string (i.e. paths relative to the current directory); this is
# made explicit instead of depending on whatever $1 happened to hold.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument, when present and true, names the output
# file; everything printed to STDOUT afterwards goes there.  With no such
# argument STDOUT is left untouched.  Three-argument open keeps the file
# name from being interpreted as a mode, and the result is checked so a
# failed open aborts instead of silently writing to the original STDOUT.
$output = pop;
if ($output) {
    open STDOUT, ">", $output
        or die "can't open $output: $!";
}
38e1051a39Sopenharmony_ci
# Assembler preamble placed ahead of all generated routines: make sure
# __ASSEMBLER__ is defined before crypto/sparc_arch.h is included, declare
# %g2 and %g3 as scratch registers when __arch64__ is defined, and select
# the .text section.  %g2 and %g3 are the registers bound to $iright and
# $omask in the CBC routines further down in this file.
39e1051a39Sopenharmony_ci$code.=<<___;
40e1051a39Sopenharmony_ci#ifndef __ASSEMBLER__
41e1051a39Sopenharmony_ci# define __ASSEMBLER__ 1
42e1051a39Sopenharmony_ci#endif
43e1051a39Sopenharmony_ci#include "crypto/sparc_arch.h"
44e1051a39Sopenharmony_ci
45e1051a39Sopenharmony_ci#ifdef	__arch64__
46e1051a39Sopenharmony_ci.register       %g2,#scratch
47e1051a39Sopenharmony_ci.register       %g3,#scratch
48e1051a39Sopenharmony_ci#endif
49e1051a39Sopenharmony_ci
50e1051a39Sopenharmony_ci.text
51e1051a39Sopenharmony_ci___
52e1051a39Sopenharmony_ci
# des_t4_key_expand: expand one key doubleword into a 128-byte schedule.
#
#   %o0 ($inp)  address of the key material; any alignment is accepted
#   %o1 ($out)  destination; 16 doublewords are stored with std at
#               offsets 0x00 through 0x78
#
# The low three bits of $inp are tested first.  If they are zero the
# branch goes straight to label 1 and only the ldd that follows the
# branch (in its delay slot) supplies the key.  Otherwise a second
# doubleword is loaded and faligndata combines the two into %f0.
#
# Each des_kexpand takes a previously produced doubleword and an
# immediate in the range 0..3 and yields another schedule entry.  The
# stores are interleaved with the expansions, which is why they do not
# appear in ascending address order.  The last store sits in the delay
# slot of retl.
#
# NOTE(review): std is used unconditionally on $out, so $out is
# presumably expected to be 8-byte aligned -- confirm against callers.
53e1051a39Sopenharmony_ci{ my ($inp,$out)=("%o0","%o1");
54e1051a39Sopenharmony_ci
55e1051a39Sopenharmony_ci$code.=<<___;
56e1051a39Sopenharmony_ci.align	32
57e1051a39Sopenharmony_ci.globl	des_t4_key_expand
58e1051a39Sopenharmony_ci.type	des_t4_key_expand,#function
59e1051a39Sopenharmony_cides_t4_key_expand:
60e1051a39Sopenharmony_ci	andcc		$inp, 0x7, %g0
61e1051a39Sopenharmony_ci	alignaddr	$inp, %g0, $inp
62e1051a39Sopenharmony_ci	bz,pt		%icc, 1f
63e1051a39Sopenharmony_ci	ldd		[$inp + 0x00], %f0
64e1051a39Sopenharmony_ci	ldd		[$inp + 0x08], %f2
65e1051a39Sopenharmony_ci	faligndata	%f0, %f2, %f0
66e1051a39Sopenharmony_ci1:	des_kexpand	%f0, 0, %f0
67e1051a39Sopenharmony_ci	des_kexpand	%f0, 1, %f2
68e1051a39Sopenharmony_ci	std		%f0, [$out + 0x00]
69e1051a39Sopenharmony_ci	des_kexpand	%f2, 3, %f6
70e1051a39Sopenharmony_ci	std		%f2, [$out + 0x08]
71e1051a39Sopenharmony_ci	des_kexpand	%f2, 2, %f4
72e1051a39Sopenharmony_ci	des_kexpand	%f6, 3, %f10
73e1051a39Sopenharmony_ci	std		%f6, [$out + 0x18]
74e1051a39Sopenharmony_ci	des_kexpand	%f6, 2, %f8
75e1051a39Sopenharmony_ci	std		%f4, [$out + 0x10]
76e1051a39Sopenharmony_ci	des_kexpand	%f10, 3, %f14
77e1051a39Sopenharmony_ci	std		%f10, [$out + 0x28]
78e1051a39Sopenharmony_ci	des_kexpand	%f10, 2, %f12
79e1051a39Sopenharmony_ci	std		%f8, [$out + 0x20]
80e1051a39Sopenharmony_ci	des_kexpand	%f14, 1, %f16
81e1051a39Sopenharmony_ci	std		%f14, [$out + 0x38]
82e1051a39Sopenharmony_ci	des_kexpand	%f16, 3, %f20
83e1051a39Sopenharmony_ci	std		%f12, [$out + 0x30]
84e1051a39Sopenharmony_ci	des_kexpand	%f16, 2, %f18
85e1051a39Sopenharmony_ci	std		%f16, [$out + 0x40]
86e1051a39Sopenharmony_ci	des_kexpand	%f20, 3, %f24
87e1051a39Sopenharmony_ci	std		%f20, [$out + 0x50]
88e1051a39Sopenharmony_ci	des_kexpand	%f20, 2, %f22
89e1051a39Sopenharmony_ci	std		%f18, [$out + 0x48]
90e1051a39Sopenharmony_ci	des_kexpand	%f24, 3, %f28
91e1051a39Sopenharmony_ci	std		%f24, [$out + 0x60]
92e1051a39Sopenharmony_ci	des_kexpand	%f24, 2, %f26
93e1051a39Sopenharmony_ci	std		%f22, [$out + 0x58]
94e1051a39Sopenharmony_ci	des_kexpand	%f28, 1, %f30
95e1051a39Sopenharmony_ci	std		%f28, [$out + 0x70]
96e1051a39Sopenharmony_ci	std		%f26, [$out + 0x68]
97e1051a39Sopenharmony_ci	retl
98e1051a39Sopenharmony_ci	std		%f30, [$out + 0x78]
99e1051a39Sopenharmony_ci.size	des_t4_key_expand,.-des_t4_key_expand
100e1051a39Sopenharmony_ci___
101e1051a39Sopenharmony_ci}
# Single-DES CBC routines: des_t4_cbc_encrypt and des_t4_cbc_decrypt.
# Both take the same five arguments:
#
#   %o0 ($inp)   input buffer, any alignment
#   %o1 ($out)   output buffer, any alignment
#   %o2 ($len)   length in bytes; shifted right by 3 to obtain the number
#                of 8-byte blocks.  A zero length returns immediately
#                through .Lcbc_abort without touching $ivec or $out.
#   %o3 ($key)   key schedule, read as 16 doublewords at 0x00..0x78
#   %o4 ($ivec)  chaining value, read with two 32-bit loads on entry and
#                written back with two 32-bit stores on exit
#
# Scratch registers:
#   %g1 ($ileft)   input misalignment converted to bits, ($inp & 7) << 3
#   %g2 ($iright)  0 - $ileft, used as the shift count for the following
#                  input doubleword
#   %g3 ($omask)   0xff >> ($out & 7), forced to 0 when $out is 8-byte
#                  aligned; doubles as the "output is unaligned" flag
#   %g4, %g5       input doublewords being merged
#
# Input handling: $inp is rounded down to an 8-byte boundary.  When
# $ileft is non-zero each block is assembled from two aligned loads with
# sllx/srlx/or; otherwise the single ldx result is used directly.
#
# Encrypt: the input block is XORed into %f0 (initially the ivec), then
# run through des_ip, eight des_round instructions consuming two schedule
# doublewords each, and des_iip.  The result stays in %f0 and therefore
# chains into the next block; %f0/%f1 are stored to $ivec at the end.
#
# Decrypt: the schedule is loaded from 0x78 down to 0x00.  %f2 holds the
# chaining value; after the rounds the result is XORed with %f2 and the
# raw input doubleword still in %g4 becomes the new %f2.  %f2/%f3 are
# stored to $ivec at the end.
#
# Output handling: with $omask == 0 a plain std is used.  Otherwise
# control goes to label 2, where faligndata rotates the block and two
# stda instructions with ASI 0xc0 write it under $omask and then under
# its complement at $out and $out+8.  That path fetches the next input
# doubleword itself with ldxa and branches to <loop label>+4 in order to
# skip the ldx at the top of the loop, so nothing may be inserted ahead
# of that ldx without adjusting the +4.
#
# .Lcbc_abort is defined inside des_t4_cbc_encrypt and is also the
# zero-length exit for the other CBC routines in this file.
#
# NOTE(review): $len is decremented before it is tested, so a non-zero
# byte length below 8 (block count 0 after the shift) would not stop
# after one block; presumably callers always pass a non-zero multiple
# of 8 -- confirm.
# NOTE(review): `srln` and $::size_t_cc are not defined in this file;
# presumably they are provided or translated by sparcv9_modes.pl --
# verify there.
102e1051a39Sopenharmony_ci{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
103e1051a39Sopenharmony_ci  my ($ileft,$iright,$omask) = map("%g$_",(1..3));
104e1051a39Sopenharmony_ci
105e1051a39Sopenharmony_ci$code.=<<___;
106e1051a39Sopenharmony_ci.globl	des_t4_cbc_encrypt
107e1051a39Sopenharmony_ci.align	32
108e1051a39Sopenharmony_cides_t4_cbc_encrypt:
109e1051a39Sopenharmony_ci	cmp		$len, 0
110e1051a39Sopenharmony_ci	be,pn		$::size_t_cc, .Lcbc_abort
111e1051a39Sopenharmony_ci	srln		$len, 0, $len		! needed on v8+, "nop" on v9
112e1051a39Sopenharmony_ci	ld		[$ivec + 0], %f0	! load ivec
113e1051a39Sopenharmony_ci	ld		[$ivec + 4], %f1
114e1051a39Sopenharmony_ci
115e1051a39Sopenharmony_ci	and		$inp, 7, $ileft
116e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
117e1051a39Sopenharmony_ci	sll		$ileft, 3, $ileft
118e1051a39Sopenharmony_ci	mov		0xff, $omask
119e1051a39Sopenharmony_ci	prefetch	[$inp], 20
120e1051a39Sopenharmony_ci	prefetch	[$inp + 63], 20
121e1051a39Sopenharmony_ci	sub		%g0, $ileft, $iright
122e1051a39Sopenharmony_ci	and		$out, 7, %g4
123e1051a39Sopenharmony_ci	alignaddrl	$out, %g0, $out
124e1051a39Sopenharmony_ci	srl		$omask, %g4, $omask
125e1051a39Sopenharmony_ci	srlx		$len, 3, $len
126e1051a39Sopenharmony_ci	movrz		%g4, 0, $omask
127e1051a39Sopenharmony_ci	prefetch	[$out], 22
128e1051a39Sopenharmony_ci
129e1051a39Sopenharmony_ci	ldd		[$key + 0x00], %f4	! load key schedule
130e1051a39Sopenharmony_ci	ldd		[$key + 0x08], %f6
131e1051a39Sopenharmony_ci	ldd		[$key + 0x10], %f8
132e1051a39Sopenharmony_ci	ldd		[$key + 0x18], %f10
133e1051a39Sopenharmony_ci	ldd		[$key + 0x20], %f12
134e1051a39Sopenharmony_ci	ldd		[$key + 0x28], %f14
135e1051a39Sopenharmony_ci	ldd		[$key + 0x30], %f16
136e1051a39Sopenharmony_ci	ldd		[$key + 0x38], %f18
137e1051a39Sopenharmony_ci	ldd		[$key + 0x40], %f20
138e1051a39Sopenharmony_ci	ldd		[$key + 0x48], %f22
139e1051a39Sopenharmony_ci	ldd		[$key + 0x50], %f24
140e1051a39Sopenharmony_ci	ldd		[$key + 0x58], %f26
141e1051a39Sopenharmony_ci	ldd		[$key + 0x60], %f28
142e1051a39Sopenharmony_ci	ldd		[$key + 0x68], %f30
143e1051a39Sopenharmony_ci	ldd		[$key + 0x70], %f32
144e1051a39Sopenharmony_ci	ldd		[$key + 0x78], %f34
145e1051a39Sopenharmony_ci
146e1051a39Sopenharmony_ci.Ldes_cbc_enc_loop:
147e1051a39Sopenharmony_ci	ldx		[$inp + 0], %g4
148e1051a39Sopenharmony_ci	brz,pt		$ileft, 4f
149e1051a39Sopenharmony_ci	nop
150e1051a39Sopenharmony_ci
151e1051a39Sopenharmony_ci	ldx		[$inp + 8], %g5
152e1051a39Sopenharmony_ci	sllx		%g4, $ileft, %g4
153e1051a39Sopenharmony_ci	srlx		%g5, $iright, %g5
154e1051a39Sopenharmony_ci	or		%g5, %g4, %g4
155e1051a39Sopenharmony_ci4:
156e1051a39Sopenharmony_ci	movxtod		%g4, %f2
157e1051a39Sopenharmony_ci	prefetch	[$inp + 8+63], 20
158e1051a39Sopenharmony_ci	add		$inp, 8, $inp
159e1051a39Sopenharmony_ci	fxor		%f2, %f0, %f0		! ^= ivec
160e1051a39Sopenharmony_ci	prefetch	[$out + 63], 22
161e1051a39Sopenharmony_ci
162e1051a39Sopenharmony_ci	des_ip		%f0, %f0
163e1051a39Sopenharmony_ci	des_round	%f4, %f6, %f0, %f0
164e1051a39Sopenharmony_ci	des_round	%f8, %f10, %f0, %f0
165e1051a39Sopenharmony_ci	des_round	%f12, %f14, %f0, %f0
166e1051a39Sopenharmony_ci	des_round	%f16, %f18, %f0, %f0
167e1051a39Sopenharmony_ci	des_round	%f20, %f22, %f0, %f0
168e1051a39Sopenharmony_ci	des_round	%f24, %f26, %f0, %f0
169e1051a39Sopenharmony_ci	des_round	%f28, %f30, %f0, %f0
170e1051a39Sopenharmony_ci	des_round	%f32, %f34, %f0, %f0
171e1051a39Sopenharmony_ci	des_iip		%f0, %f0
172e1051a39Sopenharmony_ci
173e1051a39Sopenharmony_ci	brnz,pn		$omask, 2f
174e1051a39Sopenharmony_ci	sub		$len, 1, $len
175e1051a39Sopenharmony_ci
176e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
177e1051a39Sopenharmony_ci	brnz,pt		$len, .Ldes_cbc_enc_loop
178e1051a39Sopenharmony_ci	add		$out, 8, $out
179e1051a39Sopenharmony_ci
180e1051a39Sopenharmony_ci	st		%f0, [$ivec + 0]	! write out ivec
181e1051a39Sopenharmony_ci	retl
182e1051a39Sopenharmony_ci	st		%f1, [$ivec + 4]
183e1051a39Sopenharmony_ci.Lcbc_abort:
184e1051a39Sopenharmony_ci	retl
185e1051a39Sopenharmony_ci	nop
186e1051a39Sopenharmony_ci
187e1051a39Sopenharmony_ci.align	16
188e1051a39Sopenharmony_ci2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
189e1051a39Sopenharmony_ci						! and ~4x deterioration
190e1051a39Sopenharmony_ci						! in inp==out case
191e1051a39Sopenharmony_ci	faligndata	%f0, %f0, %f2		! handle unaligned output
192e1051a39Sopenharmony_ci
193e1051a39Sopenharmony_ci	stda		%f2, [$out + $omask]0xc0	! partial store
194e1051a39Sopenharmony_ci	add		$out, 8, $out
195e1051a39Sopenharmony_ci	orn		%g0, $omask, $omask
196e1051a39Sopenharmony_ci	stda		%f2, [$out + $omask]0xc0	! partial store
197e1051a39Sopenharmony_ci
198e1051a39Sopenharmony_ci	brnz,pt		$len, .Ldes_cbc_enc_loop+4
199e1051a39Sopenharmony_ci	orn		%g0, $omask, $omask
200e1051a39Sopenharmony_ci
201e1051a39Sopenharmony_ci	st		%f0, [$ivec + 0]	! write out ivec
202e1051a39Sopenharmony_ci	retl
203e1051a39Sopenharmony_ci	st		%f1, [$ivec + 4]
204e1051a39Sopenharmony_ci.type	des_t4_cbc_encrypt,#function
205e1051a39Sopenharmony_ci.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
206e1051a39Sopenharmony_ci
207e1051a39Sopenharmony_ci.globl	des_t4_cbc_decrypt
208e1051a39Sopenharmony_ci.align	32
209e1051a39Sopenharmony_cides_t4_cbc_decrypt:
210e1051a39Sopenharmony_ci	cmp		$len, 0
211e1051a39Sopenharmony_ci	be,pn		$::size_t_cc, .Lcbc_abort
212e1051a39Sopenharmony_ci	srln		$len, 0, $len		! needed on v8+, "nop" on v9
213e1051a39Sopenharmony_ci	ld		[$ivec + 0], %f2	! load ivec
214e1051a39Sopenharmony_ci	ld		[$ivec + 4], %f3
215e1051a39Sopenharmony_ci
216e1051a39Sopenharmony_ci	and		$inp, 7, $ileft
217e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
218e1051a39Sopenharmony_ci	sll		$ileft, 3, $ileft
219e1051a39Sopenharmony_ci	mov		0xff, $omask
220e1051a39Sopenharmony_ci	prefetch	[$inp], 20
221e1051a39Sopenharmony_ci	prefetch	[$inp + 63], 20
222e1051a39Sopenharmony_ci	sub		%g0, $ileft, $iright
223e1051a39Sopenharmony_ci	and		$out, 7, %g4
224e1051a39Sopenharmony_ci	alignaddrl	$out, %g0, $out
225e1051a39Sopenharmony_ci	srl		$omask, %g4, $omask
226e1051a39Sopenharmony_ci	srlx		$len, 3, $len
227e1051a39Sopenharmony_ci	movrz		%g4, 0, $omask
228e1051a39Sopenharmony_ci	prefetch	[$out], 22
229e1051a39Sopenharmony_ci
230e1051a39Sopenharmony_ci	ldd		[$key + 0x78], %f4	! load key schedule
231e1051a39Sopenharmony_ci	ldd		[$key + 0x70], %f6
232e1051a39Sopenharmony_ci	ldd		[$key + 0x68], %f8
233e1051a39Sopenharmony_ci	ldd		[$key + 0x60], %f10
234e1051a39Sopenharmony_ci	ldd		[$key + 0x58], %f12
235e1051a39Sopenharmony_ci	ldd		[$key + 0x50], %f14
236e1051a39Sopenharmony_ci	ldd		[$key + 0x48], %f16
237e1051a39Sopenharmony_ci	ldd		[$key + 0x40], %f18
238e1051a39Sopenharmony_ci	ldd		[$key + 0x38], %f20
239e1051a39Sopenharmony_ci	ldd		[$key + 0x30], %f22
240e1051a39Sopenharmony_ci	ldd		[$key + 0x28], %f24
241e1051a39Sopenharmony_ci	ldd		[$key + 0x20], %f26
242e1051a39Sopenharmony_ci	ldd		[$key + 0x18], %f28
243e1051a39Sopenharmony_ci	ldd		[$key + 0x10], %f30
244e1051a39Sopenharmony_ci	ldd		[$key + 0x08], %f32
245e1051a39Sopenharmony_ci	ldd		[$key + 0x00], %f34
246e1051a39Sopenharmony_ci
247e1051a39Sopenharmony_ci.Ldes_cbc_dec_loop:
248e1051a39Sopenharmony_ci	ldx		[$inp + 0], %g4
249e1051a39Sopenharmony_ci	brz,pt		$ileft, 4f
250e1051a39Sopenharmony_ci	nop
251e1051a39Sopenharmony_ci
252e1051a39Sopenharmony_ci	ldx		[$inp + 8], %g5
253e1051a39Sopenharmony_ci	sllx		%g4, $ileft, %g4
254e1051a39Sopenharmony_ci	srlx		%g5, $iright, %g5
255e1051a39Sopenharmony_ci	or		%g5, %g4, %g4
256e1051a39Sopenharmony_ci4:
257e1051a39Sopenharmony_ci	movxtod		%g4, %f0
258e1051a39Sopenharmony_ci	prefetch	[$inp + 8+63], 20
259e1051a39Sopenharmony_ci	add		$inp, 8, $inp
260e1051a39Sopenharmony_ci	prefetch	[$out + 63], 22
261e1051a39Sopenharmony_ci
262e1051a39Sopenharmony_ci	des_ip		%f0, %f0
263e1051a39Sopenharmony_ci	des_round	%f4, %f6, %f0, %f0
264e1051a39Sopenharmony_ci	des_round	%f8, %f10, %f0, %f0
265e1051a39Sopenharmony_ci	des_round	%f12, %f14, %f0, %f0
266e1051a39Sopenharmony_ci	des_round	%f16, %f18, %f0, %f0
267e1051a39Sopenharmony_ci	des_round	%f20, %f22, %f0, %f0
268e1051a39Sopenharmony_ci	des_round	%f24, %f26, %f0, %f0
269e1051a39Sopenharmony_ci	des_round	%f28, %f30, %f0, %f0
270e1051a39Sopenharmony_ci	des_round	%f32, %f34, %f0, %f0
271e1051a39Sopenharmony_ci	des_iip		%f0, %f0
272e1051a39Sopenharmony_ci
273e1051a39Sopenharmony_ci	fxor		%f2, %f0, %f0		! ^= ivec
274e1051a39Sopenharmony_ci	movxtod		%g4, %f2
275e1051a39Sopenharmony_ci
276e1051a39Sopenharmony_ci	brnz,pn		$omask, 2f
277e1051a39Sopenharmony_ci	sub		$len, 1, $len
278e1051a39Sopenharmony_ci
279e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
280e1051a39Sopenharmony_ci	brnz,pt		$len, .Ldes_cbc_dec_loop
281e1051a39Sopenharmony_ci	add		$out, 8, $out
282e1051a39Sopenharmony_ci
283e1051a39Sopenharmony_ci	st		%f2, [$ivec + 0]	! write out ivec
284e1051a39Sopenharmony_ci	retl
285e1051a39Sopenharmony_ci	st		%f3, [$ivec + 4]
286e1051a39Sopenharmony_ci
287e1051a39Sopenharmony_ci.align	16
288e1051a39Sopenharmony_ci2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
289e1051a39Sopenharmony_ci						! and ~4x deterioration
290e1051a39Sopenharmony_ci						! in inp==out case
291e1051a39Sopenharmony_ci	faligndata	%f0, %f0, %f0		! handle unaligned output
292e1051a39Sopenharmony_ci
293e1051a39Sopenharmony_ci	stda		%f0, [$out + $omask]0xc0	! partial store
294e1051a39Sopenharmony_ci	add		$out, 8, $out
295e1051a39Sopenharmony_ci	orn		%g0, $omask, $omask
296e1051a39Sopenharmony_ci	stda		%f0, [$out + $omask]0xc0	! partial store
297e1051a39Sopenharmony_ci
298e1051a39Sopenharmony_ci	brnz,pt		$len, .Ldes_cbc_dec_loop+4
299e1051a39Sopenharmony_ci	orn		%g0, $omask, $omask
300e1051a39Sopenharmony_ci
301e1051a39Sopenharmony_ci	st		%f2, [$ivec + 0]	! write out ivec
302e1051a39Sopenharmony_ci	retl
303e1051a39Sopenharmony_ci	st		%f3, [$ivec + 4]
304e1051a39Sopenharmony_ci.type	des_t4_cbc_decrypt,#function
305e1051a39Sopenharmony_ci.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
306e1051a39Sopenharmony_ci___
307e1051a39Sopenharmony_ci
308e1051a39Sopenharmony_ci# One might wonder why does one have back-to-back des_iip/des_ip
309e1051a39Sopenharmony_ci# pairs between EDE passes. Indeed, aren't they inverse of each other?
310e1051a39Sopenharmony_ci# They almost are. Outcome of the pair is 32-bit words being swapped
311e1051a39Sopenharmony_ci# in target register. Consider pair of des_iip/des_ip as a way to
312e1051a39Sopenharmony_ci# perform the due swap, it's actually fastest way in this case.
313e1051a39Sopenharmony_ci
# Triple-DES CBC routines: des_t4_ede3_cbc_encrypt and
# des_t4_ede3_cbc_decrypt.  Arguments are $inp, $out, $len (bytes, shifted
# right by 3 to get the block count; zero returns via .Lcbc_abort), $key
# and $ivec, bound to %o0..%o4 by the enclosing scope.  $ileft, $iright
# and $omask (%g1..%g3) carry the input misalignment in bits, its
# negation, and the partial-store byte mask (0 when $out is aligned).
#
# $key is read as 0x180 bytes, i.e. three consecutive 0x80-byte
# schedules.  Every block goes through three des_ip / 8 x des_round /
# des_iip passes:
#
#   encrypt  pass 1: 0x000..0x078 ascending, held in %f4-%f34 for the
#                    whole call
#            pass 2: 0x0f8 down to 0x080
#            pass 3: 0x100..0x178 ascending
#   decrypt  pass 1: 0x178 down to 0x100, held in %f4-%f34 for the
#                    whole call
#            pass 2: 0x080..0x0f8 ascending
#            pass 3: 0x078 down to 0x000
#
# Only %f36-%f62 (14 doublewords) are available for passes 2 and 3, so
# their round keys are reloaded from memory on every loop iteration,
# interleaved with the des_round instructions of the preceding pass, and
# the first one or two register pairs are loaded again once they have
# been consumed.  The ldd/des_round ordering is therefore significant.
#
# Chaining mirrors the structure of the code: the encrypt routine XORs
# the input into %f0 before the first pass and writes %f0/%f1 back to
# $ivec; the decrypt routine XORs %f2 into the result after the last
# pass, then replaces %f2 with the raw input doubleword from %g4 and
# writes %f2/%f3 back to $ivec.
#
# Unaligned output ($omask != 0) is handled at label 2 with faligndata
# and two stda partial stores (ASI 0xc0).  That path loads the next input
# doubleword itself and re-enters the loop at <loop label>+4, skipping
# the ldx at the loop head; keep that ldx as the first loop instruction.
#
# NOTE(review): as $len is decremented before being tested, a non-zero
# byte length below 8 would not stop after one block; presumably callers
# pass a non-zero multiple of 8 -- confirm.
314e1051a39Sopenharmony_ci$code.=<<___;
315e1051a39Sopenharmony_ci.globl	des_t4_ede3_cbc_encrypt
316e1051a39Sopenharmony_ci.align	32
317e1051a39Sopenharmony_cides_t4_ede3_cbc_encrypt:
318e1051a39Sopenharmony_ci	cmp		$len, 0
319e1051a39Sopenharmony_ci	be,pn		$::size_t_cc, .Lcbc_abort
320e1051a39Sopenharmony_ci	srln		$len, 0, $len		! needed on v8+, "nop" on v9
321e1051a39Sopenharmony_ci	ld		[$ivec + 0], %f0	! load ivec
322e1051a39Sopenharmony_ci	ld		[$ivec + 4], %f1
323e1051a39Sopenharmony_ci
324e1051a39Sopenharmony_ci	and		$inp, 7, $ileft
325e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
326e1051a39Sopenharmony_ci	sll		$ileft, 3, $ileft
327e1051a39Sopenharmony_ci	mov		0xff, $omask
328e1051a39Sopenharmony_ci	prefetch	[$inp], 20
329e1051a39Sopenharmony_ci	prefetch	[$inp + 63], 20
330e1051a39Sopenharmony_ci	sub		%g0, $ileft, $iright
331e1051a39Sopenharmony_ci	and		$out, 7, %g4
332e1051a39Sopenharmony_ci	alignaddrl	$out, %g0, $out
333e1051a39Sopenharmony_ci	srl		$omask, %g4, $omask
334e1051a39Sopenharmony_ci	srlx		$len, 3, $len
335e1051a39Sopenharmony_ci	movrz		%g4, 0, $omask
336e1051a39Sopenharmony_ci	prefetch	[$out], 22
337e1051a39Sopenharmony_ci
338e1051a39Sopenharmony_ci	ldd		[$key + 0x00], %f4	! load key schedule
339e1051a39Sopenharmony_ci	ldd		[$key + 0x08], %f6
340e1051a39Sopenharmony_ci	ldd		[$key + 0x10], %f8
341e1051a39Sopenharmony_ci	ldd		[$key + 0x18], %f10
342e1051a39Sopenharmony_ci	ldd		[$key + 0x20], %f12
343e1051a39Sopenharmony_ci	ldd		[$key + 0x28], %f14
344e1051a39Sopenharmony_ci	ldd		[$key + 0x30], %f16
345e1051a39Sopenharmony_ci	ldd		[$key + 0x38], %f18
346e1051a39Sopenharmony_ci	ldd		[$key + 0x40], %f20
347e1051a39Sopenharmony_ci	ldd		[$key + 0x48], %f22
348e1051a39Sopenharmony_ci	ldd		[$key + 0x50], %f24
349e1051a39Sopenharmony_ci	ldd		[$key + 0x58], %f26
350e1051a39Sopenharmony_ci	ldd		[$key + 0x60], %f28
351e1051a39Sopenharmony_ci	ldd		[$key + 0x68], %f30
352e1051a39Sopenharmony_ci	ldd		[$key + 0x70], %f32
353e1051a39Sopenharmony_ci	ldd		[$key + 0x78], %f34
354e1051a39Sopenharmony_ci
355e1051a39Sopenharmony_ci.Ldes_ede3_cbc_enc_loop:
356e1051a39Sopenharmony_ci	ldx		[$inp + 0], %g4
357e1051a39Sopenharmony_ci	brz,pt		$ileft, 4f
358e1051a39Sopenharmony_ci	nop
359e1051a39Sopenharmony_ci
360e1051a39Sopenharmony_ci	ldx		[$inp + 8], %g5
361e1051a39Sopenharmony_ci	sllx		%g4, $ileft, %g4
362e1051a39Sopenharmony_ci	srlx		%g5, $iright, %g5
363e1051a39Sopenharmony_ci	or		%g5, %g4, %g4
364e1051a39Sopenharmony_ci4:
365e1051a39Sopenharmony_ci	movxtod		%g4, %f2
366e1051a39Sopenharmony_ci	prefetch	[$inp + 8+63], 20
367e1051a39Sopenharmony_ci	add		$inp, 8, $inp
368e1051a39Sopenharmony_ci	fxor		%f2, %f0, %f0		! ^= ivec
369e1051a39Sopenharmony_ci	prefetch	[$out + 63], 22
370e1051a39Sopenharmony_ci
371e1051a39Sopenharmony_ci	des_ip		%f0, %f0
372e1051a39Sopenharmony_ci	des_round	%f4, %f6, %f0, %f0
373e1051a39Sopenharmony_ci	des_round	%f8, %f10, %f0, %f0
374e1051a39Sopenharmony_ci	des_round	%f12, %f14, %f0, %f0
375e1051a39Sopenharmony_ci	des_round	%f16, %f18, %f0, %f0
376e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x08], %f36
377e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x10], %f38
378e1051a39Sopenharmony_ci	des_round	%f20, %f22, %f0, %f0
379e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x18], %f40
380e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x20], %f42
381e1051a39Sopenharmony_ci	des_round	%f24, %f26, %f0, %f0
382e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x28], %f44
383e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x30], %f46
384e1051a39Sopenharmony_ci	des_round	%f28, %f30, %f0, %f0
385e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x38], %f48
386e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x40], %f50
387e1051a39Sopenharmony_ci	des_round	%f32, %f34, %f0, %f0
388e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x48], %f52
389e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x50], %f54
390e1051a39Sopenharmony_ci	des_iip		%f0, %f0
391e1051a39Sopenharmony_ci
392e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x58], %f56
393e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x60], %f58
394e1051a39Sopenharmony_ci	des_ip		%f0, %f0
395e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x68], %f60
396e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x70], %f62
397e1051a39Sopenharmony_ci	des_round	%f36, %f38, %f0, %f0
398e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x78], %f36
399e1051a39Sopenharmony_ci	ldd		[$key + 0x100-0x80], %f38
400e1051a39Sopenharmony_ci	des_round	%f40, %f42, %f0, %f0
401e1051a39Sopenharmony_ci	des_round	%f44, %f46, %f0, %f0
402e1051a39Sopenharmony_ci	des_round	%f48, %f50, %f0, %f0
403e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x00], %f40
404e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x08], %f42
405e1051a39Sopenharmony_ci	des_round	%f52, %f54, %f0, %f0
406e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x10], %f44
407e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x18], %f46
408e1051a39Sopenharmony_ci	des_round	%f56, %f58, %f0, %f0
409e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x20], %f48
410e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x28], %f50
411e1051a39Sopenharmony_ci	des_round	%f60, %f62, %f0, %f0
412e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x30], %f52
413e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x38], %f54
414e1051a39Sopenharmony_ci	des_round	%f36, %f38, %f0, %f0
415e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x40], %f56
416e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x48], %f58
417e1051a39Sopenharmony_ci	des_iip		%f0, %f0
418e1051a39Sopenharmony_ci
419e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x50], %f60
420e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x58], %f62
421e1051a39Sopenharmony_ci	des_ip		%f0, %f0
422e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x60], %f36
423e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x68], %f38
424e1051a39Sopenharmony_ci	des_round	%f40, %f42, %f0, %f0
425e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x70], %f40
426e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x78], %f42
427e1051a39Sopenharmony_ci	des_round	%f44, %f46, %f0, %f0
428e1051a39Sopenharmony_ci	des_round	%f48, %f50, %f0, %f0
429e1051a39Sopenharmony_ci	des_round	%f52, %f54, %f0, %f0
430e1051a39Sopenharmony_ci	des_round	%f56, %f58, %f0, %f0
431e1051a39Sopenharmony_ci	des_round	%f60, %f62, %f0, %f0
432e1051a39Sopenharmony_ci	des_round	%f36, %f38, %f0, %f0
433e1051a39Sopenharmony_ci	des_round	%f40, %f42, %f0, %f0
434e1051a39Sopenharmony_ci	des_iip		%f0, %f0
435e1051a39Sopenharmony_ci
436e1051a39Sopenharmony_ci	brnz,pn		$omask, 2f
437e1051a39Sopenharmony_ci	sub		$len, 1, $len
438e1051a39Sopenharmony_ci
439e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
440e1051a39Sopenharmony_ci	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
441e1051a39Sopenharmony_ci	add		$out, 8, $out
442e1051a39Sopenharmony_ci
443e1051a39Sopenharmony_ci	st		%f0, [$ivec + 0]	! write out ivec
444e1051a39Sopenharmony_ci	retl
445e1051a39Sopenharmony_ci	st		%f1, [$ivec + 4]
446e1051a39Sopenharmony_ci
447e1051a39Sopenharmony_ci.align	16
448e1051a39Sopenharmony_ci2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
449e1051a39Sopenharmony_ci						! and ~2x deterioration
450e1051a39Sopenharmony_ci						! in inp==out case
451e1051a39Sopenharmony_ci	faligndata	%f0, %f0, %f2		! handle unaligned output
452e1051a39Sopenharmony_ci
453e1051a39Sopenharmony_ci	stda		%f2, [$out + $omask]0xc0	! partial store
454e1051a39Sopenharmony_ci	add		$out, 8, $out
455e1051a39Sopenharmony_ci	orn		%g0, $omask, $omask
456e1051a39Sopenharmony_ci	stda		%f2, [$out + $omask]0xc0	! partial store
457e1051a39Sopenharmony_ci
458e1051a39Sopenharmony_ci	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
459e1051a39Sopenharmony_ci	orn		%g0, $omask, $omask
460e1051a39Sopenharmony_ci
461e1051a39Sopenharmony_ci	st		%f0, [$ivec + 0]	! write out ivec
462e1051a39Sopenharmony_ci	retl
463e1051a39Sopenharmony_ci	st		%f1, [$ivec + 4]
464e1051a39Sopenharmony_ci.type	des_t4_ede3_cbc_encrypt,#function
465e1051a39Sopenharmony_ci.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
466e1051a39Sopenharmony_ci
467e1051a39Sopenharmony_ci.globl	des_t4_ede3_cbc_decrypt
468e1051a39Sopenharmony_ci.align	32
469e1051a39Sopenharmony_cides_t4_ede3_cbc_decrypt:
470e1051a39Sopenharmony_ci	cmp		$len, 0
471e1051a39Sopenharmony_ci	be,pn		$::size_t_cc, .Lcbc_abort
472e1051a39Sopenharmony_ci	srln		$len, 0, $len		! needed on v8+, "nop" on v9
473e1051a39Sopenharmony_ci	ld		[$ivec + 0], %f2	! load ivec
474e1051a39Sopenharmony_ci	ld		[$ivec + 4], %f3
475e1051a39Sopenharmony_ci
476e1051a39Sopenharmony_ci	and		$inp, 7, $ileft
477e1051a39Sopenharmony_ci	andn		$inp, 7, $inp
478e1051a39Sopenharmony_ci	sll		$ileft, 3, $ileft
479e1051a39Sopenharmony_ci	mov		0xff, $omask
480e1051a39Sopenharmony_ci	prefetch	[$inp], 20
481e1051a39Sopenharmony_ci	prefetch	[$inp + 63], 20
482e1051a39Sopenharmony_ci	sub		%g0, $ileft, $iright
483e1051a39Sopenharmony_ci	and		$out, 7, %g4
484e1051a39Sopenharmony_ci	alignaddrl	$out, %g0, $out
485e1051a39Sopenharmony_ci	srl		$omask, %g4, $omask
486e1051a39Sopenharmony_ci	srlx		$len, 3, $len
487e1051a39Sopenharmony_ci	movrz		%g4, 0, $omask
488e1051a39Sopenharmony_ci	prefetch	[$out], 22
489e1051a39Sopenharmony_ci
490e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x78], %f4	! load key schedule
491e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x70], %f6
492e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x68], %f8
493e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x60], %f10
494e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x58], %f12
495e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x50], %f14
496e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x48], %f16
497e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x40], %f18
498e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x38], %f20
499e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x30], %f22
500e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x28], %f24
501e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x20], %f26
502e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x18], %f28
503e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x10], %f30
504e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x08], %f32
505e1051a39Sopenharmony_ci	ldd		[$key + 0x100+0x00], %f34
506e1051a39Sopenharmony_ci
507e1051a39Sopenharmony_ci.Ldes_ede3_cbc_dec_loop:
508e1051a39Sopenharmony_ci	ldx		[$inp + 0], %g4
509e1051a39Sopenharmony_ci	brz,pt		$ileft, 4f
510e1051a39Sopenharmony_ci	nop
511e1051a39Sopenharmony_ci
512e1051a39Sopenharmony_ci	ldx		[$inp + 8], %g5
513e1051a39Sopenharmony_ci	sllx		%g4, $ileft, %g4
514e1051a39Sopenharmony_ci	srlx		%g5, $iright, %g5
515e1051a39Sopenharmony_ci	or		%g5, %g4, %g4
516e1051a39Sopenharmony_ci4:
517e1051a39Sopenharmony_ci	movxtod		%g4, %f0
518e1051a39Sopenharmony_ci	prefetch	[$inp + 8+63], 20
519e1051a39Sopenharmony_ci	add		$inp, 8, $inp
520e1051a39Sopenharmony_ci	prefetch	[$out + 63], 22
521e1051a39Sopenharmony_ci
522e1051a39Sopenharmony_ci	des_ip		%f0, %f0
523e1051a39Sopenharmony_ci	des_round	%f4, %f6, %f0, %f0
524e1051a39Sopenharmony_ci	des_round	%f8, %f10, %f0, %f0
525e1051a39Sopenharmony_ci	des_round	%f12, %f14, %f0, %f0
526e1051a39Sopenharmony_ci	des_round	%f16, %f18, %f0, %f0
527e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x00], %f36
528e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x08], %f38
529e1051a39Sopenharmony_ci	des_round	%f20, %f22, %f0, %f0
530e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x10], %f40
531e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x18], %f42
532e1051a39Sopenharmony_ci	des_round	%f24, %f26, %f0, %f0
533e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x20], %f44
534e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x28], %f46
535e1051a39Sopenharmony_ci	des_round	%f28, %f30, %f0, %f0
536e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x30], %f48
537e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x38], %f50
538e1051a39Sopenharmony_ci	des_round	%f32, %f34, %f0, %f0
539e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x40], %f52
540e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x48], %f54
541e1051a39Sopenharmony_ci	des_iip		%f0, %f0
542e1051a39Sopenharmony_ci
543e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x50], %f56
544e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x58], %f58
545e1051a39Sopenharmony_ci	des_ip		%f0, %f0
546e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x60], %f60
547e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x68], %f62
548e1051a39Sopenharmony_ci	des_round	%f36, %f38, %f0, %f0
549e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x70], %f36
550e1051a39Sopenharmony_ci	ldd		[$key + 0x80+0x78], %f38
551e1051a39Sopenharmony_ci	des_round	%f40, %f42, %f0, %f0
552e1051a39Sopenharmony_ci	des_round	%f44, %f46, %f0, %f0
553e1051a39Sopenharmony_ci	des_round	%f48, %f50, %f0, %f0
554e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x08], %f40
555e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x10], %f42
556e1051a39Sopenharmony_ci	des_round	%f52, %f54, %f0, %f0
557e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x18], %f44
558e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x20], %f46
559e1051a39Sopenharmony_ci	des_round	%f56, %f58, %f0, %f0
560e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x28], %f48
561e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x30], %f50
562e1051a39Sopenharmony_ci	des_round	%f60, %f62, %f0, %f0
563e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x38], %f52
564e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x40], %f54
565e1051a39Sopenharmony_ci	des_round	%f36, %f38, %f0, %f0
566e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x48], %f56
567e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x50], %f58
568e1051a39Sopenharmony_ci	des_iip		%f0, %f0
569e1051a39Sopenharmony_ci
570e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x58], %f60
571e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x60], %f62
572e1051a39Sopenharmony_ci	des_ip		%f0, %f0
573e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x68], %f36
574e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x70], %f38
575e1051a39Sopenharmony_ci	des_round	%f40, %f42, %f0, %f0
576e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x78], %f40
577e1051a39Sopenharmony_ci	ldd		[$key + 0x80-0x80], %f42
578e1051a39Sopenharmony_ci	des_round	%f44, %f46, %f0, %f0
579e1051a39Sopenharmony_ci	des_round	%f48, %f50, %f0, %f0
580e1051a39Sopenharmony_ci	des_round	%f52, %f54, %f0, %f0
581e1051a39Sopenharmony_ci	des_round	%f56, %f58, %f0, %f0
582e1051a39Sopenharmony_ci	des_round	%f60, %f62, %f0, %f0
583e1051a39Sopenharmony_ci	des_round	%f36, %f38, %f0, %f0
584e1051a39Sopenharmony_ci	des_round	%f40, %f42, %f0, %f0
585e1051a39Sopenharmony_ci	des_iip		%f0, %f0
586e1051a39Sopenharmony_ci
587e1051a39Sopenharmony_ci	fxor		%f2, %f0, %f0		! ^= ivec
588e1051a39Sopenharmony_ci	movxtod		%g4, %f2
589e1051a39Sopenharmony_ci
590e1051a39Sopenharmony_ci	brnz,pn		$omask, 2f
591e1051a39Sopenharmony_ci	sub		$len, 1, $len
592e1051a39Sopenharmony_ci
593e1051a39Sopenharmony_ci	std		%f0, [$out + 0]
594e1051a39Sopenharmony_ci	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
595e1051a39Sopenharmony_ci	add		$out, 8, $out
596e1051a39Sopenharmony_ci
597e1051a39Sopenharmony_ci	st		%f2, [$ivec + 0]	! write out ivec
598e1051a39Sopenharmony_ci	retl
599e1051a39Sopenharmony_ci	st		%f3, [$ivec + 4]
600e1051a39Sopenharmony_ci
601e1051a39Sopenharmony_ci.align	16
602e1051a39Sopenharmony_ci2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
603e1051a39Sopenharmony_ci						! and ~3x deterioration
604e1051a39Sopenharmony_ci						! in inp==out case
605e1051a39Sopenharmony_ci	faligndata	%f0, %f0, %f0		! handle unaligned output
606e1051a39Sopenharmony_ci
607e1051a39Sopenharmony_ci	stda		%f0, [$out + $omask]0xc0	! partial store
608e1051a39Sopenharmony_ci	add		$out, 8, $out
609e1051a39Sopenharmony_ci	orn		%g0, $omask, $omask
610e1051a39Sopenharmony_ci	stda		%f0, [$out + $omask]0xc0	! partial store
611e1051a39Sopenharmony_ci
612e1051a39Sopenharmony_ci	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
613e1051a39Sopenharmony_ci	orn		%g0, $omask, $omask
614e1051a39Sopenharmony_ci
615e1051a39Sopenharmony_ci	st		%f2, [$ivec + 0]	! write out ivec
616e1051a39Sopenharmony_ci	retl
617e1051a39Sopenharmony_ci	st		%f3, [$ivec + 4]
618e1051a39Sopenharmony_ci.type	des_t4_ede3_cbc_decrypt,#function
619e1051a39Sopenharmony_ci.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
620e1051a39Sopenharmony_ci___
621e1051a39Sopenharmony_ci}
# Finish the generated module: append an identification string (followed
# by a 4-byte alignment directive) to $code, call emit_assembler(), and
# close STDOUT.  The close is checked so that an error reported at close
# time aborts the script with a message instead of passing unnoticed.
#
# NOTE(review): emit_assembler is not defined in this file; presumably it
# comes from sparcv9_modes.pl (required near the top) and is what
# post-processes and prints $code -- verify there.
622e1051a39Sopenharmony_ci$code.=<<___;
623e1051a39Sopenharmony_ci.asciz  "DES for SPARC T4, David S. Miller, Andy Polyakov"
624e1051a39Sopenharmony_ci.align  4
625e1051a39Sopenharmony_ci___
626e1051a39Sopenharmony_ci
627e1051a39Sopenharmony_ci&emit_assembler();
628e1051a39Sopenharmony_ci
629e1051a39Sopenharmony_ciclose STDOUT or die "error closing STDOUT: $!";
630