#! /usr/bin/env perl
# Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Copyright (c) 2008 Andy Polyakov <appro@openssl.org>
#
# This module may be used under the terms of either the GNU General
# Public License version 2 or later, the GNU Lesser General Public
# License version 2.1 or later, the Mozilla Public License version
# 1.1 or the BSD License. The exact terms of either license are
# distributed along with this module. For further details see
# http://www.openssl.org/~appro/camellia/.
# ====================================================================

# Performance in cycles per processed byte (less is better) in
# 'openssl speed ...' benchmark:
#
#			AMD64	Core2	EM64T
# -evp camellia-128-ecb	16.7	21.0	22.7
# + over gcc 3.4.6	+25%	+5%	0%
#
# camellia-128-cbc	15.7	20.4	21.1
#
# 128-bit key setup	128	216	205	cycles/key
# + over gcc 3.4.6	+54%	+39%	+15%
#
# Numbers in "+" rows represent performance improvement over compiler
# generated code. Key setup timings are impressive on AMD and Core2
# thanks to 64-bit operations being covertly deployed. Improvement on
# EM64T, pre-Core2 Intel x86_64 CPU, is not as impressive, because it
# apparently emulates some 64-bit operations in [32-bit] microcode.

# Command line: [flavour] [output]
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 mode is selected by a nasm/masm/mingw64 flavour or by an output
# file name ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl: first next to this script, then in
# ../../perlasm relative to this script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Pipe everything subsequently written to STDOUT through x86_64-xlate.pl,
# run with the same perl interpreter ($^X).  $flavour is deliberately left
# unquoted, so an undefined flavour contributes no argument at all.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# hi(reg): rewrite the first %eax/%rax .. %edx/%rdx style register name
# found in the argument to the matching bits-8..15 byte register
# (%ah .. %dh).  A string containing no such name is returned unchanged.
# Takes exactly one argument, so the sub carries no (empty) prototype.
sub hi {
    my $r=shift;
    $r =~ s/%[er]([a-d])x/%${1}h/;
    $r;
}
# lo(reg): rewrite a 32/64-bit register name to its low-byte register:
#   %eax/%rax .. %edx/%rdx -> %al .. %dl
#   %esi/%rsi, %edi/%rdi   -> %sil, %dil
#   %rN / %rNd             -> %rNb
# The three substitutions are applied in sequence to the same string.
# Takes exactly one argument, so the sub carries no (empty) prototype.
sub lo {
    my $r=shift;
    $r =~ s/%[er]([a-d])x/%${1}l/;
    $r =~ s/%[er]([sd]i)/%${1}l/;
    $r =~ s/%(r[0-9]+)[d]?/%${1}b/;
    $r;
}

# Register allocation shared by all code generators below.
# $t0..$t3 are 32-bit temporaries, @S holds the four 32-bit state words,
# $i0/$i1 receive the byte indices used for the table lookups.
$t0="%eax";$t1="%ebx";$t2="%ecx";$t3="%edx";
@S=("%r8d","%r9d","%r10d","%r11d");
$i0="%esi";
$i1="%edi";
$Tbl="%rbp";	# size optimization
$inp="%r12";
$out="%r13";
$key="%r14";
$keyend="%r15";
# 32-bit view of the first integer argument register: %ecx when $win64
# is set, %edi otherwise.
$arg0d=$win64?"%ecx":"%edi";

# const unsigned int Camellia_SBOX[4][256];
# Well, sort of... Camellia_SBOX[0][] is interleaved with [1][],
# and [2][] - with [3][]. This is done to minimize code size.
# The values below are byte offsets from $Tbl; lookups scale the index
# by 8, i.e. one interleaved pair of 32-bit entries per index.
$SBOX1_1110=0;		# Camellia_SBOX[0]
$SBOX4_4404=4;		# Camellia_SBOX[1]
$SBOX2_0222=2048;	# Camellia_SBOX[2]
$SBOX3_3033=2052;	# Camellia_SBOX[3]

# Camellia_Feistel(i[,seed]): append the code for one Feistel round to $code.
#
#   i     round index; its parity selects which half of @S is the round
#         input (s0,s1) and which half is updated (s2,s3).
#   seed  byte displacement of the first round key relative to $key
#         (default 0).  A negative seed makes successive keys be fetched at
#         descending addresses (step -8 instead of +8), which is how the
#         decrypt loop below uses it.
#
# The generated code expects the current round key in $t1:$t0 on entry and
# leaves the key for round i+1 prefetched in $t1:$t0 on exit.  The
# `...` expressions in the template are left as text in $code.
sub Camellia_Feistel {
my ($i,$seed)=@_;
$seed=0 unless defined($seed);
my $scale=$seed<0?-8:8;
my $j=($i&1)*2;
my ($s0,$s1,$s2,$s3)=map { $S[($j+$_)%4] } (0..3);

$code.=<<___;
	xor	$s0,$t0				# t0^=key[0]
	xor	$s1,$t1				# t1^=key[1]
	movz	`&hi("$t0")`,$i0		# (t0>>8)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>0)&0xff
	mov	$SBOX3_3033($Tbl,$i0,8),$t3	# t3=SBOX3_3033[0]
	mov	$SBOX1_1110($Tbl,$i1,8),$t2	# t2=SBOX1_1110[1]
	movz	`&lo("$t0")`,$i0		# (t0>>0)&0xff
	shr	\$16,$t0
	movz	`&hi("$t1")`,$i1		# (t1>>8)&0xff
	xor	$SBOX4_4404($Tbl,$i0,8),$t3	# t3^=SBOX4_4404[0]
	shr	\$16,$t1
	xor	$SBOX4_4404($Tbl,$i1,8),$t2	# t2^=SBOX4_4404[1]
	movz	`&hi("$t0")`,$i0		# (t0>>24)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>16)&0xff
	xor	$SBOX1_1110($Tbl,$i0,8),$t3	# t3^=SBOX1_1110[0]
	xor	$SBOX3_3033($Tbl,$i1,8),$t2	# t2^=SBOX3_3033[1]
	movz	`&lo("$t0")`,$i0		# (t0>>16)&0xff
	movz	`&hi("$t1")`,$i1		# (t1>>24)&0xff
	xor	$SBOX2_0222($Tbl,$i0,8),$t3	# t3^=SBOX2_0222[0]
	xor	$SBOX2_0222($Tbl,$i1,8),$t2	# t2^=SBOX2_0222[1]
	mov	`$seed+($i+1)*$scale`($key),$t1	# prefetch key[i+1]
	mov	`$seed+($i+1)*$scale+4`($key),$t0
	xor	$t3,$t2				# t2^=t3
	ror	\$8,$t3				# t3=RightRotate(t3,8)
	xor	$t2,$s2
	xor	$t2,$s3
	xor	$t3,$s3
___
}

# Block encrypt/decrypt entry points.  This statement (re)initialises $code;
# everything after it appends.
#
# void Camellia_EncryptBlock_Rounds(
#		int grandRounds,
#		const Byte plaintext[],
#		const KEY_TABLE_TYPE keyTable,
#		Byte ciphertext[])
#
# Camellia_EncryptBlock (V1.x API) takes keyBitLength as its first argument,
# converts it to a grandRounds count (3 for 128 bits, 4 otherwise) and jumps
# into Camellia_EncryptBlock_Rounds.  The *_Rounds wrapper saves the
# callee-saved registers it uses, byte-swaps the 16-byte input into @S,
# calls the _x86_64_Camellia_encrypt core and stores the byte-swapped result.
# $keyend is set to keyTable + grandRounds*64 and terminates the core loop.
$code=<<___;
.text

# V1.x API
.globl	Camellia_EncryptBlock
.type	Camellia_EncryptBlock,\@abi-omnipotent
.align	16
Camellia_EncryptBlock:
.cfi_startproc
	movl	\$128,%eax
	subl	$arg0d,%eax
	movl	\$3,$arg0d
	adcl	\$0,$arg0d	# keyBitLength==128?3:4
	jmp	.Lenc_rounds
.cfi_endproc
.size	Camellia_EncryptBlock,.-Camellia_EncryptBlock
# V2
.globl	Camellia_EncryptBlock_Rounds
.type	Camellia_EncryptBlock_Rounds,\@function,4
.align	16
.Lenc_rounds:
Camellia_EncryptBlock_Rounds:
.cfi_startproc
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Lenc_prologue:

	#mov	%rsi,$inp		# put away arguments
	mov	%rcx,$out
	mov	%rdx,$key

	shl	\$6,%edi		# process grandRounds
	lea	.LCamellia_SBOX(%rip),$Tbl
	lea	($key,%rdi),$keyend

	mov	0(%rsi),@S[0]		# load plaintext
	mov	4(%rsi),@S[1]
	mov	8(%rsi),@S[2]
	bswap	@S[0]
	mov	12(%rsi),@S[3]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]

	call	_x86_64_Camellia_encrypt

	bswap	@S[0]
	bswap	@S[1]
	bswap	@S[2]
	mov	@S[0],0($out)
	bswap	@S[3]
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	mov	@S[3],12($out)

	mov	0(%rsp),%r15
.cfi_restore	%r15
	mov	8(%rsp),%r14
.cfi_restore	%r14
	mov	16(%rsp),%r13
.cfi_restore	%r13
	mov	24(%rsp),%rbp
.cfi_restore	%rbp
	mov	32(%rsp),%rbx
.cfi_restore	%rbx
	lea	40(%rsp),%rsp
.cfi_adjust_cfa_offset	-40
.Lenc_epilogue:
	ret
.cfi_endproc
.size	Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds

.type	_x86_64_Camellia_encrypt,\@abi-omnipotent
.align	16
_x86_64_Camellia_encrypt:
.cfi_startproc
	xor	0($key),@S[1]
	xor	4($key),@S[0]		# ^=key[0-3]
	xor	8($key),@S[3]
	xor	12($key),@S[2]
.align	16
.Leloop:
	mov	16($key),$t1		# prefetch key[4-5]
	mov	20($key),$t0

___
	# Six Feistel rounds per loop iteration; round keys start at 16($key).
	for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16); }
# After the six rounds: advance $key by 64 bytes and either finish
# (.Ledone) or apply the and/or/rotate mixing layer and loop again.
$code.=<<___;
	lea	16*4($key),$key
	cmp	$keyend,$key
	mov	8($key),$t3		# prefetch key[2-3]
	mov	12($key),$t2
	je	.Ledone

	and	@S[0],$t0
	or	@S[3],$t3
	rol	\$1,$t0
	xor	$t3,@S[2]		# s2^=s3|key[3];
	xor	$t0,@S[1]		# s1^=LeftRotate(s0&key[0],1);
	and	@S[2],$t2
	or	@S[1],$t1
	rol	\$1,$t2
	xor	$t1,@S[0]		# s0^=s1|key[1];
	xor	$t2,@S[3]		# s3^=LeftRotate(s2&key[2],1);
	jmp	.Leloop

.align	16
.Ledone:
	xor	@S[2],$t0		# SwapHalf
	xor	@S[3],$t1
	xor	@S[0],$t2
	xor	@S[1],$t3

	mov	$t0,@S[0]
	mov	$t1,@S[1]
	mov	$t2,@S[2]
	mov	$t3,@S[3]

	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	_x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt

# V1.x API
.globl	Camellia_DecryptBlock
.type	Camellia_DecryptBlock,\@abi-omnipotent
.align	16
Camellia_DecryptBlock:
.cfi_startproc
	movl	\$128,%eax
	subl	$arg0d,%eax
	movl	\$3,$arg0d
	adcl	\$0,$arg0d	# keyBitLength==128?3:4
	jmp	.Ldec_rounds
.cfi_endproc
.size	Camellia_DecryptBlock,.-Camellia_DecryptBlock
# V2
.globl	Camellia_DecryptBlock_Rounds
.type	Camellia_DecryptBlock_Rounds,\@function,4
.align	16
.Ldec_rounds:
Camellia_DecryptBlock_Rounds:
.cfi_startproc
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Ldec_prologue:

	#mov	%rsi,$inp		# put away arguments
	mov	%rcx,$out
	mov	%rdx,$keyend

	shl	\$6,%edi		# process grandRounds
	lea	.LCamellia_SBOX(%rip),$Tbl
	lea	($keyend,%rdi),$key

	mov	0(%rsi),@S[0]		# load plaintext
	mov	4(%rsi),@S[1]
	mov	8(%rsi),@S[2]
	bswap	@S[0]
	mov	12(%rsi),@S[3]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]

	call	_x86_64_Camellia_decrypt

	bswap	@S[0]
	bswap	@S[1]
	bswap	@S[2]
	mov	@S[0],0($out)
	bswap	@S[3]
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	mov	@S[3],12($out)

	mov	0(%rsp),%r15
.cfi_restore	%r15
	mov	8(%rsp),%r14
.cfi_restore	%r14
	mov	16(%rsp),%r13
.cfi_restore	%r13
	mov	24(%rsp),%rbp
.cfi_restore	%rbp
	mov	32(%rsp),%rbx
.cfi_restore	%rbx
	lea	40(%rsp),%rsp
.cfi_adjust_cfa_offset	-40
.Ldec_epilogue:
	ret
.cfi_endproc
.size	Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds

.type	_x86_64_Camellia_decrypt,\@abi-omnipotent
.align	16
_x86_64_Camellia_decrypt:
.cfi_startproc
	xor	0($key),@S[1]
	xor	4($key),@S[0]		# ^=key[0-3]
	xor	8($key),@S[3]
	xor	12($key),@S[2]
.align	16
.Ldloop:
	mov	-8($key),$t1		# prefetch key[4-5]
	mov	-4($key),$t0

___
	# Same six rounds as for encryption, but with a negative seed so the
	# round keys are fetched at descending addresses starting at -8($key).
	for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8); }
# Decryption walks the key table downwards: $key started at the end of the
# table and the loop stops once it has come back down to $keyend (the start).
$code.=<<___;
	lea	-16*4($key),$key
	cmp	$keyend,$key
	mov	0($key),$t3		# prefetch key[2-3]
	mov	4($key),$t2
	je	.Lddone

	and	@S[0],$t0
	or	@S[3],$t3
	rol	\$1,$t0
	xor	$t3,@S[2]		# s2^=s3|key[3];
	xor	$t0,@S[1]		# s1^=LeftRotate(s0&key[0],1);
	and	@S[2],$t2
	or	@S[1],$t1
	rol	\$1,$t2
	xor	$t1,@S[0]		# s0^=s1|key[1];
	xor	$t2,@S[3]		# s3^=LeftRotate(s2&key[2],1);

	jmp	.Ldloop

.align	16
.Lddone:
	xor	@S[2],$t2
	xor	@S[3],$t3
	xor	@S[0],$t0
	xor	@S[1],$t1

	mov	$t2,@S[0]		# SwapHalf
	mov	$t3,@S[1]
	mov	$t0,@S[2]
	mov	$t1,@S[3]

	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	_x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt
___

# _saveround(rnd, key, [bias,] regs...): append stores of registers into
# the key table slot for round `rnd`, addressed as bias+rnd*8(key).
#
#   rnd   slot index; each slot is 8 bytes.
#   key   base register of the key table.
#   bias  optional displacement.  It is recognised only when the first
#         value after `key` is a non-zero number (register names evaluate
#         to 0 under int()), so a bias of 0 must simply be omitted.
#   regs  four 32-bit registers: stored pairwise swapped (regs[1],regs[0],
#         regs[3],regs[2]) at +0,+4,+8,+12;
#         otherwise regs[0] is stored at +0 and, if present, regs[1] at +8.
sub _saveround {
my ($rnd,$key,@T)=@_;
my $bias=int($T[0])?shift(@T):0;

    if ($#T==3) {
	$code.=<<___;
	mov	$T[1],`$bias+$rnd*8+0`($key)
	mov	$T[0],`$bias+$rnd*8+4`($key)
	mov	$T[3],`$bias+$rnd*8+8`($key)
	mov	$T[2],`$bias+$rnd*8+12`($key)
___
    } else {
	$code.="	mov	$T[0],`$bias+$rnd*8+0`($key)\n";
	$code.="	mov	$T[1],`$bias+$rnd*8+8`($key)\n"	if ($#T>=1);
    }
}

# _loadround(rnd, key, [bias,] regs...): append loads from the key table
# slot for round `rnd` (bias+rnd*8(key)) into regs[0] and, if given,
# from 8 bytes further on into regs[1].  `bias` is optional and recognised
# the same way as in _saveround: only a non-zero numeric value counts.
sub _loadround {
my ($rnd,$key,@T)=@_;
my $bias=int($T[0])?shift(@T):0;

$code.="	mov	`$bias+$rnd*8+0`($key),$T[0]\n";
$code.="	mov	`$bias+$rnd*8+8`($key),$T[1]\n"	if ($#T>=1);
}

# shld is very slow on Intel EM64T family. Even on AMD it limits
# instruction decode rate [because it's VectorPath] and consequently
# performance...
#
# __rotl128(i0, i1, rot): append an shld-based rotate-left by `rot` bits
# of the 128-bit value held in the 64-bit register pair i0:i1.  Emits
# nothing when rot is 0.  The generated code clobbers %r11.
sub __rotl128 {
my ($i0,$i1,$rot)=@_;

    if ($rot) {
	$code.=<<___;
	mov	$i0,%r11
	shld	\$$rot,$i1,$i0
	shld	\$$rot,%r11,$i1
___
    }
}

# ... Implementing 128-bit rotate without shld gives 80% better
# performance EM64T, +15% on AMD64 and only ~7% degradation on
# Core2. This is therefore preferred.
#
# _rotl128(i0, i1, rot): append a shift/or based rotate-left by `rot` bits
# of the 128-bit value held in the 64-bit register pair i0:i1.  Emits
# nothing when rot is 0.  The generated code clobbers %r9 and %r11.
# The complementary shift count is emitted as a `64-rot` expression.
sub _rotl128 {
my ($i0,$i1,$rot)=@_;

    if ($rot) {
	$code.=<<___;
	mov	$i0,%r11
	shl	\$$rot,$i0
	mov	$i1,%r9
	shr	\$`64-$rot`,%r9
	shr	\$`64-$rot`,%r11
	or	%r9,$i0
	shl	\$$rot,$i1
	or	%r11,$i1
___
    }
}

# Key schedule generator: Camellia_Ekeygen, declared to the translator as a
# 3-argument function.  The generated code reads keyBitLength from %edi, the
# raw key from (%rsi) and writes the key table through %rdx; it returns 3 in
# %eax on the 128-bit path and 4 otherwise.
# $step numbers the Feistel rounds emitted below; they use the
# .LCamellia_SIGMA constants as round keys (default seed 0).
{ my $step=0;

$code.=<<___;
.globl	Camellia_Ekeygen
.type	Camellia_Ekeygen,\@function,3
.align	16
Camellia_Ekeygen:
.cfi_startproc
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Lkey_prologue:

	mov	%edi,${keyend}d		# put away arguments, keyBitLength
	mov	%rdx,$out		# keyTable

	mov	0(%rsi),@S[0]		# load 0-127 bits
	mov	4(%rsi),@S[1]
	mov	8(%rsi),@S[2]
	mov	12(%rsi),@S[3]

	bswap	@S[0]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]
___
	&_saveround	(0,$out,@S);	# KL<<<0
$code.=<<___;
	cmp	\$128,$keyend		# check keyBitLength
	je	.L1st128

	mov	16(%rsi),@S[0]		# load 128-191 bits
	mov	20(%rsi),@S[1]
	cmp	\$192,$keyend
	je	.L1st192
	mov	24(%rsi),@S[2]		# load 192-255 bits
	mov	28(%rsi),@S[3]
	jmp	.L1st256
.L1st192:
	mov	@S[0],@S[2]
	mov	@S[1],@S[3]
	not	@S[2]
	not	@S[3]
.L1st256:
	bswap	@S[0]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]
___
	&_saveround	(4,$out,@S);	# temp storage for KR!
$code.=<<___;
	xor	0($out),@S[1]		# KR^KL
	xor	4($out),@S[0]
	xor	8($out),@S[3]
	xor	12($out),@S[2]

.L1st128:
	lea	.LCamellia_SIGMA(%rip),$key
	lea	.LCamellia_SBOX(%rip),$Tbl

	mov	0($key),$t1
	mov	4($key),$t0
___
	&Camellia_Feistel($step++);
	&Camellia_Feistel($step++);
$code.=<<___;
	xor	0($out),@S[1]		# ^KL
	xor	4($out),@S[0]
	xor	8($out),@S[3]
	xor	12($out),@S[2]
___
	&Camellia_Feistel($step++);
	&Camellia_Feistel($step++);
$code.=<<___;
	cmp	\$128,$keyend
	jne	.L2nd256

	lea	128($out),$out		# size optimization
	shl	\$32,%r8		# @S[0]||
	shl	\$32,%r10		# @S[2]||
	or	%r9,%r8			# ||@S[1]
	or	%r11,%r10		# ||@S[3]
___
	# 128-bit key: subkeys are rotations of KL (%rax:%rbx) and KA
	# (%r8:%r10).  $out was advanced by 128, hence the -128 bias.
	&_loadround	(0,$out,-128,"%rax","%rbx");	# KL
	&_saveround	(2,$out,-128,"%r8","%r10");	# KA<<<0
	&_rotl128	("%rax","%rbx",15);
	&_saveround	(4,$out,-128,"%rax","%rbx");	# KL<<<15
	&_rotl128	("%r8","%r10",15);
	&_saveround	(6,$out,-128,"%r8","%r10");	# KA<<<15
	&_rotl128	("%r8","%r10",15);		# 15+15=30
	&_saveround	(8,$out,-128,"%r8","%r10");	# KA<<<30
	&_rotl128	("%rax","%rbx",30);		# 15+30=45
	&_saveround	(10,$out,-128,"%rax","%rbx");	# KL<<<45
	&_rotl128	("%r8","%r10",15);		# 30+15=45
	&_saveround	(12,$out,-128,"%r8");		# KA<<<45
	&_rotl128	("%rax","%rbx",15);		# 45+15=60
	&_saveround	(13,$out,-128,"%rbx");		# KL<<<60
	&_rotl128	("%r8","%r10",15);		# 45+15=60
	&_saveround	(14,$out,-128,"%r8","%r10");	# KA<<<60
	&_rotl128	("%rax","%rbx",17);		# 60+17=77
	&_saveround	(16,$out,-128,"%rax","%rbx");	# KL<<<77
	&_rotl128	("%rax","%rbx",17);		# 77+17=94
	&_saveround	(18,$out,-128,"%rax","%rbx");	# KL<<<94
	&_rotl128	("%r8","%r10",34);		# 60+34=94
	&_saveround	(20,$out,-128,"%r8","%r10");	# KA<<<94
	&_rotl128	("%rax","%rbx",17);		# 94+17=111
	&_saveround	(22,$out,-128,"%rax","%rbx");	# KL<<<111
	&_rotl128	("%r8","%r10",17);		# 94+17=111
	&_saveround	(24,$out,-128,"%r8","%r10");	# KA<<<111
$code.=<<___;
	mov	\$3,%eax
	jmp	.Ldone
.align	16
.L2nd256:
___
	&_saveround	(6,$out,@S);	# temp storage for KA!
$code.=<<___;
	xor	`4*8+0`($out),@S[1]	# KA^KR
	xor	`4*8+4`($out),@S[0]
	xor	`5*8+0`($out),@S[3]
	xor	`5*8+4`($out),@S[2]
___
	&Camellia_Feistel($step++);
	&Camellia_Feistel($step++);

	# 192/256-bit key: reload KL, KR and KA from their temporary slots;
	# KB is the state just produced in @S (%r8:%r10 after packing below).
	&_loadround	(0,$out,"%rax","%rbx");	# KL
	&_loadround	(4,$out,"%rcx","%rdx");	# KR
	&_loadround	(6,$out,"%r14","%r15");	# KA
$code.=<<___;
	lea	128($out),$out		# size optimization
	shl	\$32,%r8		# @S[0]||
	shl	\$32,%r10		# @S[2]||
	or	%r9,%r8			# ||@S[1]
	or	%r11,%r10		# ||@S[3]
___
	&_saveround	(2,$out,-128,"%r8","%r10");	# KB<<<0
	&_rotl128	("%rcx","%rdx",15);
	&_saveround	(4,$out,-128,"%rcx","%rdx");	# KR<<<15
	&_rotl128	("%r14","%r15",15);
	&_saveround	(6,$out,-128,"%r14","%r15");	# KA<<<15
	&_rotl128	("%rcx","%rdx",15);		# 15+15=30
	&_saveround	(8,$out,-128,"%rcx","%rdx");	# KR<<<30
	&_rotl128	("%r8","%r10",30);
	&_saveround	(10,$out,-128,"%r8","%r10");	# KB<<<30
	&_rotl128	("%rax","%rbx",45);
	&_saveround	(12,$out,-128,"%rax","%rbx");	# KL<<<45
	&_rotl128	("%r14","%r15",30);		# 15+30=45
	&_saveround	(14,$out,-128,"%r14","%r15");	# KA<<<45
	&_rotl128	("%rax","%rbx",15);		# 45+15=60
	&_saveround	(16,$out,-128,"%rax","%rbx");	# KL<<<60
	&_rotl128	("%rcx","%rdx",30);		# 30+30=60
	&_saveround	(18,$out,-128,"%rcx","%rdx");	# KR<<<60
	&_rotl128	("%r8","%r10",30);		# 30+30=60
	&_saveround	(20,$out,-128,"%r8","%r10");	# KB<<<60
	&_rotl128	("%rax","%rbx",17);		# 60+17=77
	&_saveround	(22,$out,-128,"%rax","%rbx");	# KL<<<77
	&_rotl128	("%r14","%r15",32);		# 45+32=77
	&_saveround	(24,$out,-128,"%r14","%r15");	# KA<<<77
	&_rotl128	("%rcx","%rdx",34);		# 60+34=94
	&_saveround	(26,$out,-128,"%rcx","%rdx");	# KR<<<94
	&_rotl128	("%r14","%r15",17);		# 77+17=94
	&_saveround	(28,$out,-128,"%r14","%r15");	# KA<<<94
	&_rotl128	("%rax","%rbx",34);		# 77+34=111
	&_saveround	(30,$out,-128,"%rax","%rbx");	# KL<<<111
	&_rotl128	("%r8","%r10",51);		# 60+51=111
	&_saveround	(32,$out,-128,"%r8","%r10");	# KB<<<111
$code.=<<___;
	mov	\$4,%eax
.Ldone:
	mov	0(%rsp),%r15
.cfi_restore	%r15
	mov	8(%rsp),%r14
.cfi_restore	%r14
	mov	16(%rsp),%r13
.cfi_restore	%r13
	mov	24(%rsp),%rbp
.cfi_restore	%rbp
	mov	32(%rsp),%rbx
.cfi_restore	%rbx
	lea	40(%rsp),%rsp
.cfi_adjust_cfa_offset	-40
.Lkey_epilogue:
	ret
.cfi_endproc
.size	Camellia_Ekeygen,.-Camellia_Ekeygen
___
}

# Base 8-bit substitution table (256 entries).  The four 32-bit lookup
# tables are derived from it by S1110/S4404/S0222/S3033.
@SBOX=(
112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65,
 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189,
134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26,
166,225, 57,202,213, 71, 93, 61,217,  1, 90,214, 81, 86,108, 77,
139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153,
223, 76,203,194, 52,126,118,  5,109,183,169, 49,209, 23,  4,215,
 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34,
254, 68,207,178,195,181,122,145, 36,  8,232,168, 96,252,105, 80,
170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210,
 16,196,  0, 72,163,247,117,219,138,  3,230,218,  9, 63,221,148,
135, 92,131,  2,205, 74,144, 51,115,103,246,243,157,127,191,226,
 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46,
233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89,
120,152,  6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250,
114,  7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164,
 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158);

654e1051a39Sopenharmony_cisub S1110 { my $i=shift; $i=@SBOX[$i]; $i=$i<<24|$i<<16|$i<<8; sprintf("0x%08x",$i); }
655e1051a39Sopenharmony_cisub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; $i=$i<<24|$i<<16|$i; sprintf("0x%08x",$i); }
656e1051a39Sopenharmony_cisub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; $i=$i<<16|$i<<8|$i; sprintf("0x%08x",$i); }
657e1051a39Sopenharmony_cisub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; $i=$i<<24|$i<<8|$i; sprintf("0x%08x",$i); }
658e1051a39Sopenharmony_ci
# Append the constant table used by key setup.  It holds twelve non-zero
# 32-bit words (the six 64-bit Camellia Sigma constants of RFC 3713, each
# stored low word first) followed by four zero words.  That is 64 bytes in
# total after a 64-byte alignment, so the .LCamellia_SBOX label that closes
# this heredoc is itself 64-byte aligned.  The label carries no data here;
# the table words are appended by the statements that follow.
659e1051a39Sopenharmony_ci$code.=<<___;
660e1051a39Sopenharmony_ci.align	64
661e1051a39Sopenharmony_ci.LCamellia_SIGMA:
662e1051a39Sopenharmony_ci.long	0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858
663e1051a39Sopenharmony_ci.long	0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5
664e1051a39Sopenharmony_ci.long	0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2
665e1051a39Sopenharmony_ci.long	0,          0,          0,          0
666e1051a39Sopenharmony_ci.LCamellia_SBOX:
667e1051a39Sopenharmony_ci___
# Append one ".long" directive to the generated code, with the arguments
# as its comma-separated operands.  Reminder: the tables are interleaved,
# so each call emits one row holding a word from each of two tables.
# Returns the accumulated code string.
sub data_word {
    my @words = @_;
    $code .= ".long\t" . join(',', @words) . "\n";
}
# Emit the 512 rows that follow the .LCamellia_SBOX label: 256 rows pairing
# S1110 with S4404, then 256 rows pairing S0222 with S3033.  The counter is
# deliberately the package variable $i, which is left at 256 afterwards.
foreach my $pair ([\&S1110, \&S4404], [\&S0222, \&S3033]) {
    my ($first_word, $second_word) = @$pair;
    for ($i = 0; $i < 256; $i++) {
        &data_word($first_word->($i), $second_word->($i));
    }
}
672e1051a39Sopenharmony_ci
673e1051a39Sopenharmony_ci# void Camellia_cbc_encrypt (const unsigned char *inp, unsigned char *out,
674e1051a39Sopenharmony_ci#			size_t length, const CAMELLIA_KEY *key,
675e1051a39Sopenharmony_ci#			unsigned char *ivp,const int enc);
676e1051a39Sopenharmony_ci{
# Generator for Camellia_cbc_encrypt.  The scalars below name the slots of
# the 64-byte-aligned scratch frame that the prologue carves out beneath the
# pushed registers; they are interpolated into the assembly text as
# %rsp-relative operands.
677e1051a39Sopenharmony_ci$_key="0(%rsp)";	# key pointer to start each block from
678e1051a39Sopenharmony_ci$_end="8(%rsp)";	# inp+len&~15
679e1051a39Sopenharmony_ci$_res="16(%rsp)";	# len&15
680e1051a39Sopenharmony_ci$ivec="24(%rsp)";	# 16-byte scratch block
681e1051a39Sopenharmony_ci$_ivp="40(%rsp)";	# caller's ivp argument
682e1051a39Sopenharmony_ci$_rsp="48(%rsp)";	# stack pointer as it was right after the pushes
683e1051a39Sopenharmony_ci
# Flow of the generated routine:
#  - return immediately when the length argument is zero;
#  - push the callee-saved registers, build the frame and read through the
#    whole 4096-byte table at .LCamellia_SBOX;
#  - enc != 0: XOR each 16-byte input block into the running IV, call
#    _x86_64_Camellia_encrypt and store the result, which becomes the next
#    IV.  A trailing partial block is copied into the zeroed $ivec scratch
#    and run through the same loop once more as a full block;
#  - enc == 0: call _x86_64_Camellia_decrypt on each block and XOR with the
#    previous ciphertext block kept in $ivec.  When the length is not a
#    multiple of 16, only len&15 bytes of the last block are copied out;
#  - both paths store the next IV through ivp, then .Lcbc_done restores the
#    registers from the location saved in $_rsp.
# The 8+$ivec operands between the *_pushf and *_popf labels compensate for
# the 8 bytes that pushfq has put on the stack at that point.
684e1051a39Sopenharmony_ci$code.=<<___;
685e1051a39Sopenharmony_ci.globl	Camellia_cbc_encrypt
686e1051a39Sopenharmony_ci.type	Camellia_cbc_encrypt,\@function,6
687e1051a39Sopenharmony_ci.align	16
688e1051a39Sopenharmony_ciCamellia_cbc_encrypt:
689e1051a39Sopenharmony_ci.cfi_startproc
690e1051a39Sopenharmony_ci	endbranch
691e1051a39Sopenharmony_ci	cmp	\$0,%rdx
692e1051a39Sopenharmony_ci	je	.Lcbc_abort
693e1051a39Sopenharmony_ci	push	%rbx
694e1051a39Sopenharmony_ci.cfi_push	%rbx
695e1051a39Sopenharmony_ci	push	%rbp
696e1051a39Sopenharmony_ci.cfi_push	%rbp
697e1051a39Sopenharmony_ci	push	%r12
698e1051a39Sopenharmony_ci.cfi_push	%r12
699e1051a39Sopenharmony_ci	push	%r13
700e1051a39Sopenharmony_ci.cfi_push	%r13
701e1051a39Sopenharmony_ci	push	%r14
702e1051a39Sopenharmony_ci.cfi_push	%r14
703e1051a39Sopenharmony_ci	push	%r15
704e1051a39Sopenharmony_ci.cfi_push	%r15
705e1051a39Sopenharmony_ci.Lcbc_prologue:
706e1051a39Sopenharmony_ci
707e1051a39Sopenharmony_ci	mov	%rsp,%rbp
708e1051a39Sopenharmony_ci.cfi_def_cfa_register	%rbp
709e1051a39Sopenharmony_ci	sub	\$64,%rsp
710e1051a39Sopenharmony_ci	and	\$-64,%rsp
711e1051a39Sopenharmony_ci
712e1051a39Sopenharmony_ci	# place stack frame just "above mod 1024" the key schedule,
713e1051a39Sopenharmony_ci	# this ensures that cache associativity suffices
714e1051a39Sopenharmony_ci	lea	-64-63(%rcx),%r10
715e1051a39Sopenharmony_ci	sub	%rsp,%r10
716e1051a39Sopenharmony_ci	neg	%r10
717e1051a39Sopenharmony_ci	and	\$0x3C0,%r10
718e1051a39Sopenharmony_ci	sub	%r10,%rsp
719e1051a39Sopenharmony_ci	#add	\$8,%rsp		# 8 is reserved for callee's ra
720e1051a39Sopenharmony_ci
721e1051a39Sopenharmony_ci	mov	%rdi,$inp		# inp argument
722e1051a39Sopenharmony_ci	mov	%rsi,$out		# out argument
723e1051a39Sopenharmony_ci	mov	%r8,%rbx		# ivp argument
724e1051a39Sopenharmony_ci	mov	%rcx,$key		# key argument
725e1051a39Sopenharmony_ci	mov	272(%rcx),${keyend}d	# grandRounds
726e1051a39Sopenharmony_ci
727e1051a39Sopenharmony_ci	mov	%r8,$_ivp
728e1051a39Sopenharmony_ci	mov	%rbp,$_rsp
729e1051a39Sopenharmony_ci.cfi_cfa_expression	$_rsp,deref,+56
730e1051a39Sopenharmony_ci
731e1051a39Sopenharmony_ci.Lcbc_body:
732e1051a39Sopenharmony_ci	lea	.LCamellia_SBOX(%rip),$Tbl
733e1051a39Sopenharmony_ci
734e1051a39Sopenharmony_ci	mov	\$32,%ecx
735e1051a39Sopenharmony_ci.align	4
736e1051a39Sopenharmony_ci.Lcbc_prefetch_sbox:
737e1051a39Sopenharmony_ci	mov	0($Tbl),%rax
738e1051a39Sopenharmony_ci	mov	32($Tbl),%rsi
739e1051a39Sopenharmony_ci	mov	64($Tbl),%rdi
740e1051a39Sopenharmony_ci	mov	96($Tbl),%r11
741e1051a39Sopenharmony_ci	lea	128($Tbl),$Tbl
742e1051a39Sopenharmony_ci	loop	.Lcbc_prefetch_sbox
743e1051a39Sopenharmony_ci	sub	\$4096,$Tbl
744e1051a39Sopenharmony_ci	shl	\$6,$keyend
745e1051a39Sopenharmony_ci	mov	%rdx,%rcx		# len argument
746e1051a39Sopenharmony_ci	lea	($key,$keyend),$keyend
747e1051a39Sopenharmony_ci
748e1051a39Sopenharmony_ci	cmp	\$0,%r9d		# enc argument
749e1051a39Sopenharmony_ci	je	.LCBC_DECRYPT
750e1051a39Sopenharmony_ci
751e1051a39Sopenharmony_ci	and	\$-16,%rdx
752e1051a39Sopenharmony_ci	and	\$15,%rcx		# length residue
753e1051a39Sopenharmony_ci	lea	($inp,%rdx),%rdx
754e1051a39Sopenharmony_ci	mov	$key,$_key
755e1051a39Sopenharmony_ci	mov	%rdx,$_end
756e1051a39Sopenharmony_ci	mov	%rcx,$_res
757e1051a39Sopenharmony_ci
758e1051a39Sopenharmony_ci	cmp	$inp,%rdx
759e1051a39Sopenharmony_ci	mov	0(%rbx),@S[0]		# load IV
760e1051a39Sopenharmony_ci	mov	4(%rbx),@S[1]
761e1051a39Sopenharmony_ci	mov	8(%rbx),@S[2]
762e1051a39Sopenharmony_ci	mov	12(%rbx),@S[3]
763e1051a39Sopenharmony_ci	je	.Lcbc_enc_tail
764e1051a39Sopenharmony_ci	jmp	.Lcbc_eloop
765e1051a39Sopenharmony_ci
766e1051a39Sopenharmony_ci.align	16
767e1051a39Sopenharmony_ci.Lcbc_eloop:
768e1051a39Sopenharmony_ci	xor	0($inp),@S[0]
769e1051a39Sopenharmony_ci	xor	4($inp),@S[1]
770e1051a39Sopenharmony_ci	xor	8($inp),@S[2]
771e1051a39Sopenharmony_ci	bswap	@S[0]
772e1051a39Sopenharmony_ci	xor	12($inp),@S[3]
773e1051a39Sopenharmony_ci	bswap	@S[1]
774e1051a39Sopenharmony_ci	bswap	@S[2]
775e1051a39Sopenharmony_ci	bswap	@S[3]
776e1051a39Sopenharmony_ci
777e1051a39Sopenharmony_ci	call	_x86_64_Camellia_encrypt
778e1051a39Sopenharmony_ci
779e1051a39Sopenharmony_ci	mov	$_key,$key		# "rewind" the key
780e1051a39Sopenharmony_ci	bswap	@S[0]
781e1051a39Sopenharmony_ci	mov	$_end,%rdx
782e1051a39Sopenharmony_ci	bswap	@S[1]
783e1051a39Sopenharmony_ci	mov	$_res,%rcx
784e1051a39Sopenharmony_ci	bswap	@S[2]
785e1051a39Sopenharmony_ci	mov	@S[0],0($out)
786e1051a39Sopenharmony_ci	bswap	@S[3]
787e1051a39Sopenharmony_ci	mov	@S[1],4($out)
788e1051a39Sopenharmony_ci	mov	@S[2],8($out)
789e1051a39Sopenharmony_ci	lea	16($inp),$inp
790e1051a39Sopenharmony_ci	mov	@S[3],12($out)
791e1051a39Sopenharmony_ci	cmp	%rdx,$inp
792e1051a39Sopenharmony_ci	lea	16($out),$out
793e1051a39Sopenharmony_ci	jne	.Lcbc_eloop
794e1051a39Sopenharmony_ci
795e1051a39Sopenharmony_ci	cmp	\$0,%rcx
796e1051a39Sopenharmony_ci	jne	.Lcbc_enc_tail
797e1051a39Sopenharmony_ci
798e1051a39Sopenharmony_ci	mov	$_ivp,$out
799e1051a39Sopenharmony_ci	mov	@S[0],0($out)		# write out IV residue
800e1051a39Sopenharmony_ci	mov	@S[1],4($out)
801e1051a39Sopenharmony_ci	mov	@S[2],8($out)
802e1051a39Sopenharmony_ci	mov	@S[3],12($out)
803e1051a39Sopenharmony_ci	jmp	.Lcbc_done
804e1051a39Sopenharmony_ci
805e1051a39Sopenharmony_ci.align	16
806e1051a39Sopenharmony_ci.Lcbc_enc_tail:
807e1051a39Sopenharmony_ci	xor	%rax,%rax
808e1051a39Sopenharmony_ci	mov	%rax,0+$ivec
809e1051a39Sopenharmony_ci	mov	%rax,8+$ivec
810e1051a39Sopenharmony_ci	mov	%rax,$_res
811e1051a39Sopenharmony_ci
812e1051a39Sopenharmony_ci.Lcbc_enc_pushf:
813e1051a39Sopenharmony_ci	pushfq
814e1051a39Sopenharmony_ci	cld
815e1051a39Sopenharmony_ci	mov	$inp,%rsi
816e1051a39Sopenharmony_ci	lea	8+$ivec,%rdi
817e1051a39Sopenharmony_ci	.long	0x9066A4F3		# rep movsb
818e1051a39Sopenharmony_ci	popfq
819e1051a39Sopenharmony_ci.Lcbc_enc_popf:
820e1051a39Sopenharmony_ci
821e1051a39Sopenharmony_ci	lea	$ivec,$inp
822e1051a39Sopenharmony_ci	lea	16+$ivec,%rax
823e1051a39Sopenharmony_ci	mov	%rax,$_end
824e1051a39Sopenharmony_ci	jmp	.Lcbc_eloop		# one more time
825e1051a39Sopenharmony_ci
826e1051a39Sopenharmony_ci.align	16
827e1051a39Sopenharmony_ci.LCBC_DECRYPT:
828e1051a39Sopenharmony_ci	xchg	$key,$keyend
829e1051a39Sopenharmony_ci	add	\$15,%rdx
830e1051a39Sopenharmony_ci	and	\$15,%rcx		# length residue
831e1051a39Sopenharmony_ci	and	\$-16,%rdx
832e1051a39Sopenharmony_ci	mov	$key,$_key
833e1051a39Sopenharmony_ci	lea	($inp,%rdx),%rdx
834e1051a39Sopenharmony_ci	mov	%rdx,$_end
835e1051a39Sopenharmony_ci	mov	%rcx,$_res
836e1051a39Sopenharmony_ci
837e1051a39Sopenharmony_ci	mov	(%rbx),%rax		# load IV
838e1051a39Sopenharmony_ci	mov	8(%rbx),%rbx
839e1051a39Sopenharmony_ci	jmp	.Lcbc_dloop
840e1051a39Sopenharmony_ci.align	16
841e1051a39Sopenharmony_ci.Lcbc_dloop:
842e1051a39Sopenharmony_ci	mov	0($inp),@S[0]
843e1051a39Sopenharmony_ci	mov	4($inp),@S[1]
844e1051a39Sopenharmony_ci	mov	8($inp),@S[2]
845e1051a39Sopenharmony_ci	bswap	@S[0]
846e1051a39Sopenharmony_ci	mov	12($inp),@S[3]
847e1051a39Sopenharmony_ci	bswap	@S[1]
848e1051a39Sopenharmony_ci	mov	%rax,0+$ivec		# save IV to temporary storage
849e1051a39Sopenharmony_ci	bswap	@S[2]
850e1051a39Sopenharmony_ci	mov	%rbx,8+$ivec
851e1051a39Sopenharmony_ci	bswap	@S[3]
852e1051a39Sopenharmony_ci
853e1051a39Sopenharmony_ci	call	_x86_64_Camellia_decrypt
854e1051a39Sopenharmony_ci
855e1051a39Sopenharmony_ci	mov	$_key,$key		# "rewind" the key
856e1051a39Sopenharmony_ci	mov	$_end,%rdx
857e1051a39Sopenharmony_ci	mov	$_res,%rcx
858e1051a39Sopenharmony_ci
859e1051a39Sopenharmony_ci	bswap	@S[0]
860e1051a39Sopenharmony_ci	mov	($inp),%rax		# load IV for next iteration
861e1051a39Sopenharmony_ci	bswap	@S[1]
862e1051a39Sopenharmony_ci	mov	8($inp),%rbx
863e1051a39Sopenharmony_ci	bswap	@S[2]
864e1051a39Sopenharmony_ci	xor	0+$ivec,@S[0]
865e1051a39Sopenharmony_ci	bswap	@S[3]
866e1051a39Sopenharmony_ci	xor	4+$ivec,@S[1]
867e1051a39Sopenharmony_ci	xor	8+$ivec,@S[2]
868e1051a39Sopenharmony_ci	lea	16($inp),$inp
869e1051a39Sopenharmony_ci	xor	12+$ivec,@S[3]
870e1051a39Sopenharmony_ci	cmp	%rdx,$inp
871e1051a39Sopenharmony_ci	je	.Lcbc_ddone
872e1051a39Sopenharmony_ci
873e1051a39Sopenharmony_ci	mov	@S[0],0($out)
874e1051a39Sopenharmony_ci	mov	@S[1],4($out)
875e1051a39Sopenharmony_ci	mov	@S[2],8($out)
876e1051a39Sopenharmony_ci	mov	@S[3],12($out)
877e1051a39Sopenharmony_ci
878e1051a39Sopenharmony_ci	lea	16($out),$out
879e1051a39Sopenharmony_ci	jmp	.Lcbc_dloop
880e1051a39Sopenharmony_ci
881e1051a39Sopenharmony_ci.align	16
882e1051a39Sopenharmony_ci.Lcbc_ddone:
883e1051a39Sopenharmony_ci	mov	$_ivp,%rdx
884e1051a39Sopenharmony_ci	cmp	\$0,%rcx
885e1051a39Sopenharmony_ci	jne	.Lcbc_dec_tail
886e1051a39Sopenharmony_ci
887e1051a39Sopenharmony_ci	mov	@S[0],0($out)
888e1051a39Sopenharmony_ci	mov	@S[1],4($out)
889e1051a39Sopenharmony_ci	mov	@S[2],8($out)
890e1051a39Sopenharmony_ci	mov	@S[3],12($out)
891e1051a39Sopenharmony_ci
892e1051a39Sopenharmony_ci	mov	%rax,(%rdx)		# write out IV residue
893e1051a39Sopenharmony_ci	mov	%rbx,8(%rdx)
894e1051a39Sopenharmony_ci	jmp	.Lcbc_done
895e1051a39Sopenharmony_ci.align	16
896e1051a39Sopenharmony_ci.Lcbc_dec_tail:
897e1051a39Sopenharmony_ci	mov	@S[0],0+$ivec
898e1051a39Sopenharmony_ci	mov	@S[1],4+$ivec
899e1051a39Sopenharmony_ci	mov	@S[2],8+$ivec
900e1051a39Sopenharmony_ci	mov	@S[3],12+$ivec
901e1051a39Sopenharmony_ci
902e1051a39Sopenharmony_ci.Lcbc_dec_pushf:
903e1051a39Sopenharmony_ci	pushfq
904e1051a39Sopenharmony_ci	cld
905e1051a39Sopenharmony_ci	lea	8+$ivec,%rsi
906e1051a39Sopenharmony_ci	lea	($out),%rdi
907e1051a39Sopenharmony_ci	.long	0x9066A4F3		# rep movsb
908e1051a39Sopenharmony_ci	popfq
909e1051a39Sopenharmony_ci.Lcbc_dec_popf:
910e1051a39Sopenharmony_ci
911e1051a39Sopenharmony_ci	mov	%rax,(%rdx)		# write out IV residue
912e1051a39Sopenharmony_ci	mov	%rbx,8(%rdx)
913e1051a39Sopenharmony_ci	jmp	.Lcbc_done
914e1051a39Sopenharmony_ci
915e1051a39Sopenharmony_ci.align	16
916e1051a39Sopenharmony_ci.Lcbc_done:
917e1051a39Sopenharmony_ci	mov	$_rsp,%rcx
918e1051a39Sopenharmony_ci.cfi_def_cfa	%rcx,56
919e1051a39Sopenharmony_ci	mov	0(%rcx),%r15
920e1051a39Sopenharmony_ci.cfi_restore	%r15
921e1051a39Sopenharmony_ci	mov	8(%rcx),%r14
922e1051a39Sopenharmony_ci.cfi_restore	%r14
923e1051a39Sopenharmony_ci	mov	16(%rcx),%r13
924e1051a39Sopenharmony_ci.cfi_restore	%r13
925e1051a39Sopenharmony_ci	mov	24(%rcx),%r12
926e1051a39Sopenharmony_ci.cfi_restore	%r12
927e1051a39Sopenharmony_ci	mov	32(%rcx),%rbp
928e1051a39Sopenharmony_ci.cfi_restore	%rbp
929e1051a39Sopenharmony_ci	mov	40(%rcx),%rbx
930e1051a39Sopenharmony_ci.cfi_restore	%rbx
931e1051a39Sopenharmony_ci	lea	48(%rcx),%rsp
932e1051a39Sopenharmony_ci.cfi_def_cfa	%rsp,8
933e1051a39Sopenharmony_ci.Lcbc_abort:
934e1051a39Sopenharmony_ci	ret
935e1051a39Sopenharmony_ci.cfi_endproc
936e1051a39Sopenharmony_ci.size	Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
937e1051a39Sopenharmony_ci
938e1051a39Sopenharmony_ci.asciz	"Camellia for x86_64 by <appro\@openssl.org>"
939e1051a39Sopenharmony_ci___
940e1051a39Sopenharmony_ci}
941e1051a39Sopenharmony_ci
942e1051a39Sopenharmony_ci# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
943e1051a39Sopenharmony_ci#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
944e1051a39Sopenharmony_ciif ($win64) {
# Win64 only: emit two structured-exception handlers and the .pdata/.xdata
# records that attach them to the four public routines.  The scalars bind
# the handler's four arguments, in the order of the prototype above, to the
# registers that carry them (the Win64 convention passes the first four
# integer arguments in rcx, rdx, r8, r9).  Only $context and $disp are
# referenced in the assembly below.
945e1051a39Sopenharmony_ci$rec="%rcx";
946e1051a39Sopenharmony_ci$frame="%rdx";
947e1051a39Sopenharmony_ci$context="%r8";
948e1051a39Sopenharmony_ci$disp="%r9";
949e1051a39Sopenharmony_ci
# common_se_handler serves the routines whose .xdata record carries
# HandlerData = (prologue label, epilogue label).  When context->Rip lies
# between the two labels it reloads rbx, rbp, r13, r14 and r15 from the
# 40 bytes at context->Rsp and stores them into the context.
#
# cbc_se_handler hard-codes the labels of Camellia_cbc_encrypt.  Past
# .Lcbc_body it skips the 8 bytes that pushfq leaves on the stack between
# the *_pushf and *_popf labels, fetches the saved stack pointer from
# 48(%rsp) and reloads rbx, rbp and r12-r15 from just below it.
#
# Both handlers finish in .Lcommon_seh_exit, which copies the context into
# disp->ContextRecord, calls RtlVirtualUnwind and returns
# ExceptionContinueSearch.
950e1051a39Sopenharmony_ci$code.=<<___;
951e1051a39Sopenharmony_ci.extern	__imp_RtlVirtualUnwind
952e1051a39Sopenharmony_ci.type	common_se_handler,\@abi-omnipotent
953e1051a39Sopenharmony_ci.align	16
954e1051a39Sopenharmony_cicommon_se_handler:
955e1051a39Sopenharmony_ci	push	%rsi
956e1051a39Sopenharmony_ci	push	%rdi
957e1051a39Sopenharmony_ci	push	%rbx
958e1051a39Sopenharmony_ci	push	%rbp
959e1051a39Sopenharmony_ci	push	%r12
960e1051a39Sopenharmony_ci	push	%r13
961e1051a39Sopenharmony_ci	push	%r14
962e1051a39Sopenharmony_ci	push	%r15
963e1051a39Sopenharmony_ci	pushfq
964e1051a39Sopenharmony_ci	lea	-64(%rsp),%rsp
965e1051a39Sopenharmony_ci
966e1051a39Sopenharmony_ci	mov	120($context),%rax	# pull context->Rax
967e1051a39Sopenharmony_ci	mov	248($context),%rbx	# pull context->Rip
968e1051a39Sopenharmony_ci
969e1051a39Sopenharmony_ci	mov	8($disp),%rsi		# disp->ImageBase
970e1051a39Sopenharmony_ci	mov	56($disp),%r11		# disp->HandlerData
971e1051a39Sopenharmony_ci
972e1051a39Sopenharmony_ci	mov	0(%r11),%r10d		# HandlerData[0]
973e1051a39Sopenharmony_ci	lea	(%rsi,%r10),%r10	# prologue label
974e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip<prologue label
975e1051a39Sopenharmony_ci	jb	.Lin_prologue
976e1051a39Sopenharmony_ci
977e1051a39Sopenharmony_ci	mov	152($context),%rax	# pull context->Rsp
978e1051a39Sopenharmony_ci
979e1051a39Sopenharmony_ci	mov	4(%r11),%r10d		# HandlerData[1]
980e1051a39Sopenharmony_ci	lea	(%rsi,%r10),%r10	# epilogue label
981e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip>=epilogue label
982e1051a39Sopenharmony_ci	jae	.Lin_prologue
983e1051a39Sopenharmony_ci
984e1051a39Sopenharmony_ci	lea	40(%rax),%rax
985e1051a39Sopenharmony_ci	mov	-8(%rax),%rbx
986e1051a39Sopenharmony_ci	mov	-16(%rax),%rbp
987e1051a39Sopenharmony_ci	mov	-24(%rax),%r13
988e1051a39Sopenharmony_ci	mov	-32(%rax),%r14
989e1051a39Sopenharmony_ci	mov	-40(%rax),%r15
990e1051a39Sopenharmony_ci	mov	%rbx,144($context)	# restore context->Rbx
991e1051a39Sopenharmony_ci	mov	%rbp,160($context)	# restore context->Rbp
992e1051a39Sopenharmony_ci	mov	%r13,224($context)	# restore context->R13
993e1051a39Sopenharmony_ci	mov	%r14,232($context)	# restore context->R14
994e1051a39Sopenharmony_ci	mov	%r15,240($context)	# restore context->R15
995e1051a39Sopenharmony_ci
996e1051a39Sopenharmony_ci.Lin_prologue:
997e1051a39Sopenharmony_ci	mov	8(%rax),%rdi
998e1051a39Sopenharmony_ci	mov	16(%rax),%rsi
999e1051a39Sopenharmony_ci	mov	%rax,152($context)	# restore context->Rsp
1000e1051a39Sopenharmony_ci	mov	%rsi,168($context)	# restore context->Rsi
1001e1051a39Sopenharmony_ci	mov	%rdi,176($context)	# restore context->Rdi
1002e1051a39Sopenharmony_ci
1003e1051a39Sopenharmony_ci	jmp	.Lcommon_seh_exit
1004e1051a39Sopenharmony_ci.size	common_se_handler,.-common_se_handler
1005e1051a39Sopenharmony_ci
1006e1051a39Sopenharmony_ci.type	cbc_se_handler,\@abi-omnipotent
1007e1051a39Sopenharmony_ci.align	16
1008e1051a39Sopenharmony_cicbc_se_handler:
1009e1051a39Sopenharmony_ci	push	%rsi
1010e1051a39Sopenharmony_ci	push	%rdi
1011e1051a39Sopenharmony_ci	push	%rbx
1012e1051a39Sopenharmony_ci	push	%rbp
1013e1051a39Sopenharmony_ci	push	%r12
1014e1051a39Sopenharmony_ci	push	%r13
1015e1051a39Sopenharmony_ci	push	%r14
1016e1051a39Sopenharmony_ci	push	%r15
1017e1051a39Sopenharmony_ci	pushfq
1018e1051a39Sopenharmony_ci	lea	-64(%rsp),%rsp
1019e1051a39Sopenharmony_ci
1020e1051a39Sopenharmony_ci	mov	120($context),%rax	# pull context->Rax
1021e1051a39Sopenharmony_ci	mov	248($context),%rbx	# pull context->Rip
1022e1051a39Sopenharmony_ci
1023e1051a39Sopenharmony_ci	lea	.Lcbc_prologue(%rip),%r10
1024e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip<.Lcbc_prologue
1025e1051a39Sopenharmony_ci	jb	.Lin_cbc_prologue
1026e1051a39Sopenharmony_ci
1027e1051a39Sopenharmony_ci	lea	.Lcbc_body(%rip),%r10
1028e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip<.Lcbc_body
1029e1051a39Sopenharmony_ci	jb	.Lin_cbc_frame_setup
1030e1051a39Sopenharmony_ci
1031e1051a39Sopenharmony_ci	mov	152($context),%rax	# pull context->Rsp
1032e1051a39Sopenharmony_ci
1033e1051a39Sopenharmony_ci	lea	.Lcbc_abort(%rip),%r10
1034e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip>=.Lcbc_abort
1035e1051a39Sopenharmony_ci	jae	.Lin_cbc_prologue
1036e1051a39Sopenharmony_ci
1037e1051a39Sopenharmony_ci	# handle pushf/popf in Camellia_cbc_encrypt
1038e1051a39Sopenharmony_ci	lea	.Lcbc_enc_pushf(%rip),%r10
1039e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip<=.Lcbc_enc_pushf
1040e1051a39Sopenharmony_ci	jbe	.Lin_cbc_no_flag
1041e1051a39Sopenharmony_ci	lea	8(%rax),%rax
1042e1051a39Sopenharmony_ci	lea	.Lcbc_enc_popf(%rip),%r10
1043e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip<.Lcbc_enc_popf
1044e1051a39Sopenharmony_ci	jb	.Lin_cbc_no_flag
1045e1051a39Sopenharmony_ci	lea	-8(%rax),%rax
1046e1051a39Sopenharmony_ci	lea	.Lcbc_dec_pushf(%rip),%r10
1047e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip<=.Lcbc_dec_pushf
1048e1051a39Sopenharmony_ci	jbe	.Lin_cbc_no_flag
1049e1051a39Sopenharmony_ci	lea	8(%rax),%rax
1050e1051a39Sopenharmony_ci	lea	.Lcbc_dec_popf(%rip),%r10
1051e1051a39Sopenharmony_ci	cmp	%r10,%rbx		# context->Rip<.Lcbc_dec_popf
1052e1051a39Sopenharmony_ci	jb	.Lin_cbc_no_flag
1053e1051a39Sopenharmony_ci	lea	-8(%rax),%rax
1054e1051a39Sopenharmony_ci
1055e1051a39Sopenharmony_ci.Lin_cbc_no_flag:
1056e1051a39Sopenharmony_ci	mov	48(%rax),%rax		# $_rsp
1057e1051a39Sopenharmony_ci	lea	48(%rax),%rax
1058e1051a39Sopenharmony_ci
1059e1051a39Sopenharmony_ci.Lin_cbc_frame_setup:
1060e1051a39Sopenharmony_ci	mov	-8(%rax),%rbx
1061e1051a39Sopenharmony_ci	mov	-16(%rax),%rbp
1062e1051a39Sopenharmony_ci	mov	-24(%rax),%r12
1063e1051a39Sopenharmony_ci	mov	-32(%rax),%r13
1064e1051a39Sopenharmony_ci	mov	-40(%rax),%r14
1065e1051a39Sopenharmony_ci	mov	-48(%rax),%r15
1066e1051a39Sopenharmony_ci	mov	%rbx,144($context)	# restore context->Rbx
1067e1051a39Sopenharmony_ci	mov	%rbp,160($context)	# restore context->Rbp
1068e1051a39Sopenharmony_ci	mov	%r12,216($context)	# restore context->R12
1069e1051a39Sopenharmony_ci	mov	%r13,224($context)	# restore context->R13
1070e1051a39Sopenharmony_ci	mov	%r14,232($context)	# restore context->R14
1071e1051a39Sopenharmony_ci	mov	%r15,240($context)	# restore context->R15
1072e1051a39Sopenharmony_ci
1073e1051a39Sopenharmony_ci.Lin_cbc_prologue:
1074e1051a39Sopenharmony_ci	mov	8(%rax),%rdi
1075e1051a39Sopenharmony_ci	mov	16(%rax),%rsi
1076e1051a39Sopenharmony_ci	mov	%rax,152($context)	# restore context->Rsp
1077e1051a39Sopenharmony_ci	mov	%rsi,168($context)	# restore context->Rsi
1078e1051a39Sopenharmony_ci	mov	%rdi,176($context)	# restore context->Rdi
1079e1051a39Sopenharmony_ci
1080e1051a39Sopenharmony_ci.align	4
1081e1051a39Sopenharmony_ci.Lcommon_seh_exit:
1082e1051a39Sopenharmony_ci
1083e1051a39Sopenharmony_ci	mov	40($disp),%rdi		# disp->ContextRecord
1084e1051a39Sopenharmony_ci	mov	$context,%rsi		# context
1085e1051a39Sopenharmony_ci	mov	\$`1232/8`,%ecx		# sizeof(CONTEXT)
1086e1051a39Sopenharmony_ci	.long	0xa548f3fc		# cld; rep movsq
1087e1051a39Sopenharmony_ci
1088e1051a39Sopenharmony_ci	mov	$disp,%rsi
1089e1051a39Sopenharmony_ci	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
1090e1051a39Sopenharmony_ci	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
1091e1051a39Sopenharmony_ci	mov	0(%rsi),%r8		# arg3, disp->ControlPc
1092e1051a39Sopenharmony_ci	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
1093e1051a39Sopenharmony_ci	mov	40(%rsi),%r10		# disp->ContextRecord
1094e1051a39Sopenharmony_ci	lea	56(%rsi),%r11		# &disp->HandlerData
1095e1051a39Sopenharmony_ci	lea	24(%rsi),%r12		# &disp->EstablisherFrame
1096e1051a39Sopenharmony_ci	mov	%r10,32(%rsp)		# arg5
1097e1051a39Sopenharmony_ci	mov	%r11,40(%rsp)		# arg6
1098e1051a39Sopenharmony_ci	mov	%r12,48(%rsp)		# arg7
1099e1051a39Sopenharmony_ci	mov	%rcx,56(%rsp)		# arg8, (NULL)
1100e1051a39Sopenharmony_ci	call	*__imp_RtlVirtualUnwind(%rip)
1101e1051a39Sopenharmony_ci
1102e1051a39Sopenharmony_ci	mov	\$1,%eax		# ExceptionContinueSearch
1103e1051a39Sopenharmony_ci	lea	64(%rsp),%rsp
1104e1051a39Sopenharmony_ci	popfq
1105e1051a39Sopenharmony_ci	pop	%r15
1106e1051a39Sopenharmony_ci	pop	%r14
1107e1051a39Sopenharmony_ci	pop	%r13
1108e1051a39Sopenharmony_ci	pop	%r12
1109e1051a39Sopenharmony_ci	pop	%rbp
1110e1051a39Sopenharmony_ci	pop	%rbx
1111e1051a39Sopenharmony_ci	pop	%rdi
1112e1051a39Sopenharmony_ci	pop	%rsi
1113e1051a39Sopenharmony_ci	ret
1114e1051a39Sopenharmony_ci.size	cbc_se_handler,.-cbc_se_handler
1115e1051a39Sopenharmony_ci
1116e1051a39Sopenharmony_ci.section	.pdata
1117e1051a39Sopenharmony_ci.align	4
1118e1051a39Sopenharmony_ci	.rva	.LSEH_begin_Camellia_EncryptBlock_Rounds
1119e1051a39Sopenharmony_ci	.rva	.LSEH_end_Camellia_EncryptBlock_Rounds
1120e1051a39Sopenharmony_ci	.rva	.LSEH_info_Camellia_EncryptBlock_Rounds
1121e1051a39Sopenharmony_ci
1122e1051a39Sopenharmony_ci	.rva	.LSEH_begin_Camellia_DecryptBlock_Rounds
1123e1051a39Sopenharmony_ci	.rva	.LSEH_end_Camellia_DecryptBlock_Rounds
1124e1051a39Sopenharmony_ci	.rva	.LSEH_info_Camellia_DecryptBlock_Rounds
1125e1051a39Sopenharmony_ci
1126e1051a39Sopenharmony_ci	.rva	.LSEH_begin_Camellia_Ekeygen
1127e1051a39Sopenharmony_ci	.rva	.LSEH_end_Camellia_Ekeygen
1128e1051a39Sopenharmony_ci	.rva	.LSEH_info_Camellia_Ekeygen
1129e1051a39Sopenharmony_ci
1130e1051a39Sopenharmony_ci	.rva	.LSEH_begin_Camellia_cbc_encrypt
1131e1051a39Sopenharmony_ci	.rva	.LSEH_end_Camellia_cbc_encrypt
1132e1051a39Sopenharmony_ci	.rva	.LSEH_info_Camellia_cbc_encrypt
1133e1051a39Sopenharmony_ci
1134e1051a39Sopenharmony_ci.section	.xdata
1135e1051a39Sopenharmony_ci.align	8
1136e1051a39Sopenharmony_ci.LSEH_info_Camellia_EncryptBlock_Rounds:
1137e1051a39Sopenharmony_ci	.byte	9,0,0,0
1138e1051a39Sopenharmony_ci	.rva	common_se_handler
1139e1051a39Sopenharmony_ci	.rva	.Lenc_prologue,.Lenc_epilogue	# HandlerData[]
1140e1051a39Sopenharmony_ci.LSEH_info_Camellia_DecryptBlock_Rounds:
1141e1051a39Sopenharmony_ci	.byte	9,0,0,0
1142e1051a39Sopenharmony_ci	.rva	common_se_handler
1143e1051a39Sopenharmony_ci	.rva	.Ldec_prologue,.Ldec_epilogue	# HandlerData[]
1144e1051a39Sopenharmony_ci.LSEH_info_Camellia_Ekeygen:
1145e1051a39Sopenharmony_ci	.byte	9,0,0,0
1146e1051a39Sopenharmony_ci	.rva	common_se_handler
1147e1051a39Sopenharmony_ci	.rva	.Lkey_prologue,.Lkey_epilogue	# HandlerData[]
1148e1051a39Sopenharmony_ci.LSEH_info_Camellia_cbc_encrypt:
1149e1051a39Sopenharmony_ci	.byte	9,0,0,0
1150e1051a39Sopenharmony_ci	.rva	cbc_se_handler
1151e1051a39Sopenharmony_ci___
1152e1051a39Sopenharmony_ci}
1153e1051a39Sopenharmony_ci
# Final pass over the generated text: every span enclosed in backticks is
# replaced by the result of evaluating its contents as Perl (this is how
# constant expressions embedded in the assembly get folded).  The result is
# then written to STDOUT, and a failure to flush/close it is fatal.
$code =~ s{\`([^\`]*)\`}{eval $1}gem;
print $code;
close(STDOUT) or die "error closing STDOUT: $!";
1157