1e1051a39Sopenharmony_ci#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2014-2023 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci
9e1051a39Sopenharmony_ci#
10e1051a39Sopenharmony_ci# ====================================================================
11e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
13e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
14e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
15e1051a39Sopenharmony_ci# ====================================================================
16e1051a39Sopenharmony_ci#
17e1051a39Sopenharmony_ci# This module implements support for ARMv8 AES instructions. The
# module is endian-agnostic in the sense that it supports both big- and
# little-endian cases. It likewise supports both 32- and 64-bit modes
# of operation. The latter is achieved by limiting the number of utilized
21e1051a39Sopenharmony_ci# registers to 16, which implies additional NEON load and integer
22e1051a39Sopenharmony_ci# instructions. This has no effect on mighty Apple A7, where results
23e1051a39Sopenharmony_ci# are literally equal to the theoretical estimates based on AES
24e1051a39Sopenharmony_ci# instruction latencies and issue rates. On Cortex-A53, an in-order
25e1051a39Sopenharmony_ci# execution core, this costs up to 10-15%, which is partially
26e1051a39Sopenharmony_ci# compensated by implementing dedicated code path for 128-bit
27e1051a39Sopenharmony_ci# CBC encrypt case. On Cortex-A57 parallelizable mode performance
28e1051a39Sopenharmony_ci# seems to be limited by sheer amount of NEON instructions...
29e1051a39Sopenharmony_ci#
30e1051a39Sopenharmony_ci# April 2019
31e1051a39Sopenharmony_ci#
32e1051a39Sopenharmony_ci# Key to performance of parallelize-able modes is round instruction
33e1051a39Sopenharmony_ci# interleaving. But which factor to use? There is optimal one for
34e1051a39Sopenharmony_ci# each combination of instruction latency and issue rate, beyond
35e1051a39Sopenharmony_ci# which increasing interleave factor doesn't pay off. While on cons
36e1051a39Sopenharmony_ci# side we have code size increase and resource waste on platforms for
37e1051a39Sopenharmony_ci# which interleave factor is too high. In other words you want it to
38e1051a39Sopenharmony_ci# be just right. So far interleave factor of 3x was serving well all
39e1051a39Sopenharmony_ci# platforms. But for ThunderX2 optimal interleave factor was measured
40e1051a39Sopenharmony_ci# to be 5x...
41e1051a39Sopenharmony_ci#
42e1051a39Sopenharmony_ci# Performance in cycles per byte processed with 128-bit key:
43e1051a39Sopenharmony_ci#
44e1051a39Sopenharmony_ci#		CBC enc		CBC dec		CTR
45e1051a39Sopenharmony_ci# Apple A7	2.39		1.20		1.20
46e1051a39Sopenharmony_ci# Cortex-A53	1.32		1.17/1.29(**)	1.36/1.46
47e1051a39Sopenharmony_ci# Cortex-A57(*)	1.95		0.82/0.85	0.89/0.93
48e1051a39Sopenharmony_ci# Cortex-A72	1.33		0.85/0.88	0.92/0.96
49e1051a39Sopenharmony_ci# Denver	1.96		0.65/0.86	0.76/0.80
50e1051a39Sopenharmony_ci# Mongoose	1.33		1.23/1.20	1.30/1.20
51e1051a39Sopenharmony_ci# Kryo		1.26		0.87/0.94	1.00/1.00
52e1051a39Sopenharmony_ci# ThunderX2	5.95		1.25		1.30
53e1051a39Sopenharmony_ci#
54e1051a39Sopenharmony_ci# (*)	original 3.64/1.34/1.32 results were for r0p0 revision
55e1051a39Sopenharmony_ci#	and are still same even for updated module;
56e1051a39Sopenharmony_ci# (**)	numbers after slash are for 32-bit code, which is 3x-
57e1051a39Sopenharmony_ci#	interleaved;
58e1051a39Sopenharmony_ci
# Command line handling.
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Directory part of the script path (including the trailing separator), or
# the empty string when the script was invoked without any directory part.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# arm-xlate.pl is looked up next to this script first, then in the shared
# perlasm directory two levels up.
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Everything printed to STDOUT from here on is piped through arm-xlate.pl.
# The translator path is quoted, like the interpreter and output paths, so
# that a source tree located under a path containing spaces still works.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Prefix of every global symbol emitted by this module.
$prefix="aes_v8";

# Data directive used by the INST() macro: "DCB" for flavours containing
# "win", ".byte" everywhere else.
$_byte = ($flavour =~ /win/ ? "DCB" : ".byte");
76e1051a39Sopenharmony_ci
# Start of the generated assembly: the architecture header and the guard
# that wraps the module.
$code = "#include \"arm_arch.h\"\n"
      . "\n"
      . "#if __ARM_MAX_ARCH__>=7\n";

if ($flavour =~ /64/) {
	# 64-bit flavours only need the architecture selection.
	$code .= ".arch	armv8-a+crypto\n";
	$code .= ".text\n";
} else {
	# Every other flavour gets the ARMv7/NEON setup together with the
	# INST() macro, whose byte order differs between Thumb-2 and ARM mode.
	$code .= <<___;
.arch	armv7-a	// don't confuse not-so-latest binutils with argv8 :-)
.fpu	neon
#ifdef	__thumb2__
.syntax	unified
.thumb
# define INST(a,b,c,d)	$_byte	c,d|0xc,a,b
#else
.code	32
# define INST(a,b,c,d)	$_byte	a,b,c,d
#endif

.text
___
}
97e1051a39Sopenharmony_ci
98e1051a39Sopenharmony_ci# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
99e1051a39Sopenharmony_ci# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
100e1051a39Sopenharmony_ci# maintain both 32- and 64-bit codes within single module and
# transliterate common code to either flavour with regex voodoo.
102e1051a39Sopenharmony_ci#
{{{
# Key schedule generation: emits ${prefix}_set_encrypt_key and
# ${prefix}_set_decrypt_key into $code.
#
# Symbolic names for the registers used by the templates below. The NEON
# register assignment depends on the flavour: q0-q6 for 64-bit flavours,
# q0-q3 and q8-q10 otherwise.
my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
	$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));


#
# This file generates .s file for 64-bit and 32-bit CPUs.
# We don't implement .rodata on 32-bit CPUs yet.
#
# Constant table: initial round constant, the "rotate-n-splat" permutation
# mask, and the 0x1b round constant that is loaded after the 128-bit loop.
# For 64-bit flavours the table is placed in .rodata and .previous switches
# back afterwards; otherwise it stays in the current section.
$code.=".rodata\n"	if ($flavour =~ /64/);
$code.=<<___;
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b
___
$code.=".previous\n"	if ($flavour =~ /64/);

# Entry point. .Lenc_key is a local alias that set_decrypt_key branches to.
$code.=<<___;
.globl	${prefix}_set_encrypt_key
.type	${prefix}_set_encrypt_key,%function
.align	5
${prefix}_set_encrypt_key:
.Lenc_key:
___
# 64-bit flavours set up a frame record.
$code.=<<___	if ($flavour =~ /64/);
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
# Argument validation. $ptr doubles as the return value: -1 if either
# pointer is null, -2 if the key length is below 128, above 256 or not a
# multiple of 64 bits.
$code.=<<___;
	mov	$ptr,#-1
	cmp	$inp,#0
	b.eq	.Lenc_key_abort
	cmp	$out,#0
	b.eq	.Lenc_key_abort
	mov	$ptr,#-2
	cmp	$bits,#128
	b.lt	.Lenc_key_abort
	cmp	$bits,#256
	b.gt	.Lenc_key_abort
	tst	$bits,#0x3f
	b.ne	.Lenc_key_abort

___
# Load the address of .Lrcon into $ptr. 64-bit flavours use adrp/:lo12:
# because the table lives in .rodata.
$code.=<<___	if ($flavour =~ /64/);
	adrp	$ptr,.Lrcon
	add	$ptr,$ptr,:lo12:.Lrcon
___
# Every flavour that is not 64-bit takes the 32-bit path, matching all other
# flavour tests in this file. Testing for /32/ here would emit no address
# load at all for a flavour string containing neither "32" nor "64", leaving
# $ptr at -2 when the table is read below.
$code.=<<___	if ($flavour !~ /64/);
	adr	$ptr,.Lrcon
___
# Key expansion proper. The comparison against 192 selects the 128-, 192- or
# 256-bit path; each path stores the round keys through $out and leaves the
# round count in $rounds, which .Ldone stores at [$out] before returning 0.
$code.=<<___;
	cmp	$bits,#192

	veor	$zero,$zero,$zero
	vld1.8	{$in0},[$inp],#16
	mov	$bits,#8		// reuse $bits
	vld1.32	{$rcon,$mask},[$ptr],#32

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
.Loop128:
	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	 veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key
	b.ne	.Loop128

	vld1.32	{$rcon},[$ptr]

	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	 veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key

	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	 veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	veor	$in0,$in0,$key
	vst1.32	{$in0},[$out]
	add	$out,$out,#0x50

	mov	$rounds,#10
	b	.Ldone

.align	4
.L192:
	vld1.8	{$in1},[$inp],#8
	vmov.i8	$key,#8			// borrow $key
	vst1.32	{$in0},[$out],#16
	vsub.i8	$mask,$mask,$key	// adjust the mask

.Loop192:
	vtbl.8	$key,{$in1},$mask
	vext.8	$tmp,$zero,$in0,#12
#ifdef __ARMEB__
	vst1.32	{$in1},[$out],#16
	sub	$out,$out,#8
#else
	vst1.32	{$in1},[$out],#8
#endif
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp

	vdup.32	$tmp,${in0}[3]
	veor	$tmp,$tmp,$in1
	 veor	$key,$key,$rcon
	vext.8	$in1,$zero,$in1,#12
	vshl.u8	$rcon,$rcon,#1
	veor	$in1,$in1,$tmp
	veor	$in0,$in0,$key
	veor	$in1,$in1,$key
	vst1.32	{$in0},[$out],#16
	b.ne	.Loop192

	mov	$rounds,#12
	add	$out,$out,#0x20
	b	.Ldone

.align	4
.L256:
	vld1.8	{$in1},[$inp]
	mov	$bits,#7
	mov	$rounds,#14
	vst1.32	{$in0},[$out],#16

.Loop256:
	vtbl.8	$key,{$in1},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in1},[$out],#16
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	 veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key
	vst1.32	{$in0},[$out],#16
	b.eq	.Ldone

	vdup.32	$key,${in0}[3]		// just splat
	vext.8	$tmp,$zero,$in1,#12
	aese	$key,$zero

	veor	$in1,$in1,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in1,$in1,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in1,$in1,$tmp

	veor	$in1,$in1,$key
	b	.Loop256

.Ldone:
	str	$rounds,[$out]
	mov	$ptr,#0

.Lenc_key_abort:
	mov	x0,$ptr			// return value
	`"ldr	x29,[sp],#16"		if ($flavour =~ /64/)`
	ret
.size	${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key

.globl	${prefix}_set_decrypt_key
.type	${prefix}_set_decrypt_key,%function
.align	5
${prefix}_set_decrypt_key:
___
# set_decrypt_key prologue: sign the return address and set up a frame on
# 64-bit flavours, save r4/lr otherwise.
$code.=<<___	if ($flavour =~ /64/);
	.inst	0xd503233f		// paciasp
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___	if ($flavour !~ /64/);
	stmdb	sp!,{r4,lr}
___
# Build the encryption schedule via .Lenc_key; on success walk the schedule
# from both ends, swapping round keys and applying aesimc to all but the
# outermost pair. A non-zero result from .Lenc_key is returned unchanged.
$code.=<<___;
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	$out,$out,#240		// restore original $out
	mov	x4,#-16
	add	$inp,$out,x12,lsl#4	// end of key schedule

	vld1.32	{v0.16b},[$out]
	vld1.32	{v1.16b},[$inp]
	vst1.32	{v0.16b},[$inp],x4
	vst1.32	{v1.16b},[$out],#16

.Loop_imc:
	vld1.32	{v0.16b},[$out]
	vld1.32	{v1.16b},[$inp]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	vst1.32	{v0.16b},[$inp],x4
	vst1.32	{v1.16b},[$out],#16
	cmp	$inp,$out
	b.hi	.Loop_imc

	vld1.32	{v0.16b},[$out]
	aesimc	v0.16b,v0.16b
	vst1.32	{v0.16b},[$inp]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
___
# Epilogue matching the flavour-specific prologue above.
$code.=<<___	if ($flavour !~ /64/);
	ldmia	sp!,{r4,pc}
___
$code.=<<___	if ($flavour =~ /64/);
	ldp	x29,x30,[sp],#16
	.inst	0xd50323bf		// autiasp
	ret
___
$code.=<<___;
.size	${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
___
}}}
{{{
# gen_block($dir)
#
# Appends the single-block routine ${prefix}_${dir}crypt to $code.
#
#   $dir - "en" selects the aese/aesmc instruction pair; any other value
#          (the file only passes "de") selects aesd/aesimc.
#
# The emitted routine reads the round count from offset 240 of the key
# schedule, processes the block at $inp two rounds per loop iteration and
# stores the result at $out.
#
# The sub is declared without a prototype: it takes one argument, so the
# former empty prototype "()" was wrong and was only tolerated because the
# calls used the prototype-bypassing "&" form.
sub gen_block {
my $dir = shift;
my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
my ($inp,$out,$key)=map("x$_",(0..2));
my $rounds="w3";
# Exactly three NEON registers are needed: two alternating round keys and
# the block being processed.
my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..2));

$code.=<<___;
.globl	${prefix}_${dir}crypt
.type	${prefix}_${dir}crypt,%function
.align	5
${prefix}_${dir}crypt:
	ldr	$rounds,[$key,#240]
	vld1.32	{$rndkey0},[$key],#16
	vld1.8	{$inout},[$inp]
	sub	$rounds,$rounds,#2
	vld1.32	{$rndkey1},[$key],#16

.Loop_${dir}c:
	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key],#16
	subs	$rounds,$rounds,#2
	aes$e	$inout,$rndkey1
	aes$mc	$inout,$inout
	vld1.32	{$rndkey1},[$key],#16
	b.gt	.Loop_${dir}c

	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key]
	aes$e	$inout,$rndkey1
	veor	$inout,$inout,$rndkey0

	vst1.8	{$inout},[$out]
	ret
.size	${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}
408e1051a39Sopenharmony_ci
# Performance in cycles per byte, processed with AES-ECB at different
# key sizes. The values before and after the optimization are shown
# below (before/after):
413e1051a39Sopenharmony_ci#
414e1051a39Sopenharmony_ci#		AES-128-ECB		AES-192-ECB		AES-256-ECB
415e1051a39Sopenharmony_ci# Cortex-A57	1.85/0.82		2.16/0.96		2.47/1.10
416e1051a39Sopenharmony_ci# Cortex-A72	1.64/0.85		1.82/0.99		2.13/1.14
417e1051a39Sopenharmony_ci
# Optimization is implemented by loop unrolling and interleaving.
# Normally the unrolling factor is 5; if the input data size is smaller
# than 5 blocks, but not smaller than 3 blocks, 3 is used as the
# unrolling factor instead.
# If the input data size dsize >= 5*16 bytes, 5 blocks are processed
# per iteration, and each loop reduces the remaining size: lsize -= 5*16.
# If 5*16 > lsize >= 3*16 bytes, 3 blocks are processed per iteration,
# and each loop does lsize -= 3*16.
# If lsize < 3*16 bytes, the remainder is treated as the tail, and the
# AES instructions of the two blocks are interleaved.
# There is one special case: if the original input data size dsize
# = 16 bytes, it is handled separately to improve performance, by an
# independent code block without LR, FP load and store, which looks
# just like what the original ECB implementation does.
433e1051a39Sopenharmony_ci{{{
434e1051a39Sopenharmony_cimy ($inp,$out,$len,$key)=map("x$_",(0..3));
435e1051a39Sopenharmony_cimy ($enc,$rounds,$cnt,$key_,$step)=("w4","w5","w6","x7","x8");
436e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));
437e1051a39Sopenharmony_ci
438e1051a39Sopenharmony_cimy ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
439e1051a39Sopenharmony_ci
440e1051a39Sopenharmony_ci### q7	last round key
441e1051a39Sopenharmony_ci### q10-q15	q7 Last 7 round keys
442e1051a39Sopenharmony_ci### q8-q9	preloaded round keys except last 7 keys for big size
443e1051a39Sopenharmony_ci### q5, q6, q8-q9	preloaded round keys except last 7 keys for only 16 byte
444e1051a39Sopenharmony_ci
445e1051a39Sopenharmony_ci{
446e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
447e1051a39Sopenharmony_ci
448e1051a39Sopenharmony_cimy ($dat3,$in3,$tmp3);	# used only in 64-bit mode
449e1051a39Sopenharmony_cimy ($dat4,$in4,$tmp4);
450e1051a39Sopenharmony_ciif ($flavour =~ /64/) {
451e1051a39Sopenharmony_ci    ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23));
452e1051a39Sopenharmony_ci}
453e1051a39Sopenharmony_ci
454e1051a39Sopenharmony_ci$code.=<<___;
455e1051a39Sopenharmony_ci.globl	${prefix}_ecb_encrypt
456e1051a39Sopenharmony_ci.type	${prefix}_ecb_encrypt,%function
457e1051a39Sopenharmony_ci.align	5
458e1051a39Sopenharmony_ci${prefix}_ecb_encrypt:
459e1051a39Sopenharmony_ci___
460e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
461e1051a39Sopenharmony_ci	subs	$len,$len,#16
462e1051a39Sopenharmony_ci	// Original input data size bigger than 16, jump to big size processing.
463e1051a39Sopenharmony_ci	b.ne    .Lecb_big_size
464e1051a39Sopenharmony_ci	vld1.8	{$dat0},[$inp]
465e1051a39Sopenharmony_ci	cmp	$enc,#0					// en- or decrypting?
466e1051a39Sopenharmony_ci	ldr	$rounds,[$key,#240]
467e1051a39Sopenharmony_ci	vld1.32	{q5-q6},[$key],#32			// load key schedule...
468e1051a39Sopenharmony_ci
469e1051a39Sopenharmony_ci	b.eq .Lecb_small_dec
470e1051a39Sopenharmony_ci	aese	$dat0,q5
471e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
472e1051a39Sopenharmony_ci	vld1.32	{q8-q9},[$key],#32			// load key schedule...
473e1051a39Sopenharmony_ci	aese	$dat0,q6
474e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
475e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#10			// if rounds==10, jump to aes-128-ecb processing
476e1051a39Sopenharmony_ci	b.eq    .Lecb_128_enc
477e1051a39Sopenharmony_ci.Lecb_round_loop:
478e1051a39Sopenharmony_ci	aese	$dat0,q8
479e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
480e1051a39Sopenharmony_ci	vld1.32	{q8},[$key],#16				// load key schedule...
481e1051a39Sopenharmony_ci	aese	$dat0,q9
482e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
483e1051a39Sopenharmony_ci	vld1.32	{q9},[$key],#16				// load key schedule...
484e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2			// bias
485e1051a39Sopenharmony_ci	b.gt    .Lecb_round_loop
486e1051a39Sopenharmony_ci.Lecb_128_enc:
487e1051a39Sopenharmony_ci	vld1.32	{q10-q11},[$key],#32		// load key schedule...
488e1051a39Sopenharmony_ci	aese	$dat0,q8
489e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
490e1051a39Sopenharmony_ci	aese	$dat0,q9
491e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
492e1051a39Sopenharmony_ci	vld1.32	{q12-q13},[$key],#32		// load key schedule...
493e1051a39Sopenharmony_ci	aese	$dat0,q10
494e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
495e1051a39Sopenharmony_ci	aese	$dat0,q11
496e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
497e1051a39Sopenharmony_ci	vld1.32	{q14-q15},[$key],#32		// load key schedule...
498e1051a39Sopenharmony_ci	aese	$dat0,q12
499e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
500e1051a39Sopenharmony_ci	aese	$dat0,q13
501e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
502e1051a39Sopenharmony_ci	vld1.32	{$rndlast},[$key]
503e1051a39Sopenharmony_ci	aese	$dat0,q14
504e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
505e1051a39Sopenharmony_ci	aese	$dat0,q15
506e1051a39Sopenharmony_ci	veor	$dat0,$dat0,$rndlast
507e1051a39Sopenharmony_ci	vst1.8	{$dat0},[$out]
508e1051a39Sopenharmony_ci	b	.Lecb_Final_abort
509e1051a39Sopenharmony_ci.Lecb_small_dec:
510e1051a39Sopenharmony_ci	aesd	$dat0,q5
511e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
512e1051a39Sopenharmony_ci	vld1.32	{q8-q9},[$key],#32			// load key schedule...
513e1051a39Sopenharmony_ci	aesd	$dat0,q6
514e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
515e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#10			// bias
516e1051a39Sopenharmony_ci	b.eq    .Lecb_128_dec
517e1051a39Sopenharmony_ci.Lecb_dec_round_loop:
518e1051a39Sopenharmony_ci	aesd	$dat0,q8
519e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
520e1051a39Sopenharmony_ci	vld1.32	{q8},[$key],#16				// load key schedule...
521e1051a39Sopenharmony_ci	aesd	$dat0,q9
522e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
523e1051a39Sopenharmony_ci	vld1.32	{q9},[$key],#16				// load key schedule...
524e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2			// bias
525e1051a39Sopenharmony_ci	b.gt    .Lecb_dec_round_loop
526e1051a39Sopenharmony_ci.Lecb_128_dec:
527e1051a39Sopenharmony_ci	vld1.32	{q10-q11},[$key],#32		// load key schedule...
528e1051a39Sopenharmony_ci	aesd	$dat0,q8
529e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
530e1051a39Sopenharmony_ci	aesd	$dat0,q9
531e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
532e1051a39Sopenharmony_ci	vld1.32	{q12-q13},[$key],#32		// load key schedule...
533e1051a39Sopenharmony_ci	aesd	$dat0,q10
534e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
535e1051a39Sopenharmony_ci	aesd	$dat0,q11
536e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
537e1051a39Sopenharmony_ci	vld1.32	{q14-q15},[$key],#32		// load key schedule...
538e1051a39Sopenharmony_ci	aesd	$dat0,q12
539e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
540e1051a39Sopenharmony_ci	aesd	$dat0,q13
541e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
542e1051a39Sopenharmony_ci	vld1.32	{$rndlast},[$key]
543e1051a39Sopenharmony_ci	aesd	$dat0,q14
544e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
545e1051a39Sopenharmony_ci	aesd	$dat0,q15
546e1051a39Sopenharmony_ci	veor	$dat0,$dat0,$rndlast
547e1051a39Sopenharmony_ci	vst1.8	{$dat0},[$out]
548e1051a39Sopenharmony_ci	b	.Lecb_Final_abort
549e1051a39Sopenharmony_ci.Lecb_big_size:
550e1051a39Sopenharmony_ci___
551e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
552e1051a39Sopenharmony_ci	stp	x29,x30,[sp,#-16]!
553e1051a39Sopenharmony_ci	add	x29,sp,#0
554e1051a39Sopenharmony_ci___
555e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
556e1051a39Sopenharmony_ci	mov	ip,sp
557e1051a39Sopenharmony_ci	stmdb	sp!,{r4-r8,lr}
558e1051a39Sopenharmony_ci	vstmdb	sp!,{d8-d15}			@ ABI specification says so
559e1051a39Sopenharmony_ci	ldmia	ip,{r4-r5}			@ load remaining args
560e1051a39Sopenharmony_ci	subs	$len,$len,#16
561e1051a39Sopenharmony_ci___
562e1051a39Sopenharmony_ci$code.=<<___;
563e1051a39Sopenharmony_ci	mov	$step,#16
564e1051a39Sopenharmony_ci	b.lo	.Lecb_done
565e1051a39Sopenharmony_ci	cclr	$step,eq
566e1051a39Sopenharmony_ci
567e1051a39Sopenharmony_ci	cmp	$enc,#0					// en- or decrypting?
568e1051a39Sopenharmony_ci	ldr	$rounds,[$key,#240]
569e1051a39Sopenharmony_ci	and	$len,$len,#-16
570e1051a39Sopenharmony_ci	vld1.8	{$dat},[$inp],$step
571e1051a39Sopenharmony_ci
572e1051a39Sopenharmony_ci	vld1.32	{q8-q9},[$key]				// load key schedule...
573e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#6
574e1051a39Sopenharmony_ci	add	$key_,$key,x5,lsl#4				// pointer to last 7 round keys
575e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
576e1051a39Sopenharmony_ci	vld1.32	{q10-q11},[$key_],#32
577e1051a39Sopenharmony_ci	vld1.32	{q12-q13},[$key_],#32
578e1051a39Sopenharmony_ci	vld1.32	{q14-q15},[$key_],#32
579e1051a39Sopenharmony_ci	vld1.32	{$rndlast},[$key_]
580e1051a39Sopenharmony_ci
581e1051a39Sopenharmony_ci	add	$key_,$key,#32
582e1051a39Sopenharmony_ci	mov	$cnt,$rounds
583e1051a39Sopenharmony_ci	b.eq	.Lecb_dec
584e1051a39Sopenharmony_ci
585e1051a39Sopenharmony_ci	vld1.8	{$dat1},[$inp],#16
586e1051a39Sopenharmony_ci	subs	$len,$len,#32				// bias
587e1051a39Sopenharmony_ci	add	$cnt,$rounds,#2
588e1051a39Sopenharmony_ci	vorr	$in1,$dat1,$dat1
589e1051a39Sopenharmony_ci	vorr	$dat2,$dat1,$dat1
590e1051a39Sopenharmony_ci	vorr	$dat1,$dat,$dat
591e1051a39Sopenharmony_ci	b.lo	.Lecb_enc_tail
592e1051a39Sopenharmony_ci
593e1051a39Sopenharmony_ci	vorr	$dat1,$in1,$in1
594e1051a39Sopenharmony_ci	vld1.8	{$dat2},[$inp],#16
595e1051a39Sopenharmony_ci___
# 64-bit flavours only: ECB encryption with five blocks interleaved per pass.
#
# Entry: when $len (already biased by the code emitted just before this
# chunk) is below 32, control goes to .Loop3x_ecb_enc, which is emitted by
# the following $code chunk.  Otherwise two more input blocks are loaded
# into $dat3/$dat4, $len is biased by another 32 and $cnt is set to $rounds.
#
# .Loop5x_ecb_enc applies two rounds (q8, then q9) to $dat0-$dat4 per trip,
# reloading q8/q9 from [$key_] and decrementing $cnt by 2 until it is no
# longer positive.  The straight-line code after the loop applies q8, q9 and
# q10-q14 with aesmc, then q15 with aese only; the result is XORed with
# $rndlast to form each output block.
#
# Scalar work interleaved with the AES rounds:
#   - "cmp $len,#0x40" produces the flags consumed by both the csel and the
#     closing "b.hs .Loop5x_ecb_enc"; no flag-setting instruction form
#     appears in between.
#   - $len is reduced by 0x50, and x6 is borrowed (see the inline comment)
#     to hold the adjustment added to $inp: zero on "gt", else the reduced
#     $len.
#   - x6 is then reused as $len+0x60.  When that is zero control goes to
#     .Lecb_enc_tail4x, which writes out only the $dat1-$dat4 results and
#     jumps to .Lecb_done.
#
# Otherwise all five results are stored, the pre-loaded inputs $in0-$in4
# become the new $dat0-$dat4, q8/q9 are re-loaded through $key_ (which was
# reset to $key) and $cnt is reset to $rounds.
#
# On loop exit 0x50 is added back to $len; zero means everything is done.
# Any remainder is handed to the three-block code: $in2-$in4 are moved to
# $dat0-$dat2, $cnt becomes $rounds+2, $len is biased by 0x30, and control
# goes to .Lecb_enc_tail (on borrow) or .Loop3x_ecb_enc.
596e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
597e1051a39Sopenharmony_ci	cmp	$len,#32
598e1051a39Sopenharmony_ci	b.lo	.Loop3x_ecb_enc
599e1051a39Sopenharmony_ci
600e1051a39Sopenharmony_ci	vld1.8	{$dat3},[$inp],#16
601e1051a39Sopenharmony_ci	vld1.8	{$dat4},[$inp],#16
602e1051a39Sopenharmony_ci	sub	$len,$len,#32				// bias
603e1051a39Sopenharmony_ci	mov	$cnt,$rounds
604e1051a39Sopenharmony_ci
605e1051a39Sopenharmony_ci.Loop5x_ecb_enc:
606e1051a39Sopenharmony_ci	aese	$dat0,q8
607e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
608e1051a39Sopenharmony_ci	aese	$dat1,q8
609e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
610e1051a39Sopenharmony_ci	aese	$dat2,q8
611e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
612e1051a39Sopenharmony_ci	aese	$dat3,q8
613e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
614e1051a39Sopenharmony_ci	aese	$dat4,q8
615e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
616e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
617e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
618e1051a39Sopenharmony_ci	aese	$dat0,q9
619e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
620e1051a39Sopenharmony_ci	aese	$dat1,q9
621e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
622e1051a39Sopenharmony_ci	aese	$dat2,q9
623e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
624e1051a39Sopenharmony_ci	aese	$dat3,q9
625e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
626e1051a39Sopenharmony_ci	aese	$dat4,q9
627e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
628e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
629e1051a39Sopenharmony_ci	b.gt	.Loop5x_ecb_enc
630e1051a39Sopenharmony_ci
631e1051a39Sopenharmony_ci	aese	$dat0,q8
632e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
633e1051a39Sopenharmony_ci	aese	$dat1,q8
634e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
635e1051a39Sopenharmony_ci	aese	$dat2,q8
636e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
637e1051a39Sopenharmony_ci	aese	$dat3,q8
638e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
639e1051a39Sopenharmony_ci	aese	$dat4,q8
640e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
641e1051a39Sopenharmony_ci	cmp	$len,#0x40					// because .Lecb_enc_tail4x
642e1051a39Sopenharmony_ci	sub	$len,$len,#0x50
643e1051a39Sopenharmony_ci
644e1051a39Sopenharmony_ci	aese	$dat0,q9
645e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
646e1051a39Sopenharmony_ci	aese	$dat1,q9
647e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
648e1051a39Sopenharmony_ci	aese	$dat2,q9
649e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
650e1051a39Sopenharmony_ci	aese	$dat3,q9
651e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
652e1051a39Sopenharmony_ci	aese	$dat4,q9
653e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
654e1051a39Sopenharmony_ci	csel	x6,xzr,$len,gt			// borrow x6, $cnt, "gt" is not typo
655e1051a39Sopenharmony_ci	mov	$key_,$key
656e1051a39Sopenharmony_ci
657e1051a39Sopenharmony_ci	aese	$dat0,q10
658e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
659e1051a39Sopenharmony_ci	aese	$dat1,q10
660e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
661e1051a39Sopenharmony_ci	aese	$dat2,q10
662e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
663e1051a39Sopenharmony_ci	aese	$dat3,q10
664e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
665e1051a39Sopenharmony_ci	aese	$dat4,q10
666e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
667e1051a39Sopenharmony_ci	add	$inp,$inp,x6				// $inp is adjusted in such way that
668e1051a39Sopenharmony_ci							// at exit from the loop $dat1-$dat4
669e1051a39Sopenharmony_ci							// are loaded with last "words"
670e1051a39Sopenharmony_ci	add	x6,$len,#0x60		    // because .Lecb_enc_tail4x
671e1051a39Sopenharmony_ci
672e1051a39Sopenharmony_ci	aese	$dat0,q11
673e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
674e1051a39Sopenharmony_ci	aese	$dat1,q11
675e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
676e1051a39Sopenharmony_ci	aese	$dat2,q11
677e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
678e1051a39Sopenharmony_ci	aese	$dat3,q11
679e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
680e1051a39Sopenharmony_ci	aese	$dat4,q11
681e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
682e1051a39Sopenharmony_ci
683e1051a39Sopenharmony_ci	aese	$dat0,q12
684e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
685e1051a39Sopenharmony_ci	aese	$dat1,q12
686e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
687e1051a39Sopenharmony_ci	aese	$dat2,q12
688e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
689e1051a39Sopenharmony_ci	aese	$dat3,q12
690e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
691e1051a39Sopenharmony_ci	aese	$dat4,q12
692e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
693e1051a39Sopenharmony_ci
694e1051a39Sopenharmony_ci	aese	$dat0,q13
695e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
696e1051a39Sopenharmony_ci	aese	$dat1,q13
697e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
698e1051a39Sopenharmony_ci	aese	$dat2,q13
699e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
700e1051a39Sopenharmony_ci	aese	$dat3,q13
701e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
702e1051a39Sopenharmony_ci	aese	$dat4,q13
703e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
704e1051a39Sopenharmony_ci
705e1051a39Sopenharmony_ci	aese	$dat0,q14
706e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
707e1051a39Sopenharmony_ci	aese	$dat1,q14
708e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
709e1051a39Sopenharmony_ci	aese	$dat2,q14
710e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
711e1051a39Sopenharmony_ci	aese	$dat3,q14
712e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
713e1051a39Sopenharmony_ci	aese	$dat4,q14
714e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
715e1051a39Sopenharmony_ci
716e1051a39Sopenharmony_ci	aese	$dat0,q15
717e1051a39Sopenharmony_ci	vld1.8	{$in0},[$inp],#16
718e1051a39Sopenharmony_ci	aese	$dat1,q15
719e1051a39Sopenharmony_ci	vld1.8	{$in1},[$inp],#16
720e1051a39Sopenharmony_ci	aese	$dat2,q15
721e1051a39Sopenharmony_ci	vld1.8	{$in2},[$inp],#16
722e1051a39Sopenharmony_ci	aese	$dat3,q15
723e1051a39Sopenharmony_ci	vld1.8	{$in3},[$inp],#16
724e1051a39Sopenharmony_ci	aese	$dat4,q15
725e1051a39Sopenharmony_ci	vld1.8	{$in4},[$inp],#16
726e1051a39Sopenharmony_ci	cbz	x6,.Lecb_enc_tail4x
727e1051a39Sopenharmony_ci	vld1.32 {q8},[$key_],#16			// re-pre-load rndkey[0]
728e1051a39Sopenharmony_ci	veor	$tmp0,$rndlast,$dat0
729e1051a39Sopenharmony_ci	vorr	$dat0,$in0,$in0
730e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat1
731e1051a39Sopenharmony_ci	vorr	$dat1,$in1,$in1
732e1051a39Sopenharmony_ci	veor	$tmp2,$rndlast,$dat2
733e1051a39Sopenharmony_ci	vorr	$dat2,$in2,$in2
734e1051a39Sopenharmony_ci	veor	$tmp3,$rndlast,$dat3
735e1051a39Sopenharmony_ci	vorr	$dat3,$in3,$in3
736e1051a39Sopenharmony_ci	veor	$tmp4,$rndlast,$dat4
737e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
738e1051a39Sopenharmony_ci	vorr	$dat4,$in4,$in4
739e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
740e1051a39Sopenharmony_ci	mov	$cnt,$rounds
741e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
742e1051a39Sopenharmony_ci	vld1.32 {q9},[$key_],#16			// re-pre-load rndkey[1]
743e1051a39Sopenharmony_ci	vst1.8	{$tmp3},[$out],#16
744e1051a39Sopenharmony_ci	vst1.8	{$tmp4},[$out],#16
745e1051a39Sopenharmony_ci	b.hs	.Loop5x_ecb_enc
746e1051a39Sopenharmony_ci
747e1051a39Sopenharmony_ci	add	$len,$len,#0x50
748e1051a39Sopenharmony_ci	cbz	$len,.Lecb_done
749e1051a39Sopenharmony_ci
750e1051a39Sopenharmony_ci	add	$cnt,$rounds,#2
751e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
752e1051a39Sopenharmony_ci	vorr	$dat0,$in2,$in2
753e1051a39Sopenharmony_ci	vorr	$dat1,$in3,$in3
754e1051a39Sopenharmony_ci	vorr	$dat2,$in4,$in4
755e1051a39Sopenharmony_ci	b.lo	.Lecb_enc_tail
756e1051a39Sopenharmony_ci
757e1051a39Sopenharmony_ci	b	.Loop3x_ecb_enc
758e1051a39Sopenharmony_ci
759e1051a39Sopenharmony_ci.align	4
760e1051a39Sopenharmony_ci.Lecb_enc_tail4x:
761e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat1
762e1051a39Sopenharmony_ci	veor	$tmp2,$rndlast,$dat2
763e1051a39Sopenharmony_ci	veor	$tmp3,$rndlast,$dat3
764e1051a39Sopenharmony_ci	veor	$tmp4,$rndlast,$dat4
765e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
766e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
767e1051a39Sopenharmony_ci	vst1.8	{$tmp3},[$out],#16
768e1051a39Sopenharmony_ci	vst1.8	{$tmp4},[$out],#16
769e1051a39Sopenharmony_ci
770e1051a39Sopenharmony_ci	b	.Lecb_done
771e1051a39Sopenharmony_ci.align	4
772e1051a39Sopenharmony_ci___
# All flavours: ECB encryption three blocks at a time, plus the tail that
# finishes the last one or two blocks.
#
# .Loop3x_ecb_enc applies two rounds (q8, then q9) to $dat0-$dat2 per trip,
# reloading q8/q9 from [$key_] and decrementing $cnt by 2 until it is no
# longer positive.  The straight-line code that follows applies q8, q9,
# q12, q13 and q14 with aesmc, then q15 with aese only, and XORs the result
# with $rndlast.  (Unlike the five-block path, q10/q11 are not used here;
# $cnt is seeded with $rounds+2 instead of $rounds.)
#
# "subs $len,$len,#0x30" supplies the flags for both "mov.lo x6,$len" and
# the closing "b.hs .Loop3x_ecb_enc"; no flag-setting instruction form
# appears in between.  x6 is added to $inp before the next three inputs are
# loaded into $in0-$in2 (see the inline comment for the intent), $key_ is
# reset to $key and q8/q9 are re-loaded through it.
#
# After the loop, "cmn $len,#0x30" detects the nothing-left case and jumps
# to .Lecb_done (defined in the decryption chunk further down).
#
# .Lecb_enc_tail runs the same round sequence on $dat1/$dat2 only.
# "cmn $len,#0x20" selects .Lecb_enc_one, which stores just the $dat2
# result; otherwise both results are stored.  Both exits branch to
# .Lecb_done.
773e1051a39Sopenharmony_ci$code.=<<___;
774e1051a39Sopenharmony_ci.Loop3x_ecb_enc:
775e1051a39Sopenharmony_ci	aese	$dat0,q8
776e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
777e1051a39Sopenharmony_ci	aese	$dat1,q8
778e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
779e1051a39Sopenharmony_ci	aese	$dat2,q8
780e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
781e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
782e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
783e1051a39Sopenharmony_ci	aese	$dat0,q9
784e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
785e1051a39Sopenharmony_ci	aese	$dat1,q9
786e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
787e1051a39Sopenharmony_ci	aese	$dat2,q9
788e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
789e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
790e1051a39Sopenharmony_ci	b.gt	.Loop3x_ecb_enc
791e1051a39Sopenharmony_ci
792e1051a39Sopenharmony_ci	aese	$dat0,q8
793e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
794e1051a39Sopenharmony_ci	aese	$dat1,q8
795e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
796e1051a39Sopenharmony_ci	aese	$dat2,q8
797e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
798e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
799e1051a39Sopenharmony_ci	mov.lo	x6,$len				// x6, $cnt, is zero at this point
800e1051a39Sopenharmony_ci	aese	$dat0,q9
801e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
802e1051a39Sopenharmony_ci	aese	$dat1,q9
803e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
804e1051a39Sopenharmony_ci	aese	$dat2,q9
805e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
806e1051a39Sopenharmony_ci	add	$inp,$inp,x6			// $inp is adjusted in such way that
807e1051a39Sopenharmony_ci						// at exit from the loop $dat1-$dat2
808e1051a39Sopenharmony_ci						// are loaded with last "words"
809e1051a39Sopenharmony_ci	mov	$key_,$key
810e1051a39Sopenharmony_ci	aese	$dat0,q12
811e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
812e1051a39Sopenharmony_ci	aese	$dat1,q12
813e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
814e1051a39Sopenharmony_ci	aese	$dat2,q12
815e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
816e1051a39Sopenharmony_ci	vld1.8	{$in0},[$inp],#16
817e1051a39Sopenharmony_ci	aese	$dat0,q13
818e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
819e1051a39Sopenharmony_ci	aese	$dat1,q13
820e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
821e1051a39Sopenharmony_ci	aese	$dat2,q13
822e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
823e1051a39Sopenharmony_ci	vld1.8	{$in1},[$inp],#16
824e1051a39Sopenharmony_ci	aese	$dat0,q14
825e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
826e1051a39Sopenharmony_ci	aese	$dat1,q14
827e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
828e1051a39Sopenharmony_ci	aese	$dat2,q14
829e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
830e1051a39Sopenharmony_ci	vld1.8	{$in2},[$inp],#16
831e1051a39Sopenharmony_ci	aese	$dat0,q15
832e1051a39Sopenharmony_ci	aese	$dat1,q15
833e1051a39Sopenharmony_ci	aese	$dat2,q15
834e1051a39Sopenharmony_ci	vld1.32 {q8},[$key_],#16		// re-pre-load rndkey[0]
835e1051a39Sopenharmony_ci	add	$cnt,$rounds,#2
836e1051a39Sopenharmony_ci	veor	$tmp0,$rndlast,$dat0
837e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat1
838e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$rndlast
839e1051a39Sopenharmony_ci	vld1.32 {q9},[$key_],#16		// re-pre-load rndkey[1]
840e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
841e1051a39Sopenharmony_ci	vorr	$dat0,$in0,$in0
842e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
843e1051a39Sopenharmony_ci	vorr	$dat1,$in1,$in1
844e1051a39Sopenharmony_ci	vst1.8	{$dat2},[$out],#16
845e1051a39Sopenharmony_ci	vorr	$dat2,$in2,$in2
846e1051a39Sopenharmony_ci	b.hs	.Loop3x_ecb_enc
847e1051a39Sopenharmony_ci
848e1051a39Sopenharmony_ci	cmn	$len,#0x30
849e1051a39Sopenharmony_ci	b.eq	.Lecb_done
850e1051a39Sopenharmony_ci	nop
851e1051a39Sopenharmony_ci
852e1051a39Sopenharmony_ci.Lecb_enc_tail:
853e1051a39Sopenharmony_ci	aese	$dat1,q8
854e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
855e1051a39Sopenharmony_ci	aese	$dat2,q8
856e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
857e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
858e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
859e1051a39Sopenharmony_ci	aese	$dat1,q9
860e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
861e1051a39Sopenharmony_ci	aese	$dat2,q9
862e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
863e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
864e1051a39Sopenharmony_ci	b.gt	.Lecb_enc_tail
865e1051a39Sopenharmony_ci
866e1051a39Sopenharmony_ci	aese	$dat1,q8
867e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
868e1051a39Sopenharmony_ci	aese	$dat2,q8
869e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
870e1051a39Sopenharmony_ci	aese	$dat1,q9
871e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
872e1051a39Sopenharmony_ci	aese	$dat2,q9
873e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
874e1051a39Sopenharmony_ci	aese	$dat1,q12
875e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
876e1051a39Sopenharmony_ci	aese	$dat2,q12
877e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
878e1051a39Sopenharmony_ci	cmn	$len,#0x20
879e1051a39Sopenharmony_ci	aese	$dat1,q13
880e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
881e1051a39Sopenharmony_ci	aese	$dat2,q13
882e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
883e1051a39Sopenharmony_ci	aese	$dat1,q14
884e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
885e1051a39Sopenharmony_ci	aese	$dat2,q14
886e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
887e1051a39Sopenharmony_ci	aese	$dat1,q15
888e1051a39Sopenharmony_ci	aese	$dat2,q15
889e1051a39Sopenharmony_ci	b.eq	.Lecb_enc_one
890e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat1
891e1051a39Sopenharmony_ci	veor	$tmp2,$rndlast,$dat2
892e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
893e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
894e1051a39Sopenharmony_ci	b	.Lecb_done
895e1051a39Sopenharmony_ci
896e1051a39Sopenharmony_ci.Lecb_enc_one:
897e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat2
898e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
899e1051a39Sopenharmony_ci	b	.Lecb_done
900e1051a39Sopenharmony_ci___
901e1051a39Sopenharmony_ci
# .Lecb_dec: set-up for the ECB decryption paths (all flavours).
#
# The next input block is loaded into $dat1, $len is biased by 32 and $cnt
# is seeded with $rounds+2.  The register shuffle leaves the freshly loaded
# block in both $in1 and $dat2 and puts $dat into $dat1, which is the state
# .Lecb_dec_tail is entered with when the subs borrowed ("b.lo").
# Otherwise $dat1 is restored from $in1 and one more block is loaded into
# $dat2 before falling into the loops emitted by the following chunks.
# NOTE(review): $dat is declared outside this part of the file - presumably
# it names the register holding the first block; confirm at its definition.
902e1051a39Sopenharmony_ci$code.=<<___;
903e1051a39Sopenharmony_ci.align	5
904e1051a39Sopenharmony_ci.Lecb_dec:
905e1051a39Sopenharmony_ci	vld1.8	{$dat1},[$inp],#16
906e1051a39Sopenharmony_ci	subs	$len,$len,#32			// bias
907e1051a39Sopenharmony_ci	add	$cnt,$rounds,#2
908e1051a39Sopenharmony_ci	vorr	$in1,$dat1,$dat1
909e1051a39Sopenharmony_ci	vorr	$dat2,$dat1,$dat1
910e1051a39Sopenharmony_ci	vorr	$dat1,$dat,$dat
911e1051a39Sopenharmony_ci	b.lo	.Lecb_dec_tail
912e1051a39Sopenharmony_ci
913e1051a39Sopenharmony_ci	vorr	$dat1,$in1,$in1
914e1051a39Sopenharmony_ci	vld1.8	{$dat2},[$inp],#16
915e1051a39Sopenharmony_ci___
# 64-bit flavours only: ECB decryption with five blocks interleaved per
# pass.  The structure matches the five-block encryption path, with
# aesd/aesimc in place of aese/aesmc.
#
# Entry: when $len is below 32 control goes to .Loop3x_ecb_dec (emitted by
# the following $code chunk).  Otherwise two more input blocks are loaded
# into $dat3/$dat4, $len is biased by another 32 and $cnt is set to $rounds.
#
# .Loop5x_ecb_dec applies two rounds (q8, then q9) to $dat0-$dat4 per trip,
# reloading q8/q9 from [$key_] and decrementing $cnt by 2 until it is no
# longer positive.  The straight-line code after the loop applies q8, q9 and
# q10-q14 with aesimc, then q15 with aesd only; the result is XORed with
# $rndlast to form each output block.
#
# Scalar work interleaved with the AES rounds:
#   - "cmp $len,#0x40" produces the flags consumed by both the csel and the
#     closing "b.hs .Loop5x_ecb_dec"; no flag-setting instruction form
#     appears in between.
#   - $len is reduced by 0x50, and x6 is borrowed (see the inline comment)
#     to hold the adjustment added to $inp: zero on "gt", else the reduced
#     $len.
#   - x6 is then reused as $len+0x60.  When that is zero control goes to
#     .Lecb_tail4x (note: no "dec" in this label's name), which writes out
#     only the $dat1-$dat4 results and jumps to .Lecb_done.
#
# Otherwise all five results are stored, the pre-loaded inputs $in0-$in4
# become the new $dat0-$dat4, q8/q9 are re-loaded through $key_ (which was
# reset to $key) and $cnt is reset to $rounds.
#
# On loop exit 0x50 is added back to $len; zero means everything is done.
# Any remainder is handed to the three-block code: $in2-$in4 are moved to
# $dat0-$dat2, $cnt becomes $rounds+2, $len is biased by 0x30, and control
# goes to .Lecb_dec_tail (on borrow) or .Loop3x_ecb_dec.
916e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
917e1051a39Sopenharmony_ci	cmp	$len,#32
918e1051a39Sopenharmony_ci	b.lo	.Loop3x_ecb_dec
919e1051a39Sopenharmony_ci
920e1051a39Sopenharmony_ci	vld1.8	{$dat3},[$inp],#16
921e1051a39Sopenharmony_ci	vld1.8	{$dat4},[$inp],#16
922e1051a39Sopenharmony_ci	sub	$len,$len,#32				// bias
923e1051a39Sopenharmony_ci	mov	$cnt,$rounds
924e1051a39Sopenharmony_ci
925e1051a39Sopenharmony_ci.Loop5x_ecb_dec:
926e1051a39Sopenharmony_ci	aesd	$dat0,q8
927e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
928e1051a39Sopenharmony_ci	aesd	$dat1,q8
929e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
930e1051a39Sopenharmony_ci	aesd	$dat2,q8
931e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
932e1051a39Sopenharmony_ci	aesd	$dat3,q8
933e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
934e1051a39Sopenharmony_ci	aesd	$dat4,q8
935e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
936e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
937e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
938e1051a39Sopenharmony_ci	aesd	$dat0,q9
939e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
940e1051a39Sopenharmony_ci	aesd	$dat1,q9
941e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
942e1051a39Sopenharmony_ci	aesd	$dat2,q9
943e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
944e1051a39Sopenharmony_ci	aesd	$dat3,q9
945e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
946e1051a39Sopenharmony_ci	aesd	$dat4,q9
947e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
948e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
949e1051a39Sopenharmony_ci	b.gt	.Loop5x_ecb_dec
950e1051a39Sopenharmony_ci
951e1051a39Sopenharmony_ci	aesd	$dat0,q8
952e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
953e1051a39Sopenharmony_ci	aesd	$dat1,q8
954e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
955e1051a39Sopenharmony_ci	aesd	$dat2,q8
956e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
957e1051a39Sopenharmony_ci	aesd	$dat3,q8
958e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
959e1051a39Sopenharmony_ci	aesd	$dat4,q8
960e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
961e1051a39Sopenharmony_ci	cmp	$len,#0x40				// because .Lecb_tail4x
962e1051a39Sopenharmony_ci	sub	$len,$len,#0x50
963e1051a39Sopenharmony_ci
964e1051a39Sopenharmony_ci	aesd	$dat0,q9
965e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
966e1051a39Sopenharmony_ci	aesd	$dat1,q9
967e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
968e1051a39Sopenharmony_ci	aesd	$dat2,q9
969e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
970e1051a39Sopenharmony_ci	aesd	$dat3,q9
971e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
972e1051a39Sopenharmony_ci	aesd	$dat4,q9
973e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
974e1051a39Sopenharmony_ci	csel	x6,xzr,$len,gt		// borrow x6, $cnt, "gt" is not typo
975e1051a39Sopenharmony_ci	mov	$key_,$key
976e1051a39Sopenharmony_ci
977e1051a39Sopenharmony_ci	aesd	$dat0,q10
978e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
979e1051a39Sopenharmony_ci	aesd	$dat1,q10
980e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
981e1051a39Sopenharmony_ci	aesd	$dat2,q10
982e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
983e1051a39Sopenharmony_ci	aesd	$dat3,q10
984e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
985e1051a39Sopenharmony_ci	aesd	$dat4,q10
986e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
987e1051a39Sopenharmony_ci	add	$inp,$inp,x6				// $inp is adjusted in such way that
988e1051a39Sopenharmony_ci							// at exit from the loop $dat1-$dat4
989e1051a39Sopenharmony_ci							// are loaded with last "words"
990e1051a39Sopenharmony_ci	add	x6,$len,#0x60			// because .Lecb_tail4x
991e1051a39Sopenharmony_ci
992e1051a39Sopenharmony_ci	aesd	$dat0,q11
993e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
994e1051a39Sopenharmony_ci	aesd	$dat1,q11
995e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
996e1051a39Sopenharmony_ci	aesd	$dat2,q11
997e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
998e1051a39Sopenharmony_ci	aesd	$dat3,q11
999e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1000e1051a39Sopenharmony_ci	aesd	$dat4,q11
1001e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1002e1051a39Sopenharmony_ci
1003e1051a39Sopenharmony_ci	aesd	$dat0,q12
1004e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1005e1051a39Sopenharmony_ci	aesd	$dat1,q12
1006e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1007e1051a39Sopenharmony_ci	aesd	$dat2,q12
1008e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1009e1051a39Sopenharmony_ci	aesd	$dat3,q12
1010e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1011e1051a39Sopenharmony_ci	aesd	$dat4,q12
1012e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1013e1051a39Sopenharmony_ci
1014e1051a39Sopenharmony_ci	aesd	$dat0,q13
1015e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1016e1051a39Sopenharmony_ci	aesd	$dat1,q13
1017e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1018e1051a39Sopenharmony_ci	aesd	$dat2,q13
1019e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1020e1051a39Sopenharmony_ci	aesd	$dat3,q13
1021e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1022e1051a39Sopenharmony_ci	aesd	$dat4,q13
1023e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1024e1051a39Sopenharmony_ci
1025e1051a39Sopenharmony_ci	aesd	$dat0,q14
1026e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1027e1051a39Sopenharmony_ci	aesd	$dat1,q14
1028e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1029e1051a39Sopenharmony_ci	aesd	$dat2,q14
1030e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1031e1051a39Sopenharmony_ci	aesd	$dat3,q14
1032e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1033e1051a39Sopenharmony_ci	aesd	$dat4,q14
1034e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1035e1051a39Sopenharmony_ci
1036e1051a39Sopenharmony_ci	aesd	$dat0,q15
1037e1051a39Sopenharmony_ci	vld1.8	{$in0},[$inp],#16
1038e1051a39Sopenharmony_ci	aesd	$dat1,q15
1039e1051a39Sopenharmony_ci	vld1.8	{$in1},[$inp],#16
1040e1051a39Sopenharmony_ci	aesd	$dat2,q15
1041e1051a39Sopenharmony_ci	vld1.8	{$in2},[$inp],#16
1042e1051a39Sopenharmony_ci	aesd	$dat3,q15
1043e1051a39Sopenharmony_ci	vld1.8	{$in3},[$inp],#16
1044e1051a39Sopenharmony_ci	aesd	$dat4,q15
1045e1051a39Sopenharmony_ci	vld1.8	{$in4},[$inp],#16
1046e1051a39Sopenharmony_ci	cbz	x6,.Lecb_tail4x
1047e1051a39Sopenharmony_ci	vld1.32 {q8},[$key_],#16			// re-pre-load rndkey[0]
1048e1051a39Sopenharmony_ci	veor	$tmp0,$rndlast,$dat0
1049e1051a39Sopenharmony_ci	vorr	$dat0,$in0,$in0
1050e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat1
1051e1051a39Sopenharmony_ci	vorr	$dat1,$in1,$in1
1052e1051a39Sopenharmony_ci	veor	$tmp2,$rndlast,$dat2
1053e1051a39Sopenharmony_ci	vorr	$dat2,$in2,$in2
1054e1051a39Sopenharmony_ci	veor	$tmp3,$rndlast,$dat3
1055e1051a39Sopenharmony_ci	vorr	$dat3,$in3,$in3
1056e1051a39Sopenharmony_ci	veor	$tmp4,$rndlast,$dat4
1057e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
1058e1051a39Sopenharmony_ci	vorr	$dat4,$in4,$in4
1059e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1060e1051a39Sopenharmony_ci	mov	$cnt,$rounds
1061e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
1062e1051a39Sopenharmony_ci	vld1.32 {q9},[$key_],#16			// re-pre-load rndkey[1]
1063e1051a39Sopenharmony_ci	vst1.8	{$tmp3},[$out],#16
1064e1051a39Sopenharmony_ci	vst1.8	{$tmp4},[$out],#16
1065e1051a39Sopenharmony_ci	b.hs	.Loop5x_ecb_dec
1066e1051a39Sopenharmony_ci
1067e1051a39Sopenharmony_ci	add	$len,$len,#0x50
1068e1051a39Sopenharmony_ci	cbz	$len,.Lecb_done
1069e1051a39Sopenharmony_ci
1070e1051a39Sopenharmony_ci	add	$cnt,$rounds,#2
1071e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
1072e1051a39Sopenharmony_ci	vorr	$dat0,$in2,$in2
1073e1051a39Sopenharmony_ci	vorr	$dat1,$in3,$in3
1074e1051a39Sopenharmony_ci	vorr	$dat2,$in4,$in4
1075e1051a39Sopenharmony_ci	b.lo	.Lecb_dec_tail
1076e1051a39Sopenharmony_ci
1077e1051a39Sopenharmony_ci	b	.Loop3x_ecb_dec
1078e1051a39Sopenharmony_ci
1079e1051a39Sopenharmony_ci.align	4
1080e1051a39Sopenharmony_ci.Lecb_tail4x:
1081e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat1
1082e1051a39Sopenharmony_ci	veor	$tmp2,$rndlast,$dat2
1083e1051a39Sopenharmony_ci	veor	$tmp3,$rndlast,$dat3
1084e1051a39Sopenharmony_ci	veor	$tmp4,$rndlast,$dat4
1085e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1086e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
1087e1051a39Sopenharmony_ci	vst1.8	{$tmp3},[$out],#16
1088e1051a39Sopenharmony_ci	vst1.8	{$tmp4},[$out],#16
1089e1051a39Sopenharmony_ci
1090e1051a39Sopenharmony_ci	b	.Lecb_done
1091e1051a39Sopenharmony_ci.align	4
1092e1051a39Sopenharmony_ci___
# All flavours: ECB decryption three blocks at a time, the one-or-two block
# tail, and the shared .Lecb_done exit label that the encryption paths also
# branch to.
#
# .Loop3x_ecb_dec applies two rounds (q8, then q9) to $dat0-$dat2 per trip,
# reloading q8/q9 from [$key_] and decrementing $cnt by 2 until it is no
# longer positive.  The straight-line code that follows applies q8, q9,
# q12, q13 and q14 with aesimc, then q15 with aesd only, and XORs the result
# with $rndlast.
#
# "subs $len,$len,#0x30" supplies the flags for both "mov.lo x6,$len" and
# the closing "b.hs .Loop3x_ecb_dec"; no flag-setting instruction form
# appears in between.  x6 is added to $inp before the next three inputs are
# loaded into $in0-$in2 (see the inline comment for the intent), $key_ is
# reset to $key and q8/q9 are re-loaded through it.
#
# After the loop, "cmn $len,#0x30" detects the nothing-left case and jumps
# to .Lecb_done.
#
# .Lecb_dec_tail runs the same round sequence on $dat1/$dat2 only.
# "cmn $len,#0x20" selects .Lecb_dec_one, which stores just the $dat2
# result and then falls straight through into .Lecb_done (there is no
# branch after it); otherwise both results are stored before branching to
# .Lecb_done.
1093e1051a39Sopenharmony_ci$code.=<<___;
1094e1051a39Sopenharmony_ci.Loop3x_ecb_dec:
1095e1051a39Sopenharmony_ci	aesd	$dat0,q8
1096e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1097e1051a39Sopenharmony_ci	aesd	$dat1,q8
1098e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1099e1051a39Sopenharmony_ci	aesd	$dat2,q8
1100e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1101e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
1102e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
1103e1051a39Sopenharmony_ci	aesd	$dat0,q9
1104e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1105e1051a39Sopenharmony_ci	aesd	$dat1,q9
1106e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1107e1051a39Sopenharmony_ci	aesd	$dat2,q9
1108e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1109e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
1110e1051a39Sopenharmony_ci	b.gt	.Loop3x_ecb_dec
1111e1051a39Sopenharmony_ci
1112e1051a39Sopenharmony_ci	aesd	$dat0,q8
1113e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1114e1051a39Sopenharmony_ci	aesd	$dat1,q8
1115e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1116e1051a39Sopenharmony_ci	aesd	$dat2,q8
1117e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1118e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
1119e1051a39Sopenharmony_ci	mov.lo	x6,$len				// x6, $cnt, is zero at this point
1120e1051a39Sopenharmony_ci	aesd	$dat0,q9
1121e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1122e1051a39Sopenharmony_ci	aesd	$dat1,q9
1123e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1124e1051a39Sopenharmony_ci	aesd	$dat2,q9
1125e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1126e1051a39Sopenharmony_ci	add	$inp,$inp,x6 			// $inp is adjusted in such way that
1127e1051a39Sopenharmony_ci						// at exit from the loop $dat1-$dat2
1128e1051a39Sopenharmony_ci						// are loaded with last "words"
1129e1051a39Sopenharmony_ci	mov	$key_,$key
1130e1051a39Sopenharmony_ci	aesd	$dat0,q12
1131e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1132e1051a39Sopenharmony_ci	aesd	$dat1,q12
1133e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1134e1051a39Sopenharmony_ci	aesd	$dat2,q12
1135e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1136e1051a39Sopenharmony_ci	vld1.8	{$in0},[$inp],#16
1137e1051a39Sopenharmony_ci	aesd	$dat0,q13
1138e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1139e1051a39Sopenharmony_ci	aesd	$dat1,q13
1140e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1141e1051a39Sopenharmony_ci	aesd	$dat2,q13
1142e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1143e1051a39Sopenharmony_ci	vld1.8	{$in1},[$inp],#16
1144e1051a39Sopenharmony_ci	aesd	$dat0,q14
1145e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1146e1051a39Sopenharmony_ci	aesd	$dat1,q14
1147e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1148e1051a39Sopenharmony_ci	aesd	$dat2,q14
1149e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1150e1051a39Sopenharmony_ci	vld1.8	{$in2},[$inp],#16
1151e1051a39Sopenharmony_ci	aesd	$dat0,q15
1152e1051a39Sopenharmony_ci	aesd	$dat1,q15
1153e1051a39Sopenharmony_ci	aesd	$dat2,q15
1154e1051a39Sopenharmony_ci	vld1.32 {q8},[$key_],#16			// re-pre-load rndkey[0]
1155e1051a39Sopenharmony_ci	add	$cnt,$rounds,#2
1156e1051a39Sopenharmony_ci	veor	$tmp0,$rndlast,$dat0
1157e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat1
1158e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$rndlast
1159e1051a39Sopenharmony_ci	vld1.32 {q9},[$key_],#16			// re-pre-load rndkey[1]
1160e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
1161e1051a39Sopenharmony_ci	vorr	$dat0,$in0,$in0
1162e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1163e1051a39Sopenharmony_ci	vorr	$dat1,$in1,$in1
1164e1051a39Sopenharmony_ci	vst1.8	{$dat2},[$out],#16
1165e1051a39Sopenharmony_ci	vorr	$dat2,$in2,$in2
1166e1051a39Sopenharmony_ci	b.hs	.Loop3x_ecb_dec
1167e1051a39Sopenharmony_ci
1168e1051a39Sopenharmony_ci	cmn	$len,#0x30
1169e1051a39Sopenharmony_ci	b.eq	.Lecb_done
1170e1051a39Sopenharmony_ci	nop
1171e1051a39Sopenharmony_ci
1172e1051a39Sopenharmony_ci.Lecb_dec_tail:
1173e1051a39Sopenharmony_ci	aesd	$dat1,q8
1174e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1175e1051a39Sopenharmony_ci	aesd	$dat2,q8
1176e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1177e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
1178e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
1179e1051a39Sopenharmony_ci	aesd	$dat1,q9
1180e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1181e1051a39Sopenharmony_ci	aesd	$dat2,q9
1182e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1183e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
1184e1051a39Sopenharmony_ci	b.gt	.Lecb_dec_tail
1185e1051a39Sopenharmony_ci
1186e1051a39Sopenharmony_ci	aesd	$dat1,q8
1187e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1188e1051a39Sopenharmony_ci	aesd	$dat2,q8
1189e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1190e1051a39Sopenharmony_ci	aesd	$dat1,q9
1191e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1192e1051a39Sopenharmony_ci	aesd	$dat2,q9
1193e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1194e1051a39Sopenharmony_ci	aesd	$dat1,q12
1195e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1196e1051a39Sopenharmony_ci	aesd	$dat2,q12
1197e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1198e1051a39Sopenharmony_ci	cmn	$len,#0x20
1199e1051a39Sopenharmony_ci	aesd	$dat1,q13
1200e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1201e1051a39Sopenharmony_ci	aesd	$dat2,q13
1202e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1203e1051a39Sopenharmony_ci	aesd	$dat1,q14
1204e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1205e1051a39Sopenharmony_ci	aesd	$dat2,q14
1206e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1207e1051a39Sopenharmony_ci	aesd	$dat1,q15
1208e1051a39Sopenharmony_ci	aesd	$dat2,q15
1209e1051a39Sopenharmony_ci	b.eq	.Lecb_dec_one
1210e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat1
1211e1051a39Sopenharmony_ci	veor	$tmp2,$rndlast,$dat2
1212e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1213e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
1214e1051a39Sopenharmony_ci	b	.Lecb_done
1215e1051a39Sopenharmony_ci
1216e1051a39Sopenharmony_ci.Lecb_dec_one:
1217e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$dat2
1218e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1219e1051a39Sopenharmony_ci
1220e1051a39Sopenharmony_ci.Lecb_done:
1221e1051a39Sopenharmony_ci___
1222e1051a39Sopenharmony_ci}
# Epilogue of ${prefix}_ecb_encrypt, emitted right after .Lecb_done.
#
# 32-bit flavours: pop d8-d15, then pop r4-r8 and load pc from the stack,
# which performs the return.
1223e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
1224e1051a39Sopenharmony_ci	vldmia	sp!,{d8-d15}
1225e1051a39Sopenharmony_ci	ldmia	sp!,{r4-r8,pc}
1226e1051a39Sopenharmony_ci___
# 64-bit flavours: reload x29 and release 16 bytes of stack.
1227e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
1228e1051a39Sopenharmony_ci	ldr	x29,[sp],#16
1229e1051a39Sopenharmony_ci___
# 64-bit flavours: the actual return.  .Lecb_Final_abort labels the bare
# "ret" so it can be reached without going through the stack restore above.
# NOTE(review): the branch sites for this label are outside this part of
# the file.
1230e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
1231e1051a39Sopenharmony_ci.Lecb_Final_abort:
1232e1051a39Sopenharmony_ci	ret
1233e1051a39Sopenharmony_ci___
# Every flavour: record the symbol size of ${prefix}_ecb_encrypt.
1234e1051a39Sopenharmony_ci$code.=<<___;
1235e1051a39Sopenharmony_ci.size	${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt
1236e1051a39Sopenharmony_ci___
1237e1051a39Sopenharmony_ci}}}
1238e1051a39Sopenharmony_ci{{{
# Register map for the ${prefix}_cbc_encrypt templates that follow.  Names
# are written in their 64-bit (x/w/q) form.
# NOTE(review): the 32-bit prologue below uses r4-r5/ip directly, so these
# names are presumably rewritten for 32-bit flavours elsewhere in the file.
#
# x0-x4: $inp, $out, $len, $key, $ivp.  w5: $enc.
1239e1051a39Sopenharmony_cimy ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
# $rounds shares w5 with $enc; $cnt is w6, $key_ x7, $step x8, $step1 x12.
1240e1051a39Sopenharmony_cimy ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
# q0-q7 in order: two data registers, two input registers, two temporaries,
# $ivec and $rndlast.
1241e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
1242e1051a39Sopenharmony_ci
# Aliases: $dat is $dat0 (q0), $tmp is $tmp0 (q4), $rndzero_n_last is
# $tmp1 (q5).
1243e1051a39Sopenharmony_cimy ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
# Extra pointers $key4-$key7.  They reuse registers already named above:
# x6 is the 64-bit view of w6 ($cnt), x12 is also $step1, and $key7 is $key
# itself (x3).  x14 is not otherwise named in this map.
1244e1051a39Sopenharmony_cimy ($key4,$key5,$key6,$key7)=("x6","x12","x14",$key);
1245e1051a39Sopenharmony_ci
1246e1051a39Sopenharmony_ci### q8-q15	preloaded key schedule
1247e1051a39Sopenharmony_ci
# Symbol header for ${prefix}_cbc_encrypt: exported (.globl), typed as a
# function, aligned with ".align 5", followed by the entry label.
1248e1051a39Sopenharmony_ci$code.=<<___;
1249e1051a39Sopenharmony_ci.globl	${prefix}_cbc_encrypt
1250e1051a39Sopenharmony_ci.type	${prefix}_cbc_encrypt,%function
1251e1051a39Sopenharmony_ci.align	5
1252e1051a39Sopenharmony_ci${prefix}_cbc_encrypt:
1253e1051a39Sopenharmony_ci___
# 64-bit prologue: push the x29/x30 pair (16 bytes, pre-decrement) and set
# x29 to the new stack pointer.
1254e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
1255e1051a39Sopenharmony_ci	stp	x29,x30,[sp,#-16]!
1256e1051a39Sopenharmony_ci	add	x29,sp,#0
1257e1051a39Sopenharmony_ci___
# 32-bit prologue: remember the incoming sp in ip, push r4-r8 and lr, push
# d8-d15, then fetch r4-r5 from the remembered stack address (the
# "remaining args" per the inline comment).
1258e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
1259e1051a39Sopenharmony_ci	mov	ip,sp
1260e1051a39Sopenharmony_ci	stmdb	sp!,{r4-r8,lr}
1261e1051a39Sopenharmony_ci	vstmdb	sp!,{d8-d15}            @ ABI specification says so
1262e1051a39Sopenharmony_ci	ldmia	ip,{r4-r5}		@ load remaining args
1263e1051a39Sopenharmony_ci___
1264e1051a39Sopenharmony_ci$code.=<<___;
1265e1051a39Sopenharmony_ci	subs	$len,$len,#16
1266e1051a39Sopenharmony_ci	mov	$step,#16
1267e1051a39Sopenharmony_ci	b.lo	.Lcbc_abort
1268e1051a39Sopenharmony_ci	cclr	$step,eq
1269e1051a39Sopenharmony_ci
1270e1051a39Sopenharmony_ci	cmp	$enc,#0			// en- or decrypting?
1271e1051a39Sopenharmony_ci	ldr	$rounds,[$key,#240]
1272e1051a39Sopenharmony_ci	and	$len,$len,#-16
1273e1051a39Sopenharmony_ci	vld1.8	{$ivec},[$ivp]
1274e1051a39Sopenharmony_ci	vld1.8	{$dat},[$inp],$step
1275e1051a39Sopenharmony_ci
1276e1051a39Sopenharmony_ci	vld1.32	{q8-q9},[$key]		// load key schedule...
1277e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#6
1278e1051a39Sopenharmony_ci	add	$key_,$key,x5,lsl#4	// pointer to last 7 round keys
1279e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
1280e1051a39Sopenharmony_ci	vld1.32	{q10-q11},[$key_],#32
1281e1051a39Sopenharmony_ci	vld1.32	{q12-q13},[$key_],#32
1282e1051a39Sopenharmony_ci	vld1.32	{q14-q15},[$key_],#32
1283e1051a39Sopenharmony_ci	vld1.32	{$rndlast},[$key_]
1284e1051a39Sopenharmony_ci
1285e1051a39Sopenharmony_ci	add	$key_,$key,#32
1286e1051a39Sopenharmony_ci	mov	$cnt,$rounds
1287e1051a39Sopenharmony_ci	b.eq	.Lcbc_dec
1288e1051a39Sopenharmony_ci
1289e1051a39Sopenharmony_ci	cmp	$rounds,#2
1290e1051a39Sopenharmony_ci	veor	$dat,$dat,$ivec
1291e1051a39Sopenharmony_ci	veor	$rndzero_n_last,q8,$rndlast
1292e1051a39Sopenharmony_ci	b.eq	.Lcbc_enc128
1293e1051a39Sopenharmony_ci
1294e1051a39Sopenharmony_ci	vld1.32	{$in0-$in1},[$key_]
1295e1051a39Sopenharmony_ci	add	$key_,$key,#16
1296e1051a39Sopenharmony_ci	add	$key4,$key,#16*4
1297e1051a39Sopenharmony_ci	add	$key5,$key,#16*5
1298e1051a39Sopenharmony_ci	aese	$dat,q8
1299e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1300e1051a39Sopenharmony_ci	add	$key6,$key,#16*6
1301e1051a39Sopenharmony_ci	add	$key7,$key,#16*7
1302e1051a39Sopenharmony_ci	b	.Lenter_cbc_enc
1303e1051a39Sopenharmony_ci
1304e1051a39Sopenharmony_ci.align	4
1305e1051a39Sopenharmony_ci.Loop_cbc_enc:
1306e1051a39Sopenharmony_ci	aese	$dat,q8
1307e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1308e1051a39Sopenharmony_ci	 vst1.8	{$ivec},[$out],#16
1309e1051a39Sopenharmony_ci.Lenter_cbc_enc:
1310e1051a39Sopenharmony_ci	aese	$dat,q9
1311e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1312e1051a39Sopenharmony_ci	aese	$dat,$in0
1313e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1314e1051a39Sopenharmony_ci	vld1.32	{q8},[$key4]
1315e1051a39Sopenharmony_ci	cmp	$rounds,#4
1316e1051a39Sopenharmony_ci	aese	$dat,$in1
1317e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1318e1051a39Sopenharmony_ci	vld1.32	{q9},[$key5]
1319e1051a39Sopenharmony_ci	b.eq	.Lcbc_enc192
1320e1051a39Sopenharmony_ci
1321e1051a39Sopenharmony_ci	aese	$dat,q8
1322e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1323e1051a39Sopenharmony_ci	vld1.32	{q8},[$key6]
1324e1051a39Sopenharmony_ci	aese	$dat,q9
1325e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1326e1051a39Sopenharmony_ci	vld1.32	{q9},[$key7]
1327e1051a39Sopenharmony_ci	nop
1328e1051a39Sopenharmony_ci
1329e1051a39Sopenharmony_ci.Lcbc_enc192:
1330e1051a39Sopenharmony_ci	aese	$dat,q8
1331e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1332e1051a39Sopenharmony_ci	 subs	$len,$len,#16
1333e1051a39Sopenharmony_ci	aese	$dat,q9
1334e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1335e1051a39Sopenharmony_ci	 cclr	$step,eq
1336e1051a39Sopenharmony_ci	aese	$dat,q10
1337e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1338e1051a39Sopenharmony_ci	aese	$dat,q11
1339e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1340e1051a39Sopenharmony_ci	 vld1.8	{q8},[$inp],$step
1341e1051a39Sopenharmony_ci	aese	$dat,q12
1342e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1343e1051a39Sopenharmony_ci	 veor	q8,q8,$rndzero_n_last
1344e1051a39Sopenharmony_ci	aese	$dat,q13
1345e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1346e1051a39Sopenharmony_ci	 vld1.32 {q9},[$key_]		// re-pre-load rndkey[1]
1347e1051a39Sopenharmony_ci	aese	$dat,q14
1348e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1349e1051a39Sopenharmony_ci	aese	$dat,q15
1350e1051a39Sopenharmony_ci	veor	$ivec,$dat,$rndlast
1351e1051a39Sopenharmony_ci	b.hs	.Loop_cbc_enc
1352e1051a39Sopenharmony_ci
1353e1051a39Sopenharmony_ci	vst1.8	{$ivec},[$out],#16
1354e1051a39Sopenharmony_ci	b	.Lcbc_done
1355e1051a39Sopenharmony_ci
1356e1051a39Sopenharmony_ci.align	5
1357e1051a39Sopenharmony_ci.Lcbc_enc128:
1358e1051a39Sopenharmony_ci	vld1.32	{$in0-$in1},[$key_]
1359e1051a39Sopenharmony_ci	aese	$dat,q8
1360e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1361e1051a39Sopenharmony_ci	b	.Lenter_cbc_enc128
1362e1051a39Sopenharmony_ci.Loop_cbc_enc128:
1363e1051a39Sopenharmony_ci	aese	$dat,q8
1364e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1365e1051a39Sopenharmony_ci	 vst1.8	{$ivec},[$out],#16
1366e1051a39Sopenharmony_ci.Lenter_cbc_enc128:
1367e1051a39Sopenharmony_ci	aese	$dat,q9
1368e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1369e1051a39Sopenharmony_ci	 subs	$len,$len,#16
1370e1051a39Sopenharmony_ci	aese	$dat,$in0
1371e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1372e1051a39Sopenharmony_ci	 cclr	$step,eq
1373e1051a39Sopenharmony_ci	aese	$dat,$in1
1374e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1375e1051a39Sopenharmony_ci	aese	$dat,q10
1376e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1377e1051a39Sopenharmony_ci	aese	$dat,q11
1378e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1379e1051a39Sopenharmony_ci	 vld1.8	{q8},[$inp],$step
1380e1051a39Sopenharmony_ci	aese	$dat,q12
1381e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1382e1051a39Sopenharmony_ci	aese	$dat,q13
1383e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1384e1051a39Sopenharmony_ci	aese	$dat,q14
1385e1051a39Sopenharmony_ci	aesmc	$dat,$dat
1386e1051a39Sopenharmony_ci	 veor	q8,q8,$rndzero_n_last
1387e1051a39Sopenharmony_ci	aese	$dat,q15
1388e1051a39Sopenharmony_ci	veor	$ivec,$dat,$rndlast
1389e1051a39Sopenharmony_ci	b.hs	.Loop_cbc_enc128
1390e1051a39Sopenharmony_ci
1391e1051a39Sopenharmony_ci	vst1.8	{$ivec},[$out],#16
1392e1051a39Sopenharmony_ci	b	.Lcbc_done
1393e1051a39Sopenharmony_ci___
1394e1051a39Sopenharmony_ci{
1395e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));	# defaults: $dat2=q10, $in2=q11, $tmp2=q9 ($tmp2 is never reassigned below)
1396e1051a39Sopenharmony_ci
1397e1051a39Sopenharmony_cimy ($dat3,$in3,$tmp3);	# used only in 64-bit mode; left undef unless $flavour matches /64/
1398e1051a39Sopenharmony_cimy ($dat4,$in4,$tmp4);	# likewise assigned only inside the 64-bit branch below
1399e1051a39Sopenharmony_ciif ($flavour =~ /64/) {	# 64-bit flavours get the extra registers needed by .Loop5x_cbc_dec
1400e1051a39Sopenharmony_ci    ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23));	# $dat2..$dat4=q16..q18, $in2..$in4=q19..q21, $tmp3..$tmp4=q22..q23
1401e1051a39Sopenharmony_ci}
1402e1051a39Sopenharmony_ci
1403e1051a39Sopenharmony_ci$code.=<<___;
1404e1051a39Sopenharmony_ci.align	5
1405e1051a39Sopenharmony_ci.Lcbc_dec:
1406e1051a39Sopenharmony_ci	vld1.8	{$dat2},[$inp],#16
1407e1051a39Sopenharmony_ci	subs	$len,$len,#32		// bias
1408e1051a39Sopenharmony_ci	add	$cnt,$rounds,#2
1409e1051a39Sopenharmony_ci	vorr	$in1,$dat,$dat
1410e1051a39Sopenharmony_ci	vorr	$dat1,$dat,$dat
1411e1051a39Sopenharmony_ci	vorr	$in2,$dat2,$dat2
1412e1051a39Sopenharmony_ci	b.lo	.Lcbc_dec_tail
1413e1051a39Sopenharmony_ci
1414e1051a39Sopenharmony_ci	vorr	$dat1,$dat2,$dat2
1415e1051a39Sopenharmony_ci	vld1.8	{$dat2},[$inp],#16
1416e1051a39Sopenharmony_ci	vorr	$in0,$dat,$dat
1417e1051a39Sopenharmony_ci	vorr	$in1,$dat1,$dat1
1418e1051a39Sopenharmony_ci	vorr	$in2,$dat2,$dat2
1419e1051a39Sopenharmony_ci___
1420e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
1421e1051a39Sopenharmony_ci	cmp	$len,#32
1422e1051a39Sopenharmony_ci	b.lo	.Loop3x_cbc_dec
1423e1051a39Sopenharmony_ci
1424e1051a39Sopenharmony_ci	vld1.8	{$dat3},[$inp],#16
1425e1051a39Sopenharmony_ci	vld1.8	{$dat4},[$inp],#16
1426e1051a39Sopenharmony_ci	sub	$len,$len,#32		// bias
1427e1051a39Sopenharmony_ci	mov	$cnt,$rounds
1428e1051a39Sopenharmony_ci	vorr	$in3,$dat3,$dat3
1429e1051a39Sopenharmony_ci	vorr	$in4,$dat4,$dat4
1430e1051a39Sopenharmony_ci
1431e1051a39Sopenharmony_ci.Loop5x_cbc_dec:
1432e1051a39Sopenharmony_ci	aesd	$dat0,q8
1433e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1434e1051a39Sopenharmony_ci	aesd	$dat1,q8
1435e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1436e1051a39Sopenharmony_ci	aesd	$dat2,q8
1437e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1438e1051a39Sopenharmony_ci	aesd	$dat3,q8
1439e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1440e1051a39Sopenharmony_ci	aesd	$dat4,q8
1441e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1442e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
1443e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
1444e1051a39Sopenharmony_ci	aesd	$dat0,q9
1445e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1446e1051a39Sopenharmony_ci	aesd	$dat1,q9
1447e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1448e1051a39Sopenharmony_ci	aesd	$dat2,q9
1449e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1450e1051a39Sopenharmony_ci	aesd	$dat3,q9
1451e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1452e1051a39Sopenharmony_ci	aesd	$dat4,q9
1453e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1454e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
1455e1051a39Sopenharmony_ci	b.gt	.Loop5x_cbc_dec
1456e1051a39Sopenharmony_ci
1457e1051a39Sopenharmony_ci	aesd	$dat0,q8
1458e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1459e1051a39Sopenharmony_ci	aesd	$dat1,q8
1460e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1461e1051a39Sopenharmony_ci	aesd	$dat2,q8
1462e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1463e1051a39Sopenharmony_ci	aesd	$dat3,q8
1464e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1465e1051a39Sopenharmony_ci	aesd	$dat4,q8
1466e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1467e1051a39Sopenharmony_ci	 cmp	$len,#0x40		// because .Lcbc_tail4x
1468e1051a39Sopenharmony_ci	 sub	$len,$len,#0x50
1469e1051a39Sopenharmony_ci
1470e1051a39Sopenharmony_ci	aesd	$dat0,q9
1471e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1472e1051a39Sopenharmony_ci	aesd	$dat1,q9
1473e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1474e1051a39Sopenharmony_ci	aesd	$dat2,q9
1475e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1476e1051a39Sopenharmony_ci	aesd	$dat3,q9
1477e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1478e1051a39Sopenharmony_ci	aesd	$dat4,q9
1479e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1480e1051a39Sopenharmony_ci	 csel	x6,xzr,$len,gt		// borrow x6, $cnt, "gt" is not typo
1481e1051a39Sopenharmony_ci	 mov	$key_,$key
1482e1051a39Sopenharmony_ci
1483e1051a39Sopenharmony_ci	aesd	$dat0,q10
1484e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1485e1051a39Sopenharmony_ci	aesd	$dat1,q10
1486e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1487e1051a39Sopenharmony_ci	aesd	$dat2,q10
1488e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1489e1051a39Sopenharmony_ci	aesd	$dat3,q10
1490e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1491e1051a39Sopenharmony_ci	aesd	$dat4,q10
1492e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1493e1051a39Sopenharmony_ci	 add	$inp,$inp,x6		// $inp is adjusted in such way that
1494e1051a39Sopenharmony_ci					// at exit from the loop $dat1-$dat4
1495e1051a39Sopenharmony_ci					// are loaded with last "words"
1496e1051a39Sopenharmony_ci	 add	x6,$len,#0x60		// because .Lcbc_tail4x
1497e1051a39Sopenharmony_ci
1498e1051a39Sopenharmony_ci	aesd	$dat0,q11
1499e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1500e1051a39Sopenharmony_ci	aesd	$dat1,q11
1501e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1502e1051a39Sopenharmony_ci	aesd	$dat2,q11
1503e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1504e1051a39Sopenharmony_ci	aesd	$dat3,q11
1505e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1506e1051a39Sopenharmony_ci	aesd	$dat4,q11
1507e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1508e1051a39Sopenharmony_ci
1509e1051a39Sopenharmony_ci	aesd	$dat0,q12
1510e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1511e1051a39Sopenharmony_ci	aesd	$dat1,q12
1512e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1513e1051a39Sopenharmony_ci	aesd	$dat2,q12
1514e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1515e1051a39Sopenharmony_ci	aesd	$dat3,q12
1516e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1517e1051a39Sopenharmony_ci	aesd	$dat4,q12
1518e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1519e1051a39Sopenharmony_ci
1520e1051a39Sopenharmony_ci	aesd	$dat0,q13
1521e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1522e1051a39Sopenharmony_ci	aesd	$dat1,q13
1523e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1524e1051a39Sopenharmony_ci	aesd	$dat2,q13
1525e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1526e1051a39Sopenharmony_ci	aesd	$dat3,q13
1527e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1528e1051a39Sopenharmony_ci	aesd	$dat4,q13
1529e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1530e1051a39Sopenharmony_ci
1531e1051a39Sopenharmony_ci	aesd	$dat0,q14
1532e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1533e1051a39Sopenharmony_ci	aesd	$dat1,q14
1534e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1535e1051a39Sopenharmony_ci	aesd	$dat2,q14
1536e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1537e1051a39Sopenharmony_ci	aesd	$dat3,q14
1538e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
1539e1051a39Sopenharmony_ci	aesd	$dat4,q14
1540e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
1541e1051a39Sopenharmony_ci
1542e1051a39Sopenharmony_ci	 veor	$tmp0,$ivec,$rndlast
1543e1051a39Sopenharmony_ci	aesd	$dat0,q15
1544e1051a39Sopenharmony_ci	 veor	$tmp1,$in0,$rndlast
1545e1051a39Sopenharmony_ci	 vld1.8	{$in0},[$inp],#16
1546e1051a39Sopenharmony_ci	aesd	$dat1,q15
1547e1051a39Sopenharmony_ci	 veor	$tmp2,$in1,$rndlast
1548e1051a39Sopenharmony_ci	 vld1.8	{$in1},[$inp],#16
1549e1051a39Sopenharmony_ci	aesd	$dat2,q15
1550e1051a39Sopenharmony_ci	 veor	$tmp3,$in2,$rndlast
1551e1051a39Sopenharmony_ci	 vld1.8	{$in2},[$inp],#16
1552e1051a39Sopenharmony_ci	aesd	$dat3,q15
1553e1051a39Sopenharmony_ci	 veor	$tmp4,$in3,$rndlast
1554e1051a39Sopenharmony_ci	 vld1.8	{$in3},[$inp],#16
1555e1051a39Sopenharmony_ci	aesd	$dat4,q15
1556e1051a39Sopenharmony_ci	 vorr	$ivec,$in4,$in4
1557e1051a39Sopenharmony_ci	 vld1.8	{$in4},[$inp],#16
1558e1051a39Sopenharmony_ci	cbz	x6,.Lcbc_tail4x
1559e1051a39Sopenharmony_ci	 vld1.32 {q8},[$key_],#16	// re-pre-load rndkey[0]
1560e1051a39Sopenharmony_ci	veor	$tmp0,$tmp0,$dat0
1561e1051a39Sopenharmony_ci	 vorr	$dat0,$in0,$in0
1562e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
1563e1051a39Sopenharmony_ci	 vorr	$dat1,$in1,$in1
1564e1051a39Sopenharmony_ci	veor	$tmp2,$tmp2,$dat2
1565e1051a39Sopenharmony_ci	 vorr	$dat2,$in2,$in2
1566e1051a39Sopenharmony_ci	veor	$tmp3,$tmp3,$dat3
1567e1051a39Sopenharmony_ci	 vorr	$dat3,$in3,$in3
1568e1051a39Sopenharmony_ci	veor	$tmp4,$tmp4,$dat4
1569e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
1570e1051a39Sopenharmony_ci	 vorr	$dat4,$in4,$in4
1571e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1572e1051a39Sopenharmony_ci	 mov	$cnt,$rounds
1573e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
1574e1051a39Sopenharmony_ci	 vld1.32 {q9},[$key_],#16	// re-pre-load rndkey[1]
1575e1051a39Sopenharmony_ci	vst1.8	{$tmp3},[$out],#16
1576e1051a39Sopenharmony_ci	vst1.8	{$tmp4},[$out],#16
1577e1051a39Sopenharmony_ci	b.hs	.Loop5x_cbc_dec
1578e1051a39Sopenharmony_ci
1579e1051a39Sopenharmony_ci	add	$len,$len,#0x50
1580e1051a39Sopenharmony_ci	cbz	$len,.Lcbc_done
1581e1051a39Sopenharmony_ci
1582e1051a39Sopenharmony_ci	add	$cnt,$rounds,#2
1583e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
1584e1051a39Sopenharmony_ci	vorr	$dat0,$in2,$in2
1585e1051a39Sopenharmony_ci	vorr	$in0,$in2,$in2
1586e1051a39Sopenharmony_ci	vorr	$dat1,$in3,$in3
1587e1051a39Sopenharmony_ci	vorr	$in1,$in3,$in3
1588e1051a39Sopenharmony_ci	vorr	$dat2,$in4,$in4
1589e1051a39Sopenharmony_ci	vorr	$in2,$in4,$in4
1590e1051a39Sopenharmony_ci	b.lo	.Lcbc_dec_tail
1591e1051a39Sopenharmony_ci
1592e1051a39Sopenharmony_ci	b	.Loop3x_cbc_dec
1593e1051a39Sopenharmony_ci
1594e1051a39Sopenharmony_ci.align	4
1595e1051a39Sopenharmony_ci.Lcbc_tail4x:
1596e1051a39Sopenharmony_ci	veor	$tmp1,$tmp0,$dat1
1597e1051a39Sopenharmony_ci	veor	$tmp2,$tmp2,$dat2
1598e1051a39Sopenharmony_ci	veor	$tmp3,$tmp3,$dat3
1599e1051a39Sopenharmony_ci	veor	$tmp4,$tmp4,$dat4
1600e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1601e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
1602e1051a39Sopenharmony_ci	vst1.8	{$tmp3},[$out],#16
1603e1051a39Sopenharmony_ci	vst1.8	{$tmp4},[$out],#16
1604e1051a39Sopenharmony_ci
1605e1051a39Sopenharmony_ci	b	.Lcbc_done
1606e1051a39Sopenharmony_ci.align	4
1607e1051a39Sopenharmony_ci___
1608e1051a39Sopenharmony_ci$code.=<<___;
1609e1051a39Sopenharmony_ci.Loop3x_cbc_dec:
1610e1051a39Sopenharmony_ci	aesd	$dat0,q8
1611e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1612e1051a39Sopenharmony_ci	aesd	$dat1,q8
1613e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1614e1051a39Sopenharmony_ci	aesd	$dat2,q8
1615e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1616e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
1617e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
1618e1051a39Sopenharmony_ci	aesd	$dat0,q9
1619e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1620e1051a39Sopenharmony_ci	aesd	$dat1,q9
1621e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1622e1051a39Sopenharmony_ci	aesd	$dat2,q9
1623e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1624e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
1625e1051a39Sopenharmony_ci	b.gt	.Loop3x_cbc_dec
1626e1051a39Sopenharmony_ci
1627e1051a39Sopenharmony_ci	aesd	$dat0,q8
1628e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1629e1051a39Sopenharmony_ci	aesd	$dat1,q8
1630e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1631e1051a39Sopenharmony_ci	aesd	$dat2,q8
1632e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1633e1051a39Sopenharmony_ci	 veor	$tmp0,$ivec,$rndlast
1634e1051a39Sopenharmony_ci	 subs	$len,$len,#0x30
1635e1051a39Sopenharmony_ci	 veor	$tmp1,$in0,$rndlast
1636e1051a39Sopenharmony_ci	 mov.lo	x6,$len			// x6, $cnt, is zero at this point
1637e1051a39Sopenharmony_ci	aesd	$dat0,q9
1638e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1639e1051a39Sopenharmony_ci	aesd	$dat1,q9
1640e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1641e1051a39Sopenharmony_ci	aesd	$dat2,q9
1642e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1643e1051a39Sopenharmony_ci	 veor	$tmp2,$in1,$rndlast
1644e1051a39Sopenharmony_ci	 add	$inp,$inp,x6		// $inp is adjusted in such way that
1645e1051a39Sopenharmony_ci					// at exit from the loop $dat1-$dat2
1646e1051a39Sopenharmony_ci					// are loaded with last "words"
1647e1051a39Sopenharmony_ci	 vorr	$ivec,$in2,$in2
1648e1051a39Sopenharmony_ci	 mov	$key_,$key
1649e1051a39Sopenharmony_ci	aesd	$dat0,q12
1650e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1651e1051a39Sopenharmony_ci	aesd	$dat1,q12
1652e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1653e1051a39Sopenharmony_ci	aesd	$dat2,q12
1654e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1655e1051a39Sopenharmony_ci	 vld1.8	{$in0},[$inp],#16
1656e1051a39Sopenharmony_ci	aesd	$dat0,q13
1657e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1658e1051a39Sopenharmony_ci	aesd	$dat1,q13
1659e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1660e1051a39Sopenharmony_ci	aesd	$dat2,q13
1661e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1662e1051a39Sopenharmony_ci	 vld1.8	{$in1},[$inp],#16
1663e1051a39Sopenharmony_ci	aesd	$dat0,q14
1664e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
1665e1051a39Sopenharmony_ci	aesd	$dat1,q14
1666e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1667e1051a39Sopenharmony_ci	aesd	$dat2,q14
1668e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1669e1051a39Sopenharmony_ci	 vld1.8	{$in2},[$inp],#16
1670e1051a39Sopenharmony_ci	aesd	$dat0,q15
1671e1051a39Sopenharmony_ci	aesd	$dat1,q15
1672e1051a39Sopenharmony_ci	aesd	$dat2,q15
1673e1051a39Sopenharmony_ci	 vld1.32 {q8},[$key_],#16	// re-pre-load rndkey[0]
1674e1051a39Sopenharmony_ci	 add	$cnt,$rounds,#2
1675e1051a39Sopenharmony_ci	veor	$tmp0,$tmp0,$dat0
1676e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
1677e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$tmp2
1678e1051a39Sopenharmony_ci	 vld1.32 {q9},[$key_],#16	// re-pre-load rndkey[1]
1679e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
1680e1051a39Sopenharmony_ci	 vorr	$dat0,$in0,$in0
1681e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1682e1051a39Sopenharmony_ci	 vorr	$dat1,$in1,$in1
1683e1051a39Sopenharmony_ci	vst1.8	{$dat2},[$out],#16
1684e1051a39Sopenharmony_ci	 vorr	$dat2,$in2,$in2
1685e1051a39Sopenharmony_ci	b.hs	.Loop3x_cbc_dec
1686e1051a39Sopenharmony_ci
1687e1051a39Sopenharmony_ci	cmn	$len,#0x30
1688e1051a39Sopenharmony_ci	b.eq	.Lcbc_done
1689e1051a39Sopenharmony_ci	nop
1690e1051a39Sopenharmony_ci
1691e1051a39Sopenharmony_ci.Lcbc_dec_tail:
1692e1051a39Sopenharmony_ci	aesd	$dat1,q8
1693e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1694e1051a39Sopenharmony_ci	aesd	$dat2,q8
1695e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1696e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
1697e1051a39Sopenharmony_ci	subs	$cnt,$cnt,#2
1698e1051a39Sopenharmony_ci	aesd	$dat1,q9
1699e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1700e1051a39Sopenharmony_ci	aesd	$dat2,q9
1701e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1702e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
1703e1051a39Sopenharmony_ci	b.gt	.Lcbc_dec_tail
1704e1051a39Sopenharmony_ci
1705e1051a39Sopenharmony_ci	aesd	$dat1,q8
1706e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1707e1051a39Sopenharmony_ci	aesd	$dat2,q8
1708e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1709e1051a39Sopenharmony_ci	aesd	$dat1,q9
1710e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1711e1051a39Sopenharmony_ci	aesd	$dat2,q9
1712e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1713e1051a39Sopenharmony_ci	aesd	$dat1,q12
1714e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1715e1051a39Sopenharmony_ci	aesd	$dat2,q12
1716e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1717e1051a39Sopenharmony_ci	 cmn	$len,#0x20
1718e1051a39Sopenharmony_ci	aesd	$dat1,q13
1719e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1720e1051a39Sopenharmony_ci	aesd	$dat2,q13
1721e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1722e1051a39Sopenharmony_ci	 veor	$tmp1,$ivec,$rndlast
1723e1051a39Sopenharmony_ci	aesd	$dat1,q14
1724e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
1725e1051a39Sopenharmony_ci	aesd	$dat2,q14
1726e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
1727e1051a39Sopenharmony_ci	 veor	$tmp2,$in1,$rndlast
1728e1051a39Sopenharmony_ci	aesd	$dat1,q15
1729e1051a39Sopenharmony_ci	aesd	$dat2,q15
1730e1051a39Sopenharmony_ci	b.eq	.Lcbc_dec_one
1731e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
1732e1051a39Sopenharmony_ci	veor	$tmp2,$tmp2,$dat2
1733e1051a39Sopenharmony_ci	 vorr	$ivec,$in2,$in2
1734e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1735e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
1736e1051a39Sopenharmony_ci	b	.Lcbc_done
1737e1051a39Sopenharmony_ci
1738e1051a39Sopenharmony_ci.Lcbc_dec_one:
1739e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat2
1740e1051a39Sopenharmony_ci	 vorr	$ivec,$in2,$in2
1741e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
1742e1051a39Sopenharmony_ci
1743e1051a39Sopenharmony_ci.Lcbc_done:
1744e1051a39Sopenharmony_ci	vst1.8	{$ivec},[$ivp]
1745e1051a39Sopenharmony_ci.Lcbc_abort:
1746e1051a39Sopenharmony_ci___
1747e1051a39Sopenharmony_ci}
1748e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
1749e1051a39Sopenharmony_ci	vldmia	sp!,{d8-d15}
1750e1051a39Sopenharmony_ci	ldmia	sp!,{r4-r8,pc}
1751e1051a39Sopenharmony_ci___
1752e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
1753e1051a39Sopenharmony_ci	ldr	x29,[sp],#16
1754e1051a39Sopenharmony_ci	ret
1755e1051a39Sopenharmony_ci___
1756e1051a39Sopenharmony_ci$code.=<<___;
1757e1051a39Sopenharmony_ci.size	${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
1758e1051a39Sopenharmony_ci___
1759e1051a39Sopenharmony_ci}}}
1760e1051a39Sopenharmony_ci{{{
1761e1051a39Sopenharmony_cimy ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));	# x0..x4, in this order
1762e1051a39Sopenharmony_cimy ($rounds,$cnt,$key_)=("w5","w6","x7");	# $rounds is read from [$key,#240]; $cnt counts rounds down per pass; $key_ walks the round keys
1763e1051a39Sopenharmony_cimy ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12));	# w8, w9, w10, w12: counter word plus scratch for the $ctr+N values
1764e1051a39Sopenharmony_cimy $step="x12";		# aliases with $tctr2 (w12 is the 32-bit view of x12)
1765e1051a39Sopenharmony_ci
1766e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));	# q0..q7, in this order
1767e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));	# $dat2=q10, $in2=q11, $tmp2=q9 (no 64-bit override in this routine)
1768e1051a39Sopenharmony_ci
1769e1051a39Sopenharmony_ci# used only in 64-bit mode...
1770e1051a39Sopenharmony_cimy ($dat3,$dat4,$in3,$in4)=map("q$_",(16..23));	# only q16..q19 are taken; the surplus q20..q23 are dropped by the list assignment
1771e1051a39Sopenharmony_ci
1772e1051a39Sopenharmony_cimy ($dat,$tmp)=($dat0,$tmp0);	# short aliases for $dat0 (q0) and $tmp0 (q4)
1773e1051a39Sopenharmony_ci
1774e1051a39Sopenharmony_ci### q8-q9,q12-q15	preloaded key schedule; q10-q11 are $dat2/$in2 in this routine
1775e1051a39Sopenharmony_ci
1776e1051a39Sopenharmony_ci$code.=<<___;
1777e1051a39Sopenharmony_ci.globl	${prefix}_ctr32_encrypt_blocks
1778e1051a39Sopenharmony_ci.type	${prefix}_ctr32_encrypt_blocks,%function
1779e1051a39Sopenharmony_ci.align	5
1780e1051a39Sopenharmony_ci${prefix}_ctr32_encrypt_blocks:
1781e1051a39Sopenharmony_ci___
1782e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
1783e1051a39Sopenharmony_ci	stp		x29,x30,[sp,#-16]!
1784e1051a39Sopenharmony_ci	add		x29,sp,#0
1785e1051a39Sopenharmony_ci___
1786e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
1787e1051a39Sopenharmony_ci	mov		ip,sp
1788e1051a39Sopenharmony_ci	stmdb		sp!,{r4-r10,lr}
1789e1051a39Sopenharmony_ci	vstmdb		sp!,{d8-d15}            @ ABI specification says so
1790e1051a39Sopenharmony_ci	ldr		r4, [ip]		@ load remaining arg
1791e1051a39Sopenharmony_ci___
1792e1051a39Sopenharmony_ci$code.=<<___;
1793e1051a39Sopenharmony_ci	ldr		$rounds,[$key,#240]
1794e1051a39Sopenharmony_ci
1795e1051a39Sopenharmony_ci	ldr		$ctr, [$ivp, #12]
1796e1051a39Sopenharmony_ci#ifdef __ARMEB__
1797e1051a39Sopenharmony_ci	vld1.8		{$dat0},[$ivp]
1798e1051a39Sopenharmony_ci#else
1799e1051a39Sopenharmony_ci	vld1.32		{$dat0},[$ivp]
1800e1051a39Sopenharmony_ci#endif
1801e1051a39Sopenharmony_ci	vld1.32		{q8-q9},[$key]		// load key schedule...
1802e1051a39Sopenharmony_ci	sub		$rounds,$rounds,#4
1803e1051a39Sopenharmony_ci	mov		$step,#16
1804e1051a39Sopenharmony_ci	cmp		$len,#2
1805e1051a39Sopenharmony_ci	add		$key_,$key,x5,lsl#4	// pointer to last 5 round keys
1806e1051a39Sopenharmony_ci	sub		$rounds,$rounds,#2
1807e1051a39Sopenharmony_ci	vld1.32		{q12-q13},[$key_],#32
1808e1051a39Sopenharmony_ci	vld1.32		{q14-q15},[$key_],#32
1809e1051a39Sopenharmony_ci	vld1.32		{$rndlast},[$key_]
1810e1051a39Sopenharmony_ci	add		$key_,$key,#32
1811e1051a39Sopenharmony_ci	mov		$cnt,$rounds
1812e1051a39Sopenharmony_ci	cclr		$step,lo
1813e1051a39Sopenharmony_ci#ifndef __ARMEB__
1814e1051a39Sopenharmony_ci	rev		$ctr, $ctr
1815e1051a39Sopenharmony_ci#endif
1816e1051a39Sopenharmony_ci___
1817e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
1818e1051a39Sopenharmony_ci	vorr		$dat1,$dat0,$dat0
1819e1051a39Sopenharmony_ci	add		$tctr1, $ctr, #1
1820e1051a39Sopenharmony_ci	vorr		$dat2,$dat0,$dat0
1821e1051a39Sopenharmony_ci	add		$ctr, $ctr, #2
1822e1051a39Sopenharmony_ci	vorr		$ivec,$dat0,$dat0
1823e1051a39Sopenharmony_ci	rev		$tctr1, $tctr1
1824e1051a39Sopenharmony_ci	vmov.32		${dat1}[3],$tctr1
1825e1051a39Sopenharmony_ci	b.ls		.Lctr32_tail
1826e1051a39Sopenharmony_ci	rev		$tctr2, $ctr
1827e1051a39Sopenharmony_ci	sub		$len,$len,#3		// bias
1828e1051a39Sopenharmony_ci	vmov.32		${dat2}[3],$tctr2
1829e1051a39Sopenharmony_ci___
1830e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
1831e1051a39Sopenharmony_ci	add		$tctr1, $ctr, #1
1832e1051a39Sopenharmony_ci	vorr		$ivec,$dat0,$dat0
1833e1051a39Sopenharmony_ci	rev		$tctr1, $tctr1
1834e1051a39Sopenharmony_ci	vmov.32		${ivec}[3],$tctr1
1835e1051a39Sopenharmony_ci	add		$ctr, $ctr, #2
1836e1051a39Sopenharmony_ci	vorr		$dat1,$ivec,$ivec
1837e1051a39Sopenharmony_ci	b.ls		.Lctr32_tail
1838e1051a39Sopenharmony_ci	rev		$tctr2, $ctr
1839e1051a39Sopenharmony_ci	vmov.32		${ivec}[3],$tctr2
1840e1051a39Sopenharmony_ci	sub		$len,$len,#3		// bias
1841e1051a39Sopenharmony_ci	vorr		$dat2,$ivec,$ivec
1842e1051a39Sopenharmony_ci___
1843e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
1844e1051a39Sopenharmony_ci	cmp		$len,#32
1845e1051a39Sopenharmony_ci	b.lo		.Loop3x_ctr32
1846e1051a39Sopenharmony_ci
1847e1051a39Sopenharmony_ci	add		w13,$ctr,#1
1848e1051a39Sopenharmony_ci	add		w14,$ctr,#2
1849e1051a39Sopenharmony_ci	vorr		$dat3,$dat0,$dat0
1850e1051a39Sopenharmony_ci	rev		w13,w13
1851e1051a39Sopenharmony_ci	vorr		$dat4,$dat0,$dat0
1852e1051a39Sopenharmony_ci	rev		w14,w14
1853e1051a39Sopenharmony_ci	vmov.32		${dat3}[3],w13
1854e1051a39Sopenharmony_ci	sub		$len,$len,#2		// bias
1855e1051a39Sopenharmony_ci	vmov.32		${dat4}[3],w14
1856e1051a39Sopenharmony_ci	add		$ctr,$ctr,#2
1857e1051a39Sopenharmony_ci	b		.Loop5x_ctr32
1858e1051a39Sopenharmony_ci
1859e1051a39Sopenharmony_ci.align	4
1860e1051a39Sopenharmony_ci.Loop5x_ctr32:
1861e1051a39Sopenharmony_ci	aese		$dat0,q8
1862e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
1863e1051a39Sopenharmony_ci	aese		$dat1,q8
1864e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
1865e1051a39Sopenharmony_ci	aese		$dat2,q8
1866e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
1867e1051a39Sopenharmony_ci	aese		$dat3,q8
1868e1051a39Sopenharmony_ci	aesmc		$dat3,$dat3
1869e1051a39Sopenharmony_ci	aese		$dat4,q8
1870e1051a39Sopenharmony_ci	aesmc		$dat4,$dat4
1871e1051a39Sopenharmony_ci	vld1.32		{q8},[$key_],#16
1872e1051a39Sopenharmony_ci	subs		$cnt,$cnt,#2
1873e1051a39Sopenharmony_ci	aese		$dat0,q9
1874e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
1875e1051a39Sopenharmony_ci	aese		$dat1,q9
1876e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
1877e1051a39Sopenharmony_ci	aese		$dat2,q9
1878e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
1879e1051a39Sopenharmony_ci	aese		$dat3,q9
1880e1051a39Sopenharmony_ci	aesmc		$dat3,$dat3
1881e1051a39Sopenharmony_ci	aese		$dat4,q9
1882e1051a39Sopenharmony_ci	aesmc		$dat4,$dat4
1883e1051a39Sopenharmony_ci	vld1.32		{q9},[$key_],#16
1884e1051a39Sopenharmony_ci	b.gt		.Loop5x_ctr32
1885e1051a39Sopenharmony_ci
1886e1051a39Sopenharmony_ci	mov		$key_,$key
1887e1051a39Sopenharmony_ci	aese		$dat0,q8
1888e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
1889e1051a39Sopenharmony_ci	aese		$dat1,q8
1890e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
1891e1051a39Sopenharmony_ci	aese		$dat2,q8
1892e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
1893e1051a39Sopenharmony_ci	aese		$dat3,q8
1894e1051a39Sopenharmony_ci	aesmc		$dat3,$dat3
1895e1051a39Sopenharmony_ci	aese		$dat4,q8
1896e1051a39Sopenharmony_ci	aesmc		$dat4,$dat4
1897e1051a39Sopenharmony_ci	vld1.32	 	{q8},[$key_],#16	// re-pre-load rndkey[0]
1898e1051a39Sopenharmony_ci
1899e1051a39Sopenharmony_ci	aese		$dat0,q9
1900e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
1901e1051a39Sopenharmony_ci	aese		$dat1,q9
1902e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
1903e1051a39Sopenharmony_ci	aese		$dat2,q9
1904e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
1905e1051a39Sopenharmony_ci	aese		$dat3,q9
1906e1051a39Sopenharmony_ci	aesmc		$dat3,$dat3
1907e1051a39Sopenharmony_ci	aese		$dat4,q9
1908e1051a39Sopenharmony_ci	aesmc		$dat4,$dat4
1909e1051a39Sopenharmony_ci	vld1.32	 	{q9},[$key_],#16	// re-pre-load rndkey[1]
1910e1051a39Sopenharmony_ci
1911e1051a39Sopenharmony_ci	aese		$dat0,q12
1912e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
1913e1051a39Sopenharmony_ci	 add		$tctr0,$ctr,#1
1914e1051a39Sopenharmony_ci	 add		$tctr1,$ctr,#2
1915e1051a39Sopenharmony_ci	aese		$dat1,q12
1916e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
1917e1051a39Sopenharmony_ci	 add		$tctr2,$ctr,#3
1918e1051a39Sopenharmony_ci	 add		w13,$ctr,#4
1919e1051a39Sopenharmony_ci	aese		$dat2,q12
1920e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
1921e1051a39Sopenharmony_ci	 add		w14,$ctr,#5
1922e1051a39Sopenharmony_ci	 rev		$tctr0,$tctr0
1923e1051a39Sopenharmony_ci	aese		$dat3,q12
1924e1051a39Sopenharmony_ci	aesmc		$dat3,$dat3
1925e1051a39Sopenharmony_ci	 rev		$tctr1,$tctr1
1926e1051a39Sopenharmony_ci	 rev		$tctr2,$tctr2
1927e1051a39Sopenharmony_ci	aese		$dat4,q12
1928e1051a39Sopenharmony_ci	aesmc		$dat4,$dat4
1929e1051a39Sopenharmony_ci	 rev		w13,w13
1930e1051a39Sopenharmony_ci	 rev		w14,w14
1931e1051a39Sopenharmony_ci
1932e1051a39Sopenharmony_ci	aese		$dat0,q13
1933e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
1934e1051a39Sopenharmony_ci	aese		$dat1,q13
1935e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
1936e1051a39Sopenharmony_ci	aese		$dat2,q13
1937e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
1938e1051a39Sopenharmony_ci	aese		$dat3,q13
1939e1051a39Sopenharmony_ci	aesmc		$dat3,$dat3
1940e1051a39Sopenharmony_ci	aese		$dat4,q13
1941e1051a39Sopenharmony_ci	aesmc		$dat4,$dat4
1942e1051a39Sopenharmony_ci
1943e1051a39Sopenharmony_ci	aese		$dat0,q14
1944e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
1945e1051a39Sopenharmony_ci	 vld1.8		{$in0},[$inp],#16
1946e1051a39Sopenharmony_ci	aese		$dat1,q14
1947e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
1948e1051a39Sopenharmony_ci	 vld1.8		{$in1},[$inp],#16
1949e1051a39Sopenharmony_ci	aese		$dat2,q14
1950e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
1951e1051a39Sopenharmony_ci	 vld1.8		{$in2},[$inp],#16
1952e1051a39Sopenharmony_ci	aese		$dat3,q14
1953e1051a39Sopenharmony_ci	aesmc		$dat3,$dat3
1954e1051a39Sopenharmony_ci	 vld1.8		{$in3},[$inp],#16
1955e1051a39Sopenharmony_ci	aese		$dat4,q14
1956e1051a39Sopenharmony_ci	aesmc		$dat4,$dat4
1957e1051a39Sopenharmony_ci	 vld1.8		{$in4},[$inp],#16
1958e1051a39Sopenharmony_ci
1959e1051a39Sopenharmony_ci	aese		$dat0,q15
1960e1051a39Sopenharmony_ci	 veor		$in0,$in0,$rndlast
1961e1051a39Sopenharmony_ci	aese		$dat1,q15
1962e1051a39Sopenharmony_ci	 veor		$in1,$in1,$rndlast
1963e1051a39Sopenharmony_ci	aese		$dat2,q15
1964e1051a39Sopenharmony_ci	 veor		$in2,$in2,$rndlast
1965e1051a39Sopenharmony_ci	aese		$dat3,q15
1966e1051a39Sopenharmony_ci	 veor		$in3,$in3,$rndlast
1967e1051a39Sopenharmony_ci	aese		$dat4,q15
1968e1051a39Sopenharmony_ci	 veor		$in4,$in4,$rndlast
1969e1051a39Sopenharmony_ci
1970e1051a39Sopenharmony_ci	veor		$in0,$in0,$dat0
1971e1051a39Sopenharmony_ci	 vorr		$dat0,$ivec,$ivec
1972e1051a39Sopenharmony_ci	veor		$in1,$in1,$dat1
1973e1051a39Sopenharmony_ci	 vorr		$dat1,$ivec,$ivec
1974e1051a39Sopenharmony_ci	veor		$in2,$in2,$dat2
1975e1051a39Sopenharmony_ci	 vorr		$dat2,$ivec,$ivec
1976e1051a39Sopenharmony_ci	veor		$in3,$in3,$dat3
1977e1051a39Sopenharmony_ci	 vorr		$dat3,$ivec,$ivec
1978e1051a39Sopenharmony_ci	veor		$in4,$in4,$dat4
1979e1051a39Sopenharmony_ci	 vorr		$dat4,$ivec,$ivec
1980e1051a39Sopenharmony_ci
1981e1051a39Sopenharmony_ci	vst1.8		{$in0},[$out],#16
1982e1051a39Sopenharmony_ci	 vmov.32	${dat0}[3],$tctr0
1983e1051a39Sopenharmony_ci	vst1.8		{$in1},[$out],#16
1984e1051a39Sopenharmony_ci	 vmov.32	${dat1}[3],$tctr1
1985e1051a39Sopenharmony_ci	vst1.8		{$in2},[$out],#16
1986e1051a39Sopenharmony_ci	 vmov.32	${dat2}[3],$tctr2
1987e1051a39Sopenharmony_ci	vst1.8		{$in3},[$out],#16
1988e1051a39Sopenharmony_ci	 vmov.32	${dat3}[3],w13
1989e1051a39Sopenharmony_ci	vst1.8		{$in4},[$out],#16
1990e1051a39Sopenharmony_ci	 vmov.32	${dat4}[3],w14
1991e1051a39Sopenharmony_ci
1992e1051a39Sopenharmony_ci	mov		$cnt,$rounds
1993e1051a39Sopenharmony_ci	cbz		$len,.Lctr32_done
1994e1051a39Sopenharmony_ci
1995e1051a39Sopenharmony_ci	add		$ctr,$ctr,#5
1996e1051a39Sopenharmony_ci	subs		$len,$len,#5
1997e1051a39Sopenharmony_ci	b.hs		.Loop5x_ctr32
1998e1051a39Sopenharmony_ci
1999e1051a39Sopenharmony_ci	add		$len,$len,#5
2000e1051a39Sopenharmony_ci	sub		$ctr,$ctr,#5
2001e1051a39Sopenharmony_ci
2002e1051a39Sopenharmony_ci	cmp		$len,#2
2003e1051a39Sopenharmony_ci	mov		$step,#16
2004e1051a39Sopenharmony_ci	cclr		$step,lo
2005e1051a39Sopenharmony_ci	b.ls		.Lctr32_tail
2006e1051a39Sopenharmony_ci
2007e1051a39Sopenharmony_ci	sub		$len,$len,#3		// bias
2008e1051a39Sopenharmony_ci	add		$ctr,$ctr,#3
2009e1051a39Sopenharmony_ci___
2010e1051a39Sopenharmony_ci$code.=<<___;
2011e1051a39Sopenharmony_ci	b		.Loop3x_ctr32
2012e1051a39Sopenharmony_ci
2013e1051a39Sopenharmony_ci.align	4
2014e1051a39Sopenharmony_ci.Loop3x_ctr32:
2015e1051a39Sopenharmony_ci	aese		$dat0,q8
2016e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2017e1051a39Sopenharmony_ci	aese		$dat1,q8
2018e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2019e1051a39Sopenharmony_ci	aese		$dat2,q8
2020e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
2021e1051a39Sopenharmony_ci	vld1.32		{q8},[$key_],#16
2022e1051a39Sopenharmony_ci	subs		$cnt,$cnt,#2
2023e1051a39Sopenharmony_ci	aese		$dat0,q9
2024e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2025e1051a39Sopenharmony_ci	aese		$dat1,q9
2026e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2027e1051a39Sopenharmony_ci	aese		$dat2,q9
2028e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
2029e1051a39Sopenharmony_ci	vld1.32		{q9},[$key_],#16
2030e1051a39Sopenharmony_ci	b.gt		.Loop3x_ctr32
2031e1051a39Sopenharmony_ci
2032e1051a39Sopenharmony_ci	aese		$dat0,q8
2033e1051a39Sopenharmony_ci	aesmc		$tmp0,$dat0
2034e1051a39Sopenharmony_ci	aese		$dat1,q8
2035e1051a39Sopenharmony_ci	aesmc		$tmp1,$dat1
2036e1051a39Sopenharmony_ci	 vld1.8		{$in0},[$inp],#16
2037e1051a39Sopenharmony_ci___
2038e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2039e1051a39Sopenharmony_ci	 vorr		$dat0,$ivec,$ivec
2040e1051a39Sopenharmony_ci___
2041e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
2042e1051a39Sopenharmony_ci	 add		$tctr0,$ctr,#1
2043e1051a39Sopenharmony_ci___
2044e1051a39Sopenharmony_ci$code.=<<___;
2045e1051a39Sopenharmony_ci	aese		$dat2,q8
2046e1051a39Sopenharmony_ci	aesmc		$dat2,$dat2
2047e1051a39Sopenharmony_ci	 vld1.8		{$in1},[$inp],#16
2048e1051a39Sopenharmony_ci___
2049e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2050e1051a39Sopenharmony_ci	 vorr		$dat1,$ivec,$ivec
2051e1051a39Sopenharmony_ci___
2052e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
2053e1051a39Sopenharmony_ci	 rev		$tctr0,$tctr0
2054e1051a39Sopenharmony_ci___
2055e1051a39Sopenharmony_ci$code.=<<___;
2056e1051a39Sopenharmony_ci	aese		$tmp0,q9
2057e1051a39Sopenharmony_ci	aesmc		$tmp0,$tmp0
2058e1051a39Sopenharmony_ci	aese		$tmp1,q9
2059e1051a39Sopenharmony_ci	aesmc		$tmp1,$tmp1
2060e1051a39Sopenharmony_ci	 vld1.8		{$in2},[$inp],#16
2061e1051a39Sopenharmony_ci	 mov		$key_,$key
2062e1051a39Sopenharmony_ci	aese		$dat2,q9
2063e1051a39Sopenharmony_ci	aesmc		$tmp2,$dat2
2064e1051a39Sopenharmony_ci___
2065e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2066e1051a39Sopenharmony_ci	 vorr		$dat2,$ivec,$ivec
2067e1051a39Sopenharmony_ci	 add		$tctr0,$ctr,#1
2068e1051a39Sopenharmony_ci___
2069e1051a39Sopenharmony_ci$code.=<<___;
2070e1051a39Sopenharmony_ci	aese		$tmp0,q12
2071e1051a39Sopenharmony_ci	aesmc		$tmp0,$tmp0
2072e1051a39Sopenharmony_ci	aese		$tmp1,q12
2073e1051a39Sopenharmony_ci	aesmc		$tmp1,$tmp1
2074e1051a39Sopenharmony_ci	 veor		$in0,$in0,$rndlast
2075e1051a39Sopenharmony_ci	 add		$tctr1,$ctr,#2
2076e1051a39Sopenharmony_ci	aese		$tmp2,q12
2077e1051a39Sopenharmony_ci	aesmc		$tmp2,$tmp2
2078e1051a39Sopenharmony_ci	 veor		$in1,$in1,$rndlast
2079e1051a39Sopenharmony_ci	 add		$ctr,$ctr,#3
2080e1051a39Sopenharmony_ci	aese		$tmp0,q13
2081e1051a39Sopenharmony_ci	aesmc		$tmp0,$tmp0
2082e1051a39Sopenharmony_ci	aese		$tmp1,q13
2083e1051a39Sopenharmony_ci	aesmc		$tmp1,$tmp1
2084e1051a39Sopenharmony_ci	 veor		$in2,$in2,$rndlast
2085e1051a39Sopenharmony_ci___
2086e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2087e1051a39Sopenharmony_ci	 rev		$tctr0,$tctr0
2088e1051a39Sopenharmony_ci	aese		$tmp2,q13
2089e1051a39Sopenharmony_ci	aesmc		$tmp2,$tmp2
2090e1051a39Sopenharmony_ci	 vmov.32	${dat0}[3], $tctr0
2091e1051a39Sopenharmony_ci___
2092e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
2093e1051a39Sopenharmony_ci	 vmov.32	${ivec}[3], $tctr0
2094e1051a39Sopenharmony_ci	aese		$tmp2,q13
2095e1051a39Sopenharmony_ci	aesmc		$tmp2,$tmp2
2096e1051a39Sopenharmony_ci	 vorr		$dat0,$ivec,$ivec
2097e1051a39Sopenharmony_ci___
2098e1051a39Sopenharmony_ci$code.=<<___;
2099e1051a39Sopenharmony_ci	 rev		$tctr1,$tctr1
2100e1051a39Sopenharmony_ci	aese		$tmp0,q14
2101e1051a39Sopenharmony_ci	aesmc		$tmp0,$tmp0
2102e1051a39Sopenharmony_ci___
2103e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
2104e1051a39Sopenharmony_ci	 vmov.32	${ivec}[3], $tctr1
2105e1051a39Sopenharmony_ci	 rev		$tctr2,$ctr
2106e1051a39Sopenharmony_ci___
2107e1051a39Sopenharmony_ci$code.=<<___;
2108e1051a39Sopenharmony_ci	aese		$tmp1,q14
2109e1051a39Sopenharmony_ci	aesmc		$tmp1,$tmp1
2110e1051a39Sopenharmony_ci___
2111e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2112e1051a39Sopenharmony_ci	 vmov.32	${dat1}[3], $tctr1
2113e1051a39Sopenharmony_ci	 rev		$tctr2,$ctr
2114e1051a39Sopenharmony_ci	aese		$tmp2,q14
2115e1051a39Sopenharmony_ci	aesmc		$tmp2,$tmp2
2116e1051a39Sopenharmony_ci	 vmov.32	${dat2}[3], $tctr2
2117e1051a39Sopenharmony_ci___
2118e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
2119e1051a39Sopenharmony_ci	 vorr		$dat1,$ivec,$ivec
2120e1051a39Sopenharmony_ci	 vmov.32	${ivec}[3], $tctr2
2121e1051a39Sopenharmony_ci	aese		$tmp2,q14
2122e1051a39Sopenharmony_ci	aesmc		$tmp2,$tmp2
2123e1051a39Sopenharmony_ci	 vorr		$dat2,$ivec,$ivec
2124e1051a39Sopenharmony_ci___
2125e1051a39Sopenharmony_ci$code.=<<___;
2126e1051a39Sopenharmony_ci	 subs		$len,$len,#3
2127e1051a39Sopenharmony_ci	aese		$tmp0,q15
2128e1051a39Sopenharmony_ci	aese		$tmp1,q15
2129e1051a39Sopenharmony_ci	aese		$tmp2,q15
2130e1051a39Sopenharmony_ci
2131e1051a39Sopenharmony_ci	veor		$in0,$in0,$tmp0
2132e1051a39Sopenharmony_ci	 vld1.32	 {q8},[$key_],#16	// re-pre-load rndkey[0]
2133e1051a39Sopenharmony_ci	vst1.8		{$in0},[$out],#16
2134e1051a39Sopenharmony_ci	veor		$in1,$in1,$tmp1
2135e1051a39Sopenharmony_ci	 mov		$cnt,$rounds
2136e1051a39Sopenharmony_ci	vst1.8		{$in1},[$out],#16
2137e1051a39Sopenharmony_ci	veor		$in2,$in2,$tmp2
2138e1051a39Sopenharmony_ci	 vld1.32	 {q9},[$key_],#16	// re-pre-load rndkey[1]
2139e1051a39Sopenharmony_ci	vst1.8		{$in2},[$out],#16
2140e1051a39Sopenharmony_ci	b.hs		.Loop3x_ctr32
2141e1051a39Sopenharmony_ci
2142e1051a39Sopenharmony_ci	adds		$len,$len,#3
2143e1051a39Sopenharmony_ci	b.eq		.Lctr32_done
2144e1051a39Sopenharmony_ci	cmp		$len,#1
2145e1051a39Sopenharmony_ci	mov		$step,#16
2146e1051a39Sopenharmony_ci	cclr		$step,eq
2147e1051a39Sopenharmony_ci
2148e1051a39Sopenharmony_ci.Lctr32_tail:
2149e1051a39Sopenharmony_ci	aese		$dat0,q8
2150e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2151e1051a39Sopenharmony_ci	aese		$dat1,q8
2152e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2153e1051a39Sopenharmony_ci	vld1.32		{q8},[$key_],#16
2154e1051a39Sopenharmony_ci	subs		$cnt,$cnt,#2
2155e1051a39Sopenharmony_ci	aese		$dat0,q9
2156e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2157e1051a39Sopenharmony_ci	aese		$dat1,q9
2158e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2159e1051a39Sopenharmony_ci	vld1.32		{q9},[$key_],#16
2160e1051a39Sopenharmony_ci	b.gt		.Lctr32_tail
2161e1051a39Sopenharmony_ci
2162e1051a39Sopenharmony_ci	aese		$dat0,q8
2163e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2164e1051a39Sopenharmony_ci	aese		$dat1,q8
2165e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2166e1051a39Sopenharmony_ci	aese		$dat0,q9
2167e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2168e1051a39Sopenharmony_ci	aese		$dat1,q9
2169e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2170e1051a39Sopenharmony_ci	 vld1.8		{$in0},[$inp],$step
2171e1051a39Sopenharmony_ci	aese		$dat0,q12
2172e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2173e1051a39Sopenharmony_ci	aese		$dat1,q12
2174e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2175e1051a39Sopenharmony_ci	 vld1.8		{$in1},[$inp]
2176e1051a39Sopenharmony_ci	aese		$dat0,q13
2177e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2178e1051a39Sopenharmony_ci	aese		$dat1,q13
2179e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2180e1051a39Sopenharmony_ci	 veor		$in0,$in0,$rndlast
2181e1051a39Sopenharmony_ci	aese		$dat0,q14
2182e1051a39Sopenharmony_ci	aesmc		$dat0,$dat0
2183e1051a39Sopenharmony_ci	aese		$dat1,q14
2184e1051a39Sopenharmony_ci	aesmc		$dat1,$dat1
2185e1051a39Sopenharmony_ci	 veor		$in1,$in1,$rndlast
2186e1051a39Sopenharmony_ci	aese		$dat0,q15
2187e1051a39Sopenharmony_ci	aese		$dat1,q15
2188e1051a39Sopenharmony_ci
2189e1051a39Sopenharmony_ci	cmp		$len,#1
2190e1051a39Sopenharmony_ci	veor		$in0,$in0,$dat0
2191e1051a39Sopenharmony_ci	veor		$in1,$in1,$dat1
2192e1051a39Sopenharmony_ci	vst1.8		{$in0},[$out],#16
2193e1051a39Sopenharmony_ci	b.eq		.Lctr32_done
2194e1051a39Sopenharmony_ci	vst1.8		{$in1},[$out]
2195e1051a39Sopenharmony_ci
2196e1051a39Sopenharmony_ci.Lctr32_done:
2197e1051a39Sopenharmony_ci___
2198e1051a39Sopenharmony_ci$code.=<<___	if ($flavour !~ /64/);
2199e1051a39Sopenharmony_ci	vldmia		sp!,{d8-d15}
2200e1051a39Sopenharmony_ci	ldmia		sp!,{r4-r10,pc}
2201e1051a39Sopenharmony_ci___
2202e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2203e1051a39Sopenharmony_ci	ldr		x29,[sp],#16
2204e1051a39Sopenharmony_ci	ret
2205e1051a39Sopenharmony_ci___
2206e1051a39Sopenharmony_ci$code.=<<___;
2207e1051a39Sopenharmony_ci.size	${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
2208e1051a39Sopenharmony_ci___
2209e1051a39Sopenharmony_ci}}}
2210e1051a39Sopenharmony_ci# Performance in cycles per byte.
2211e1051a39Sopenharmony_ci# Measured for AES-XTS with different key sizes.
2212e1051a39Sopenharmony_ci# The values before and after optimization are shown below
2213e1051a39Sopenharmony_ci# (before/after):
2214e1051a39Sopenharmony_ci#
2215e1051a39Sopenharmony_ci#		AES-128-XTS		AES-256-XTS
2216e1051a39Sopenharmony_ci# Cortex-A57	3.36/1.09		4.02/1.37
2217e1051a39Sopenharmony_ci# Cortex-A72	3.03/1.02		3.28/1.33
2218e1051a39Sopenharmony_ci
2219e1051a39Sopenharmony_ci# Optimization is implemented by loop unrolling and interleaving.
2220e1051a39Sopenharmony_ci# Normally the unrolling factor is 5; if the input data size is
2221e1051a39Sopenharmony_ci# smaller than 5 blocks, but not smaller than 3 blocks, 3 is chosen
2222e1051a39Sopenharmony_ci# as the unrolling factor.
2223e1051a39Sopenharmony_ci# If the input data size dsize >= 5*16 bytes, 5 blocks are taken per
2224e1051a39Sopenharmony_ci# iteration, and each iteration reduces the remaining size lsize by 5*16.
2225e1051a39Sopenharmony_ci# If lsize < 5*16 bytes, the remainder is treated as the tail. Note: a
2226e1051a39Sopenharmony_ci# remainder of exactly 4*16 bytes is handled specially by folding it into
2227e1051a39Sopenharmony_ci# the 5*16 bytes loop to improve efficiency.
2228e1051a39Sopenharmony_ci# There is one special case: if the original input data size dsize
2229e1051a39Sopenharmony_ci# = 16 bytes, it is treated separately to improve
2230e1051a39Sopenharmony_ci# performance: one independent code block without LR, FP load and
2231e1051a39Sopenharmony_ci# store.
2232e1051a39Sopenharmony_ci# Encryption will process the (length -tailcnt) bytes as mentioned
2233e1051a39Sopenharmony_ci# previously, then encrypt the composite block as the second-to-last
2234e1051a39Sopenharmony_ci# cipher block.
2235e1051a39Sopenharmony_ci# Decryption will process the (length -tailcnt -1) bytes as mentioned
2236e1051a39Sopenharmony_ci# previously, then decrypt the second-to-last cipher block to get the
2237e1051a39Sopenharmony_ci# last plain block (tail), and decrypt the composite block as the
2238e1051a39Sopenharmony_ci# second-to-last plain text block.
2239e1051a39Sopenharmony_ci
2240e1051a39Sopenharmony_ci{{{
2241e1051a39Sopenharmony_cimy ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5));
2242e1051a39Sopenharmony_cimy ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10");
2243e1051a39Sopenharmony_cimy ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20");
2244e1051a39Sopenharmony_cimy ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19");
2245e1051a39Sopenharmony_cimy ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11");
2246e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));
2247e1051a39Sopenharmony_cimy ($iv0,$iv1,$iv2,$iv3,$iv4)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b");
2248e1051a39Sopenharmony_cimy ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]");
2249e1051a39Sopenharmony_cimy ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]");
2250e1051a39Sopenharmony_ci
2251e1051a39Sopenharmony_cimy ($tmpin)=("v26.16b");
2252e1051a39Sopenharmony_cimy ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
2253e1051a39Sopenharmony_ci
2254e1051a39Sopenharmony_ci# q7	last round key
2255e1051a39Sopenharmony_ci# q10-q15, q7	Last 7 round keys
2256e1051a39Sopenharmony_ci# q8-q9	preloaded round keys except last 7 keys for big size
2257e1051a39Sopenharmony_ci# q20, q21, q8-q9	preloaded round keys except last 7 keys for only 16 byte
2258e1051a39Sopenharmony_ci
2259e1051a39Sopenharmony_ci
2260e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
2261e1051a39Sopenharmony_ci
2262e1051a39Sopenharmony_cimy ($dat3,$in3,$tmp3);	# used only in 64-bit mode
2263e1051a39Sopenharmony_cimy ($dat4,$in4,$tmp4);
2264e1051a39Sopenharmony_ciif ($flavour =~ /64/) {
2265e1051a39Sopenharmony_ci    ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23));
2266e1051a39Sopenharmony_ci}
2267e1051a39Sopenharmony_ci
2268e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2269e1051a39Sopenharmony_ci.globl	${prefix}_xts_encrypt
2270e1051a39Sopenharmony_ci.type	${prefix}_xts_encrypt,%function
2271e1051a39Sopenharmony_ci.align	5
2272e1051a39Sopenharmony_ci${prefix}_xts_encrypt:
2273e1051a39Sopenharmony_ci___
2274e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2275e1051a39Sopenharmony_ci	cmp	$len,#16
2276e1051a39Sopenharmony_ci	// Original input data size bigger than 16, jump to big size processing.
2277e1051a39Sopenharmony_ci	b.ne	.Lxts_enc_big_size
2278e1051a39Sopenharmony_ci	// Encrypt the iv with key2, as the first XEX iv.
2279e1051a39Sopenharmony_ci	ldr	$rounds,[$key2,#240]
2280e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2],#16
2281e1051a39Sopenharmony_ci	vld1.8	{$iv0},[$ivp]
2282e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
2283e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key2],#16
2284e1051a39Sopenharmony_ci
2285e1051a39Sopenharmony_ci.Loop_enc_iv_enc:
2286e1051a39Sopenharmony_ci	aese	$iv0,$dat
2287e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2288e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2],#16
2289e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
2290e1051a39Sopenharmony_ci	aese	$iv0,$dat1
2291e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2292e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key2],#16
2293e1051a39Sopenharmony_ci	b.gt	.Loop_enc_iv_enc
2294e1051a39Sopenharmony_ci
2295e1051a39Sopenharmony_ci	aese	$iv0,$dat
2296e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2297e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2]
2298e1051a39Sopenharmony_ci	aese	$iv0,$dat1
2299e1051a39Sopenharmony_ci	veor	$iv0,$iv0,$dat
2300e1051a39Sopenharmony_ci
2301e1051a39Sopenharmony_ci	vld1.8	{$dat0},[$inp]
2302e1051a39Sopenharmony_ci	veor	$dat0,$iv0,$dat0
2303e1051a39Sopenharmony_ci
2304e1051a39Sopenharmony_ci	ldr	$rounds,[$key1,#240]
2305e1051a39Sopenharmony_ci	vld1.32	{q20-q21},[$key1],#32		// load key schedule...
2306e1051a39Sopenharmony_ci
2307e1051a39Sopenharmony_ci	aese	$dat0,q20
2308e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2309e1051a39Sopenharmony_ci	vld1.32	{q8-q9},[$key1],#32		// load key schedule...
2310e1051a39Sopenharmony_ci	aese	$dat0,q21
2311e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2312e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#10		// if rounds==10, jump to aes-128-xts processing
2313e1051a39Sopenharmony_ci	b.eq	.Lxts_128_enc
2314e1051a39Sopenharmony_ci.Lxts_enc_round_loop:
2315e1051a39Sopenharmony_ci	aese	$dat0,q8
2316e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2317e1051a39Sopenharmony_ci	vld1.32	{q8},[$key1],#16		// load key schedule...
2318e1051a39Sopenharmony_ci	aese	$dat0,q9
2319e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2320e1051a39Sopenharmony_ci	vld1.32	{q9},[$key1],#16		// load key schedule...
2321e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2		// bias
2322e1051a39Sopenharmony_ci	b.gt	.Lxts_enc_round_loop
2323e1051a39Sopenharmony_ci.Lxts_128_enc:
2324e1051a39Sopenharmony_ci	vld1.32	{q10-q11},[$key1],#32		// load key schedule...
2325e1051a39Sopenharmony_ci	aese	$dat0,q8
2326e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2327e1051a39Sopenharmony_ci	aese	$dat0,q9
2328e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2329e1051a39Sopenharmony_ci	vld1.32	{q12-q13},[$key1],#32		// load key schedule...
2330e1051a39Sopenharmony_ci	aese	$dat0,q10
2331e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2332e1051a39Sopenharmony_ci	aese	$dat0,q11
2333e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2334e1051a39Sopenharmony_ci	vld1.32	{q14-q15},[$key1],#32		// load key schedule...
2335e1051a39Sopenharmony_ci	aese	$dat0,q12
2336e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2337e1051a39Sopenharmony_ci	aese	$dat0,q13
2338e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2339e1051a39Sopenharmony_ci	vld1.32	{$rndlast},[$key1]
2340e1051a39Sopenharmony_ci	aese	$dat0,q14
2341e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2342e1051a39Sopenharmony_ci	aese	$dat0,q15
2343e1051a39Sopenharmony_ci	veor	$dat0,$dat0,$rndlast
2344e1051a39Sopenharmony_ci	veor	$dat0,$dat0,$iv0
2345e1051a39Sopenharmony_ci	vst1.8	{$dat0},[$out]
2346e1051a39Sopenharmony_ci	b	.Lxts_enc_final_abort
2347e1051a39Sopenharmony_ci
2348e1051a39Sopenharmony_ci.align	4
2349e1051a39Sopenharmony_ci.Lxts_enc_big_size:
2350e1051a39Sopenharmony_ci___
2351e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2352e1051a39Sopenharmony_ci	stp	$constnumx,$tmpinp,[sp,#-64]!
2353e1051a39Sopenharmony_ci	stp	$tailcnt,$midnumx,[sp,#48]
2354e1051a39Sopenharmony_ci	stp	$ivd10,$ivd20,[sp,#32]
2355e1051a39Sopenharmony_ci	stp	$ivd30,$ivd40,[sp,#16]
2356e1051a39Sopenharmony_ci
2357e1051a39Sopenharmony_ci	// tailcnt store the tail value of length%16.
2358e1051a39Sopenharmony_ci	and	$tailcnt,$len,#0xf
2359e1051a39Sopenharmony_ci	and	$len,$len,#-16
2360e1051a39Sopenharmony_ci	subs	$len,$len,#16
2361e1051a39Sopenharmony_ci	mov	$step,#16
2362e1051a39Sopenharmony_ci	b.lo	.Lxts_abort
2363e1051a39Sopenharmony_ci	csel	$step,xzr,$step,eq
2364e1051a39Sopenharmony_ci
2365e1051a39Sopenharmony_ci	// Firstly, encrypt the iv with key2, as the first iv of XEX.
2366e1051a39Sopenharmony_ci	ldr	$rounds,[$key2,#240]
2367e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2],#16
2368e1051a39Sopenharmony_ci	vld1.8	{$iv0},[$ivp]
2369e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
2370e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key2],#16
2371e1051a39Sopenharmony_ci
2372e1051a39Sopenharmony_ci.Loop_iv_enc:
2373e1051a39Sopenharmony_ci	aese	$iv0,$dat
2374e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2375e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2],#16
2376e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
2377e1051a39Sopenharmony_ci	aese	$iv0,$dat1
2378e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2379e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key2],#16
2380e1051a39Sopenharmony_ci	b.gt	.Loop_iv_enc
2381e1051a39Sopenharmony_ci
2382e1051a39Sopenharmony_ci	aese	$iv0,$dat
2383e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2384e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2]
2385e1051a39Sopenharmony_ci	aese	$iv0,$dat1
2386e1051a39Sopenharmony_ci	veor	$iv0,$iv0,$dat
2387e1051a39Sopenharmony_ci
2388e1051a39Sopenharmony_ci	// The iv for second block
2389e1051a39Sopenharmony_ci	// $ivl- iv(low), $ivh - iv(high)
2390e1051a39Sopenharmony_ci	// the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4
2391e1051a39Sopenharmony_ci	fmov	$ivl,$ivd00
2392e1051a39Sopenharmony_ci	fmov	$ivh,$ivd01
2393e1051a39Sopenharmony_ci	mov	$constnum,#0x87
2394e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2395e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2396e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2397e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2398e1051a39Sopenharmony_ci	fmov	$ivd10,$ivl
2399e1051a39Sopenharmony_ci	fmov	$ivd11,$ivh
2400e1051a39Sopenharmony_ci
2401e1051a39Sopenharmony_ci	ldr	$rounds0,[$key1,#240]		// next starting point
2402e1051a39Sopenharmony_ci	vld1.8	{$dat},[$inp],$step
2403e1051a39Sopenharmony_ci
2404e1051a39Sopenharmony_ci	vld1.32	{q8-q9},[$key1]			// load key schedule...
2405e1051a39Sopenharmony_ci	sub	$rounds0,$rounds0,#6
2406e1051a39Sopenharmony_ci	add	$key_,$key1,$ivp,lsl#4		// pointer to last 7 round keys
2407e1051a39Sopenharmony_ci	sub	$rounds0,$rounds0,#2
2408e1051a39Sopenharmony_ci	vld1.32	{q10-q11},[$key_],#32
2409e1051a39Sopenharmony_ci	vld1.32	{q12-q13},[$key_],#32
2410e1051a39Sopenharmony_ci	vld1.32	{q14-q15},[$key_],#32
2411e1051a39Sopenharmony_ci	vld1.32	{$rndlast},[$key_]
2412e1051a39Sopenharmony_ci
2413e1051a39Sopenharmony_ci	add	$key_,$key1,#32
2414e1051a39Sopenharmony_ci	mov	$rounds,$rounds0
2415e1051a39Sopenharmony_ci
2416e1051a39Sopenharmony_ci	// Encryption
2417e1051a39Sopenharmony_ci.Lxts_enc:
2418e1051a39Sopenharmony_ci	vld1.8	{$dat2},[$inp],#16
2419e1051a39Sopenharmony_ci	subs	$len,$len,#32			// bias
2420e1051a39Sopenharmony_ci	add	$rounds,$rounds0,#2
2421e1051a39Sopenharmony_ci	vorr	$in1,$dat,$dat
2422e1051a39Sopenharmony_ci	vorr	$dat1,$dat,$dat
2423e1051a39Sopenharmony_ci	vorr	$in3,$dat,$dat
2424e1051a39Sopenharmony_ci	vorr	$in2,$dat2,$dat2
2425e1051a39Sopenharmony_ci	vorr	$in4,$dat2,$dat2
2426e1051a39Sopenharmony_ci	b.lo	.Lxts_inner_enc_tail
2427e1051a39Sopenharmony_ci	veor	$dat,$dat,$iv0			// before encryption, xor with iv
2428e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$iv1
2429e1051a39Sopenharmony_ci
2430e1051a39Sopenharmony_ci	// The iv for third block
2431e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2432e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2433e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2434e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2435e1051a39Sopenharmony_ci	fmov	$ivd20,$ivl
2436e1051a39Sopenharmony_ci	fmov	$ivd21,$ivh
2437e1051a39Sopenharmony_ci
2438e1051a39Sopenharmony_ci
2439e1051a39Sopenharmony_ci	vorr	$dat1,$dat2,$dat2
2440e1051a39Sopenharmony_ci	vld1.8	{$dat2},[$inp],#16
2441e1051a39Sopenharmony_ci	vorr	$in0,$dat,$dat
2442e1051a39Sopenharmony_ci	vorr	$in1,$dat1,$dat1
2443e1051a39Sopenharmony_ci	veor	$in2,$dat2,$iv2 		// the third block
2444e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$iv2
2445e1051a39Sopenharmony_ci	cmp	$len,#32
2446e1051a39Sopenharmony_ci	b.lo	.Lxts_outer_enc_tail
2447e1051a39Sopenharmony_ci
2448e1051a39Sopenharmony_ci	// The iv for fourth block
2449e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2450e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2451e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2452e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2453e1051a39Sopenharmony_ci	fmov	$ivd30,$ivl
2454e1051a39Sopenharmony_ci	fmov	$ivd31,$ivh
2455e1051a39Sopenharmony_ci
2456e1051a39Sopenharmony_ci	vld1.8	{$dat3},[$inp],#16
2457e1051a39Sopenharmony_ci	// The iv for fifth block
2458e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2459e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2460e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2461e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2462e1051a39Sopenharmony_ci	fmov	$ivd40,$ivl
2463e1051a39Sopenharmony_ci	fmov	$ivd41,$ivh
2464e1051a39Sopenharmony_ci
2465e1051a39Sopenharmony_ci	vld1.8	{$dat4},[$inp],#16
2466e1051a39Sopenharmony_ci	veor	$dat3,$dat3,$iv3		// the fourth block
2467e1051a39Sopenharmony_ci	veor	$dat4,$dat4,$iv4
2468e1051a39Sopenharmony_ci	sub	$len,$len,#32			// bias
2469e1051a39Sopenharmony_ci	mov	$rounds,$rounds0
2470e1051a39Sopenharmony_ci	b	.Loop5x_xts_enc
2471e1051a39Sopenharmony_ci
2472e1051a39Sopenharmony_ci.align	4
2473e1051a39Sopenharmony_ci.Loop5x_xts_enc:
2474e1051a39Sopenharmony_ci	aese	$dat0,q8
2475e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2476e1051a39Sopenharmony_ci	aese	$dat1,q8
2477e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2478e1051a39Sopenharmony_ci	aese	$dat2,q8
2479e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2480e1051a39Sopenharmony_ci	aese	$dat3,q8
2481e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2482e1051a39Sopenharmony_ci	aese	$dat4,q8
2483e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2484e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
2485e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
2486e1051a39Sopenharmony_ci	aese	$dat0,q9
2487e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2488e1051a39Sopenharmony_ci	aese	$dat1,q9
2489e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2490e1051a39Sopenharmony_ci	aese	$dat2,q9
2491e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2492e1051a39Sopenharmony_ci	aese	$dat3,q9
2493e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2494e1051a39Sopenharmony_ci	aese	$dat4,q9
2495e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2496e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
2497e1051a39Sopenharmony_ci	b.gt	.Loop5x_xts_enc
2498e1051a39Sopenharmony_ci
2499e1051a39Sopenharmony_ci	aese	$dat0,q8
2500e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2501e1051a39Sopenharmony_ci	aese	$dat1,q8
2502e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2503e1051a39Sopenharmony_ci	aese	$dat2,q8
2504e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2505e1051a39Sopenharmony_ci	aese	$dat3,q8
2506e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2507e1051a39Sopenharmony_ci	aese	$dat4,q8
2508e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2509e1051a39Sopenharmony_ci	subs	$len,$len,#0x50			// because .Lxts_enc_tail4x
2510e1051a39Sopenharmony_ci
2511e1051a39Sopenharmony_ci	aese	$dat0,q9
2512e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2513e1051a39Sopenharmony_ci	aese	$dat1,q9
2514e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2515e1051a39Sopenharmony_ci	aese	$dat2,q9
2516e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2517e1051a39Sopenharmony_ci	aese	$dat3,q9
2518e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2519e1051a39Sopenharmony_ci	aese	$dat4,q9
2520e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2521e1051a39Sopenharmony_ci	csel	$xoffset,xzr,$len,gt		// borrow x6, w6, "gt" is not typo
2522e1051a39Sopenharmony_ci	mov	$key_,$key1
2523e1051a39Sopenharmony_ci
2524e1051a39Sopenharmony_ci	aese	$dat0,q10
2525e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2526e1051a39Sopenharmony_ci	aese	$dat1,q10
2527e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2528e1051a39Sopenharmony_ci	aese	$dat2,q10
2529e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2530e1051a39Sopenharmony_ci	aese	$dat3,q10
2531e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2532e1051a39Sopenharmony_ci	aese	$dat4,q10
2533e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2534e1051a39Sopenharmony_ci	add	$inp,$inp,$xoffset		// x0 is adjusted in such way that
2535e1051a39Sopenharmony_ci						// at exit from the loop v1.16b-v26.16b
2536e1051a39Sopenharmony_ci						// are loaded with last "words"
2537e1051a39Sopenharmony_ci	add	$xoffset,$len,#0x60		// because .Lxts_enc_tail4x
2538e1051a39Sopenharmony_ci
2539e1051a39Sopenharmony_ci	aese	$dat0,q11
2540e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2541e1051a39Sopenharmony_ci	aese	$dat1,q11
2542e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2543e1051a39Sopenharmony_ci	aese	$dat2,q11
2544e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2545e1051a39Sopenharmony_ci	aese	$dat3,q11
2546e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2547e1051a39Sopenharmony_ci	aese	$dat4,q11
2548e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2549e1051a39Sopenharmony_ci
2550e1051a39Sopenharmony_ci	aese	$dat0,q12
2551e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2552e1051a39Sopenharmony_ci	aese	$dat1,q12
2553e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2554e1051a39Sopenharmony_ci	aese	$dat2,q12
2555e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2556e1051a39Sopenharmony_ci	aese	$dat3,q12
2557e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2558e1051a39Sopenharmony_ci	aese	$dat4,q12
2559e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2560e1051a39Sopenharmony_ci
2561e1051a39Sopenharmony_ci	aese	$dat0,q13
2562e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2563e1051a39Sopenharmony_ci	aese	$dat1,q13
2564e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2565e1051a39Sopenharmony_ci	aese	$dat2,q13
2566e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2567e1051a39Sopenharmony_ci	aese	$dat3,q13
2568e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2569e1051a39Sopenharmony_ci	aese	$dat4,q13
2570e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2571e1051a39Sopenharmony_ci
2572e1051a39Sopenharmony_ci	aese	$dat0,q14
2573e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2574e1051a39Sopenharmony_ci	aese	$dat1,q14
2575e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2576e1051a39Sopenharmony_ci	aese	$dat2,q14
2577e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2578e1051a39Sopenharmony_ci	aese	$dat3,q14
2579e1051a39Sopenharmony_ci	aesmc	$dat3,$dat3
2580e1051a39Sopenharmony_ci	aese	$dat4,q14
2581e1051a39Sopenharmony_ci	aesmc	$dat4,$dat4
2582e1051a39Sopenharmony_ci
2583e1051a39Sopenharmony_ci	veor	$tmp0,$rndlast,$iv0
2584e1051a39Sopenharmony_ci	aese	$dat0,q15
2585e1051a39Sopenharmony_ci	// The iv for first block of one iteration
2586e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2587e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2588e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2589e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2590e1051a39Sopenharmony_ci	fmov	$ivd00,$ivl
2591e1051a39Sopenharmony_ci	fmov	$ivd01,$ivh
2592e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$iv1
2593e1051a39Sopenharmony_ci	vld1.8	{$in0},[$inp],#16
2594e1051a39Sopenharmony_ci	aese	$dat1,q15
2595e1051a39Sopenharmony_ci	// The iv for second block
2596e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2597e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2598e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2599e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2600e1051a39Sopenharmony_ci	fmov	$ivd10,$ivl
2601e1051a39Sopenharmony_ci	fmov	$ivd11,$ivh
2602e1051a39Sopenharmony_ci	veor	$tmp2,$rndlast,$iv2
2603e1051a39Sopenharmony_ci	vld1.8	{$in1},[$inp],#16
2604e1051a39Sopenharmony_ci	aese	$dat2,q15
2605e1051a39Sopenharmony_ci	// The iv for third block
2606e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2607e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2608e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2609e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2610e1051a39Sopenharmony_ci	fmov	$ivd20,$ivl
2611e1051a39Sopenharmony_ci	fmov	$ivd21,$ivh
2612e1051a39Sopenharmony_ci	veor	$tmp3,$rndlast,$iv3
2613e1051a39Sopenharmony_ci	vld1.8	{$in2},[$inp],#16
2614e1051a39Sopenharmony_ci	aese	$dat3,q15
2615e1051a39Sopenharmony_ci	// The iv for fourth block
2616e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2617e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2618e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2619e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2620e1051a39Sopenharmony_ci	fmov	$ivd30,$ivl
2621e1051a39Sopenharmony_ci	fmov	$ivd31,$ivh
2622e1051a39Sopenharmony_ci	veor	$tmp4,$rndlast,$iv4
2623e1051a39Sopenharmony_ci	vld1.8	{$in3},[$inp],#16
2624e1051a39Sopenharmony_ci	aese	$dat4,q15
2625e1051a39Sopenharmony_ci
2626e1051a39Sopenharmony_ci	// The iv for fifth block
2627e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2628e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2629e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
2630e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
2631e1051a39Sopenharmony_ci	fmov	$ivd40,$ivl
2632e1051a39Sopenharmony_ci	fmov	$ivd41,$ivh
2633e1051a39Sopenharmony_ci
2634e1051a39Sopenharmony_ci	vld1.8	{$in4},[$inp],#16
2635e1051a39Sopenharmony_ci	cbz	$xoffset,.Lxts_enc_tail4x
2636e1051a39Sopenharmony_ci	vld1.32 {q8},[$key_],#16		// re-pre-load rndkey[0]
2637e1051a39Sopenharmony_ci	veor	$tmp0,$tmp0,$dat0
2638e1051a39Sopenharmony_ci	veor	$dat0,$in0,$iv0
2639e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
2640e1051a39Sopenharmony_ci	veor	$dat1,$in1,$iv1
2641e1051a39Sopenharmony_ci	veor	$tmp2,$tmp2,$dat2
2642e1051a39Sopenharmony_ci	veor	$dat2,$in2,$iv2
2643e1051a39Sopenharmony_ci	veor	$tmp3,$tmp3,$dat3
2644e1051a39Sopenharmony_ci	veor	$dat3,$in3,$iv3
2645e1051a39Sopenharmony_ci	veor	$tmp4,$tmp4,$dat4
2646e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
2647e1051a39Sopenharmony_ci	veor	$dat4,$in4,$iv4
2648e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
2649e1051a39Sopenharmony_ci	mov	$rounds,$rounds0
2650e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
2651e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16		// re-pre-load rndkey[1]
2652e1051a39Sopenharmony_ci	vst1.8	{$tmp3},[$out],#16
2653e1051a39Sopenharmony_ci	vst1.8	{$tmp4},[$out],#16
2654e1051a39Sopenharmony_ci	b.hs	.Loop5x_xts_enc
2655e1051a39Sopenharmony_ci
2656e1051a39Sopenharmony_ci
2657e1051a39Sopenharmony_ci	// If 4 blocks are left, borrow the five-block processing.
2658e1051a39Sopenharmony_ci	cmn	$len,#0x10
2659e1051a39Sopenharmony_ci	b.ne	.Loop5x_enc_after
2660e1051a39Sopenharmony_ci	vorr	$iv4,$iv3,$iv3
2661e1051a39Sopenharmony_ci	vorr	$iv3,$iv2,$iv2
2662e1051a39Sopenharmony_ci	vorr	$iv2,$iv1,$iv1
2663e1051a39Sopenharmony_ci	vorr	$iv1,$iv0,$iv0
2664e1051a39Sopenharmony_ci	fmov	$ivl,$ivd40
2665e1051a39Sopenharmony_ci	fmov	$ivh,$ivd41
2666e1051a39Sopenharmony_ci	veor	$dat0,$iv0,$in0
2667e1051a39Sopenharmony_ci	veor	$dat1,$iv1,$in1
2668e1051a39Sopenharmony_ci	veor	$dat2,$in2,$iv2
2669e1051a39Sopenharmony_ci	veor	$dat3,$in3,$iv3
2670e1051a39Sopenharmony_ci	veor	$dat4,$in4,$iv4
2671e1051a39Sopenharmony_ci	b.eq	.Loop5x_xts_enc
2672e1051a39Sopenharmony_ci
2673e1051a39Sopenharmony_ci.Loop5x_enc_after:
2674e1051a39Sopenharmony_ci	add	$len,$len,#0x50
2675e1051a39Sopenharmony_ci	cbz	$len,.Lxts_enc_done
2676e1051a39Sopenharmony_ci
2677e1051a39Sopenharmony_ci	add	$rounds,$rounds0,#2
2678e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
2679e1051a39Sopenharmony_ci	b.lo	.Lxts_inner_enc_tail
2680e1051a39Sopenharmony_ci
2681e1051a39Sopenharmony_ci	veor	$dat0,$iv0,$in2
2682e1051a39Sopenharmony_ci	veor	$dat1,$iv1,$in3
2683e1051a39Sopenharmony_ci	veor	$dat2,$in4,$iv2
2684e1051a39Sopenharmony_ci	b	.Lxts_outer_enc_tail
2685e1051a39Sopenharmony_ci
2686e1051a39Sopenharmony_ci.align	4
2687e1051a39Sopenharmony_ci.Lxts_enc_tail4x:
2688e1051a39Sopenharmony_ci	add	$inp,$inp,#16
2689e1051a39Sopenharmony_ci	veor	$tmp1,$dat1,$tmp1
2690e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
2691e1051a39Sopenharmony_ci	veor	$tmp2,$dat2,$tmp2
2692e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
2693e1051a39Sopenharmony_ci	veor	$tmp3,$dat3,$tmp3
2694e1051a39Sopenharmony_ci	veor	$tmp4,$dat4,$tmp4
2695e1051a39Sopenharmony_ci	vst1.8	{$tmp3-$tmp4},[$out],#32
2696e1051a39Sopenharmony_ci
2697e1051a39Sopenharmony_ci	b	.Lxts_enc_done
2698e1051a39Sopenharmony_ci.align	4
2699e1051a39Sopenharmony_ci.Lxts_outer_enc_tail:
2700e1051a39Sopenharmony_ci	aese	$dat0,q8
2701e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2702e1051a39Sopenharmony_ci	aese	$dat1,q8
2703e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2704e1051a39Sopenharmony_ci	aese	$dat2,q8
2705e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2706e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
2707e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
2708e1051a39Sopenharmony_ci	aese	$dat0,q9
2709e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2710e1051a39Sopenharmony_ci	aese	$dat1,q9
2711e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2712e1051a39Sopenharmony_ci	aese	$dat2,q9
2713e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2714e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
2715e1051a39Sopenharmony_ci	b.gt	.Lxts_outer_enc_tail
2716e1051a39Sopenharmony_ci
2717e1051a39Sopenharmony_ci	aese	$dat0,q8
2718e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2719e1051a39Sopenharmony_ci	aese	$dat1,q8
2720e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2721e1051a39Sopenharmony_ci	aese	$dat2,q8
2722e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2723e1051a39Sopenharmony_ci	veor	$tmp0,$iv0,$rndlast
2724e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
2725e1051a39Sopenharmony_ci	// The iv for first block
2726e1051a39Sopenharmony_ci	fmov	$ivl,$ivd20
2727e1051a39Sopenharmony_ci	fmov	$ivh,$ivd21
2728e1051a39Sopenharmony_ci	//mov	$constnum,#0x87
2729e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2730e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2731e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr#31
2732e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl#1
2733e1051a39Sopenharmony_ci	fmov	$ivd00,$ivl
2734e1051a39Sopenharmony_ci	fmov	$ivd01,$ivh
2735e1051a39Sopenharmony_ci	veor	$tmp1,$iv1,$rndlast
2736e1051a39Sopenharmony_ci	csel	$xoffset,$len,$xoffset,lo       // x6, w6, is zero at this point
2737e1051a39Sopenharmony_ci	aese	$dat0,q9
2738e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2739e1051a39Sopenharmony_ci	aese	$dat1,q9
2740e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2741e1051a39Sopenharmony_ci	aese	$dat2,q9
2742e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2743e1051a39Sopenharmony_ci	veor	$tmp2,$iv2,$rndlast
2744e1051a39Sopenharmony_ci
2745e1051a39Sopenharmony_ci	add	$xoffset,$xoffset,#0x20
2746e1051a39Sopenharmony_ci	add	$inp,$inp,$xoffset
2747e1051a39Sopenharmony_ci	mov	$key_,$key1
2748e1051a39Sopenharmony_ci
2749e1051a39Sopenharmony_ci	aese	$dat0,q12
2750e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2751e1051a39Sopenharmony_ci	aese	$dat1,q12
2752e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2753e1051a39Sopenharmony_ci	aese	$dat2,q12
2754e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2755e1051a39Sopenharmony_ci	aese	$dat0,q13
2756e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2757e1051a39Sopenharmony_ci	aese	$dat1,q13
2758e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2759e1051a39Sopenharmony_ci	aese	$dat2,q13
2760e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2761e1051a39Sopenharmony_ci	aese	$dat0,q14
2762e1051a39Sopenharmony_ci	aesmc	$dat0,$dat0
2763e1051a39Sopenharmony_ci	aese	$dat1,q14
2764e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2765e1051a39Sopenharmony_ci	aese	$dat2,q14
2766e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2767e1051a39Sopenharmony_ci	aese	$dat0,q15
2768e1051a39Sopenharmony_ci	aese	$dat1,q15
2769e1051a39Sopenharmony_ci	aese	$dat2,q15
2770e1051a39Sopenharmony_ci	vld1.8	{$in2},[$inp],#16
2771e1051a39Sopenharmony_ci	add	$rounds,$rounds0,#2
2772e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16                // re-pre-load rndkey[0]
2773e1051a39Sopenharmony_ci	veor	$tmp0,$tmp0,$dat0
2774e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
2775e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$tmp2
2776e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16                // re-pre-load rndkey[1]
2777e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
2778e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
2779e1051a39Sopenharmony_ci	vst1.8	{$dat2},[$out],#16
2780e1051a39Sopenharmony_ci	cmn	$len,#0x30
2781e1051a39Sopenharmony_ci	b.eq	.Lxts_enc_done
2782e1051a39Sopenharmony_ci.Lxts_encxor_one:
2783e1051a39Sopenharmony_ci	vorr	$in3,$in1,$in1
2784e1051a39Sopenharmony_ci	vorr	$in4,$in2,$in2
2785e1051a39Sopenharmony_ci	nop
2786e1051a39Sopenharmony_ci
2787e1051a39Sopenharmony_ci.Lxts_inner_enc_tail:
2788e1051a39Sopenharmony_ci	cmn	$len,#0x10
2789e1051a39Sopenharmony_ci	veor	$dat1,$in3,$iv0
2790e1051a39Sopenharmony_ci	veor	$dat2,$in4,$iv1
2791e1051a39Sopenharmony_ci	b.eq	.Lxts_enc_tail_loop
2792e1051a39Sopenharmony_ci	veor	$dat2,$in4,$iv0
2793e1051a39Sopenharmony_ci.Lxts_enc_tail_loop:
2794e1051a39Sopenharmony_ci	aese	$dat1,q8
2795e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2796e1051a39Sopenharmony_ci	aese	$dat2,q8
2797e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2798e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
2799e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
2800e1051a39Sopenharmony_ci	aese	$dat1,q9
2801e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2802e1051a39Sopenharmony_ci	aese	$dat2,q9
2803e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2804e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
2805e1051a39Sopenharmony_ci	b.gt	.Lxts_enc_tail_loop
2806e1051a39Sopenharmony_ci
2807e1051a39Sopenharmony_ci	aese	$dat1,q8
2808e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2809e1051a39Sopenharmony_ci	aese	$dat2,q8
2810e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2811e1051a39Sopenharmony_ci	aese	$dat1,q9
2812e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2813e1051a39Sopenharmony_ci	aese	$dat2,q9
2814e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2815e1051a39Sopenharmony_ci	aese	$dat1,q12
2816e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2817e1051a39Sopenharmony_ci	aese	$dat2,q12
2818e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2819e1051a39Sopenharmony_ci	cmn	$len,#0x20
2820e1051a39Sopenharmony_ci	aese	$dat1,q13
2821e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2822e1051a39Sopenharmony_ci	aese	$dat2,q13
2823e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2824e1051a39Sopenharmony_ci	veor	$tmp1,$iv0,$rndlast
2825e1051a39Sopenharmony_ci	aese	$dat1,q14
2826e1051a39Sopenharmony_ci	aesmc	$dat1,$dat1
2827e1051a39Sopenharmony_ci	aese	$dat2,q14
2828e1051a39Sopenharmony_ci	aesmc	$dat2,$dat2
2829e1051a39Sopenharmony_ci	veor	$tmp2,$iv1,$rndlast
2830e1051a39Sopenharmony_ci	aese	$dat1,q15
2831e1051a39Sopenharmony_ci	aese	$dat2,q15
2832e1051a39Sopenharmony_ci	b.eq	.Lxts_enc_one
2833e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
2834e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
2835e1051a39Sopenharmony_ci	veor	$tmp2,$tmp2,$dat2
2836e1051a39Sopenharmony_ci	vorr	$iv0,$iv1,$iv1
2837e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
2838e1051a39Sopenharmony_ci	fmov	$ivl,$ivd10
2839e1051a39Sopenharmony_ci	fmov	$ivh,$ivd11
2840e1051a39Sopenharmony_ci	mov	$constnum,#0x87
2841e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2842e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2843e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
2844e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
2845e1051a39Sopenharmony_ci	fmov	$ivd00,$ivl
2846e1051a39Sopenharmony_ci	fmov	$ivd01,$ivh
2847e1051a39Sopenharmony_ci	b	.Lxts_enc_done
2848e1051a39Sopenharmony_ci
2849e1051a39Sopenharmony_ci.Lxts_enc_one:
2850e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat2
2851e1051a39Sopenharmony_ci	vorr	$iv0,$iv0,$iv0
2852e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
2853e1051a39Sopenharmony_ci	fmov	$ivl,$ivd00
2854e1051a39Sopenharmony_ci	fmov	$ivh,$ivd01
2855e1051a39Sopenharmony_ci	mov	$constnum,#0x87
2856e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
2857e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
2858e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
2859e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
2860e1051a39Sopenharmony_ci	fmov	$ivd00,$ivl
2861e1051a39Sopenharmony_ci	fmov	$ivd01,$ivh
2862e1051a39Sopenharmony_ci	b	.Lxts_enc_done
2863e1051a39Sopenharmony_ci.align	5
2864e1051a39Sopenharmony_ci.Lxts_enc_done:
2865e1051a39Sopenharmony_ci	// Process the tail block with cipher stealing.
2866e1051a39Sopenharmony_ci	tst	$tailcnt,#0xf
2867e1051a39Sopenharmony_ci	b.eq	.Lxts_abort
2868e1051a39Sopenharmony_ci
2869e1051a39Sopenharmony_ci	mov	$tmpinp,$inp
2870e1051a39Sopenharmony_ci	mov	$tmpoutp,$out
2871e1051a39Sopenharmony_ci	sub	$out,$out,#16
2872e1051a39Sopenharmony_ci.composite_enc_loop:
2873e1051a39Sopenharmony_ci	subs	$tailcnt,$tailcnt,#1
2874e1051a39Sopenharmony_ci	ldrb	$l2outp,[$out,$tailcnt]
2875e1051a39Sopenharmony_ci	ldrb	$loutp,[$tmpinp,$tailcnt]
2876e1051a39Sopenharmony_ci	strb	$l2outp,[$tmpoutp,$tailcnt]
2877e1051a39Sopenharmony_ci	strb	$loutp,[$out,$tailcnt]
2878e1051a39Sopenharmony_ci	b.gt	.composite_enc_loop
2879e1051a39Sopenharmony_ci.Lxts_enc_load_done:
2880e1051a39Sopenharmony_ci	vld1.8	{$tmpin},[$out]
2881e1051a39Sopenharmony_ci	veor	$tmpin,$tmpin,$iv0
2882e1051a39Sopenharmony_ci
2883e1051a39Sopenharmony_ci	// Encrypt the composite block to get the second-to-last ciphertext block
2884e1051a39Sopenharmony_ci	ldr	$rounds,[$key1,#240]		// load key schedule...
2885e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key1],#16
2886e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
2887e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key1],#16		// load key schedule...
2888e1051a39Sopenharmony_ci.Loop_final_enc:
2889e1051a39Sopenharmony_ci	aese	$tmpin,$dat0
2890e1051a39Sopenharmony_ci	aesmc	$tmpin,$tmpin
2891e1051a39Sopenharmony_ci	vld1.32	{$dat0},[$key1],#16
2892e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
2893e1051a39Sopenharmony_ci	aese	$tmpin,$dat1
2894e1051a39Sopenharmony_ci	aesmc	$tmpin,$tmpin
2895e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key1],#16
2896e1051a39Sopenharmony_ci	b.gt	.Loop_final_enc
2897e1051a39Sopenharmony_ci
2898e1051a39Sopenharmony_ci	aese	$tmpin,$dat0
2899e1051a39Sopenharmony_ci	aesmc	$tmpin,$tmpin
2900e1051a39Sopenharmony_ci	vld1.32	{$dat0},[$key1]
2901e1051a39Sopenharmony_ci	aese	$tmpin,$dat1
2902e1051a39Sopenharmony_ci	veor	$tmpin,$tmpin,$dat0
2903e1051a39Sopenharmony_ci	veor	$tmpin,$tmpin,$iv0
2904e1051a39Sopenharmony_ci	vst1.8	{$tmpin},[$out]
2905e1051a39Sopenharmony_ci
2906e1051a39Sopenharmony_ci.Lxts_abort:
2907e1051a39Sopenharmony_ci	ldp	$tailcnt,$midnumx,[sp,#48]
2908e1051a39Sopenharmony_ci	ldp	$ivd10,$ivd20,[sp,#32]
2909e1051a39Sopenharmony_ci	ldp	$ivd30,$ivd40,[sp,#16]
2910e1051a39Sopenharmony_ci	ldp	$constnumx,$tmpinp,[sp],#64
2911e1051a39Sopenharmony_ci.Lxts_enc_final_abort:
2912e1051a39Sopenharmony_ci	ret
2913e1051a39Sopenharmony_ci.size	${prefix}_xts_encrypt,.-${prefix}_xts_encrypt
2914e1051a39Sopenharmony_ci___
2915e1051a39Sopenharmony_ci
2916e1051a39Sopenharmony_ci}}}
2917e1051a39Sopenharmony_ci{{{
# Register aliases used by the ${prefix}_xts_decrypt code templates below.
#
# $inp,$out,$len,$key1,$key2,$ivp are x0..x5.  In the templates below $inp is
# read as the input data, $out is written with the result, $len is compared
# against the byte count, $key1 supplies the key schedule used for the data
# blocks, $key2 supplies the key schedule used to encrypt the initial tweak,
# and $ivp is the address the initial IV is loaded from.
2918e1051a39Sopenharmony_cimy ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5));
# $rounds0 is w5, i.e. the low half of $ivp (x5); $rounds is w6, the low half
# of $xoffset (x6) declared below.  $ivl/$ivh hold the low/high 64 bits of the
# current tweak while it is being updated in general-purpose registers.
2919e1051a39Sopenharmony_cimy ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10");
# Scratch registers; the name $tmpoutp/$tmpinp/$loutp/$l2outp usage is outside
# the declarations here.  $tmpinp (x20) is one of the registers pushed to the
# stack at .Lxts_dec_big_size.
2920e1051a39Sopenharmony_cimy ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20");
# $tailcnt receives ($len & 0xf).  $midnum/$midnumx and $constnum/$constnumx
# are the w/x views of x22 and x19; $constnum is loaded with #0x87 before the
# tweak-update sequences.  x19, x21 and x22 are also pushed to the stack at
# .Lxts_dec_big_size.
2921e1051a39Sopenharmony_cimy ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19");
# $xoffset is x6 (same register as $rounds); $tmpmx/$tmpmw are the x/w views
# of x11, used as a temporary in the tweak-update sequences.
2922e1051a39Sopenharmony_cimy ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11");
# q0..q7 in declaration order: $dat0=q0, $dat1=q1, $in0=q2, $in1=q3,
# $tmp0=q4, $tmp1=q5, $tmp2=q6, $rndlast=q7.  Note that $tmp2 is redeclared
# (shadowed) in the nested block that follows.
2923e1051a39Sopenharmony_cimy ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));
# Vector registers holding the five tweaks ($iv0..$iv4) plus $tmpin; these are
# spelled directly as vN.16b rather than as qN names.
2924e1051a39Sopenharmony_cimy ($iv0,$iv1,$iv2,$iv3,$iv4,$tmpin)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b","v26.16b");
# $ivdN0 / $ivdN1 name the low d-register and the upper 64-bit lane (.d[1]) of
# the vector register that backs $ivN, so fmov can move a tweak half between
# the vector register and $ivl/$ivh.
2925e1051a39Sopenharmony_cimy ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]");
2926e1051a39Sopenharmony_cimy ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]");
2927e1051a39Sopenharmony_ci
# Convenience aliases: $dat is $dat0 (q0), $tmp is $tmp0 (q4) and
# $rndzero_n_last is $tmp1 (q5).
2928e1051a39Sopenharmony_cimy ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
2929e1051a39Sopenharmony_ci
2930e1051a39Sopenharmony_ci# q7	last round key
2931e1051a39Sopenharmony_ci# q10-q15, q7	Last 7 round keys
2932e1051a39Sopenharmony_ci# q8-q9	preloaded round keys except last 7 keys for big size
2933e1051a39Sopenharmony_ci# q20, q21, q8-q9	preloaded round keys except last 7 keys for only 16 byte
2934e1051a39Sopenharmony_ci
2935e1051a39Sopenharmony_ci{
# Default aliases for the third data lane: $dat2=q10, $in2=q11, $tmp2=q9.
# This $tmp2 shadows the outer $tmp2 (q6) for the rest of the enclosing block.
2936e1051a39Sopenharmony_cimy ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
2937e1051a39Sopenharmony_ci
2938e1051a39Sopenharmony_cimy ($dat3,$in3,$tmp3);	# used only in 64-bit mode
2939e1051a39Sopenharmony_cimy ($dat4,$in4,$tmp4);
# For 64-bit flavours the five-lane code path is used: $dat2..$dat4 become
# q16..q18, $in2..$in4 become q19..q21 and $tmp3,$tmp4 become q22,q23.
# $tmp2 is not in the list, so it keeps the q9 value assigned above.
2940e1051a39Sopenharmony_ciif ($flavour =~ /64/) {
2941e1051a39Sopenharmony_ci    ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23));
2942e1051a39Sopenharmony_ci}
2943e1051a39Sopenharmony_ci
# Emit the symbol directives and entry label for ${prefix}_xts_decrypt.
# Like every template in this section, it is appended to $code only when
# $flavour matches /64/.
2944e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2945e1051a39Sopenharmony_ci.globl	${prefix}_xts_decrypt
2946e1051a39Sopenharmony_ci.type	${prefix}_xts_decrypt,%function
2947e1051a39Sopenharmony_ci.align	5
2948e1051a39Sopenharmony_ci${prefix}_xts_decrypt:
2949e1051a39Sopenharmony_ci___
# Fast path for an input of exactly 16 bytes.
#
# The generated code compares $len with 16 and branches to
# .Lxts_dec_big_size on any other value (b.ne), so lengths below 16 also take
# the big-size path, which has its own "subs $len,$len,#16; b.lo" check.
#
# For the 16-byte case the template:
#   1. loads the IV from [$ivp] and encrypts it with the $key2 schedule
#      (aese/aesmc loop, round count read from [$key2,#240]) to form the tweak
#      in $iv0;
#   2. loads one block from [$inp] and XORs it with the tweak;
#   3. decrypts it with the $key1 schedule (aesd/aesimc).  After the first two
#      rounds, "subs $rounds,$rounds,#10" skips .Lxts_dec_round_loop when the
#      round count read from [$key1,#240] is 10; otherwise the loop consumes
#      two more round keys per iteration;
#   4. applies the final round key ($rndlast), XORs with the tweak again and
#      stores the block to [$out].
#
# This path runs before the stp instructions at .Lxts_dec_big_size, so it
# leaves by branching to .Lxts_dec_final_abort instead of the restoring exit.
# NOTE(review): .Lxts_dec_final_abort is defined outside this section;
# presumably a bare ret like .Lxts_enc_final_abort -- confirm.
2950e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
2951e1051a39Sopenharmony_ci	cmp	$len,#16
2952e1051a39Sopenharmony_ci	// Original input data size bigger than 16, jump to big size processing.
2953e1051a39Sopenharmony_ci	b.ne	.Lxts_dec_big_size
2954e1051a39Sopenharmony_ci	// Encrypt the iv with key2, as the first XEX iv.
2955e1051a39Sopenharmony_ci	ldr	$rounds,[$key2,#240]
2956e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2],#16
2957e1051a39Sopenharmony_ci	vld1.8	{$iv0},[$ivp]
2958e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
2959e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key2],#16
2960e1051a39Sopenharmony_ci
2961e1051a39Sopenharmony_ci.Loop_dec_small_iv_enc:
2962e1051a39Sopenharmony_ci	aese	$iv0,$dat
2963e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2964e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2],#16
2965e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
2966e1051a39Sopenharmony_ci	aese	$iv0,$dat1
2967e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2968e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key2],#16
2969e1051a39Sopenharmony_ci	b.gt	.Loop_dec_small_iv_enc
2970e1051a39Sopenharmony_ci
2971e1051a39Sopenharmony_ci	aese	$iv0,$dat
2972e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
2973e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2]
2974e1051a39Sopenharmony_ci	aese	$iv0,$dat1
2975e1051a39Sopenharmony_ci	veor	$iv0,$iv0,$dat
2976e1051a39Sopenharmony_ci
2977e1051a39Sopenharmony_ci	vld1.8	{$dat0},[$inp]
2978e1051a39Sopenharmony_ci	veor	$dat0,$iv0,$dat0
2979e1051a39Sopenharmony_ci
2980e1051a39Sopenharmony_ci	ldr	$rounds,[$key1,#240]
2981e1051a39Sopenharmony_ci	vld1.32	{q20-q21},[$key1],#32			// load key schedule...
2982e1051a39Sopenharmony_ci
2983e1051a39Sopenharmony_ci	aesd	$dat0,q20
2984e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
2985e1051a39Sopenharmony_ci	vld1.32	{q8-q9},[$key1],#32			// load key schedule...
2986e1051a39Sopenharmony_ci	aesd	$dat0,q21
2987e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
2988e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#10			// bias
2989e1051a39Sopenharmony_ci	b.eq	.Lxts_128_dec
2990e1051a39Sopenharmony_ci.Lxts_dec_round_loop:
2991e1051a39Sopenharmony_ci	aesd	$dat0,q8
2992e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
2993e1051a39Sopenharmony_ci	vld1.32	{q8},[$key1],#16			// load key schedule...
2994e1051a39Sopenharmony_ci	aesd	$dat0,q9
2995e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
2996e1051a39Sopenharmony_ci	vld1.32	{q9},[$key1],#16			// load key schedule...
2997e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2			// bias
2998e1051a39Sopenharmony_ci	b.gt	.Lxts_dec_round_loop
2999e1051a39Sopenharmony_ci.Lxts_128_dec:
3000e1051a39Sopenharmony_ci	vld1.32	{q10-q11},[$key1],#32			// load key schedule...
3001e1051a39Sopenharmony_ci	aesd	$dat0,q8
3002e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3003e1051a39Sopenharmony_ci	aesd	$dat0,q9
3004e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3005e1051a39Sopenharmony_ci	vld1.32	{q12-q13},[$key1],#32			// load key schedule...
3006e1051a39Sopenharmony_ci	aesd	$dat0,q10
3007e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3008e1051a39Sopenharmony_ci	aesd	$dat0,q11
3009e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3010e1051a39Sopenharmony_ci	vld1.32	{q14-q15},[$key1],#32			// load key schedule...
3011e1051a39Sopenharmony_ci	aesd	$dat0,q12
3012e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3013e1051a39Sopenharmony_ci	aesd	$dat0,q13
3014e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3015e1051a39Sopenharmony_ci	vld1.32	{$rndlast},[$key1]
3016e1051a39Sopenharmony_ci	aesd	$dat0,q14
3017e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3018e1051a39Sopenharmony_ci	aesd	$dat0,q15
3019e1051a39Sopenharmony_ci	veor	$dat0,$dat0,$rndlast
3020e1051a39Sopenharmony_ci	veor	$dat0,$iv0,$dat0
3021e1051a39Sopenharmony_ci	vst1.8	{$dat0},[$out]
3022e1051a39Sopenharmony_ci	b	.Lxts_dec_final_abort
3023e1051a39Sopenharmony_ci.Lxts_dec_big_size:
3024e1051a39Sopenharmony_ci___
3025e1051a39Sopenharmony_ci$code.=<<___	if ($flavour =~ /64/);
3026e1051a39Sopenharmony_ci	stp	$constnumx,$tmpinp,[sp,#-64]!
3027e1051a39Sopenharmony_ci	stp	$tailcnt,$midnumx,[sp,#48]
3028e1051a39Sopenharmony_ci	stp	$ivd10,$ivd20,[sp,#32]
3029e1051a39Sopenharmony_ci	stp	$ivd30,$ivd40,[sp,#16]
3030e1051a39Sopenharmony_ci
3031e1051a39Sopenharmony_ci	and	$tailcnt,$len,#0xf
3032e1051a39Sopenharmony_ci	and	$len,$len,#-16
3033e1051a39Sopenharmony_ci	subs	$len,$len,#16
3034e1051a39Sopenharmony_ci	mov	$step,#16
3035e1051a39Sopenharmony_ci	b.lo	.Lxts_dec_abort
3036e1051a39Sopenharmony_ci
3037e1051a39Sopenharmony_ci	// Encrypt the iv with key2, as the first XEX iv
3038e1051a39Sopenharmony_ci	ldr	$rounds,[$key2,#240]
3039e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2],#16
3040e1051a39Sopenharmony_ci	vld1.8	{$iv0},[$ivp]
3041e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
3042e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key2],#16
3043e1051a39Sopenharmony_ci
3044e1051a39Sopenharmony_ci.Loop_dec_iv_enc:
3045e1051a39Sopenharmony_ci	aese	$iv0,$dat
3046e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
3047e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2],#16
3048e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
3049e1051a39Sopenharmony_ci	aese	$iv0,$dat1
3050e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
3051e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key2],#16
3052e1051a39Sopenharmony_ci	b.gt	.Loop_dec_iv_enc
3053e1051a39Sopenharmony_ci
3054e1051a39Sopenharmony_ci	aese	$iv0,$dat
3055e1051a39Sopenharmony_ci	aesmc	$iv0,$iv0
3056e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key2]
3057e1051a39Sopenharmony_ci	aese	$iv0,$dat1
3058e1051a39Sopenharmony_ci	veor	$iv0,$iv0,$dat
3059e1051a39Sopenharmony_ci
3060e1051a39Sopenharmony_ci	// The iv for second block
3061e1051a39Sopenharmony_ci	// $ivl- iv(low), $ivh - iv(high)
3062e1051a39Sopenharmony_ci	// the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4
3063e1051a39Sopenharmony_ci	fmov	$ivl,$ivd00
3064e1051a39Sopenharmony_ci	fmov	$ivh,$ivd01
3065e1051a39Sopenharmony_ci	mov	$constnum,#0x87
3066e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3067e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3068e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3069e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3070e1051a39Sopenharmony_ci	fmov	$ivd10,$ivl
3071e1051a39Sopenharmony_ci	fmov	$ivd11,$ivh
3072e1051a39Sopenharmony_ci
3073e1051a39Sopenharmony_ci	ldr	$rounds0,[$key1,#240]		// load rounds number
3074e1051a39Sopenharmony_ci
3075e1051a39Sopenharmony_ci	// The iv for third block
3076e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3077e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3078e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3079e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3080e1051a39Sopenharmony_ci	fmov	$ivd20,$ivl
3081e1051a39Sopenharmony_ci	fmov	$ivd21,$ivh
3082e1051a39Sopenharmony_ci
3083e1051a39Sopenharmony_ci	vld1.32	{q8-q9},[$key1]			// load key schedule...
3084e1051a39Sopenharmony_ci	sub	$rounds0,$rounds0,#6
3085e1051a39Sopenharmony_ci	add	$key_,$key1,$ivp,lsl#4		// pointer to last 7 round keys
3086e1051a39Sopenharmony_ci	sub	$rounds0,$rounds0,#2
3087e1051a39Sopenharmony_ci	vld1.32	{q10-q11},[$key_],#32		// load key schedule...
3088e1051a39Sopenharmony_ci	vld1.32	{q12-q13},[$key_],#32
3089e1051a39Sopenharmony_ci	vld1.32	{q14-q15},[$key_],#32
3090e1051a39Sopenharmony_ci	vld1.32	{$rndlast},[$key_]
3091e1051a39Sopenharmony_ci
3092e1051a39Sopenharmony_ci	// The iv for fourth block
3093e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3094e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3095e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3096e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3097e1051a39Sopenharmony_ci	fmov	$ivd30,$ivl
3098e1051a39Sopenharmony_ci	fmov	$ivd31,$ivh
3099e1051a39Sopenharmony_ci
3100e1051a39Sopenharmony_ci	add	$key_,$key1,#32
3101e1051a39Sopenharmony_ci	mov	$rounds,$rounds0
3102e1051a39Sopenharmony_ci	b	.Lxts_dec
3103e1051a39Sopenharmony_ci
3104e1051a39Sopenharmony_ci	// Decryption
3105e1051a39Sopenharmony_ci.align	5
3106e1051a39Sopenharmony_ci.Lxts_dec:
3107e1051a39Sopenharmony_ci	tst	$tailcnt,#0xf
3108e1051a39Sopenharmony_ci	b.eq	.Lxts_dec_begin
3109e1051a39Sopenharmony_ci	subs	$len,$len,#16
3110e1051a39Sopenharmony_ci	csel	$step,xzr,$step,eq
3111e1051a39Sopenharmony_ci	vld1.8	{$dat},[$inp],#16
3112e1051a39Sopenharmony_ci	b.lo	.Lxts_done
3113e1051a39Sopenharmony_ci	sub	$inp,$inp,#16
3114e1051a39Sopenharmony_ci.Lxts_dec_begin:
3115e1051a39Sopenharmony_ci	vld1.8	{$dat},[$inp],$step
3116e1051a39Sopenharmony_ci	subs	$len,$len,#32			// bias
3117e1051a39Sopenharmony_ci	add	$rounds,$rounds0,#2
3118e1051a39Sopenharmony_ci	vorr	$in1,$dat,$dat
3119e1051a39Sopenharmony_ci	vorr	$dat1,$dat,$dat
3120e1051a39Sopenharmony_ci	vorr	$in3,$dat,$dat
3121e1051a39Sopenharmony_ci	vld1.8	{$dat2},[$inp],#16
3122e1051a39Sopenharmony_ci	vorr	$in2,$dat2,$dat2
3123e1051a39Sopenharmony_ci	vorr	$in4,$dat2,$dat2
3124e1051a39Sopenharmony_ci	b.lo	.Lxts_inner_dec_tail
3125e1051a39Sopenharmony_ci	veor	$dat,$dat,$iv0			// before decrypt, xor with iv
3126e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$iv1
3127e1051a39Sopenharmony_ci
3128e1051a39Sopenharmony_ci	vorr	$dat1,$dat2,$dat2
3129e1051a39Sopenharmony_ci	vld1.8	{$dat2},[$inp],#16
3130e1051a39Sopenharmony_ci	vorr	$in0,$dat,$dat
3131e1051a39Sopenharmony_ci	vorr	$in1,$dat1,$dat1
3132e1051a39Sopenharmony_ci	veor	$in2,$dat2,$iv2			// third block xor with third iv
3133e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$iv2
3134e1051a39Sopenharmony_ci	cmp	$len,#32
3135e1051a39Sopenharmony_ci	b.lo	.Lxts_outer_dec_tail
3136e1051a39Sopenharmony_ci
3137e1051a39Sopenharmony_ci	vld1.8	{$dat3},[$inp],#16
3138e1051a39Sopenharmony_ci
3139e1051a39Sopenharmony_ci	// The iv for fifth block
3140e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3141e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3142e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3143e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3144e1051a39Sopenharmony_ci	fmov	$ivd40,$ivl
3145e1051a39Sopenharmony_ci	fmov	$ivd41,$ivh
3146e1051a39Sopenharmony_ci
3147e1051a39Sopenharmony_ci	vld1.8	{$dat4},[$inp],#16
3148e1051a39Sopenharmony_ci	veor	$dat3,$dat3,$iv3		// the fourth block
3149e1051a39Sopenharmony_ci	veor	$dat4,$dat4,$iv4
3150e1051a39Sopenharmony_ci	sub $len,$len,#32			// bias
3151e1051a39Sopenharmony_ci	mov	$rounds,$rounds0
3152e1051a39Sopenharmony_ci	b	.Loop5x_xts_dec
3153e1051a39Sopenharmony_ci
3154e1051a39Sopenharmony_ci.align	4
3155e1051a39Sopenharmony_ci.Loop5x_xts_dec:
3156e1051a39Sopenharmony_ci	aesd	$dat0,q8
3157e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3158e1051a39Sopenharmony_ci	aesd	$dat1,q8
3159e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3160e1051a39Sopenharmony_ci	aesd	$dat2,q8
3161e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3162e1051a39Sopenharmony_ci	aesd	$dat3,q8
3163e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3164e1051a39Sopenharmony_ci	aesd	$dat4,q8
3165e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3166e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16		// load key schedule...
3167e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
3168e1051a39Sopenharmony_ci	aesd	$dat0,q9
3169e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3170e1051a39Sopenharmony_ci	aesd	$dat1,q9
3171e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3172e1051a39Sopenharmony_ci	aesd	$dat2,q9
3173e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3174e1051a39Sopenharmony_ci	aesd	$dat3,q9
3175e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3176e1051a39Sopenharmony_ci	aesd	$dat4,q9
3177e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3178e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16		// load key schedule...
3179e1051a39Sopenharmony_ci	b.gt	.Loop5x_xts_dec
3180e1051a39Sopenharmony_ci
3181e1051a39Sopenharmony_ci	aesd	$dat0,q8
3182e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3183e1051a39Sopenharmony_ci	aesd	$dat1,q8
3184e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3185e1051a39Sopenharmony_ci	aesd	$dat2,q8
3186e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3187e1051a39Sopenharmony_ci	aesd	$dat3,q8
3188e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3189e1051a39Sopenharmony_ci	aesd	$dat4,q8
3190e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3191e1051a39Sopenharmony_ci	subs	$len,$len,#0x50			// because .Lxts_dec_tail4x
3192e1051a39Sopenharmony_ci
3193e1051a39Sopenharmony_ci	aesd	$dat0,q9
3194e1051a39Sopenharmony_ci	aesimc	$dat0,$dat
3195e1051a39Sopenharmony_ci	aesd	$dat1,q9
3196e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3197e1051a39Sopenharmony_ci	aesd	$dat2,q9
3198e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3199e1051a39Sopenharmony_ci	aesd	$dat3,q9
3200e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3201e1051a39Sopenharmony_ci	aesd	$dat4,q9
3202e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3203e1051a39Sopenharmony_ci	csel	$xoffset,xzr,$len,gt		// borrow x6, w6, "gt" is not typo
3204e1051a39Sopenharmony_ci	mov	$key_,$key1
3205e1051a39Sopenharmony_ci
3206e1051a39Sopenharmony_ci	aesd	$dat0,q10
3207e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3208e1051a39Sopenharmony_ci	aesd	$dat1,q10
3209e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3210e1051a39Sopenharmony_ci	aesd	$dat2,q10
3211e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3212e1051a39Sopenharmony_ci	aesd	$dat3,q10
3213e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3214e1051a39Sopenharmony_ci	aesd	$dat4,q10
3215e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3216e1051a39Sopenharmony_ci	add	$inp,$inp,$xoffset		// x0 is adjusted in such way that
3217e1051a39Sopenharmony_ci						// at exit from the loop v1.16b-v26.16b
3218e1051a39Sopenharmony_ci						// are loaded with last "words"
3219e1051a39Sopenharmony_ci	add	$xoffset,$len,#0x60		// because .Lxts_dec_tail4x
3220e1051a39Sopenharmony_ci
3221e1051a39Sopenharmony_ci	aesd	$dat0,q11
3222e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3223e1051a39Sopenharmony_ci	aesd	$dat1,q11
3224e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3225e1051a39Sopenharmony_ci	aesd	$dat2,q11
3226e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3227e1051a39Sopenharmony_ci	aesd	$dat3,q11
3228e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3229e1051a39Sopenharmony_ci	aesd	$dat4,q11
3230e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3231e1051a39Sopenharmony_ci
3232e1051a39Sopenharmony_ci	aesd	$dat0,q12
3233e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3234e1051a39Sopenharmony_ci	aesd	$dat1,q12
3235e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3236e1051a39Sopenharmony_ci	aesd	$dat2,q12
3237e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3238e1051a39Sopenharmony_ci	aesd	$dat3,q12
3239e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3240e1051a39Sopenharmony_ci	aesd	$dat4,q12
3241e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3242e1051a39Sopenharmony_ci
3243e1051a39Sopenharmony_ci	aesd	$dat0,q13
3244e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3245e1051a39Sopenharmony_ci	aesd	$dat1,q13
3246e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3247e1051a39Sopenharmony_ci	aesd	$dat2,q13
3248e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3249e1051a39Sopenharmony_ci	aesd	$dat3,q13
3250e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3251e1051a39Sopenharmony_ci	aesd	$dat4,q13
3252e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3253e1051a39Sopenharmony_ci
3254e1051a39Sopenharmony_ci	aesd	$dat0,q14
3255e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3256e1051a39Sopenharmony_ci	aesd	$dat1,q14
3257e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3258e1051a39Sopenharmony_ci	aesd	$dat2,q14
3259e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3260e1051a39Sopenharmony_ci	aesd	$dat3,q14
3261e1051a39Sopenharmony_ci	aesimc	$dat3,$dat3
3262e1051a39Sopenharmony_ci	aesd	$dat4,q14
3263e1051a39Sopenharmony_ci	aesimc	$dat4,$dat4
3264e1051a39Sopenharmony_ci
3265e1051a39Sopenharmony_ci	veor	$tmp0,$rndlast,$iv0
3266e1051a39Sopenharmony_ci	aesd	$dat0,q15
3267e1051a39Sopenharmony_ci	// The iv for first block of next iteration.
3268e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3269e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3270e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3271e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3272e1051a39Sopenharmony_ci	fmov	$ivd00,$ivl
3273e1051a39Sopenharmony_ci	fmov	$ivd01,$ivh
3274e1051a39Sopenharmony_ci	veor	$tmp1,$rndlast,$iv1
3275e1051a39Sopenharmony_ci	vld1.8	{$in0},[$inp],#16
3276e1051a39Sopenharmony_ci	aesd	$dat1,q15
3277e1051a39Sopenharmony_ci	// The iv for second block
3278e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3279e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3280e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3281e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3282e1051a39Sopenharmony_ci	fmov	$ivd10,$ivl
3283e1051a39Sopenharmony_ci	fmov	$ivd11,$ivh
3284e1051a39Sopenharmony_ci	veor	$tmp2,$rndlast,$iv2
3285e1051a39Sopenharmony_ci	vld1.8	{$in1},[$inp],#16
3286e1051a39Sopenharmony_ci	aesd	$dat2,q15
3287e1051a39Sopenharmony_ci	// The iv for third block
3288e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3289e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3290e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3291e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3292e1051a39Sopenharmony_ci	fmov	$ivd20,$ivl
3293e1051a39Sopenharmony_ci	fmov	$ivd21,$ivh
3294e1051a39Sopenharmony_ci	veor	$tmp3,$rndlast,$iv3
3295e1051a39Sopenharmony_ci	vld1.8	{$in2},[$inp],#16
3296e1051a39Sopenharmony_ci	aesd	$dat3,q15
3297e1051a39Sopenharmony_ci	// The iv for fourth block
3298e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3299e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3300e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3301e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3302e1051a39Sopenharmony_ci	fmov	$ivd30,$ivl
3303e1051a39Sopenharmony_ci	fmov	$ivd31,$ivh
3304e1051a39Sopenharmony_ci	veor	$tmp4,$rndlast,$iv4
3305e1051a39Sopenharmony_ci	vld1.8	{$in3},[$inp],#16
3306e1051a39Sopenharmony_ci	aesd	$dat4,q15
3307e1051a39Sopenharmony_ci
3308e1051a39Sopenharmony_ci	// The iv for fifth block
3309e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3310e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3311e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3312e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3313e1051a39Sopenharmony_ci	fmov	$ivd40,$ivl
3314e1051a39Sopenharmony_ci	fmov	$ivd41,$ivh
3315e1051a39Sopenharmony_ci
3316e1051a39Sopenharmony_ci	vld1.8	{$in4},[$inp],#16
3317e1051a39Sopenharmony_ci	cbz	$xoffset,.Lxts_dec_tail4x
3318e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16		// re-pre-load rndkey[0]
3319e1051a39Sopenharmony_ci	veor	$tmp0,$tmp0,$dat0
3320e1051a39Sopenharmony_ci	veor	$dat0,$in0,$iv0
3321e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
3322e1051a39Sopenharmony_ci	veor	$dat1,$in1,$iv1
3323e1051a39Sopenharmony_ci	veor	$tmp2,$tmp2,$dat2
3324e1051a39Sopenharmony_ci	veor	$dat2,$in2,$iv2
3325e1051a39Sopenharmony_ci	veor	$tmp3,$tmp3,$dat3
3326e1051a39Sopenharmony_ci	veor	$dat3,$in3,$iv3
3327e1051a39Sopenharmony_ci	veor	$tmp4,$tmp4,$dat4
3328e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
3329e1051a39Sopenharmony_ci	veor	$dat4,$in4,$iv4
3330e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
3331e1051a39Sopenharmony_ci	mov	$rounds,$rounds0
3332e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
3333e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16		// re-pre-load rndkey[1]
3334e1051a39Sopenharmony_ci	vst1.8	{$tmp3},[$out],#16
3335e1051a39Sopenharmony_ci	vst1.8	{$tmp4},[$out],#16
3336e1051a39Sopenharmony_ci	b.hs	.Loop5x_xts_dec
3337e1051a39Sopenharmony_ci
3338e1051a39Sopenharmony_ci	cmn	$len,#0x10
3339e1051a39Sopenharmony_ci	b.ne	.Loop5x_dec_after
3340e1051a39Sopenharmony_ci	// If x2($len) equal to -0x10, the left blocks is 4.
3341e1051a39Sopenharmony_ci	// After specially processing, utilize the five blocks processing again.
3342e1051a39Sopenharmony_ci	// It will use the following IVs: $iv0,$iv0,$iv1,$iv2,$iv3.
3343e1051a39Sopenharmony_ci	vorr	$iv4,$iv3,$iv3
3344e1051a39Sopenharmony_ci	vorr	$iv3,$iv2,$iv2
3345e1051a39Sopenharmony_ci	vorr	$iv2,$iv1,$iv1
3346e1051a39Sopenharmony_ci	vorr	$iv1,$iv0,$iv0
3347e1051a39Sopenharmony_ci	fmov	$ivl,$ivd40
3348e1051a39Sopenharmony_ci	fmov	$ivh,$ivd41
3349e1051a39Sopenharmony_ci	veor	$dat0,$iv0,$in0
3350e1051a39Sopenharmony_ci	veor	$dat1,$iv1,$in1
3351e1051a39Sopenharmony_ci	veor	$dat2,$in2,$iv2
3352e1051a39Sopenharmony_ci	veor	$dat3,$in3,$iv3
3353e1051a39Sopenharmony_ci	veor	$dat4,$in4,$iv4
3354e1051a39Sopenharmony_ci	b.eq	.Loop5x_xts_dec
3355e1051a39Sopenharmony_ci
3356e1051a39Sopenharmony_ci.Loop5x_dec_after:
3357e1051a39Sopenharmony_ci	add	$len,$len,#0x50
3358e1051a39Sopenharmony_ci	cbz	$len,.Lxts_done
3359e1051a39Sopenharmony_ci
3360e1051a39Sopenharmony_ci	add	$rounds,$rounds0,#2
3361e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
3362e1051a39Sopenharmony_ci	b.lo	.Lxts_inner_dec_tail
3363e1051a39Sopenharmony_ci
3364e1051a39Sopenharmony_ci	veor	$dat0,$iv0,$in2
3365e1051a39Sopenharmony_ci	veor	$dat1,$iv1,$in3
3366e1051a39Sopenharmony_ci	veor	$dat2,$in4,$iv2
3367e1051a39Sopenharmony_ci	b	.Lxts_outer_dec_tail
3368e1051a39Sopenharmony_ci
3369e1051a39Sopenharmony_ci.align	4
3370e1051a39Sopenharmony_ci.Lxts_dec_tail4x:
3371e1051a39Sopenharmony_ci	add	$inp,$inp,#16
3372e1051a39Sopenharmony_ci	tst	$tailcnt,#0xf
3373e1051a39Sopenharmony_ci	veor	$tmp1,$dat1,$tmp0
3374e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
3375e1051a39Sopenharmony_ci	veor	$tmp2,$dat2,$tmp2
3376e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
3377e1051a39Sopenharmony_ci	veor	$tmp3,$dat3,$tmp3
3378e1051a39Sopenharmony_ci	veor	$tmp4,$dat4,$tmp4
3379e1051a39Sopenharmony_ci	vst1.8	{$tmp3-$tmp4},[$out],#32
3380e1051a39Sopenharmony_ci
3381e1051a39Sopenharmony_ci	b.eq	.Lxts_dec_abort
3382e1051a39Sopenharmony_ci	vld1.8	{$dat0},[$inp],#16
3383e1051a39Sopenharmony_ci	b	.Lxts_done
3384e1051a39Sopenharmony_ci.align	4
3385e1051a39Sopenharmony_ci.Lxts_outer_dec_tail:
3386e1051a39Sopenharmony_ci	aesd	$dat0,q8
3387e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3388e1051a39Sopenharmony_ci	aesd	$dat1,q8
3389e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3390e1051a39Sopenharmony_ci	aesd	$dat2,q8
3391e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3392e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
3393e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
3394e1051a39Sopenharmony_ci	aesd	$dat0,q9
3395e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3396e1051a39Sopenharmony_ci	aesd	$dat1,q9
3397e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3398e1051a39Sopenharmony_ci	aesd	$dat2,q9
3399e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3400e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
3401e1051a39Sopenharmony_ci	b.gt	.Lxts_outer_dec_tail
3402e1051a39Sopenharmony_ci
3403e1051a39Sopenharmony_ci	aesd	$dat0,q8
3404e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3405e1051a39Sopenharmony_ci	aesd	$dat1,q8
3406e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3407e1051a39Sopenharmony_ci	aesd	$dat2,q8
3408e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3409e1051a39Sopenharmony_ci	veor	$tmp0,$iv0,$rndlast
3410e1051a39Sopenharmony_ci	subs	$len,$len,#0x30
3411e1051a39Sopenharmony_ci	// The iv for first block
3412e1051a39Sopenharmony_ci	fmov	$ivl,$ivd20
3413e1051a39Sopenharmony_ci	fmov	$ivh,$ivd21
3414e1051a39Sopenharmony_ci	mov	$constnum,#0x87
3415e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3416e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3417e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3418e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3419e1051a39Sopenharmony_ci	fmov	$ivd00,$ivl
3420e1051a39Sopenharmony_ci	fmov	$ivd01,$ivh
3421e1051a39Sopenharmony_ci	veor	$tmp1,$iv1,$rndlast
3422e1051a39Sopenharmony_ci	csel	$xoffset,$len,$xoffset,lo	// x6, w6, is zero at this point
3423e1051a39Sopenharmony_ci	aesd	$dat0,q9
3424e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3425e1051a39Sopenharmony_ci	aesd	$dat1,q9
3426e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3427e1051a39Sopenharmony_ci	aesd	$dat2,q9
3428e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3429e1051a39Sopenharmony_ci	veor	$tmp2,$iv2,$rndlast
3430e1051a39Sopenharmony_ci	// The iv for second block
3431e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3432e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3433e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3434e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3435e1051a39Sopenharmony_ci	fmov	$ivd10,$ivl
3436e1051a39Sopenharmony_ci	fmov	$ivd11,$ivh
3437e1051a39Sopenharmony_ci
3438e1051a39Sopenharmony_ci	add	$xoffset,$xoffset,#0x20
3439e1051a39Sopenharmony_ci	add	$inp,$inp,$xoffset		// $inp is adjusted to the last data
3440e1051a39Sopenharmony_ci
3441e1051a39Sopenharmony_ci	mov	$key_,$key1
3442e1051a39Sopenharmony_ci
3443e1051a39Sopenharmony_ci	// The iv for third block
3444e1051a39Sopenharmony_ci	extr	$midnumx,$ivh,$ivh,#32
3445e1051a39Sopenharmony_ci	extr	$ivh,$ivh,$ivl,#63
3446e1051a39Sopenharmony_ci	and	$tmpmw,$constnum,$midnum,asr #31
3447e1051a39Sopenharmony_ci	eor	$ivl,$tmpmx,$ivl,lsl #1
3448e1051a39Sopenharmony_ci	fmov	$ivd20,$ivl
3449e1051a39Sopenharmony_ci	fmov	$ivd21,$ivh
3450e1051a39Sopenharmony_ci
3451e1051a39Sopenharmony_ci	aesd	$dat0,q12
3452e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3453e1051a39Sopenharmony_ci	aesd	$dat1,q12
3454e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3455e1051a39Sopenharmony_ci	aesd	$dat2,q12
3456e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3457e1051a39Sopenharmony_ci	aesd	$dat0,q13
3458e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3459e1051a39Sopenharmony_ci	aesd	$dat1,q13
3460e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3461e1051a39Sopenharmony_ci	aesd	$dat2,q13
3462e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3463e1051a39Sopenharmony_ci	aesd	$dat0,q14
3464e1051a39Sopenharmony_ci	aesimc	$dat0,$dat0
3465e1051a39Sopenharmony_ci	aesd	$dat1,q14
3466e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3467e1051a39Sopenharmony_ci	aesd	$dat2,q14
3468e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3469e1051a39Sopenharmony_ci	vld1.8	{$in2},[$inp],#16
3470e1051a39Sopenharmony_ci	aesd	$dat0,q15
3471e1051a39Sopenharmony_ci	aesd	$dat1,q15
3472e1051a39Sopenharmony_ci	aesd	$dat2,q15
3473e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16		// re-pre-load rndkey[0]
3474e1051a39Sopenharmony_ci	add	$rounds,$rounds0,#2
3475e1051a39Sopenharmony_ci	veor	$tmp0,$tmp0,$dat0
3476e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
3477e1051a39Sopenharmony_ci	veor	$dat2,$dat2,$tmp2
3478e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16		// re-pre-load rndkey[1]
3479e1051a39Sopenharmony_ci	vst1.8	{$tmp0},[$out],#16
3480e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
3481e1051a39Sopenharmony_ci	vst1.8	{$dat2},[$out],#16
3482e1051a39Sopenharmony_ci
3483e1051a39Sopenharmony_ci	cmn	$len,#0x30
3484e1051a39Sopenharmony_ci	add	$len,$len,#0x30
3485e1051a39Sopenharmony_ci	b.eq	.Lxts_done
3486e1051a39Sopenharmony_ci	sub	$len,$len,#0x30
3487e1051a39Sopenharmony_ci	vorr	$in3,$in1,$in1
3488e1051a39Sopenharmony_ci	vorr	$in4,$in2,$in2
3489e1051a39Sopenharmony_ci	nop
3490e1051a39Sopenharmony_ci
3491e1051a39Sopenharmony_ci.Lxts_inner_dec_tail:
3492e1051a39Sopenharmony_ci	// $len == -0x10 means two blocks left.
3493e1051a39Sopenharmony_ci	cmn	$len,#0x10
3494e1051a39Sopenharmony_ci	veor	$dat1,$in3,$iv0
3495e1051a39Sopenharmony_ci	veor	$dat2,$in4,$iv1
3496e1051a39Sopenharmony_ci	b.eq	.Lxts_dec_tail_loop
3497e1051a39Sopenharmony_ci	veor	$dat2,$in4,$iv0
3498e1051a39Sopenharmony_ci.Lxts_dec_tail_loop:
3499e1051a39Sopenharmony_ci	aesd	$dat1,q8
3500e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3501e1051a39Sopenharmony_ci	aesd	$dat2,q8
3502e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3503e1051a39Sopenharmony_ci	vld1.32	{q8},[$key_],#16
3504e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
3505e1051a39Sopenharmony_ci	aesd	$dat1,q9
3506e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3507e1051a39Sopenharmony_ci	aesd	$dat2,q9
3508e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3509e1051a39Sopenharmony_ci	vld1.32	{q9},[$key_],#16
3510e1051a39Sopenharmony_ci	b.gt	.Lxts_dec_tail_loop
3511e1051a39Sopenharmony_ci
3512e1051a39Sopenharmony_ci	aesd	$dat1,q8
3513e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3514e1051a39Sopenharmony_ci	aesd	$dat2,q8
3515e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3516e1051a39Sopenharmony_ci	aesd	$dat1,q9
3517e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3518e1051a39Sopenharmony_ci	aesd	$dat2,q9
3519e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3520e1051a39Sopenharmony_ci	aesd	$dat1,q12
3521e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3522e1051a39Sopenharmony_ci	aesd	$dat2,q12
3523e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3524e1051a39Sopenharmony_ci	cmn	$len,#0x20
3525e1051a39Sopenharmony_ci	aesd	$dat1,q13
3526e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3527e1051a39Sopenharmony_ci	aesd	$dat2,q13
3528e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3529e1051a39Sopenharmony_ci	veor	$tmp1,$iv0,$rndlast
3530e1051a39Sopenharmony_ci	aesd	$dat1,q14
3531e1051a39Sopenharmony_ci	aesimc	$dat1,$dat1
3532e1051a39Sopenharmony_ci	aesd	$dat2,q14
3533e1051a39Sopenharmony_ci	aesimc	$dat2,$dat2
3534e1051a39Sopenharmony_ci	veor	$tmp2,$iv1,$rndlast
3535e1051a39Sopenharmony_ci	aesd	$dat1,q15
3536e1051a39Sopenharmony_ci	aesd	$dat2,q15
3537e1051a39Sopenharmony_ci	b.eq	.Lxts_dec_one
3538e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat1
3539e1051a39Sopenharmony_ci	veor	$tmp2,$tmp2,$dat2
3540e1051a39Sopenharmony_ci	vorr	$iv0,$iv2,$iv2
3541e1051a39Sopenharmony_ci	vorr	$iv1,$iv3,$iv3
3542e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
3543e1051a39Sopenharmony_ci	vst1.8	{$tmp2},[$out],#16
3544e1051a39Sopenharmony_ci	add	$len,$len,#16
3545e1051a39Sopenharmony_ci	b	.Lxts_done
3546e1051a39Sopenharmony_ci
3547e1051a39Sopenharmony_ci.Lxts_dec_one:
3548e1051a39Sopenharmony_ci	veor	$tmp1,$tmp1,$dat2
3549e1051a39Sopenharmony_ci	vorr	$iv0,$iv1,$iv1
3550e1051a39Sopenharmony_ci	vorr	$iv1,$iv2,$iv2
3551e1051a39Sopenharmony_ci	vst1.8	{$tmp1},[$out],#16
3552e1051a39Sopenharmony_ci	add	$len,$len,#32
3553e1051a39Sopenharmony_ci
3554e1051a39Sopenharmony_ci.Lxts_done:
3555e1051a39Sopenharmony_ci	tst	$tailcnt,#0xf
3556e1051a39Sopenharmony_ci	b.eq	.Lxts_dec_abort
3557e1051a39Sopenharmony_ci	// Processing the last two blocks with cipher stealing.
3558e1051a39Sopenharmony_ci	mov	x7,x3
3559e1051a39Sopenharmony_ci	cbnz	x2,.Lxts_dec_1st_done
3560e1051a39Sopenharmony_ci	vld1.8	{$dat0},[$inp],#16
3561e1051a39Sopenharmony_ci
3562e1051a39Sopenharmony_ci	// Decrypt the last secod block to get the last plain text block
3563e1051a39Sopenharmony_ci.Lxts_dec_1st_done:
3564e1051a39Sopenharmony_ci	eor	$tmpin,$dat0,$iv1
3565e1051a39Sopenharmony_ci	ldr	$rounds,[$key1,#240]
3566e1051a39Sopenharmony_ci	vld1.32	{$dat0},[$key1],#16
3567e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
3568e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key1],#16
3569e1051a39Sopenharmony_ci.Loop_final_2nd_dec:
3570e1051a39Sopenharmony_ci	aesd	$tmpin,$dat0
3571e1051a39Sopenharmony_ci	aesimc	$tmpin,$tmpin
3572e1051a39Sopenharmony_ci	vld1.32	{$dat0},[$key1],#16		// load key schedule...
3573e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
3574e1051a39Sopenharmony_ci	aesd	$tmpin,$dat1
3575e1051a39Sopenharmony_ci	aesimc	$tmpin,$tmpin
3576e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key1],#16		// load key schedule...
3577e1051a39Sopenharmony_ci	b.gt	.Loop_final_2nd_dec
3578e1051a39Sopenharmony_ci
3579e1051a39Sopenharmony_ci	aesd	$tmpin,$dat0
3580e1051a39Sopenharmony_ci	aesimc	$tmpin,$tmpin
3581e1051a39Sopenharmony_ci	vld1.32	{$dat0},[$key1]
3582e1051a39Sopenharmony_ci	aesd	$tmpin,$dat1
3583e1051a39Sopenharmony_ci	veor	$tmpin,$tmpin,$dat0
3584e1051a39Sopenharmony_ci	veor	$tmpin,$tmpin,$iv1
3585e1051a39Sopenharmony_ci	vst1.8	{$tmpin},[$out]
3586e1051a39Sopenharmony_ci
3587e1051a39Sopenharmony_ci	mov	$tmpinp,$inp
3588e1051a39Sopenharmony_ci	add	$tmpoutp,$out,#16
3589e1051a39Sopenharmony_ci
3590e1051a39Sopenharmony_ci	// Composite the tailcnt "16 byte not aligned block" into the last second plain blocks
3591e1051a39Sopenharmony_ci	// to get the last encrypted block.
3592e1051a39Sopenharmony_ci.composite_dec_loop:
3593e1051a39Sopenharmony_ci	subs	$tailcnt,$tailcnt,#1
3594e1051a39Sopenharmony_ci	ldrb	$l2outp,[$out,$tailcnt]
3595e1051a39Sopenharmony_ci	ldrb	$loutp,[$tmpinp,$tailcnt]
3596e1051a39Sopenharmony_ci	strb	$l2outp,[$tmpoutp,$tailcnt]
3597e1051a39Sopenharmony_ci	strb	$loutp,[$out,$tailcnt]
3598e1051a39Sopenharmony_ci	b.gt	.composite_dec_loop
3599e1051a39Sopenharmony_ci.Lxts_dec_load_done:
3600e1051a39Sopenharmony_ci	vld1.8	{$tmpin},[$out]
3601e1051a39Sopenharmony_ci	veor	$tmpin,$tmpin,$iv0
3602e1051a39Sopenharmony_ci
3603e1051a39Sopenharmony_ci	// Decrypt the composite block to get the last second plain text block
3604e1051a39Sopenharmony_ci	ldr	$rounds,[$key_,#240]
3605e1051a39Sopenharmony_ci	vld1.32	{$dat},[$key_],#16
3606e1051a39Sopenharmony_ci	sub	$rounds,$rounds,#2
3607e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key_],#16
3608e1051a39Sopenharmony_ci.Loop_final_dec:
3609e1051a39Sopenharmony_ci	aesd	$tmpin,$dat0
3610e1051a39Sopenharmony_ci	aesimc	$tmpin,$tmpin
3611e1051a39Sopenharmony_ci	vld1.32	{$dat0},[$key_],#16		// load key schedule...
3612e1051a39Sopenharmony_ci	subs	$rounds,$rounds,#2
3613e1051a39Sopenharmony_ci	aesd	$tmpin,$dat1
3614e1051a39Sopenharmony_ci	aesimc	$tmpin,$tmpin
3615e1051a39Sopenharmony_ci	vld1.32	{$dat1},[$key_],#16		// load key schedule...
3616e1051a39Sopenharmony_ci	b.gt	.Loop_final_dec
3617e1051a39Sopenharmony_ci
3618e1051a39Sopenharmony_ci	aesd	$tmpin,$dat0
3619e1051a39Sopenharmony_ci	aesimc	$tmpin,$tmpin
3620e1051a39Sopenharmony_ci	vld1.32	{$dat0},[$key_]
3621e1051a39Sopenharmony_ci	aesd	$tmpin,$dat1
3622e1051a39Sopenharmony_ci	veor	$tmpin,$tmpin,$dat0
3623e1051a39Sopenharmony_ci	veor	$tmpin,$tmpin,$iv0
3624e1051a39Sopenharmony_ci	vst1.8	{$tmpin},[$out]
3625e1051a39Sopenharmony_ci
3626e1051a39Sopenharmony_ci.Lxts_dec_abort:
3627e1051a39Sopenharmony_ci	ldp	$tailcnt,$midnumx,[sp,#48]
3628e1051a39Sopenharmony_ci	ldp	$ivd10,$ivd20,[sp,#32]
3629e1051a39Sopenharmony_ci	ldp	$ivd30,$ivd40,[sp,#16]
3630e1051a39Sopenharmony_ci	ldp	$constnumx,$tmpinp,[sp],#64
3631e1051a39Sopenharmony_ci
3632e1051a39Sopenharmony_ci.Lxts_dec_final_abort:
3633e1051a39Sopenharmony_ci	ret
3634e1051a39Sopenharmony_ci.size	${prefix}_xts_decrypt,.-${prefix}_xts_decrypt
3635e1051a39Sopenharmony_ci___
3636e1051a39Sopenharmony_ci}
3637e1051a39Sopenharmony_ci}}}
# Append the closing "#endif" line to the generated assembly text
# (presumably matching a preprocessor conditional emitted earlier in
# $code - the opening directive is outside this part of the file).
$code .= "#endif\n";
3641e1051a39Sopenharmony_ci########################################
3642e1051a39Sopenharmony_ciif ($flavour =~ /64/) {			######## 64-bit code
# Base instruction words for the four AES instructions; unaes() ORs the
# two register numbers into these values.
my %opcode = (
    "aese"   => 0x4e284800,
    "aesd"   => 0x4e285800,
    "aesmc"  => 0x4e286800,
    "aesimc" => 0x4e287800,
);

# unaes($mnemonic, $arg)
#
# Render one AES instruction as a raw ".inst" word followed by the
# original mnemonic and operands as a "//" comment.
#
#   $mnemonic - key into %opcode ("aese", "aesd", "aesmc", "aesimc")
#   $arg      - operand text; the first two q<N>/v<N> register numbers
#               found in it are used
#
# Returns the ".inst" line, or an empty string when $arg does not hold
# two q/v registers.  Note that the substitution which would call this
# helper in the translation loop of this branch is commented out.
local *unaes = sub {
    my ($mnemonic, $arg) = @_;

    my ($first_reg, $second_reg) =
        $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o
        or return "";

    # First register goes into the low bits, second is shifted up by 5.
    my $word = $opcode{$mnemonic} | $first_reg | ($second_reg << 5);

    return sprintf ".inst\t0x%08x\t//%s %s", $word, $mnemonic, $arg;
};
3655e1051a39Sopenharmony_ci
# 64-bit output pass: walk $code line by line, rewrite the old-style
# (q-register, v-prefixed) spelling into the new-style one, and print
# every line to the currently selected output handle.
3656e1051a39Sopenharmony_ci    foreach(split("\n",$code)) {
	# Replace every `expr` span with the value of eval(expr).
3657e1051a39Sopenharmony_ci	s/\`([^\`]*)\`/eval($1)/geo;
3658e1051a39Sopenharmony_ci
	# qN becomes vN.16b for N < 8 and v(N+8).16b otherwise (q8 -> v16);
	# presumably this keeps the code clear of v8-v15 - TODO confirm.
3659e1051a39Sopenharmony_ci	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers
	# Only the first "@" followed by a whitespace character is replaced,
	# and that whitespace character is consumed by the match.
3660e1051a39Sopenharmony_ci	s/@\s/\/\//o;			# old->new style commentary
3661e1051a39Sopenharmony_ci
	# The statements below form one "or" chain: the first substitution
	# that matches ends the chain, so at most one is applied per line.
	# The unaes() rewrite at the head of the chain is disabled, which
	# leaves the AES mnemonics in the output as written.
3662e1051a39Sopenharmony_ci	#s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
3663e1051a39Sopenharmony_ci	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel	$1$2,$1zr,$1$2,$3/o	or
3664e1051a39Sopenharmony_ci	s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel	$2,$3,$2,$1/o	or
3665e1051a39Sopenharmony_ci	s/vmov\.i8/movi/o	or	# fix up legacy mnemonics
3666e1051a39Sopenharmony_ci	s/vext\.8/ext/o		or
3667e1051a39Sopenharmony_ci	s/vrev32\.8/rev32/o	or
3668e1051a39Sopenharmony_ci	s/vtst\.8/cmtst/o	or
3669e1051a39Sopenharmony_ci	s/vshr/ushr/o		or
3670e1051a39Sopenharmony_ci	s/^(\s+)v/$1/o		or	# strip off v prefix
3671e1051a39Sopenharmony_ci	s/\bbx\s+lr\b/ret/o;
3672e1051a39Sopenharmony_ci
3673e1051a39Sopenharmony_ci	# fix up remaining legacy suffixes
	# Each size suffix is removed once per line.  A line containing
	# "],#8" has every .16b turned into .8b; a line that lost a .32 or
	# .64 suffix has every .16b turned into .4s or .2d respectively.
	# Finally a lane reference such as .4s[1] is shortened to .s[1].
3674e1051a39Sopenharmony_ci	s/\.[ui]?8//o;
3675e1051a39Sopenharmony_ci	m/\],#8/o and s/\.16b/\.8b/go;
3676e1051a39Sopenharmony_ci	s/\.[ui]?32//o and s/\.16b/\.4s/go;
3677e1051a39Sopenharmony_ci	s/\.[ui]?64//o and s/\.16b/\.2d/go;
3678e1051a39Sopenharmony_ci	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
3679e1051a39Sopenharmony_ci
3680e1051a39Sopenharmony_ci	print $_,"\n";
3681e1051a39Sopenharmony_ci    }
3682e1051a39Sopenharmony_ci} else {				######## 32-bit code
# Base instruction words for the four AES instructions in the 32-bit
# build; unaes() ORs the register fields into these values.
my %opcode = (
    "aesd"   => 0xf3b00340,
    "aese"   => 0xf3b00300,
    "aesimc" => 0xf3b003c0,
    "aesmc"  => 0xf3b00380,
);

# unaes($mnemonic, $arg)
#
# Hand-assemble one two-operand AES instruction into an INST(...) byte
# list (INST is not defined in this part of the file; it is expected to
# be a macro provided by the generated assembly's preamble).
#
#   $mnemonic - key into %opcode ("aese", "aesd", "aesmc", "aesimc")
#   $arg      - operand text; the first two q<N>/v<N> register numbers
#               found in it are used
#
# Returns the INST(...) line with the original instruction appended as
# an "@" comment.  If the mnemonic is not in %opcode or the operands
# cannot be parsed, the instruction text is returned unchanged instead
# of an empty string, so the caller's s///e no longer silently deletes
# the instruction (or encodes it from an undefined opcode); the
# assembler then gets to accept or reject it explicitly.
local *unaes = sub {
    my ($mnemonic, $arg) = @_;

    my $base = $opcode{$mnemonic};
    my ($first_reg, $second_reg) =
        $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o;

    return "$mnemonic\t$arg"
        unless defined $base && defined $second_reg;

    # Each register number is split into its low three bits and bit 3,
    # which land in separate fields of the instruction word.
    my $word = $base
             | (($first_reg  & 7) << 13) | (($first_reg  & 8) << 19)
             | (($second_reg & 7) << 1)  | (($second_reg & 8) << 2);

    # ARMv7 instructions are always encoded little-endian, hence the
    # bytes are listed lowest first.  The .inst directive would be the
    # proper tool, but older assemblers do not implement it.
    return sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
        $word & 0xff,         ($word >> 8) & 0xff,
        ($word >> 16) & 0xff, ($word >> 24) & 0xff,
        $mnemonic, $arg;
};
3702e1051a39Sopenharmony_ci
# unvtbl($arg)
#
# Expand the operands of a q-register "vtbl.8 qD,{qT},qI" into two
# d-register vtbl.8 instructions: one for the low halves (d<2D>, d<2I>)
# and one for the high halves (d<2D+1>, d<2I+1>), both using table {qT}.
#
#   $arg - operand text that followed "vtbl.8" on the source line
#
# Returns the two instructions joined by "\n\t".  Operands that are not
# in the q,{q},q form are returned as the original "vtbl.8" instruction
# instead of an empty string, so the caller's s///e no longer deletes
# the instruction from the output.
sub unvtbl {
    my $arg = shift;

    my ($dst, $table, $index) =
        $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o
        or return "vtbl.8\t$arg";

    return sprintf("vtbl.8\td%d,{q%d},d%d\n\t" .
                   "vtbl.8\td%d,{q%d},d%d",
                   2 * $dst,     $table, 2 * $index,
                   2 * $dst + 1, $table, 2 * $index + 1);
}
3710e1051a39Sopenharmony_ci
# unvdup32($arg)
#
# Rewrite the operands of "vdup.32 qD,qS[lane]" so that the source lane
# is addressed through a d register: lanes 0-1 of qS become d<2S>[0..1],
# lanes 2-3 become d<2S+1>[0..1].
#
#   $arg - operand text that followed "vdup.32" on the source line
#
# Returns the rewritten instruction.  Operands that are not in the
# q,q[lane] form are returned as the original "vdup.32" instruction
# instead of an empty string, so the caller's s///e no longer deletes
# the instruction from the output.
sub unvdup32 {
    my $arg = shift;

    my ($dst, $src, $lane) =
        $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o
        or return "vdup.32\t$arg";

    return sprintf("vdup.32\tq%d,d%d[%d]",
                   $dst, 2 * $src + ($lane >> 1), $lane & 1);
}
3717e1051a39Sopenharmony_ci
# unvmov32($arg)
#
# Rewrite the operands of "vmov.32 qD[lane],src" so that the destination
# lane is addressed through a d register: lanes 0-1 of qD become
# d<2D>[0..1], lanes 2-3 become d<2D+1>[0..1].  The source operand is
# copied through untouched.
#
#   $arg - operand text that followed "vmov.32" on the source line
#
# Returns the rewritten instruction.  Operands that do not start with a
# q[lane] destination are returned as the original "vmov.32" instruction
# instead of an empty string, so the caller's s///e no longer deletes
# the instruction from the output.
sub unvmov32 {
    my $arg = shift;

    my ($dst, $lane, $src) =
        $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o
        or return "vmov.32\t$arg";

    return sprintf("vmov.32\td%d[%d],%s",
                   2 * $dst + ($lane >> 1), $lane & 1, $src);
}
3724e1051a39Sopenharmony_ci
# 32-bit output pass: walk $code line by line, rewrite the new-style
# spelling into the old-style one, and print every line to the currently
# selected output handle.
3725e1051a39Sopenharmony_ci    foreach(split("\n",$code)) {
	# Replace every `expr` span with the value of eval(expr).
3726e1051a39Sopenharmony_ci	s/\`([^\`]*)\`/eval($1)/geo;
3727e1051a39Sopenharmony_ci
	# wN/xN become rN.  The vN.<arrangement> pattern captures a single
	# digit only, so it rewrites v0-v9 and leaves v10 and above as is.
3728e1051a39Sopenharmony_ci	s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers
3729e1051a39Sopenharmony_ci	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
	# Only the first "//" on the line is replaced, together with at most
	# one whitespace character following it.
3730e1051a39Sopenharmony_ci	s/\/\/\s?/@ /o;				# new->old style commentary
3731e1051a39Sopenharmony_ci
3732e1051a39Sopenharmony_ci	# fix up remaining new-style suffixes
	# "{qN},[addr],#8" becomes "{d<2N>},[addr]!"; failing that, the
	# first "],#<number>" on the line becomes "]!".
3733e1051a39Sopenharmony_ci	s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or
3734e1051a39Sopenharmony_ci	s/\],#[0-9]+/]!/o;
3735e1051a39Sopenharmony_ci
	# The statements below form one "or" chain: the first substitution
	# that matches ends the chain, so at most one is applied per line.
	# AES, vtbl.8, vdup.32 and vmov.32 lines are replaced by whatever
	# the corresponding helper returns; cclr becomes "mov.<cond> reg,#0",
	# "b.<cond>" loses its dot, and "ret" becomes "bx lr".
3736e1051a39Sopenharmony_ci	s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
3737e1051a39Sopenharmony_ci	s/cclr\s+([^,]+),\s*([a-z]+)/mov.$2	$1,#0/o	or
3738e1051a39Sopenharmony_ci	s/vtbl\.8\s+(.*)/unvtbl($1)/geo			or
3739e1051a39Sopenharmony_ci	s/vdup\.32\s+(.*)/unvdup32($1)/geo		or
3740e1051a39Sopenharmony_ci	s/vmov\.32\s+(.*)/unvmov32($1)/geo		or
3741e1051a39Sopenharmony_ci	s/^(\s+)b\./$1b/o				or
3742e1051a39Sopenharmony_ci	s/^(\s+)ret/$1bx\tlr/o;
3743e1051a39Sopenharmony_ci
	# A leading "mov.<cond>" (including one produced from cclr above) is
	# respelled "mov<cond>", and an "it <cond>" line is printed first.
3744e1051a39Sopenharmony_ci	if (s/^(\s+)mov\.([a-z]+)/$1mov$2/) {
3745e1051a39Sopenharmony_ci	    print "	it	$2\n";
3746e1051a39Sopenharmony_ci	}
3747e1051a39Sopenharmony_ci
3748e1051a39Sopenharmony_ci	print $_,"\n";
3749e1051a39Sopenharmony_ci    }
3750e1051a39Sopenharmony_ci}
3751e1051a39Sopenharmony_ci
# Close STDOUT explicitly: close() flushes the handle and returns false
# on failure, so an error while writing out the generated assembly
# aborts the script instead of going unnoticed.
unless (close STDOUT) {
    die "error closing STDOUT: $!";
}
3753