162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Bit sliced AES using NEON instructions
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2017 Linaro Ltd.
662306a36Sopenharmony_ci * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci/*
1062306a36Sopenharmony_ci * The algorithm implemented here is described in detail by the paper
1162306a36Sopenharmony_ci * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
1262306a36Sopenharmony_ci * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
1362306a36Sopenharmony_ci *
1462306a36Sopenharmony_ci * This implementation is based primarily on the OpenSSL implementation
1562306a36Sopenharmony_ci * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
1662306a36Sopenharmony_ci */
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#include <linux/linkage.h>
1962306a36Sopenharmony_ci#include <asm/assembler.h>
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci	.text
2262306a36Sopenharmony_ci	.fpu		neon
2362306a36Sopenharmony_ci
/*
 * Register aliases.
 *
 * rounds and bskey name the core registers ip and r4. Each qNl / qNh pair
 * maps to d(2N) / d(2N+1), so that a macro can build a D register name by
 * appending "l" or "h" to a Q register argument (this is how __tbl and
 * __ldr address the two 64-bit halves of their Q register operands).
 */
2462306a36Sopenharmony_ci	rounds		.req	ip
2562306a36Sopenharmony_ci	bskey		.req	r4
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci	q0l		.req	d0
2862306a36Sopenharmony_ci	q0h		.req	d1
2962306a36Sopenharmony_ci	q1l		.req	d2
3062306a36Sopenharmony_ci	q1h		.req	d3
3162306a36Sopenharmony_ci	q2l		.req	d4
3262306a36Sopenharmony_ci	q2h		.req	d5
3362306a36Sopenharmony_ci	q3l		.req	d6
3462306a36Sopenharmony_ci	q3h		.req	d7
3562306a36Sopenharmony_ci	q4l		.req	d8
3662306a36Sopenharmony_ci	q4h		.req	d9
3762306a36Sopenharmony_ci	q5l		.req	d10
3862306a36Sopenharmony_ci	q5h		.req	d11
3962306a36Sopenharmony_ci	q6l		.req	d12
4062306a36Sopenharmony_ci	q6h		.req	d13
4162306a36Sopenharmony_ci	q7l		.req	d14
4262306a36Sopenharmony_ci	q7h		.req	d15
4362306a36Sopenharmony_ci	q8l		.req	d16
4462306a36Sopenharmony_ci	q8h		.req	d17
4562306a36Sopenharmony_ci	q9l		.req	d18
4662306a36Sopenharmony_ci	q9h		.req	d19
4762306a36Sopenharmony_ci	q10l		.req	d20
4862306a36Sopenharmony_ci	q10h		.req	d21
4962306a36Sopenharmony_ci	q11l		.req	d22
5062306a36Sopenharmony_ci	q11h		.req	d23
5162306a36Sopenharmony_ci	q12l		.req	d24
5262306a36Sopenharmony_ci	q12h		.req	d25
5362306a36Sopenharmony_ci	q13l		.req	d26
5462306a36Sopenharmony_ci	q13h		.req	d27
5562306a36Sopenharmony_ci	q14l		.req	d28
5662306a36Sopenharmony_ci	q14h		.req	d29
5762306a36Sopenharmony_ci	q15l		.req	d30
5862306a36Sopenharmony_ci	q15h		.req	d31
5962306a36Sopenharmony_ci
/*
 * __tbl: 16-byte table lookup, issued as two vtbl.8 operations on the low
 * and high D halves of the operands.
 *
 *   \out  Q register receiving the result
 *   \tbl  Q register used as the lookup table
 *   \in   Q register holding the byte indices
 *   \tmp  scratch Q register, mandatory when \out and \tbl are the same
 *         register: the table is copied to \tmp first because the first
 *         vtbl overwrites the low half of \out before the second one runs.
 *         Assembly stops with .error if \tmp is omitted in that case.
 */
6062306a36Sopenharmony_ci	.macro		__tbl, out, tbl, in, tmp
6162306a36Sopenharmony_ci	.ifc		\out, \tbl
6262306a36Sopenharmony_ci	.ifb		\tmp
6362306a36Sopenharmony_ci	.error		__tbl needs temp register if out == tbl
6462306a36Sopenharmony_ci	.endif
6562306a36Sopenharmony_ci	vmov		\tmp, \out
6662306a36Sopenharmony_ci	.endif
6762306a36Sopenharmony_ci	vtbl.8		\out\()l, {\tbl}, \in\()l
6862306a36Sopenharmony_ci	.ifc		\out, \tbl
6962306a36Sopenharmony_ci	vtbl.8		\out\()h, {\tmp}, \in\()h
7062306a36Sopenharmony_ci	.else
7162306a36Sopenharmony_ci	vtbl.8		\out\()h, {\tbl}, \in\()h
7262306a36Sopenharmony_ci	.endif
7362306a36Sopenharmony_ci	.endm
7462306a36Sopenharmony_ci
/*
 * __ldr: load the 16 bytes at label \sym into Q register \out using two
 * 64-bit vldr instructions (low half from \sym, high half from \sym + 8).
 */
7562306a36Sopenharmony_ci	.macro		__ldr, out, sym
7662306a36Sopenharmony_ci	vldr		\out\()l, \sym
7762306a36Sopenharmony_ci	vldr		\out\()h, \sym + 8
7862306a36Sopenharmony_ci	.endm
7962306a36Sopenharmony_ci
/*
 * in_bs_ch: XOR-only transform over eight registers, used by the sbox macro
 * as the step before inv_gf256. \b1-\b7 are updated in place; \b0 is only
 * read.
 * NOTE(review): the name suggests the "input basis change" of the bit-sliced
 * S-box from the paper cited in the file header - confirm.
 */
8062306a36Sopenharmony_ci	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
8162306a36Sopenharmony_ci	veor		\b2, \b2, \b1
8262306a36Sopenharmony_ci	veor		\b5, \b5, \b6
8362306a36Sopenharmony_ci	veor		\b3, \b3, \b0
8462306a36Sopenharmony_ci	veor		\b6, \b6, \b2
8562306a36Sopenharmony_ci	veor		\b5, \b5, \b0
8662306a36Sopenharmony_ci	veor		\b6, \b6, \b3
8762306a36Sopenharmony_ci	veor		\b3, \b3, \b7
8862306a36Sopenharmony_ci	veor		\b7, \b7, \b5
8962306a36Sopenharmony_ci	veor		\b3, \b3, \b4
9062306a36Sopenharmony_ci	veor		\b4, \b4, \b5
9162306a36Sopenharmony_ci	veor		\b2, \b2, \b7
9262306a36Sopenharmony_ci	veor		\b3, \b3, \b1
9362306a36Sopenharmony_ci	veor		\b1, \b1, \b5
9462306a36Sopenharmony_ci	.endm
9562306a36Sopenharmony_ci
/*
 * out_bs_ch: XOR-only transform over eight registers, used by the sbox macro
 * as the step after inv_gf256. All eight operands are updated in place.
 * NOTE(review): presumably the "output basis change" counterpart of
 * in_bs_ch - confirm.
 */
9662306a36Sopenharmony_ci	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
9762306a36Sopenharmony_ci	veor		\b0, \b0, \b6
9862306a36Sopenharmony_ci	veor		\b1, \b1, \b4
9962306a36Sopenharmony_ci	veor		\b4, \b4, \b6
10062306a36Sopenharmony_ci	veor		\b2, \b2, \b0
10162306a36Sopenharmony_ci	veor		\b6, \b6, \b1
10262306a36Sopenharmony_ci	veor		\b1, \b1, \b5
10362306a36Sopenharmony_ci	veor		\b5, \b5, \b3
10462306a36Sopenharmony_ci	veor		\b3, \b3, \b7
10562306a36Sopenharmony_ci	veor		\b7, \b7, \b5
10662306a36Sopenharmony_ci	veor		\b2, \b2, \b5
10762306a36Sopenharmony_ci	veor		\b4, \b4, \b7
10862306a36Sopenharmony_ci	.endm
10962306a36Sopenharmony_ci
/*
 * inv_in_bs_ch: XOR-only transform over eight registers, used by the
 * inv_sbox macro as the step before inv_gf256. All eight operands are
 * updated in place.
 *
 * The parameter names are declared in permuted order (b6, b1, b2, b4, b7,
 * b0, b3, b5): the first positional argument is called \b6 inside the body,
 * the second \b1, and so on.
 */
11062306a36Sopenharmony_ci	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
11162306a36Sopenharmony_ci	veor		\b1, \b1, \b7
11262306a36Sopenharmony_ci	veor		\b4, \b4, \b7
11362306a36Sopenharmony_ci	veor		\b7, \b7, \b5
11462306a36Sopenharmony_ci	veor		\b1, \b1, \b3
11562306a36Sopenharmony_ci	veor		\b2, \b2, \b5
11662306a36Sopenharmony_ci	veor		\b3, \b3, \b7
11762306a36Sopenharmony_ci	veor		\b6, \b6, \b1
11862306a36Sopenharmony_ci	veor		\b2, \b2, \b0
11962306a36Sopenharmony_ci	veor		\b5, \b5, \b3
12062306a36Sopenharmony_ci	veor		\b4, \b4, \b6
12162306a36Sopenharmony_ci	veor		\b0, \b0, \b6
12262306a36Sopenharmony_ci	veor		\b1, \b1, \b4
12362306a36Sopenharmony_ci	.endm
12462306a36Sopenharmony_ci
/*
 * inv_out_bs_ch: XOR-only transform over eight registers, used by the
 * inv_sbox macro as the step after inv_gf256. \b0 is only read; the other
 * seven operands are updated in place.
 *
 * The parameter names are declared in permuted order (b6, b5, b0, b3, b7,
 * b1, b4, b2): the first positional argument is called \b6 inside the body,
 * the second \b5, and so on.
 */
12562306a36Sopenharmony_ci	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
12662306a36Sopenharmony_ci	veor		\b1, \b1, \b5
12762306a36Sopenharmony_ci	veor		\b2, \b2, \b7
12862306a36Sopenharmony_ci	veor		\b3, \b3, \b1
12962306a36Sopenharmony_ci	veor		\b4, \b4, \b5
13062306a36Sopenharmony_ci	veor		\b7, \b7, \b5
13162306a36Sopenharmony_ci	veor		\b3, \b3, \b4
13262306a36Sopenharmony_ci	veor 		\b5, \b5, \b0
13362306a36Sopenharmony_ci	veor		\b3, \b3, \b7
13462306a36Sopenharmony_ci	veor		\b6, \b6, \b2
13562306a36Sopenharmony_ci	veor		\b2, \b2, \b1
13662306a36Sopenharmony_ci	veor		\b6, \b6, \b3
13762306a36Sopenharmony_ci	veor		\b3, \b3, \b0
13862306a36Sopenharmony_ci	veor		\b5, \b5, \b6
13962306a36Sopenharmony_ci	.endm
14062306a36Sopenharmony_ci
/*
 * mul_gf4: combine the register pair (\x0, \x1) with the pair (\y0, \y1)
 * using AND/XOR only. On exit:
 *
 *   \x0 = ((x0 ^ x1) & y1) ^ (x1 & y0)
 *   \x1 = (x1 & y0) ^ (x0 & (y0 ^ y1))
 *
 * where x0, x1 denote the values on entry. \y0 and \y1 are only read;
 * \t0 and \t1 are clobbered.
 * NOTE(review): presumably a bit-sliced GF(2^2) multiplication, going by
 * the name - confirm.
 */
14162306a36Sopenharmony_ci	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
14262306a36Sopenharmony_ci	veor 		\t0, \y0, \y1
14362306a36Sopenharmony_ci	vand		\t0, \t0, \x0
14462306a36Sopenharmony_ci	veor		\x0, \x0, \x1
14562306a36Sopenharmony_ci	vand		\t1, \x1, \y0
14662306a36Sopenharmony_ci	vand		\x0, \x0, \y1
14762306a36Sopenharmony_ci	veor		\x1, \t1, \t0
14862306a36Sopenharmony_ci	veor		\x0, \x0, \t1
14962306a36Sopenharmony_ci	.endm
15062306a36Sopenharmony_ci
/*
 * mul_gf4_n_gf4: two interleaved AND/XOR products. With x0..x3 denoting the
 * values on entry, the macro leaves:
 *
 *   \x0 = ((x0 ^ x1) & y1) ^ (x0 & (y0 ^ y1))
 *   \x1 = (x1 & y0) ^ ((x0 ^ x1) & y1)
 *   \x2 = ((x2 ^ x3) & y3) ^ (x3 & y2)		(same form as mul_gf4)
 *   \x3 = (x3 & y2) ^ (x2 & (y2 ^ y3))		(same form as mul_gf4)
 *
 * \y0-\y3 are only read; \t0 and \t1 are clobbered.
 * NOTE(review): the "_n" in the name presumably marks the extra constant
 * factor folded into the first product - confirm against the reference.
 */
15162306a36Sopenharmony_ci	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
15262306a36Sopenharmony_ci	veor		\t0, \y0, \y1
15362306a36Sopenharmony_ci	veor 		\t1, \y2, \y3
15462306a36Sopenharmony_ci	vand		\t0, \t0, \x0
15562306a36Sopenharmony_ci	vand		\t1, \t1, \x2
15662306a36Sopenharmony_ci	veor		\x0, \x0, \x1
15762306a36Sopenharmony_ci	veor		\x2, \x2, \x3
15862306a36Sopenharmony_ci	vand		\x1, \x1, \y0
15962306a36Sopenharmony_ci	vand		\x3, \x3, \y2
16062306a36Sopenharmony_ci	vand		\x0, \x0, \y1
16162306a36Sopenharmony_ci	vand		\x2, \x2, \y3
16262306a36Sopenharmony_ci	veor		\x1, \x1, \x0
16362306a36Sopenharmony_ci	veor		\x2, \x2, \x3
16462306a36Sopenharmony_ci	veor		\x0, \x0, \t0
16562306a36Sopenharmony_ci	veor		\x3, \x3, \t1
16662306a36Sopenharmony_ci	.endm
16762306a36Sopenharmony_ci
/*
 * mul_gf16_2: apply the mul_gf4 / mul_gf4_n_gf4 building blocks to two
 * groups of four registers, \x0-\x3 and \x4-\x7, both against the same
 * operand \y0-\y3. Results replace \x0-\x7 in place.
 *
 * \y0 and \y1 are XORed with \y2 and \y3 between the two halves and XORed
 * with them again later, so they hold their entry values again on exit;
 * \y2 and \y3 are never written. \t0-\t3 are clobbered.
 * NOTE(review): presumably two bit-sliced GF(2^4) multiplications by a
 * common factor, going by the name - confirm.
 */
16862306a36Sopenharmony_ci	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
16962306a36Sopenharmony_ci				    y0, y1, y2, y3, t0, t1, t2, t3
17062306a36Sopenharmony_ci	veor		\t0, \x0, \x2
17162306a36Sopenharmony_ci	veor		\t1, \x1, \x3
17262306a36Sopenharmony_ci	mul_gf4  	\x0, \x1, \y0, \y1, \t2, \t3
17362306a36Sopenharmony_ci	veor		\y0, \y0, \y2
17462306a36Sopenharmony_ci	veor		\y1, \y1, \y3
17562306a36Sopenharmony_ci	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
17662306a36Sopenharmony_ci	veor		\x0, \x0, \t0
17762306a36Sopenharmony_ci	veor		\x2, \x2, \t0
17862306a36Sopenharmony_ci	veor		\x1, \x1, \t1
17962306a36Sopenharmony_ci	veor		\x3, \x3, \t1
18062306a36Sopenharmony_ci	veor		\t0, \x4, \x6
18162306a36Sopenharmony_ci	veor		\t1, \x5, \x7
18262306a36Sopenharmony_ci	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
18362306a36Sopenharmony_ci	veor		\y0, \y0, \y2
18462306a36Sopenharmony_ci	veor		\y1, \y1, \y3
18562306a36Sopenharmony_ci	mul_gf4  	\x4, \x5, \y0, \y1, \t2, \t3
18662306a36Sopenharmony_ci	veor		\x4, \x4, \t0
18762306a36Sopenharmony_ci	veor		\x6, \x6, \t0
18862306a36Sopenharmony_ci	veor		\x5, \x5, \t1
18962306a36Sopenharmony_ci	veor		\x7, \x7, \t1
19062306a36Sopenharmony_ci	.endm
19162306a36Sopenharmony_ci
/*
 * inv_gf256: shared core of the sbox and inv_sbox macros.
 *
 * Builds intermediate values in \t0-\t3 and \s0-\s3 from \x0-\x7 using
 * veor/vand/vorr/vbsl/vmvn (\x0-\x7 are only read in this phase), then
 * passes \x0-\x7 together with \s3, \s2, \s1, \t1 as the second operand to
 * mul_gf16_2, which replaces \x0-\x7 in place. None of the t/s registers
 * holds a meaningful value for the caller afterwards.
 * NOTE(review): presumably a bit-sliced inversion in GF(2^8), going by the
 * name - confirm against the reference implementation.
 */
19262306a36Sopenharmony_ci	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
19362306a36Sopenharmony_ci				   t0, t1, t2, t3, s0, s1, s2, s3
19462306a36Sopenharmony_ci	veor		\t3, \x4, \x6
19562306a36Sopenharmony_ci	veor		\t0, \x5, \x7
19662306a36Sopenharmony_ci	veor		\t1, \x1, \x3
19762306a36Sopenharmony_ci	veor		\s1, \x7, \x6
19862306a36Sopenharmony_ci	veor		\s0, \x0, \x2
19962306a36Sopenharmony_ci	veor		\s3, \t3, \t0
20062306a36Sopenharmony_ci	vorr		\t2, \t0, \t1
20162306a36Sopenharmony_ci	vand		\s2, \t3, \s0
20262306a36Sopenharmony_ci	vorr		\t3, \t3, \s0
20362306a36Sopenharmony_ci	veor		\s0, \s0, \t1
20462306a36Sopenharmony_ci	vand		\t0, \t0, \t1
20562306a36Sopenharmony_ci	veor		\t1, \x3, \x2
20662306a36Sopenharmony_ci	vand		\s3, \s3, \s0
20762306a36Sopenharmony_ci	vand		\s1, \s1, \t1
20862306a36Sopenharmony_ci	veor		\t1, \x4, \x5
20962306a36Sopenharmony_ci	veor		\s0, \x1, \x0
21062306a36Sopenharmony_ci	veor		\t3, \t3, \s1
21162306a36Sopenharmony_ci	veor		\t2, \t2, \s1
21262306a36Sopenharmony_ci	vand		\s1, \t1, \s0
21362306a36Sopenharmony_ci	vorr		\t1, \t1, \s0
21462306a36Sopenharmony_ci	veor		\t3, \t3, \s3
21562306a36Sopenharmony_ci	veor		\t0, \t0, \s1
21662306a36Sopenharmony_ci	veor		\t2, \t2, \s2
21762306a36Sopenharmony_ci	veor		\t1, \t1, \s3
21862306a36Sopenharmony_ci	veor		\t0, \t0, \s2
21962306a36Sopenharmony_ci	vand		\s0, \x7, \x3
22062306a36Sopenharmony_ci	veor		\t1, \t1, \s2
22162306a36Sopenharmony_ci	vand		\s1, \x6, \x2
22262306a36Sopenharmony_ci	vand		\s2, \x5, \x1
22362306a36Sopenharmony_ci	vorr		\s3, \x4, \x0
22462306a36Sopenharmony_ci	veor		\t3, \t3, \s0
22562306a36Sopenharmony_ci	veor		\t1, \t1, \s2
22662306a36Sopenharmony_ci	veor		\s0, \t0, \s3
22762306a36Sopenharmony_ci	veor		\t2, \t2, \s1
22862306a36Sopenharmony_ci	vand		\s2, \t3, \t1
22962306a36Sopenharmony_ci	veor		\s1, \t2, \s2
23062306a36Sopenharmony_ci	veor		\s3, \s0, \s2
23162306a36Sopenharmony_ci	vbsl		\s1, \t1, \s0
23262306a36Sopenharmony_ci	vmvn		\t0, \s0
23362306a36Sopenharmony_ci	vbsl		\s0, \s1, \s3
23462306a36Sopenharmony_ci	vbsl		\t0, \s1, \s3
23562306a36Sopenharmony_ci	vbsl		\s3, \t3, \t2
23662306a36Sopenharmony_ci	veor		\t3, \t3, \t2
23762306a36Sopenharmony_ci	vand		\s2, \s0, \s3
23862306a36Sopenharmony_ci	veor		\t1, \t1, \t0
23962306a36Sopenharmony_ci	veor		\s2, \s2, \t3
24062306a36Sopenharmony_ci	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
24162306a36Sopenharmony_ci			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
24262306a36Sopenharmony_ci	.endm
24362306a36Sopenharmony_ci
/*
 * sbox: forward S-box stage, composed of in_bs_ch, inv_gf256 and out_bs_ch
 * applied to \b0-\b7, with \t0-\t3 and \s0-\s3 as scratch for inv_gf256.
 *
 * The register order is permuted from one stage to the next, so the result
 * is not left in argument order: the caller in this file (aesbs_encrypt8)
 * continues with the operands ordered b0, b1, b4, b6, b3, b7, b2, b5.
 */
24462306a36Sopenharmony_ci	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
24562306a36Sopenharmony_ci			      t0, t1, t2, t3, s0, s1, s2, s3
24662306a36Sopenharmony_ci	in_bs_ch	\b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7
24762306a36Sopenharmony_ci	inv_gf256	\b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \
24862306a36Sopenharmony_ci			\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
24962306a36Sopenharmony_ci	out_bs_ch	\b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3
25062306a36Sopenharmony_ci	.endm
25162306a36Sopenharmony_ci
/*
 * inv_sbox: inverse S-box stage, composed of inv_in_bs_ch, inv_gf256 and
 * inv_out_bs_ch applied to \b0-\b7, with \t0-\t3 and \s0-\s3 as scratch for
 * inv_gf256.
 *
 * The register order is permuted from one stage to the next, so the result
 * is not left in argument order: the caller in this file (aesbs_decrypt8)
 * continues with the operands ordered b0, b1, b6, b4, b2, b7, b3, b5.
 */
25262306a36Sopenharmony_ci	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
25362306a36Sopenharmony_ci				  t0, t1, t2, t3, s0, s1, s2, s3
25462306a36Sopenharmony_ci	inv_in_bs_ch	\b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7
25562306a36Sopenharmony_ci	inv_gf256	\b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \
25662306a36Sopenharmony_ci			\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
25762306a36Sopenharmony_ci	inv_out_bs_ch	\b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6
25862306a36Sopenharmony_ci	.endm
25962306a36Sopenharmony_ci
/*
 * shift_rows: despite the name this macro does two things per register:
 * it XORs \x0-\x7 with the next eight 16-byte values read from [bskey], and
 * then permutes the bytes of each result through the index vector \mask
 * (via __tbl), writing the outcome back to \x0-\x7.
 *
 * bskey is advanced by 128 bytes in total (four 32-byte post-incremented
 * loads carrying a 256-bit alignment hint). \t0-\t3 are clobbered; \t0/\t1
 * and \t2/\t3 are used as register ranges in the vld1 lists. Loads, XORs
 * and lookups are interleaved rather than grouped.
 */
26062306a36Sopenharmony_ci	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \
26162306a36Sopenharmony_ci				    t0, t1, t2, t3, mask
26262306a36Sopenharmony_ci	vld1.8		{\t0-\t1}, [bskey, :256]!
26362306a36Sopenharmony_ci	veor		\t0, \t0, \x0
26462306a36Sopenharmony_ci	vld1.8		{\t2-\t3}, [bskey, :256]!
26562306a36Sopenharmony_ci	veor		\t1, \t1, \x1
26662306a36Sopenharmony_ci	__tbl		\x0, \t0, \mask
26762306a36Sopenharmony_ci	veor		\t2, \t2, \x2
26862306a36Sopenharmony_ci	__tbl		\x1, \t1, \mask
26962306a36Sopenharmony_ci	vld1.8		{\t0-\t1}, [bskey, :256]!
27062306a36Sopenharmony_ci	veor		\t3, \t3, \x3
27162306a36Sopenharmony_ci	__tbl		\x2, \t2, \mask
27262306a36Sopenharmony_ci	__tbl		\x3, \t3, \mask
27362306a36Sopenharmony_ci	vld1.8		{\t2-\t3}, [bskey, :256]!
27462306a36Sopenharmony_ci	veor		\t0, \t0, \x4
27562306a36Sopenharmony_ci	veor		\t1, \t1, \x5
27662306a36Sopenharmony_ci	__tbl		\x4, \t0, \mask
27762306a36Sopenharmony_ci	veor		\t2, \t2, \x6
27862306a36Sopenharmony_ci	__tbl		\x5, \t1, \mask
27962306a36Sopenharmony_ci	veor		\t3, \t3, \x7
28062306a36Sopenharmony_ci	__tbl		\x6, \t2, \mask
28162306a36Sopenharmony_ci	__tbl		\x7, \t3, \mask
28262306a36Sopenharmony_ci	.endm
28362306a36Sopenharmony_ci
/*
 * inv_shift_rows: permute the bytes of each of \x0-\x7 in place through the
 * index vector \mask. Unlike shift_rows, no key material is read or XORed
 * here. Because source and destination of every __tbl call are the same
 * register, a temp must be supplied; \t0-\t3 are reused for that purpose
 * and are clobbered.
 */
28462306a36Sopenharmony_ci	.macro		inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \
28562306a36Sopenharmony_ci					t0, t1, t2, t3, mask
28662306a36Sopenharmony_ci	__tbl		\x0, \x0, \mask, \t0
28762306a36Sopenharmony_ci	__tbl		\x1, \x1, \mask, \t1
28862306a36Sopenharmony_ci	__tbl		\x2, \x2, \mask, \t2
28962306a36Sopenharmony_ci	__tbl		\x3, \x3, \mask, \t3
29062306a36Sopenharmony_ci	__tbl		\x4, \x4, \mask, \t0
29162306a36Sopenharmony_ci	__tbl		\x5, \x5, \mask, \t1
29262306a36Sopenharmony_ci	__tbl		\x6, \x6, \mask, \t2
29362306a36Sopenharmony_ci	__tbl		\x7, \x7, \mask, \t3
29462306a36Sopenharmony_ci	.endm
29562306a36Sopenharmony_ci
/*
 * mix_cols: XOR / byte-rotate network over \x0-\x7, results in place;
 * \t0-\t7 are clobbered. Each "vext.8 d, s, s, #n" rotates the 16 bytes of
 * s by n positions (n = 12 and n = 8 are used).
 *
 * \inv selects the register assignment of the last five results. When it is
 * left blank they are stored in \x2, \x4, \x5, \x3 and \x6. When it is set
 * (inv_mix_cols passes 1) the same five XOR combinations are computed but
 * stored in a different register assignment, with \t3 used as a staging
 * register for the value that ends up in \x6.
 *
 * Only NEON data-processing instructions are used, so condition flags set
 * before the macro are still intact after it; aesbs_encrypt8 depends on
 * that for the beq that follows its mix_cols invocation.
 * NOTE(review): presumably the bit-sliced MixColumns step - confirm.
 */
29662306a36Sopenharmony_ci	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
29762306a36Sopenharmony_ci				  t0, t1, t2, t3, t4, t5, t6, t7, inv
29862306a36Sopenharmony_ci	vext.8		\t0, \x0, \x0, #12
29962306a36Sopenharmony_ci	vext.8		\t1, \x1, \x1, #12
30062306a36Sopenharmony_ci	veor		\x0, \x0, \t0
30162306a36Sopenharmony_ci	vext.8		\t2, \x2, \x2, #12
30262306a36Sopenharmony_ci	veor		\x1, \x1, \t1
30362306a36Sopenharmony_ci	vext.8		\t3, \x3, \x3, #12
30462306a36Sopenharmony_ci	veor		\x2, \x2, \t2
30562306a36Sopenharmony_ci	vext.8		\t4, \x4, \x4, #12
30662306a36Sopenharmony_ci	veor		\x3, \x3, \t3
30762306a36Sopenharmony_ci	vext.8		\t5, \x5, \x5, #12
30862306a36Sopenharmony_ci	veor		\x4, \x4, \t4
30962306a36Sopenharmony_ci	vext.8		\t6, \x6, \x6, #12
31062306a36Sopenharmony_ci	veor		\x5, \x5, \t5
31162306a36Sopenharmony_ci	vext.8		\t7, \x7, \x7, #12
31262306a36Sopenharmony_ci	veor		\x6, \x6, \t6
31362306a36Sopenharmony_ci	veor		\t1, \t1, \x0
31462306a36Sopenharmony_ci	veor.8		\x7, \x7, \t7
31562306a36Sopenharmony_ci	vext.8		\x0, \x0, \x0, #8
31662306a36Sopenharmony_ci	veor		\t2, \t2, \x1
31762306a36Sopenharmony_ci	veor		\t0, \t0, \x7
31862306a36Sopenharmony_ci	veor		\t1, \t1, \x7
31962306a36Sopenharmony_ci	vext.8		\x1, \x1, \x1, #8
32062306a36Sopenharmony_ci	veor		\t5, \t5, \x4
32162306a36Sopenharmony_ci	veor		\x0, \x0, \t0
32262306a36Sopenharmony_ci	veor		\t6, \t6, \x5
32362306a36Sopenharmony_ci	veor		\x1, \x1, \t1
32462306a36Sopenharmony_ci	vext.8		\t0, \x4, \x4, #8
32562306a36Sopenharmony_ci	veor		\t4, \t4, \x3
32662306a36Sopenharmony_ci	vext.8		\t1, \x5, \x5, #8
32762306a36Sopenharmony_ci	veor		\t7, \t7, \x6
32862306a36Sopenharmony_ci	vext.8		\x4, \x3, \x3, #8
32962306a36Sopenharmony_ci	veor		\t3, \t3, \x2
33062306a36Sopenharmony_ci	vext.8		\x5, \x7, \x7, #8
33162306a36Sopenharmony_ci	veor		\t4, \t4, \x7
33262306a36Sopenharmony_ci	vext.8		\x3, \x6, \x6, #8
33362306a36Sopenharmony_ci	veor		\t3, \t3, \x7
33462306a36Sopenharmony_ci	vext.8		\x6, \x2, \x2, #8
33562306a36Sopenharmony_ci	veor		\x7, \t1, \t5
33662306a36Sopenharmony_ci	.ifb		\inv
33762306a36Sopenharmony_ci	veor		\x2, \t0, \t4
33862306a36Sopenharmony_ci	veor		\x4, \x4, \t3
33962306a36Sopenharmony_ci	veor		\x5, \x5, \t7
34062306a36Sopenharmony_ci	veor		\x3, \x3, \t6
34162306a36Sopenharmony_ci	veor		\x6, \x6, \t2
34262306a36Sopenharmony_ci	.else
34362306a36Sopenharmony_ci	veor		\t3, \t3, \x4
34462306a36Sopenharmony_ci	veor		\x5, \x5, \t7
34562306a36Sopenharmony_ci	veor		\x2, \x3, \t6
34662306a36Sopenharmony_ci	veor		\x3, \t0, \t4
34762306a36Sopenharmony_ci	veor		\x4, \x6, \t2
34862306a36Sopenharmony_ci	vmov		\x6, \t3
34962306a36Sopenharmony_ci	.endif
35062306a36Sopenharmony_ci	.endm
35162306a36Sopenharmony_ci
/*
 * inv_mix_cols: three steps on \x0-\x7 (results in place, \t0-\t7
 * clobbered):
 *
 *  1. XOR \x0-\x7 with the eight 16-byte values at [bskey]. The first three
 *     32-byte loads post-increment bskey (+96), the fourth does not, and the
 *     following "sub #224" leaves bskey 128 bytes below its entry value,
 *     i.e. the key schedule is walked backwards.
 *  2. A pre-processing XOR network built on 8-byte rotations (vext.8 #8).
 *  3. mix_cols with its inv argument set, which finishes the transform.
 *
 * NOTE(review): presumably the bit-sliced InvMixColumns preceded by the
 * round key addition - confirm against the reference.
 */
35262306a36Sopenharmony_ci	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
35362306a36Sopenharmony_ci				      t0, t1, t2, t3, t4, t5, t6, t7
35462306a36Sopenharmony_ci	vld1.8		{\t0-\t1}, [bskey, :256]!
35562306a36Sopenharmony_ci	veor		\x0, \x0, \t0
35662306a36Sopenharmony_ci	vld1.8		{\t2-\t3}, [bskey, :256]!
35762306a36Sopenharmony_ci	veor		\x1, \x1, \t1
35862306a36Sopenharmony_ci	vld1.8		{\t4-\t5}, [bskey, :256]!
35962306a36Sopenharmony_ci	veor		\x2, \x2, \t2
36062306a36Sopenharmony_ci	vld1.8		{\t6-\t7}, [bskey, :256]
36162306a36Sopenharmony_ci	sub		bskey, bskey, #224
36262306a36Sopenharmony_ci	veor		\x3, \x3, \t3
36362306a36Sopenharmony_ci	veor		\x4, \x4, \t4
36462306a36Sopenharmony_ci	veor		\x5, \x5, \t5
36562306a36Sopenharmony_ci	veor		\x6, \x6, \t6
36662306a36Sopenharmony_ci	veor		\x7, \x7, \t7
36762306a36Sopenharmony_ci	vext.8		\t0, \x0, \x0, #8
36862306a36Sopenharmony_ci	vext.8		\t6, \x6, \x6, #8
36962306a36Sopenharmony_ci	vext.8		\t7, \x7, \x7, #8
37062306a36Sopenharmony_ci	veor		\t0, \t0, \x0
37162306a36Sopenharmony_ci	vext.8		\t1, \x1, \x1, #8
37262306a36Sopenharmony_ci	veor		\t6, \t6, \x6
37362306a36Sopenharmony_ci	vext.8		\t2, \x2, \x2, #8
37462306a36Sopenharmony_ci	veor		\t7, \t7, \x7
37562306a36Sopenharmony_ci	vext.8		\t3, \x3, \x3, #8
37662306a36Sopenharmony_ci	veor		\t1, \t1, \x1
37762306a36Sopenharmony_ci	vext.8		\t4, \x4, \x4, #8
37862306a36Sopenharmony_ci	veor		\t2, \t2, \x2
37962306a36Sopenharmony_ci	vext.8		\t5, \x5, \x5, #8
38062306a36Sopenharmony_ci	veor		\t3, \t3, \x3
38162306a36Sopenharmony_ci	veor		\t4, \t4, \x4
38262306a36Sopenharmony_ci	veor		\t5, \t5, \x5
38362306a36Sopenharmony_ci	veor		\x0, \x0, \t6
38462306a36Sopenharmony_ci	veor		\x1, \x1, \t6
38562306a36Sopenharmony_ci	veor		\x2, \x2, \t0
38662306a36Sopenharmony_ci	veor		\x4, \x4, \t2
38762306a36Sopenharmony_ci	veor		\x3, \x3, \t1
38862306a36Sopenharmony_ci	veor		\x1, \x1, \t7
38962306a36Sopenharmony_ci	veor		\x2, \x2, \t7
39062306a36Sopenharmony_ci	veor		\x4, \x4, \t6
39162306a36Sopenharmony_ci	veor		\x5, \x5, \t3
39262306a36Sopenharmony_ci	veor		\x3, \x3, \t6
39362306a36Sopenharmony_ci	veor		\x6, \x6, \t4
39462306a36Sopenharmony_ci	veor		\x4, \x4, \t7
39562306a36Sopenharmony_ci	veor		\x5, \x5, \t7
39662306a36Sopenharmony_ci	veor		\x7, \x7, \t5
39762306a36Sopenharmony_ci	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
39862306a36Sopenharmony_ci			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
39962306a36Sopenharmony_ci	.endm
40062306a36Sopenharmony_ci
/*
 * swapmove_2x: two interleaved "swapmove" operations, one on the pair
 * (\a0, \b0) and one on (\a1, \b1). For each pair, with shifts applied per
 * 64-bit lane:
 *
 *   t  = ((b shifted right by \n) ^ a) & \mask
 *   a ^= t
 *   b ^= t shifted left by \n
 *
 * which exchanges the bits of a selected by \mask with the bits of b that
 * sit \n positions higher. \n is an immediate; \t0 and \t1 are clobbered.
 */
40162306a36Sopenharmony_ci	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
40262306a36Sopenharmony_ci	vshr.u64	\t0, \b0, #\n
40362306a36Sopenharmony_ci	vshr.u64	\t1, \b1, #\n
40462306a36Sopenharmony_ci	veor		\t0, \t0, \a0
40562306a36Sopenharmony_ci	veor		\t1, \t1, \a1
40662306a36Sopenharmony_ci	vand		\t0, \t0, \mask
40762306a36Sopenharmony_ci	vand		\t1, \t1, \mask
40862306a36Sopenharmony_ci	veor		\a0, \a0, \t0
40962306a36Sopenharmony_ci	vshl.s64	\t0, \t0, #\n
41062306a36Sopenharmony_ci	veor		\a1, \a1, \t1
41162306a36Sopenharmony_ci	vshl.s64	\t1, \t1, #\n
41262306a36Sopenharmony_ci	veor		\b0, \b0, \t0
41362306a36Sopenharmony_ci	veor		\b1, \b1, \t1
41462306a36Sopenharmony_ci	.endm
41562306a36Sopenharmony_ci
/*
 * bitslice: three layers of swapmove_2x over eight registers, using
 * (shift, mask) = (1, 0x55), (2, 0x33) and (4, 0x0f).
 *
 * Note that the parameter list names the operands in reverse order: the
 * first positional argument is \x7 and the eighth is \x0. \t0 and \t1 hold
 * the masks, \t2 and \t3 are the swapmove temporaries; all four are
 * clobbered. The same macro is used both on entry to and on exit from
 * aesbs_encrypt8 and aesbs_decrypt8.
 * NOTE(review): presumably converts between the byte-wise and the
 * bit-sliced state layout - confirm against the reference.
 */
41662306a36Sopenharmony_ci	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
41762306a36Sopenharmony_ci	vmov.i8		\t0, #0x55
41862306a36Sopenharmony_ci	vmov.i8		\t1, #0x33
41962306a36Sopenharmony_ci	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
42062306a36Sopenharmony_ci	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
42162306a36Sopenharmony_ci	vmov.i8		\t0, #0x0f
42262306a36Sopenharmony_ci	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
42362306a36Sopenharmony_ci	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
42462306a36Sopenharmony_ci	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
42562306a36Sopenharmony_ci	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
42662306a36Sopenharmony_ci	.endm
42762306a36Sopenharmony_ci
/*
 * M0 is the byte-index vector used by aesbs_convert_key to permute each
 * round key before it is expanded into bit planes.
 *
 * aesbs_convert_key register usage (arguments per the prototype below):
 *   r0 = out    write pointer, advanced as the schedule is emitted
 *   r1 = rk     read pointer, 16 bytes consumed per round key
 *   r2 = rounds loop counter
 *
 * Output layout, in bytes:
 *   16                  first round key, copied unchanged
 *   128 * (rounds - 1)  one group of eight 16-byte bit planes per key
 *   16                  last round key XORed with 0x63 in every byte
 *
 * In total rounds + 1 round keys are read from rk. q0-q15 are all written.
 */
42862306a36Sopenharmony_ci	.align		4
42962306a36Sopenharmony_ciM0:	.quad		0x02060a0e03070b0f, 0x0004080c0105090d
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ci	/*
43262306a36Sopenharmony_ci	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
43362306a36Sopenharmony_ci	 */
43462306a36Sopenharmony_ciENTRY(aesbs_convert_key)
43562306a36Sopenharmony_ci	vld1.32		{q7}, [r1]!		// load round 0 key
43662306a36Sopenharmony_ci	vld1.32		{q15}, [r1]!		// load round 1 key
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci	vmov.i8		q8,  #0x01		// bit masks
43962306a36Sopenharmony_ci	vmov.i8		q9,  #0x02
44062306a36Sopenharmony_ci	vmov.i8		q10, #0x04
44162306a36Sopenharmony_ci	vmov.i8		q11, #0x08
44262306a36Sopenharmony_ci	vmov.i8		q12, #0x10
44362306a36Sopenharmony_ci	vmov.i8		q13, #0x20
44462306a36Sopenharmony_ci	__ldr		q14, M0
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	sub		r2, r2, #1		// loop below runs rounds - 1 times
44762306a36Sopenharmony_ci	vst1.8		{q7}, [r0, :128]!	// save round 0 key
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_ci.Lkey_loop:
45062306a36Sopenharmony_ci	__tbl		q7, q15, q14		// q7 = current key, bytes permuted via M0
45162306a36Sopenharmony_ci	vmov.i8		q6, #0x40		// masks for bits 6 and 7 are rebuilt on every
45262306a36Sopenharmony_ci	vmov.i8		q15, #0x80		// pass: q6/q15 get overwritten further down
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci	vtst.8		q0, q7, q8		// qN = 0xff in each byte where bit N of the
45562306a36Sopenharmony_ci	vtst.8		q1, q7, q9		// key byte is set, 0x00 elsewhere (N = 0..7)
45662306a36Sopenharmony_ci	vtst.8		q2, q7, q10
45762306a36Sopenharmony_ci	vtst.8		q3, q7, q11
45862306a36Sopenharmony_ci	vtst.8		q4, q7, q12
45962306a36Sopenharmony_ci	vtst.8		q5, q7, q13
46062306a36Sopenharmony_ci	vtst.8		q6, q7, q6
46162306a36Sopenharmony_ci	vtst.8		q7, q7, q15
46262306a36Sopenharmony_ci	vld1.32		{q15}, [r1]!		// load next round key
46362306a36Sopenharmony_ci	vmvn		q0, q0			// invert planes 0, 1, 5 and 6: these are
46462306a36Sopenharmony_ci	vmvn		q1, q1			// the bit positions set in 0x63
46562306a36Sopenharmony_ci	vmvn		q5, q5
46662306a36Sopenharmony_ci	vmvn		q6, q6
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	subs		r2, r2, #1
46962306a36Sopenharmony_ci	vst1.8		{q0-q1}, [r0, :256]!
47062306a36Sopenharmony_ci	vst1.8		{q2-q3}, [r0, :256]!
47162306a36Sopenharmony_ci	vst1.8		{q4-q5}, [r0, :256]!
47262306a36Sopenharmony_ci	vst1.8		{q6-q7}, [r0, :256]!
47362306a36Sopenharmony_ci	bne		.Lkey_loop
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci	vmov.i8		q7, #0x63		// compose .L63
47662306a36Sopenharmony_ci	veor		q15, q15, q7
47762306a36Sopenharmony_ci	vst1.8		{q15}, [r0, :128]
47862306a36Sopenharmony_ci	bx		lr
47962306a36Sopenharmony_ciENDPROC(aesbs_convert_key)
48062306a36Sopenharmony_ci
/*
 * aesbs_encrypt8: internal routine (plain label, reached with bl from
 * __ecb_crypt) that encrypts the eight blocks held in q0-q7.
 *
 * In:   q0-q7  = eight 16-byte input blocks
 *       bskey  = start of a key schedule in the layout written by
 *                aesbs_convert_key
 *       rounds = number of rounds
 * Out:  results in q0, q1, q4, q6, q3, q7, q2, q5 (the order in which
 *       aesbs_ecb_encrypt stores them)
 * Clobbers: q8-q15, bskey, rounds, condition flags
 *
 * M0SR, SR and SRM0 are byte-index vectors for __tbl: M0SR is applied
 * together with the first key addition, SR is the shift_rows mask for the
 * intermediate rounds and SRM0 the one for the final round.
 */
48162306a36Sopenharmony_ci	.align		4
48262306a36Sopenharmony_ciM0SR:	.quad		0x0a0e02060f03070b, 0x0004080c05090d01
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ciaesbs_encrypt8:
48562306a36Sopenharmony_ci	vld1.8		{q9}, [bskey, :128]!	// round 0 key
48662306a36Sopenharmony_ci	__ldr		q8, M0SR
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	veor		q10, q0, q9		// xor with round0 key
48962306a36Sopenharmony_ci	veor		q11, q1, q9
49062306a36Sopenharmony_ci	__tbl		q0, q10, q8
49162306a36Sopenharmony_ci	veor		q12, q2, q9
49262306a36Sopenharmony_ci	__tbl		q1, q11, q8
49362306a36Sopenharmony_ci	veor		q13, q3, q9
49462306a36Sopenharmony_ci	__tbl		q2, q12, q8
49562306a36Sopenharmony_ci	veor		q14, q4, q9
49662306a36Sopenharmony_ci	__tbl		q3, q13, q8
49762306a36Sopenharmony_ci	veor		q15, q5, q9
49862306a36Sopenharmony_ci	__tbl		q4, q14, q8
49962306a36Sopenharmony_ci	veor		q10, q6, q9
50062306a36Sopenharmony_ci	__tbl		q5, q15, q8
50162306a36Sopenharmony_ci	veor		q11, q7, q9
50262306a36Sopenharmony_ci	__tbl		q6, q10, q8
50362306a36Sopenharmony_ci	__tbl		q7, q11, q8
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	sub		rounds, rounds, #1
50862306a36Sopenharmony_ci	b		.Lenc_sbox		// first pass enters the loop at the S-box
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_ci	.align		5
51162306a36Sopenharmony_ciSR:	.quad		0x0504070600030201, 0x0f0e0d0c0a09080b
51262306a36Sopenharmony_ciSRM0:	.quad		0x0304090e00050a0f, 0x01060b0c0207080d
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci.Lenc_last:
51562306a36Sopenharmony_ci	__ldr		q12, SRM0
51662306a36Sopenharmony_ci.Lenc_loop:
51762306a36Sopenharmony_ci	shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
51862306a36Sopenharmony_ci.Lenc_sbox:
51962306a36Sopenharmony_ci	sbox		q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
52062306a36Sopenharmony_ci								q13, q14, q15
52162306a36Sopenharmony_ci	subs		rounds, rounds, #1
52262306a36Sopenharmony_ci	bcc		.Lenc_done		// borrow: counter was already 0, S-box of the last round done
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	mix_cols	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \
52562306a36Sopenharmony_ci								q13, q14, q15
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	beq		.Lenc_last		// flags still from subs above: next pass is the last
52862306a36Sopenharmony_ci	__ldr		q12, SR
52962306a36Sopenharmony_ci	b		.Lenc_loop
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci.Lenc_done:
53262306a36Sopenharmony_ci	vld1.8		{q12}, [bskey, :128]	// last round key
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci	bitslice	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	veor		q0, q0, q12
53762306a36Sopenharmony_ci	veor		q1, q1, q12
53862306a36Sopenharmony_ci	veor		q4, q4, q12
53962306a36Sopenharmony_ci	veor		q6, q6, q12
54062306a36Sopenharmony_ci	veor		q3, q3, q12
54162306a36Sopenharmony_ci	veor		q7, q7, q12
54262306a36Sopenharmony_ci	veor		q2, q2, q12
54362306a36Sopenharmony_ci	veor		q5, q5, q12
54462306a36Sopenharmony_ci	bx		lr
54562306a36Sopenharmony_ciENDPROC(aesbs_encrypt8)
54662306a36Sopenharmony_ci
/*
 * aesbs_decrypt8: internal routine (plain label, reached with bl from
 * __ecb_crypt and aesbs_cbc_decrypt) that decrypts the eight blocks held in
 * q0-q7.
 *
 * In:   q0-q7  = eight 16-byte input blocks
 *       bskey  = start of a key schedule in the layout written by
 *                aesbs_convert_key
 *       rounds = number of rounds
 * Out:  results in q0, q1, q6, q4, q2, q7, q3, q5 (the order in which
 *       aesbs_ecb_decrypt stores them)
 * Clobbers: q8-q15, bskey, rounds, condition flags
 *
 * The key schedule is consumed back to front: the 16-byte key at offset
 * 128 * rounds - 112 is applied first, each inv_mix_cols pass then reads a
 * 128-byte group and steps bskey back by 128, and the 16-byte key at the
 * start of the schedule is applied last.
 *
 * M0ISR, ISR and ISRM0 are byte-index vectors for __tbl: M0ISR is applied
 * together with the first key addition, ISR is the inv_shift_rows mask for
 * the intermediate rounds and ISRM0 the one for the final round.
 */
54762306a36Sopenharmony_ci	.align		4
54862306a36Sopenharmony_ciM0ISR:	.quad		0x0a0e0206070b0f03, 0x0004080c0d010509
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ciaesbs_decrypt8:
55162306a36Sopenharmony_ci	add		bskey, bskey, rounds, lsl #7	// bskey += 128 * rounds
55262306a36Sopenharmony_ci	sub		bskey, bskey, #112		// now at the final 16-byte key
55362306a36Sopenharmony_ci	vld1.8		{q9}, [bskey, :128]	// round 0 key
55462306a36Sopenharmony_ci	sub		bskey, bskey, #128		// step back to the last 128-byte group
55562306a36Sopenharmony_ci	__ldr		q8, M0ISR
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	veor		q10, q0, q9		// xor with round0 key
55862306a36Sopenharmony_ci	veor		q11, q1, q9
55962306a36Sopenharmony_ci	__tbl		q0, q10, q8
56062306a36Sopenharmony_ci	veor		q12, q2, q9
56162306a36Sopenharmony_ci	__tbl		q1, q11, q8
56262306a36Sopenharmony_ci	veor		q13, q3, q9
56362306a36Sopenharmony_ci	__tbl		q2, q12, q8
56462306a36Sopenharmony_ci	veor		q14, q4, q9
56562306a36Sopenharmony_ci	__tbl		q3, q13, q8
56662306a36Sopenharmony_ci	veor		q15, q5, q9
56762306a36Sopenharmony_ci	__tbl		q4, q14, q8
56862306a36Sopenharmony_ci	veor		q10, q6, q9
56962306a36Sopenharmony_ci	__tbl		q5, q15, q8
57062306a36Sopenharmony_ci	veor		q11, q7, q9
57162306a36Sopenharmony_ci	__tbl		q6, q10, q8
57262306a36Sopenharmony_ci	__tbl		q7, q11, q8
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	sub		rounds, rounds, #1
57762306a36Sopenharmony_ci	b		.Ldec_sbox		// first pass enters the loop at the S-box
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	.align		5
58062306a36Sopenharmony_ciISR:	.quad		0x0504070602010003, 0x0f0e0d0c080b0a09
58162306a36Sopenharmony_ciISRM0:	.quad		0x01040b0e0205080f, 0x0306090c00070a0d
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci.Ldec_last:
58462306a36Sopenharmony_ci	__ldr		q12, ISRM0
58562306a36Sopenharmony_ci.Ldec_loop:
58662306a36Sopenharmony_ci	inv_shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
58762306a36Sopenharmony_ci.Ldec_sbox:
58862306a36Sopenharmony_ci	inv_sbox	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
58962306a36Sopenharmony_ci								q13, q14, q15
59062306a36Sopenharmony_ci	subs		rounds, rounds, #1
59162306a36Sopenharmony_ci	bcc		.Ldec_done		// borrow: counter was already 0, S-box of the last round done
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	inv_mix_cols	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \
59462306a36Sopenharmony_ci								q13, q14, q15
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	beq		.Ldec_last		// flags still from subs above: next pass is the last
59762306a36Sopenharmony_ci	__ldr		q12, ISR
59862306a36Sopenharmony_ci	b		.Ldec_loop
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci.Ldec_done:
60162306a36Sopenharmony_ci	add		bskey, bskey, #112		// back at the start of the schedule
60262306a36Sopenharmony_ci	vld1.8		{q12}, [bskey, :128]	// last round key
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci	bitslice	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci	veor		q0, q0, q12
60762306a36Sopenharmony_ci	veor		q1, q1, q12
60862306a36Sopenharmony_ci	veor		q6, q6, q12
60962306a36Sopenharmony_ci	veor		q4, q4, q12
61062306a36Sopenharmony_ci	veor		q2, q2, q12
61162306a36Sopenharmony_ci	veor		q7, q7, q12
61262306a36Sopenharmony_ci	veor		q3, q3, q12
61362306a36Sopenharmony_ci	veor		q5, q5, q12
61462306a36Sopenharmony_ci	bx		lr
61562306a36Sopenharmony_ciENDPROC(aesbs_decrypt8)
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	/*
61862306a36Sopenharmony_ci	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
61962306a36Sopenharmony_ci	 *		     int blocks)
62062306a36Sopenharmony_ci	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
62162306a36Sopenharmony_ci	 *		     int blocks)
62262306a36Sopenharmony_ci	 */
/*
 * __ecb_crypt: body shared by aesbs_ecb_encrypt and aesbs_ecb_decrypt.
 *
 *   \do8      routine called with bl for every group of up to eight blocks;
 *             it receives the blocks in q0-q7, the key pointer in bskey and
 *             the round count in rounds
 *   \o0-\o7   Q registers holding the results of \do8, in block order
 *
 * Register usage: r0 = out, r1 = in (both post-incremented by 16 per
 * block), r2 = key schedule, r3 = rounds, r5 = blocks left (fifth argument,
 * read from the stack).
 *
 * When fewer than eight blocks remain, a computed jump enters the run of
 * single-block loads (and later the run of stores) part-way, so that only
 * the last (blocks & 7) instructions of the run execute. The address
 * arithmetic assumes each of those instructions occupies 4 bytes.
 * Registers whose load was skipped still go through \do8, but their
 * results are never stored.
 *
 * r6 is saved and restored although this macro does not use it.
 * NOTE(review): presumably pushed to keep the stack 8-byte aligned -
 * confirm.
 */
62362306a36Sopenharmony_ci	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
62462306a36Sopenharmony_ci	push		{r4-r6, lr}
62562306a36Sopenharmony_ci	ldr		r5, [sp, #16]		// number of blocks
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci99:	adr		ip, 0f			// ip = address just past the last load
62862306a36Sopenharmony_ci	and		lr, r5, #7		// lr = blocks mod 8
62962306a36Sopenharmony_ci	cmp		r5, #8
63062306a36Sopenharmony_ci	sub		ip, ip, lr, lsl #2	// back up one 4-byte instruction per block
63162306a36Sopenharmony_ci	movlt		pc, ip			// computed goto if blocks < 8
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci	vld1.8		{q0}, [r1]!
63462306a36Sopenharmony_ci	vld1.8		{q1}, [r1]!
63562306a36Sopenharmony_ci	vld1.8		{q2}, [r1]!
63662306a36Sopenharmony_ci	vld1.8		{q3}, [r1]!
63762306a36Sopenharmony_ci	vld1.8		{q4}, [r1]!
63862306a36Sopenharmony_ci	vld1.8		{q5}, [r1]!
63962306a36Sopenharmony_ci	vld1.8		{q6}, [r1]!
64062306a36Sopenharmony_ci	vld1.8		{q7}, [r1]!
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci0:	mov		bskey, r2
64362306a36Sopenharmony_ci	mov		rounds, r3
64462306a36Sopenharmony_ci	bl		\do8
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci	adr		ip, 1f			// same computed entry, now into the stores
64762306a36Sopenharmony_ci	and		lr, r5, #7
64862306a36Sopenharmony_ci	cmp		r5, #8
64962306a36Sopenharmony_ci	sub		ip, ip, lr, lsl #2
65062306a36Sopenharmony_ci	movlt		pc, ip			// computed goto if blocks < 8
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	vst1.8		{\o0}, [r0]!
65362306a36Sopenharmony_ci	vst1.8		{\o1}, [r0]!
65462306a36Sopenharmony_ci	vst1.8		{\o2}, [r0]!
65562306a36Sopenharmony_ci	vst1.8		{\o3}, [r0]!
65662306a36Sopenharmony_ci	vst1.8		{\o4}, [r0]!
65762306a36Sopenharmony_ci	vst1.8		{\o5}, [r0]!
65862306a36Sopenharmony_ci	vst1.8		{\o6}, [r0]!
65962306a36Sopenharmony_ci	vst1.8		{\o7}, [r0]!
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci1:	subs		r5, r5, #8		// one group consumed
66262306a36Sopenharmony_ci	bgt		99b			// loop while blocks remain
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_ci	pop		{r4-r6, pc}
66562306a36Sopenharmony_ci	.endm
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	.align		4
	/*
	 * ECB encryption entry point: expands __ecb_crypt with aesbs_encrypt8
	 * as the per-group routine. The q register list gives the order in
	 * which the macro stores registers to the output buffer after each
	 * call.
	 */
66862306a36Sopenharmony_ciENTRY(aesbs_ecb_encrypt)
66962306a36Sopenharmony_ci	__ecb_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
67062306a36Sopenharmony_ciENDPROC(aesbs_ecb_encrypt)
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	.align		4
	/*
	 * ECB decryption entry point: expands __ecb_crypt with aesbs_decrypt8
	 * as the per-group routine. The q register list gives the order in
	 * which the macro stores registers to the output buffer after each
	 * call; it differs from the order used for encryption.
	 */
67362306a36Sopenharmony_ciENTRY(aesbs_ecb_decrypt)
67462306a36Sopenharmony_ci	__ecb_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
67562306a36Sopenharmony_ciENDPROC(aesbs_ecb_decrypt)
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci	/*
67862306a36Sopenharmony_ci	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
67962306a36Sopenharmony_ci	 *		     int rounds, int blocks, u8 iv[])
	 *
	 * Each loop iteration decrypts up to eight blocks with aesbs_decrypt8,
	 * XORs every result with the ciphertext block that precedes it (the
	 * value at iv[] for the first block of the group) and stores it to
	 * out[]. The last ciphertext block consumed is written back to iv[]
	 * at the end of every iteration.
	 *
	 * Register use, as read from the code below:
	 *   r0		output pointer
	 *   r1		input pointer; only advanced by the second pass over the
	 *		input, the first pass reads through a copy held in lr
	 *   r2, r3	copied to bskey (r4) and rounds (ip) before each call
	 *   r5		number of blocks still to process
	 *   r6		pointer to iv[]
	 *
	 * With fewer than eight blocks left, each instruction sequence is
	 * entered part-way through so that only its tail executes. The offsets
	 * rely on every instruction in those sequences occupying 4 bytes.
68062306a36Sopenharmony_ci	 */
68162306a36Sopenharmony_ci	.align		4
68262306a36Sopenharmony_ciENTRY(aesbs_cbc_decrypt)
68362306a36Sopenharmony_ci	mov		ip, sp			// remember where the stack arguments are
68462306a36Sopenharmony_ci	push		{r4-r6, lr}
68562306a36Sopenharmony_ci	ldm		ip, {r5-r6}		// load args 4-5 (blocks, iv)
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci99:	adr		ip, 0f
68862306a36Sopenharmony_ci	and		lr, r5, #7
68962306a36Sopenharmony_ci	cmp		r5, #8
69062306a36Sopenharmony_ci	sub		ip, ip, lr, lsl #2	// ip = 0f - 4 * (blocks & 7)
69162306a36Sopenharmony_ci	mov		lr, r1			// read via lr so r1 stays at the start of the group
69262306a36Sopenharmony_ci	movlt		pc, ip			// computed goto if blocks < 8
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci	vld1.8		{q0}, [lr]!
69562306a36Sopenharmony_ci	vld1.8		{q1}, [lr]!
69662306a36Sopenharmony_ci	vld1.8		{q2}, [lr]!
69762306a36Sopenharmony_ci	vld1.8		{q3}, [lr]!
69862306a36Sopenharmony_ci	vld1.8		{q4}, [lr]!
69962306a36Sopenharmony_ci	vld1.8		{q5}, [lr]!
70062306a36Sopenharmony_ci	vld1.8		{q6}, [lr]!
70162306a36Sopenharmony_ci	vld1.8		{q7}, [lr]
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci0:	mov		bskey, r2
70462306a36Sopenharmony_ci	mov		rounds, r3
70562306a36Sopenharmony_ci	bl		aesbs_decrypt8
70662306a36Sopenharmony_ci
	/*
	 * Preset q8-q15 to the IV. In a short group the loads below skip the
	 * leading registers, and the first XOR that executes must find the IV
	 * in whichever of these registers it uses.
	 */
70762306a36Sopenharmony_ci	vld1.8		{q8}, [r6]
70862306a36Sopenharmony_ci	vmov		q9, q8
70962306a36Sopenharmony_ci	vmov		q10, q8
71062306a36Sopenharmony_ci	vmov		q11, q8
71162306a36Sopenharmony_ci	vmov		q12, q8
71262306a36Sopenharmony_ci	vmov		q13, q8
71362306a36Sopenharmony_ci	vmov		q14, q8
71462306a36Sopenharmony_ci	vmov		q15, q8
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci	adr		ip, 1f
71762306a36Sopenharmony_ci	and		lr, r5, #7
71862306a36Sopenharmony_ci	cmp		r5, #8
71962306a36Sopenharmony_ci	sub		ip, ip, lr, lsl #2	// ip = 1f - 4 * (blocks & 7)
72062306a36Sopenharmony_ci	movlt		pc, ip			// computed goto if blocks < 8
72162306a36Sopenharmony_ci
	/* second pass over the input: ciphertext blocks used as XOR operands */
72262306a36Sopenharmony_ci	vld1.8		{q9}, [r1]!
72362306a36Sopenharmony_ci	vld1.8		{q10}, [r1]!
72462306a36Sopenharmony_ci	vld1.8		{q11}, [r1]!
72562306a36Sopenharmony_ci	vld1.8		{q12}, [r1]!
72662306a36Sopenharmony_ci	vld1.8		{q13}, [r1]!
72762306a36Sopenharmony_ci	vld1.8		{q14}, [r1]!
72862306a36Sopenharmony_ci	vld1.8		{q15}, [r1]!
72962306a36Sopenharmony_ci	W(nop)					// eighth slot: there are only seven loads
73062306a36Sopenharmony_ci
	/*
	 * lr still holds blocks & 7 and the flags still come from the cmp
	 * above: nothing in between writes lr or sets flags. Each XOR/store
	 * pair below is two instructions, hence the shift by 3.
	 */
73162306a36Sopenharmony_ci1:	adr		ip, 2f
73262306a36Sopenharmony_ci	sub		ip, ip, lr, lsl #3	// ip = 2f - 8 * (blocks & 7)
73362306a36Sopenharmony_ci	movlt		pc, ip			// computed goto if blocks < 8
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	veor		q0, q0, q8
73662306a36Sopenharmony_ci	vst1.8		{q0}, [r0]!
73762306a36Sopenharmony_ci	veor		q1, q1, q9
73862306a36Sopenharmony_ci	vst1.8		{q1}, [r0]!
73962306a36Sopenharmony_ci	veor		q6, q6, q10
74062306a36Sopenharmony_ci	vst1.8		{q6}, [r0]!
74162306a36Sopenharmony_ci	veor		q4, q4, q11
74262306a36Sopenharmony_ci	vst1.8		{q4}, [r0]!
74362306a36Sopenharmony_ci	veor		q2, q2, q12
74462306a36Sopenharmony_ci	vst1.8		{q2}, [r0]!
74562306a36Sopenharmony_ci	veor		q7, q7, q13
74662306a36Sopenharmony_ci	vst1.8		{q7}, [r0]!
74762306a36Sopenharmony_ci	veor		q3, q3, q14
74862306a36Sopenharmony_ci	vst1.8		{q3}, [r0]!
74962306a36Sopenharmony_ci	veor		q5, q5, q15
75062306a36Sopenharmony_ci	vld1.8		{q8}, [r1]!		// load next round's iv
75162306a36Sopenharmony_ci2:	vst1.8		{q5}, [r0]!
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci	subs		r5, r5, #8
75462306a36Sopenharmony_ci	vst1.8		{q8}, [r6]		// store next round's iv
75562306a36Sopenharmony_ci	bgt		99b			// flags from the subs; vst1 does not change them
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	pop		{r4-r6, pc}
75862306a36Sopenharmony_ciENDPROC(aesbs_cbc_decrypt)
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	.macro		next_ctr, q
	/*
	 * Copy the 128-bit counter held in r7-r10 into \q, then add one to the
	 * copy kept in r7-r10. The carry chain r10 -> r9 -> r8 -> r7 makes r10
	 * the least significant word and r7 the most significant.
	 *
	 * The vmov instructions are interleaved with the additions; each one
	 * reads its register pair before that pair is updated, so \q receives
	 * the value from before the increment. vrev32.8 then reverses the
	 * bytes within every 32-bit word of \q.
	 *
	 * Changes the condition flags. Expands to seven instructions, which
	 * the jump arithmetic in aesbs_ctr_encrypt depends on.
	 */
76162306a36Sopenharmony_ci	vmov		\q\()h, r9, r10
76262306a36Sopenharmony_ci	adds		r10, r10, #1
76362306a36Sopenharmony_ci	adcs		r9, r9, #0
76462306a36Sopenharmony_ci	vmov		\q\()l, r7, r8
76562306a36Sopenharmony_ci	adcs		r8, r8, #0
76662306a36Sopenharmony_ci	adc		r7, r7, #0
76762306a36Sopenharmony_ci	vrev32.8	\q, \q
76862306a36Sopenharmony_ci	.endm
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	/*
77162306a36Sopenharmony_ci	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
77262306a36Sopenharmony_ci	 *		     int rounds, int bytes, u8 ctr[])
	 *
	 * Each loop iteration handles up to 128 bytes: up to eight counter
	 * blocks are passed through aesbs_encrypt8 and XORed with the input.
	 *
	 * Register use, as read from the code below:
	 *   r0		output pointer
	 *   r1		input pointer
	 *   r2, r3	copied to bskey (r4) and rounds (ip) before each call
	 *   r4		after the call: 16 - (bytes & 15)
	 *   r5		number of bytes still to process
	 *   r6		pointer to ctr[]
	 *   r7-r10	next counter value as four words (see next_ctr)
	 *   q0		counter block for the first block of the iteration
	 *
	 * A byte count that is not a multiple of 16 is handled at labels 3 and
	 * 4: the final 16-byte load and store are moved back by r4 bytes so
	 * that they end exactly at the end of the data, and .Lpermute_table is
	 * used to line the key stream up with the shifted data.
	 *
	 * NOTE(review): when bytes < 16 the shifted load and store begin
	 * 16 - bytes bytes before in[] and out[]; presumably the caller
	 * guarantees this is safe -- confirm. That exit path (label 4) also
	 * returns without writing the counter back to ctr[].
77362306a36Sopenharmony_ci	 */
77462306a36Sopenharmony_ciENTRY(aesbs_ctr_encrypt)
77562306a36Sopenharmony_ci	mov		ip, sp			// remember where the stack arguments are
77662306a36Sopenharmony_ci	push		{r4-r10, lr}
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci	ldm		ip, {r5, r6}		// load args 4-5
77962306a36Sopenharmony_ci	vld1.8		{q0}, [r6]		// load counter
78062306a36Sopenharmony_ci	vrev32.8	q1, q0			// reverse the bytes of each 32-bit word
78162306a36Sopenharmony_ci	vmov		r9, r10, d3
78262306a36Sopenharmony_ci	vmov		r7, r8, d2
78362306a36Sopenharmony_ci
	/* r7-r10 = counter + 1; q0 keeps the unincremented counter */
78462306a36Sopenharmony_ci	adds		r10, r10, #1
78562306a36Sopenharmony_ci	adcs		r9, r9, #0
78662306a36Sopenharmony_ci	adcs		r8, r8, #0
78762306a36Sopenharmony_ci	adc		r7, r7, #0
78862306a36Sopenharmony_ci
	/*
	 * Copy q0 into q1-q7, interleaved with the computation of
	 *   ip = 0f - 28 * (((bytes - 1) >> 4) & 7)
	 * where 28 bytes is one next_ctr expansion (seven instructions of
	 * 4 bytes each). Jumping to ip runs only the trailing next_ctr
	 * invocations; the registers that are skipped keep the copy of q0.
	 */
78962306a36Sopenharmony_ci99:	vmov		q1, q0
79062306a36Sopenharmony_ci	sub		lr, r5, #1
79162306a36Sopenharmony_ci	vmov		q2, q0
79262306a36Sopenharmony_ci	adr		ip, 0f
79362306a36Sopenharmony_ci	vmov		q3, q0
79462306a36Sopenharmony_ci	and		lr, lr, #112		// lr = 16 * (((bytes - 1) >> 4) & 7)
79562306a36Sopenharmony_ci	vmov		q4, q0
79662306a36Sopenharmony_ci	cmp		r5, #112
79762306a36Sopenharmony_ci	vmov		q5, q0
79862306a36Sopenharmony_ci	sub		ip, ip, lr, lsl #1	// ip -= 2 * lr
79962306a36Sopenharmony_ci	vmov		q6, q0
80062306a36Sopenharmony_ci	add		ip, ip, lr, lsr #2	// ip += lr / 4
80162306a36Sopenharmony_ci	vmov		q7, q0
80262306a36Sopenharmony_ci	movle		pc, ip			// computed goto if bytes <= 112
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci	next_ctr	q1
80562306a36Sopenharmony_ci	next_ctr	q2
80662306a36Sopenharmony_ci	next_ctr	q3
80762306a36Sopenharmony_ci	next_ctr	q4
80862306a36Sopenharmony_ci	next_ctr	q5
80962306a36Sopenharmony_ci	next_ctr	q6
81062306a36Sopenharmony_ci	next_ctr	q7
81162306a36Sopenharmony_ci
81262306a36Sopenharmony_ci0:	mov		bskey, r2
81362306a36Sopenharmony_ci	mov		rounds, r3
81462306a36Sopenharmony_ci	bl		aesbs_encrypt8
81562306a36Sopenharmony_ci
	/*
	 * Flags set up here and consumed further down:
	 *   C set   : bytes >= 128
	 *   Z set   : bytes >= 128, or bytes is a multiple of 16
	 *   Z clear : the last block of this iteration is a partial one
	 */
81662306a36Sopenharmony_ci	adr		ip, 1f
81762306a36Sopenharmony_ci	sub		lr, r5, #1
81862306a36Sopenharmony_ci	cmp		r5, #128
81962306a36Sopenharmony_ci	bic		lr, lr, #15		// lr = 16 * ((bytes - 1) >> 4)
82062306a36Sopenharmony_ci	ands		r4, r5, #15		// preserves C flag
82162306a36Sopenharmony_ci	teqcs		r5, r5			// set Z flag if not last iteration
82262306a36Sopenharmony_ci	sub		ip, ip, lr, lsr #2	// ip = 1f - 4 * ((bytes - 1) >> 4)
82362306a36Sopenharmony_ci	rsb		r4, r4, #16		// r4 = 16 - (bytes & 15)
82462306a36Sopenharmony_ci	movcc		pc, ip			// computed goto if bytes < 128
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	vld1.8		{q8}, [r1]!
82762306a36Sopenharmony_ci	vld1.8		{q9}, [r1]!
82862306a36Sopenharmony_ci	vld1.8		{q10}, [r1]!
82962306a36Sopenharmony_ci	vld1.8		{q11}, [r1]!
83062306a36Sopenharmony_ci	vld1.8		{q12}, [r1]!
83162306a36Sopenharmony_ci	vld1.8		{q13}, [r1]!
83262306a36Sopenharmony_ci	vld1.8		{q14}, [r1]!
83362306a36Sopenharmony_ci1:	subne		r1, r1, r4		// partial block: make the load end at the end of the input
83462306a36Sopenharmony_ci	vld1.8		{q15}, [r1]!
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	add		ip, ip, #2f - 1b	// same position, now relative to the store sequence
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	veor		q0, q0, q8
83962306a36Sopenharmony_ci	veor		q1, q1, q9
84062306a36Sopenharmony_ci	veor		q4, q4, q10
84162306a36Sopenharmony_ci	veor		q6, q6, q11
84262306a36Sopenharmony_ci	veor		q3, q3, q12
84362306a36Sopenharmony_ci	veor		q7, q7, q13
84462306a36Sopenharmony_ci	veor		q2, q2, q14
84562306a36Sopenharmony_ci	bne		3f			// partial final block is handled separately
84662306a36Sopenharmony_ci	veor		q5, q5, q15
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	movcc		pc, ip			// computed goto if bytes < 128
84962306a36Sopenharmony_ci
85062306a36Sopenharmony_ci	vst1.8		{q0}, [r0]!
85162306a36Sopenharmony_ci	vst1.8		{q1}, [r0]!
85262306a36Sopenharmony_ci	vst1.8		{q4}, [r0]!
85362306a36Sopenharmony_ci	vst1.8		{q6}, [r0]!
85462306a36Sopenharmony_ci	vst1.8		{q3}, [r0]!
85562306a36Sopenharmony_ci	vst1.8		{q7}, [r0]!
85662306a36Sopenharmony_ci	vst1.8		{q2}, [r0]!
85762306a36Sopenharmony_ci2:	subne		r0, r0, r4		// partial block: make the store end at the end of the output
85862306a36Sopenharmony_ci	vst1.8		{q5}, [r0]!
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	next_ctr	q0			// q0 = counter for the next iteration
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci	subs		r5, r5, #128
86362306a36Sopenharmony_ci	bgt		99b
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	vst1.8		{q0}, [r6]		// write the updated counter back to ctr[]
86662306a36Sopenharmony_ci	pop		{r4-r10, pc}
86762306a36Sopenharmony_ci
	/*
	 * Partial final block. q15 holds the last 16 input bytes, so the
	 * (bytes & 15) bytes of the partial block sit at its top end. q8/q9
	 * are loaded with a 32-byte window of .Lpermute_table starting at
	 * offset (bytes & 15). vtbl with q8 moves the leading key stream bytes
	 * of q5 to the top end and zeroes the rest (index 0xff is out of
	 * range), so after the veor the low bytes of q5 are unmodified input.
	 */
86862306a36Sopenharmony_ci3:	adr		lr, .Lpermute_table + 16
86962306a36Sopenharmony_ci	cmp		r5, #16			// Z flag remains cleared
87062306a36Sopenharmony_ci	sub		lr, lr, r4
87162306a36Sopenharmony_ci	vld1.8		{q8-q9}, [lr]
87262306a36Sopenharmony_ci	vtbl.8		d16, {q5}, d16
87362306a36Sopenharmony_ci	vtbl.8		d17, {q5}, d17
87462306a36Sopenharmony_ci	veor		q5, q8, q15
87562306a36Sopenharmony_ci	bcc		4f			// have to reload prev if R5 < 16
	/*
	 * The low bytes of q5 overlap the tail of the previous output block,
	 * which is in q2: vtbx copies those bytes in from q2 and leaves the
	 * bytes whose index is 0xff untouched.
	 */
87662306a36Sopenharmony_ci	vtbx.8		d10, {q2}, d18
87762306a36Sopenharmony_ci	vtbx.8		d11, {q2}, d19
87862306a36Sopenharmony_ci	mov		pc, ip			// branch back to VST sequence
87962306a36Sopenharmony_ci
	/*
	 * Fewer than 16 bytes in total: there is no previous output block in
	 * a register, so the 16 bytes ending at the end of the output are read
	 * back from memory and merged. After the arithmetic shift, q9 is 0xff
	 * in the byte positions of the partial block and zero elsewhere; vbif
	 * takes the bytes of q8 wherever q9 is zero.
	 */
88062306a36Sopenharmony_ci4:	sub		r0, r0, r4
88162306a36Sopenharmony_ci	vshr.s8		q9, q9, #7		// create mask for VBIF
88262306a36Sopenharmony_ci	vld1.8		{q8}, [r0]		// reload
88362306a36Sopenharmony_ci	vbif		q5, q8, q9
88462306a36Sopenharmony_ci	vst1.8		{q5}, [r0]
88562306a36Sopenharmony_ci	pop		{r4-r10, pc}
88662306a36Sopenharmony_ciENDPROC(aesbs_ctr_encrypt)
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci	.align		6
	/*
	 * Index table for the partial-block path of aesbs_ctr_encrypt: sixteen
	 * 0xff bytes, the byte indices 0-15, then sixteen more 0xff bytes.
	 * That code loads a 32-byte window starting at
	 * .Lpermute_table + 16 - r4 and uses the two halves as the index
	 * operands of vtbl and vtbx, where 0xff is an out-of-range index.
	 */
88962306a36Sopenharmony_ci.Lpermute_table:
89062306a36Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
89162306a36Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
89262306a36Sopenharmony_ci	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
89362306a36Sopenharmony_ci	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
89462306a36Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
89562306a36Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	.macro		next_tweak, out, in, const, tmp
	/*
	 * Derive the next tweak \out from \in; \tmp is clobbered.
	 *
	 * Each 64-bit lane of \in is doubled (added to itself). For each lane
	 * whose top bit was set, the matching lane of \const is XORed into the
	 * *other* lane of the result (the vext by 8 bytes swaps the lanes).
	 * With \const = { 1, 0x87 } as composed in __xts_prepare8, this
	 * carries bit 63 into bit 64 and folds 0x87 into the low lane when
	 * bit 127 is shifted out.
	 *
	 * Expands to five instructions, which the jump arithmetic in
	 * __xts_prepare8 depends on.
	 */
89862306a36Sopenharmony_ci	vshr.s64	\tmp, \in, #63
89962306a36Sopenharmony_ci	vand		\tmp, \tmp, \const
90062306a36Sopenharmony_ci	vadd.u64	\out, \in, \in
90162306a36Sopenharmony_ci	vext.8		\tmp, \tmp, \tmp, #8
90262306a36Sopenharmony_ci	veor		\out, \out, \tmp
90362306a36Sopenharmony_ci	.endm
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_ci	/*
90662306a36Sopenharmony_ci	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
90762306a36Sopenharmony_ci	 *		     int blocks, u8 iv[], int reorder_last_tweak)
90862306a36Sopenharmony_ci	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
90962306a36Sopenharmony_ci	 *		     int blocks, u8 iv[], int reorder_last_tweak)
91062306a36Sopenharmony_ci	 */
91162306a36Sopenharmony_ci	.align		6
	/*
	 * __xts_prepare8: helper called from the __xts_crypt macro.
	 *
	 * Loads up to eight input blocks, XORs each with its tweak and leaves
	 * the results in q0-q7. The tweaks used are stored consecutively in
	 * the 16-byte aligned buffer at sp, and the tweak for the block after
	 * this group is written back to the iv buffer.
	 *
	 * Inputs, as read from the code below:
	 *   r1		input pointer, advanced by 16 per block loaded
	 *   r6		number of blocks still to process
	 *   r7		pointer to the iv (current tweak)
	 *   r8		if <= 0 and r6 <= 8, the last two tweaks are swapped
	 *   sp		start of the tweak buffer
	 * Clobbers r4, ip, q12-q15 and the flags; returns with bx lr.
	 *
	 * With fewer than eight blocks left, execution jumps into the unrolled
	 * sequence so that only the last (r6 & 7) groups run. Every group is
	 * assumed to occupy 32 bytes: load, the five instructions of
	 * next_tweak, XOR and store.
	 * NOTE(review): the final group has its XOR and store after label 0,
	 * with the compare and branch in front of it filling the remaining two
	 * slots; in a Thumb-2 build this needs itt/cmple.w/ble to total
	 * 8 bytes -- confirm.
	 */
91262306a36Sopenharmony_ci__xts_prepare8:
91362306a36Sopenharmony_ci	vld1.8		{q14}, [r7]		// load iv
91462306a36Sopenharmony_ci	vmov.i32	d30, #0x87		// compose tweak mask vector
91562306a36Sopenharmony_ci	vmovl.u32	q15, d30
91662306a36Sopenharmony_ci	vshr.u64	d30, d31, #7		// q15 = { 1, 0x87 } as 64-bit lanes
91762306a36Sopenharmony_ci	vmov		q12, q14		// a jump may land on a group that reads q12
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_ci	adr		ip, 0f
92062306a36Sopenharmony_ci	and		r4, r6, #7
92162306a36Sopenharmony_ci	cmp		r6, #8
92262306a36Sopenharmony_ci	sub		ip, ip, r4, lsl #5	// ip = 0f - 32 * (blocks & 7)
92362306a36Sopenharmony_ci	mov		r4, sp			// r4 = write pointer into the tweak buffer
92462306a36Sopenharmony_ci	movlt		pc, ip			// computed goto if blocks < 8
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	vld1.8		{q0}, [r1]!
92762306a36Sopenharmony_ci	next_tweak	q12, q14, q15, q13
92862306a36Sopenharmony_ci	veor		q0, q0, q14
92962306a36Sopenharmony_ci	vst1.8		{q14}, [r4, :128]!
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ci	vld1.8		{q1}, [r1]!
93262306a36Sopenharmony_ci	next_tweak	q14, q12, q15, q13
93362306a36Sopenharmony_ci	veor		q1, q1, q12
93462306a36Sopenharmony_ci	vst1.8		{q12}, [r4, :128]!
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	vld1.8		{q2}, [r1]!
93762306a36Sopenharmony_ci	next_tweak	q12, q14, q15, q13
93862306a36Sopenharmony_ci	veor		q2, q2, q14
93962306a36Sopenharmony_ci	vst1.8		{q14}, [r4, :128]!
94062306a36Sopenharmony_ci
94162306a36Sopenharmony_ci	vld1.8		{q3}, [r1]!
94262306a36Sopenharmony_ci	next_tweak	q14, q12, q15, q13
94362306a36Sopenharmony_ci	veor		q3, q3, q12
94462306a36Sopenharmony_ci	vst1.8		{q12}, [r4, :128]!
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci	vld1.8		{q4}, [r1]!
94762306a36Sopenharmony_ci	next_tweak	q12, q14, q15, q13
94862306a36Sopenharmony_ci	veor		q4, q4, q14
94962306a36Sopenharmony_ci	vst1.8		{q14}, [r4, :128]!
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ci	vld1.8		{q5}, [r1]!
95262306a36Sopenharmony_ci	next_tweak	q14, q12, q15, q13
95362306a36Sopenharmony_ci	veor		q5, q5, q12
95462306a36Sopenharmony_ci	vst1.8		{q12}, [r4, :128]!
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	vld1.8		{q6}, [r1]!
95762306a36Sopenharmony_ci	next_tweak	q12, q14, q15, q13
95862306a36Sopenharmony_ci	veor		q6, q6, q14
95962306a36Sopenharmony_ci	vst1.8		{q14}, [r4, :128]!
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci	vld1.8		{q7}, [r1]!
96262306a36Sopenharmony_ci	next_tweak	q14, q12, q15, q13
	/*
	 * The flags are still those of "cmp r6, #8" above: none of the
	 * instructions in between sets them. If r6 <= 8, r8 is tested as well
	 * and the branch is taken when r8 <= 0.
	 */
96362306a36Sopenharmony_ciTHUMB(	itt		le		)
96462306a36Sopenharmony_ci	W(cmple)	r8, #0
96562306a36Sopenharmony_ci	ble		1f
96662306a36Sopenharmony_ci0:	veor		q7, q7, q12
96762306a36Sopenharmony_ci	vst1.8		{q12}, [r4, :128]
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_ci	vst1.8		{q14}, [r7]		// store next iv
97062306a36Sopenharmony_ci	bx		lr
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_ci1:	vswp		q12, q14		// exchange the final tweak and the one after it
97362306a36Sopenharmony_ci	b		0b
97462306a36Sopenharmony_ciENDPROC(__xts_prepare8)
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ci	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	/*
	 * Function body shared by aesbs_xts_encrypt and aesbs_xts_decrypt.
	 *
	 *   \do8	routine called once per group of up to eight blocks
	 *   \o0-\o7	q registers that are XORed with the tweaks and written
	 *		to the output after the call, in this order
	 *
	 * On entry ip must hold the "reorder last tweak" value; the code that
	 * expands this macro sets it up just before.
	 *
	 * Register use, as read from the code below:
	 *   r0		output pointer, advanced by 16 for every block stored
	 *   r2, r3	copied to bskey (r4) and rounds (ip) before each call
	 *   r5		original stack pointer, restored before returning
	 *   r6		number of blocks still to process
	 *   r7		pointer to the iv
	 *   r8		1 - ip, consumed by __xts_prepare8
	 *   sp		16-byte aligned buffer with room for eight tweaks
	 *
	 * With fewer than eight blocks left, the tweak loads and the XOR/store
	 * pairs are entered part-way through so that only their tails run. The
	 * offsets rely on every instruction there occupying 4 bytes.
	 */
97762306a36Sopenharmony_ci	push		{r4-r8, lr}
97862306a36Sopenharmony_ci	mov		r5, sp			// preserve sp
97962306a36Sopenharmony_ci	ldrd		r6, r7, [sp, #24]	// get blocks and iv args
98062306a36Sopenharmony_ci	rsb		r8, ip, #1		// r8 = 1 - reorder flag
98162306a36Sopenharmony_ci	sub		ip, sp, #128		// make room for 8x tweak
98262306a36Sopenharmony_ci	bic		ip, ip, #0xf		// align sp to 16 bytes
98362306a36Sopenharmony_ci	mov		sp, ip
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_ci99:	bl		__xts_prepare8		// q0-q7 = input ^ tweak, tweaks saved at sp
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ci	mov		bskey, r2
98862306a36Sopenharmony_ci	mov		rounds, r3
98962306a36Sopenharmony_ci	bl		\do8
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci	adr		ip, 0f
99262306a36Sopenharmony_ci	and		lr, r6, #7
99362306a36Sopenharmony_ci	cmp		r6, #8
99462306a36Sopenharmony_ci	sub		ip, ip, lr, lsl #2	// ip = 0f - 4 * (blocks & 7)
99562306a36Sopenharmony_ci	mov		r4, sp			// r4 = read pointer into the tweak buffer
99662306a36Sopenharmony_ci	movlt		pc, ip			// computed goto if blocks < 8
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	vld1.8		{q8}, [r4, :128]!
99962306a36Sopenharmony_ci	vld1.8		{q9}, [r4, :128]!
100062306a36Sopenharmony_ci	vld1.8		{q10}, [r4, :128]!
100162306a36Sopenharmony_ci	vld1.8		{q11}, [r4, :128]!
100262306a36Sopenharmony_ci	vld1.8		{q12}, [r4, :128]!
100362306a36Sopenharmony_ci	vld1.8		{q13}, [r4, :128]!
100462306a36Sopenharmony_ci	vld1.8		{q14}, [r4, :128]!
100562306a36Sopenharmony_ci	vld1.8		{q15}, [r4, :128]
100662306a36Sopenharmony_ci
	/*
	 * lr and the flags are unchanged since the cmp above. Each XOR/store
	 * pair below is two instructions, hence the shift by 3.
	 */
100762306a36Sopenharmony_ci0:	adr		ip, 1f
100862306a36Sopenharmony_ci	sub		ip, ip, lr, lsl #3	// ip = 1f - 8 * (blocks & 7)
100962306a36Sopenharmony_ci	movlt		pc, ip			// computed goto if blocks < 8
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci	veor		\o0, \o0, q8
101262306a36Sopenharmony_ci	vst1.8		{\o0}, [r0]!
101362306a36Sopenharmony_ci	veor		\o1, \o1, q9
101462306a36Sopenharmony_ci	vst1.8		{\o1}, [r0]!
101562306a36Sopenharmony_ci	veor		\o2, \o2, q10
101662306a36Sopenharmony_ci	vst1.8		{\o2}, [r0]!
101762306a36Sopenharmony_ci	veor		\o3, \o3, q11
101862306a36Sopenharmony_ci	vst1.8		{\o3}, [r0]!
101962306a36Sopenharmony_ci	veor		\o4, \o4, q12
102062306a36Sopenharmony_ci	vst1.8		{\o4}, [r0]!
102162306a36Sopenharmony_ci	veor		\o5, \o5, q13
102262306a36Sopenharmony_ci	vst1.8		{\o5}, [r0]!
102362306a36Sopenharmony_ci	veor		\o6, \o6, q14
102462306a36Sopenharmony_ci	vst1.8		{\o6}, [r0]!
102562306a36Sopenharmony_ci	veor		\o7, \o7, q15
102662306a36Sopenharmony_ci	vst1.8		{\o7}, [r0]!
102762306a36Sopenharmony_ci
102862306a36Sopenharmony_ci1:	subs		r6, r6, #8
102962306a36Sopenharmony_ci	bgt		99b			// loop while blocks remain
103062306a36Sopenharmony_ci
103162306a36Sopenharmony_ci	mov		sp, r5			// release the tweak buffer
103262306a36Sopenharmony_ci	pop		{r4-r8, pc}
103362306a36Sopenharmony_ci	.endm
103462306a36Sopenharmony_ci
103562306a36Sopenharmony_ciENTRY(aesbs_xts_encrypt)
	/*
	 * XTS encryption entry point. ip is forced to zero, so the
	 * reorder_last_tweak stack argument is not read on this path, and
	 * __xts_crypt is expanded with aesbs_encrypt8 and its output register
	 * order.
	 */
103662306a36Sopenharmony_ci	mov		ip, #0			// never reorder final tweak
103762306a36Sopenharmony_ci	__xts_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
103862306a36Sopenharmony_ciENDPROC(aesbs_xts_encrypt)
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_ciENTRY(aesbs_xts_decrypt)
	/*
	 * XTS decryption entry point. The third stack argument
	 * (reorder_last_tweak) is read into ip while sp still has its value
	 * from function entry, then __xts_crypt is expanded with
	 * aesbs_decrypt8 and its output register order.
	 */
104162306a36Sopenharmony_ci	ldr		ip, [sp, #8]		// reorder final tweak?
104262306a36Sopenharmony_ci	__xts_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
104362306a36Sopenharmony_ciENDPROC(aesbs_xts_decrypt)
1044