/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/*
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>

	.text

	/*
	 * Register aliases used by aesbs_encrypt8/aesbs_decrypt8.  The ECB
	 * wrapper macro loads both of them right before calling either helper.
	 */
	rounds		.req	x11		// AES round count (consumed by the helpers)
	bskey		.req	x12		// cursor into the converted key schedule

/*
 * in_bs_ch - first stage of sbox(): an in-place XOR network over the eight
 * bit-plane operands b0..b7 (complete vector operands such as v0.16b).
 *
 * b1..b7 are rewritten; b0 is only ever read.  No temporaries are used.
 * NOTE(review): by name and position this is presumably the input change of
 * basis of the bit-sliced S-box from the Kaesper/Schwabe paper - confirm.
 */
	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor		\b2, \b2, \b1
	eor		\b5, \b5, \b6
	eor		\b3, \b3, \b0
	eor		\b6, \b6, \b2
	eor		\b5, \b5, \b0
	eor		\b6, \b6, \b3
	eor		\b3, \b3, \b7
	eor		\b7, \b7, \b5
	eor		\b3, \b3, \b4
	eor		\b4, \b4, \b5
	eor		\b2, \b2, \b7
	eor		\b3, \b3, \b1
	eor		\b1, \b1, \b5
	.endm

/*
 * out_bs_ch - last stage of sbox(): an in-place XOR network over the eight
 * bit-plane operands b0..b7 (complete vector operands such as v0.16b).
 *
 * All eight operands are rewritten; no temporaries are used.
 * NOTE(review): presumably the output change of basis that pairs with
 * in_bs_ch - confirm against the Kaesper/Schwabe paper.
 */
	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor		\b0, \b0, \b6
	eor		\b1, \b1, \b4
	eor		\b4, \b4, \b6
	eor		\b2, \b2, \b0
	eor		\b6, \b6, \b1
	eor		\b1, \b1, \b5
	eor		\b5, \b5, \b3
	eor		\b3, \b3, \b7
	eor		\b7, \b7, \b5
	eor		\b2, \b2, \b5
	eor		\b4, \b4, \b7
	.endm

/*
 * inv_in_bs_ch - first stage of inv_sbox(): an in-place XOR network over
 * eight bit-plane operands (complete vector operands such as v0.16b).
 *
 * The formals are declared in the order b6, b1, b2, b4, b7, b0, b3, b5, so
 * the positional arguments of a call bind to those names, not to b0..b7 in
 * numeric order.  All eight operands are rewritten; no temporaries are used.
 */
	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	eor		\b1, \b1, \b7
	eor		\b4, \b4, \b7
	eor		\b7, \b7, \b5
	eor		\b1, \b1, \b3
	eor		\b2, \b2, \b5
	eor		\b3, \b3, \b7
	eor		\b6, \b6, \b1
	eor		\b2, \b2, \b0
	eor		\b5, \b5, \b3
	eor		\b4, \b4, \b6
	eor		\b0, \b0, \b6
	eor		\b1, \b1, \b4
	.endm

/*
 * inv_out_bs_ch - last stage of inv_sbox(): an in-place XOR network over
 * eight bit-plane operands (complete vector operands such as v0.16b).
 *
 * The formals are declared in the order b6, b5, b0, b3, b7, b1, b4, b2, so
 * positional arguments bind to those names.  Every operand except b0 is
 * rewritten; b0 is only read.  No temporaries are used.
 */
	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	eor		\b1, \b1, \b5
	eor		\b2, \b2, \b7
	eor		\b3, \b3, \b1
	eor		\b4, \b4, \b5
	eor		\b7, \b7, \b5
	eor		\b3, \b3, \b4
	eor		\b5, \b5, \b0
	eor		\b3, \b3, \b7
	eor		\b6, \b6, \b2
	eor		\b2, \b2, \b1
	eor		\b6, \b6, \b3
	eor		\b3, \b3, \b0
	eor		\b5, \b5, \b6
	.endm

/*
 * mul_gf4 - combine the operand pair (x0, x1) with (y0, y1) in place.
 *
 * On exit (all operations are bitwise across the whole vector):
 *	x0 = ((x0 ^ x1) & y1) ^ (x1 & y0)
 *	x1 = (x1 & y0) ^ ((y0 ^ y1) & x0)
 * where the right-hand sides use the values on entry.
 * y0 and y1 are only read; t0 and t1 are clobbered.
 * NOTE(review): per its name this is a multiplication in GF(2^2).
 */
	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
	eor		\t0, \y0, \y1
	and		\t0, \t0, \x0
	eor		\x0, \x0, \x1
	and		\t1, \x1, \y0
	and		\x0, \x0, \y1
	eor		\x1, \t1, \t0
	eor		\x0, \x0, \t1
	.endm

/*
 * mul_gf4_n_gf4 - two interleaved pair products, each with a single temp.
 *
 * First product, on (x0, x1) with (y0, y1), temp t0:
 *	x0 = ((x0 ^ x1) & y1) ^ ((y0 ^ y1) & x0)
 *	x1 = (x1 & y0) ^ ((x0 ^ x1) & y1)
 * Second product, on (x2, x3) with (y2, y3), temp t1:
 *	x2 = ((x2 ^ x3) & y3) ^ (x3 & y2)
 *	x3 = (x3 & y2) ^ ((y2 ^ y3) & x2)
 * (right-hand sides use the values on entry).  The second product has the
 * same form as mul_gf4; the first one differs from it.
 * NOTE(review): the first form is presumably the "multiply, then scale by
 * N" variant that the macro name refers to - confirm.
 * y0..y3 are only read; t0 and t1 are clobbered.
 */
	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	eor		\t0, \y0, \y1
	eor		\t1, \y2, \y3
	and		\t0, \t0, \x0
	and		\t1, \t1, \x2
	eor		\x0, \x0, \x1
	eor		\x2, \x2, \x3
	and		\x1, \x1, \y0
	and		\x3, \x3, \y2
	and		\x0, \x0, \y1
	and		\x2, \x2, \y3
	eor		\x1, \x1, \x0
	eor		\x2, \x2, \x3
	eor		\x0, \x0, \t0
	eor		\x3, \x3, \t1
	.endm

/*
 * mul_gf16_2 - combine each of the two four-plane operands x0..x3 and
 * x4..x7 with the same four-plane operand y0..y3, in place.
 *
 * Built from mul_gf4 and mul_gf4_n_gf4.  y0 and y1 are XORed with y2/y3
 * before the middle two products and XORed again afterwards, so they hold
 * their entry values again when the macro ends; y2 and y3 are only read.
 * t0..t3 are clobbered.
 * NOTE(review): per its name, two GF(2^4) multiplications sharing one
 * multiplicand.
 */
	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
				    y0, y1, y2, y3, t0, t1, t2, t3
	eor		\t0, \x0, \x2
	eor		\t1, \x1, \x3
	mul_gf4		\x0, \x1, \y0, \y1, \t2, \t3
	eor		\y0, \y0, \y2			// y0/y1 ^= y2/y3 ...
	eor		\y1, \y1, \y3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	eor		\x0, \x0, \t0
	eor		\x2, \x2, \t0
	eor		\x1, \x1, \t1
	eor		\x3, \x3, \t1
	eor		\t0, \x4, \x6
	eor		\t1, \x5, \x7
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	eor		\y0, \y0, \y2			// ... and back to entry values
	eor		\y1, \y1, \y3
	mul_gf4		\x4, \x5, \y0, \y1, \t2, \t3
	eor		\x4, \x4, \t0
	eor		\x6, \x6, \t0
	eor		\x5, \x5, \t1
	eor		\x7, \x7, \t1
	.endm

/*
 * inv_gf256 - middle stage of sbox()/inv_sbox(), operating in place on the
 * eight bit-plane operands x0..x7 (complete vector operands, e.g. v0.16b).
 *
 * The straight-line AND/ORR/EOR/BSL sequence derives four planes in s3,
 * s2, s1 and t1, which are then handed to mul_gf16_2 as its y0..y3 operand
 * to update x0..x7; s0, t0, t2 and t3 serve as the temporaries of that
 * call.  All of t0..t3 and s0..s3 are clobbered.
 *
 * "bsl d, n, m" computes d = (d & n) | (~d & m), i.e. d acts as the
 * selector and is overwritten with the selection result.
 * NOTE(review): per its name this is the inversion in GF(2^8) at the heart
 * of the S-box - confirm against the Kaesper/Schwabe paper.
 */
	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
				   t0, t1, t2, t3, s0, s1, s2, s3
	eor		\t3, \x4, \x6
	eor		\t0, \x5, \x7
	eor		\t1, \x1, \x3
	eor		\s1, \x7, \x6
	eor		\s0, \x0, \x2
	eor		\s3, \t3, \t0
	orr		\t2, \t0, \t1
	and		\s2, \t3, \s0
	orr		\t3, \t3, \s0
	eor		\s0, \s0, \t1
	and		\t0, \t0, \t1
	eor		\t1, \x3, \x2
	and		\s3, \s3, \s0
	and		\s1, \s1, \t1
	eor		\t1, \x4, \x5
	eor		\s0, \x1, \x0
	eor		\t3, \t3, \s1
	eor		\t2, \t2, \s1
	and		\s1, \t1, \s0
	orr		\t1, \t1, \s0
	eor		\t3, \t3, \s3
	eor		\t0, \t0, \s1
	eor		\t2, \t2, \s2
	eor		\t1, \t1, \s3
	eor		\t0, \t0, \s2
	and		\s0, \x7, \x3
	eor		\t1, \t1, \s2
	and		\s1, \x6, \x2
	and		\s2, \x5, \x1
	orr		\s3, \x4, \x0
	eor		\t3, \t3, \s0
	eor		\t1, \t1, \s2
	eor		\s0, \t0, \s3
	eor		\t2, \t2, \s1
	and		\s2, \t3, \t1
	eor		\s1, \t2, \s2
	eor		\s3, \s0, \s2
	bsl		\s1, \t1, \s0
	not		\t0, \s0
	bsl		\s0, \s1, \s3
	bsl		\t0, \s1, \s3
	bsl		\s3, \t3, \t2
	eor		\t3, \t3, \t2
	and		\s2, \s0, \s3
	eor		\t1, \t1, \t0
	eor		\s2, \s2, \t3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	.endm

/*
 * sbox - forward S-box step on eight bit-sliced state registers.
 *
 * b0..b7, t0..t3 and s0..s3 are bare register names (e.g. v0); the macro
 * appends the .16b arrangement itself.  It chains in_bs_ch, inv_gf256 and
 * out_bs_ch, passing the b registers to the latter two in permuted orders.
 * The state is updated in place; t0..t3 and s0..s3 are clobbered.
 *
 * The caller in this file (aesbs_encrypt8) subsequently treats the result
 * as living in the register order b0, b1, b4, b6, b3, b7, b2, b5.
 */
	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			      t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
	.endm

/*
 * inv_sbox - inverse S-box step on eight bit-sliced state registers.
 *
 * b0..b7, t0..t3 and s0..s3 are bare register names (e.g. v0); the macro
 * appends the .16b arrangement itself.  It chains inv_in_bs_ch, inv_gf256
 * and inv_out_bs_ch, passing the b registers to the latter two in permuted
 * orders.  The state is updated in place; t0..t3 and s0..s3 are clobbered.
 *
 * The caller in this file (aesbs_decrypt8) subsequently treats the result
 * as living in the register order b0, b1, b6, b4, b2, b7, b3, b5.
 */
	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
				  t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
	.endm

/*
 * enc_next_rk - load the next 128-byte bit-sliced round key into q16..q23
 * and advance bskey past it (post-indexed by 128 on the first load; the
 * remaining loads use negative offsets from the updated pointer).
 */
	.macro		enc_next_rk
	ldp		q16, q17, [bskey], #128
	ldp		q18, q19, [bskey, #-96]
	ldp		q20, q21, [bskey, #-64]
	ldp		q22, q23, [bskey, #-32]
	.endm

/*
 * dec_next_rk - step bskey back by 128 bytes (pre-indexed on the first
 * load) and load the bit-sliced round key found there into q16..q23.
 */
	.macro		dec_next_rk
	ldp		q16, q17, [bskey, #-128]!
	ldp		q18, q19, [bskey, #32]
	ldp		q20, q21, [bskey, #64]
	ldp		q22, q23, [bskey, #96]
	.endm

/*
 * add_round_key - XOR the eight state registers with the round key planes
 * held in v16..v23 (as loaded by enc_next_rk/dec_next_rk).
 *
 * x0..x7 are bare register names; argument k is paired with v(16 + k).
 */
	.macro		add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
	eor		\x0\().16b, \x0\().16b, v16.16b
	eor		\x1\().16b, \x1\().16b, v17.16b
	eor		\x2\().16b, \x2\().16b, v18.16b
	eor		\x3\().16b, \x3\().16b, v19.16b
	eor		\x4\().16b, \x4\().16b, v20.16b
	eor		\x5\().16b, \x5\().16b, v21.16b
	eor		\x6\().16b, \x6\().16b, v22.16b
	eor		\x7\().16b, \x7\().16b, v23.16b
	.endm

/*
 * shift_rows - apply the same byte permutation to each of the eight state
 * registers, in place, using a single-register TBL lookup.
 *
 * x0..x7 and mask are bare register names; mask holds the 16 byte indices.
 * The callers pass v24, loaded from SR/SRM0 (encryption) or ISR/ISRM0
 * (decryption).
 */
	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
	tbl		\x0\().16b, {\x0\().16b}, \mask\().16b
	tbl		\x1\().16b, {\x1\().16b}, \mask\().16b
	tbl		\x2\().16b, {\x2\().16b}, \mask\().16b
	tbl		\x3\().16b, {\x3\().16b}, \mask\().16b
	tbl		\x4\().16b, {\x4\().16b}, \mask\().16b
	tbl		\x5\().16b, {\x5\().16b}, \mask\().16b
	tbl		\x6\().16b, {\x6\().16b}, \mask\().16b
	tbl		\x7\().16b, {\x7\().16b}, \mask\().16b
	.endm

/*
 * mix_cols - MixColumns step on eight bit-sliced state registers.
 *
 * x0..x7 are the state registers (bare names, updated in place), t0..t7
 * are scratch registers that get clobbered.  The work is done with EXT
 * byte rotations of each register against itself (by 12 and by 8 bytes)
 * combined through XORs.
 *
 * inv selects the final stage: when it is left blank the five tail XORs
 * write x2, x4, x5, x3 and x6 directly; when any value is passed (as
 * inv_mix_cols does) an alternative tail is assembled that stores the
 * results into a different assignment of x2..x6.
 * NOTE(review): the two tails leave the planes in different registers; the
 * call sites in aesbs_encrypt8/aesbs_decrypt8 show the register orders the
 * surrounding code relies on.
 */
	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				  t0, t1, t2, t3, t4, t5, t6, t7, inv
	ext		\t0\().16b, \x0\().16b, \x0\().16b, #12
	ext		\t1\().16b, \x1\().16b, \x1\().16b, #12
	eor		\x0\().16b, \x0\().16b, \t0\().16b
	ext		\t2\().16b, \x2\().16b, \x2\().16b, #12
	eor		\x1\().16b, \x1\().16b, \t1\().16b
	ext		\t3\().16b, \x3\().16b, \x3\().16b, #12
	eor		\x2\().16b, \x2\().16b, \t2\().16b
	ext		\t4\().16b, \x4\().16b, \x4\().16b, #12
	eor		\x3\().16b, \x3\().16b, \t3\().16b
	ext		\t5\().16b, \x5\().16b, \x5\().16b, #12
	eor		\x4\().16b, \x4\().16b, \t4\().16b
	ext		\t6\().16b, \x6\().16b, \x6\().16b, #12
	eor		\x5\().16b, \x5\().16b, \t5\().16b
	ext		\t7\().16b, \x7\().16b, \x7\().16b, #12
	eor		\x6\().16b, \x6\().16b, \t6\().16b
	eor		\t1\().16b, \t1\().16b, \x0\().16b
	eor		\x7\().16b, \x7\().16b, \t7\().16b
	ext		\x0\().16b, \x0\().16b, \x0\().16b, #8
	eor		\t2\().16b, \t2\().16b, \x1\().16b
	eor		\t0\().16b, \t0\().16b, \x7\().16b
	eor		\t1\().16b, \t1\().16b, \x7\().16b
	ext		\x1\().16b, \x1\().16b, \x1\().16b, #8
	eor		\t5\().16b, \t5\().16b, \x4\().16b
	eor		\x0\().16b, \x0\().16b, \t0\().16b
	eor		\t6\().16b, \t6\().16b, \x5\().16b
	eor		\x1\().16b, \x1\().16b, \t1\().16b
	ext		\t0\().16b, \x4\().16b, \x4\().16b, #8
	eor		\t4\().16b, \t4\().16b, \x3\().16b
	ext		\t1\().16b, \x5\().16b, \x5\().16b, #8
	eor		\t7\().16b, \t7\().16b, \x6\().16b
	ext		\x4\().16b, \x3\().16b, \x3\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x2\().16b
	ext		\x5\().16b, \x7\().16b, \x7\().16b, #8
	eor		\t4\().16b, \t4\().16b, \x7\().16b
	ext		\x3\().16b, \x6\().16b, \x6\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x7\().16b
	ext		\x6\().16b, \x2\().16b, \x2\().16b, #8
	eor		\x7\().16b, \t1\().16b, \t5\().16b
	.ifb		\inv
	eor		\x2\().16b, \t0\().16b, \t4\().16b
	eor		\x4\().16b, \x4\().16b, \t3\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x3\().16b, \x3\().16b, \t6\().16b
	eor		\x6\().16b, \x6\().16b, \t2\().16b
	.else
	eor		\t3\().16b, \t3\().16b, \x4\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x2\().16b, \x3\().16b, \t6\().16b
	eor		\x3\().16b, \t0\().16b, \t4\().16b
	eor		\x4\().16b, \x6\().16b, \t2\().16b
	mov		\x6\().16b, \t3\().16b
	.endif
	.endm

/*
 * inv_mix_cols - inverse MixColumns step on eight bit-sliced registers.
 *
 * x0..x7 are the state registers (bare names, updated in place), t0..t7
 * are scratch registers that get clobbered.  A pre-processing pass XORs
 * combinations of "register ^ (register rotated by 8 bytes)" into the
 * state, after which the forward mix_cols macro is invoked with its inv
 * argument set, selecting the alternative output tail of that macro.
 */
	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				      t0, t1, t2, t3, t4, t5, t6, t7
	ext		\t0\().16b, \x0\().16b, \x0\().16b, #8
	ext		\t6\().16b, \x6\().16b, \x6\().16b, #8
	ext		\t7\().16b, \x7\().16b, \x7\().16b, #8
	eor		\t0\().16b, \t0\().16b, \x0\().16b
	ext		\t1\().16b, \x1\().16b, \x1\().16b, #8
	eor		\t6\().16b, \t6\().16b, \x6\().16b
	ext		\t2\().16b, \x2\().16b, \x2\().16b, #8
	eor		\t7\().16b, \t7\().16b, \x7\().16b
	ext		\t3\().16b, \x3\().16b, \x3\().16b, #8
	eor		\t1\().16b, \t1\().16b, \x1\().16b
	ext		\t4\().16b, \x4\().16b, \x4\().16b, #8
	eor		\t2\().16b, \t2\().16b, \x2\().16b
	ext		\t5\().16b, \x5\().16b, \x5\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x3\().16b
	eor		\t4\().16b, \t4\().16b, \x4\().16b
	eor		\t5\().16b, \t5\().16b, \x5\().16b
	eor		\x0\().16b, \x0\().16b, \t6\().16b
	eor		\x1\().16b, \x1\().16b, \t6\().16b
	eor		\x2\().16b, \x2\().16b, \t0\().16b
	eor		\x4\().16b, \x4\().16b, \t2\().16b
	eor		\x3\().16b, \x3\().16b, \t1\().16b
	eor		\x1\().16b, \x1\().16b, \t7\().16b
	eor		\x2\().16b, \x2\().16b, \t7\().16b
	eor		\x4\().16b, \x4\().16b, \t6\().16b
	eor		\x5\().16b, \x5\().16b, \t3\().16b
	eor		\x3\().16b, \x3\().16b, \t6\().16b
	eor		\x6\().16b, \x6\().16b, \t4\().16b
	eor		\x4\().16b, \x4\().16b, \t7\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x7\().16b, \x7\().16b, \t5\().16b
	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
	.endm

/*
 * swapmove_2x - two interleaved "swapmove" bit exchanges.
 *
 * For each pair (a0, b0) and (a1, b1), with n an immediate shift count:
 *	t  = ((b >> n) ^ a) & mask
 *	a ^= t
 *	b ^= t << n
 * i.e. the bits of a selected by mask are exchanged with the bits of b
 * that sit n positions higher.  The shifts operate on 64-bit lanes (.2d).
 * All register arguments are bare names; t0 and t1 are clobbered, mask is
 * only read.
 */
	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	ushr		\t0\().2d, \b0\().2d, #\n
	ushr		\t1\().2d, \b1\().2d, #\n
	eor		\t0\().16b, \t0\().16b, \a0\().16b
	eor		\t1\().16b, \t1\().16b, \a1\().16b
	and		\t0\().16b, \t0\().16b, \mask\().16b
	and		\t1\().16b, \t1\().16b, \mask\().16b
	eor		\a0\().16b, \a0\().16b, \t0\().16b
	shl		\t0\().2d, \t0\().2d, #\n
	eor		\a1\().16b, \a1\().16b, \t1\().16b
	shl		\t1\().2d, \t1\().2d, #\n
	eor		\b0\().16b, \b0\().16b, \t0\().16b
	eor		\b1\().16b, \b1\().16b, \t1\().16b
	.endm

/*
 * bitslice - transpose bits between eight registers, in place.
 *
 * Note that the formals are declared in descending order (x7 first), so
 * the first positional argument binds to x7 and the eighth to x0.
 * Three rounds of swapmove_2x are applied with (shift, mask) pairs of
 * (1, 0x55), (2, 0x33) and (4, 0x0f).  t0 and t1 hold the masks, t2 and t3
 * are the swapmove temporaries; all four are clobbered.
 * The same macro is used on entry to and on exit from the cipher helpers.
 */
	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	movi		\t0\().16b, #0x55
	movi		\t1\().16b, #0x33
	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
	movi		\t0\().16b, #0x0f		// t0 is free again: reuse for the last mask
	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
	.endm


/*
 * Byte-index vectors for TBL, loaded PC-relative with "ldr qN, <label>".
 *
 *   M0     - applied to each middle round key in aesbs_convert_key
 *   M0SR   - applied to the input blocks in aesbs_encrypt8
 *   SR     - shift_rows mask for the encryption rounds
 *   SRM0   - shift_rows mask for the final encryption round
 *   M0ISR  - applied to the input blocks in aesbs_decrypt8
 *   ISR    - shift_rows mask for the decryption rounds
 *   ISRM0  - shift_rows mask for the final decryption round
 */
	.align		6
M0:	.octa		0x0004080c0105090d02060a0e03070b0f

M0SR:	.octa		0x0004080c05090d010a0e02060f03070b
SR:	.octa		0x0f0e0d0c0a09080b0504070600030201
SRM0:	.octa		0x01060b0c0207080d0304090e00050a0f

M0ISR:	.octa		0x0004080c0d0105090a0e0206070b0f03
ISR:	.octa		0x0f0e0d0c080b0a090504070602010003
ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f

/*
 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 *
 * Convert an expanded AES key schedule into the layout consumed by
 * aesbs_encrypt8/aesbs_decrypt8.
 *
 * In:	x0 = out, x1 = rk (rounds + 1 round keys of 16 bytes), x2 = rounds
 *
 * Output layout written through x0:
 *	- round 0 key, copied unchanged			(16 bytes)
 *	- for each of the rounds - 1 middle keys: the key permuted by M0
 *	  and expanded into eight bit-plane masks	(128 bytes each)
 *	- last round key XORed with 0x63 in every byte	(16 bytes)
 *
 * Planes 0, 1, 5 and 6 of every middle key are stored inverted; those are
 * exactly the bits set in 0x63.
 * NOTE(review): presumably this folds the S-box affine constant into the
 * key schedule, which is what the "compose .L63" remark refers to.
 *
 * Clobbers x0-x2, v0-v17 and the flags.  The loop body executes
 * rounds - 1 times and is entered unconditionally, so rounds must be at
 * least 2.
 */
SYM_FUNC_START(aesbs_convert_key)
	ld1		{v7.4s}, [x1], #16		// load round 0 key
	ld1		{v17.4s}, [x1], #16		// load round 1 key

	movi		v8.16b,  #0x01			// bit masks
	movi		v9.16b,  #0x02
	movi		v10.16b, #0x04
	movi		v11.16b, #0x08
	movi		v12.16b, #0x10
	movi		v13.16b, #0x20
	movi		v14.16b, #0x40
	movi		v15.16b, #0x80
	ldr		q16, M0

	sub		x2, x2, #1			// number of middle round keys
	str		q7, [x0], #16			// save round 0 key

.Lkey_loop:
	tbl		v7.16b, {v17.16b}, v16.16b	// permute current key by M0
	ld1		{v17.4s}, [x1], #16		// load next round key

	// cmtst sets a byte to 0xff where the tested bit is set, else 0x00
	cmtst		v0.16b, v7.16b, v8.16b
	cmtst		v1.16b, v7.16b, v9.16b
	cmtst		v2.16b, v7.16b, v10.16b
	cmtst		v3.16b, v7.16b, v11.16b
	cmtst		v4.16b, v7.16b, v12.16b
	cmtst		v5.16b, v7.16b, v13.16b
	cmtst		v6.16b, v7.16b, v14.16b
	cmtst		v7.16b, v7.16b, v15.16b
	not		v0.16b, v0.16b			// invert planes 0, 1, 5, 6
	not		v1.16b, v1.16b
	not		v5.16b, v5.16b
	not		v6.16b, v6.16b

	subs		x2, x2, #1
	stp		q0, q1, [x0], #128		// stores do not touch the flags
	stp		q2, q3, [x0, #-96]
	stp		q4, q5, [x0, #-64]
	stp		q6, q7, [x0, #-32]
	b.ne		.Lkey_loop

	movi		v7.16b, #0x63			// compose .L63
	eor		v17.16b, v17.16b, v7.16b
	str		q17, [x0]			// save last round key
	ret
SYM_FUNC_END(aesbs_convert_key)

/*
 * aesbs_encrypt8 - encrypt eight AES blocks in parallel (file-local helper,
 * custom register contract).
 *
 * In:	v0-v7	eight input blocks
 *	bskey	key schedule as produced by aesbs_convert_key
 *	rounds	number of AES rounds
 * Out:	the eight output blocks, in the register order
 *	v0, v1, v4, v6, v3, v7, v2, v5
 *
 * Clobbers v8-v15 (scratch), v16-v23 (round key), v24 (shift_rows mask),
 * bskey, rounds and the flags.
 */
	.align		4
SYM_FUNC_START_LOCAL(aesbs_encrypt8)
	ldr		q9, [bskey], #16		// round 0 key
	ldr		q8, M0SR
	ldr		q24, SR

	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor		v11.16b, v1.16b, v9.16b
	tbl		v0.16b, {v10.16b}, v8.16b
	eor		v12.16b, v2.16b, v9.16b
	tbl		v1.16b, {v11.16b}, v8.16b
	eor		v13.16b, v3.16b, v9.16b
	tbl		v2.16b, {v12.16b}, v8.16b
	eor		v14.16b, v4.16b, v9.16b
	tbl		v3.16b, {v13.16b}, v8.16b
	eor		v15.16b, v5.16b, v9.16b
	tbl		v4.16b, {v14.16b}, v8.16b
	eor		v10.16b, v6.16b, v9.16b
	tbl		v5.16b, {v15.16b}, v8.16b
	eor		v11.16b, v7.16b, v9.16b
	tbl		v6.16b, {v10.16b}, v8.16b
	tbl		v7.16b, {v11.16b}, v8.16b

	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub		rounds, rounds, #1
	b		.Lenc_sbox			// M0SR already covered the first shift_rows

.Lenc_loop:
	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
.Lenc_sbox:
	sbox		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs		rounds, rounds, #1
	b.cc		.Lenc_done			// borrow: that was the final sbox

	enc_next_rk

	mix_cols	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	add_round_key	v0, v1, v2, v3, v4, v5, v6, v7

	// Flags still come from the subs above: nothing in between sets them.
	b.ne		.Lenc_loop
	ldr		q24, SRM0			// final round uses the SRM0 mask
	b		.Lenc_loop

.Lenc_done:
	ldr		q12, [bskey]			// last round key

	bitslice	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

	eor		v0.16b, v0.16b, v12.16b
	eor		v1.16b, v1.16b, v12.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v6.16b, v6.16b, v12.16b
	eor		v3.16b, v3.16b, v12.16b
	eor		v7.16b, v7.16b, v12.16b
	eor		v2.16b, v2.16b, v12.16b
	eor		v5.16b, v5.16b, v12.16b
	ret
SYM_FUNC_END(aesbs_encrypt8)

/*
 * aesbs_decrypt8 - decrypt eight AES blocks in parallel (file-local helper,
 * custom register contract).
 *
 * In:	v0-v7	eight input blocks
 *	bskey	start of the key schedule produced by aesbs_convert_key
 *	rounds	number of AES rounds
 * Out:	the eight output blocks, in the register order
 *	v0, v1, v6, v4, v2, v7, v3, v5
 *
 * The schedule is walked backwards.  With the layout written by
 * aesbs_convert_key (16 bytes, then rounds - 1 keys of 128 bytes, then 16
 * bytes) the final 16-byte key sits at offset rounds * 128 - 112, which is
 * where bskey is moved first.  Every dec_next_rk steps back by 128, so at
 * .Ldec_done bskey is start + 16 and [bskey, #-16] is the leading key.
 *
 * Clobbers x9, v8-v15 (scratch), v16-v23 (round key), v24 (shift_rows
 * mask), bskey, rounds and the flags.
 */
	.align		4
SYM_FUNC_START_LOCAL(aesbs_decrypt8)
	lsl		x9, rounds, #7			// rounds * 128
	add		bskey, bskey, x9

	ldr		q9, [bskey, #-112]!		// round 0 key
	ldr		q8, M0ISR
	ldr		q24, ISR

	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor		v11.16b, v1.16b, v9.16b
	tbl		v0.16b, {v10.16b}, v8.16b
	eor		v12.16b, v2.16b, v9.16b
	tbl		v1.16b, {v11.16b}, v8.16b
	eor		v13.16b, v3.16b, v9.16b
	tbl		v2.16b, {v12.16b}, v8.16b
	eor		v14.16b, v4.16b, v9.16b
	tbl		v3.16b, {v13.16b}, v8.16b
	eor		v15.16b, v5.16b, v9.16b
	tbl		v4.16b, {v14.16b}, v8.16b
	eor		v10.16b, v6.16b, v9.16b
	tbl		v5.16b, {v15.16b}, v8.16b
	eor		v11.16b, v7.16b, v9.16b
	tbl		v6.16b, {v10.16b}, v8.16b
	tbl		v7.16b, {v11.16b}, v8.16b

	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub		rounds, rounds, #1
	b		.Ldec_sbox			// M0ISR already covered the first shift_rows

.Ldec_loop:
	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
.Ldec_sbox:
	inv_sbox	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs		rounds, rounds, #1
	b.cc		.Ldec_done			// borrow: that was the final inv_sbox

	dec_next_rk

	add_round_key	v0, v1, v6, v4, v2, v7, v3, v5

	inv_mix_cols	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	// Flags still come from the subs above: nothing in between sets them.
	b.ne		.Ldec_loop
	ldr		q24, ISRM0			// final round uses the ISRM0 mask
	b		.Ldec_loop
.Ldec_done:
	ldr		q12, [bskey, #-16]		// last round key

	bitslice	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

	eor		v0.16b, v0.16b, v12.16b
	eor		v1.16b, v1.16b, v12.16b
	eor		v6.16b, v6.16b, v12.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v2.16b, v2.16b, v12.16b
	eor		v7.16b, v7.16b, v12.16b
	eor		v3.16b, v3.16b, v12.16b
	eor		v5.16b, v5.16b, v12.16b
	ret
SYM_FUNC_END(aesbs_decrypt8)

/*
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks)
 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks)
 *
 * __ecb_crypt - shared body of the two ECB entry points.
 *
 *   do8     eight-block helper to call (aesbs_encrypt8 or aesbs_decrypt8)
 *   o0..o7  registers in which that helper returns blocks 0..7
 *
 * In:	x0 = out, x1 = in, x2 = rk, x3 = rounds, x4 = blocks
 * The arguments are kept in x19-x23 across the helper calls.
 * NOTE(review): frame_push/frame_pop come from asm/assembler.h; with an
 * argument of 5 they presumably save and restore x19-x23 plus the frame
 * record - confirm there.
 *
 * Up to eight blocks are handled per iteration.  x5 is a one-hot marker:
 * when fewer than eight blocks remain it is 1 << remaining, so the tbnz
 * chains stop loading/storing after exactly that many blocks; otherwise it
 * is zero and all eight are processed.  Registers that were not loaded in
 * a short final batch still go through the helper, but their output is
 * never stored.
 *
 * NOTE(review): blocks == 0 is not handled - x5 would be 1, none of the
 * tested bits 1..7 would be set, and eight blocks would be read and
 * written.  Callers must pass blocks >= 1.
 */
	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	frame_push	5

	mov		x19, x0				// out
	mov		x20, x1				// in
	mov		x21, x2				// rk
	mov		x22, x3				// rounds
	mov		x23, x4				// blocks remaining

99:	mov		x5, #1
	lsl		x5, x5, x23			// 1 << remaining
	subs		w23, w23, #8
	csel		x23, x23, xzr, pl		// remaining after this batch, floor 0
	csel		x5, x5, xzr, mi			// keep marker only for a short batch

	ld1		{v0.16b}, [x20], #16
	tbnz		x5, #1, 0f
	ld1		{v1.16b}, [x20], #16
	tbnz		x5, #2, 0f
	ld1		{v2.16b}, [x20], #16
	tbnz		x5, #3, 0f
	ld1		{v3.16b}, [x20], #16
	tbnz		x5, #4, 0f
	ld1		{v4.16b}, [x20], #16
	tbnz		x5, #5, 0f
	ld1		{v5.16b}, [x20], #16
	tbnz		x5, #6, 0f
	ld1		{v6.16b}, [x20], #16
	tbnz		x5, #7, 0f
	ld1		{v7.16b}, [x20], #16

0:	mov		bskey, x21
	mov		rounds, x22
	bl		\do8

	st1		{\o0\().16b}, [x19], #16
	tbnz		x5, #1, 1f
	st1		{\o1\().16b}, [x19], #16
	tbnz		x5, #2, 1f
	st1		{\o2\().16b}, [x19], #16
	tbnz		x5, #3, 1f
	st1		{\o3\().16b}, [x19], #16
	tbnz		x5, #4, 1f
	st1		{\o4\().16b}, [x19], #16
	tbnz		x5, #5, 1f
	st1		{\o5\().16b}, [x19], #16
	tbnz		x5, #6, 1f
	st1		{\o6\().16b}, [x19], #16
	tbnz		x5, #7, 1f
	st1		{\o7\().16b}, [x19], #16

	cbz		x23, 1f				// nothing left after a full batch
	b		99b

1:	frame_pop
	ret
	.endm

62362306a36Sopenharmony_ci	.align		4
62462306a36Sopenharmony_ciSYM_TYPED_FUNC_START(aesbs_ecb_encrypt)
62562306a36Sopenharmony_ci	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
62662306a36Sopenharmony_ciSYM_FUNC_END(aesbs_ecb_encrypt)
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci	.align		4
62962306a36Sopenharmony_ciSYM_TYPED_FUNC_START(aesbs_ecb_decrypt)
63062306a36Sopenharmony_ci	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
63162306a36Sopenharmony_ciSYM_FUNC_END(aesbs_ecb_decrypt)
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci	/*
63462306a36Sopenharmony_ci	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
63562306a36Sopenharmony_ci	 *		     int blocks, u8 iv[])
63662306a36Sopenharmony_ci	 */
63762306a36Sopenharmony_ci	.align		4
63862306a36Sopenharmony_ciSYM_FUNC_START(aesbs_cbc_decrypt)
63962306a36Sopenharmony_ci	frame_push	6
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	mov		x19, x0
64262306a36Sopenharmony_ci	mov		x20, x1
64362306a36Sopenharmony_ci	mov		x21, x2
64462306a36Sopenharmony_ci	mov		x22, x3
64562306a36Sopenharmony_ci	mov		x23, x4
64662306a36Sopenharmony_ci	mov		x24, x5
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci99:	mov		x6, #1
64962306a36Sopenharmony_ci	lsl		x6, x6, x23
65062306a36Sopenharmony_ci	subs		w23, w23, #8
65162306a36Sopenharmony_ci	csel		x23, x23, xzr, pl
65262306a36Sopenharmony_ci	csel		x6, x6, xzr, mi
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	ld1		{v0.16b}, [x20], #16
65562306a36Sopenharmony_ci	mov		v25.16b, v0.16b
65662306a36Sopenharmony_ci	tbnz		x6, #1, 0f
65762306a36Sopenharmony_ci	ld1		{v1.16b}, [x20], #16
65862306a36Sopenharmony_ci	mov		v26.16b, v1.16b
65962306a36Sopenharmony_ci	tbnz		x6, #2, 0f
66062306a36Sopenharmony_ci	ld1		{v2.16b}, [x20], #16
66162306a36Sopenharmony_ci	mov		v27.16b, v2.16b
66262306a36Sopenharmony_ci	tbnz		x6, #3, 0f
66362306a36Sopenharmony_ci	ld1		{v3.16b}, [x20], #16
66462306a36Sopenharmony_ci	mov		v28.16b, v3.16b
66562306a36Sopenharmony_ci	tbnz		x6, #4, 0f
66662306a36Sopenharmony_ci	ld1		{v4.16b}, [x20], #16
66762306a36Sopenharmony_ci	mov		v29.16b, v4.16b
66862306a36Sopenharmony_ci	tbnz		x6, #5, 0f
66962306a36Sopenharmony_ci	ld1		{v5.16b}, [x20], #16
67062306a36Sopenharmony_ci	mov		v30.16b, v5.16b
67162306a36Sopenharmony_ci	tbnz		x6, #6, 0f
67262306a36Sopenharmony_ci	ld1		{v6.16b}, [x20], #16
67362306a36Sopenharmony_ci	mov		v31.16b, v6.16b
67462306a36Sopenharmony_ci	tbnz		x6, #7, 0f
67562306a36Sopenharmony_ci	ld1		{v7.16b}, [x20]
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci0:	mov		bskey, x21
67862306a36Sopenharmony_ci	mov		rounds, x22
67962306a36Sopenharmony_ci	bl		aesbs_decrypt8
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	ld1		{v24.16b}, [x24]		// load IV
68262306a36Sopenharmony_ci
68362306a36Sopenharmony_ci	eor		v1.16b, v1.16b, v25.16b
68462306a36Sopenharmony_ci	eor		v6.16b, v6.16b, v26.16b
68562306a36Sopenharmony_ci	eor		v4.16b, v4.16b, v27.16b
68662306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v28.16b
68762306a36Sopenharmony_ci	eor		v7.16b, v7.16b, v29.16b
68862306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v24.16b
68962306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v30.16b
69062306a36Sopenharmony_ci	eor		v5.16b, v5.16b, v31.16b
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ci	st1		{v0.16b}, [x19], #16
69362306a36Sopenharmony_ci	mov		v24.16b, v25.16b
69462306a36Sopenharmony_ci	tbnz		x6, #1, 1f
69562306a36Sopenharmony_ci	st1		{v1.16b}, [x19], #16
69662306a36Sopenharmony_ci	mov		v24.16b, v26.16b
69762306a36Sopenharmony_ci	tbnz		x6, #2, 1f
69862306a36Sopenharmony_ci	st1		{v6.16b}, [x19], #16
69962306a36Sopenharmony_ci	mov		v24.16b, v27.16b
70062306a36Sopenharmony_ci	tbnz		x6, #3, 1f
70162306a36Sopenharmony_ci	st1		{v4.16b}, [x19], #16
70262306a36Sopenharmony_ci	mov		v24.16b, v28.16b
70362306a36Sopenharmony_ci	tbnz		x6, #4, 1f
70462306a36Sopenharmony_ci	st1		{v2.16b}, [x19], #16
70562306a36Sopenharmony_ci	mov		v24.16b, v29.16b
70662306a36Sopenharmony_ci	tbnz		x6, #5, 1f
70762306a36Sopenharmony_ci	st1		{v7.16b}, [x19], #16
70862306a36Sopenharmony_ci	mov		v24.16b, v30.16b
70962306a36Sopenharmony_ci	tbnz		x6, #6, 1f
71062306a36Sopenharmony_ci	st1		{v3.16b}, [x19], #16
71162306a36Sopenharmony_ci	mov		v24.16b, v31.16b
71262306a36Sopenharmony_ci	tbnz		x6, #7, 1f
71362306a36Sopenharmony_ci	ld1		{v24.16b}, [x20], #16
71462306a36Sopenharmony_ci	st1		{v5.16b}, [x19], #16
71562306a36Sopenharmony_ci1:	st1		{v24.16b}, [x24]		// store IV
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	cbz		x23, 2f
71862306a36Sopenharmony_ci	b		99b
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci2:	frame_pop
72162306a36Sopenharmony_ci	ret
72262306a36Sopenharmony_ciSYM_FUNC_END(aesbs_cbc_decrypt)
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	.macro		next_tweak, out, in, const, tmp
72562306a36Sopenharmony_ci	sshr		\tmp\().2d,  \in\().2d,   #63
72662306a36Sopenharmony_ci	and		\tmp\().16b, \tmp\().16b, \const\().16b
72762306a36Sopenharmony_ci	add		\out\().2d,  \in\().2d,   \in\().2d
72862306a36Sopenharmony_ci	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
72962306a36Sopenharmony_ci	eor		\out\().16b, \out\().16b, \tmp\().16b
73062306a36Sopenharmony_ci	.endm
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	/*
73362306a36Sopenharmony_ci	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
73462306a36Sopenharmony_ci	 *		     int blocks, u8 iv[])
73562306a36Sopenharmony_ci	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
73662306a36Sopenharmony_ci	 *		     int blocks, u8 iv[])
73762306a36Sopenharmony_ci	 */
73862306a36Sopenharmony_ciSYM_FUNC_START_LOCAL(__xts_crypt8)
73962306a36Sopenharmony_ci	movi		v18.2s, #0x1
74062306a36Sopenharmony_ci	movi		v19.2s, #0x87
74162306a36Sopenharmony_ci	uzp1		v18.4s, v18.4s, v19.4s
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	ld1		{v0.16b-v3.16b}, [x1], #64
74462306a36Sopenharmony_ci	ld1		{v4.16b-v7.16b}, [x1], #64
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	next_tweak	v26, v25, v18, v19
74762306a36Sopenharmony_ci	next_tweak	v27, v26, v18, v19
74862306a36Sopenharmony_ci	next_tweak	v28, v27, v18, v19
74962306a36Sopenharmony_ci	next_tweak	v29, v28, v18, v19
75062306a36Sopenharmony_ci	next_tweak	v30, v29, v18, v19
75162306a36Sopenharmony_ci	next_tweak	v31, v30, v18, v19
75262306a36Sopenharmony_ci	next_tweak	v16, v31, v18, v19
75362306a36Sopenharmony_ci	next_tweak	v17, v16, v18, v19
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v25.16b
75662306a36Sopenharmony_ci	eor		v1.16b, v1.16b, v26.16b
75762306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v27.16b
75862306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v28.16b
75962306a36Sopenharmony_ci	eor		v4.16b, v4.16b, v29.16b
76062306a36Sopenharmony_ci	eor		v5.16b, v5.16b, v30.16b
76162306a36Sopenharmony_ci	eor		v6.16b, v6.16b, v31.16b
76262306a36Sopenharmony_ci	eor		v7.16b, v7.16b, v16.16b
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci	stp		q16, q17, [x6]
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci	mov		bskey, x2
76762306a36Sopenharmony_ci	mov		rounds, x3
76862306a36Sopenharmony_ci	br		x16
76962306a36Sopenharmony_ciSYM_FUNC_END(__xts_crypt8)
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
77262306a36Sopenharmony_ci	frame_push	0, 32
77362306a36Sopenharmony_ci	add		x6, sp, #.Lframe_local_offset
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	ld1		{v25.16b}, [x5]
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci0:	adr		x16, \do8
77862306a36Sopenharmony_ci	bl		__xts_crypt8
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	eor		v16.16b, \o0\().16b, v25.16b
78162306a36Sopenharmony_ci	eor		v17.16b, \o1\().16b, v26.16b
78262306a36Sopenharmony_ci	eor		v18.16b, \o2\().16b, v27.16b
78362306a36Sopenharmony_ci	eor		v19.16b, \o3\().16b, v28.16b
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	ldp		q24, q25, [x6]
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	eor		v20.16b, \o4\().16b, v29.16b
78862306a36Sopenharmony_ci	eor		v21.16b, \o5\().16b, v30.16b
78962306a36Sopenharmony_ci	eor		v22.16b, \o6\().16b, v31.16b
79062306a36Sopenharmony_ci	eor		v23.16b, \o7\().16b, v24.16b
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ci	st1		{v16.16b-v19.16b}, [x0], #64
79362306a36Sopenharmony_ci	st1		{v20.16b-v23.16b}, [x0], #64
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci	subs		x4, x4, #8
79662306a36Sopenharmony_ci	b.gt		0b
79762306a36Sopenharmony_ci
79862306a36Sopenharmony_ci	st1		{v25.16b}, [x5]
79962306a36Sopenharmony_ci	frame_pop
80062306a36Sopenharmony_ci	ret
80162306a36Sopenharmony_ci	.endm
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ciSYM_TYPED_FUNC_START(aesbs_xts_encrypt)
80462306a36Sopenharmony_ci	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
80562306a36Sopenharmony_ciSYM_FUNC_END(aesbs_xts_encrypt)
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ciSYM_TYPED_FUNC_START(aesbs_xts_decrypt)
80862306a36Sopenharmony_ci	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
80962306a36Sopenharmony_ciSYM_FUNC_END(aesbs_xts_decrypt)
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	.macro		next_ctr, v
81262306a36Sopenharmony_ci	mov		\v\().d[1], x8
81362306a36Sopenharmony_ci	adds		x8, x8, #1
81462306a36Sopenharmony_ci	mov		\v\().d[0], x7
81562306a36Sopenharmony_ci	adc		x7, x7, xzr
81662306a36Sopenharmony_ci	rev64		\v\().16b, \v\().16b
81762306a36Sopenharmony_ci	.endm
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	/*
82062306a36Sopenharmony_ci	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
82162306a36Sopenharmony_ci	 *		     int rounds, int blocks, u8 iv[])
82262306a36Sopenharmony_ci	 */
82362306a36Sopenharmony_ciSYM_FUNC_START(aesbs_ctr_encrypt)
82462306a36Sopenharmony_ci	frame_push	0
82562306a36Sopenharmony_ci	ldp		x7, x8, [x5]
82662306a36Sopenharmony_ci	ld1		{v0.16b}, [x5]
82762306a36Sopenharmony_ciCPU_LE(	rev		x7, x7		)
82862306a36Sopenharmony_ciCPU_LE(	rev		x8, x8		)
82962306a36Sopenharmony_ci	adds		x8, x8, #1
83062306a36Sopenharmony_ci	adc		x7, x7, xzr
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci0:	next_ctr	v1
83362306a36Sopenharmony_ci	next_ctr	v2
83462306a36Sopenharmony_ci	next_ctr	v3
83562306a36Sopenharmony_ci	next_ctr	v4
83662306a36Sopenharmony_ci	next_ctr	v5
83762306a36Sopenharmony_ci	next_ctr	v6
83862306a36Sopenharmony_ci	next_ctr	v7
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci	mov		bskey, x2
84162306a36Sopenharmony_ci	mov		rounds, x3
84262306a36Sopenharmony_ci	bl		aesbs_encrypt8
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	ld1		{ v8.16b-v11.16b}, [x1], #64
84562306a36Sopenharmony_ci	ld1		{v12.16b-v15.16b}, [x1], #64
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ci	eor		v8.16b, v0.16b, v8.16b
84862306a36Sopenharmony_ci	eor		v9.16b, v1.16b, v9.16b
84962306a36Sopenharmony_ci	eor		v10.16b, v4.16b, v10.16b
85062306a36Sopenharmony_ci	eor		v11.16b, v6.16b, v11.16b
85162306a36Sopenharmony_ci	eor		v12.16b, v3.16b, v12.16b
85262306a36Sopenharmony_ci	eor		v13.16b, v7.16b, v13.16b
85362306a36Sopenharmony_ci	eor		v14.16b, v2.16b, v14.16b
85462306a36Sopenharmony_ci	eor		v15.16b, v5.16b, v15.16b
85562306a36Sopenharmony_ci
85662306a36Sopenharmony_ci	st1		{ v8.16b-v11.16b}, [x0], #64
85762306a36Sopenharmony_ci	st1		{v12.16b-v15.16b}, [x0], #64
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci	next_ctr	v0
86062306a36Sopenharmony_ci	subs		x4, x4, #8
86162306a36Sopenharmony_ci	b.gt		0b
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	st1		{v0.16b}, [x5]
86462306a36Sopenharmony_ci	frame_pop
86562306a36Sopenharmony_ci	ret
86662306a36Sopenharmony_ciSYM_FUNC_END(aesbs_ctr_encrypt)
867