162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/linkage.h>
962306a36Sopenharmony_ci#include <asm/assembler.h>
1062306a36Sopenharmony_ci
/*
 * Map AES_FUNC_START/AES_FUNC_END onto SYM_FUNC_START/SYM_FUNC_END with a
 * neon_ prefix pasted in front of the function name.
 * NOTE(review): presumably consumed by the aes-modes.S include further down
 * so that its entry points get NEON-specific symbol names - confirm there.
 */
1162306a36Sopenharmony_ci#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
1262306a36Sopenharmony_ci#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci	xtsmask		.req	v7	/* alias of v7; same register as cbciv */
1562306a36Sopenharmony_ci	cbciv		.req	v7	/* alias of v7; same register as xtsmask */
1662306a36Sopenharmony_ci	vctr		.req	v4	/* alias of v4 */
	/*
	 * NOTE(review): none of these aliases is referenced in this file;
	 * presumably they are used by aes-modes.S (included below) - confirm.
	 */
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci	.macro		xts_reload_mask, tmp
	/*
	 * Invoke xts_load_mask with tmp as its only argument.
	 * xts_load_mask is not defined in this file.
	 * NOTE(review): presumably supplied by aes-modes.S (included below)
	 * and used to rebuild xtsmask - confirm there.
	 */
1962306a36Sopenharmony_ci	xts_load_mask	\tmp
2062306a36Sopenharmony_ci	.endm
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci	/* special case for the neon-bs driver calling into this one for CTS */
	/*
	 * reg - general purpose register whose bit 1 is tested
	 * lbl - label to branch to
	 *
	 * Branches to lbl when bit 1 of reg is set, otherwise falls through.
	 * NOTE(review): what bit 1 encodes is decided by the caller and is not
	 * visible in this file.
	 */
2362306a36Sopenharmony_ci	.macro		xts_cts_skip_tw, reg, lbl
2462306a36Sopenharmony_ci	tbnz		\reg, #1, \lbl
2562306a36Sopenharmony_ci	.endm
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci	/* multiply by polynomial 'x' in GF(2^8) */
	/*
	 * out   - destination vector operand
	 * in    - source vector operand
	 * temp  - scratch vector operand, clobbered
	 * const - vector operand holding the reduction constant
	 *
	 * The arguments are used verbatim as instruction operands, so the
	 * caller supplies the arrangement suffix (mix_columns passes .16b
	 * operands and v12.16b as const).  out must not be the same register
	 * as temp, because out is written before temp is consumed.
	 */
2862306a36Sopenharmony_ci	.macro		mul_by_x, out, in, temp, const
2962306a36Sopenharmony_ci	sshr		\temp, \in, #7		/* byte lanes: all ones if top bit set, else 0 */
3062306a36Sopenharmony_ci	shl		\out, \in, #1		/* shift left by one, top bit is dropped */
3162306a36Sopenharmony_ci	and		\temp, \temp, \const	/* keep const only where the top bit was set */
3262306a36Sopenharmony_ci	eor		\out, \out, \temp	/* fold the reduction term into the result */
3362306a36Sopenharmony_ci	.endm
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci	/* multiply by polynomial 'x^2' in GF(2^8) */
	/*
	 * out   - destination vector operand
	 * in    - source vector operand
	 * temp  - scratch vector operand, clobbered
	 * const - vector operand holding the reduction constant
	 *
	 * The arguments are used verbatim as instruction operands, so the
	 * caller supplies the arrangement suffix (mix_columns passes .16b
	 * operands and v12.16b as const).  out must not be the same register
	 * as temp, because out is written before temp is consumed.
	 */
3662306a36Sopenharmony_ci	.macro		mul_by_x2, out, in, temp, const
3762306a36Sopenharmony_ci	ushr		\temp, \in, #6		/* byte lanes: the two bits the shift below drops */
3862306a36Sopenharmony_ci	shl		\out, \in, #2		/* shift left by two */
3962306a36Sopenharmony_ci	pmul		\temp, \temp, \const	/* carry-less multiply of those bits by const */
4062306a36Sopenharmony_ci	eor		\out, \out, \temp	/* fold the reduction term into the result */
4162306a36Sopenharmony_ci	.endm
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci	/* preload the entire Sbox */
	/*
	 * sbox      - symbol of a 256-byte lookup table
	 * shiftrows - symbol of a 16-byte tbl index vector
	 * temp      - general purpose scratch register, clobbered
	 *
	 * Register state established for the other macros in this file:
	 *   v12     - 0x1b in every byte (constant passed to mul_by_x*)
	 *   v13     - the shiftrows index vector (tbl step in do_block*)
	 *   v14     - the .Lror32by8 index vector (tbl step in mix_columns*)
	 *   v16-v31 - the 256 table bytes, 64 per group of four registers
	 *
	 * ldr_l and adr_l are not defined in this file.
	 * NOTE(review): presumably they come from <asm/assembler.h> - confirm.
	 */
4462306a36Sopenharmony_ci	.macro		prepare, sbox, shiftrows, temp
4562306a36Sopenharmony_ci	movi		v12.16b, #0x1b
4662306a36Sopenharmony_ci	ldr_l		q13, \shiftrows, \temp
4762306a36Sopenharmony_ci	ldr_l		q14, .Lror32by8, \temp
4862306a36Sopenharmony_ci	adr_l		\temp, \sbox
4962306a36Sopenharmony_ci	ld1		{v16.16b-v19.16b}, [\temp], #64	/* table bytes 0x00-0x3f */
5062306a36Sopenharmony_ci	ld1		{v20.16b-v23.16b}, [\temp], #64	/* table bytes 0x40-0x7f */
5162306a36Sopenharmony_ci	ld1		{v24.16b-v27.16b}, [\temp], #64	/* table bytes 0x80-0xbf */
5262306a36Sopenharmony_ci	ld1		{v28.16b-v31.16b}, [\temp]	/* table bytes 0xc0-0xff */
5362306a36Sopenharmony_ci	.endm
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci	/* do preload for encryption */
	/*
	 * ignore0, ignore1 - accepted but unused
	 * temp             - general purpose scratch register, clobbered
	 *
	 * Runs prepare with the crypto_aes_sbox table and the
	 * .LForward_ShiftRows index vector.
	 */
5662306a36Sopenharmony_ci	.macro		enc_prepare, ignore0, ignore1, temp
5762306a36Sopenharmony_ci	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
5862306a36Sopenharmony_ci	.endm
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	.macro		enc_switch_key, ignore0, ignore1, temp
	/*
	 * Expands to no instructions; all three arguments are ignored.
	 * NOTE(review): presumably kept so that code in aes-modes.S can invoke
	 * it unconditionally - confirm there.
	 */
6162306a36Sopenharmony_ci	/* do nothing */
6262306a36Sopenharmony_ci	.endm
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	/* do preload for decryption */
	/*
	 * ignore0, ignore1 - accepted but unused
	 * temp             - general purpose scratch register, clobbered
	 *
	 * Runs prepare with the crypto_aes_inv_sbox table and the
	 * .LReverse_ShiftRows index vector.
	 */
6562306a36Sopenharmony_ci	.macro		dec_prepare, ignore0, ignore1, temp
6662306a36Sopenharmony_ci	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
6762306a36Sopenharmony_ci	.endm
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	/* apply SubBytes transformation using the preloaded Sbox */
	/*
	 * in - bare vector register name (the .16b suffix is appended here);
	 *      every byte is replaced by its table entry, in place
	 *
	 * Expects the table in v16-v31 (see prepare) and 0x40 in every byte of
	 * v15 (do_block sets this up before invoking the macro).
	 * Clobbers v9, v10 and v11.
	 *
	 * One tbl/tbx can index at most four registers (64 bytes), so the
	 * lookup is done in four steps on indexes reduced by 0x40 each time.
	 * tbl writes zero for an out-of-range index, tbx leaves the destination
	 * byte untouched, so each byte is filled in by exactly one step.
	 */
7062306a36Sopenharmony_ci	.macro		sub_bytes, in
7162306a36Sopenharmony_ci	sub		v9.16b, \in\().16b, v15.16b		/* index - 0x40 */
7262306a36Sopenharmony_ci	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
7362306a36Sopenharmony_ci	sub		v10.16b, v9.16b, v15.16b		/* index - 0x80 */
7462306a36Sopenharmony_ci	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
7562306a36Sopenharmony_ci	sub		v11.16b, v10.16b, v15.16b		/* index - 0xc0 */
7662306a36Sopenharmony_ci	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
7762306a36Sopenharmony_ci	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
7862306a36Sopenharmony_ci	.endm
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	/* apply MixColumns transformation */
	/*
	 * in  - bare vector register name holding the state, updated in place
	 * enc - assemble-time constant; 0 additionally emits the inverse
	 *       pre-multiply step, any other value emits only the common part
	 *
	 * Uses v12 as the constant for mul_by_x/mul_by_x2 and v14 as the tbl
	 * index vector (both are loaded by prepare).  Clobbers v8 and v9.
	 */
8162306a36Sopenharmony_ci	.macro		mix_columns, in, enc
8262306a36Sopenharmony_ci	.if		\enc == 0
8362306a36Sopenharmony_ci	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
8462306a36Sopenharmony_ci	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
8562306a36Sopenharmony_ci	eor		\in\().16b, \in\().16b, v8.16b
8662306a36Sopenharmony_ci	rev32		v8.8h, v8.8h			/* swap the 16-bit halves of each word */
8762306a36Sopenharmony_ci	eor		\in\().16b, \in\().16b, v8.16b
8862306a36Sopenharmony_ci	.endif
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
9162306a36Sopenharmony_ci	rev32		v8.8h, \in\().8h		/* swap the 16-bit halves of each word */
9262306a36Sopenharmony_ci	eor		v8.16b, v8.16b, v9.16b
9362306a36Sopenharmony_ci	eor		\in\().16b, \in\().16b, v8.16b
9462306a36Sopenharmony_ci	tbl		\in\().16b, {\in\().16b}, v14.16b	/* byte permutation from .Lror32by8 */
9562306a36Sopenharmony_ci	eor		\in\().16b, \in\().16b, v8.16b
9662306a36Sopenharmony_ci	.endm
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	.macro		do_block, enc, in, rounds, rk, rkp, i
	/*
	 * Apply all rounds to the single state held in vector register in.
	 *
	 * enc    - passed through to mix_columns (0 selects the inverse form)
	 * in     - bare vector register name (the .16b suffix is appended
	 *          here), transformed in place
	 * rounds - register holding the round count; the loop only exits when
	 *          the counter reaches exactly zero
	 * rk     - register pointing at the first 16-byte round key; one key
	 *          is read up front and one more per round
	 * rkp    - scratch pointer register, clobbered
	 * i      - scratch counter register, clobbered
	 *
	 * Expects v12-v14 and v16-v31 as set up by prepare.
	 * Clobbers v8-v11, v15 and the condition flags.
	 */
9962306a36Sopenharmony_ci	ld1		{v15.4s}, [\rk]
10062306a36Sopenharmony_ci	add		\rkp, \rk, #16
10162306a36Sopenharmony_ci	mov		\i, \rounds
10262306a36Sopenharmony_ci1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
10362306a36Sopenharmony_ci	movi		v15.16b, #0x40				/* index step used by sub_bytes */
10462306a36Sopenharmony_ci	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
10562306a36Sopenharmony_ci	sub_bytes	\in
10662306a36Sopenharmony_ci	subs		\i, \i, #1
10762306a36Sopenharmony_ci	ld1		{v15.4s}, [\rkp], #16			/* next round key; flags from subs survive */
10862306a36Sopenharmony_ci	beq		2222f					/* last round skips mix_columns */
10962306a36Sopenharmony_ci	mix_columns	\in, \enc
11062306a36Sopenharmony_ci	b		1111b
11162306a36Sopenharmony_ci2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
11262306a36Sopenharmony_ci	.endm
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	.macro		encrypt_block, in, rounds, rk, rkp, i
	/*
	 * Single-state wrapper: do_block with enc = 1.  The remaining
	 * arguments are forwarded unchanged; see do_block for their meaning
	 * and for the registers that get clobbered.
	 */
11562306a36Sopenharmony_ci	do_block	1, \in, \rounds, \rk, \rkp, \i
11662306a36Sopenharmony_ci	.endm
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	.macro		decrypt_block, in, rounds, rk, rkp, i
	/*
	 * Single-state wrapper: do_block with enc = 0.  The remaining
	 * arguments are forwarded unchanged; see do_block for their meaning
	 * and for the registers that get clobbered.
	 */
11962306a36Sopenharmony_ci	do_block	0, \in, \rounds, \rk, \rkp, \i
12062306a36Sopenharmony_ci	.endm
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	/*
12362306a36Sopenharmony_ci	 * Interleaved versions: functionally equivalent to the
12462306a36Sopenharmony_ci	 * ones above, but applied to AES states in parallel.
12562306a36Sopenharmony_ci	 */
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	.macro		sub_bytes_4x, in0, in1, in2, in3
	/*
	 * Table lookup for four states at once, each updated in place.
	 *
	 * in0-in3 - bare vector register names (the .16b suffix is appended)
	 *
	 * Expects the table in v16-v31 (see prepare) and 0x40 in every byte of
	 * v15 (do_block_4x sets this up before invoking the macro).
	 * Clobbers v8-v11: v8 tracks the reduced index for in0, v9 for in1,
	 * v10 for in2 and v11 for in3.  Each is lowered by a further 0x40
	 * before the next 64-byte quarter of the table is consulted with tbx.
	 * The sub and tbl/tbx instructions of the four states are interleaved.
	 */
12862306a36Sopenharmony_ci	sub		v8.16b, \in0\().16b, v15.16b
12962306a36Sopenharmony_ci	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
13062306a36Sopenharmony_ci	sub		v9.16b, \in1\().16b, v15.16b
13162306a36Sopenharmony_ci	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
13262306a36Sopenharmony_ci	sub		v10.16b, \in2\().16b, v15.16b
13362306a36Sopenharmony_ci	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
13462306a36Sopenharmony_ci	sub		v11.16b, \in3\().16b, v15.16b
13562306a36Sopenharmony_ci	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
13662306a36Sopenharmony_ci	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
13762306a36Sopenharmony_ci	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
13862306a36Sopenharmony_ci	sub		v8.16b, v8.16b, v15.16b
13962306a36Sopenharmony_ci	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
14062306a36Sopenharmony_ci	sub		v9.16b, v9.16b, v15.16b
14162306a36Sopenharmony_ci	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
14262306a36Sopenharmony_ci	sub		v10.16b, v10.16b, v15.16b
14362306a36Sopenharmony_ci	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
14462306a36Sopenharmony_ci	sub		v11.16b, v11.16b, v15.16b
14562306a36Sopenharmony_ci	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
14662306a36Sopenharmony_ci	sub		v8.16b, v8.16b, v15.16b
14762306a36Sopenharmony_ci	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
14862306a36Sopenharmony_ci	sub		v9.16b, v9.16b, v15.16b
14962306a36Sopenharmony_ci	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
15062306a36Sopenharmony_ci	sub		v10.16b, v10.16b, v15.16b
15162306a36Sopenharmony_ci	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
15262306a36Sopenharmony_ci	sub		v11.16b, v11.16b, v15.16b
15362306a36Sopenharmony_ci	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
15462306a36Sopenharmony_ci	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
15562306a36Sopenharmony_ci	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
15662306a36Sopenharmony_ci	.endm
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	/*
	 * Two interleaved copies of the mul_by_x sequence:
	 * out0 is derived from in0 and out1 from in1.
	 *
	 * All arguments are bare vector register names; the .16b suffix is
	 * appended here.  tmp0 and tmp1 are scratch and get clobbered.
	 * const holds the reduction constant (mix_columns_2x passes v12).
	 */
15962306a36Sopenharmony_ci	sshr		\tmp0\().16b, \in0\().16b, #7
16062306a36Sopenharmony_ci	shl		\out0\().16b, \in0\().16b, #1
16162306a36Sopenharmony_ci	sshr		\tmp1\().16b, \in1\().16b, #7
16262306a36Sopenharmony_ci	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
16362306a36Sopenharmony_ci	shl		\out1\().16b, \in1\().16b, #1
16462306a36Sopenharmony_ci	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
16562306a36Sopenharmony_ci	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
16662306a36Sopenharmony_ci	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
16762306a36Sopenharmony_ci	.endm
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	/*
	 * Two interleaved copies of the mul_by_x2 sequence:
	 * out0 is derived from in0 and out1 from in1.
	 *
	 * All arguments are bare vector register names; the .16b suffix is
	 * appended here.  tmp0 and tmp1 are scratch and get clobbered.
	 * const holds the reduction constant (mix_columns_2x passes v12).
	 */
17062306a36Sopenharmony_ci	ushr		\tmp0\().16b, \in0\().16b, #6
17162306a36Sopenharmony_ci	shl		\out0\().16b, \in0\().16b, #2
17262306a36Sopenharmony_ci	ushr		\tmp1\().16b, \in1\().16b, #6
17362306a36Sopenharmony_ci	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
17462306a36Sopenharmony_ci	shl		\out1\().16b, \in1\().16b, #2
17562306a36Sopenharmony_ci	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
17662306a36Sopenharmony_ci	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
17762306a36Sopenharmony_ci	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
17862306a36Sopenharmony_ci	.endm
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	.macro		mix_columns_2x, in0, in1, enc
	/*
	 * Same instruction sequence as mix_columns, applied to two states
	 * with the instructions interleaved.
	 *
	 * in0, in1 - bare vector register names, updated in place
	 * enc      - assemble-time constant; 0 additionally emits the inverse
	 *            pre-multiply step
	 *
	 * Uses v12 as the multiplication constant and v14 as the tbl index
	 * vector (both loaded by prepare).  Clobbers v8-v11.
	 */
18162306a36Sopenharmony_ci	.if		\enc == 0
18262306a36Sopenharmony_ci	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
18362306a36Sopenharmony_ci	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
18462306a36Sopenharmony_ci	eor		\in0\().16b, \in0\().16b, v8.16b
18562306a36Sopenharmony_ci	rev32		v8.8h, v8.8h
18662306a36Sopenharmony_ci	eor		\in1\().16b, \in1\().16b, v9.16b
18762306a36Sopenharmony_ci	rev32		v9.8h, v9.8h
18862306a36Sopenharmony_ci	eor		\in0\().16b, \in0\().16b, v8.16b
18962306a36Sopenharmony_ci	eor		\in1\().16b, \in1\().16b, v9.16b
19062306a36Sopenharmony_ci	.endif
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
19362306a36Sopenharmony_ci	rev32		v10.8h, \in0\().8h		/* swap the 16-bit halves of each word */
19462306a36Sopenharmony_ci	rev32		v11.8h, \in1\().8h
19562306a36Sopenharmony_ci	eor		v10.16b, v10.16b, v8.16b
19662306a36Sopenharmony_ci	eor		v11.16b, v11.16b, v9.16b
19762306a36Sopenharmony_ci	eor		\in0\().16b, \in0\().16b, v10.16b
19862306a36Sopenharmony_ci	eor		\in1\().16b, \in1\().16b, v11.16b
19962306a36Sopenharmony_ci	tbl		\in0\().16b, {\in0\().16b}, v14.16b	/* byte permutation from .Lror32by8 */
20062306a36Sopenharmony_ci	tbl		\in1\().16b, {\in1\().16b}, v14.16b
20162306a36Sopenharmony_ci	eor		\in0\().16b, \in0\().16b, v10.16b
20262306a36Sopenharmony_ci	eor		\in1\().16b, \in1\().16b, v11.16b
20362306a36Sopenharmony_ci	.endm
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	/*
	 * Four-state counterpart of do_block: every round step is applied to
	 * in0-in3 with the same round key.
	 *
	 * enc     - passed through to mix_columns_2x (0 selects the inverse form)
	 * in0-in3 - bare vector register names, transformed in place
	 * rounds  - register holding the round count; the loop only exits
	 *           when the counter reaches exactly zero
	 * rk      - register pointing at the first 16-byte round key; one key
	 *           is read up front and one more per round
	 * rkp     - scratch pointer register, clobbered
	 * i       - scratch counter register, clobbered
	 *
	 * Expects v12-v14 and v16-v31 as set up by prepare.
	 * Clobbers v8-v11, v15 and the condition flags.
	 */
20662306a36Sopenharmony_ci	ld1		{v15.4s}, [\rk]
20762306a36Sopenharmony_ci	add		\rkp, \rk, #16
20862306a36Sopenharmony_ci	mov		\i, \rounds
20962306a36Sopenharmony_ci1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
21062306a36Sopenharmony_ci	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
21162306a36Sopenharmony_ci	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
21262306a36Sopenharmony_ci	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
21362306a36Sopenharmony_ci	movi		v15.16b, #0x40				/* index step used by sub_bytes_4x */
21462306a36Sopenharmony_ci	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
21562306a36Sopenharmony_ci	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
21662306a36Sopenharmony_ci	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
21762306a36Sopenharmony_ci	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
21862306a36Sopenharmony_ci	sub_bytes_4x	\in0, \in1, \in2, \in3
21962306a36Sopenharmony_ci	subs		\i, \i, #1
22062306a36Sopenharmony_ci	ld1		{v15.4s}, [\rkp], #16			/* next round key; flags from subs survive */
22162306a36Sopenharmony_ci	beq		2222f					/* last round skips mix_columns_2x */
22262306a36Sopenharmony_ci	mix_columns_2x	\in0, \in1, \enc
22362306a36Sopenharmony_ci	mix_columns_2x	\in2, \in3, \enc
22462306a36Sopenharmony_ci	b		1111b
22562306a36Sopenharmony_ci2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
22662306a36Sopenharmony_ci	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
22762306a36Sopenharmony_ci	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
22862306a36Sopenharmony_ci	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
22962306a36Sopenharmony_ci	.endm
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	/*
	 * Four-state wrapper: do_block_4x with enc = 1.  The remaining
	 * arguments are forwarded unchanged; see do_block_4x for their
	 * meaning and for the registers that get clobbered.
	 */
23262306a36Sopenharmony_ci	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
23362306a36Sopenharmony_ci	.endm
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	/*
	 * Four-state wrapper: do_block_4x with enc = 0.  The remaining
	 * arguments are forwarded unchanged; see do_block_4x for their
	 * meaning and for the registers that get clobbered.
	 */
23662306a36Sopenharmony_ci	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
23762306a36Sopenharmony_ci	.endm
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci#include "aes-modes.S"
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	.section	".rodata", "a"
24262306a36Sopenharmony_ci	.align		4
	/*
	 * Three 16-byte index vectors used as the last operand of tbl.
	 * NOTE(review): .align 4 is understood to mean 2^4 = 16 bytes on this
	 * target, matching the q-register loads in prepare - confirm against
	 * the assembler documentation.
	 */
	/* loaded into v13 by enc_prepare; drives the ShiftRows tbl in do_block* */
24362306a36Sopenharmony_ci.LForward_ShiftRows:
24462306a36Sopenharmony_ci	.octa		0x0b06010c07020d08030e09040f0a0500
24562306a36Sopenharmony_ci
	/* loaded into v13 by dec_prepare; drives the ShiftRows tbl in do_block* */
24662306a36Sopenharmony_ci.LReverse_ShiftRows:
24762306a36Sopenharmony_ci	.octa		0x0306090c0f0205080b0e0104070a0d00
24862306a36Sopenharmony_ci
	/*
	 * loaded into v14 by prepare; drives the tbl step in mix_columns*.
	 * Going by the label it rotates each 32-bit word by 8 bits.
	 */
24962306a36Sopenharmony_ci.Lror32by8:
25062306a36Sopenharmony_ci	.octa		0x0c0f0e0d080b0a090407060500030201
251