/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/* Everything below is code. */
	.text
	/* Let the assembler accept the Crypto Extension AES instructions. */
	.arch	armv8-a+crypto

1462306a36Sopenharmony_ci	/*
1562306a36Sopenharmony_ci	 * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
1662306a36Sopenharmony_ci	 *			    u32 macp, u8 const rk[], u32 rounds);
1762306a36Sopenharmony_ci	 */
1862306a36Sopenharmony_ciSYM_FUNC_START(ce_aes_ccm_auth_data)
1962306a36Sopenharmony_ci	ld1	{v0.16b}, [x0]			/* load mac */
2062306a36Sopenharmony_ci	cbz	w3, 1f
2162306a36Sopenharmony_ci	sub	w3, w3, #16
2262306a36Sopenharmony_ci	eor	v1.16b, v1.16b, v1.16b
2362306a36Sopenharmony_ci0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
2462306a36Sopenharmony_ci	subs	w2, w2, #1
2562306a36Sopenharmony_ci	add	w3, w3, #1
2662306a36Sopenharmony_ci	ins	v1.b[0], w7
2762306a36Sopenharmony_ci	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
2862306a36Sopenharmony_ci	beq	8f				/* out of input? */
2962306a36Sopenharmony_ci	cbnz	w3, 0b
3062306a36Sopenharmony_ci	eor	v0.16b, v0.16b, v1.16b
3162306a36Sopenharmony_ci1:	ld1	{v3.4s}, [x4]			/* load first round key */
3262306a36Sopenharmony_ci	prfm	pldl1strm, [x1]
3362306a36Sopenharmony_ci	cmp	w5, #12				/* which key size? */
3462306a36Sopenharmony_ci	add	x6, x4, #16
3562306a36Sopenharmony_ci	sub	w7, w5, #2			/* modified # of rounds */
3662306a36Sopenharmony_ci	bmi	2f
3762306a36Sopenharmony_ci	bne	5f
3862306a36Sopenharmony_ci	mov	v5.16b, v3.16b
3962306a36Sopenharmony_ci	b	4f
4062306a36Sopenharmony_ci2:	mov	v4.16b, v3.16b
4162306a36Sopenharmony_ci	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
4262306a36Sopenharmony_ci3:	aese	v0.16b, v4.16b
4362306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
4462306a36Sopenharmony_ci4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
4562306a36Sopenharmony_ci	aese	v0.16b, v5.16b
4662306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
4762306a36Sopenharmony_ci5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
4862306a36Sopenharmony_ci	subs	w7, w7, #3
4962306a36Sopenharmony_ci	aese	v0.16b, v3.16b
5062306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
5162306a36Sopenharmony_ci	ld1	{v5.4s}, [x6], #16		/* load next round key */
5262306a36Sopenharmony_ci	bpl	3b
5362306a36Sopenharmony_ci	aese	v0.16b, v4.16b
5462306a36Sopenharmony_ci	subs	w2, w2, #16			/* last data? */
5562306a36Sopenharmony_ci	eor	v0.16b, v0.16b, v5.16b		/* final round */
5662306a36Sopenharmony_ci	bmi	6f
5762306a36Sopenharmony_ci	ld1	{v1.16b}, [x1], #16		/* load next input block */
5862306a36Sopenharmony_ci	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
5962306a36Sopenharmony_ci	bne	1b
6062306a36Sopenharmony_ci6:	st1	{v0.16b}, [x0]			/* store mac */
6162306a36Sopenharmony_ci	beq	10f
6262306a36Sopenharmony_ci	adds	w2, w2, #16
6362306a36Sopenharmony_ci	beq	10f
6462306a36Sopenharmony_ci	mov	w3, w2
6562306a36Sopenharmony_ci7:	ldrb	w7, [x1], #1
6662306a36Sopenharmony_ci	umov	w6, v0.b[0]
6762306a36Sopenharmony_ci	eor	w6, w6, w7
6862306a36Sopenharmony_ci	strb	w6, [x0], #1
6962306a36Sopenharmony_ci	subs	w2, w2, #1
7062306a36Sopenharmony_ci	beq	10f
7162306a36Sopenharmony_ci	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
7262306a36Sopenharmony_ci	b	7b
7362306a36Sopenharmony_ci8:	cbz	w3, 91f
7462306a36Sopenharmony_ci	mov	w7, w3
7562306a36Sopenharmony_ci	add	w3, w3, #16
7662306a36Sopenharmony_ci9:	ext	v1.16b, v1.16b, v1.16b, #1
7762306a36Sopenharmony_ci	adds	w7, w7, #1
7862306a36Sopenharmony_ci	bne	9b
7962306a36Sopenharmony_ci91:	eor	v0.16b, v0.16b, v1.16b
8062306a36Sopenharmony_ci	st1	{v0.16b}, [x0]
8162306a36Sopenharmony_ci10:	mov	w0, w3
8262306a36Sopenharmony_ci	ret
8362306a36Sopenharmony_ciSYM_FUNC_END(ce_aes_ccm_auth_data)
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	/*
8662306a36Sopenharmony_ci	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
8762306a36Sopenharmony_ci	 * 			 u32 rounds);
8862306a36Sopenharmony_ci	 */
8962306a36Sopenharmony_ciSYM_FUNC_START(ce_aes_ccm_final)
9062306a36Sopenharmony_ci	ld1	{v3.4s}, [x2], #16		/* load first round key */
9162306a36Sopenharmony_ci	ld1	{v0.16b}, [x0]			/* load mac */
9262306a36Sopenharmony_ci	cmp	w3, #12				/* which key size? */
9362306a36Sopenharmony_ci	sub	w3, w3, #2			/* modified # of rounds */
9462306a36Sopenharmony_ci	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
9562306a36Sopenharmony_ci	bmi	0f
9662306a36Sopenharmony_ci	bne	3f
9762306a36Sopenharmony_ci	mov	v5.16b, v3.16b
9862306a36Sopenharmony_ci	b	2f
9962306a36Sopenharmony_ci0:	mov	v4.16b, v3.16b
10062306a36Sopenharmony_ci1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
10162306a36Sopenharmony_ci	aese	v0.16b, v4.16b
10262306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
10362306a36Sopenharmony_ci	aese	v1.16b, v4.16b
10462306a36Sopenharmony_ci	aesmc	v1.16b, v1.16b
10562306a36Sopenharmony_ci2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
10662306a36Sopenharmony_ci	aese	v0.16b, v5.16b
10762306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
10862306a36Sopenharmony_ci	aese	v1.16b, v5.16b
10962306a36Sopenharmony_ci	aesmc	v1.16b, v1.16b
11062306a36Sopenharmony_ci3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
11162306a36Sopenharmony_ci	subs	w3, w3, #3
11262306a36Sopenharmony_ci	aese	v0.16b, v3.16b
11362306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
11462306a36Sopenharmony_ci	aese	v1.16b, v3.16b
11562306a36Sopenharmony_ci	aesmc	v1.16b, v1.16b
11662306a36Sopenharmony_ci	bpl	1b
11762306a36Sopenharmony_ci	aese	v0.16b, v4.16b
11862306a36Sopenharmony_ci	aese	v1.16b, v4.16b
11962306a36Sopenharmony_ci	/* final round key cancels out */
12062306a36Sopenharmony_ci	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
12162306a36Sopenharmony_ci	st1	{v0.16b}, [x0]			/* store result */
12262306a36Sopenharmony_ci	ret
12362306a36Sopenharmony_ciSYM_FUNC_END(ce_aes_ccm_final)
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	.macro	aes_ccm_do_crypt,enc
12662306a36Sopenharmony_ci	cbz	x2, 5f
12762306a36Sopenharmony_ci	ldr	x8, [x6, #8]			/* load lower ctr */
12862306a36Sopenharmony_ci	ld1	{v0.16b}, [x5]			/* load mac */
12962306a36Sopenharmony_ciCPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
13062306a36Sopenharmony_ci0:	/* outer loop */
13162306a36Sopenharmony_ci	ld1	{v1.8b}, [x6]			/* load upper ctr */
13262306a36Sopenharmony_ci	prfm	pldl1strm, [x1]
13362306a36Sopenharmony_ci	add	x8, x8, #1
13462306a36Sopenharmony_ci	rev	x9, x8
13562306a36Sopenharmony_ci	cmp	w4, #12				/* which key size? */
13662306a36Sopenharmony_ci	sub	w7, w4, #2			/* get modified # of rounds */
13762306a36Sopenharmony_ci	ins	v1.d[1], x9			/* no carry in lower ctr */
13862306a36Sopenharmony_ci	ld1	{v3.4s}, [x3]			/* load first round key */
13962306a36Sopenharmony_ci	add	x10, x3, #16
14062306a36Sopenharmony_ci	bmi	1f
14162306a36Sopenharmony_ci	bne	4f
14262306a36Sopenharmony_ci	mov	v5.16b, v3.16b
14362306a36Sopenharmony_ci	b	3f
14462306a36Sopenharmony_ci1:	mov	v4.16b, v3.16b
14562306a36Sopenharmony_ci	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
14662306a36Sopenharmony_ci2:	/* inner loop: 3 rounds, 2x interleaved */
14762306a36Sopenharmony_ci	aese	v0.16b, v4.16b
14862306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
14962306a36Sopenharmony_ci	aese	v1.16b, v4.16b
15062306a36Sopenharmony_ci	aesmc	v1.16b, v1.16b
15162306a36Sopenharmony_ci3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
15262306a36Sopenharmony_ci	aese	v0.16b, v5.16b
15362306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
15462306a36Sopenharmony_ci	aese	v1.16b, v5.16b
15562306a36Sopenharmony_ci	aesmc	v1.16b, v1.16b
15662306a36Sopenharmony_ci4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
15762306a36Sopenharmony_ci	subs	w7, w7, #3
15862306a36Sopenharmony_ci	aese	v0.16b, v3.16b
15962306a36Sopenharmony_ci	aesmc	v0.16b, v0.16b
16062306a36Sopenharmony_ci	aese	v1.16b, v3.16b
16162306a36Sopenharmony_ci	aesmc	v1.16b, v1.16b
16262306a36Sopenharmony_ci	ld1	{v5.4s}, [x10], #16		/* load next round key */
16362306a36Sopenharmony_ci	bpl	2b
16462306a36Sopenharmony_ci	aese	v0.16b, v4.16b
16562306a36Sopenharmony_ci	aese	v1.16b, v4.16b
16662306a36Sopenharmony_ci	subs	w2, w2, #16
16762306a36Sopenharmony_ci	bmi	6f				/* partial block? */
16862306a36Sopenharmony_ci	ld1	{v2.16b}, [x1], #16		/* load next input block */
16962306a36Sopenharmony_ci	.if	\enc == 1
17062306a36Sopenharmony_ci	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
17162306a36Sopenharmony_ci	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
17262306a36Sopenharmony_ci	.else
17362306a36Sopenharmony_ci	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
17462306a36Sopenharmony_ci	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
17562306a36Sopenharmony_ci	.endif
17662306a36Sopenharmony_ci	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
17762306a36Sopenharmony_ci	st1	{v1.16b}, [x0], #16		/* write output block */
17862306a36Sopenharmony_ci	bne	0b
17962306a36Sopenharmony_ciCPU_LE(	rev	x8, x8			)
18062306a36Sopenharmony_ci	st1	{v0.16b}, [x5]			/* store mac */
18162306a36Sopenharmony_ci	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
18262306a36Sopenharmony_ci5:	ret
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
18562306a36Sopenharmony_ci	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
18662306a36Sopenharmony_ci	st1	{v0.16b}, [x5]			/* store mac */
18762306a36Sopenharmony_ci	add	w2, w2, #16			/* process partial tail block */
18862306a36Sopenharmony_ci7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
18962306a36Sopenharmony_ci	umov	w6, v1.b[0]			/* get top crypted ctr byte */
19062306a36Sopenharmony_ci	umov	w7, v0.b[0]			/* get top mac byte */
19162306a36Sopenharmony_ci	.if	\enc == 1
19262306a36Sopenharmony_ci	eor	w7, w7, w9
19362306a36Sopenharmony_ci	eor	w9, w9, w6
19462306a36Sopenharmony_ci	.else
19562306a36Sopenharmony_ci	eor	w9, w9, w6
19662306a36Sopenharmony_ci	eor	w7, w7, w9
19762306a36Sopenharmony_ci	.endif
19862306a36Sopenharmony_ci	strb	w9, [x0], #1			/* store out byte */
19962306a36Sopenharmony_ci	strb	w7, [x5], #1			/* store mac byte */
20062306a36Sopenharmony_ci	subs	w2, w2, #1
20162306a36Sopenharmony_ci	beq	5b
20262306a36Sopenharmony_ci	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
20362306a36Sopenharmony_ci	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
20462306a36Sopenharmony_ci	b	7b
20562306a36Sopenharmony_ci	.endm
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	/*
20862306a36Sopenharmony_ci	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
20962306a36Sopenharmony_ci	 * 			   u8 const rk[], u32 rounds, u8 mac[],
21062306a36Sopenharmony_ci	 * 			   u8 ctr[]);
21162306a36Sopenharmony_ci	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
21262306a36Sopenharmony_ci	 * 			   u8 const rk[], u32 rounds, u8 mac[],
21362306a36Sopenharmony_ci	 * 			   u8 ctr[]);
21462306a36Sopenharmony_ci	 */
21562306a36Sopenharmony_ciSYM_FUNC_START(ce_aes_ccm_encrypt)
21662306a36Sopenharmony_ci	aes_ccm_do_crypt	1
21762306a36Sopenharmony_ciSYM_FUNC_END(ce_aes_ccm_encrypt)
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ciSYM_FUNC_START(ce_aes_ccm_decrypt)
22062306a36Sopenharmony_ci	aes_ccm_do_crypt	0
22162306a36Sopenharmony_ciSYM_FUNC_END(ce_aes_ccm_decrypt)
222