/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	/* The aese/aesmc instructions used in this file need "+crypto". */
	.arch	armv8-a+crypto

/*
 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
 *			     u32 *macp, u8 const rk[], u32 rounds);
 *
 * Fold abytes bytes of input into the CBC-MAC state held in mac[].
 *
 *   x0 = mac     16-byte MAC state, updated in place
 *   x1 = in      input bytes
 *   w2 = abytes  number of input bytes
 *   x3 = macp    in/out: number of bytes of the current, still incomplete
 *                block that were already xor'ed into mac[]; this function
 *                writes back a value in the range 0..15
 *   x4 = rk      AES round keys, 16 bytes each
 *   w5 = rounds  number of AES rounds (compared against 12 below)
 *
 * State convention implemented here:
 *   *macp == 0: mac[] has absorbed a whole block that is not encrypted yet;
 *               it is encrypted at label 1 before more input is absorbed.
 *   *macp != 0: the first *macp bytes of mac[] have input xor'ed in; the
 *               block is encrypted only once the remaining bytes arrived.
 *
 * A call with abytes == 0 returns without touching mac[] and leaves *macp
 * unchanged.
 *
 * Clobbers x0-x2, x6-x8, v0, v1, v3-v5 and the condition flags.
 */
SYM_FUNC_START(ce_aes_ccm_auth_data)
	ldr	w8, [x3]			/* leftover from prev round? */
	cbz	w2, 10f				/* no input: keep state as is */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w8, 1f

	/*
	 * Complete the partial block left by an earlier call. The missing
	 * bytes are collected in v1 while w8 counts up from (leftover - 16)
	 * towards zero.
	 */
	sub	w8, w8, #16
	eor	v1.16b, v1.16b, v1.16b		/* v1 = 0 */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1			/* Z set when input is used up */
	add	w8, w8, #1			/* add, not adds: keep the flags */
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b		/* block complete: fold into mac */

	/*
	 * Encrypt the mac in v0. The round loop handles three rounds per
	 * iteration and is entered at 3, 4 or 5 depending on the number of
	 * rounds, so that it always ends on the last two round keys in
	 * v4/v5. The add/sub between cmp and the branches do not set flags.
	 */
1:	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f				/* rounds < 12 */
	bne	5f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b			/* last round has no aesmc */
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f				/* fewer than 16 bytes left */
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b				/* more input: encrypt again */

	/*
	 * Out of whole blocks. The flags still come from the subs above:
	 * Z is set if the input ended exactly on a block boundary.
	 */
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f
	adds	w2, w2, #16			/* w2 = number of tail bytes */
	beq	10f
	mov	w8, w2				/* new leftover count */

	/* xor the tail bytes into the leading bytes of mac[] in memory */
7:	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b

	/*
	 * Input ran out while completing a leftover block. If the block is
	 * still incomplete (w8 != 0), w8 becomes the new leftover count and
	 * v1 is rotated by the number of missing bytes so that the collected
	 * bytes line up with their offsets in the block.
	 */
8:	cbz	w8, 91f
	mov	w7, w8				/* w7 = -(missing bytes) */
	add	w8, w8, #16
9:	ext	v1.16b, v1.16b, v1.16b, #1
	adds	w7, w7, #1
	bne	9b
91:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
10:	str	w8, [x3]			/* write back leftover count */
	ret
SYM_FUNC_END(ce_aes_ccm_auth_data)

/*
 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
 * 			 u32 rounds);
 *
 * Encrypt the 16-byte MAC state and the 16-byte counter block with the
 * same key, xor the two results and store that in mac[].
 *
 *   x0 = mac     16-byte MAC state, replaced by the result
 *   x1 = ctr     16-byte counter block, read only
 *   x2 = rk      AES round keys, 16 bytes each
 *   w3 = rounds  number of AES rounds (compared against 12 below)
 *
 * Both blocks go through the round loop interleaved. The final
 * round-key xor is skipped for both, because it would cancel out when
 * the two blocks are xor'ed with each other.
 *
 * Clobbers x2, w3, v0, v1, v3-v5 and the condition flags.
 */
SYM_FUNC_START(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	/* flags still from the cmp: sub and ld1 do not set them */
	bmi	0f				/* rounds < 12 */
	bne	3f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	2f
0:	mov	v4.16b, v3.16b
	/* three rounds per iteration, entered at 1, 2 or 3 */
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b			/* last round has no aesmc */
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_final)

/*
 * aes_ccm_do_crypt enc - shared body of ce_aes_ccm_encrypt/decrypt
 *
 * enc == 1 assembles the encrypting variant, any other value the
 * decrypting one. The macro expands to a complete function body,
 * including the ret.
 *
 * Registers on entry (argument order of the C prototypes of its users):
 *   x0 = out     output buffer
 *   x1 = in      input buffer
 *   w2 = cbytes  number of bytes to process
 *   x3 = rk      AES round keys, 16 bytes each
 *   w4 = rounds  number of AES rounds (compared against 12 below)
 *   x5 = mac     16-byte MAC state, updated in place
 *   x6 = ctr     16-byte counter block
 *
 * For every 16-byte block the low 64 bits of the counter are incremented,
 * the mac (v0) and the counter block (v1) are encrypted interleaved, the
 * input is xor'ed with the encrypted counter to form the output and the
 * plaintext is xor'ed into the mac. The low counter half lives in x8 for
 * the whole run and is written back to ctr[] only when the input ends on
 * a block boundary.
 *
 * A trailing partial block is processed one byte at a time. On that path
 * only the leading bytes of mac[] receive plaintext and the incremented
 * counter is not written back.
 *
 * cbytes == 0 returns immediately without touching mac[], ctr[] or out[].
 *
 * Clobbers x0-x2, x5-x10, v0-v5 and the condition flags.
 */
	.macro	aes_ccm_do_crypt,enc
	/*
	 * Without this check an empty request would take the partial block
	 * path with a byte count of zero, and the count would wrap around
	 * in the tail loop.
	 */
	cbz	w2, 5f
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16
	/* flags still from the cmp: sub, ins, ld1 and add do not set them */
	bmi	1f				/* rounds < 12 */
	bne	4f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b			/* last round has no aesmc */
	aese	v1.16b, v4.16b
	/*
	 * v0 and v1 still lack the xor with the last round key (v5). For a
	 * full block that xor is merged into the data xors below.
	 */
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b				/* flags from the subs above */
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

	/*
	 * Partial tail block: finish both encryptions, store the encrypted
	 * mac and then handle the remaining bytes one at a time. x6 is
	 * reused as scratch from here on.
	 */
6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9			/* mac byte ^= plaintext byte */
	eor	w9, w9, w6			/* w9 = ciphertext byte */
	.else
	eor	w9, w9, w6			/* w9 = plaintext byte */
	eor	w7, w7, w9			/* mac byte ^= plaintext byte */
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

/*
 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
 * 			   u8 const rk[], u32 rounds, u8 mac[],
 * 			   u8 ctr[]);
 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 * 			   u8 const rk[], u32 rounds, u8 mac[],
 * 			   u8 ctr[]);
 *
 * Both functions consist solely of an expansion of the aes_ccm_do_crypt
 * macro, which also supplies the ret. The arguments arrive in x0-x6 in
 * the order listed above (out, in, cbytes, rk, rounds, mac, ctr).
 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1		/* enc == 1: encrypt */
SYM_FUNC_END(ce_aes_ccm_encrypt)

/* Decrypting variant: aes_ccm_do_crypt expanded with enc == 0. */
SYM_FUNC_START(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0		/* enc != 1: decrypt */
SYM_FUNC_END(ce_aes_ccm_decrypt)