/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

108c2ecf20Sopenharmony_ci	.text
118c2ecf20Sopenharmony_ci	.align		4
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#ifndef MAX_STRIDE
148c2ecf20Sopenharmony_ci#define MAX_STRIDE	4
158c2ecf20Sopenharmony_ci#endif
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci#if MAX_STRIDE == 4
188c2ecf20Sopenharmony_ci#define ST4(x...) x
198c2ecf20Sopenharmony_ci#define ST5(x...)
208c2ecf20Sopenharmony_ci#else
218c2ecf20Sopenharmony_ci#define ST4(x...)
228c2ecf20Sopenharmony_ci#define ST5(x...) x
238c2ecf20Sopenharmony_ci#endif
248c2ecf20Sopenharmony_ci
/*
 * Local helper: encrypt the four blocks held in v0-v3 in place by expanding
 * the encrypt_block4x macro. The callers in this file pass the round count
 * in w3 and the round key pointer in x2. x8 and w7 are handed to the macro
 * as well; presumably they are scratch registers (the macro is defined in
 * the including file -- confirm there).
 */
SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

/*
 * Local helper: decrypt the four blocks held in v0-v3 in place by expanding
 * the decrypt_block4x macro. The callers in this file pass the round count
 * in w3 and the round key pointer in x2. x8 and w7 are handed to the macro
 * as well; presumably they are scratch registers (the macro is defined in
 * the including file -- confirm there).
 */
SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
/*
 * 5-way variants of the local helpers, only built when MAX_STRIDE == 5.
 * They process the five blocks held in v0-v4 in place; w3 is the round
 * count and x2 the round key pointer, as passed by the callers in this file.
 */
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

478c2ecf20Sopenharmony_ci	/*
488c2ecf20Sopenharmony_ci	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
498c2ecf20Sopenharmony_ci	 *		   int blocks)
508c2ecf20Sopenharmony_ci	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
518c2ecf20Sopenharmony_ci	 *		   int blocks)
528c2ecf20Sopenharmony_ci	 */
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ciAES_FUNC_START(aes_ecb_encrypt)
558c2ecf20Sopenharmony_ci	stp		x29, x30, [sp, #-16]!
568c2ecf20Sopenharmony_ci	mov		x29, sp
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci	enc_prepare	w3, x2, x5
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci.LecbencloopNx:
618c2ecf20Sopenharmony_ci	subs		w4, w4, #MAX_STRIDE
628c2ecf20Sopenharmony_ci	bmi		.Lecbenc1x
638c2ecf20Sopenharmony_ci	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
648c2ecf20Sopenharmony_ciST4(	bl		aes_encrypt_block4x		)
658c2ecf20Sopenharmony_ciST5(	ld1		{v4.16b}, [x1], #16		)
668c2ecf20Sopenharmony_ciST5(	bl		aes_encrypt_block5x		)
678c2ecf20Sopenharmony_ci	st1		{v0.16b-v3.16b}, [x0], #64
688c2ecf20Sopenharmony_ciST5(	st1		{v4.16b}, [x0], #16		)
698c2ecf20Sopenharmony_ci	b		.LecbencloopNx
708c2ecf20Sopenharmony_ci.Lecbenc1x:
718c2ecf20Sopenharmony_ci	adds		w4, w4, #MAX_STRIDE
728c2ecf20Sopenharmony_ci	beq		.Lecbencout
738c2ecf20Sopenharmony_ci.Lecbencloop:
748c2ecf20Sopenharmony_ci	ld1		{v0.16b}, [x1], #16		/* get next pt block */
758c2ecf20Sopenharmony_ci	encrypt_block	v0, w3, x2, x5, w6
768c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x0], #16
778c2ecf20Sopenharmony_ci	subs		w4, w4, #1
788c2ecf20Sopenharmony_ci	bne		.Lecbencloop
798c2ecf20Sopenharmony_ci.Lecbencout:
808c2ecf20Sopenharmony_ci	ldp		x29, x30, [sp], #16
818c2ecf20Sopenharmony_ci	ret
828c2ecf20Sopenharmony_ciAES_FUNC_END(aes_ecb_encrypt)
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci
/*
 * ECB decryption. Same register use as aes_ecb_encrypt:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 */
AES_FUNC_START(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!		/* frame record: 'bl' below overwrites x30 */
	mov		x29, sp

	dec_prepare	w3, x2, x5

	/* Main loop: MAX_STRIDE blocks per iteration via the NxN helper. */
.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x			/* fewer than MAX_STRIDE blocks left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the subs: w4 = blocks left */
	beq		.Lecbdecout
	/* Tail loop: one block per iteration. */
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_decrypt)


1168c2ecf20Sopenharmony_ci	/*
1178c2ecf20Sopenharmony_ci	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
1188c2ecf20Sopenharmony_ci	 *		   int blocks, u8 iv[])
1198c2ecf20Sopenharmony_ci	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
1208c2ecf20Sopenharmony_ci	 *		   int blocks, u8 iv[])
1218c2ecf20Sopenharmony_ci	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
1228c2ecf20Sopenharmony_ci	 *			 int rounds, int blocks, u8 iv[],
1238c2ecf20Sopenharmony_ci	 *			 u32 const rk2[]);
1248c2ecf20Sopenharmony_ci	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
1258c2ecf20Sopenharmony_ci	 *			 int rounds, int blocks, u8 iv[],
1268c2ecf20Sopenharmony_ci	 *			 u32 const rk2[]);
1278c2ecf20Sopenharmony_ci	 */
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ciAES_FUNC_START(aes_essiv_cbc_encrypt)
1308c2ecf20Sopenharmony_ci	ld1		{v4.16b}, [x5]			/* get iv */
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	mov		w8, #14				/* AES-256: 14 rounds */
1338c2ecf20Sopenharmony_ci	enc_prepare	w8, x6, x7
1348c2ecf20Sopenharmony_ci	encrypt_block	v4, w8, x6, x7, w9
1358c2ecf20Sopenharmony_ci	enc_switch_key	w3, x2, x6
1368c2ecf20Sopenharmony_ci	b		.Lcbcencloop4x
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ciAES_FUNC_START(aes_cbc_encrypt)
1398c2ecf20Sopenharmony_ci	ld1		{v4.16b}, [x5]			/* get iv */
1408c2ecf20Sopenharmony_ci	enc_prepare	w3, x2, x6
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci.Lcbcencloop4x:
1438c2ecf20Sopenharmony_ci	subs		w4, w4, #4
1448c2ecf20Sopenharmony_ci	bmi		.Lcbcenc1x
1458c2ecf20Sopenharmony_ci	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
1468c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
1478c2ecf20Sopenharmony_ci	encrypt_block	v0, w3, x2, x6, w7
1488c2ecf20Sopenharmony_ci	eor		v1.16b, v1.16b, v0.16b
1498c2ecf20Sopenharmony_ci	encrypt_block	v1, w3, x2, x6, w7
1508c2ecf20Sopenharmony_ci	eor		v2.16b, v2.16b, v1.16b
1518c2ecf20Sopenharmony_ci	encrypt_block	v2, w3, x2, x6, w7
1528c2ecf20Sopenharmony_ci	eor		v3.16b, v3.16b, v2.16b
1538c2ecf20Sopenharmony_ci	encrypt_block	v3, w3, x2, x6, w7
1548c2ecf20Sopenharmony_ci	st1		{v0.16b-v3.16b}, [x0], #64
1558c2ecf20Sopenharmony_ci	mov		v4.16b, v3.16b
1568c2ecf20Sopenharmony_ci	b		.Lcbcencloop4x
1578c2ecf20Sopenharmony_ci.Lcbcenc1x:
1588c2ecf20Sopenharmony_ci	adds		w4, w4, #4
1598c2ecf20Sopenharmony_ci	beq		.Lcbcencout
1608c2ecf20Sopenharmony_ci.Lcbcencloop:
1618c2ecf20Sopenharmony_ci	ld1		{v0.16b}, [x1], #16		/* get next pt block */
1628c2ecf20Sopenharmony_ci	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
1638c2ecf20Sopenharmony_ci	encrypt_block	v4, w3, x2, x6, w7
1648c2ecf20Sopenharmony_ci	st1		{v4.16b}, [x0], #16
1658c2ecf20Sopenharmony_ci	subs		w4, w4, #1
1668c2ecf20Sopenharmony_ci	bne		.Lcbcencloop
1678c2ecf20Sopenharmony_ci.Lcbcencout:
1688c2ecf20Sopenharmony_ci	st1		{v4.16b}, [x5]			/* return iv */
1698c2ecf20Sopenharmony_ci	ret
1708c2ecf20Sopenharmony_ciAES_FUNC_END(aes_cbc_encrypt)
1718c2ecf20Sopenharmony_ciAES_FUNC_END(aes_essiv_cbc_encrypt)
1728c2ecf20Sopenharmony_ci
/*
 * ESSIV entry point for CBC decryption: encrypt the IV with rk2 (x6) using a
 * fixed 14 rounds, then join aes_cbc_decrypt at .Lessivcbcdecstart.
 *
 * Register use on entry for both entry points:
 *   x0 = out, x1 = in, x2 = rk/rk1, w3 = rounds, w4 = blocks,
 *   x5 = iv, x6 = rk2 (essiv variant only)
 * 'cbciv' is a register alias defined outside this file.
 */
AES_FUNC_START(aes_essiv_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b		.Lessivcbcdecstart

/*
 * Plain CBC decryption. cbciv carries the chaining value (the IV on entry,
 * the most recent ciphertext block afterwards) and is written back to [x5]
 * on exit.
 */
AES_FUNC_START(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!		/* frame record: 'bl' below overwrites x30 */
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

	/*
	 * Main loop: MAX_STRIDE blocks are decrypted together. Copies of the
	 * leading ciphertext blocks are kept in spare registers for the xor;
	 * the remaining ones are re-read from the input after rewinding x1.
	 */
.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x			/* fewer than MAX_STRIDE blocks left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32			/* rewind to the 4th ct block */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16			/* rewind to the 4th ct block */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the subs: w4 = blocks left */
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)


2458c2ecf20Sopenharmony_ci	/*
2468c2ecf20Sopenharmony_ci	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
2478c2ecf20Sopenharmony_ci	 *		       int rounds, int bytes, u8 const iv[])
2488c2ecf20Sopenharmony_ci	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
2498c2ecf20Sopenharmony_ci	 *		       int rounds, int bytes, u8 const iv[])
2508c2ecf20Sopenharmony_ci	 */
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_ciAES_FUNC_START(aes_cbc_cts_encrypt)
2538c2ecf20Sopenharmony_ci	adr_l		x8, .Lcts_permute_table
2548c2ecf20Sopenharmony_ci	sub		x4, x4, #16
2558c2ecf20Sopenharmony_ci	add		x9, x8, #32
2568c2ecf20Sopenharmony_ci	add		x8, x8, x4
2578c2ecf20Sopenharmony_ci	sub		x9, x9, x4
2588c2ecf20Sopenharmony_ci	ld1		{v3.16b}, [x8]
2598c2ecf20Sopenharmony_ci	ld1		{v4.16b}, [x9]
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
2628c2ecf20Sopenharmony_ci	ld1		{v1.16b}, [x1]
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	ld1		{v5.16b}, [x5]			/* get iv */
2658c2ecf20Sopenharmony_ci	enc_prepare	w3, x2, x6
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
2688c2ecf20Sopenharmony_ci	tbl		v1.16b, {v1.16b}, v4.16b
2698c2ecf20Sopenharmony_ci	encrypt_block	v0, w3, x2, x6, w7
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci	eor		v1.16b, v1.16b, v0.16b
2728c2ecf20Sopenharmony_ci	tbl		v0.16b, {v0.16b}, v3.16b
2738c2ecf20Sopenharmony_ci	encrypt_block	v1, w3, x2, x6, w7
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_ci	add		x4, x0, x4
2768c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x4]			/* overlapping stores */
2778c2ecf20Sopenharmony_ci	st1		{v1.16b}, [x0]
2788c2ecf20Sopenharmony_ci	ret
2798c2ecf20Sopenharmony_ciAES_FUNC_END(aes_cbc_cts_encrypt)
2808c2ecf20Sopenharmony_ci
/*
 * CBC decryption with ciphertext stealing for the final two blocks.
 *
 * Register use on entry:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = bytes, x5 = iv
 *
 * The caller presumably guarantees 16 < bytes <= 32 -- TODO confirm against
 * the C glue code.
 */
AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	/*
	 * 'bytes' is an int, so only w4 is defined on entry. Subtract in the
	 * 32-bit view: the write to w4 zero-extends into x4, which is used as
	 * a 64-bit offset below.
	 */
	sub		w4, w4, #16			/* x4 = size of the trailing part */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]			/* permute vector at table + x4 */
	ld1		{v4.16b}, [x9]			/* permute vector at table + 32 - x4 */

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	/* tbx leaves the bytes of v0 untouched where the index is out of range */
	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)

3108c2ecf20Sopenharmony_ci	.section	".rodata", "a"
3118c2ecf20Sopenharmony_ci	.align		6
3128c2ecf20Sopenharmony_ci.Lcts_permute_table:
3138c2ecf20Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
3148c2ecf20Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
3158c2ecf20Sopenharmony_ci	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
3168c2ecf20Sopenharmony_ci	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
3178c2ecf20Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
3188c2ecf20Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
3198c2ecf20Sopenharmony_ci	.previous
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci	/*
3238c2ecf20Sopenharmony_ci	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
3248c2ecf20Sopenharmony_ci	 *		   int blocks, u8 ctr[])
3258c2ecf20Sopenharmony_ci	 */
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ciAES_FUNC_START(aes_ctr_encrypt)
3288c2ecf20Sopenharmony_ci	stp		x29, x30, [sp, #-16]!
3298c2ecf20Sopenharmony_ci	mov		x29, sp
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	enc_prepare	w3, x2, x6
3328c2ecf20Sopenharmony_ci	ld1		{vctr.16b}, [x5]
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci	umov		x6, vctr.d[1]		/* keep swabbed ctr in reg */
3358c2ecf20Sopenharmony_ci	rev		x6, x6
3368c2ecf20Sopenharmony_ci	cmn		w6, w4			/* 32 bit overflow? */
3378c2ecf20Sopenharmony_ci	bcs		.Lctrloop
3388c2ecf20Sopenharmony_ci.LctrloopNx:
3398c2ecf20Sopenharmony_ci	subs		w4, w4, #MAX_STRIDE
3408c2ecf20Sopenharmony_ci	bmi		.Lctr1x
3418c2ecf20Sopenharmony_ci	add		w7, w6, #1
3428c2ecf20Sopenharmony_ci	mov		v0.16b, vctr.16b
3438c2ecf20Sopenharmony_ci	add		w8, w6, #2
3448c2ecf20Sopenharmony_ci	mov		v1.16b, vctr.16b
3458c2ecf20Sopenharmony_ci	add		w9, w6, #3
3468c2ecf20Sopenharmony_ci	mov		v2.16b, vctr.16b
3478c2ecf20Sopenharmony_ci	add		w9, w6, #3
3488c2ecf20Sopenharmony_ci	rev		w7, w7
3498c2ecf20Sopenharmony_ci	mov		v3.16b, vctr.16b
3508c2ecf20Sopenharmony_ci	rev		w8, w8
3518c2ecf20Sopenharmony_ciST5(	mov		v4.16b, vctr.16b		)
3528c2ecf20Sopenharmony_ci	mov		v1.s[3], w7
3538c2ecf20Sopenharmony_ci	rev		w9, w9
3548c2ecf20Sopenharmony_ciST5(	add		w10, w6, #4			)
3558c2ecf20Sopenharmony_ci	mov		v2.s[3], w8
3568c2ecf20Sopenharmony_ciST5(	rev		w10, w10			)
3578c2ecf20Sopenharmony_ci	mov		v3.s[3], w9
3588c2ecf20Sopenharmony_ciST5(	mov		v4.s[3], w10			)
3598c2ecf20Sopenharmony_ci	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
3608c2ecf20Sopenharmony_ciST4(	bl		aes_encrypt_block4x		)
3618c2ecf20Sopenharmony_ciST5(	bl		aes_encrypt_block5x		)
3628c2ecf20Sopenharmony_ci	eor		v0.16b, v5.16b, v0.16b
3638c2ecf20Sopenharmony_ciST4(	ld1		{v5.16b}, [x1], #16		)
3648c2ecf20Sopenharmony_ci	eor		v1.16b, v6.16b, v1.16b
3658c2ecf20Sopenharmony_ciST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
3668c2ecf20Sopenharmony_ci	eor		v2.16b, v7.16b, v2.16b
3678c2ecf20Sopenharmony_ci	eor		v3.16b, v5.16b, v3.16b
3688c2ecf20Sopenharmony_ciST5(	eor		v4.16b, v6.16b, v4.16b		)
3698c2ecf20Sopenharmony_ci	st1		{v0.16b-v3.16b}, [x0], #64
3708c2ecf20Sopenharmony_ciST5(	st1		{v4.16b}, [x0], #16		)
3718c2ecf20Sopenharmony_ci	add		x6, x6, #MAX_STRIDE
3728c2ecf20Sopenharmony_ci	rev		x7, x6
3738c2ecf20Sopenharmony_ci	ins		vctr.d[1], x7
3748c2ecf20Sopenharmony_ci	cbz		w4, .Lctrout
3758c2ecf20Sopenharmony_ci	b		.LctrloopNx
3768c2ecf20Sopenharmony_ci.Lctr1x:
3778c2ecf20Sopenharmony_ci	adds		w4, w4, #MAX_STRIDE
3788c2ecf20Sopenharmony_ci	beq		.Lctrout
3798c2ecf20Sopenharmony_ci.Lctrloop:
3808c2ecf20Sopenharmony_ci	mov		v0.16b, vctr.16b
3818c2ecf20Sopenharmony_ci	encrypt_block	v0, w3, x2, x8, w7
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_ci	adds		x6, x6, #1		/* increment BE ctr */
3848c2ecf20Sopenharmony_ci	rev		x7, x6
3858c2ecf20Sopenharmony_ci	ins		vctr.d[1], x7
3868c2ecf20Sopenharmony_ci	bcs		.Lctrcarry		/* overflow? */
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci.Lctrcarrydone:
3898c2ecf20Sopenharmony_ci	subs		w4, w4, #1
3908c2ecf20Sopenharmony_ci	bmi		.Lctrtailblock		/* blocks <0 means tail block */
3918c2ecf20Sopenharmony_ci	ld1		{v3.16b}, [x1], #16
3928c2ecf20Sopenharmony_ci	eor		v3.16b, v0.16b, v3.16b
3938c2ecf20Sopenharmony_ci	st1		{v3.16b}, [x0], #16
3948c2ecf20Sopenharmony_ci	bne		.Lctrloop
3958c2ecf20Sopenharmony_ci
3968c2ecf20Sopenharmony_ci.Lctrout:
3978c2ecf20Sopenharmony_ci	st1		{vctr.16b}, [x5]	/* return next CTR value */
3988c2ecf20Sopenharmony_ci	ldp		x29, x30, [sp], #16
3998c2ecf20Sopenharmony_ci	ret
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci.Lctrtailblock:
4028c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x0]
4038c2ecf20Sopenharmony_ci	b		.Lctrout
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_ci.Lctrcarry:
4068c2ecf20Sopenharmony_ci	umov		x7, vctr.d[0]		/* load upper word of ctr  */
4078c2ecf20Sopenharmony_ci	rev		x7, x7			/* ... to handle the carry */
4088c2ecf20Sopenharmony_ci	add		x7, x7, #1
4098c2ecf20Sopenharmony_ci	rev		x7, x7
4108c2ecf20Sopenharmony_ci	ins		vctr.d[0], x7
4118c2ecf20Sopenharmony_ci	b		.Lctrcarrydone
4128c2ecf20Sopenharmony_ciAES_FUNC_END(aes_ctr_encrypt)
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ci	/*
4168c2ecf20Sopenharmony_ci	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
4178c2ecf20Sopenharmony_ci	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
4188c2ecf20Sopenharmony_ci	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
4198c2ecf20Sopenharmony_ci	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
4208c2ecf20Sopenharmony_ci	 */
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	.macro		next_tweak, out, in, tmp
4238c2ecf20Sopenharmony_ci	sshr		\tmp\().2d,  \in\().2d,   #63
4248c2ecf20Sopenharmony_ci	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
4258c2ecf20Sopenharmony_ci	add		\out\().2d,  \in\().2d,   \in\().2d
4268c2ecf20Sopenharmony_ci	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
4278c2ecf20Sopenharmony_ci	eor		\out\().16b, \out\().16b, \tmp\().16b
4288c2ecf20Sopenharmony_ci	.endm
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	.macro		xts_load_mask, tmp
4318c2ecf20Sopenharmony_ci	movi		xtsmask.2s, #0x1
4328c2ecf20Sopenharmony_ci	movi		\tmp\().2s, #0x87
4338c2ecf20Sopenharmony_ci	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
4348c2ecf20Sopenharmony_ci	.endm
4358c2ecf20Sopenharmony_ci
/*
 * XTS encryption.
 *
 * Register use on entry, following the prototype
 * aes_xts_encrypt(out, in, rk1, rounds, bytes, rk2, iv, first):
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = bytes,
 *   x5 = rk2, x6 = iv, w7 = first
 *
 * v4 holds the current tweak and is written back to [x6] on exit. When
 * 'first' is non-zero the IV is first encrypted with rk2 to form the
 * initial tweak. A length that is not a multiple of 16 is finished with
 * ciphertext stealing. xts_cts_skip_tw and xts_reload_mask are macros
 * supplied by the including file.
 */
AES_FUNC_START(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!		/* frame record: 'bl' below overwrites x30 */
	mov		x29, sp

	ld1		{v4.16b}, [x6]			/* v4 = iv / current tweak */
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #64
	bmi		.Lxtsenc1x			/* fewer than 64 bytes left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	/* v4..v7 = tweaks for the four blocks; xor in before encrypting */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	/* ...and xor the same tweaks in again afterwards */
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* carry the last tweak forward */
	cbz		w4, .Lxtsencret
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #64			/* undo the subs: w4 = bytes left */
	beq		.Lxtsencout
	subs		w4, w4, #16
	bmi		.LxtsencctsNx			/* less than one full block left */
.Lxtsencloop:
	ld1		{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	cbz		w4, .Lxtsencout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	bmi		.Lxtsenccts			/* flags from the subs: partial block next */
	st1		{v0.16b}, [x0], #16
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v0.16b}, [x0]
.Lxtsencret:
	st1		{v4.16b}, [x6]			/* return the tweak */
	ldp		x29, x30, [sp], #16
	ret

	/*
	 * Ciphertext stealing. On the Nx entry the previous ciphertext block
	 * is taken from v3 and the output pointer is stepped back over it.
	 */
.LxtsencctsNx:
	mov		v0.16b, v3.16b
	sub		x0, x0, #16
.Lxtsenccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]			/* overlapping stores */
	mov		w4, wzr			/* nothing left after this block */
	b		.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)

/*
 * XTS decryption.
 *
 * Register use on entry, following the prototype
 * aes_xts_decrypt(out, in, rk1, rounds, bytes, rk2, iv, first):
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = bytes,
 *   x5 = rk2, x6 = iv, w7 = first
 *
 * v4 holds the current tweak and is written back to [x6] on exit. When
 * 'first' is non-zero the IV is first *encrypted* with rk2 to form the
 * initial tweak. A length that is not a multiple of 16 is finished with
 * ciphertext stealing. xts_cts_skip_tw and xts_reload_mask are macros
 * supplied by the including file.
 */
AES_FUNC_START(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!		/* frame record: 'bl' below overwrites x30 */
	mov		x29, sp

	/* subtract 16 bytes if we are doing CTS */
	sub		w8, w4, #0x10
	tst		w4, #0xf
	csel		w4, w4, w8, eq

	ld1		{v4.16b}, [x6]			/* v4 = iv / current tweak */
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #64
	bmi		.Lxtsdec1x			/* fewer than 64 bytes left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	/* v4..v7 = tweaks for the four blocks; xor in before decrypting */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	/* ...and xor the same tweaks in again afterwards */
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* carry the last tweak forward */
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #64			/* undo the subs: w4 = bytes left */
	beq		.Lxtsdecout
	subs		w4, w4, #16
.Lxtsdecloop:
	ld1		{v0.16b}, [x1], #16		/* ld1 leaves the flags from the subs intact */
	bmi		.Lxtsdeccts			/* negative count: finish with CTS */
.Lxtsdecctsout:
	eor		v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	cbz		w4, .Lxtsdecout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]			/* return the tweak */
	ldp		x29, x30, [sp], #16
	ret

	/*
	 * Ciphertext stealing: the block in v0 is decrypted with the *next*
	 * tweak (v5); the block rebuilt from it and the final partial block
	 * is then decrypted with the current tweak (v4) at .Lxtsdecctsout.
	 */
.Lxtsdeccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	next_tweak	v5, v4, v8

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	eor		v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v5.16b

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b

	st1		{v2.16b}, [x4]			/* overlapping stores */
	mov		w4, wzr			/* nothing left after this block */
	b		.Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)

6178c2ecf20Sopenharmony_ci	/*
6188c2ecf20Sopenharmony_ci	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
6198c2ecf20Sopenharmony_ci	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
6208c2ecf20Sopenharmony_ci	 */
6218c2ecf20Sopenharmony_ciAES_FUNC_START(aes_mac_update)
6228c2ecf20Sopenharmony_ci	ld1		{v0.16b}, [x4]			/* get dg */
6238c2ecf20Sopenharmony_ci	enc_prepare	w2, x1, x7
6248c2ecf20Sopenharmony_ci	cbz		w5, .Lmacloop4x
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	encrypt_block	v0, w2, x1, x7, w8
6278c2ecf20Sopenharmony_ci
6288c2ecf20Sopenharmony_ci.Lmacloop4x:
6298c2ecf20Sopenharmony_ci	subs		w3, w3, #4
6308c2ecf20Sopenharmony_ci	bmi		.Lmac1x
6318c2ecf20Sopenharmony_ci	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
6328c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
6338c2ecf20Sopenharmony_ci	encrypt_block	v0, w2, x1, x7, w8
6348c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v2.16b
6358c2ecf20Sopenharmony_ci	encrypt_block	v0, w2, x1, x7, w8
6368c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v3.16b
6378c2ecf20Sopenharmony_ci	encrypt_block	v0, w2, x1, x7, w8
6388c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v4.16b
6398c2ecf20Sopenharmony_ci	cmp		w3, wzr
6408c2ecf20Sopenharmony_ci	csinv		x5, x6, xzr, eq
6418c2ecf20Sopenharmony_ci	cbz		w5, .Lmacout
6428c2ecf20Sopenharmony_ci	encrypt_block	v0, w2, x1, x7, w8
6438c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x4]			/* return dg */
6448c2ecf20Sopenharmony_ci	cond_yield	.Lmacout, x7, x8
6458c2ecf20Sopenharmony_ci	b		.Lmacloop4x
6468c2ecf20Sopenharmony_ci.Lmac1x:
6478c2ecf20Sopenharmony_ci	add		w3, w3, #4
6488c2ecf20Sopenharmony_ci.Lmacloop:
6498c2ecf20Sopenharmony_ci	cbz		w3, .Lmacout
6508c2ecf20Sopenharmony_ci	ld1		{v1.16b}, [x0], #16		/* get next pt block */
6518c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci	subs		w3, w3, #1
6548c2ecf20Sopenharmony_ci	csinv		x5, x6, xzr, eq
6558c2ecf20Sopenharmony_ci	cbz		w5, .Lmacout
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci.Lmacenc:
6588c2ecf20Sopenharmony_ci	encrypt_block	v0, w2, x1, x7, w8
6598c2ecf20Sopenharmony_ci	b		.Lmacloop
6608c2ecf20Sopenharmony_ci
6618c2ecf20Sopenharmony_ci.Lmacout:
6628c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x4]			/* return dg */
6638c2ecf20Sopenharmony_ci	mov		w0, w3
6648c2ecf20Sopenharmony_ci	ret
6658c2ecf20Sopenharmony_ciAES_FUNC_END(aes_mac_update)
666