/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Function entry/exit wrappers: both forward to SYM_FUNC_START/SYM_FUNC_END
 * after pasting a "neon_" prefix in front of the name they are given.
 */
#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)

/*
 * Symbolic names for NEON registers. Note that xtsmask and cbciv are both
 * bound to v7, while vctr is bound to v4. None of these names is referenced
 * by the code visible in this file; presumably they are consumed by the
 * included aes-modes.S (verify there before changing a binding).
 */
	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

/*
 * xts_reload_mask - forward to xts_load_mask
 *
 * tmp: passed through unchanged as the only argument of xts_load_mask
 *
 * NOTE(review): xts_load_mask is not defined in this file; presumably it is
 * provided by aes-modes.S, which is also where this macro would be expanded.
 */
	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

/*
 * xts_cts_skip_tw - special case for the neon-bs driver calling into this
 * one for CTS
 *
 * reg: general purpose register to test
 * lbl: branch target
 *
 * Branches to lbl when bit 1 of reg is set; falls through otherwise.
 */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

/*
 * mul_by_x - multiply by polynomial 'x' in GF(2^8)
 *
 * out:   destination vector
 * in:    source vector
 * temp:  scratch vector, clobbered
 * const: vector holding the reduction constant in every lane (the callers
 *        in this file pass v12, which the prepare macro fills with 0x1b)
 *
 * All operands are used verbatim, so the caller supplies the arrangement
 * suffix; the callers in this file pass .16b operands. temp must not alias
 * in or out: in is read again after temp has been written, and out is
 * written while temp is still live.
 */
	.macro		mul_by_x, out, in, temp, const
	/* arithmetic shift: a byte lane becomes 0xff if its top bit was set */
	sshr		\temp, \in, #7
	/* the multiplication itself; the top bit of each lane is shifted out */
	shl		\out, \in, #1
	/* keep the reduction constant only in lanes that lost a set bit */
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

/*
 * mul_by_x2 - multiply by polynomial 'x^2' in GF(2^8)
 *
 * out:   destination vector
 * in:    source vector
 * temp:  scratch vector, clobbered
 * const: vector holding the reduction constant in every lane (the caller
 *        in this file passes v12, which the prepare macro fills with 0x1b)
 *
 * All operands are used verbatim, so the caller supplies the arrangement
 * suffix; the caller in this file passes .16b operands. temp must not alias
 * in or out.
 */
	.macro		mul_by_x2, out, in, temp, const
	/* isolate the two top bits of each lane, which shl #2 discards */
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	/* polynomial multiply of those bits by the reduction constant */
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

/*
 * prepare - preload the entire Sbox and the constants used by the rounds
 *
 * sbox:      symbol of the 256-byte lookup table to load
 * shiftrows: symbol of the 16-byte permutation vector to load
 * temp:      general purpose register used as scratch and as the table
 *            pointer; clobbered
 *
 * Register state on exit:
 *   v12      0x1b in every byte
 *   v13      the 16 bytes at shiftrows
 *   v14      the 16 bytes at .Lror32by8
 *   v16-v31  the 256 bytes starting at sbox, in ascending address order
 *
 * ldr_l and adr_l are helper macros that are not defined in this file
 * (presumably from <asm/assembler.h>).
 */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	/* four post-incremented loads of 64 bytes each, except the last */
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

/*
 * enc_prepare - do preload for encryption
 *
 * ignore0, ignore1: accepted but unused
 * temp:             scratch register handed on to prepare; clobbered
 *
 * Expands prepare with crypto_aes_sbox and .LForward_ShiftRows.
 */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

/*
 * enc_switch_key - intentionally empty
 *
 * ignore0, ignore1, temp: accepted but unused; no instructions are emitted.
 */
	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

/*
 * dec_prepare - do preload for decryption
 *
 * ignore0, ignore1: accepted but unused
 * temp:             scratch register handed on to prepare; clobbered
 *
 * Expands prepare with crypto_aes_inv_sbox and .LReverse_ShiftRows.
 */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

/*
 * sub_bytes - apply SubBytes transformation using the preloaded Sbox
 *
 * in: bare register name (no arrangement suffix) of the state; transformed
 *     in place
 *
 * Expects the lookup table in v16-v31 and v15 to hold the per-lane index
 * step between table quarters (do_block loads v15 with 0x40 right before
 * expanding this macro). Clobbers v9, v10 and v11.
 *
 * One tbl/tbx covers the 64 bytes held in four registers, so the table is
 * consulted in four quarters, lowering the index vector by v15 each time.
 * tbl writes zero for an out-of-range index and tbx leaves the destination
 * lane alone, so each lane ends up with the entry from the one quarter its
 * original index falls into.
 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

/*
 * mix_columns - apply MixColumns transformation
 *
 * in:  bare register name (no arrangement suffix) of the state; transformed
 *      in place
 * enc: assembly-time constant; when it equals 0 the Inverse MixColumns
 *      pre-multiplication is emitted in front of the common sequence
 *
 * Uses v12 as the reduction constant and v14 as a tbl permutation (both are
 * loaded by the prepare macro). Clobbers v8 and v9.
 */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	/* swap the two halfwords inside every 32-bit word of v8 */
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	/* v9 = state multiplied by 'x' */
	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	/* v8 = state with the halfwords of every 32-bit word swapped */
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	/* permute the bytes of the state through the vector held in v14 */
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

/*
 * do_block - run all rounds of the cipher on a single state
 *
 * enc:    1 for the forward MixColumns, 0 for the inverse one
 * in:     bare register name (no arrangement suffix) of the state;
 *         transformed in place
 * rounds: round count; copied into i, not modified
 * rk:     base address of the round keys; not modified
 * rkp:    scratch register that walks the round keys; clobbered
 * i:      scratch register used as the round countdown; clobbered
 *
 * Reads rounds + 1 round keys of 16 bytes each, starting at [rk]: one
 * before the loop and one per loop iteration. Clobbers v8-v11, v15 and the
 * condition flags. 1111 and 2222 are numeric local labels, referenced with
 * the b/f suffixes so that the macro can be expanded more than once.
 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	/* v15 now doubles as the index step that sub_bytes expects */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	/* fetch the next round key; the load leaves the flags from subs intact */
	ld1		{v15.4s}, [\rkp], #16
	/* the last round has no MixColumns step */
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

/*
 * encrypt_block - do_block with enc fixed to 1
 *
 * in, rounds, rk, rkp, i: forwarded unchanged to do_block
 */
	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

/*
 * decrypt_block - do_block with enc fixed to 0
 *
 * in, rounds, rk, rkp, i: forwarded unchanged to do_block
 */
	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * macros above, but applied to several AES states in parallel.
	 */

/*
 * sub_bytes_4x - SubBytes on four states at once
 *
 * in0-in3: bare register names (no arrangement suffix) of the four states;
 *          each is transformed in place
 *
 * Same table walk as sub_bytes: tbl for the first quarter of the table in
 * v16-v31, then three tbx passes with the index vector lowered by v15
 * before each one (do_block_4x loads v15 with 0x40 right before expanding
 * this macro). v8-v11 carry the running index vectors of in0-in3 and are
 * clobbered. The instructions belonging to the four states are interleaved
 * rather than grouped per state.
 */
	.macro		sub_bytes_4x, in0, in1, in2, in3
	/* first quarter, and index vectors for the second */
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	/* second quarter, lowering each index vector once it has been used */
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	/* third quarter */
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	/* fourth quarter */
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

/*
 * mul_by_x_2x - two-state version of mul_by_x
 *
 * out0, out1: destinations for in0 * 'x' and in1 * 'x'
 * in0, in1:   sources
 * tmp0, tmp1: scratch, clobbered
 * const:      register holding the reduction constant in every byte
 *
 * Unlike mul_by_x, every operand is a bare register name; the .16b
 * arrangement is appended here. The two independent computations are
 * interleaved instruction by instruction.
 */
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	/* sshr #7 turns each byte into 0xff or 0x00 depending on its top bit */
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

/*
 * mul_by_x2_2x - two-state version of mul_by_x2
 *
 * out0, out1: destinations for in0 * 'x^2' and in1 * 'x^2'
 * in0, in1:   sources
 * tmp0, tmp1: scratch, clobbered
 * const:      register holding the reduction constant in every byte
 *
 * Unlike mul_by_x2, every operand is a bare register name; the .16b
 * arrangement is appended here. The two independent computations are
 * interleaved instruction by instruction.
 */
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	/* ushr #6 keeps the two top bits of each byte, which shl #2 discards */
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

/*
 * mix_columns_2x - MixColumns on two states at once
 *
 * in0, in1: bare register names (no arrangement suffix) of the two states;
 *           each is transformed in place
 * enc:      assembly-time constant; when it equals 0 the Inverse MixColumns
 *           pre-multiplication is emitted in front of the common sequence
 *
 * Uses v12 as the reduction constant and v14 as a tbl permutation (both are
 * loaded by the prepare macro). Clobbers v8, v9, v10 and v11.
 */
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	/* v8/v9 = the two states multiplied by 'x' */
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	/* v10/v11 = the states with the halfwords of every 32-bit word swapped */
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	/* permute the bytes of each state through the vector held in v14 */
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

/*
 * do_block_4x - run all rounds of the cipher on four states with one key
 *
 * enc:     1 for the forward MixColumns, 0 for the inverse one
 * in0-in3: bare register names (no arrangement suffix) of the four states;
 *          each is transformed in place
 * rounds:  round count; copied into i, not modified
 * rk:      base address of the round keys; not modified
 * rkp:     scratch register that walks the round keys; clobbered
 * i:       scratch register used as the round countdown; clobbered
 *
 * Reads rounds + 1 round keys of 16 bytes each, starting at [rk]: one
 * before the loop and one per loop iteration. Clobbers v8-v11, v15 and the
 * condition flags. 1111 and 2222 are numeric local labels, referenced with
 * the b/f suffixes so that the macro can be expanded more than once.
 */
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	/* v15 now doubles as the index step that sub_bytes_4x expects */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	/* fetch the next round key; the load leaves the flags from subs intact */
	ld1		{v15.4s}, [\rkp], #16
	/* the last round has no MixColumns step */
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

/*
 * encrypt_block4x - do_block_4x with enc fixed to 1
 *
 * in0-in3, rounds, rk, rkp, i: forwarded unchanged to do_block_4x
 */
	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

/*
 * decrypt_block4x - do_block_4x with enc fixed to 0
 *
 * in0-in3, rounds, rk, rkp, i: forwarded unchanged to do_block_4x
 */
	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

/*
 * aes-modes.S is included textually at this point, i.e. after every macro
 * above has been defined; presumably that is where they get expanded.
 */
#include "aes-modes.S"

/*
 * 16-byte index vectors for tbl, kept in read-only data.
 *
 * .LForward_ShiftRows and .LReverse_ShiftRows are the permutations that
 * enc_prepare and dec_prepare hand to prepare, which loads the chosen one
 * into v13. .Lror32by8 is loaded into v14 by prepare.
 */
	.section	".rodata", "a"
	.align		4
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201