/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5		// 2^5 = 32-byte alignment

	/*
	 * Register aliases for the values the entry points find in r0-r3:
	 *   rk     - pointer into the round-key array, advanced with 'ldm rk!'
	 *   rounds - round counter, consumed by the main loop in do_crypt
	 *   in     - address the 16-byte input block is loaded from
	 *   out    - address the 16-byte output block is stored to
	 * ttab (ip) holds the base address of the lookup table in use.
	 */
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	/*
	 * Scratch registers for the round macros.  t1 and t2 are the same
	 * physical registers as 'in' and 'out', so those two values are gone
	 * once the first round has run; do_crypt reloads 'out' from the stack
	 * before storing the result.  t0 is lr, which do_crypt saves on entry.
	 */
	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

2662306a36Sopenharmony_ci	.macro		__select, out, in, idx
2762306a36Sopenharmony_ci	.if		__LINUX_ARM_ARCH__ < 7
2862306a36Sopenharmony_ci	and		\out, \in, #0xff << (8 * \idx)
2962306a36Sopenharmony_ci	.else
3062306a36Sopenharmony_ci	ubfx		\out, \in, #(8 * \idx), #8
3162306a36Sopenharmony_ci	.endif
3262306a36Sopenharmony_ci	.endm
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci	.macro		__load, out, in, idx, sz, op
3562306a36Sopenharmony_ci	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
3662306a36Sopenharmony_ci	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
3762306a36Sopenharmony_ci	.else
3862306a36Sopenharmony_ci	ldr\op		\out, [ttab, \in, lsl #\sz]
3962306a36Sopenharmony_ci	.endif
4062306a36Sopenharmony_ci	.endm
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
4362306a36Sopenharmony_ci	__select	\out0, \in0, 0
4462306a36Sopenharmony_ci	__select	t0, \in1, 1
4562306a36Sopenharmony_ci	__load		\out0, \out0, 0, \sz, \op
4662306a36Sopenharmony_ci	__load		t0, t0, 1, \sz, \op
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci	.if		\enc
4962306a36Sopenharmony_ci	__select	\out1, \in1, 0
5062306a36Sopenharmony_ci	__select	t1, \in2, 1
5162306a36Sopenharmony_ci	.else
5262306a36Sopenharmony_ci	__select	\out1, \in3, 0
5362306a36Sopenharmony_ci	__select	t1, \in0, 1
5462306a36Sopenharmony_ci	.endif
5562306a36Sopenharmony_ci	__load		\out1, \out1, 0, \sz, \op
5662306a36Sopenharmony_ci	__select	t2, \in2, 2
5762306a36Sopenharmony_ci	__load		t1, t1, 1, \sz, \op
5862306a36Sopenharmony_ci	__load		t2, t2, 2, \sz, \op
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	eor		\out0, \out0, t0, ror #24
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	__select	t0, \in3, 3
6362306a36Sopenharmony_ci	.if		\enc
6462306a36Sopenharmony_ci	__select	\t3, \in3, 2
6562306a36Sopenharmony_ci	__select	\t4, \in0, 3
6662306a36Sopenharmony_ci	.else
6762306a36Sopenharmony_ci	__select	\t3, \in1, 2
6862306a36Sopenharmony_ci	__select	\t4, \in2, 3
6962306a36Sopenharmony_ci	.endif
7062306a36Sopenharmony_ci	__load		\t3, \t3, 2, \sz, \op
7162306a36Sopenharmony_ci	__load		t0, t0, 3, \sz, \op
7262306a36Sopenharmony_ci	__load		\t4, \t4, 3, \sz, \op
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	.ifnb		\oldcpsr
7562306a36Sopenharmony_ci	/*
7662306a36Sopenharmony_ci	 * This is the final round and we're done with all data-dependent table
7762306a36Sopenharmony_ci	 * lookups, so we can safely re-enable interrupts.
7862306a36Sopenharmony_ci	 */
7962306a36Sopenharmony_ci	restore_irqs	\oldcpsr
8062306a36Sopenharmony_ci	.endif
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	eor		\out1, \out1, t1, ror #24
8362306a36Sopenharmony_ci	eor		\out0, \out0, t2, ror #16
8462306a36Sopenharmony_ci	ldm		rk!, {t1, t2}
8562306a36Sopenharmony_ci	eor		\out1, \out1, \t3, ror #16
8662306a36Sopenharmony_ci	eor		\out0, \out0, t0, ror #8
8762306a36Sopenharmony_ci	eor		\out1, \out1, \t4, ror #8
8862306a36Sopenharmony_ci	eor		\out0, \out0, t1
8962306a36Sopenharmony_ci	eor		\out1, \out1, t2
9062306a36Sopenharmony_ci	.endm
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
9362306a36Sopenharmony_ci	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
9462306a36Sopenharmony_ci	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
9562306a36Sopenharmony_ci	.endm
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
9862306a36Sopenharmony_ci	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
9962306a36Sopenharmony_ci	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
10062306a36Sopenharmony_ci	.endm
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	.macro		do_crypt, round, ttab, ltab, bsz
10362306a36Sopenharmony_ci	push		{r3-r11, lr}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	// Load keys first, to reduce latency in case they're not cached yet.
10662306a36Sopenharmony_ci	ldm		rk!, {r8-r11}
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	ldr		r4, [in]
10962306a36Sopenharmony_ci	ldr		r5, [in, #4]
11062306a36Sopenharmony_ci	ldr		r6, [in, #8]
11162306a36Sopenharmony_ci	ldr		r7, [in, #12]
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci#ifdef CONFIG_CPU_BIG_ENDIAN
11462306a36Sopenharmony_ci	rev_l		r4, t0
11562306a36Sopenharmony_ci	rev_l		r5, t0
11662306a36Sopenharmony_ci	rev_l		r6, t0
11762306a36Sopenharmony_ci	rev_l		r7, t0
11862306a36Sopenharmony_ci#endif
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	eor		r4, r4, r8
12162306a36Sopenharmony_ci	eor		r5, r5, r9
12262306a36Sopenharmony_ci	eor		r6, r6, r10
12362306a36Sopenharmony_ci	eor		r7, r7, r11
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	mov_l		ttab, \ttab
12662306a36Sopenharmony_ci	/*
12762306a36Sopenharmony_ci	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
12862306a36Sopenharmony_ci	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
12962306a36Sopenharmony_ci	 * intended to make cache-timing attacks more difficult.  They may not
13062306a36Sopenharmony_ci	 * be fully prevented, however; see the paper
13162306a36Sopenharmony_ci	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
13262306a36Sopenharmony_ci	 * ("Cache-timing attacks on AES") for a discussion of the many
13362306a36Sopenharmony_ci	 * difficulties involved in writing truly constant-time AES software.
13462306a36Sopenharmony_ci	 */
13562306a36Sopenharmony_ci	 save_and_disable_irqs	t0
13662306a36Sopenharmony_ci	.set		i, 0
13762306a36Sopenharmony_ci	.rept		1024 / 128
13862306a36Sopenharmony_ci	ldr		r8, [ttab, #i + 0]
13962306a36Sopenharmony_ci	ldr		r9, [ttab, #i + 32]
14062306a36Sopenharmony_ci	ldr		r10, [ttab, #i + 64]
14162306a36Sopenharmony_ci	ldr		r11, [ttab, #i + 96]
14262306a36Sopenharmony_ci	.set		i, i + 128
14362306a36Sopenharmony_ci	.endr
14462306a36Sopenharmony_ci	push		{t0}		// oldcpsr
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	tst		rounds, #2
14762306a36Sopenharmony_ci	bne		1f
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
15062306a36Sopenharmony_ci	\round		r4, r5, r6, r7, r8, r9, r10, r11
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci1:	subs		rounds, rounds, #4
15362306a36Sopenharmony_ci	\round		r8, r9, r10, r11, r4, r5, r6, r7
15462306a36Sopenharmony_ci	bls		2f
15562306a36Sopenharmony_ci	\round		r4, r5, r6, r7, r8, r9, r10, r11
15662306a36Sopenharmony_ci	b		0b
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci2:	.ifb		\ltab
15962306a36Sopenharmony_ci	add		ttab, ttab, #1
16062306a36Sopenharmony_ci	.else
16162306a36Sopenharmony_ci	mov_l		ttab, \ltab
16262306a36Sopenharmony_ci	// Prefetch inverse S-box for final round; see explanation above
16362306a36Sopenharmony_ci	.set		i, 0
16462306a36Sopenharmony_ci	.rept		256 / 64
16562306a36Sopenharmony_ci	ldr		t0, [ttab, #i + 0]
16662306a36Sopenharmony_ci	ldr		t1, [ttab, #i + 32]
16762306a36Sopenharmony_ci	.set		i, i + 64
16862306a36Sopenharmony_ci	.endr
16962306a36Sopenharmony_ci	.endif
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	pop		{rounds}	// oldcpsr
17262306a36Sopenharmony_ci	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci#ifdef CONFIG_CPU_BIG_ENDIAN
17562306a36Sopenharmony_ci	rev_l		r4, t0
17662306a36Sopenharmony_ci	rev_l		r5, t0
17762306a36Sopenharmony_ci	rev_l		r6, t0
17862306a36Sopenharmony_ci	rev_l		r7, t0
17962306a36Sopenharmony_ci#endif
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	ldr		out, [sp]
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	str		r4, [out]
18462306a36Sopenharmony_ci	str		r5, [out, #4]
18562306a36Sopenharmony_ci	str		r6, [out, #8]
18662306a36Sopenharmony_ci	str		r7, [out, #12]
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	pop		{r3-r11, pc}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	.align		3
19162306a36Sopenharmony_ci	.ltorg
19262306a36Sopenharmony_ci	.endm
19362306a36Sopenharmony_ci
/*
 * __aes_arm_encrypt - transform one 16-byte block using fround.
 *
 * In: r0 = rk, r1 = rounds, r2 = in, r3 = out (see the register aliases).
 * NOTE(review): the matching C prototype is not visible in this file;
 * confirm it against the C glue code.
 *
 * crypto_ft_tab serves every round.  No separate last-round table is
 * given, so the last round does byte loads from crypto_ft_tab + 1 with the
 * index scaled by 4 (bsz = 2).
 */
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

19862306a36Sopenharmony_ci	.align		5
19962306a36Sopenharmony_ciENTRY(__aes_arm_decrypt)
20062306a36Sopenharmony_ci	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
20162306a36Sopenharmony_ciENDPROC(__aes_arm_decrypt)
202