/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3
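	/*
	 * Note: t1 and t2 alias the 'in' and 'out' argument registers.  This
	 * works out because the input block is consumed in full before the
	 * first round uses t1/t2, and the output pointer is reloaded from the
	 * stack just before the result is stored.
	 */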

	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

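	/*
	 * __select extracts byte \idx of \in; on < ARMv7 the byte is masked in
	 * place rather than shifted down.  __load then indexes the lookup
	 * table at ttab with that value, folding the missing down-shift into
	 * the addressing mode on the pre-v7 path.  \sz is log2 of the table
	 * entry size (2 for 32-bit entries, 0 for a byte table) and \op is
	 * blank for word loads or 'b' for byte loads.
	 */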
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

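	/*
	 * __hround produces two output columns (\out0, \out1) of one AES
	 * round: each is the XOR of four rotated table lookups (one per byte
	 * of the selected input columns) and a round key word fetched with
	 * 'ldm rk!'.  \enc selects the forward or inverse ShiftRows pattern;
	 * \t3 and \t4 are caller-provided scratch registers.
	 */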
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

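	/*
	 * fround/iround expand __hround twice to compute all four columns of a
	 * forward or inverse round.  The second expansion reuses two of the
	 * input registers as scratch; this is fine because those words have
	 * already been consumed by the time __hround overwrites them.
	 */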
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

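	/*
	 * do_crypt is instantiated once for encryption and once for
	 * decryption.  \round is fround or iround, \ttab the 1024-byte main
	 * lookup table, \ltab an optional table used only for the final round
	 * (the inverse S-box for decryption; left blank for encryption), and
	 * \bsz the \sz value passed to the final round's byte-wide lookups.
	 */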
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

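	// The byte indexing in the rounds assumes a little-endian word layout,
	// so byte-swap the state on big-endian kernels (and swap it back again
	// before the result is stored below).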
#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

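	// All but the final round are executed here, two rounds per pass
	// through the loop below.  Entering at 0: or 1: depending on bit 1 of
	// 'rounds' makes the pairing come out right for 10, 12 or 14 rounds;
	// the final round is done separately with byte-wide lookups.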
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

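	// Final round: with no \ltab (encryption), advance ttab by one byte so
	// the byte-wide lookups below hit the plain S-box byte stored inside
	// each 32-bit ft_tab entry.  Otherwise (decryption), switch to \ltab,
	// the inverse S-box, and prefetch it as well.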
2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

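	// 'out' (r3) has been clobbered as a temporary, so reload the output
	// pointer that was saved on the stack by the push at function entry.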
	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

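	/*
	 * Called from the C glue code with, roughly, the following prototype
	 * (the exact declaration lives in the glue source):
	 *
	 *	void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
	 *
	 * and likewise for __aes_arm_decrypt.
	 */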
ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)