/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)
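
/*
 * The mode routines generated by aes-modes.S (included below) declare
 * their entry points via AES_FUNC_START/AES_FUNC_END, so every function
 * in this translation unit is emitted with a neon_ symbol prefix
 * (e.g. neon_aes_ecb_encrypt).
 */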

	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* special case for the neon-bs driver calling into this one for CTS */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
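
	/*
	 * Multiplying by x is a left shift by one plus a conditional
	 * reduction: sshr #7 smears the top bit of each byte into an
	 * all-ones/all-zeroes mask, which selects the reduction constant
	 * 0x1b (x^4 + x^3 + x + 1) held in \const. For example,
	 * 0x80 * x = (0x80 << 1) ^ 0x1b = 0x1b, matching
	 * x^7 * x = x^8 = x^4 + x^3 + x + 1 (mod the AES polynomial).
	 */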

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
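
	/*
	 * Multiplying by x^2 shifts left by two and reduces both bits
	 * that fall out of the byte at once: ushr #6 isolates bits 7:6,
	 * and the carry-less pmul by 0x1b turns them into the combined
	 * reduction term (0x1b for bit 6, 0x36 = x * 0x1b for bit 7).
	 * For example, 0x40 * x^2 = (0x40 << 2) ^ pmul(0x01, 0x1b) = 0x1b.
	 */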

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm
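
	/*
	 * Fixed register assignment after prepare:
	 *   v12       0x1b in every byte (GF(2^8) reduction constant)
	 *   v13       ShiftRows (resp. Inv ShiftRows) tbl permutation
	 *   v14       .Lror32by8 tbl permutation
	 *   v16-v31   the full 256-byte Sbox (resp. inverse Sbox)
	 */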

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

	/* apply SubBytes transformation using the preloaded Sbox */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
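
	/*
	 * tbl/tbx index at most 64 table bytes, so the 256-byte Sbox is
	 * consumed in four 64-byte chunks. v15 holds 0x40 at this point
	 * (set up by do_block): each sub rebases the indices for the next
	 * chunk, tbl zeroes lanes whose index is out of range, and tbx
	 * leaves out-of-range lanes untouched, so every byte ends up
	 * translated exactly once.
	 */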

	/* apply MixColumns transformation */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm
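
	/*
	 * With rot8 and rot16 denoting byte rotations within each 32-bit
	 * column (the tbl with v14 and the rev32 on .8h lanes), the
	 * forward path computes, for column bytes a_0..a_3:
	 *
	 *   out_i = rot8(a ^ rot16(a) ^ x*a)_i ^ rot16(a)_i ^ (x*a)_i
	 *         = 2*a_i ^ 3*a_(i+1) ^ a_(i+2) ^ a_(i+3)
	 *
	 * i.e. the { 2, 3, 1, 1 } MixColumns circulant. The inverse
	 * circulant { 14, 11, 13, 9 } factors into { 2, 3, 1, 1 } times
	 * { 5, 0, 4, 0 }, so the decrypt path pre-multiplies each column
	 * by a ^ x^2*a ^ rot16(x^2*a), i.e. 5*a_i ^ 4*a_(i+2), and then
	 * falls through to the forward transform.
	 */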

	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm
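
	/*
	 * v15 does double duty in the round loop: it carries the round
	 * key into the AddRoundKey eor, is reloaded with 0x40 for
	 * sub_bytes, and is then refilled with the next round key. The
	 * final iteration branches past mix_columns, giving the usual
	 * last round of ShiftRows/SubBytes/AddRoundKey only. Note that
	 * ShiftRows runs before SubBytes here; a byte permutation and a
	 * bytewise substitution commute, so the result is unchanged.
	 */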

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the ones
	 * above, but applied to multiple AES states in parallel.
	 */

	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
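
	/*
	 * The single-state sub_bytes chain is fully serialised on \in;
	 * interleaving four states lets the index-rebasing subs of one
	 * state issue while the table lookups of another are still in
	 * flight, which helps hide the tbl/tbx latency.
	 */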

	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm
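
	/*
	 * mix_columns_2x is invoked twice per round: with v12-v15 holding
	 * constants/round key and v16-v31 the Sbox, only v8-v11 are free
	 * as scratch, which is enough for two states at a time.
	 */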

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"
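
/*
 * aes-modes.S expands the macros above into the actual mode routines
 * (ECB, CBC, CTR, XTS, ...), each emitted with the neon_ prefix set up
 * by AES_FUNC_START at the top of this file.
 */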

	.section	".rodata", "a"
	.align		4
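	/*
	 * tbl index vectors implementing ShiftRows and Inv ShiftRows on
	 * the column-major AES state: byte i of the result is taken from
	 * byte perm[i] of the input.
	 */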
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

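	/* tbl index vector rotating each 32-bit word right by 8 bits */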
.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201