1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
4  * as specified in rfc8998
5  * https://datatracker.ietf.org/doc/html/rfc8998
6  *
7  * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
8  */
9 
10 #include <linux/linkage.h>
11 #include <linux/cfi_types.h>
12 #include <asm/assembler.h>
13 #include "sm4-ce-asm.h"
14 
.arch	armv8-a+crypto

/*
 * Map each vector register name of the form .Lv<n>.4s to its register
 * number <n>, so the hand-assembled .inst encoding below can look the
 * number up from a "vN.4s" operand.  Only the registers actually used
 * with the sm4e macro in this file are listed.
 */
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/*
 * sm4e vd.4s, vn.4s: SM4 round function instruction, emitted as a raw
 * opcode (0xcec08400 base, Rn in bits [9:5], Rd in bits [4:0]) —
 * presumably so the file assembles with toolchains that lack SM4
 * mnemonic support; TODO confirm minimum binutils version.
 */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm
24 
/* Register macros */

/* Running CBC-MAC state; kept live in v16 across all loops below. */
#define RMAC	v16

/* Helper macros. */

/*
 * inc_le128(vctr): materialize the current 128-bit CTR block into vctr
 * and post-increment the counter.
 *
 * The counter is carried in the x7:x8 pair in native (CPU-endian) form:
 * x7 = upper 64 bits, x8 = lower 64 bits (see the ldp/rev sequences at
 * each function entry).  The pair is moved into vctr, then rev64
 * byte-swaps each 64-bit lane so vctr holds the big-endian counter
 * block that gets encrypted.  The adds/adc pair increments x7:x8 as a
 * 128-bit integer; the rev64 is safely interleaved between them because
 * NEON instructions do not affect the NZCV flags, so the carry from
 * adds survives to the adc.
 */
#define inc_le128(vctr)					\
		mov		vctr.d[1], x8;		\
		mov		vctr.d[0], x7;		\
		adds		x8, x8, #1;		\
		rev64		vctr.16b, vctr.16b;	\
		adc		x7, x7, xzr;
37 
38 
.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: mac
	 *   x2: src
	 *   w3: nblocks
	 *
	 * Fold w3 full 16-byte blocks from src into the CBC-MAC state at
	 * [x1].  Note the state is kept in "deferred" form: each step
	 * computes mac = E(mac) ^ block, i.e. the final encryption of the
	 * last absorbed block is NOT performed here — it is completed by
	 * the next call into this function or by sm4_ce_ccm_final / the
	 * CCM tail paths, which encrypt RMAC first.
	 *
	 * NOTE(review): w3 is decremented before being tested in the 1x
	 * loop, so the caller must guarantee w3 > 0 — confirm in the C
	 * glue code.
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

.Lcbcmac_loop_4x:
	cmp		w3, #4
	blt		.Lcbcmac_loop_1x

	sub		w3, w3, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* mac = E(mac) ^ block, four blocks serialized through RMAC */
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b

	cbz		w3, .Lcbcmac_end
	b		.Lcbcmac_loop_4x

.Lcbcmac_loop_1x:
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	/* mac = E(mac) ^ block */
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b

	cbnz		w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	/* write back the (still deferred) MAC state */
	st1		{RMAC.16b}, [x1]
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)
85 
.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit)
	 *   x2: mac
	 *
	 * Produce the final CCM tag: completes the deferred CBC-MAC
	 * encryption (RMAC was left as E(prev) ^ last_block by the update
	 * paths) and masks it with the encrypted initial counter block:
	 *   tag = E(mac) ^ E(ctr0)
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x2]
	ld1		{v0.16b}, [x1]

	/* encrypt mac and ctr0 in parallel */
	SM4_CRYPT_BLK2(RMAC, v0)

	/* en-/decrypt the mac with ctr0 */
	eor		RMAC.16b, RMAC.16b, v0.16b
	st1		{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)
106 
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 *
	 * CCM encryption: dst = src ^ E(ctr++) while folding the
	 * plaintext into the CBC-MAC state at [x5].  The MAC is kept in
	 * deferred form (see sm4_ce_cbcmac_update).  Register roles:
	 * x7:x8 = counter in native order (high:low), v8-v11 = counter /
	 * keystream blocks, v0-v3 = plaintext blocks.
	 */
	SM4_PREPARE(x0)

	/* load BE counter and byte-swap into native x7 (hi) : x8 (lo) */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_enc_loop_4x:
	cmp		w4, #(4 * 16)
	blt		.Lccm_enc_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* per block: keystream = E(ctr), mac = E(mac); then
	 * ciphertext = keystream ^ plaintext, mac ^= plaintext */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v3.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_4x

.Lccm_enc_loop_1x:
	cmp		w4, #16
	blt		.Lccm_enc_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_1x

.Lccm_enc_tail:
	/* partial final block (1..15 bytes): complete the deferred MAC
	 * encryption and generate one keystream block, then handle the
	 * remaining bytes one at a time */
	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

.Lccm_enc_tail_loop:
	/* x0 (key pointer) is reused as scratch here; SM4_PREPARE already
	 * consumed the round keys — presumably into NEON registers, see
	 * sm4-ce-asm.h */
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w0		/* w6 = MAC ^ input */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_enc_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_enc_ret:
	/* NOTE: the tail path returns here WITHOUT writing the counter
	 * back — a partial block must therefore be the last call for this
	 * message (confirm against the C glue code) */
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)
218 
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 *
	 * CCM decryption: dst = src ^ E(ctr++).  Mirrors sm4_ce_ccm_enc,
	 * except the MAC absorbs the recovered PLAINTEXT (the decrypted
	 * output), not the ciphertext input.  The MAC is kept in deferred
	 * form (see sm4_ce_cbcmac_update).  Register roles: x7:x8 =
	 * counter in native order (high:low), v8-v11 = counter/keystream
	 * then plaintext blocks, v0-v3 = ciphertext blocks.
	 */
	SM4_PREPARE(x0)

	/* load BE counter and byte-swap into native x7 (hi) : x8 (lo) */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_dec_loop_4x:
	cmp		w4, #(4 * 16)
	blt		.Lccm_dec_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* per block: keystream = E(ctr), mac = E(mac); then
	 * plaintext = keystream ^ ciphertext, mac ^= plaintext */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v11.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_4x

.Lccm_dec_loop_1x:
	cmp		w4, #16
	blt		.Lccm_dec_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_1x

.Lccm_dec_tail:
	/* partial final block (1..15 bytes): complete the deferred MAC
	 * encryption and generate one keystream block, then handle the
	 * remaining bytes one at a time */
	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

.Lccm_dec_tail_loop:
	/* x0 (key pointer) is reused as scratch here; SM4_PREPARE already
	 * consumed the round keys — presumably into NEON registers, see
	 * sm4-ce-asm.h */
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w9		/* w6 = MAC ^ output */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_dec_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_dec_ret:
	/* NOTE: the tail path returns here WITHOUT writing the counter
	 * back — a partial block must therefore be the last call for this
	 * message (confirm against the C glue code) */
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)
330