/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

.arch	armv8-a+crypto

/*
 * For every vector register that may be handed to the sm4e macro below,
 * define an assembler-local symbol ".LvN.4s" whose value is the register
 * number N.  The macro pastes ".L" in front of its operands to look the
 * number up, so operands must be written as "vN.4s" and N must appear in
 * this list.
 */
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/*
 * sm4e vd, vn
 *
 * Emit the SM4E instruction as a raw word: base opcode 0xcec08400 with
 * the number of \vn in bits [9:5] and the number of \vd in bits [4:0].
 * Hand-encoding avoids relying on the assembler knowing the mnemonic.
 */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

/* Vector register that carries the running CBC-MAC in every routine below. */
#define RMAC	v16

/* Helper macros. */

/*
 * inc_le128(vctr) - produce the current counter block, then advance the
 * counter by one.
 *
 * The 128-bit counter lives in x7:x8 as two native 64-bit integers
 * (x7 = upper half, x8 = lower half).  The pre-increment value is copied
 * into vctr (x7 -> d[0], x8 -> d[1]) and each 64-bit lane is byte-reversed
 * with rev64, which yields the big-endian block layout.  x7:x8 are then
 * incremented as one 128-bit quantity (adds on the low half, adc carries
 * into the high half).
 *
 * Clobbers: vctr, x7, x8 and the condition flags.
 */
#define inc_le128(vctr)					\
		mov		vctr.d[1], x8;		\
		mov		vctr.d[0], x7;		\
		adds		x8, x8, #1;		\
		rev64		vctr.16b, vctr.16b;	\
		adc		x7, x7, xzr;


.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/*
	 * Absorb whole 16-byte blocks into the CBC-MAC.  For every block
	 * the running MAC is passed through SM4_CRYPT_BLK and then XORed
	 * with the block, so the value written back to mac has been XORed
	 * with the last block but not yet passed through the cipher again.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: mac (16 bytes, updated in place)
	 *   x2: src
	 *   w3: nblocks (handled as an unsigned count; 0 leaves the MAC
	 *       value unchanged)
	 *
	 * clobbers: x2, w3, v0-v3, RMAC, condition flags, plus whatever
	 * SM4_PREPARE / SM4_CRYPT_BLK (sm4-ce-asm.h) use internally.
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

	/*
	 * Nothing to absorb.  Without this check the single-block loop
	 * below would decrement 0 to 0xffffffff and run off the end of src.
	 */
	cbz		w3, .Lcbcmac_end

.Lcbcmac_loop_4x:
	cmp		w3, #4
	b.lo		.Lcbcmac_loop_1x	/* unsigned: fewer than 4 left */

	sub		w3, w3, #4

	/* fetch four blocks at once; the MAC chain itself stays serial */
	ld1		{v0.16b-v3.16b}, [x2], #64

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b

	cbnz		w3, .Lcbcmac_loop_4x
	b		.Lcbcmac_end

.Lcbcmac_loop_1x:
	/* invariant: 1 <= w3 <= 3 on entry to this loop */
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b

	cbnz		w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	st1		{RMAC.16b}, [x1]
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)

.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/*
	 * Finish the tag: run both the pending MAC and the ctr0 block
	 * through SM4_CRYPT_BLK2, XOR the two results and store the
	 * outcome over mac.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit), read only
	 *   x2: mac (16 bytes, updated in place)
	 *
	 * clobbers: v0, RMAC, plus whatever SM4_PREPARE / SM4_CRYPT_BLK2
	 * (sm4-ce-asm.h) use internally.
	 */
	SM4_PREPARE(x0)

	ld1		{v0.16b}, [x1]
	ld1		{RMAC.16b}, [x2]

	/* both blocks are independent, so process them as a pair */
	SM4_CRYPT_BLK2(RMAC, v0)

	/* en-/decrypt the mac with ctr0 */
	eor		RMAC.16b, RMAC.16b, v0.16b
	st1		{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)

.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
	/*
	 * CCM encryption: CTR-mode encrypt src into dst while folding the
	 * plaintext (the input) into the CBC-MAC.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes (handled as an unsigned count)
	 *   x5: mac
	 *
	 * When nbytes is a multiple of 16 (including 0) the updated MAC and
	 * the advanced counter are written back to x5 / x3.
	 * When a partial block remains, it is handled byte by byte in the
	 * tail; the MAC is written back but the counter at x3 is NOT
	 * updated on that path.
	 *
	 * clobbers: w0, x1, x2, w4, x5, w6, x7, x8, w9, v0-v3, v8-v11, RMAC,
	 * condition flags, plus whatever SM4_PREPARE / SM4_CRYPT_BLK2
	 * (sm4-ce-asm.h) use internally.
	 * NOTE(review): v8-v11 are callee-saved (low 64 bits) under AAPCS64
	 * and are not preserved here - presumably callers run this inside a
	 * kernel-mode NEON section; confirm.
	 */
	SM4_PREPARE(x0)

	/* counter as native integers: x7 = upper half, x8 = lower half */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

	/*
	 * Zero length: write MAC and counter back unchanged.  Without this
	 * check the byte tail below would wrap w4 and overrun the buffers.
	 */
	cbz		w4, .Lccm_enc_end

.Lccm_enc_loop_4x:
	cmp		w4, #(4 * 16)
	b.lo		.Lccm_enc_loop_1x	/* unsigned: < 64 bytes left */

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/*
	 * Each step pairs one counter block with the serial MAC chain:
	 * vN becomes keystream ^ plaintext, RMAC absorbs the plaintext.
	 */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v3.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbnz		w4, .Lccm_enc_loop_4x
	b		.Lccm_enc_end

.Lccm_enc_loop_1x:
	cmp		w4, #16
	b.lo		.Lccm_enc_tail		/* unsigned: partial block left */

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b

	st1		{v8.16b}, [x1], #16

	cbnz		w4, .Lccm_enc_loop_1x
	b		.Lccm_enc_end

.Lccm_enc_tail:
	/* invariant: 1 <= w4 <= 15 here */

	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC; its leading bytes are patched one by one below */
	st1		{RMAC.16b}, [x5]

.Lccm_enc_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w0		/* w6 = MAC ^ input */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	b.eq		.Lccm_enc_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_enc_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)

.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
	/*
	 * CCM decryption: CTR-mode decrypt src into dst while folding the
	 * recovered plaintext (the output) into the CBC-MAC.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes (handled as an unsigned count)
	 *   x5: mac
	 *
	 * When nbytes is a multiple of 16 (including 0) the updated MAC and
	 * the advanced counter are written back to x5 / x3.
	 * When a partial block remains, it is handled byte by byte in the
	 * tail; the MAC is written back but the counter at x3 is NOT
	 * updated on that path.
	 *
	 * clobbers: w0, x1, x2, w4, x5, w6, x7, x8, w9, v0-v3, v8-v11, RMAC,
	 * condition flags, plus whatever SM4_PREPARE / SM4_CRYPT_BLK2
	 * (sm4-ce-asm.h) use internally.
	 * NOTE(review): v8-v11 are callee-saved (low 64 bits) under AAPCS64
	 * and are not preserved here - presumably callers run this inside a
	 * kernel-mode NEON section; confirm.
	 */
	SM4_PREPARE(x0)

	/* counter as native integers: x7 = upper half, x8 = lower half */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

	/*
	 * Zero length: write MAC and counter back unchanged.  Without this
	 * check the byte tail below would wrap w4 and overrun the buffers.
	 */
	cbz		w4, .Lccm_dec_end

.Lccm_dec_loop_4x:
	cmp		w4, #(4 * 16)
	b.lo		.Lccm_dec_loop_1x	/* unsigned: < 64 bytes left */

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/*
	 * Each step pairs one counter block with the serial MAC chain:
	 * vN becomes keystream ^ ciphertext (= plaintext), and RMAC
	 * absorbs that plaintext rather than the input.
	 */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v11.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbnz		w4, .Lccm_dec_loop_4x
	b		.Lccm_dec_end

.Lccm_dec_loop_1x:
	cmp		w4, #16
	b.lo		.Lccm_dec_tail		/* unsigned: partial block left */

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b

	st1		{v8.16b}, [x1], #16

	cbnz		w4, .Lccm_dec_loop_1x
	b		.Lccm_dec_end

.Lccm_dec_tail:
	/* invariant: 1 <= w4 <= 15 here */

	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC; its leading bytes are patched one by one below */
	st1		{RMAC.16b}, [x5]

.Lccm_dec_tail_loop:
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w9		/* w6 = MAC ^ output */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	b.eq		.Lccm_dec_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_dec_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)