/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4 Cipher Algorithm for ARMv8 with Crypto Extensions
 * as specified in
 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
 *
 * Copyright (C) 2022, Alibaba Group.
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

/* Target: AArch64 (GNU as syntax, preprocessed by cpp), crypto extensions on */
.arch	armv8-a+crypto

/*
 * Define one assembler symbol per vector register that may be handed to
 * the sm4e/sm4ekey macros below: .Lv<N>.4s = N.  Those macros paste ".L"
 * in front of their operand text (e.g. "v24.4s") to obtain the register
 * number for the hand-built encoding, so only the register numbers listed
 * here have a symbol to resolve to.
 */
.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
		20, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/*
 * sm4e vd.4s, vn.4s
 *
 * Emit the SM4E instruction as a raw .inst word: base opcode 0xcec08400
 * with vn's register number in bits [9:5] and vd's in bits [4:0].
 * Operands must be written as "v<N>.4s" for an N that has a .Lv<N>.4s
 * symbol defined in this file.
 */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/*
 * sm4ekey vd.4s, vn.4s, vm.4s
 *
 * Emit the SM4EKEY instruction as a raw .inst word: base opcode 0xce60c800
 * with vm's register number in bits [20:16], vn's in bits [9:5] and vd's
 * in bits [4:0].  Operands must be written as "v<N>.4s" for an N that has
 * a .Lv<N>.4s symbol defined in this file.
 */
.macro sm4ekey, vd, vn, vm
	.inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

/* Scratch vector registers (e.g. the RTMP argument of tweak_next()) */
#define RTMP0	v16
#define RTMP1	v17
#define RTMP2	v18
#define RTMP3	v19

/*
 * RIV holds the IV / chaining value in the CBC and CFB routines.
 * RMAC names the same register (v20) as RIV.
 * RMASK holds the constant that tweak_next() ANDs the carry bits with.
 */
#define RIV	v20
#define RMAC	v20
#define RMASK	v21


.align 3
SYM_FUNC_START(sm4_ce_expand_key)
	/*
	 * Expand a 128-bit key into the encryption and decryption round-key
	 * arrays (8 vectors = 128 bytes each).
	 *
	 * input:
	 *   x0: 128-bit key
	 *   x1: rkey_enc
	 *   x2: rkey_dec
	 *   x3: fk array
	 *   x4: ck array
	 *
	 * clobbers: x5, v0-v7, v16-v31; x1, x2 and x4 are left advanced
	 * by 64 bytes.
	 */
	ld1		{v0.16b}, [x0];
	/* byte-swap each 32-bit word of the key */
	rev32		v0.16b, v0.16b;
	ld1		{v1.16b}, [x3];
	/* load ck */
	ld1		{v24.16b-v27.16b}, [x4], #64;
	ld1		{v28.16b-v31.16b}, [x4];

	/* input ^ fk */
	eor		v0.16b, v0.16b, v1.16b;

	/* each sm4ekey consumes the previous vector plus one ck vector */
	sm4ekey		v0.4s, v0.4s, v24.4s;
	sm4ekey		v1.4s, v0.4s, v25.4s;
	sm4ekey		v2.4s, v1.4s, v26.4s;
	sm4ekey		v3.4s, v2.4s, v27.4s;
	sm4ekey		v4.4s, v3.4s, v28.4s;
	sm4ekey		v5.4s, v4.4s, v29.4s;
	sm4ekey		v6.4s, v5.4s, v30.4s;
	sm4ekey		v7.4s, v6.4s, v31.4s;

	/* ck is no longer needed: reuse v24 for the permute mask */
	adr_l		x5, .Lbswap128_mask
	ld1		{v24.16b}, [x5]

	/* encryption round keys, in generation order */
	st1		{v0.16b-v3.16b}, [x1], #64;
	st1		{v4.16b-v7.16b}, [x1];

	/*
	 * Decryption round keys: the same vectors taken last-to-first, each
	 * permuted through .Lbswap128_mask (table defined elsewhere in this
	 * file; presumably it reverses the word order within a vector --
	 * confirm against its definition).
	 */
	tbl		v16.16b, {v7.16b}, v24.16b
	tbl		v17.16b, {v6.16b}, v24.16b
	tbl		v18.16b, {v5.16b}, v24.16b
	tbl		v19.16b, {v4.16b}, v24.16b
	tbl		v20.16b, {v3.16b}, v24.16b
	tbl		v21.16b, {v2.16b}, v24.16b
	tbl		v22.16b, {v1.16b}, v24.16b
	tbl		v23.16b, {v0.16b}, v24.16b

	st1		{v16.16b-v19.16b}, [x2], #64
	st1		{v20.16b-v23.16b}, [x2]

	ret;
SYM_FUNC_END(sm4_ce_expand_key)

.align 3
SYM_FUNC_START(sm4_ce_crypt_block)
	/*
	 * Process exactly one 16-byte block with the given round keys.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *
	 * SM4_PREPARE/SM4_CRYPT_BLK come from sm4-ce-asm.h; the registers
	 * they clobber are defined there.
	 */
	SM4_PREPARE(x0)

	ld1		{v0.16b}, [x2];
	SM4_CRYPT_BLK(v0);
	st1		{v0.16b}, [x1];

	ret;
SYM_FUNC_END(sm4_ce_crypt_block)

.align 3
SYM_FUNC_START(sm4_ce_crypt)
	/*
	 * Process nblocks independent 16-byte blocks (ECB style): eight at
	 * a time while possible, then one group of four, then one by one.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   w3: nblocks
	 *
	 * x1 and x2 are left advanced past the processed data.
	 */
	SM4_PREPARE(x0)

	/*
	 * Nothing to do for an empty request.  Without this check a zero
	 * count would reach the 1x tail loop, wrap w3 to 0xffffffff and run
	 * far beyond the buffers.
	 */
	cbz		w3, .Lcrypt_end

.Lcrypt_loop_blk:
	/* w3 -= 8; a negative result (bit 31 set) means fewer than 8 left */
	sub		w3, w3, #8;
	tbnz		w3, #31, .Lcrypt_tail8;

	ld1		{v0.16b-v3.16b}, [x2], #64;
	ld1		{v4.16b-v7.16b}, [x2], #64;

	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);

	st1		{v0.16b-v3.16b}, [x1], #64;
	st1		{v4.16b-v7.16b}, [x1], #64;

	cbz		w3, .Lcrypt_end;
	b		.Lcrypt_loop_blk;

.Lcrypt_tail8:
	/* undo the speculative subtraction: w3 = blocks remaining (1..7) */
	add		w3, w3, #8;
	cmp		w3, #4;
	blt		.Lcrypt_tail4;

	sub		w3, w3, #4;

	ld1		{v0.16b-v3.16b}, [x2], #64;
	SM4_CRYPT_BLK4(v0, v1, v2, v3);
	st1		{v0.16b-v3.16b}, [x1], #64;

	cbz		w3, .Lcrypt_end;

.Lcrypt_tail4:
	/* w3 is 1..3 here: finish one block per iteration */
	sub		w3, w3, #1;

	ld1		{v0.16b}, [x2], #16;
	SM4_CRYPT_BLK(v0);
	st1		{v0.16b}, [x1], #16;

	cbnz		w3, .Lcrypt_tail4;

.Lcrypt_end:
	ret;
SYM_FUNC_END(sm4_ce_crypt)

.align 3
SYM_FUNC_START(sm4_ce_cbc_enc)
	/*
	 * CBC encryption of nblocks 16-byte blocks.  Each block depends on
	 * the previous ciphertext, so blocks are encrypted strictly one
	 * after another; the 4x path only batches the loads and stores.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nblocks
	 *
	 * On return [x3] holds the last ciphertext block (the next IV).
	 */
	SM4_PREPARE(x0)

	ld1		{RIV.16b}, [x3]

	/*
	 * Empty request: skip straight to the exit, which writes the IV
	 * back unchanged.  Without this check a zero count would wrap w4 to
	 * 0xffffffff in the 1x loop and run far beyond the buffers.
	 */
	cbz		w4, .Lcbc_enc_end

.Lcbc_enc_loop_4x:
	cmp		w4, #4
	blt		.Lcbc_enc_loop_1x

	sub		w4, w4, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* C[i] = E(P[i] ^ C[i-1]), with C[-1] = IV */
	eor		v0.16b, v0.16b, RIV.16b
	SM4_CRYPT_BLK(v0)
	eor		v1.16b, v1.16b, v0.16b
	SM4_CRYPT_BLK(v1)
	eor		v2.16b, v2.16b, v1.16b
	SM4_CRYPT_BLK(v2)
	eor		v3.16b, v3.16b, v2.16b
	SM4_CRYPT_BLK(v3)

	st1		{v0.16b-v3.16b}, [x1], #64
	/* last ciphertext block chains into the next iteration */
	mov		RIV.16b, v3.16b

	cbz		w4, .Lcbc_enc_end
	b		.Lcbc_enc_loop_4x

.Lcbc_enc_loop_1x:
	/* w4 is 1..3 here */
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16

	/* encrypt in place in RIV so it is already the next chaining value */
	eor		RIV.16b, RIV.16b, v0.16b
	SM4_CRYPT_BLK(RIV)

	st1		{RIV.16b}, [x1], #16

	cbnz		w4, .Lcbc_enc_loop_1x

.Lcbc_enc_end:
	/* store new IV */
	st1		{RIV.16b}, [x3]

	ret
SYM_FUNC_END(sm4_ce_cbc_enc)

.align 3
SYM_FUNC_START(sm4_ce_cbc_dec)
	/*
	 * CBC decryption of nblocks 16-byte blocks: eight at a time while
	 * possible, then one group of four, then one by one.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nblocks
	 *
	 * On return [x3] holds the last ciphertext block (the next IV).
	 * Uses v0-v15 in addition to whatever the SM4_* macros use.
	 */
	SM4_PREPARE(x0)

	ld1		{RIV.16b}, [x3]

	/*
	 * Empty request: skip straight to the exit, which writes the IV
	 * back unchanged.  Without this check a zero count would wrap w4 to
	 * 0xffffffff in the 1x loop and run far beyond the buffers.
	 */
	cbz		w4, .Lcbc_dec_end

.Lcbc_dec_loop_8x:
	/* w4 -= 8; a negative result (bit 31 set) means fewer than 8 left */
	sub		w4, w4, #8
	tbnz		w4, #31, .Lcbc_dec_4x

	ld1		{v0.16b-v3.16b}, [x2], #64
	ld1		{v4.16b-v7.16b}, [x2], #64

	/*
	 * Work on byte-swapped copies in v8-v15 (the _BE macro variant is
	 * fed already-swapped input) so that the original ciphertext stays
	 * available in v0-v7 for the chaining XOR below.
	 */
	rev32		v8.16b, v0.16b
	rev32		v9.16b, v1.16b
	rev32		v10.16b, v2.16b
	rev32		v11.16b, v3.16b
	rev32		v12.16b, v4.16b
	rev32		v13.16b, v5.16b
	rev32		v14.16b, v6.16b
	rev32		v15.16b, v7.16b

	SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)

	/* P[i] = D(C[i]) ^ C[i-1], with C[-1] = IV */
	eor		v8.16b, v8.16b, RIV.16b
	eor		v9.16b, v9.16b, v0.16b
	eor		v10.16b, v10.16b, v1.16b
	eor		v11.16b, v11.16b, v2.16b
	eor		v12.16b, v12.16b, v3.16b
	eor		v13.16b, v13.16b, v4.16b
	eor		v14.16b, v14.16b, v5.16b
	eor		v15.16b, v15.16b, v6.16b

	st1		{v8.16b-v11.16b}, [x1], #64
	st1		{v12.16b-v15.16b}, [x1], #64

	/* last ciphertext block of this batch is the next chaining value */
	mov		RIV.16b, v7.16b

	cbz		w4, .Lcbc_dec_end
	b		.Lcbc_dec_loop_8x

.Lcbc_dec_4x:
	/* undo the speculative subtraction: w4 = blocks remaining (1..7) */
	add		w4, w4, #8
	cmp		w4, #4
	blt		.Lcbc_dec_loop_1x

	sub		w4, w4, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	rev32		v8.16b, v0.16b
	rev32		v9.16b, v1.16b
	rev32		v10.16b, v2.16b
	rev32		v11.16b, v3.16b

	SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)

	eor		v8.16b, v8.16b, RIV.16b
	eor		v9.16b, v9.16b, v0.16b
	eor		v10.16b, v10.16b, v1.16b
	eor		v11.16b, v11.16b, v2.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	mov		RIV.16b, v3.16b

	cbz		w4, .Lcbc_dec_end

.Lcbc_dec_loop_1x:
	/* w4 is 1..3 here */
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16

	rev32		v8.16b, v0.16b

	SM4_CRYPT_BLK_BE(v8)

	eor		v8.16b, v8.16b, RIV.16b
	st1		{v8.16b}, [x1], #16

	/* this ciphertext block chains into the next one */
	mov		RIV.16b, v0.16b

	cbnz		w4, .Lcbc_dec_loop_1x

.Lcbc_dec_end:
	/* store new IV */
	st1		{RIV.16b}, [x3]

	ret
SYM_FUNC_END(sm4_ce_cbc_dec)

.align 3
SYM_FUNC_START(sm4_ce_cbc_cts_enc)
	/*
	 * CBC ciphertext-stealing encryption of the final two blocks: one
	 * full 16-byte block followed by a trailing block of nbytes - 16
	 * bytes.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nbytes
	 *
	 * NOTE(review): the overlapping 16-byte loads/stores and the
	 * permute-table indexing assume 16 < nbytes <= 32 -- confirm
	 * against the caller.  The IV at [x3] is read but not written back.
	 */
	SM4_PREPARE(x0)

	/* x5 = nbytes - 16 = length of the trailing block */
	sub		w5, w4, #16
	uxtw		x5, w5

	ld1		{RIV.16b}, [x3]

	/* ordinary CBC step on the first (full) block */
	ld1		{v0.16b}, [x2]
	eor		RIV.16b, RIV.16b, v0.16b
	SM4_CRYPT_BLK(RIV)

	/* load permute table */
	adr_l		x6, .Lcts_permute_table
	add		x7, x6, #32
	add		x6, x6, x5
	sub		x7, x7, x5
	ld1		{v3.16b}, [x6]
	ld1		{v4.16b}, [x7]

	/* overlapping loads */
	add		x2, x2, x5
	ld1		{v1.16b}, [x2]

	/* create Cn from En-1 */
	tbl		v0.16b, {RIV.16b}, v3.16b
	/* padding Pn with zeros */
	tbl		v1.16b, {v1.16b}, v4.16b

	eor		v1.16b, v1.16b, RIV.16b
	SM4_CRYPT_BLK(v1)

	/* overlapping stores */
	add		x5, x1, x5
	/* the tail goes out first; the full block then overwrites the overlap */
	st1		{v0.16b}, [x5]
	st1		{v1.16b}, [x1]

	ret
SYM_FUNC_END(sm4_ce_cbc_cts_enc)

.align 3
SYM_FUNC_START(sm4_ce_cbc_cts_dec)
	/*
	 * CBC ciphertext-stealing decryption of the final two blocks: one
	 * full 16-byte block followed by a trailing block of nbytes - 16
	 * bytes.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nbytes
	 *
	 * NOTE(review): the overlapping 16-byte loads/stores and the
	 * permute-table indexing assume 16 < nbytes <= 32 -- confirm
	 * against the caller.  The IV at [x3] is read but not written back.
	 */
	SM4_PREPARE(x0)

	/* x5 = nbytes - 16 = length of the trailing block */
	sub		w5, w4, #16
	uxtw		x5, w5

	ld1		{RIV.16b}, [x3]

	/* load permute table */
	adr_l		x6, .Lcts_permute_table
	add		x7, x6, #32
	add		x6, x6, x5
	sub		x7, x7, x5
	ld1		{v3.16b}, [x6]
	ld1		{v4.16b}, [x7]

	/* overlapping loads */
	ld1		{v0.16b}, [x2], x5
	ld1		{v1.16b}, [x2]

	SM4_CRYPT_BLK(v0)
	/* select the first Ln bytes of Xn to create Pn */
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	/* overwrite the first Ln bytes with Cn to create En-1 */
	tbx		v0.16b, {v1.16b}, v4.16b
	SM4_CRYPT_BLK(v0)
	eor		v0.16b, v0.16b, RIV.16b

	/* overlapping stores */
	add		x5, x1, x5
	/* the tail goes out first; the full block then overwrites the overlap */
	st1		{v2.16b}, [x5]
	st1		{v0.16b}, [x1]

	ret
SYM_FUNC_END(sm4_ce_cbc_cts_dec)

.align 3
SYM_FUNC_START(sm4_ce_cfb_enc)
	/*
	 * CFB encryption of nblocks 16-byte blocks.  Each keystream block
	 * is the encryption of the previous ciphertext block, so blocks are
	 * processed strictly one after another; the 4x path only batches
	 * the loads and stores.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nblocks
	 *
	 * On return [x3] holds the last ciphertext block (the next IV).
	 */
	SM4_PREPARE(x0)

	ld1		{RIV.16b}, [x3]

	/*
	 * Empty request: skip straight to the exit, which writes the IV
	 * back unchanged.  Without this check a zero count would wrap w4 to
	 * 0xffffffff in the 1x loop and run far beyond the buffers.
	 */
	cbz		w4, .Lcfb_enc_end

.Lcfb_enc_loop_4x:
	cmp		w4, #4
	blt		.Lcfb_enc_loop_1x

	sub		w4, w4, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* C[i] = P[i] ^ E(C[i-1]), with C[-1] = IV; v8 is the keystream */
	rev32		v8.16b, RIV.16b
	SM4_CRYPT_BLK_BE(v8)
	eor		v0.16b, v0.16b, v8.16b

	rev32		v8.16b, v0.16b
	SM4_CRYPT_BLK_BE(v8)
	eor		v1.16b, v1.16b, v8.16b

	rev32		v8.16b, v1.16b
	SM4_CRYPT_BLK_BE(v8)
	eor		v2.16b, v2.16b, v8.16b

	rev32		v8.16b, v2.16b
	SM4_CRYPT_BLK_BE(v8)
	eor		v3.16b, v3.16b, v8.16b

	st1		{v0.16b-v3.16b}, [x1], #64
	/* last ciphertext block feeds the next iteration */
	mov		RIV.16b, v3.16b

	cbz		w4, .Lcfb_enc_end
	b		.Lcfb_enc_loop_4x

.Lcfb_enc_loop_1x:
	/* w4 is 1..3 here */
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16

	/* encrypt in place in RIV, then XOR: RIV becomes the ciphertext */
	SM4_CRYPT_BLK(RIV)
	eor		RIV.16b, RIV.16b, v0.16b

	st1		{RIV.16b}, [x1], #16

	cbnz		w4, .Lcfb_enc_loop_1x

.Lcfb_enc_end:
	/* store new IV */
	st1		{RIV.16b}, [x3]

	ret
SYM_FUNC_END(sm4_ce_cfb_enc)

.align 3
SYM_FUNC_START(sm4_ce_cfb_dec)
	/*
	 * CFB decryption of nblocks 16-byte blocks: eight at a time while
	 * possible, then one group of four, then one by one.  The keystream
	 * for block i is the encryption of ciphertext block i-1, so whole
	 * batches can be run through the cipher together.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nblocks
	 *
	 * On return [x3] holds the last ciphertext block (the next IV).
	 * Uses v0-v15 in addition to whatever the SM4_* macros use.
	 */
	SM4_PREPARE(x0)

	ld1		{RIV.16b}, [x3]

	/*
	 * Empty request: skip straight to the exit, which writes the IV
	 * back unchanged.  Without this check a zero count would wrap w4 to
	 * 0xffffffff in the 1x loop and run far beyond the buffers.
	 */
	cbz		w4, .Lcfb_dec_end

.Lcfb_dec_loop_8x:
	/* w4 -= 8; a negative result (bit 31 set) means fewer than 8 left */
	sub		w4, w4, #8
	tbnz		w4, #31, .Lcfb_dec_4x

	ld1		{v0.16b-v3.16b}, [x2], #64
	ld1		{v4.16b-v7.16b}, [x2], #64

	/* cipher inputs: IV, C[0] .. C[6] (byte-swapped for the _BE macro) */
	rev32		v8.16b, RIV.16b
	rev32		v9.16b, v0.16b
	rev32		v10.16b, v1.16b
	rev32		v11.16b, v2.16b
	rev32		v12.16b, v3.16b
	rev32		v13.16b, v4.16b
	rev32		v14.16b, v5.16b
	rev32		v15.16b, v6.16b

	SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)

	/* save C[7] as the next IV before v7 is overwritten with plaintext */
	mov		RIV.16b, v7.16b

	/* P[i] = C[i] ^ keystream[i] */
	eor		v0.16b, v0.16b, v8.16b
	eor		v1.16b, v1.16b, v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v5.16b, v5.16b, v13.16b
	eor		v6.16b, v6.16b, v14.16b
	eor		v7.16b, v7.16b, v15.16b

	st1		{v0.16b-v3.16b}, [x1], #64
	st1		{v4.16b-v7.16b}, [x1], #64

	cbz		w4, .Lcfb_dec_end
	b		.Lcfb_dec_loop_8x

.Lcfb_dec_4x:
	/* undo the speculative subtraction: w4 = blocks remaining (1..7) */
	add		w4, w4, #8
	cmp		w4, #4
	blt		.Lcfb_dec_loop_1x

	sub		w4, w4, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	rev32		v8.16b, RIV.16b
	rev32		v9.16b, v0.16b
	rev32		v10.16b, v1.16b
	rev32		v11.16b, v2.16b

	SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)

	/* save C[3] as the next IV before v3 is overwritten with plaintext */
	mov		RIV.16b, v3.16b

	eor		v0.16b, v0.16b, v8.16b
	eor		v1.16b, v1.16b, v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b

	st1		{v0.16b-v3.16b}, [x1], #64

	cbz		w4, .Lcfb_dec_end

.Lcfb_dec_loop_1x:
	/* w4 is 1..3 here */
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16

	/* keystream = E(previous ciphertext), computed in place in RIV */
	SM4_CRYPT_BLK(RIV)

	eor		RIV.16b, RIV.16b, v0.16b
	st1		{RIV.16b}, [x1], #16

	/* this ciphertext block feeds the next one */
	mov		RIV.16b, v0.16b

	cbnz		w4, .Lcfb_dec_loop_1x

.Lcfb_dec_end:
	/* store new IV */
	st1		{RIV.16b}, [x3]

	ret
SYM_FUNC_END(sm4_ce_cfb_dec)

.align 3
SYM_FUNC_START(sm4_ce_ctr_enc)
	/*
	 * CTR mode over nblocks 16-byte blocks: eight at a time while
	 * possible, then one group of four, then one by one.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nblocks
	 *
	 * The counter is kept as a 128-bit integer in x7 (high half) and
	 * x8 (low half) and written back to [x3], advanced by nblocks, on
	 * return.  Uses v0-v15 in addition to whatever the SM4_* macros use.
	 */
	SM4_PREPARE(x0)

	/* load the counter and byte-reverse each half into integer form */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	/*
	 * Empty request: skip straight to the exit, which writes the
	 * counter back unchanged.  Without this check a zero count would
	 * wrap w4 to 0xffffffff in the 1x loop and run far beyond the
	 * buffers.
	 */
	cbz		w4, .Lctr_end

.Lctr_loop_8x:
	/* w4 -= 8; a negative result (bit 31 set) means fewer than 8 left */
	sub		w4, w4, #8
	tbnz		w4, #31, .Lctr_4x

/*
 * inc_le128(vctr): materialise the current counter x7:x8 in vctr in its
 * big-endian byte layout (rev64 undoes the integer form per 64-bit half),
 * then post-increment the 128-bit counter.  The carry from the low half
 * travels in the flags from adds to adc; the rev64 in between does not
 * touch them.  Clobbers the condition flags.
 */
#define inc_le128(vctr)					\
		mov		vctr.d[1], x8;		\
		mov		vctr.d[0], x7;		\
		adds		x8, x8, #1;		\
		rev64		vctr.16b, vctr.16b;	\
		adc		x7, x7, xzr;

	/* construct CTRs */
	inc_le128(v0)			/* +0 */
	inc_le128(v1)			/* +1 */
	inc_le128(v2)			/* +2 */
	inc_le128(v3)			/* +3 */
	inc_le128(v4)			/* +4 */
	inc_le128(v5)			/* +5 */
	inc_le128(v6)			/* +6 */
	inc_le128(v7)			/* +7 */

	ld1		{v8.16b-v11.16b}, [x2], #64
	ld1		{v12.16b-v15.16b}, [x2], #64

	/* keystream = E(counter blocks) */
	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)

	/* output = keystream ^ input */
	eor		v0.16b, v0.16b, v8.16b
	eor		v1.16b, v1.16b, v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v5.16b, v5.16b, v13.16b
	eor		v6.16b, v6.16b, v14.16b
	eor		v7.16b, v7.16b, v15.16b

	st1		{v0.16b-v3.16b}, [x1], #64
	st1		{v4.16b-v7.16b}, [x1], #64

	cbz		w4, .Lctr_end
	b		.Lctr_loop_8x

.Lctr_4x:
	/* undo the speculative subtraction: w4 = blocks remaining (1..7) */
	add		w4, w4, #8
	cmp		w4, #4
	blt		.Lctr_loop_1x

	sub		w4, w4, #4

	/* construct CTRs */
	inc_le128(v0)			/* +0 */
	inc_le128(v1)			/* +1 */
	inc_le128(v2)			/* +2 */
	inc_le128(v3)			/* +3 */

	ld1		{v8.16b-v11.16b}, [x2], #64

	SM4_CRYPT_BLK4(v0, v1, v2, v3)

	eor		v0.16b, v0.16b, v8.16b
	eor		v1.16b, v1.16b, v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b

	st1		{v0.16b-v3.16b}, [x1], #64

	cbz		w4, .Lctr_end

.Lctr_loop_1x:
	/* w4 is 1..3 here */
	sub		w4, w4, #1

	/* construct CTRs */
	inc_le128(v0)

	ld1		{v8.16b}, [x2], #16

	SM4_CRYPT_BLK(v0)

	eor		v0.16b, v0.16b, v8.16b
	st1		{v0.16b}, [x1], #16

	cbnz		w4, .Lctr_loop_1x

.Lctr_end:
	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

	ret
SYM_FUNC_END(sm4_ce_ctr_enc)


/*
 * tweak_next(vt, vin, RTMP): derive the next XTS tweak, vt = vin * x in
 * GF(2^128), using RTMP as scratch.
 *
 *   sshr  - each 64-bit lane becomes all-ones if its top bit was set
 *   and   - keep only the RMASK bits of those lanes
 *   add   - vin + vin shifts each 64-bit lane left by one
 *   ext   - swap the two 64-bit halves of RTMP so each lane's carry
 *           lands in the other lane
 *   eor   - fold the carries into the shifted value
 *
 * RMASK must hold 0x1 in its low and 0x87 in its high 64-bit lane (which
 * is how sm4_ce_xts_enc sets it up), so the carry out of the low half
 * becomes bit 0 of the high half and the carry out of the high half
 * becomes 0x87 in the low half.
 */
#define tweak_next(vt, vin, RTMP)					\
		sshr		RTMP.2d, vin.2d, #63;			\
		and		RTMP.16b, RTMP.16b, RMASK.16b;		\
		add		vt.2d, vin.2d, vin.2d;			\
		ext		RTMP.16b, RTMP.16b, RTMP.16b, #8;	\
		eor		vt.16b, vt.16b, RTMP.16b;

67762306a36Sopenharmony_ci.align 3
67862306a36Sopenharmony_ciSYM_FUNC_START(sm4_ce_xts_enc)
67962306a36Sopenharmony_ci	/* input:
68062306a36Sopenharmony_ci	 *   x0: round key array, CTX
68162306a36Sopenharmony_ci	 *   x1: dst
68262306a36Sopenharmony_ci	 *   x2: src
68362306a36Sopenharmony_ci	 *   x3: tweak (big endian, 128 bit)
68462306a36Sopenharmony_ci	 *   w4: nbytes
68562306a36Sopenharmony_ci	 *   x5: round key array for IV
68662306a36Sopenharmony_ci	 */
68762306a36Sopenharmony_ci	ld1		{v8.16b}, [x3]
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	cbz		x5, .Lxts_enc_nofirst
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci	SM4_PREPARE(x5)
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci	/* Generate first tweak */
69462306a36Sopenharmony_ci	SM4_CRYPT_BLK(v8)
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci.Lxts_enc_nofirst:
69762306a36Sopenharmony_ci	SM4_PREPARE(x0)
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	ands		w5, w4, #15
70062306a36Sopenharmony_ci	lsr		w4, w4, #4
70162306a36Sopenharmony_ci	sub		w6, w4, #1
70262306a36Sopenharmony_ci	csel		w4, w4, w6, eq
70362306a36Sopenharmony_ci	uxtw		x5, w5
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	movi		RMASK.2s, #0x1
70662306a36Sopenharmony_ci	movi		RTMP0.2s, #0x87
70762306a36Sopenharmony_ci	uzp1		RMASK.4s, RMASK.4s, RTMP0.4s
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci	cbz		w4, .Lxts_enc_cts
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci.Lxts_enc_loop_8x:
71262306a36Sopenharmony_ci	sub		w4, w4, #8
71362306a36Sopenharmony_ci	tbnz		w4, #31, .Lxts_enc_4x
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	tweak_next( v9,  v8, RTMP0)
71662306a36Sopenharmony_ci	tweak_next(v10,  v9, RTMP1)
71762306a36Sopenharmony_ci	tweak_next(v11, v10, RTMP2)
71862306a36Sopenharmony_ci	tweak_next(v12, v11, RTMP3)
71962306a36Sopenharmony_ci	tweak_next(v13, v12, RTMP0)
72062306a36Sopenharmony_ci	tweak_next(v14, v13, RTMP1)
72162306a36Sopenharmony_ci	tweak_next(v15, v14, RTMP2)
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	ld1		{v0.16b-v3.16b}, [x2], #64
72462306a36Sopenharmony_ci	ld1		{v4.16b-v7.16b}, [x2], #64
72562306a36Sopenharmony_ci	eor		v0.16b, v0.16b,  v8.16b
72662306a36Sopenharmony_ci	eor		v1.16b, v1.16b,  v9.16b
72762306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v10.16b
72862306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v11.16b
72962306a36Sopenharmony_ci	eor		v4.16b, v4.16b, v12.16b
73062306a36Sopenharmony_ci	eor		v5.16b, v5.16b, v13.16b
73162306a36Sopenharmony_ci	eor		v6.16b, v6.16b, v14.16b
73262306a36Sopenharmony_ci	eor		v7.16b, v7.16b, v15.16b
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	eor		v0.16b, v0.16b,  v8.16b
73762306a36Sopenharmony_ci	eor		v1.16b, v1.16b,  v9.16b
73862306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v10.16b
73962306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v11.16b
74062306a36Sopenharmony_ci	eor		v4.16b, v4.16b, v12.16b
74162306a36Sopenharmony_ci	eor		v5.16b, v5.16b, v13.16b
74262306a36Sopenharmony_ci	eor		v6.16b, v6.16b, v14.16b
74362306a36Sopenharmony_ci	eor		v7.16b, v7.16b, v15.16b
74462306a36Sopenharmony_ci	st1		{v0.16b-v3.16b}, [x1], #64
74562306a36Sopenharmony_ci	st1		{v4.16b-v7.16b}, [x1], #64
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci	tweak_next(v8, v15, RTMP3)
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ci	cbz		w4, .Lxts_enc_cts
75062306a36Sopenharmony_ci	b		.Lxts_enc_loop_8x
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci.Lxts_enc_4x:
75362306a36Sopenharmony_ci	add		w4, w4, #8
75462306a36Sopenharmony_ci	cmp		w4, #4
75562306a36Sopenharmony_ci	blt		.Lxts_enc_loop_1x
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	sub		w4, w4, #4
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci	tweak_next( v9,  v8, RTMP0)
76062306a36Sopenharmony_ci	tweak_next(v10,  v9, RTMP1)
76162306a36Sopenharmony_ci	tweak_next(v11, v10, RTMP2)
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	ld1		{v0.16b-v3.16b}, [x2], #64
76462306a36Sopenharmony_ci	eor		v0.16b, v0.16b,  v8.16b
76562306a36Sopenharmony_ci	eor		v1.16b, v1.16b,  v9.16b
76662306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v10.16b
76762306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v11.16b
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	SM4_CRYPT_BLK4(v0, v1, v2, v3)
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci	eor		v0.16b, v0.16b,  v8.16b
77262306a36Sopenharmony_ci	eor		v1.16b, v1.16b,  v9.16b
77362306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v10.16b
77462306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v11.16b
77562306a36Sopenharmony_ci	st1		{v0.16b-v3.16b}, [x1], #64
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci	tweak_next(v8, v11, RTMP3)
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci	cbz		w4, .Lxts_enc_cts
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci.Lxts_enc_loop_1x:
78262306a36Sopenharmony_ci	sub		w4, w4, #1
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	ld1		{v0.16b}, [x2], #16
78562306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	SM4_CRYPT_BLK(v0)
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
79062306a36Sopenharmony_ci	st1		{v0.16b}, [x1], #16
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ci	tweak_next(v8, v8, RTMP0)
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	cbnz		w4, .Lxts_enc_loop_1x
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci.Lxts_enc_cts:
79762306a36Sopenharmony_ci	cbz		x5, .Lxts_enc_end
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci	/* cipher text stealing */
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci	tweak_next(v9, v8, RTMP0)
80262306a36Sopenharmony_ci	ld1		{v0.16b}, [x2]
80362306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
80462306a36Sopenharmony_ci	SM4_CRYPT_BLK(v0)
80562306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	/* load permute table */
80862306a36Sopenharmony_ci	adr_l		x6, .Lcts_permute_table
80962306a36Sopenharmony_ci	add		x7, x6, #32
81062306a36Sopenharmony_ci	add		x6, x6, x5
81162306a36Sopenharmony_ci	sub		x7, x7, x5
81262306a36Sopenharmony_ci	ld1		{v3.16b}, [x6]
81362306a36Sopenharmony_ci	ld1		{v4.16b}, [x7]
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	/* overlapping loads */
81662306a36Sopenharmony_ci	add		x2, x2, x5
81762306a36Sopenharmony_ci	ld1		{v1.16b}, [x2]
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	/* create Cn from En-1 */
82062306a36Sopenharmony_ci	tbl		v2.16b, {v0.16b}, v3.16b
82162306a36Sopenharmony_ci	/* padding Pn with En-1 at the end */
82262306a36Sopenharmony_ci	tbx		v0.16b, {v1.16b}, v4.16b
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v9.16b
82562306a36Sopenharmony_ci	SM4_CRYPT_BLK(v0)
82662306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v9.16b
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	/* overlapping stores */
83062306a36Sopenharmony_ci	add		x5, x1, x5
83162306a36Sopenharmony_ci	st1		{v2.16b}, [x5]
83262306a36Sopenharmony_ci	st1		{v0.16b}, [x1]
83362306a36Sopenharmony_ci
83462306a36Sopenharmony_ci	b		.Lxts_enc_ret
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci.Lxts_enc_end:
83762306a36Sopenharmony_ci	/* store new tweak */
83862306a36Sopenharmony_ci	st1		{v8.16b}, [x3]
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci.Lxts_enc_ret:
84162306a36Sopenharmony_ci	ret
84262306a36Sopenharmony_ciSYM_FUNC_END(sm4_ce_xts_enc)
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci.align 3
/*
 * sm4_ce_xts_dec: XTS decryption of nbytes from src to dst, using
 * ciphertext stealing when nbytes is not a multiple of 16.
 *
 * Each block is XORed with its tweak, passed through SM4_CRYPT_BLK*, and
 * XORed with the same tweak again.  Full blocks are handled 8 at a time,
 * then 4 at a time, then one by one.
 *
 * SM4_PREPARE, SM4_CRYPT_BLK* and tweak_next are macros defined outside
 * this function; presumably they load a round key array, run the SM4
 * rounds in place, and derive the next tweak from the previous one --
 * confirm against their definitions.
 *
 * NOTE(review): the block accounting assumes nbytes >= 16 (with a partial
 * tail and no full block the counter would wrap) -- confirm the caller
 * guarantees this.
 */
84562306a36Sopenharmony_ciSYM_FUNC_START(sm4_ce_xts_dec)
84662306a36Sopenharmony_ci	/* input:
84762306a36Sopenharmony_ci	 *   x0: round key array, CTX
84862306a36Sopenharmony_ci	 *   x1: dst
84962306a36Sopenharmony_ci	 *   x2: src
85062306a36Sopenharmony_ci	 *   x3: tweak (big endian, 128 bit); read on entry, written back
	 *       only when nbytes is a multiple of 16
85162306a36Sopenharmony_ci	 *   w4: nbytes
85262306a36Sopenharmony_ci	 *   x5: round key array for IV; when 0 the loaded tweak is used
	 *       as-is (x5 is reused below as the tail byte count)
85362306a36Sopenharmony_ci	 */
85462306a36Sopenharmony_ci	ld1		{v8.16b}, [x3]
85562306a36Sopenharmony_ci
85662306a36Sopenharmony_ci	cbz		x5, .Lxts_dec_nofirst
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	SM4_PREPARE(x5)
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	/* Generate first tweak */
86162306a36Sopenharmony_ci	SM4_CRYPT_BLK(v8)
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci.Lxts_dec_nofirst:
86462306a36Sopenharmony_ci	SM4_PREPARE(x0)
86562306a36Sopenharmony_ci
	/*
	 * w5 = nbytes % 16 (tail bytes), w4 = nbytes / 16 (full blocks).
	 * The flags set by ands survive lsr/sub, so csel keeps the full
	 * block count when there is no tail and otherwise holds back one
	 * full block for the ciphertext stealing step.
	 */
86662306a36Sopenharmony_ci	ands		w5, w4, #15
86762306a36Sopenharmony_ci	lsr		w4, w4, #4
86862306a36Sopenharmony_ci	sub		w6, w4, #1
86962306a36Sopenharmony_ci	csel		w4, w4, w6, eq
87062306a36Sopenharmony_ci	uxtw		x5, w5
87162306a36Sopenharmony_ci
	/*
	 * Build RMASK with 64-bit lanes { 0x1, 0x87 }: uzp1 takes the even
	 * 32-bit lanes of {1, 1, 0, 0} and {0x87, 0x87, 0, 0}.
	 * Presumably consumed by tweak_next -- confirm in its definition.
	 */
87262306a36Sopenharmony_ci	movi		RMASK.2s, #0x1
87362306a36Sopenharmony_ci	movi		RTMP0.2s, #0x87
87462306a36Sopenharmony_ci	uzp1		RMASK.4s, RMASK.4s, RTMP0.4s
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci	cbz		w4, .Lxts_dec_cts
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci.Lxts_dec_loop_8x:
	/* subtract first; a negative result means fewer than 8 blocks left */
87962306a36Sopenharmony_ci	sub		w4, w4, #8
88062306a36Sopenharmony_ci	tbnz		w4, #31, .Lxts_dec_4x
88162306a36Sopenharmony_ci
	/* v8 holds the current tweak; chain v9..v15 from it */
88262306a36Sopenharmony_ci	tweak_next( v9,  v8, RTMP0)
88362306a36Sopenharmony_ci	tweak_next(v10,  v9, RTMP1)
88462306a36Sopenharmony_ci	tweak_next(v11, v10, RTMP2)
88562306a36Sopenharmony_ci	tweak_next(v12, v11, RTMP3)
88662306a36Sopenharmony_ci	tweak_next(v13, v12, RTMP0)
88762306a36Sopenharmony_ci	tweak_next(v14, v13, RTMP1)
88862306a36Sopenharmony_ci	tweak_next(v15, v14, RTMP2)
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	ld1		{v0.16b-v3.16b}, [x2], #64
89162306a36Sopenharmony_ci	ld1		{v4.16b-v7.16b}, [x2], #64
89262306a36Sopenharmony_ci	eor		v0.16b, v0.16b,  v8.16b
89362306a36Sopenharmony_ci	eor		v1.16b, v1.16b,  v9.16b
89462306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v10.16b
89562306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v11.16b
89662306a36Sopenharmony_ci	eor		v4.16b, v4.16b, v12.16b
89762306a36Sopenharmony_ci	eor		v5.16b, v5.16b, v13.16b
89862306a36Sopenharmony_ci	eor		v6.16b, v6.16b, v14.16b
89962306a36Sopenharmony_ci	eor		v7.16b, v7.16b, v15.16b
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_ci	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	eor		v0.16b, v0.16b,  v8.16b
90462306a36Sopenharmony_ci	eor		v1.16b, v1.16b,  v9.16b
90562306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v10.16b
90662306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v11.16b
90762306a36Sopenharmony_ci	eor		v4.16b, v4.16b, v12.16b
90862306a36Sopenharmony_ci	eor		v5.16b, v5.16b, v13.16b
90962306a36Sopenharmony_ci	eor		v6.16b, v6.16b, v14.16b
91062306a36Sopenharmony_ci	eor		v7.16b, v7.16b, v15.16b
91162306a36Sopenharmony_ci	st1		{v0.16b-v3.16b}, [x1], #64
91262306a36Sopenharmony_ci	st1		{v4.16b-v7.16b}, [x1], #64
91362306a36Sopenharmony_ci
	/* advance v8 to the tweak of the next unprocessed block */
91462306a36Sopenharmony_ci	tweak_next(v8, v15, RTMP3)
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	cbz		w4, .Lxts_dec_cts
91762306a36Sopenharmony_ci	b		.Lxts_dec_loop_8x
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_ci.Lxts_dec_4x:
	/* undo the speculative subtraction done at the top of the 8x loop */
92062306a36Sopenharmony_ci	add		w4, w4, #8
92162306a36Sopenharmony_ci	cmp		w4, #4
92262306a36Sopenharmony_ci	blt		.Lxts_dec_loop_1x
92362306a36Sopenharmony_ci
92462306a36Sopenharmony_ci	sub		w4, w4, #4
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	tweak_next( v9,  v8, RTMP0)
92762306a36Sopenharmony_ci	tweak_next(v10,  v9, RTMP1)
92862306a36Sopenharmony_ci	tweak_next(v11, v10, RTMP2)
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci	ld1		{v0.16b-v3.16b}, [x2], #64
93162306a36Sopenharmony_ci	eor		v0.16b, v0.16b,  v8.16b
93262306a36Sopenharmony_ci	eor		v1.16b, v1.16b,  v9.16b
93362306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v10.16b
93462306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v11.16b
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	SM4_CRYPT_BLK4(v0, v1, v2, v3)
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci	eor		v0.16b, v0.16b,  v8.16b
93962306a36Sopenharmony_ci	eor		v1.16b, v1.16b,  v9.16b
94062306a36Sopenharmony_ci	eor		v2.16b, v2.16b, v10.16b
94162306a36Sopenharmony_ci	eor		v3.16b, v3.16b, v11.16b
94262306a36Sopenharmony_ci	st1		{v0.16b-v3.16b}, [x1], #64
94362306a36Sopenharmony_ci
94462306a36Sopenharmony_ci	tweak_next(v8, v11, RTMP3)
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci	cbz		w4, .Lxts_dec_cts
94762306a36Sopenharmony_ci
94862306a36Sopenharmony_ci.Lxts_dec_loop_1x:
	/* w4 is non-zero whenever this loop is entered (1..3 blocks left) */
94962306a36Sopenharmony_ci	sub		w4, w4, #1
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ci	ld1		{v0.16b}, [x2], #16
95262306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci	SM4_CRYPT_BLK(v0)
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
95762306a36Sopenharmony_ci	st1		{v0.16b}, [x1], #16
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci	tweak_next(v8, v8, RTMP0)
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci	cbnz		w4, .Lxts_dec_loop_1x
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci.Lxts_dec_cts:
	/* x5 = tail byte count; zero means there is nothing to steal */
96462306a36Sopenharmony_ci	cbz		x5, .Lxts_dec_end
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ci	/* cipher text stealing */
96762306a36Sopenharmony_ci
	/*
	 * The last full block is processed with the *next* tweak (v9) and
	 * the reassembled block with the current tweak (v8); this is the
	 * reverse of sm4_ce_xts_enc, which uses v8 first and v9 second.
	 */
96862306a36Sopenharmony_ci	tweak_next(v9, v8, RTMP0)
96962306a36Sopenharmony_ci	ld1		{v0.16b}, [x2]
97062306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v9.16b
97162306a36Sopenharmony_ci	SM4_CRYPT_BLK(v0)
97262306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v9.16b
97362306a36Sopenharmony_ci
97462306a36Sopenharmony_ci	/* load permute table: v3 from table + tail, v4 from table + 32 - tail */
97562306a36Sopenharmony_ci	adr_l		x6, .Lcts_permute_table
97662306a36Sopenharmony_ci	add		x7, x6, #32
97762306a36Sopenharmony_ci	add		x6, x6, x5
97862306a36Sopenharmony_ci	sub		x7, x7, x5
97962306a36Sopenharmony_ci	ld1		{v3.16b}, [x6]
98062306a36Sopenharmony_ci	ld1		{v4.16b}, [x7]
98162306a36Sopenharmony_ci
98262306a36Sopenharmony_ci	/* overlapping loads: the top 'tail' bytes of v1 are the partial block */
98362306a36Sopenharmony_ci	add		x2, x2, x5
98462306a36Sopenharmony_ci	ld1		{v1.16b}, [x2]
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_ci	/* create Pn: move the first 'tail' bytes of v0 to the top of v2 */
98762306a36Sopenharmony_ci	tbl		v2.16b, {v0.16b}, v3.16b
98862306a36Sopenharmony_ci	/* put partial Cn in front of v0; its remaining bytes are kept by tbx */
98962306a36Sopenharmony_ci	tbx		v0.16b, {v1.16b}, v4.16b
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
99262306a36Sopenharmony_ci	SM4_CRYPT_BLK(v0)
99362306a36Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci	/*
	 * overlapping stores: v2 goes first at dst + tail, then v0 at dst
	 * overwrites the zeroed low bytes of v2, so the order matters
	 */
99762306a36Sopenharmony_ci	add		x5, x1, x5
99862306a36Sopenharmony_ci	st1		{v2.16b}, [x5]
99962306a36Sopenharmony_ci	st1		{v0.16b}, [x1]
100062306a36Sopenharmony_ci
	/* the tweak is not written back on the ciphertext stealing path */
100162306a36Sopenharmony_ci	b		.Lxts_dec_ret
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci.Lxts_dec_end:
100462306a36Sopenharmony_ci	/* store new tweak */
100562306a36Sopenharmony_ci	st1		{v8.16b}, [x3]
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_ci.Lxts_dec_ret:
100862306a36Sopenharmony_ci	ret
100962306a36Sopenharmony_ciSYM_FUNC_END(sm4_ce_xts_dec)
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci.align 3
/*
 * sm4_ce_mac_update: fold nblocks 16-byte blocks from src into the
 * 16-byte digest at x1.
 *
 * The digest is loaded into RMAC, updated as
 *     RMAC = SM4_CRYPT_BLK(RMAC ^ block)
 * for each block, and stored back to x1 on every path.  SM4_PREPARE and
 * SM4_CRYPT_BLK are macros defined outside this function; presumably
 * they load the round keys from x0 and run the SM4 rounds in place --
 * confirm against their definitions.
 */
101262306a36Sopenharmony_ciSYM_FUNC_START(sm4_ce_mac_update)
101362306a36Sopenharmony_ci	/* input:
101462306a36Sopenharmony_ci	 *   x0: round key array, CTX
101562306a36Sopenharmony_ci	 *   x1: digest (read on entry, written back on return)
101662306a36Sopenharmony_ci	 *   x2: src
101762306a36Sopenharmony_ci	 *   w3: nblocks
101862306a36Sopenharmony_ci	 *   w4: enc_before; non-zero runs SM4_CRYPT_BLK on the digest first
101962306a36Sopenharmony_ci	 *   w5: enc_after; zero leaves the last block XORed in but not
	 *       passed through SM4_CRYPT_BLK
102062306a36Sopenharmony_ci	 */
102162306a36Sopenharmony_ci	SM4_PREPARE(x0)
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci	ld1		{RMAC.16b}, [x1]
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci	cbz		w4, .Lmac_update
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci	SM4_CRYPT_BLK(RMAC)
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci.Lmac_update:
	/* no input blocks: just write the digest back */
103062306a36Sopenharmony_ci	cbz		w3, .Lmac_ret
103162306a36Sopenharmony_ci
	/*
	 * w3 = number of blocks that get the full XOR + SM4_CRYPT_BLK
	 * treatment: nblocks when enc_after is set, nblocks - 1 otherwise.
	 */
103262306a36Sopenharmony_ci	sub		w6, w3, #1
103362306a36Sopenharmony_ci	cmp		w5, wzr
103462306a36Sopenharmony_ci	csel		w3, w3, w6, ne
103562306a36Sopenharmony_ci
103662306a36Sopenharmony_ci	cbz		w3, .Lmac_end
103762306a36Sopenharmony_ci
103862306a36Sopenharmony_ci.Lmac_loop_4x:
103962306a36Sopenharmony_ci	cmp		w3, #4
104062306a36Sopenharmony_ci	blt		.Lmac_loop_1x
104162306a36Sopenharmony_ci
104262306a36Sopenharmony_ci	sub		w3, w3, #4
104362306a36Sopenharmony_ci
	/* four blocks are loaded at once, but the chain itself is serial */
104462306a36Sopenharmony_ci	ld1		{v0.16b-v3.16b}, [x2], #64
104562306a36Sopenharmony_ci
104662306a36Sopenharmony_ci	eor		RMAC.16b, RMAC.16b, v0.16b
104762306a36Sopenharmony_ci	SM4_CRYPT_BLK(RMAC)
104862306a36Sopenharmony_ci	eor		RMAC.16b, RMAC.16b, v1.16b
104962306a36Sopenharmony_ci	SM4_CRYPT_BLK(RMAC)
105062306a36Sopenharmony_ci	eor		RMAC.16b, RMAC.16b, v2.16b
105162306a36Sopenharmony_ci	SM4_CRYPT_BLK(RMAC)
105262306a36Sopenharmony_ci	eor		RMAC.16b, RMAC.16b, v3.16b
105362306a36Sopenharmony_ci	SM4_CRYPT_BLK(RMAC)
105462306a36Sopenharmony_ci
105562306a36Sopenharmony_ci	cbz		w3, .Lmac_end
105662306a36Sopenharmony_ci	b		.Lmac_loop_4x
105762306a36Sopenharmony_ci
105862306a36Sopenharmony_ci.Lmac_loop_1x:
	/* w3 is 1..3 whenever this loop is entered */
105962306a36Sopenharmony_ci	sub		w3, w3, #1
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_ci	ld1		{v0.16b}, [x2], #16
106262306a36Sopenharmony_ci
106362306a36Sopenharmony_ci	eor		RMAC.16b, RMAC.16b, v0.16b
106462306a36Sopenharmony_ci	SM4_CRYPT_BLK(RMAC)
106562306a36Sopenharmony_ci
106662306a36Sopenharmony_ci	cbnz		w3, .Lmac_loop_1x
106762306a36Sopenharmony_ci
106862306a36Sopenharmony_ci
106962306a36Sopenharmony_ci.Lmac_end:
	/* enc_after set: every block has already been fully processed */
107062306a36Sopenharmony_ci	cbnz		w5, .Lmac_ret
107162306a36Sopenharmony_ci
	/* enc_after clear: XOR in the held-back last block only */
107262306a36Sopenharmony_ci	ld1		{v0.16b}, [x2], #16
107362306a36Sopenharmony_ci	eor		RMAC.16b, RMAC.16b, v0.16b
107462306a36Sopenharmony_ci
107562306a36Sopenharmony_ci.Lmac_ret:
107662306a36Sopenharmony_ci	st1		{RMAC.16b}, [x1]
107762306a36Sopenharmony_ci	ret
107862306a36Sopenharmony_ciSYM_FUNC_END(sm4_ce_mac_update)
107962306a36Sopenharmony_ci
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	.section	".rodata", "a"
108262306a36Sopenharmony_ci	.align 4
	/*
	 * Byte index vector 12..15, 8..11, 4..7, 0..3: used as a table
	 * lookup index it reverses the order of the four 32-bit words of a
	 * vector while keeping the byte order inside each word.
	 * NOTE(review): its users are outside this part of the file.
	 */
108362306a36Sopenharmony_ci.Lbswap128_mask:
108462306a36Sopenharmony_ci	.byte		0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b
108562306a36Sopenharmony_ci	.byte		0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03
108662306a36Sopenharmony_ci
	/*
	 * 48-byte sliding window for ciphertext stealing: 16 x 0xff, the
	 * identity indices 0..15, then 16 x 0xff.  The XTS code loads 16
	 * bytes at offset 'tail' and at offset '32 - tail'.  An index of
	 * 0xff is out of range for a one-register table, so tbl writes
	 * zero for that byte and tbx leaves the destination byte as is.
	 */
108762306a36Sopenharmony_ci.Lcts_permute_table:
108862306a36Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
108962306a36Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
109062306a36Sopenharmony_ci	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
109162306a36Sopenharmony_ci	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
109262306a36Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
109362306a36Sopenharmony_ci	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
1094