/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Register-number symbols.
 *
 * For every N in 0-31, define the absolute symbols .Lv<N>.2d and .Lv<N>.16b
 * with the value N.  The instruction macros in this file paste ".L" in front
 * of their operands (so "v5.16b" becomes ".Lv5.16b") to obtain the register
 * number they place into a hand-built encoding.  Only the .2d and .16b
 * arrangements are defined, so those are the only operand spellings the
 * macros accept.
 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

/*
 * ARMv8.2 Crypto Extensions instructions
 *
 * The SHA-3 instructions below are emitted as raw .inst words.  Register
 * operands must be written as vN.16b or vN.2d, because each macro resolves
 * ".L<operand>" to the register number.
 */

/*
 * eor3 rd, rn, rm, ra - three-way XOR: rd = rn ^ rm ^ ra
 *
 * Encoding fields: rd in bits [4:0], rn from bit 5, ra from bit 10,
 * rm from bit 16.
 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

/*
 * rax1 rd, rn, rm - rotate and XOR: rd = rn ^ rol64(rm, 1), per 64-bit lane
 *
 * Emitted as a raw .inst word.  Operands must be spelled vN.2d or vN.16b so
 * that ".L<operand>" resolves to the register number.
 * Encoding fields: rd in bits [4:0], rn from bit 5, rm from bit 16.
 */
	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

/*
 * bcax rd, rn, rm, ra - bit clear and XOR: rd = rn ^ (rm & ~ra)
 *
 * Emitted as a raw .inst word.  Operands must be spelled vN.16b or vN.2d so
 * that ".L<operand>" resolves to the register number.
 * Encoding fields: rd in bits [4:0], rn from bit 5, ra from bit 10,
 * rm from bit 16.
 */
	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

/*
 * xar rd, rn, rm, imm6 - XOR and rotate: rd = ror64(rn ^ rm, imm6), per
 * 64-bit lane
 *
 * Emitted as a raw .inst word.  Register operands must be spelled vN.2d or
 * vN.16b so that ".L<operand>" resolves to the register number.
 * Encoding fields: rd in bits [4:0], rn from bit 5, imm6 from bit 10,
 * rm from bit 16.
 *
 * imm6 is not range-checked: a value outside 0-63 would spill into the rm
 * field.  The rotation is to the right, so pass (64 - n) to rotate left by n.
 */
	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

/*
 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
 *
 * XOR successive input blocks into the state at 'st' and run 24 permutation
 * rounds after each block, using the eor3/rax1/xar/bcax macros.
 *
 * In:   x0 = st       25 64-bit lanes (200 bytes); read on entry, written
 *                     back on exit
 *       x1 = data     input bytes; advanced by one block per iteration
 *       w2 = blocks   block count; must be at least 1, because the count is
 *                     decremented before it is tested
 *       x3 = dg_size  only bits 6, 4 and 2 are examined, to select how many
 *                     input bytes make up one block:
 *                       bit 6 set                   ->  72 bytes (SHA3-512)
 *                       bit 6 clear, bit 4 clear    -> 136 bytes (SHA3-256)
 *                       bit 4 set,   bit 2 clear    -> 104 bytes (SHA3-384)
 *                       bit 4 set,   bit 2 set      -> 144 bytes (SHA3-224)
 *                     (presumably the digest size in bytes, i.e. 64, 32, 48
 *                     or 28 - confirm against the caller)
 * Out:  w0 = number of blocks not yet processed; non-zero only when
 *            cond_yield left the block loop early
 *
 * Register use:
 *       v0-v24   state lanes 0-24 (only the low 64 bits of each register are
 *                loaded from and stored to 'st')
 *       v25-v31  temporaries (input words, column values, round constant)
 *       w8       round counter, x9 round constant pointer; both are also
 *                passed to cond_yield as its temporaries
 *
 * NOTE(review): v8-v15 are overwritten without being preserved.  Presumably
 * every caller brackets the call with kernel_neon_begin()/kernel_neon_end()
 * - confirm against the C glue code.
 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/* load state: lane i of st[] goes into the low half of v<i> */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

	/* per-block loop: w2 = blocks left after this one */
0:	sub	w2, w2, #1
	mov	w8, #24			// 24 rounds per block
	adr_l	x9, .Lsha3_rcon		// restart at the first round constant

	/* load input: the first 56 bytes (lanes 0-6) are common to all variants */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	tbnz	x3, #6, 2f		// SHA3-512

	/* next 48 bytes (lanes 7-12): every variant except SHA3-512 */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b
	eor	 v9.8b,  v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224

	// SHA3-256: 32 more bytes (lanes 13-16), 136 in total
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA3-384

	// SHA3-224: 40 more bytes (lanes 13-17), 144 in total
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f

	// SHA3-512: 16 more bytes (lanes 7-8), 72 in total
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	 v7.8b,  v7.8b, v25.8b
	eor	 v8.8b,  v8.8b, v26.8b

	/* round loop: w8 = rounds left after this one */
3:	sub	w8, w8, #1

	/*
	 * Theta, part 1: column parities.  Each of v25-v29 becomes the XOR
	 * of the five lanes i, i+5, i+10, i+15, i+20 (v25: i=0 ... v29: i=4).
	 */
	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	/*
	 * Theta, part 2: bc[x] = parity[x-1] ^ rol(parity[x+1], 1).  The
	 * order matters: each source is read before it is overwritten.
	 */
	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
	rax1	v27.2d, v27.2d, v29.2d	// bc[3]

	/*
	 * Theta part 3 merged with rho and pi: every lane is XORed with the
	 * bc[] value of its column, rotated left by its offset (xar rotates
	 * right, hence 64 - n) and written to the register of its new
	 * position.  Lane 0 has no rotation, so it only needs the plain eor.
	 * The sequence is a dependency chain: a register is only overwritten
	 * after its old value has been consumed.  v25-v31 end up holding
	 * lanes as well, which the bcax groups below read.
	 */
	eor	 v0.16b,  v0.16b, v30.16b
	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)

	/*
	 * Chi: one group of five bcax per row, each computing
	 * out = a ^ (b & ~c).  Rows are produced from lanes 20-24 down to
	 * lanes 0-4.
	 */
	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b,  v8.16b, v31.16b

	/*
	 * v31 is free from here on: fetch this round's constant into both
	 * 64-bit halves and step x9 to the next table entry.
	 */
	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b,  v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b

	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b

	/* Iota: fold the round constant into lane 0 */
	eor	 v0.16b,  v0.16b, v31.16b

	cbnz	w8, 3b			// more rounds for this block
	/*
	 * cond_yield comes from <asm/assembler.h>.  It may branch to 4f,
	 * presumably when the task should give up the CPU - see its
	 * definition.  In that case the state is saved and the remaining
	 * block count is returned.
	 */
	cond_yield 4f, x8, x9
	cbnz	w2, 0b			// more blocks

	/* save state: low 64 bits of v0-v24 back to st[0..24] */
4:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	mov	w0, w2			// return the number of blocks left
	ret
SYM_FUNC_END(sha3_ce_transform)

/*
 * Round constants: 24 64-bit words, one per permutation round, in round
 * order.  sha3_ce_transform points x9 at .Lsha3_rcon at the start of every
 * block and reads one entry per round with a post-incrementing ld1r.
 *
 * NOTE(review): with the arm64 assembler, .align takes a power-of-two
 * exponent, so ".align 8" requests 256-byte alignment.  The 64-bit loads
 * need 8 bytes at most; confirm whether the larger alignment is intended.
 */
	.section	".rodata", "a"
	.align		8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008