/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Define one assembler symbol per vector register and arrangement
	 * spelling, e.g. .Lv7.2d = 7 and .Lv7.16b = 7.
	 *
	 * The .inst based instruction macros in this file paste ".L" in front
	 * of each register operand, so an operand written as "v7.16b" becomes
	 * the symbol ".Lv7.16b" and evaluates to the register number that is
	 * OR'ed into the opcode.  Only the ".2d" and ".16b" spellings are
	 * defined; any other arrangement is an undefined symbol.
	 */
	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

208c2ecf20Sopenharmony_ci	/*
218c2ecf20Sopenharmony_ci	 * ARMv8.2 Crypto Extensions instructions
228c2ecf20Sopenharmony_ci	 */
238c2ecf20Sopenharmony_ci	.macro	eor3, rd, rn, rm, ra
248c2ecf20Sopenharmony_ci	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
258c2ecf20Sopenharmony_ci	.endm
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci	.macro	rax1, rd, rn, rm
288c2ecf20Sopenharmony_ci	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
298c2ecf20Sopenharmony_ci	.endm
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci	.macro	bcax, rd, rn, rm, ra
328c2ecf20Sopenharmony_ci	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
338c2ecf20Sopenharmony_ci	.endm
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci	.macro	xar, rd, rn, rm, imm6
368c2ecf20Sopenharmony_ci	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
378c2ecf20Sopenharmony_ci	.endm
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci	/*
408c2ecf20Sopenharmony_ci	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
418c2ecf20Sopenharmony_ci	 */
428c2ecf20Sopenharmony_ci	.text
438c2ecf20Sopenharmony_ciSYM_FUNC_START(sha3_ce_transform)
448c2ecf20Sopenharmony_ci	/* load state */
458c2ecf20Sopenharmony_ci	add	x8, x0, #32
468c2ecf20Sopenharmony_ci	ld1	{ v0.1d- v3.1d}, [x0]
478c2ecf20Sopenharmony_ci	ld1	{ v4.1d- v7.1d}, [x8], #32
488c2ecf20Sopenharmony_ci	ld1	{ v8.1d-v11.1d}, [x8], #32
498c2ecf20Sopenharmony_ci	ld1	{v12.1d-v15.1d}, [x8], #32
508c2ecf20Sopenharmony_ci	ld1	{v16.1d-v19.1d}, [x8], #32
518c2ecf20Sopenharmony_ci	ld1	{v20.1d-v23.1d}, [x8], #32
528c2ecf20Sopenharmony_ci	ld1	{v24.1d}, [x8]
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci0:	sub	w2, w2, #1
558c2ecf20Sopenharmony_ci	mov	w8, #24
568c2ecf20Sopenharmony_ci	adr_l	x9, .Lsha3_rcon
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci	/* load input */
598c2ecf20Sopenharmony_ci	ld1	{v25.8b-v28.8b}, [x1], #32
608c2ecf20Sopenharmony_ci	ld1	{v29.8b-v31.8b}, [x1], #24
618c2ecf20Sopenharmony_ci	eor	v0.8b, v0.8b, v25.8b
628c2ecf20Sopenharmony_ci	eor	v1.8b, v1.8b, v26.8b
638c2ecf20Sopenharmony_ci	eor	v2.8b, v2.8b, v27.8b
648c2ecf20Sopenharmony_ci	eor	v3.8b, v3.8b, v28.8b
658c2ecf20Sopenharmony_ci	eor	v4.8b, v4.8b, v29.8b
668c2ecf20Sopenharmony_ci	eor	v5.8b, v5.8b, v30.8b
678c2ecf20Sopenharmony_ci	eor	v6.8b, v6.8b, v31.8b
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci	tbnz	x3, #6, 2f		// SHA3-512
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	ld1	{v25.8b-v28.8b}, [x1], #32
728c2ecf20Sopenharmony_ci	ld1	{v29.8b-v30.8b}, [x1], #16
738c2ecf20Sopenharmony_ci	eor	 v7.8b,  v7.8b, v25.8b
748c2ecf20Sopenharmony_ci	eor	 v8.8b,  v8.8b, v26.8b
758c2ecf20Sopenharmony_ci	eor	 v9.8b,  v9.8b, v27.8b
768c2ecf20Sopenharmony_ci	eor	v10.8b, v10.8b, v28.8b
778c2ecf20Sopenharmony_ci	eor	v11.8b, v11.8b, v29.8b
788c2ecf20Sopenharmony_ci	eor	v12.8b, v12.8b, v30.8b
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	// SHA3-256
838c2ecf20Sopenharmony_ci	ld1	{v25.8b-v28.8b}, [x1], #32
848c2ecf20Sopenharmony_ci	eor	v13.8b, v13.8b, v25.8b
858c2ecf20Sopenharmony_ci	eor	v14.8b, v14.8b, v26.8b
868c2ecf20Sopenharmony_ci	eor	v15.8b, v15.8b, v27.8b
878c2ecf20Sopenharmony_ci	eor	v16.8b, v16.8b, v28.8b
888c2ecf20Sopenharmony_ci	b	3f
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	// SHA3-224
938c2ecf20Sopenharmony_ci	ld1	{v25.8b-v28.8b}, [x1], #32
948c2ecf20Sopenharmony_ci	ld1	{v29.8b}, [x1], #8
958c2ecf20Sopenharmony_ci	eor	v13.8b, v13.8b, v25.8b
968c2ecf20Sopenharmony_ci	eor	v14.8b, v14.8b, v26.8b
978c2ecf20Sopenharmony_ci	eor	v15.8b, v15.8b, v27.8b
988c2ecf20Sopenharmony_ci	eor	v16.8b, v16.8b, v28.8b
998c2ecf20Sopenharmony_ci	eor	v17.8b, v17.8b, v29.8b
1008c2ecf20Sopenharmony_ci	b	3f
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	// SHA3-512
1038c2ecf20Sopenharmony_ci2:	ld1	{v25.8b-v26.8b}, [x1], #16
1048c2ecf20Sopenharmony_ci	eor	 v7.8b,  v7.8b, v25.8b
1058c2ecf20Sopenharmony_ci	eor	 v8.8b,  v8.8b, v26.8b
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci3:	sub	w8, w8, #1
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
1108c2ecf20Sopenharmony_ci	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
1118c2ecf20Sopenharmony_ci	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
1128c2ecf20Sopenharmony_ci	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
1138c2ecf20Sopenharmony_ci	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
1148c2ecf20Sopenharmony_ci	eor3	v29.16b, v29.16b, v19.16b, v24.16b
1158c2ecf20Sopenharmony_ci	eor3	v26.16b, v26.16b, v16.16b, v21.16b
1168c2ecf20Sopenharmony_ci	eor3	v28.16b, v28.16b, v18.16b, v23.16b
1178c2ecf20Sopenharmony_ci	eor3	v25.16b, v25.16b, v15.16b, v20.16b
1188c2ecf20Sopenharmony_ci	eor3	v27.16b, v27.16b, v17.16b, v22.16b
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
1218c2ecf20Sopenharmony_ci	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
1228c2ecf20Sopenharmony_ci	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
1238c2ecf20Sopenharmony_ci	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
1248c2ecf20Sopenharmony_ci	rax1	v27.2d, v27.2d, v29.2d	// bc[3]
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	eor	 v0.16b,  v0.16b, v30.16b
1278c2ecf20Sopenharmony_ci	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
1288c2ecf20Sopenharmony_ci	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
1298c2ecf20Sopenharmony_ci	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
1308c2ecf20Sopenharmony_ci	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
1318c2ecf20Sopenharmony_ci	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
1328c2ecf20Sopenharmony_ci	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
1338c2ecf20Sopenharmony_ci	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
1348c2ecf20Sopenharmony_ci	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
1358c2ecf20Sopenharmony_ci	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
1368c2ecf20Sopenharmony_ci	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
1378c2ecf20Sopenharmony_ci	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
1388c2ecf20Sopenharmony_ci	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
1398c2ecf20Sopenharmony_ci	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
1408c2ecf20Sopenharmony_ci	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
1418c2ecf20Sopenharmony_ci	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
1428c2ecf20Sopenharmony_ci	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
1438c2ecf20Sopenharmony_ci	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
1448c2ecf20Sopenharmony_ci	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
1458c2ecf20Sopenharmony_ci	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
1468c2ecf20Sopenharmony_ci	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
1478c2ecf20Sopenharmony_ci	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
1488c2ecf20Sopenharmony_ci	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
1498c2ecf20Sopenharmony_ci	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
1508c2ecf20Sopenharmony_ci	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
1538c2ecf20Sopenharmony_ci	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
1548c2ecf20Sopenharmony_ci	bcax	v22.16b, v22.16b, v24.16b, v23.16b
1558c2ecf20Sopenharmony_ci	bcax	v23.16b, v23.16b, v31.16b, v24.16b
1568c2ecf20Sopenharmony_ci	bcax	v24.16b, v24.16b,  v8.16b, v31.16b
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	ld1r	{v31.2d}, [x9], #8
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
1618c2ecf20Sopenharmony_ci	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
1628c2ecf20Sopenharmony_ci	bcax	v19.16b, v19.16b, v16.16b, v15.16b
1638c2ecf20Sopenharmony_ci	bcax	v15.16b, v15.16b, v25.16b, v16.16b
1648c2ecf20Sopenharmony_ci	bcax	v16.16b, v16.16b,  v3.16b, v25.16b
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	bcax	v10.16b, v29.16b, v12.16b, v26.16b
1678c2ecf20Sopenharmony_ci	bcax	v11.16b, v26.16b, v13.16b, v12.16b
1688c2ecf20Sopenharmony_ci	bcax	v12.16b, v12.16b, v14.16b, v13.16b
1698c2ecf20Sopenharmony_ci	bcax	v13.16b, v13.16b, v29.16b, v14.16b
1708c2ecf20Sopenharmony_ci	bcax	v14.16b, v14.16b, v26.16b, v29.16b
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
1738c2ecf20Sopenharmony_ci	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
1748c2ecf20Sopenharmony_ci	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
1758c2ecf20Sopenharmony_ci	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
1768c2ecf20Sopenharmony_ci	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
1798c2ecf20Sopenharmony_ci	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
1808c2ecf20Sopenharmony_ci	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
1818c2ecf20Sopenharmony_ci	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
1828c2ecf20Sopenharmony_ci	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	eor	 v0.16b,  v0.16b, v31.16b
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	cbnz	w8, 3b
1878c2ecf20Sopenharmony_ci	cond_yield 4f, x8, x9
1888c2ecf20Sopenharmony_ci	cbnz	w2, 0b
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	/* save state */
1918c2ecf20Sopenharmony_ci4:	st1	{ v0.1d- v3.1d}, [x0], #32
1928c2ecf20Sopenharmony_ci	st1	{ v4.1d- v7.1d}, [x0], #32
1938c2ecf20Sopenharmony_ci	st1	{ v8.1d-v11.1d}, [x0], #32
1948c2ecf20Sopenharmony_ci	st1	{v12.1d-v15.1d}, [x0], #32
1958c2ecf20Sopenharmony_ci	st1	{v16.1d-v19.1d}, [x0], #32
1968c2ecf20Sopenharmony_ci	st1	{v20.1d-v23.1d}, [x0], #32
1978c2ecf20Sopenharmony_ci	st1	{v24.1d}, [x0]
1988c2ecf20Sopenharmony_ci	mov	w0, w2
1998c2ecf20Sopenharmony_ci	ret
2008c2ecf20Sopenharmony_ciSYM_FUNC_END(sha3_ce_transform)
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	.section	".rodata", "a"
2038c2ecf20Sopenharmony_ci	.align		8
2048c2ecf20Sopenharmony_ci.Lsha3_rcon:
2058c2ecf20Sopenharmony_ci	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
2068c2ecf20Sopenharmony_ci	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
2078c2ecf20Sopenharmony_ci	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
2088c2ecf20Sopenharmony_ci	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
2098c2ecf20Sopenharmony_ci	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
2108c2ecf20Sopenharmony_ci	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
2118c2ecf20Sopenharmony_ci	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
2128c2ecf20Sopenharmony_ci	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
213