18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/linkage.h>
98c2ecf20Sopenharmony_ci#include <asm/assembler.h>
108c2ecf20Sopenharmony_ci
/*
 * Define one assembler symbol per vector register, .Lv0.4s ... .Lv12.4s,
 * whose value is the register number. The instruction macros in this file
 * paste ".L" in front of an operand such as "v5.4s" to obtain the number
 * that goes into the opcode, so they only accept v0-v12 written with the
 * .4s arrangement.
 */
118c2ecf20Sopenharmony_ci	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
128c2ecf20Sopenharmony_ci	.set		.Lv\b\().4s, \b
138c2ecf20Sopenharmony_ci	.endr
148c2ecf20Sopenharmony_ci
/*
 * sm3partw1 rd, rn, rm - emit SM3PARTW1 as a raw 32-bit opcode word.
 *
 * Each operand must be a vN.4s name for which a .LvN.4s symbol exists. The
 * register numbers are ORed into the base opcode 0xce60c000: rd at bit 0,
 * rn at bit 5 and rm at bit 16.
 * NOTE(review): presumably hand-encoded so that the file still builds with
 * assemblers that do not know the SM3 mnemonics - confirm.
 */
158c2ecf20Sopenharmony_ci	.macro		sm3partw1, rd, rn, rm
168c2ecf20Sopenharmony_ci	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
178c2ecf20Sopenharmony_ci	.endm
188c2ecf20Sopenharmony_ci
/*
 * sm3partw2 rd, rn, rm - emit SM3PARTW2 as a raw 32-bit opcode word.
 *
 * Each operand must be a vN.4s name for which a .LvN.4s symbol exists. The
 * register numbers are ORed into the base opcode 0xce60c400: rd at bit 0,
 * rn at bit 5 and rm at bit 16.
 */
198c2ecf20Sopenharmony_ci	.macro		sm3partw2, rd, rn, rm
208c2ecf20Sopenharmony_ci	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
218c2ecf20Sopenharmony_ci	.endm
228c2ecf20Sopenharmony_ci
/*
 * sm3ss1 rd, rn, rm, ra - emit SM3SS1 as a raw 32-bit opcode word.
 *
 * Each operand must be a vN.4s name for which a .LvN.4s symbol exists. The
 * register numbers are ORed into the base opcode 0xce400000: rd at bit 0,
 * rn at bit 5, ra at bit 10 and rm at bit 16. Note that the macro takes rm
 * before ra although ra sits in the lower bit position.
 */
238c2ecf20Sopenharmony_ci	.macro		sm3ss1, rd, rn, rm, ra
248c2ecf20Sopenharmony_ci	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
258c2ecf20Sopenharmony_ci	.endm
268c2ecf20Sopenharmony_ci
/*
 * sm3tt1a rd, rn, rm, imm2 - emit SM3TT1A as a raw 32-bit opcode word.
 *
 * rd, rn and rm must be vN.4s names for which a .LvN.4s symbol exists;
 * their numbers are ORed into the base opcode 0xce408000 at bits 0, 5 and
 * 16. imm2 is an absolute expression ORed in at bit 12 without any range
 * check; the callers in this file pass 0-3.
 * NOTE(review): imm2 presumably selects the 32-bit element of rm that the
 * instruction consumes - confirm against the Arm ARM.
 */
278c2ecf20Sopenharmony_ci	.macro		sm3tt1a, rd, rn, rm, imm2
288c2ecf20Sopenharmony_ci	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
298c2ecf20Sopenharmony_ci	.endm
308c2ecf20Sopenharmony_ci
/*
 * sm3tt1b rd, rn, rm, imm2 - emit SM3TT1B as a raw 32-bit opcode word.
 *
 * rd, rn and rm must be vN.4s names for which a .LvN.4s symbol exists;
 * their numbers are ORed into the base opcode 0xce408400 at bits 0, 5 and
 * 16. imm2 is an absolute expression ORed in at bit 12 without any range
 * check; the callers in this file pass 0-3.
 */
318c2ecf20Sopenharmony_ci	.macro		sm3tt1b, rd, rn, rm, imm2
328c2ecf20Sopenharmony_ci	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
338c2ecf20Sopenharmony_ci	.endm
348c2ecf20Sopenharmony_ci
/*
 * sm3tt2a rd, rn, rm, imm2 - emit SM3TT2A as a raw 32-bit opcode word.
 *
 * rd, rn and rm must be vN.4s names for which a .LvN.4s symbol exists;
 * their numbers are ORed into the base opcode 0xce408800 at bits 0, 5 and
 * 16. imm2 is an absolute expression ORed in at bit 12 without any range
 * check; the callers in this file pass 0-3.
 */
358c2ecf20Sopenharmony_ci	.macro		sm3tt2a, rd, rn, rm, imm2
368c2ecf20Sopenharmony_ci	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
378c2ecf20Sopenharmony_ci	.endm
388c2ecf20Sopenharmony_ci
/*
 * sm3tt2b rd, rn, rm, imm2 - emit SM3TT2B as a raw 32-bit opcode word.
 *
 * rd, rn and rm must be vN.4s names for which a .LvN.4s symbol exists;
 * their numbers are ORed into the base opcode 0xce408c00 at bits 0, 5 and
 * 16. imm2 is an absolute expression ORed in at bit 12 without any range
 * check; the callers in this file pass 0-3.
 */
398c2ecf20Sopenharmony_ci	.macro		sm3tt2b, rd, rn, rm, imm2
408c2ecf20Sopenharmony_ci	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
418c2ecf20Sopenharmony_ci	.endm
428c2ecf20Sopenharmony_ci
/*
 * round ab, s0, t0, t1, i - one SM3 round on the state held in v8/v9.
 *
 *   ab  'a' or 'b'; pasted onto sm3tt1/sm3tt2 to pick the instruction variant
 *   s0  bare register name (e.g. v0) of the message vector fed to sm3tt2
 *   t0  bare register name holding the constant input for this round
 *   t1  bare register name that receives the constant for the next round
 *   i   index 0-3 forwarded as imm2 to sm3tt1/sm3tt2
 *
 * Reads v10, which the caller must have set up beforehand (qround loads it
 * with s0 ^ s1). Overwrites v5, v8, v9 and t1.
 */
438c2ecf20Sopenharmony_ci	.macro		round, ab, s0, t0, t1, i
	/* v5 = SM3SS1 result computed from v8, the constant in t0 and v9 */
448c2ecf20Sopenharmony_ci	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	/* t1 = t0 rotated left by one bit in every 32-bit lane (shl then sri) */
458c2ecf20Sopenharmony_ci	shl		\t1\().4s, \t0\().4s, #1
468c2ecf20Sopenharmony_ci	sri		\t1\().4s, \t0\().4s, #31
	/* update both state halves from v5; v8 consumes v10, v9 consumes s0 */
478c2ecf20Sopenharmony_ci	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
488c2ecf20Sopenharmony_ci	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
498c2ecf20Sopenharmony_ci	.endm
508c2ecf20Sopenharmony_ci
/*
 * qround ab, s0, s1 [, s2, s3, s4] - four rounds plus optional message
 * expansion.
 *
 *   ab     'a' or 'b', forwarded to the round macro
 *   s0     bare register name of the message vector consumed by these rounds
 *   s1     bare register name of the following message vector
 *   s2-s4  optional; when s4 is given, a new message vector is computed
 *          into s4 from s0-s3 (sm3partw1 before the rounds, sm3partw2 after)
 *
 * v11 must hold the round constant on entry. The four rounds alternate
 * between v11 and v12 as source and destination, so v11 again holds the
 * constant for the next round when the macro ends.
 * Overwrites v5, v8, v9, v10, v11 and v12, and additionally v6, v7 and s4
 * when s4 is given.
 */
518c2ecf20Sopenharmony_ci	.macro		qround, ab, s0, s1, s2, s3, s4
528c2ecf20Sopenharmony_ci	.ifnb		\s4
	/* byte-wise extracts across neighbouring message vectors feed the expansion */
538c2ecf20Sopenharmony_ci	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
548c2ecf20Sopenharmony_ci	ext		v6.16b, \s0\().16b, \s1\().16b, #12
558c2ecf20Sopenharmony_ci	ext		v7.16b, \s2\().16b, \s3\().16b, #8
568c2ecf20Sopenharmony_ci	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
578c2ecf20Sopenharmony_ci	.endif
588c2ecf20Sopenharmony_ci
	/* v10 = s0 ^ s1, read by sm3tt1 in each of the four rounds */
598c2ecf20Sopenharmony_ci	eor		v10.16b, \s0\().16b, \s1\().16b
608c2ecf20Sopenharmony_ci
	/* v11 and v12 swap roles every round so the constant keeps rotating */
618c2ecf20Sopenharmony_ci	round		\ab, \s0, v11, v12, 0
628c2ecf20Sopenharmony_ci	round		\ab, \s0, v12, v11, 1
638c2ecf20Sopenharmony_ci	round		\ab, \s0, v11, v12, 2
648c2ecf20Sopenharmony_ci	round		\ab, \s0, v12, v11, 3
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	.ifnb		\s4
	/* second half of the expansion, using the v6/v7 values prepared above */
678c2ecf20Sopenharmony_ci	sm3partw2	\s4\().4s, v7.4s, v6.4s
688c2ecf20Sopenharmony_ci	.endif
698c2ecf20Sopenharmony_ci	.endm
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	/*
728c2ecf20Sopenharmony_ci	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
738c2ecf20Sopenharmony_ci	 *                       int blocks)
	 *
	 * Folds `blocks` consecutive 64-byte blocks read from `src` into the
	 * 32 bytes of state stored at `sst`, which are updated in place.
	 *
	 * In:    x0 = sst, x1 = src, w2 = blocks
	 * Uses:  x1 and w2 are modified; x8 and v0-v16 are overwritten.
	 *
	 * NOTE(review): w2 is decremented before it is tested, so a call with
	 * blocks == 0 would still consume a block and keep looping until w2
	 * wraps back to zero. Callers presumably guarantee blocks >= 1 -
	 * confirm.
	 * NOTE(review): v8-v15 are used without being saved here; presumably
	 * the caller brackets the call with kernel_neon_begin() and
	 * kernel_neon_end() - confirm.
748c2ecf20Sopenharmony_ci	 */
758c2ecf20Sopenharmony_ci	.text
768c2ecf20Sopenharmony_ciSYM_FUNC_START(sm3_ce_transform)
778c2ecf20Sopenharmony_ci	/* load state */
788c2ecf20Sopenharmony_ci	ld1		{v8.4s-v9.4s}, [x0]
	/* rev64 followed by ext #8 reverses the order of the four 32-bit words */
798c2ecf20Sopenharmony_ci	rev64		v8.4s, v8.4s
808c2ecf20Sopenharmony_ci	rev64		v9.4s, v9.4s
818c2ecf20Sopenharmony_ci	ext		v8.16b, v8.16b, v8.16b, #8
828c2ecf20Sopenharmony_ci	ext		v9.16b, v9.16b, v9.16b, #8
838c2ecf20Sopenharmony_ci
	/* s13/s14 = the two words at .Lt; a scalar load zeroes the rest of v13/v14 */
848c2ecf20Sopenharmony_ci	adr_l		x8, .Lt
858c2ecf20Sopenharmony_ci	ldp		s13, s14, [x8]
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci	/* load input */
888c2ecf20Sopenharmony_ci0:	ld1		{v0.16b-v3.16b}, [x1], #64
898c2ecf20Sopenharmony_ci	sub		w2, w2, #1
908c2ecf20Sopenharmony_ci
	/* keep the incoming state for the XOR at the end of this block */
918c2ecf20Sopenharmony_ci	mov		v15.16b, v8.16b
928c2ecf20Sopenharmony_ci	mov		v16.16b, v9.16b
938c2ecf20Sopenharmony_ci
	/*
	 * rev32 byte-swaps every 32-bit word of the input. CPU_LE() comes from
	 * <asm/assembler.h>; presumably it emits its argument only on
	 * little-endian builds - confirm.
	 */
948c2ecf20Sopenharmony_ciCPU_LE(	rev32		v0.16b, v0.16b		)
958c2ecf20Sopenharmony_ciCPU_LE(	rev32		v1.16b, v1.16b		)
968c2ecf20Sopenharmony_ciCPU_LE(	rev32		v2.16b, v2.16b		)
978c2ecf20Sopenharmony_ciCPU_LE(	rev32		v3.16b, v3.16b		)
988c2ecf20Sopenharmony_ci
	/* move the first constant into the top 32-bit lane of v11, other lanes zero */
998c2ecf20Sopenharmony_ci	ext		v11.16b, v13.16b, v13.16b, #4
1008c2ecf20Sopenharmony_ci
	/* rounds 0-15: 'a' variants; each qround also derives the next message vector */
1018c2ecf20Sopenharmony_ci	qround		a, v0, v1, v2, v3, v4
1028c2ecf20Sopenharmony_ci	qround		a, v1, v2, v3, v4, v0
1038c2ecf20Sopenharmony_ci	qround		a, v2, v3, v4, v0, v1
1048c2ecf20Sopenharmony_ci	qround		a, v3, v4, v0, v1, v2
1058c2ecf20Sopenharmony_ci
	/* switch to the second constant for the remaining rounds */
1068c2ecf20Sopenharmony_ci	ext		v11.16b, v14.16b, v14.16b, #4
1078c2ecf20Sopenharmony_ci
	/* rounds 16-63: 'b' variants; v0-v4 are reused as a rotating window */
1088c2ecf20Sopenharmony_ci	qround		b, v4, v0, v1, v2, v3
1098c2ecf20Sopenharmony_ci	qround		b, v0, v1, v2, v3, v4
1108c2ecf20Sopenharmony_ci	qround		b, v1, v2, v3, v4, v0
1118c2ecf20Sopenharmony_ci	qround		b, v2, v3, v4, v0, v1
1128c2ecf20Sopenharmony_ci	qround		b, v3, v4, v0, v1, v2
1138c2ecf20Sopenharmony_ci	qround		b, v4, v0, v1, v2, v3
1148c2ecf20Sopenharmony_ci	qround		b, v0, v1, v2, v3, v4
1158c2ecf20Sopenharmony_ci	qround		b, v1, v2, v3, v4, v0
1168c2ecf20Sopenharmony_ci	qround		b, v2, v3, v4, v0, v1
	/* final 12 rounds: s4 is omitted, so no further message vectors are derived */
1178c2ecf20Sopenharmony_ci	qround		b, v3, v4
1188c2ecf20Sopenharmony_ci	qround		b, v4, v0
1198c2ecf20Sopenharmony_ci	qround		b, v0, v1
1208c2ecf20Sopenharmony_ci
	/* XOR the round output with the state saved at the top of the loop */
1218c2ecf20Sopenharmony_ci	eor		v8.16b, v8.16b, v15.16b
1228c2ecf20Sopenharmony_ci	eor		v9.16b, v9.16b, v16.16b
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	/* handled all input blocks? */
1258c2ecf20Sopenharmony_ci	cbnz		w2, 0b
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	/* save state */
	/* undo the word-order reversal applied after loading */
1288c2ecf20Sopenharmony_ci	rev64		v8.4s, v8.4s
1298c2ecf20Sopenharmony_ci	rev64		v9.4s, v9.4s
1308c2ecf20Sopenharmony_ci	ext		v8.16b, v8.16b, v8.16b, #8
1318c2ecf20Sopenharmony_ci	ext		v9.16b, v9.16b, v9.16b, #8
1328c2ecf20Sopenharmony_ci	st1		{v8.4s-v9.4s}, [x0]
1338c2ecf20Sopenharmony_ci	ret
1348c2ecf20Sopenharmony_ciSYM_FUNC_END(sm3_ce_transform)
1358c2ecf20Sopenharmony_ci
/*
 * Two 32-bit constants, loaded as a pair by sm3_ce_transform: the first
 * seeds v11 for the 'a' rounds, the second for the 'b' rounds. The table is
 * read-only and 8-byte aligned.
 * NOTE(review): the values look like the SM3 round constant for rounds 0-15
 * (0x79cc4519) and the constant for rounds 16-63 (0x7a879d8a) rotated left
 * by 16 bits - confirm against the SM3 specification.
 */
1368c2ecf20Sopenharmony_ci	.section	".rodata", "a"
1378c2ecf20Sopenharmony_ci	.align		3
1388c2ecf20Sopenharmony_ci.Lt:	.word		0x79cc4519, 0x9d8a7a87
139