/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/*
	 * Round constants. sha1_ce_transform fills each of these with one
	 * 32-bit constant replicated into all four lanes.
	 */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	/*
	 * Scratch vectors holding "schedule words + round constant".
	 * The add_only macro alternates between them: one step consumes t0
	 * while producing t1, the next consumes t1 while producing t0.
	 */
	t0		.req	v4
	t1		.req	v5

	/*
	 * Running digest as loaded from / stored to the state buffer:
	 * dga (q6/v6) holds the first 16 bytes, dgb (s7/v7) the 32-bit word
	 * at byte offset 16.
	 */
	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	/*
	 * Per-block working registers:
	 *   dg0 (q12/s12/v12) - working copy of dga, updated by every sha1<op>
	 *   dg1 (s13/v13)     - sha1h result written by the "od" steps
	 *   dg2 (s14)         - sha1h result written by the "ev" steps
	 */
	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14

348c2ecf20Sopenharmony_ci	.macro		add_only, op, ev, rc, s0, dg1
358c2ecf20Sopenharmony_ci	.ifc		\ev, ev
368c2ecf20Sopenharmony_ci	add		t1.4s, v\s0\().4s, \rc\().4s
378c2ecf20Sopenharmony_ci	sha1h		dg2s, dg0s
388c2ecf20Sopenharmony_ci	.ifnb		\dg1
398c2ecf20Sopenharmony_ci	sha1\op		dg0q, \dg1, t0.4s
408c2ecf20Sopenharmony_ci	.else
418c2ecf20Sopenharmony_ci	sha1\op		dg0q, dg1s, t0.4s
428c2ecf20Sopenharmony_ci	.endif
438c2ecf20Sopenharmony_ci	.else
448c2ecf20Sopenharmony_ci	.ifnb		\s0
458c2ecf20Sopenharmony_ci	add		t0.4s, v\s0\().4s, \rc\().4s
468c2ecf20Sopenharmony_ci	.endif
478c2ecf20Sopenharmony_ci	sha1h		dg1s, dg0s
488c2ecf20Sopenharmony_ci	sha1\op		dg0q, dg2s, t1.4s
498c2ecf20Sopenharmony_ci	.endif
508c2ecf20Sopenharmony_ci	.endm
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
538c2ecf20Sopenharmony_ci	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
548c2ecf20Sopenharmony_ci	add_only	\op, \ev, \rc, \s1, \dg1
558c2ecf20Sopenharmony_ci	sha1su1		v\s0\().4s, v\s3\().4s
568c2ecf20Sopenharmony_ci	.endm
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci	.macro		loadrc, k, val, tmp
598c2ecf20Sopenharmony_ci	movz		\tmp, :abs_g0_nc:\val
608c2ecf20Sopenharmony_ci	movk		\tmp, :abs_g1:\val
618c2ecf20Sopenharmony_ci	dup		\k, \tmp
628c2ecf20Sopenharmony_ci	.endm
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	/*
658c2ecf20Sopenharmony_ci	 * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
668c2ecf20Sopenharmony_ci	 *			 int blocks)
678c2ecf20Sopenharmony_ci	 */
688c2ecf20Sopenharmony_ciSYM_FUNC_START(sha1_ce_transform)
698c2ecf20Sopenharmony_ci	/* load round constants */
708c2ecf20Sopenharmony_ci	loadrc		k0.4s, 0x5a827999, w6
718c2ecf20Sopenharmony_ci	loadrc		k1.4s, 0x6ed9eba1, w6
728c2ecf20Sopenharmony_ci	loadrc		k2.4s, 0x8f1bbcdc, w6
738c2ecf20Sopenharmony_ci	loadrc		k3.4s, 0xca62c1d6, w6
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci	/* load state */
768c2ecf20Sopenharmony_ci	ld1		{dgav.4s}, [x0]
778c2ecf20Sopenharmony_ci	ldr		dgb, [x0, #16]
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci	/* load sha1_ce_state::finalize */
808c2ecf20Sopenharmony_ci	ldr_l		w4, sha1_ce_offsetof_finalize, x4
818c2ecf20Sopenharmony_ci	ldr		w4, [x0, x4]
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci	/* load input */
848c2ecf20Sopenharmony_ci0:	ld1		{v8.4s-v11.4s}, [x1], #64
858c2ecf20Sopenharmony_ci	sub		w2, w2, #1
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ciCPU_LE(	rev32		v8.16b, v8.16b		)
888c2ecf20Sopenharmony_ciCPU_LE(	rev32		v9.16b, v9.16b		)
898c2ecf20Sopenharmony_ciCPU_LE(	rev32		v10.16b, v10.16b	)
908c2ecf20Sopenharmony_ciCPU_LE(	rev32		v11.16b, v11.16b	)
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci1:	add		t0.4s, v8.4s, k0.4s
938c2ecf20Sopenharmony_ci	mov		dg0v.16b, dgav.16b
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	add_update	c, ev, k0,  8,  9, 10, 11, dgb
968c2ecf20Sopenharmony_ci	add_update	c, od, k0,  9, 10, 11,  8
978c2ecf20Sopenharmony_ci	add_update	c, ev, k0, 10, 11,  8,  9
988c2ecf20Sopenharmony_ci	add_update	c, od, k0, 11,  8,  9, 10
998c2ecf20Sopenharmony_ci	add_update	c, ev, k1,  8,  9, 10, 11
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	add_update	p, od, k1,  9, 10, 11,  8
1028c2ecf20Sopenharmony_ci	add_update	p, ev, k1, 10, 11,  8,  9
1038c2ecf20Sopenharmony_ci	add_update	p, od, k1, 11,  8,  9, 10
1048c2ecf20Sopenharmony_ci	add_update	p, ev, k1,  8,  9, 10, 11
1058c2ecf20Sopenharmony_ci	add_update	p, od, k2,  9, 10, 11,  8
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	add_update	m, ev, k2, 10, 11,  8,  9
1088c2ecf20Sopenharmony_ci	add_update	m, od, k2, 11,  8,  9, 10
1098c2ecf20Sopenharmony_ci	add_update	m, ev, k2,  8,  9, 10, 11
1108c2ecf20Sopenharmony_ci	add_update	m, od, k2,  9, 10, 11,  8
1118c2ecf20Sopenharmony_ci	add_update	m, ev, k3, 10, 11,  8,  9
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	add_update	p, od, k3, 11,  8,  9, 10
1148c2ecf20Sopenharmony_ci	add_only	p, ev, k3,  9
1158c2ecf20Sopenharmony_ci	add_only	p, od, k3, 10
1168c2ecf20Sopenharmony_ci	add_only	p, ev, k3, 11
1178c2ecf20Sopenharmony_ci	add_only	p, od
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	/* update state */
1208c2ecf20Sopenharmony_ci	add		dgbv.2s, dgbv.2s, dg1v.2s
1218c2ecf20Sopenharmony_ci	add		dgav.4s, dgav.4s, dg0v.4s
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	cbz		w2, 2f
1248c2ecf20Sopenharmony_ci	cond_yield	3f, x5, x6
1258c2ecf20Sopenharmony_ci	b		0b
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	/*
1288c2ecf20Sopenharmony_ci	 * Final block: add padding and total bit count.
1298c2ecf20Sopenharmony_ci	 * Skip if the input size was not a round multiple of the block size,
1308c2ecf20Sopenharmony_ci	 * the padding is handled by the C code in that case.
1318c2ecf20Sopenharmony_ci	 */
1328c2ecf20Sopenharmony_ci2:	cbz		x4, 3f
1338c2ecf20Sopenharmony_ci	ldr_l		w4, sha1_ce_offsetof_count, x4
1348c2ecf20Sopenharmony_ci	ldr		x4, [x0, x4]
1358c2ecf20Sopenharmony_ci	movi		v9.2d, #0
1368c2ecf20Sopenharmony_ci	mov		x8, #0x80000000
1378c2ecf20Sopenharmony_ci	movi		v10.2d, #0
1388c2ecf20Sopenharmony_ci	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
1398c2ecf20Sopenharmony_ci	fmov		d8, x8
1408c2ecf20Sopenharmony_ci	mov		x4, #0
1418c2ecf20Sopenharmony_ci	mov		v11.d[0], xzr
1428c2ecf20Sopenharmony_ci	mov		v11.d[1], x7
1438c2ecf20Sopenharmony_ci	b		1b
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	/* store new state */
1468c2ecf20Sopenharmony_ci3:	st1		{dgav.4s}, [x0]
1478c2ecf20Sopenharmony_ci	str		dgb, [x0, #16]
1488c2ecf20Sopenharmony_ci	mov		w0, w2
1498c2ecf20Sopenharmony_ci	ret
1508c2ecf20Sopenharmony_ciSYM_FUNC_END(sha1_ce_transform)
151