162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/linkage.h>
962306a36Sopenharmony_ci#include <asm/assembler.h>
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci	.text
1262306a36Sopenharmony_ci	.arch		armv8-a+crypto
1362306a36Sopenharmony_ci
/*
 * Symbolic names for the SIMD registers used in this file.
 *
 * k0-k3: the four round constants, each replicated across a vector by
 * the loadrc macro at the start of the transform function.
 */
1462306a36Sopenharmony_ci	k0		.req	v0
1562306a36Sopenharmony_ci	k1		.req	v1
1662306a36Sopenharmony_ci	k2		.req	v2
1762306a36Sopenharmony_ci	k3		.req	v3
1862306a36Sopenharmony_ci
/*
 * t0/t1: scratch vectors holding the sum of four message words and a round
 * constant. The add_only macro writes one while the hash-update instruction
 * reads the other.
 */
1962306a36Sopenharmony_ci	t0		.req	v4
2062306a36Sopenharmony_ci	t1		.req	v5
2162306a36Sopenharmony_ci
/*
 * dgav and dgb/dgbv: the hash state as loaded from, and stored back to, the
 * state structure. dgav covers the 16 bytes at offset 0, dgb the 32-bit
 * word at offset 16. dga is the q-register view of the same register as
 * dgav; it is not referenced by the code in this file.
 */
2262306a36Sopenharmony_ci	dga		.req	q6
2362306a36Sopenharmony_ci	dgav		.req	v6
2462306a36Sopenharmony_ci	dgb		.req	s7
2562306a36Sopenharmony_ci	dgbv		.req	v7
2662306a36Sopenharmony_ci
/*
 * dg0q/dg0s/dg0v: three views of the per-block working copy of dgav, which
 * the sha1c, sha1p and sha1m instructions update in place.
 * dg1s/dg1v and dg2s: receive the sha1h results; add_only alternates
 * between them from one step to the next.
 */
2762306a36Sopenharmony_ci	dg0q		.req	q12
2862306a36Sopenharmony_ci	dg0s		.req	s12
2962306a36Sopenharmony_ci	dg0v		.req	v12
3062306a36Sopenharmony_ci	dg1s		.req	s13
3162306a36Sopenharmony_ci	dg1v		.req	v13
3262306a36Sopenharmony_ci	dg2s		.req	s14
3362306a36Sopenharmony_ci
/*
 * add_only - emit one sha1h + sha1<op> step and pre-compute the addend for
 * the step that follows.
 *
 * op  : suffix selecting the hash-update instruction; callers pass c, p or
 *       m, producing sha1c, sha1p or sha1m
 * ev  : the literal "ev" selects the first variant below; any other value
 *       (callers pass "od") selects the second
 * rc  : alias of the vector holding the round constant to add (k0-k3)
 * s0  : number N of the vector register vN that is added to rc; may be left
 *       blank only in the non-"ev" variant, in which case no add is emitted
 * dg1 : optional replacement for dg1s as the scalar operand of sha1<op>;
 *       only honoured in the "ev" variant
 *
 * "ev" variant : t1 = vN + rc; dg2s = sha1h(dg0s); sha1<op> reads dg1s
 *                (or the dg1 override) and t0.
 * other variant: t0 = vN + rc; dg1s = sha1h(dg0s); sha1<op> reads dg2s
 *                and t1.
 *
 * Each variant consumes the sum and the sha1h result produced by the other,
 * so invocations have to alternate between the two, and t0 must be valid
 * before the first "ev" step.
 */
3462306a36Sopenharmony_ci	.macro		add_only, op, ev, rc, s0, dg1
3562306a36Sopenharmony_ci	.ifc		\ev, ev
3662306a36Sopenharmony_ci	add		t1.4s, v\s0\().4s, \rc\().4s
3762306a36Sopenharmony_ci	sha1h		dg2s, dg0s
3862306a36Sopenharmony_ci	.ifnb		\dg1
3962306a36Sopenharmony_ci	sha1\op		dg0q, \dg1, t0.4s
4062306a36Sopenharmony_ci	.else
4162306a36Sopenharmony_ci	sha1\op		dg0q, dg1s, t0.4s
4262306a36Sopenharmony_ci	.endif
4362306a36Sopenharmony_ci	.else
4462306a36Sopenharmony_ci	.ifnb		\s0
4562306a36Sopenharmony_ci	add		t0.4s, v\s0\().4s, \rc\().4s
4662306a36Sopenharmony_ci	.endif
4762306a36Sopenharmony_ci	sha1h		dg1s, dg0s
4862306a36Sopenharmony_ci	sha1\op		dg0q, dg2s, t1.4s
4962306a36Sopenharmony_ci	.endif
5062306a36Sopenharmony_ci	.endm
5162306a36Sopenharmony_ci
/*
 * add_update - an add_only step combined with a message-schedule update.
 *
 * s0-s3 are vector register numbers. The macro brackets add_only between
 * the two schedule-update instructions, both of which write v<s0>:
 *	sha1su0 v<s0>, v<s1>, v<s2>
 *	add_only ... (s1 is the register that gets added to rc)
 *	sha1su1 v<s0>, v<s3>
 * op, ev, rc and dg1 are forwarded to add_only unchanged.
 */
5262306a36Sopenharmony_ci	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
5362306a36Sopenharmony_ci	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
5462306a36Sopenharmony_ci	add_only	\op, \ev, \rc, \s1, \dg1
5562306a36Sopenharmony_ci	sha1su1		v\s0\().4s, v\s3\().4s
5662306a36Sopenharmony_ci	.endm
5762306a36Sopenharmony_ci
/*
 * loadrc - replicate a 32-bit constant across a vector register.
 *
 * k   : destination vector including its arrangement (e.g. k0.4s)
 * val : the 32-bit constant
 * tmp : general-purpose register used to assemble the constant; clobbered
 *
 * movz sets bits [15:0] of val (the _nc form skips the overflow check),
 * movk inserts bits [31:16], and dup copies tmp into every lane of k.
 */
5862306a36Sopenharmony_ci	.macro		loadrc, k, val, tmp
5962306a36Sopenharmony_ci	movz		\tmp, :abs_g0_nc:\val
6062306a36Sopenharmony_ci	movk		\tmp, :abs_g1:\val
6162306a36Sopenharmony_ci	dup		\k, \tmp
6262306a36Sopenharmony_ci	.endm
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	/*
6562306a36Sopenharmony_ci	 * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
6662306a36Sopenharmony_ci	 *			 int blocks)
6762306a36Sopenharmony_ci	 */
	/*
	 * Processes 64-byte blocks from src, updating the hash state in sst.
	 * Returns (in w0) the number of blocks that were not consumed: zero
	 * when all input was processed, non-zero only when the loop is left
	 * early through cond_yield.
	 *
	 * Register usage, as read from the code below:
	 *   x0      sst: hash state at offsets 0 (16 bytes) and 16 (4 bytes),
	 *           plus the finalize and count fields, whose offsets are read
	 *           from sha1_ce_offsetof_finalize / sha1_ce_offsetof_count
	 *   x1      src, post-incremented by 64 bytes per block
	 *   w2      blocks still to be loaded; copied to w0 on exit
	 *   w4/x4   finalize flag, later reused for the offset and value of count
	 *   w6      scratch for loadrc; x5 and x6 are handed to cond_yield
	 *   x7, x8  scratch for building the padding block
	 *   v8-v11  the sixteen 32-bit message words of the current block
	 *
	 * There is no check for blocks == 0: the first block is always loaded.
	 *
	 * NOTE(review): v8-v14 are written without being saved here; presumably
	 * every caller brackets the call with the kernel-mode NEON begin/end
	 * helpers, which preserve the FP/SIMD state -- confirm against callers.
	 */
6862306a36Sopenharmony_ciSYM_FUNC_START(sha1_ce_transform)
6962306a36Sopenharmony_ci	/* load round constants */
7062306a36Sopenharmony_ci	loadrc		k0.4s, 0x5a827999, w6
7162306a36Sopenharmony_ci	loadrc		k1.4s, 0x6ed9eba1, w6
7262306a36Sopenharmony_ci	loadrc		k2.4s, 0x8f1bbcdc, w6
7362306a36Sopenharmony_ci	loadrc		k3.4s, 0xca62c1d6, w6
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	/* load state */
7662306a36Sopenharmony_ci	ld1		{dgav.4s}, [x0]
7762306a36Sopenharmony_ci	ldr		dgb, [x0, #16]
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	/* load sha1_ce_state::finalize */
	/*
	 * ldr_l comes from <asm/assembler.h>; presumably it loads the 32-bit
	 * value stored at the named symbol, using x4 as the address temporary.
	 * The following ldr then fetches the 32-bit field at that offset in
	 * sst. A 32-bit load zero-extends, so x4 can be tested as a whole at
	 * label 2.
	 */
8062306a36Sopenharmony_ci	ldr_l		w4, sha1_ce_offsetof_finalize, x4
8162306a36Sopenharmony_ci	ldr		w4, [x0, x4]
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	/* load input */
8462306a36Sopenharmony_ci0:	ld1		{v8.4s-v11.4s}, [x1], #64
8562306a36Sopenharmony_ci	sub		w2, w2, #1
8662306a36Sopenharmony_ci
	/*
	 * Byte-reverse every 32-bit word of the block. CPU_LE is defined in
	 * <asm/assembler.h>; presumably it emits its argument only on
	 * little-endian builds.
	 */
8762306a36Sopenharmony_ciCPU_LE(	rev32		v8.16b, v8.16b		)
8862306a36Sopenharmony_ciCPU_LE(	rev32		v9.16b, v9.16b		)
8962306a36Sopenharmony_ciCPU_LE(	rev32		v10.16b, v10.16b	)
9062306a36Sopenharmony_ciCPU_LE(	rev32		v11.16b, v11.16b	)
9162306a36Sopenharmony_ci
	/*
	 * Label 1 is also the entry point for the padding block built at
	 * label 2, which is already in word order and so skips the load and
	 * the byte reversal above.
	 *
	 * Prime t0 for the first "ev" step and start from a copy of the
	 * current state.
	 */
9262306a36Sopenharmony_ci1:	add		t0.4s, v8.4s, k0.4s
9362306a36Sopenharmony_ci	mov		dg0v.16b, dgav.16b
9462306a36Sopenharmony_ci
	/*
	 * Twenty alternating ev/od steps in four groups of five, using
	 * sha1c, sha1p, sha1m and sha1p in turn. Each of these instructions
	 * performs four SHA-1 rounds, for 80 rounds in total. The round
	 * constant argument changes one step before the instruction suffix
	 * does, because each step computes the sum for the step after it.
	 * The first step passes dgb, since dg1s has no sha1h result yet.
	 */
9562306a36Sopenharmony_ci	add_update	c, ev, k0,  8,  9, 10, 11, dgb
9662306a36Sopenharmony_ci	add_update	c, od, k0,  9, 10, 11,  8
9762306a36Sopenharmony_ci	add_update	c, ev, k0, 10, 11,  8,  9
9862306a36Sopenharmony_ci	add_update	c, od, k0, 11,  8,  9, 10
9962306a36Sopenharmony_ci	add_update	c, ev, k1,  8,  9, 10, 11
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	add_update	p, od, k1,  9, 10, 11,  8
10262306a36Sopenharmony_ci	add_update	p, ev, k1, 10, 11,  8,  9
10362306a36Sopenharmony_ci	add_update	p, od, k1, 11,  8,  9, 10
10462306a36Sopenharmony_ci	add_update	p, ev, k1,  8,  9, 10, 11
10562306a36Sopenharmony_ci	add_update	p, od, k2,  9, 10, 11,  8
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	add_update	m, ev, k2, 10, 11,  8,  9
10862306a36Sopenharmony_ci	add_update	m, od, k2, 11,  8,  9, 10
10962306a36Sopenharmony_ci	add_update	m, ev, k2,  8,  9, 10, 11
11062306a36Sopenharmony_ci	add_update	m, od, k2,  9, 10, 11,  8
11162306a36Sopenharmony_ci	add_update	m, ev, k3, 10, 11,  8,  9
11262306a36Sopenharmony_ci
	/*
	 * The last four steps need no further schedule updates, and the
	 * final one has no following step to prepare a sum for.
	 */
11362306a36Sopenharmony_ci	add_update	p, od, k3, 11,  8,  9, 10
11462306a36Sopenharmony_ci	add_only	p, ev, k3,  9
11562306a36Sopenharmony_ci	add_only	p, od, k3, 10
11662306a36Sopenharmony_ci	add_only	p, ev, k3, 11
11762306a36Sopenharmony_ci	add_only	p, od
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	/* update state */
	/* dg1s holds the sha1h result written by the final "od" step */
12062306a36Sopenharmony_ci	add		dgbv.2s, dgbv.2s, dg1v.2s
12162306a36Sopenharmony_ci	add		dgav.4s, dgav.4s, dg0v.4s
12262306a36Sopenharmony_ci
	/*
	 * No blocks left: go and check whether padding is wanted. Otherwise
	 * continue with the next block, unless cond_yield (defined in
	 * <asm/assembler.h>; presumably it branches to the given label when
	 * the task should yield, using x5/x6 as temporaries) takes the early
	 * exit, in which case the remaining block count is returned.
	 */
12362306a36Sopenharmony_ci	cbz		w2, 2f
12462306a36Sopenharmony_ci	cond_yield	3f, x5, x6
12562306a36Sopenharmony_ci	b		0b
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	/*
12862306a36Sopenharmony_ci	 * Final block: add padding and total bit count.
12962306a36Sopenharmony_ci	 * Skip if the input size was not a round multiple of the block size,
13062306a36Sopenharmony_ci	 * the padding is handled by the C code in that case.
13162306a36Sopenharmony_ci	 */
	/*
	 * Padding block layout built below, as 32-bit words:
	 *   v8      = { 0x80000000, 0, 0, 0 }  (fmov to d8 clears the top half)
	 *   v9, v10 = all zero
	 *   v11     = { 0, 0, high word of bit count, low word of bit count }
	 * x4 is cleared before branching back so that the second pass through
	 * label 2 falls through to the store at label 3.
	 */
13262306a36Sopenharmony_ci2:	cbz		x4, 3f
13362306a36Sopenharmony_ci	ldr_l		w4, sha1_ce_offsetof_count, x4
13462306a36Sopenharmony_ci	ldr		x4, [x0, x4]
13562306a36Sopenharmony_ci	movi		v9.2d, #0
13662306a36Sopenharmony_ci	mov		x8, #0x80000000
13762306a36Sopenharmony_ci	movi		v10.2d, #0
13862306a36Sopenharmony_ci	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
13962306a36Sopenharmony_ci	fmov		d8, x8
14062306a36Sopenharmony_ci	mov		x4, #0
14162306a36Sopenharmony_ci	mov		v11.d[0], xzr
14262306a36Sopenharmony_ci	mov		v11.d[1], x7
14362306a36Sopenharmony_ci	b		1b
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	/* store new state */
14662306a36Sopenharmony_ci3:	st1		{dgav.4s}, [x0]
14762306a36Sopenharmony_ci	str		dgb, [x0, #16]
	/* return the number of blocks that were not processed */
14862306a36Sopenharmony_ci	mov		w0, w2
14962306a36Sopenharmony_ci	ret
15062306a36Sopenharmony_ciSYM_FUNC_END(sha1_ce_transform)
151