/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/*
	 * Round-constant vectors, one per group of 20 SHA-1 rounds
	 * (FIPS 180-4 constants, broadcast into all four lanes by the
	 * loadrc macro at function entry).
	 */
	k0		.req	v0		// 0x5a827999 (rounds  0-19)
	k1		.req	v1		// 0x6ed9eba1 (rounds 20-39)
	k2		.req	v2		// 0x8f1bbcdc (rounds 40-59)
	k3		.req	v3		// 0xca62c1d6 (rounds 60-79)

	/* W+K accumulators; even/odd round groups ping-pong between them */
	t0		.req	v4
	t1		.req	v5

	/* persistent hash state: dgav holds a..d, dgb holds e */
	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	/* working copies used while one block is being processed */
	dg0q		.req	q12		// abcd, consumed by sha1c/p/m
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13		// 'e' input for odd groups
	dg1v		.req	v13
	dg2s		.req	s14		// 'e' input for even groups

	/*
	 * add_only - perform one group of 4 SHA-1 rounds, without a
	 * message schedule update.
	 *
	 * \op  : round function suffix: c (choose), p (parity) or m
	 *        (majority), selecting the sha1c/sha1p/sha1m instruction
	 * \ev  : 'ev' for even groups, 'od' for odd ones; the two variants
	 *        alternate between the t0/t1 accumulators and the dg1s/dg2s
	 *        copies of 'e', so each group precomputes W+K for the next
	 * \rc  : round constant vector (k0..k3) used for that precompute -
	 *        hence it may belong to the *next* group of rounds
	 * \s0  : v-register number holding the schedule words for the next
	 *        group; blank on the trailing groups with nothing to add
	 * \dg1 : override for the 'e' operand - only the very first group
	 *        of a block passes dgb here, before dg1s/dg2s are primed
	 */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s	// W+K for next (odd) group
	sha1h		dg2s, dg0s			// capture rotated 'a' as next 'e'
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s	// W+K for next (even) group
	.endif
	sha1h		dg1s, dg0s			// capture rotated 'a' as next 'e'
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm
	/*
	 * add_update - perform one group of 4 SHA-1 rounds and advance the
	 * message schedule: sha1su0/sha1su1 derive the next four W words
	 * into v\s0 while add_only consumes the already-scheduled group.
	 * Arguments \op, \ev, \rc, \dg1 are passed through to add_only.
	 */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm
	/*
	 * loadrc - broadcast the 32-bit round constant \val into every lane
	 * of vector \k, using GPR \tmp as scratch. The :abs_g0_nc:/:abs_g1:
	 * operators select the low/high 16-bit chunk of the constant (the
	 * assembler encodes the matching shift), avoiding a literal pool.
	 */
	.macro		loadrc, k, val, tmp
	movz		\tmp, :abs_g0_nc:\val
	movk		\tmp, :abs_g1:\val
	dup		\k, \tmp
	.endm
	/*
	 * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
	 *			 int blocks)
	 *
	 * x0: sst    - state: 4+1 words of digest at offset 0, plus the
	 *              count/finalize fields located via the exported
	 *              sha1_ce_offsetof_* variables
	 * x1: src    - input data, whole 64-byte blocks
	 * w2: blocks - number of blocks left to process; returned in w0,
	 *              so a non-zero return means we yielded early and the
	 *              caller must invoke us again
	 */
SYM_FUNC_START(sha1_ce_transform)
	/* load round constants */
	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6

	/* load state: a..d into dgav, e into dgb */
	ld1		{dgav.4s}, [x0]
	ldr		dgb, [x0, #16]

	/* load sha1_ce_state::finalize */
	ldr_l		w4, sha1_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* load input: one 64-byte block into the schedule regs v8-v11 */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w2, w2, #1

	/* SHA-1 consumes big-endian words; byte-swap on little-endian */
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/* prime t0 with W[0..3]+K and take a working copy of the state */
1:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/* rounds 0-19: 'choose' function */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	/* rounds 20-39: 'parity' function */
	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	/* rounds 40-59: 'majority' function */
	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	/* rounds 60-79: 'parity' again; schedule no longer needs updating */
	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbz		w2, 2f			// all blocks consumed?
	cond_yield	3f, x5, x6		// may branch to 3f to yield; w2 > 0 there
	b		0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 */
2:	cbz		x4, 3f			// finalize == 0 -> just store state
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x0, x4]		// x4 = total byte count
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8			// 0x80 terminator in W[0] (block is
						// synthesized in BE word order, so
						// the rev32 above is not replayed)
	mov		x4, #0			// don't pad a second time around
	mov		v11.d[0], xzr
	mov		v11.d[1], x7		// bit count in the last two words
	b		1b

	/* store new state */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	mov		w0, w2			// blocks remaining (0 when finished)
	ret
SYM_FUNC_END(sha1_ce_transform)
151