1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
4  *
5  * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
6  */
7 
8 #include <linux/linkage.h>
9 #include <linux/cfi_types.h>
10 #include <asm/assembler.h>
11 
	/*
	 * Map each SIMD register name vN.4s to its register number N so the
	 * .inst encoding macros below can build instruction words from their
	 * register operands.  Only v0-v12 are ever passed to those macros.
	 */
	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
	.set		.Lv\b\().4s, \b
	.endr
15 
	/*
	 * SM3PARTW1 Vd.4s, Vn.4s, Vm.4s -- emitted as a raw .inst word so the
	 * file assembles with toolchains that lack SM3 mnemonic support.
	 */
	.macro		sm3partw1, rd, rn, rm
	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm
19 
	/* SM3PARTW2 Vd.4s, Vn.4s, Vm.4s -- raw encoding, see sm3partw1 */
	.macro		sm3partw2, rd, rn, rm
	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm
23 
	/*
	 * SM3SS1 Vd.4s, Vn.4s, Vm.4s, Va.4s -- raw encoding.  Note the macro
	 * argument order is rd, rn, rm, ra while Ra is encoded at bits 14:10
	 * and Rm at bits 20:16.
	 */
	.macro		sm3ss1, rd, rn, rm, ra
	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm
27 
	/* SM3TT1A Vd.4s, Vn.4s, Vm.S[imm2] -- raw encoding, imm2 in bits 13:12 */
	.macro		sm3tt1a, rd, rn, rm, imm2
	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm
31 
	/* SM3TT1B Vd.4s, Vn.4s, Vm.S[imm2] -- raw encoding, imm2 in bits 13:12 */
	.macro		sm3tt1b, rd, rn, rm, imm2
	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm
35 
	/* SM3TT2A Vd.4s, Vn.4s, Vm.S[imm2] -- raw encoding, imm2 in bits 13:12 */
	.macro		sm3tt2a, rd, rn, rm, imm2
	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm
39 
	/* SM3TT2B Vd.4s, Vn.4s, Vm.S[imm2] -- raw encoding, imm2 in bits 13:12 */
	.macro		sm3tt2b, rd, rn, rm, imm2
	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm
43 
	/*
	 * Perform a single SM3 round.
	 *   \ab : 'a' or 'b' -- selects the TT1A/TT2A (rounds 0-15) or
	 *         TT1B/TT2B (rounds 16-63) compression-function variant
	 *   \s0 : message schedule vector holding W for this round group
	 *   \t0 : vector whose lane 3 carries the current round constant
	 *   \t1 : receives \t0 rotated left by 1 (constant for next round,
	 *         built with the shl/sri pair since there is no vector rol)
	 *   \i  : lane index 0-3 selecting the round within the group
	 * v8/v9 are the working state halves, v10 holds \s0 ^ \s1 (the W'
	 * words, set up by qround), v5 is scratch for the SS1 intermediate.
	 */
	.macro		round, ab, s0, t0, t1, i
	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	shl		\t1\().4s, \t0\().4s, #1
	sri		\t1\().4s, \t0\().4s, #31
	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
	.endm
51 
	/*
	 * Perform four SM3 rounds and, optionally, extend the message
	 * schedule by four words.
	 *   \ab      : round variant ('a' or 'b'), passed through to 'round'
	 *   \s0-\s3  : the live message schedule vectors (16 words)
	 *   \s4      : if supplied, receives the next four schedule words,
	 *              computed with sm3partw1/sm3partw2 (v6/v7 scratch).
	 *              The last three qrounds of a block omit it, as no
	 *              further expansion is needed.
	 * Callers rotate the \s0..\s4 register assignment on each invocation
	 * so the schedule stays in v0-v4.
	 */
	.macro		qround, ab, s0, s1, s2, s3, s4
	.ifnb		\s4
	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
	ext		v6.16b, \s0\().16b, \s1\().16b, #12
	ext		v7.16b, \s2\().16b, \s3\().16b, #8
	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
	.endif

	/* v10 = \s0 ^ \s1, i.e. the W' words consumed by sm3tt1 */
	eor		v10.16b, \s0\().16b, \s1\().16b

	round		\ab, \s0, v11, v12, 0
	round		\ab, \s0, v12, v11, 1
	round		\ab, \s0, v11, v12, 2
	round		\ab, \s0, v12, v11, 3

	.ifnb		\s4
	sm3partw2	\s4\().4s, v7.4s, v6.4s
	.endif
	.endm
71 
	/*
	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
	 *                       int blocks)
	 *
	 * x0: pointer to the SM3 state (8 x 32-bit words)
	 * x1: input, a whole number of 64-byte blocks
	 * w2: block count (presumably > 0 -- the loop is bottom-tested;
	 *     callers are expected to guarantee this)
	 */
	.text
SYM_TYPED_FUNC_START(sm3_ce_transform)
	/* load state */
	ld1		{v8.4s-v9.4s}, [x0]
	/*
	 * rev64 + ext #8 reverses the order of the four 32-bit words in
	 * each vector; the SM3 instructions operate on the state in this
	 * reversed layout.  The transform is undone again before storing.
	 */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8

	/* s13 = T (rounds 0-15), s14 = T' pre-rotated for round 16 */
	adr_l		x8, .Lt
	ldp		s13, s14, [x8]

	/* load input */
0:	ld1		{v0.16b-v3.16b}, [x1], #64
	sub		w2, w2, #1

	/* keep a copy of the input state for the final feed-forward XOR */
	mov		v15.16b, v8.16b
	mov		v16.16b, v9.16b

	/* SM3 is big endian: byte swap each 32-bit word on LE kernels */
CPU_LE(	rev32		v0.16b, v0.16b		)
CPU_LE(	rev32		v1.16b, v1.16b		)
CPU_LE(	rev32		v2.16b, v2.16b		)
CPU_LE(	rev32		v3.16b, v3.16b		)

	/* place T in lane 3 of v11; 'round' rotates it left by 1 per round */
	ext		v11.16b, v13.16b, v13.16b, #4

	/* rounds 0-15: 'a' variant, with message schedule expansion */
	qround		a, v0, v1, v2, v3, v4
	qround		a, v1, v2, v3, v4, v0
	qround		a, v2, v3, v4, v0, v1
	qround		a, v3, v4, v0, v1, v2

	/* switch to the pre-rotated T' constant for rounds 16-63 */
	ext		v11.16b, v14.16b, v14.16b, #4

	/* rounds 16-63: 'b' variant; last three qrounds need no expansion */
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4, v0, v1, v2
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4
	qround		b, v4, v0
	qround		b, v0, v1

	/* feed forward: SM3 XORs in the previous state (unlike SHA's add) */
	eor		v8.16b, v8.16b, v15.16b
	eor		v9.16b, v9.16b, v16.16b

	/* handled all input blocks? */
	cbnz		w2, 0b

	/* save state */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8
	st1		{v8.4s-v9.4s}, [x0]
	ret
SYM_FUNC_END(sm3_ce_transform)
136 
	.section	".rodata", "a"
	.align		3
	/*
	 * SM3 round constants, loaded pairwise with ldp: T = 0x79cc4519
	 * (first used unrotated at round 0) and 0x9d8a7a87, which is
	 * T' = 0x7a879d8a rotated left by 16 -- pre-rotated because its
	 * first use is at round 16, where the spec requires T' <<< 16.
	 */
.Lt:	.word		0x79cc4519, 0x9d8a7a87
140