/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

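	/*
	 * Map each vN.4s register name to its number N, so that the
	 * hand-encoded instructions below can splice register operands
	 * into the right bitfields.
	 */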
	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
	.set		.Lv\b\().4s, \b
	.endr
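
	/*
	 * The SM3 instructions are an optional ARMv8.2 extension, so they
	 * are emitted as .inst words with the operand fields ORed in by
	 * hand. This keeps the file building with assemblers that do not
	 * know the SM3 mnemonics; the encodings follow the Arm ARM.
	 */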

	/* SM3PARTW1 Vd.4S, Vn.4S, Vm.4S */
	.macro		sm3partw1, rd, rn, rm
	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	/* SM3PARTW2 Vd.4S, Vn.4S, Vm.4S */
	.macro		sm3partw2, rd, rn, rm
	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	/* SM3SS1 Vd.4S, Vn.4S, Vm.4S, Va.4S */
	.macro		sm3ss1, rd, rn, rm, ra
	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	/* SM3TT1A Vd.4S, Vn.4S, Vm.S[imm2] */
	.macro		sm3tt1a, rd, rn, rm, imm2
	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	/* SM3TT1B Vd.4S, Vn.4S, Vm.S[imm2] */
	.macro		sm3tt1b, rd, rn, rm, imm2
	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	/* SM3TT2A Vd.4S, Vn.4S, Vm.S[imm2] */
	.macro		sm3tt2a, rd, rn, rm, imm2
	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	/* SM3TT2B Vd.4S, Vn.4S, Vm.S[imm2] */
	.macro		sm3tt2b, rd, rn, rm, imm2
	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

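	/*
	 * One SM3 round: sm3ss1 computes SS1 from the state halves and the
	 * round constant held in the top word of \t0, the shl/sri pair
	 * rotates that constant left by one for the next round, and
	 * sm3tt1/sm3tt2 update the two state halves. v10 is expected to
	 * hold W'j (= Wj ^ Wj+4) for the TT1 step.
	 */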
	.macro		round, ab, s0, t0, t1, i
	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	shl		\t1\().4s, \t0\().4s, #1
	sri		\t1\().4s, \t0\().4s, #31
	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
	.endm

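	/*
	 * One quad-round: runs four rounds on the message words in \s0 and,
	 * when a fifth register is supplied, performs the SM3PARTW1/SM3PARTW2
	 * message expansion into \s4 in parallel. The final quad-rounds omit
	 * \s4 because no further schedule words are needed.
	 */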
	.macro		qround, ab, s0, s1, s2, s3, s4
	.ifnb		\s4
	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
	ext		v6.16b, \s0\().16b, \s1\().16b, #12
	ext		v7.16b, \s2\().16b, \s3\().16b, #8
	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
	.endif

	eor		v10.16b, \s0\().16b, \s1\().16b	/* W'j = Wj ^ Wj+4 */

	round		\ab, \s0, v11, v12, 0
	round		\ab, \s0, v12, v11, 1
	round		\ab, \s0, v11, v12, 2
	round		\ab, \s0, v12, v11, 3

	.ifnb		\s4
	sm3partw2	\s4\().4s, v7.4s, v6.4s
	.endif
	.endm

	/*
	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
	 *                       int blocks)
	 */
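
	/*
	 * Register use (as derived from the code below):
	 *   v0-v4     message schedule
	 *   v5        SS1
	 *   v6, v7    scratch for the message expansion
	 *   v8, v9    state, one half each, word order reversed
	 *   v10       W'j (Wj ^ Wj+4)
	 *   v11, v12  rotating round constant
	 *   v13, v14  T constants for rounds 0-15 and 16-63
	 *   v15, v16  copy of the input state
	 */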
	.text
SYM_FUNC_START(sm3_ce_transform)
	/* load state */
	ld1		{v8.4s-v9.4s}, [x0]

	/* reverse the word order in each half: ABCD -> DCBA, EFGH -> HGFE */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8

	/* load T0, and T1 pre-rotated left by 16 ready for round 16 */
	adr_l		x8, .Lt
	ldp		s13, s14, [x8]

	/* load input */
0:	ld1		{v0.16b-v3.16b}, [x1], #64
	sub		w2, w2, #1

	/* keep a copy of the state for the final feedback XOR */
	mov		v15.16b, v8.16b
	mov		v16.16b, v9.16b

	/* SM3 consumes the input as big-endian 32-bit words */
CPU_LE(	rev32		v0.16b, v0.16b		)
CPU_LE(	rev32		v1.16b, v1.16b		)
CPU_LE(	rev32		v2.16b, v2.16b		)
CPU_LE(	rev32		v3.16b, v3.16b		)

	/* move T0 into the top word, where sm3ss1 takes the round constant */
	ext		v11.16b, v13.16b, v13.16b, #4

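	/* rounds 0-15 use the 'a' form of TT1/TT2 */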
	qround		a, v0, v1, v2, v3, v4
	qround		a, v1, v2, v3, v4, v0
	qround		a, v2, v3, v4, v0, v1
	qround		a, v3, v4, v0, v1, v2

	/* switch to T1 for rounds 16-63 */
	ext		v11.16b, v14.16b, v14.16b, #4

	/*
	 * rounds 16-63 use the 'b' form; the last three quad-rounds skip
	 * the message expansion
	 */
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4, v0, v1, v2
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4
	qround		b, v4, v0
	qround		b, v0, v1

	/* chain: XOR the previous state into the new one */
	eor		v8.16b, v8.16b, v15.16b
	eor		v9.16b, v9.16b, v16.16b

	/* handled all input blocks? */
	cbnz		w2, 0b

	/* restore the original word order and save state */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8
	st1		{v8.4s-v9.4s}, [x0]
	ret
SYM_FUNC_END(sm3_ce_transform)

	.section	".rodata", "a"
	.align		3

	/* T0 = 0x79cc4519 and T1 = 0x7a879d8a rotated left by 16 */
.Lt:	.word		0x79cc4519, 0x9d8a7a87
139