1 /*
2  * Intel SHA Extensions optimized implementation of a SHA-1 update function
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * Copyright(c) 2015 Intel Corporation.
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of version 2 of the GNU General Public License as
13  * published by the Free Software Foundation.
14  *
15  * This program is distributed in the hope that it will be useful, but
16  * WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * Contact Information:
21  * 	Sean Gulley <sean.m.gulley@intel.com>
22  * 	Tim Chen <tim.c.chen@linux.intel.com>
23  *
24  * BSD LICENSE
25  *
26  * Copyright(c) 2015 Intel Corporation.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  *
32  * 	* Redistributions of source code must retain the above copyright
33  * 	  notice, this list of conditions and the following disclaimer.
34  * 	* Redistributions in binary form must reproduce the above copyright
35  * 	  notice, this list of conditions and the following disclaimer in
36  * 	  the documentation and/or other materials provided with the
37  * 	  distribution.
38  * 	* Neither the name of Intel Corporation nor the names of its
39  * 	  contributors may be used to endorse or promote products derived
40  * 	  from this software without specific prior written permission.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53  *
54  */
55 
56 #include <linux/linkage.h>
57 #include <linux/cfi_types.h>
58 
/* Function arguments, in the order they are passed */
#define DIGEST_PTR	%rdi	/* arg 1: hash state to update */
#define DATA_PTR	%rsi	/* arg 2: input data */
#define NUM_BLKS	%rdx	/* arg 3: number of blocks */

/* Stack scratch area: two 16-byte slots that hold the saved E0 and ABCD */
#define FRAME_SIZE	32

/*
 * Working hash state.  E0 and E1 take turns as the E operand of
 * successive four-round groups.
 */
#define ABCD		%xmm0
#define E0		%xmm1
#define E1		%xmm2

/* Message schedule: a rolling window of four 16-byte message vectors */
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6

/* pshufb control vector, loaded from PSHUFFLE_BYTE_FLIP_MASK */
#define SHUF_MASK	%xmm7
74 
75 
/*
 * sha1_ni_transform - SHA-1 block function using the Intel SHA Extensions
 *
 * void sha1_ni_transform(uint32_t *digest, const void *data,
 *			  uint32_t numBlocks)
 *
 * digest:    pointer to the five 32-bit hash words.  They are read on entry
 *	      and overwritten with the updated hash after the last block.
 * data:      pointer to the input; numBlocks * 64 bytes are read with
 *	      unaligned loads.
 * numBlocks: number of 64-byte blocks to process.  Only the low 32 bits of
 *	      the argument register are used.  If it is zero the function
 *	      returns without reading or writing the digest.
 *
 * Only complete blocks are processed; there is no support for buffering a
 * partial block.  Message padding and initialization of the hash words must
 * be done by the caller.
 *
 * Clobbers: DATA_PTR (advanced past the input), NUM_BLKS (becomes the
 * end-of-data pointer), %xmm0-%xmm7 and the flags.  FRAME_SIZE bytes of
 * 16-byte aligned stack hold the hash state that is added back after each
 * block.
 *
 * The indented lines in the loop are instructions related to rounds processing.
 * The non-indented lines are instructions related to the message schedule.
 */
.text

/* 32-bit view of NUM_BLKS; must name the low half of the same register. */
#define NUM_BLKS_32	%edx

SYM_TYPED_FUNC_START(sha1_ni_transform)
	/* Keep the old %rsp in %rbp so the epilogue can undo the alignment. */
	push		%rbp
	mov		%rsp, %rbp
	sub		$FRAME_SIZE, %rsp
	and		$~0xF, %rsp		/* movdqa below needs 16-byte alignment */

	/*
	 * numBlocks is a 32-bit argument, so bits 63:32 of its register are
	 * not guaranteed to be zero.  Zero-extend before doing 64-bit pointer
	 * arithmetic with it.
	 */
	mov		NUM_BLKS_32, NUM_BLKS_32
	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash		/* zero blocks: leave digest untouched */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * Load the initial hash values: the fifth word goes into the top
	 * dword of an otherwise zero E0, the first four words go into ABCD
	 * with their dword order reversed.
	 */
	pxor		E0, E0
	pinsrd		$3, 1*16(DIGEST_PTR), E0
	movdqu		0*16(DIGEST_PTR), ABCD
	pshufd		$0x1B, ABCD, ABCD

	/* Control vector that reverses the 16 bytes of each message vector */
	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		E0, (0*16)(%rsp)
	movdqa		ABCD, (1*16)(%rsp)

	/* Rounds 0-3 */
	movdqu		0*16(DATA_PTR), MSG0
	pshufb		SHUF_MASK, MSG0
		paddd		MSG0, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG1
	pshufb		SHUF_MASK, MSG1
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG1, MSG0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG2
	pshufb		SHUF_MASK, MSG2
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 12-15 */
	movdqu		3*16(DATA_PTR), MSG3
	pshufb		SHUF_MASK, MSG3
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 16-19 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 20-23 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 24-27 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 28-31 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 32-35 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 36-39 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 40-43 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 44-47 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 48-51 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 52-55 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 56-59 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 60-63 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$3, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 64-67 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$3, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 68-71 (message schedule winds down from here on) */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$3, E1, ABCD
	pxor		MSG1, MSG3

	/* Rounds 72-75 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$3, E0, ABCD

	/* Rounds 76-79 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
		sha1rnds4	$3, E1, ABCD

	/* Add current hash values with previously saved */
	sha1nexte	(0*16)(%rsp), E0
	paddd		(1*16)(%rsp), ABCD

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, ABCD, ABCD
	movdqu		ABCD, 0*16(DIGEST_PTR)
	pextrd		$3, E0, 1*16(DIGEST_PTR)

.Ldone_hash:
	/* Drop the aligned scratch area and restore the caller's frame */
	mov		%rbp, %rsp
	pop		%rbp

	RET
SYM_FUNC_END(sha1_ni_transform)
295 
/*
 * pshufb control vector: byte i of the result is taken from byte 15 - i of
 * the source, i.e. the 16 bytes are reversed.  Emitted as two little-endian
 * quadwords, low half first.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.balign 16
PSHUFFLE_BYTE_FLIP_MASK:
	.quad 0x08090a0b0c0d0e0f
	.quad 0x0001020304050607

/*
 * All-ones in the most significant 32-bit word, zero elsewhere.
 * Emitted as two little-endian quadwords, low half first.
 */
.section	.rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
.balign 16
UPPER_WORD_MASK:
	.quad 0x0000000000000000
	.quad 0xFFFFFFFF00000000
305