1 /*
2  * Intel SHA Extensions optimized implementation of a SHA-256 update function
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * Copyright(c) 2015 Intel Corporation.
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of version 2 of the GNU General Public License as
13  * published by the Free Software Foundation.
14  *
15  * This program is distributed in the hope that it will be useful, but
16  * WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * Contact Information:
21  * 	Sean Gulley <sean.m.gulley@intel.com>
22  * 	Tim Chen <tim.c.chen@linux.intel.com>
23  *
24  * BSD LICENSE
25  *
26  * Copyright(c) 2015 Intel Corporation.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  *
32  * 	* Redistributions of source code must retain the above copyright
33  * 	  notice, this list of conditions and the following disclaimer.
34  * 	* Redistributions in binary form must reproduce the above copyright
35  * 	  notice, this list of conditions and the following disclaimer in
36  * 	  the documentation and/or other materials provided with the
37  * 	  distribution.
38  * 	* Neither the name of Intel Corporation nor the names of its
39  * 	  contributors may be used to endorse or promote products derived
40  * 	  from this software without specific prior written permission.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53  *
54  */
55 
56 #include <linux/linkage.h>
57 #include <linux/cfi_types.h>
58 
/*
 * Register aliases used by sha256_ni_transform.
 *
 * The three argument registers follow the order in which the C prototype
 * lists its parameters.
 */
#define DIGEST_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */

/* Base address of the K256 round-constant table */
#define SHA256CONSTANTS	%rax

/*
 * MSG has to be %xmm0: sha256rnds2 reads its two "message word + round
 * constant" inputs implicitly from that register.
 */
#define MSG		%xmm0
/* Working state, kept in the ABEF / CDGH lane order sha256rnds2 expects */
#define STATE0		%xmm1
#define STATE1		%xmm2
/* MSGTMP0-3 rotate through the four most recent message-schedule vectors */
#define MSGTMP0		%xmm3
#define MSGTMP1		%xmm4
#define MSGTMP2		%xmm5
#define MSGTMP3		%xmm6
/* MSGTMP4 is a scratch register (state shuffles, palignr temporary) */
#define MSGTMP4		%xmm7

/* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */
#define SHUF_MASK	%xmm8

/* Copies of the state taken at the top of each block iteration */
#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10
78 
79 /*
80  * Intel SHA Extensions optimized implementation of a SHA-256 update function
81  *
82  * The function takes a pointer to the current hash values, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks have
 * been processed, the digest is updated in place with the resulting hash value.
85  * The function only processes complete blocks, there is no functionality to
86  * store partial blocks.  All message padding and hash value initialization must
87  * be done outside the update function.
88  *
89  * The indented lines in the loop are instructions related to rounds processing.
90  * The non-indented lines are instructions related to the message schedule.
91  *
 * void sha256_ni_transform(uint32_t *digest, const void *data,
 *			    uint32_t numBlocks);
 * digest: pointer to digest
 * data: pointer to input data
 * numBlocks: Number of blocks to process
97  */
98 
/*
 * sha256_ni_transform - run the SHA-256 compression function over whole
 * 64-byte blocks using the SHA-NI instructions.
 *
 * In:    DIGEST_PTR (%rdi) = eight 32-bit state words (unaligned access is
 *                            used, so no alignment is required)
 *        DATA_PTR   (%rsi) = numBlocks * 64 bytes of input (unaligned access)
 *        %edx              = numBlocks; only the low 32 bits are consumed,
 *                            matching the uint32_t prototype
 * Out:   the state at DIGEST_PTR is updated in place; nothing is returned.
 *        With numBlocks == 0 neither digest nor data is accessed.
 * Clobb: %rax, %rdx, %rsi, %xmm0-%xmm10, flags
 *
 * Inside the loop, indented instructions belong to the round computation and
 * non-indented ones to the message schedule.
 */
#define NUM_BLKS_32	%edx	/* low 32 bits of NUM_BLKS */

.text
SYM_TYPED_FUNC_START(sha256_ni_transform)

	/*
	 * numBlocks is a 32-bit argument, so bits 63:32 of the argument
	 * register are not guaranteed to be zero on entry.  A 32-bit
	 * register move clears them and leaves the flags untouched.
	 */
	mov		NUM_BLKS_32, NUM_BLKS_32
	shl		$6, NUM_BLKS		/*  convert to bytes */
	jz		.Ldone_hash		/* zero blocks: nothing to do */
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(DIGEST_PTR), STATE0
	movdqu		1*16(DIGEST_PTR), STATE1

	pshufd		$0xB1, STATE0,  STATE0		/* CDAB */
	pshufd		$0x1B, STATE1,  STATE1		/* EFGH */
	movdqa		STATE0, MSGTMP4
	palignr		$8, STATE1,  STATE0		/* ABEF */
	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */

	/* Loop invariants: byte-flip mask and round-constant table base */
	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256(%rip), SHA256CONSTANTS

	/*
	 * One iteration per 64-byte block.  On entry to each iteration:
	 *   DATA_PTR -> current block, NUM_BLKS -> one past the last block,
	 *   STATE0 = ABEF, STATE1 = CDGH.
	 *
	 * Every group of four rounds follows the same shape:
	 *   MSG = four message words + four round constants
	 *   sha256rnds2 (rounds n, n+1; consumes the low two dwords of MSG)
	 *   pshufd $0x0E moves the upper two dwords of MSG into the low half
	 *   sha256rnds2 (rounds n+2, n+3)
	 * interleaved with sha256msg1/palignr+paddd/sha256msg2, which build
	 * the message words needed by later groups.
	 */
.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/* Rounds 0-3 */
	movdqu		0*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG		/* byte-swap each 32-bit word */
	movdqa		MSG, MSGTMP0
		paddd		0*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP1
		paddd		1*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP2
		paddd		2*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 12-15 */
	movdqu		3*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP3
		paddd		3*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 16-19 */
	movdqa		MSGTMP0, MSG
		paddd		4*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 20-23 */
	movdqa		MSGTMP1, MSG
		paddd		5*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 24-27 */
	movdqa		MSGTMP2, MSG
		paddd		6*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 28-31 */
	movdqa		MSGTMP3, MSG
		paddd		7*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 32-35 */
	movdqa		MSGTMP0, MSG
		paddd		8*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 36-39 */
	movdqa		MSGTMP1, MSG
		paddd		9*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 40-43 */
	movdqa		MSGTMP2, MSG
		paddd		10*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 44-47 */
	movdqa		MSGTMP3, MSG
		paddd		11*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 48-51 */
	movdqa		MSGTMP0, MSG
		paddd		12*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 52-55 (no sha256msg1 from here on: no later words need it) */
	movdqa		MSGTMP1, MSG
		paddd		13*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 56-59 */
	movdqa		MSGTMP2, MSG
		paddd		14*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 60-63 (message schedule is complete) */
	movdqa		MSGTMP3, MSG
		paddd		15*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, STATE0,  STATE0		/* FEBA */
	pshufd		$0xB1, STATE1,  STATE1		/* DCHG */
	movdqa		STATE0, MSGTMP4
	pblendw		$0xF0, STATE1,  STATE0		/* DCBA */
	palignr		$8, MSGTMP4, STATE1		/* HGFE */

	movdqu		STATE0, 0*16(DIGEST_PTR)
	movdqu		STATE1, 1*16(DIGEST_PTR)

.Ldone_hash:

	RET
SYM_FUNC_END(sha256_ni_transform)
331 
/*
 * K256: the 64 SHA-256 round constants K[0..63] (FIPS 180-4), stored as
 * 32-bit words in round order.  sha256_ni_transform adds them to the message
 * words four at a time with "paddd n*16(SHA256CONSTANTS), MSG"; that legacy
 * SSE memory operand requires 16-byte alignment, which the 64-byte alignment
 * below satisfies.
 */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
351 
/*
 * PSHUFFLE_BYTE_FLIP_MASK: pshufb control that reverses the byte order inside
 * each of the four 32-bit lanes (result byte 0 <- source byte 3, byte 1 <-
 * byte 2, ...).  sha256_ni_transform applies it to every 16 bytes of input it
 * loads.  The mask is fetched with movdqa, hence the 16-byte alignment.
 */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203