/* Sha256.c -- SHA-256 Hash
2023-04-02 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */

#include "Precomp.h"

#include <string.h>

#include "CpuArch.h"
#include "RotateDefs.h"
#include "Sha256.h"

#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif

#ifdef MY_CPU_X86_OR_AMD64
  #ifdef _MSC_VER
    #if _MSC_VER >= 1200
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
  #elif defined(__clang__)
    #if (__clang_major__ >= 8) // fix that check
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
  #elif defined(__GNUC__)
    #if (__GNUC__ >= 8) // fix that check
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
  #elif defined(__INTEL_COMPILER)
    #if (__INTEL_COMPILER >= 1800) // fix that check
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
  #endif
#elif defined(MY_CPU_ARM_OR_ARM64)
  #ifdef _MSC_VER
    #if _MSC_VER >= 1910
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
  #elif defined(__clang__)
    #if (__clang_major__ >= 8) // fix that check
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
  #elif defined(__GNUC__)
    #if (__GNUC__ >= 6) // fix that check
      #define Z7_COMPILER_SHA256_SUPPORTED
    #endif
  #endif
#endif

void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);

#ifdef Z7_COMPILER_SHA256_SUPPORTED
  void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);

  static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
  static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW;

  #define SHA256_UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else
  #define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks
#endif
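
/*
  SHA256_UPDATE_BLOCKS(p) selects how a block-update call is dispatched:
  when a hardware (SHA extensions) implementation can be compiled, each
  CSha256 object carries a function pointer (func_UpdateBlocks) that is
  resolved at run time; otherwise every call goes directly to the portable
  Sha256_UpdateBlocks().
*/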


BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
{
  SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;

  #ifdef Z7_COMPILER_SHA256_SUPPORTED
    if (algo != SHA256_ALGO_SW)
    {
      if (algo == SHA256_ALGO_DEFAULT)
        func = g_SHA256_FUNC_UPDATE_BLOCKS;
      else
      {
        if (algo != SHA256_ALGO_HW)
          return False;
        func = g_SHA256_FUNC_UPDATE_BLOCKS_HW;
        if (!func)
          return False;
      }
    }
  #else
    if (algo > 1)
      return False;
  #endif

  p->func_UpdateBlocks = func;
  return True;
}
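
/*
  Illustrative use (a sketch, not part of the original file): a caller can
  request a specific implementation before hashing, using the SHA256_ALGO_*
  values declared in Sha256.h, e.g.

    CSha256 sha;
    Sha256_Init(&sha);
    if (!Sha256_SetFunction(&sha, SHA256_ALGO_HW))
    {
      // no hardware SHA support was detected; fall back to software
      Sha256_SetFunction(&sha, SHA256_ALGO_SW);
    }
*/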


/* define it for speed optimization */

#ifdef Z7_SFX
  #define STEP_PRE 1
  #define STEP_MAIN 1
#else
  #define STEP_PRE 2
  #define STEP_MAIN 4
  // #define Z7_SHA256_UNROLL
#endif

#undef Z7_SHA256_BIG_W
#if STEP_MAIN != 16
  #define Z7_SHA256_BIG_W
#endif
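
/*
  Z7_SHA256_BIG_W selects the layout of the message schedule W inside
  Sha256_UpdateBlocks(): when defined, all 64 expanded words are kept in a
  64-entry array; otherwise a 16-entry array is reused as a ring buffer
  (indices taken modulo 16).
*/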




void Sha256_InitState(CSha256 *p)
{
  p->count = 0;
  p->state[0] = 0x6a09e667;
  p->state[1] = 0xbb67ae85;
  p->state[2] = 0x3c6ef372;
  p->state[3] = 0xa54ff53a;
  p->state[4] = 0x510e527f;
  p->state[5] = 0x9b05688c;
  p->state[6] = 0x1f83d9ab;
  p->state[7] = 0x5be0cd19;
}
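
/* The eight words set above are the standard SHA-256 initial hash value
   from FIPS 180-4: the first 32 bits of the fractional parts of the
   square roots of the first eight primes (2..19). */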

void Sha256_Init(CSha256 *p)
{
  p->func_UpdateBlocks =
  #ifdef Z7_COMPILER_SHA256_SUPPORTED
      g_SHA256_FUNC_UPDATE_BLOCKS;
  #else
      NULL;
  #endif
  Sha256_InitState(p);
}

#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))

#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
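
/*
  S0/S1 are the "big sigma" functions (Sigma0, Sigma1) and s0/s1 the
  "small sigma" functions (sigma0, sigma1) of FIPS 180-4. Ch and Maj are
  the choice and majority functions, written in equivalent forms that need
  fewer logical operations than the textbook definitions
  Ch(x,y,z) = (x & y) ^ (~x & z) and Maj(x,y,z) = (x & y) ^ (x & z) ^ (y & z).
*/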


#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))

#define blk2_main(j, i)  s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))

#ifdef Z7_SHA256_BIG_W
    // we use +i instead of +(i) to change the evaluation order and avoid a clang signed/unsigned warning.
    #define w(j, i)     W[(size_t)(j) + i]
    #define blk2(j, i)  (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
    #if STEP_MAIN == 16
        #define w(j, i)  W[(i) & 15]
    #else
        #define w(j, i)  W[((size_t)(j) + (i)) & 15]
    #endif
    #define blk2(j, i)  (w(j, i) += blk2_main(j, i))
#endif

#define W_MAIN(i)  blk2(j, i)
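
/*
  blk2(j, i) performs one step of the SHA-256 message-schedule expansion,
      W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16],
  either writing into the 64-word array or accumulating in place in the
  16-word ring buffer (where the slot still holds W[t-16]). W_PRE(i) loads
  one big-endian 32-bit word of the input block for rounds 0..15, and
  W_MAIN(i) produces the expanded word for rounds 16..63.
*/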


#define T1(wx, i) \
    tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    h = g; \
    g = f; \
    f = e; \
    e = d + tmp; \
    tmp += S0(a) + Maj(a, b, c); \
    d = c; \
    c = b; \
    b = a; \
    a = tmp; \

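/*
  T1 is one full compression round: it computes the two temporaries of
  FIPS 180-4 (T1 = h + Sigma1(e) + Ch(e,f,g) + K[t] + W[t] and
  T2 = Sigma0(a) + Maj(a,b,c)) and then rotates the eight working
  variables a..h by one position. The T4/T8 variants below unroll 4 or 8
  rounds and avoid the rotation by renaming the variables in the argument
  lists instead.
*/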
#define R1_PRE(i)  T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)

#if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
#define R2_MAIN(i) \
    R1_MAIN(i) \
    R1_MAIN(i + 1) \

#endif



#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8

#define T4( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    tmp = h; \
    h += d; \
    d = tmp + S0(a) + Maj(a, b, c); \

#define R4( wx, i) \
    T4 ( a,b,c,d,e,f,g,h, wx, (i  )); \
    T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
    T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
    T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \

#define R4_PRE(i)  R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)


#define T8( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    d += h; \
    h += S0(a) + Maj(a, b, c); \

#define R8( wx, i) \
    T8 ( a,b,c,d,e,f,g,h, wx, i  ); \
    T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
    T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
    T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
    T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
    T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
    T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
    T8 ( b,c,d,e,f,g,h,a, wx, i+7); \

#define R8_PRE(i)  R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)

#endif

void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);

// static
extern MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];

MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64] = {
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
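
/* SHA256_K_ARRAY holds the 64 round constants K[0..63] of FIPS 180-4:
   the first 32 bits of the fractional parts of the cube roots of the
   first 64 prime numbers. */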

#define K SHA256_K_ARRAY


Z7_NO_INLINE
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  UInt32 W
  #ifdef Z7_SHA256_BIG_W
      [64];
  #else
      [16];
  #endif

  unsigned j;

  UInt32 a,b,c,d,e,f,g,h;

  #if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
  UInt32 tmp;
  #endif

  a = state[0];
  b = state[1];
  c = state[2];
  d = state[3];
  e = state[4];
  f = state[5];
  g = state[6];
  h = state[7];

  while (numBlocks)
  {

  for (j = 0; j < 16; j += STEP_PRE)
  {
    #if STEP_PRE > 4

      #if STEP_PRE < 8
      R4_PRE(0);
      #else
      R8_PRE(0);
      #if STEP_PRE == 16
      R8_PRE(8);
      #endif
      #endif

    #else

      R1_PRE(0)
      #if STEP_PRE >= 2
      R1_PRE(1)
      #if STEP_PRE >= 4
      R1_PRE(2)
      R1_PRE(3)
      #endif
      #endif

    #endif
  }

  for (j = 16; j < 64; j += STEP_MAIN)
  {
    #if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8

      #if STEP_MAIN < 8
      R4_MAIN(0)
      #else
      R8_MAIN(0)
      #if STEP_MAIN == 16
      R8_MAIN(8)
      #endif
      #endif

    #else

      R1_MAIN(0)
      #if STEP_MAIN >= 2
      R1_MAIN(1)
      #if STEP_MAIN >= 4
      R2_MAIN(2)
      #if STEP_MAIN >= 8
      R2_MAIN(4)
      R2_MAIN(6)
      #if STEP_MAIN >= 16
      R2_MAIN(8)
      R2_MAIN(10)
      R2_MAIN(12)
      R2_MAIN(14)
      #endif
      #endif
      #endif
      #endif
    #endif
  }

  a += state[0]; state[0] = a;
  b += state[1]; state[1] = b;
  c += state[2]; state[2] = c;
  d += state[3]; state[3] = d;
  e += state[4]; state[4] = e;
  f += state[5]; state[5] = f;
  g += state[6]; state[6] = g;
  h += state[7]; state[7] = h;

  data += 64;
  numBlocks--;
  }

  /* Wipe variables */
  /* memset(W, 0, sizeof(W)); */
}

#undef S0
#undef S1
#undef s0
#undef s1
#undef K

#define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)

void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
  if (size == 0)
    return;

  {
    unsigned pos = (unsigned)p->count & 0x3F;
    unsigned num;

    p->count += size;

    num = 64 - pos;
    if (num > size)
    {
      memcpy(p->buffer + pos, data, size);
      return;
    }

    if (pos != 0)
    {
      size -= num;
      memcpy(p->buffer + pos, data, num);
      data += num;
      Sha256_UpdateBlock(p);
    }
  }
  {
    size_t numBlocks = size >> 6;
    SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
    size &= 0x3F;
    if (size == 0)
      return;
    data += (numBlocks << 6);
    memcpy(p->buffer, data, size);
  }
}


void Sha256_Final(CSha256 *p, Byte *digest)
{
  unsigned pos = (unsigned)p->count & 0x3F;
  unsigned i;

  p->buffer[pos++] = 0x80;

  if (pos > (64 - 8))
  {
    while (pos != 64) { p->buffer[pos++] = 0; }
    // memset(&p->buf.buffer[pos], 0, 64 - pos);
    Sha256_UpdateBlock(p);
    pos = 0;
  }

  /*
  if (pos & 3)
  {
    p->buffer[pos] = 0;
    p->buffer[pos + 1] = 0;
    p->buffer[pos + 2] = 0;
    pos += 3;
    pos &= ~3;
  }
  {
    for (; pos < 64 - 8; pos += 4)
      *(UInt32 *)(&p->buffer[pos]) = 0;
  }
  */

  memset(&p->buffer[pos], 0, (64 - 8) - pos);

  {
    UInt64 numBits = (p->count << 3);
    SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32))
    SetBe32(p->buffer + 64 - 4, (UInt32)(numBits))
  }

  Sha256_UpdateBlock(p);

  for (i = 0; i < 8; i += 2)
  {
    UInt32 v0 = p->state[i];
    UInt32 v1 = p->state[(size_t)i + 1];
    SetBe32(digest    , v0)
    SetBe32(digest + 4, v1)
    digest += 8;
  }

  Sha256_InitState(p);
}
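
/*
  Minimal usage sketch (illustrative only; it assumes SHA256_DIGEST_SIZE is
  defined in Sha256.h as the 32-byte digest size):

    Byte digest[SHA256_DIGEST_SIZE];
    CSha256 sha;
    Sha256_Init(&sha);
    Sha256_Update(&sha, (const Byte *)"abc", 3);
    Sha256_Final(&sha, digest);
    // digest now holds the big-endian 32-byte SHA-256 hash of "abc";
    // Sha256_Final() also re-initializes the state for the next message.
*/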


void Sha256Prepare(void)
{
  #ifdef Z7_COMPILER_SHA256_SUPPORTED
  SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
  f = Sha256_UpdateBlocks;
  f_hw = NULL;
  #ifdef MY_CPU_X86_OR_AMD64
  #ifndef USE_MY_MM
  if (CPU_IsSupported_SHA()
      && CPU_IsSupported_SSSE3()
      // && CPU_IsSupported_SSE41()
      )
  #endif
  #else
  if (CPU_IsSupported_SHA2())
  #endif
  {
    // printf("\n========== HW SHA256 ======== \n");
    f = f_hw = Sha256_UpdateBlocks_HW;
  }
  g_SHA256_FUNC_UPDATE_BLOCKS    = f;
  g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw;
  #endif
}
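
/*
  Sha256Prepare() probes the CPU and stores the preferred block-update
  routine in the global function pointers used by Sha256_Init() and by
  SHA256_ALGO_DEFAULT / SHA256_ALGO_HW in Sha256_SetFunction(). Until it
  has run, g_SHA256_FUNC_UPDATE_BLOCKS keeps its initial value, the
  portable Sha256_UpdateBlocks(), so it should be called once before
  hashing if the hardware path is wanted.
*/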

#undef S0
#undef S1
#undef s0
#undef s1
#undef Ch
#undef Maj
#undef W_MAIN
#undef W_PRE
#undef w
#undef blk2_main
#undef blk2
#undef T1
#undef T4
#undef T8
#undef R1_PRE
#undef R1_MAIN
#undef R2_MAIN
#undef R4
#undef R4_PRE
#undef R4_MAIN
#undef R8
#undef R8_PRE
#undef R8_MAIN
#undef STEP_PRE
#undef STEP_MAIN
#undef Z7_SHA256_BIG_W
#undef Z7_SHA256_UNROLL
#undef Z7_COMPILER_SHA256_SUPPORTED