1/*
2 * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10#include <openssl/opensslconf.h>
11/*-
12 * IMPLEMENTATION NOTES.
13 *
14 * As you might have noticed 32-bit hash algorithms:
15 *
16 * - permit SHA_LONG to be wider than 32-bit
17 * - optimized versions implement two transform functions: one operating
18 *   on [aligned] data in host byte order and one - on data in input
19 *   stream byte order;
20 * - share common byte-order neutral collector and padding function
21 *   implementations, ../md32_common.h;
22 *
 * None of the above applies to this SHA-512 implementation. Reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm at the time of this writing,
 *   there is no need for common collector/padding implementation [yet];
28 * - by supporting only one transform function [which operates on
29 *   *aligned* data in input stream byte order, big-endian in this case]
30 *   we minimize burden of maintenance in two ways: a) collector/padding
31 *   function is simpler; b) only one transform function to stare at;
32 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
33 *   apply a number of optimizations to mitigate potential performance
34 *   penalties caused by previous design decision;
35 *
36 * Caveat lector.
37 *
38 * Implementation relies on the fact that "long long" is 64-bit on
39 * both 32- and 64-bit platforms. If some compiler vendor comes up
40 * with 128-bit long long, adjustment to sha.h would be required.
41 * As this implementation relies on 64-bit integer type, it's totally
42 * inappropriate for platforms which don't support it, most notably
43 * 16-bit platforms.
44 */
45#include <stdlib.h>
46#include <string.h>
47
48#include <openssl/crypto.h>
49#include <openssl/sha.h>
50#include <openssl/opensslv.h>
51
52#include "internal/cryptlib.h"
53#include "crypto/sha.h"
54
/*
 * When SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined, SHA512_Update()
 * and SHA512_Transform() hand caller data straight to
 * sha512_block_data_order() without looking at its alignment. When it is
 * not defined, misaligned input is first copied into the context buffer.
 * The macro is enabled for the x86, s390 and AArch64 compiler macros listed
 * here and whenever SHA512_ASM is defined.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
62
63int sha512_224_init(SHA512_CTX *c)
64{
65    c->h[0] = U64(0x8c3d37c819544da2);
66    c->h[1] = U64(0x73e1996689dcd4d6);
67    c->h[2] = U64(0x1dfab7ae32ff9c82);
68    c->h[3] = U64(0x679dd514582f9fcf);
69    c->h[4] = U64(0x0f6d2b697bd44da8);
70    c->h[5] = U64(0x77e36f7304c48942);
71    c->h[6] = U64(0x3f9d85a86a1d36c8);
72    c->h[7] = U64(0x1112e6ad91d692a1);
73
74    c->Nl = 0;
75    c->Nh = 0;
76    c->num = 0;
77    c->md_len = SHA224_DIGEST_LENGTH;
78    return 1;
79}
80
81int sha512_256_init(SHA512_CTX *c)
82{
83    c->h[0] = U64(0x22312194fc2bf72c);
84    c->h[1] = U64(0x9f555fa3c84c64c2);
85    c->h[2] = U64(0x2393b86b6f53b151);
86    c->h[3] = U64(0x963877195940eabd);
87    c->h[4] = U64(0x96283ee2a88effe3);
88    c->h[5] = U64(0xbe5e1e2553863992);
89    c->h[6] = U64(0x2b0199fc2c85b8aa);
90    c->h[7] = U64(0x0eb72ddc81c52ca2);
91
92    c->Nl = 0;
93    c->Nh = 0;
94    c->num = 0;
95    c->md_len = SHA256_DIGEST_LENGTH;
96    return 1;
97}
98
99int SHA384_Init(SHA512_CTX *c)
100{
101    c->h[0] = U64(0xcbbb9d5dc1059ed8);
102    c->h[1] = U64(0x629a292a367cd507);
103    c->h[2] = U64(0x9159015a3070dd17);
104    c->h[3] = U64(0x152fecd8f70e5939);
105    c->h[4] = U64(0x67332667ffc00b31);
106    c->h[5] = U64(0x8eb44a8768581511);
107    c->h[6] = U64(0xdb0c2e0d64f98fa7);
108    c->h[7] = U64(0x47b5481dbefa4fa4);
109
110    c->Nl = 0;
111    c->Nh = 0;
112    c->num = 0;
113    c->md_len = SHA384_DIGEST_LENGTH;
114    return 1;
115}
116
117int SHA512_Init(SHA512_CTX *c)
118{
119    c->h[0] = U64(0x6a09e667f3bcc908);
120    c->h[1] = U64(0xbb67ae8584caa73b);
121    c->h[2] = U64(0x3c6ef372fe94f82b);
122    c->h[3] = U64(0xa54ff53a5f1d36f1);
123    c->h[4] = U64(0x510e527fade682d1);
124    c->h[5] = U64(0x9b05688c2b3e6c1f);
125    c->h[6] = U64(0x1f83d9abfb41bd6b);
126    c->h[7] = U64(0x5be0cd19137e2179);
127
128    c->Nl = 0;
129    c->Nh = 0;
130    c->num = 0;
131    c->md_len = SHA512_DIGEST_LENGTH;
132    return 1;
133}
134
/*
 * Block transform used by the functions in this file: consumes |num|
 * consecutive blocks starting at |in| and folds them into the chaining
 * state held in |ctx|. Without SHA512_ASM the function is static and one of
 * the C implementations in this file is compiled; with SHA512_ASM those C
 * implementations are excluded and the symbol is declared with external
 * linkage, so the definition must come from elsewhere.
 */
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
139
140int SHA512_Final(unsigned char *md, SHA512_CTX *c)
141{
142    unsigned char *p = (unsigned char *)c->u.p;
143    size_t n = c->num;
144
145    p[n] = 0x80;                /* There always is a room for one */
146    n++;
147    if (n > (sizeof(c->u) - 16)) {
148        memset(p + n, 0, sizeof(c->u) - n);
149        n = 0;
150        sha512_block_data_order(c, p, 1);
151    }
152
153    memset(p + n, 0, sizeof(c->u) - 16 - n);
154#ifdef  B_ENDIAN
155    c->u.d[SHA_LBLOCK - 2] = c->Nh;
156    c->u.d[SHA_LBLOCK - 1] = c->Nl;
157#else
158    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
159    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
160    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
161    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
162    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
163    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
164    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
165    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
166    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
167    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
168    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
169    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
170    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
171    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
172    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
173    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
174#endif
175
176    sha512_block_data_order(c, p, 1);
177
178    if (md == 0)
179        return 0;
180
181    switch (c->md_len) {
182    /* Let compiler decide if it's appropriate to unroll... */
183    case SHA224_DIGEST_LENGTH:
184        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
185            SHA_LONG64 t = c->h[n];
186
187            *(md++) = (unsigned char)(t >> 56);
188            *(md++) = (unsigned char)(t >> 48);
189            *(md++) = (unsigned char)(t >> 40);
190            *(md++) = (unsigned char)(t >> 32);
191            *(md++) = (unsigned char)(t >> 24);
192            *(md++) = (unsigned char)(t >> 16);
193            *(md++) = (unsigned char)(t >> 8);
194            *(md++) = (unsigned char)(t);
195        }
196        /*
197         * For 224 bits, there are four bytes left over that have to be
198         * processed separately.
199         */
200        {
201            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
202
203            *(md++) = (unsigned char)(t >> 56);
204            *(md++) = (unsigned char)(t >> 48);
205            *(md++) = (unsigned char)(t >> 40);
206            *(md++) = (unsigned char)(t >> 32);
207        }
208        break;
209    case SHA256_DIGEST_LENGTH:
210        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
211            SHA_LONG64 t = c->h[n];
212
213            *(md++) = (unsigned char)(t >> 56);
214            *(md++) = (unsigned char)(t >> 48);
215            *(md++) = (unsigned char)(t >> 40);
216            *(md++) = (unsigned char)(t >> 32);
217            *(md++) = (unsigned char)(t >> 24);
218            *(md++) = (unsigned char)(t >> 16);
219            *(md++) = (unsigned char)(t >> 8);
220            *(md++) = (unsigned char)(t);
221        }
222        break;
223    case SHA384_DIGEST_LENGTH:
224        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
225            SHA_LONG64 t = c->h[n];
226
227            *(md++) = (unsigned char)(t >> 56);
228            *(md++) = (unsigned char)(t >> 48);
229            *(md++) = (unsigned char)(t >> 40);
230            *(md++) = (unsigned char)(t >> 32);
231            *(md++) = (unsigned char)(t >> 24);
232            *(md++) = (unsigned char)(t >> 16);
233            *(md++) = (unsigned char)(t >> 8);
234            *(md++) = (unsigned char)(t);
235        }
236        break;
237    case SHA512_DIGEST_LENGTH:
238        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
239            SHA_LONG64 t = c->h[n];
240
241            *(md++) = (unsigned char)(t >> 56);
242            *(md++) = (unsigned char)(t >> 48);
243            *(md++) = (unsigned char)(t >> 40);
244            *(md++) = (unsigned char)(t >> 32);
245            *(md++) = (unsigned char)(t >> 24);
246            *(md++) = (unsigned char)(t >> 16);
247            *(md++) = (unsigned char)(t >> 8);
248            *(md++) = (unsigned char)(t);
249        }
250        break;
251    /* ... as well as make sure md_len is not abused. */
252    default:
253        return 0;
254    }
255
256    return 1;
257}
258
/*
 * SHA-384 finalisation entry point. Forwards to SHA512_Final(), which
 * selects the output length from |c->md_len|, and returns its result.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    return SHA512_Final(md, c);
}
263
264int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
265{
266    SHA_LONG64 l;
267    unsigned char *p = c->u.p;
268    const unsigned char *data = (const unsigned char *)_data;
269
270    if (len == 0)
271        return 1;
272
273    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
274    if (l < c->Nl)
275        c->Nh++;
276    if (sizeof(len) >= 8)
277        c->Nh += (((SHA_LONG64) len) >> 61);
278    c->Nl = l;
279
280    if (c->num != 0) {
281        size_t n = sizeof(c->u) - c->num;
282
283        if (len < n) {
284            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
285            return 1;
286        } else {
287            memcpy(p + c->num, data, n), c->num = 0;
288            len -= n, data += n;
289            sha512_block_data_order(c, p, 1);
290        }
291    }
292
293    if (len >= sizeof(c->u)) {
294#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
295        if ((size_t)data % sizeof(c->u.d[0]) != 0)
296            while (len >= sizeof(c->u))
297                memcpy(p, data, sizeof(c->u)),
298                sha512_block_data_order(c, p, 1),
299                len -= sizeof(c->u), data += sizeof(c->u);
300        else
301#endif
302            sha512_block_data_order(c, data, len / sizeof(c->u)),
303            data += len, len %= sizeof(c->u), data -= len;
304    }
305
306    if (len != 0)
307        memcpy(p, data, len), c->num = (int)len;
308
309    return 1;
310}
311
/*
 * SHA-384 update entry point. Forwards |data| and |len| unchanged to
 * SHA512_Update() and returns its result.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    return SHA512_Update(c, data, len);
}
316
317void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
318{
319#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
320    if ((size_t)data % sizeof(c->u.d[0]) != 0)
321        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
322#endif
323    sha512_block_data_order(c, data, 1);
324}
325
326unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
327{
328    SHA512_CTX c;
329    static unsigned char m[SHA384_DIGEST_LENGTH];
330
331    if (md == NULL)
332        md = m;
333    SHA384_Init(&c);
334    SHA512_Update(&c, d, n);
335    SHA512_Final(md, &c);
336    OPENSSL_cleanse(&c, sizeof(c));
337    return md;
338}
339
340unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
341{
342    SHA512_CTX c;
343    static unsigned char m[SHA512_DIGEST_LENGTH];
344
345    if (md == NULL)
346        md = m;
347    SHA512_Init(&c);
348    SHA512_Update(&c, d, n);
349    SHA512_Final(md, &c);
350    OPENSSL_cleanse(&c, sizeof(c));
351    return md;
352}
353
354#ifndef SHA512_ASM
/*
 * Per-round additive constants for the C block transforms in this file:
 * entry i is added to the working value in round i (0..79). Only compiled
 * when SHA512_ASM is not defined.
 */
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};
397
/*
 * Optional platform-specific definitions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - read the eight bytes at &x as a big-endian 64-bit value.
 *
 * Either may be left undefined here; portable fallbacks are supplied
 * further down under "ifndef PULL64" / "ifndef ROTR". Defining PEDANTIC
 * disables this whole section. Note that the GNU statement-expression
 * macros declare locals named ret, p, hi and lo, so arguments must not use
 * those identifiers.
 */
# ifndef PEDANTIC
/* GCC-compatible compilers with inline assembly allowed. */
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/* x86-64: rorq for the rotate; the "J" constraint requires a constant n. */
#   if defined(__x86_64) || defined(__x86_64__)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
/* Little-endian load followed by bswapq; x is read through a direct cast. */
#    if !defined(B_ENDIAN)
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#    endif
/* 32-bit x86: swap each 32-bit half, then combine as hi<<32|lo. */
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
/* I386_ONLY avoids bswapl, using xchgb/roll instead (presumably for CPUs without bswap). */
#    if defined(I386_ONLY)
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    else
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];         \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    endif
/* 64-bit PowerPC: rotate only, via rotrdi. */
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
/* AArch64: ror for the rotate, rev for the byte swap when little-endian. */
#   elif defined(__aarch64__)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("ror %0,%1,%2"     \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
                                asm ("rev       %0,%1"          \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   endif
/* Microsoft compiler. */
#  elif defined(_MSC_VER)
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)    _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
/*
 * __pull64be: byte-reversed 64-bit load for 32-bit MSVC. The body reads the
 * argument through ecx and has no return statement; the result is left in
 * edx:eax (this relies on the __fastcall register convention - confirm
 * against the compiler documentation before changing).
 */
#    if defined(I386_ONLY)
/* Variant without bswap: xchg/rol sequence on each 32-bit half. */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov  edx,[ecx + 0]
    _asm mov  eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol  edx, 16
    _asm rol  eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
/* Variant using bswap on each 32-bit half. */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov   edx,[ecx + 0]
    _asm mov   eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
/*
 * Portable fallbacks and the SHA-512 round functions.
 *
 * B(x,j)      - byte j (0 = first in memory) of the object x, widened to
 *               SHA_LONG64 and shifted to its big-endian position.
 * PULL64(x)   - the eight bytes starting at &x read as a big-endian 64-bit
 *               value, assembled byte by byte so alignment and host byte
 *               order do not matter. x must be an lvalue.
 * ROTR(x,s)   - rotate the 64-bit value x right by s bits, 0 < s < 64.
 *
 * Every macro argument is parenthesised in the expansion so that compound
 * expressions such as ROTR(v, a + b) expand correctly. Arguments may be
 * evaluated more than once, so they must be free of side effects.
 */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/* Big sigma functions, applied to the working variables a and e. */
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* Small sigma functions, used to extend the message schedule. */
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/* Ch: bitwise "x ? y : z".  Maj: bitwise majority of x, y and z. */
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
494
495# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
496/*
497 * This code should give better results on 32-bit CPU with less than
498 * ~24 registers, both size and performance wise...
499 */
500
501static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
502                                    size_t num)
503{
504    const SHA_LONG64 *W = in;
505    SHA_LONG64 A, E, T;
506    SHA_LONG64 X[9 + 80], *F;
507    int i;
508
509    while (num--) {
510
511        F = X + 80;
512        A = ctx->h[0];
513        F[1] = ctx->h[1];
514        F[2] = ctx->h[2];
515        F[3] = ctx->h[3];
516        E = ctx->h[4];
517        F[5] = ctx->h[5];
518        F[6] = ctx->h[6];
519        F[7] = ctx->h[7];
520
521        for (i = 0; i < 16; i++, F--) {
522#  ifdef B_ENDIAN
523            T = W[i];
524#  else
525            T = PULL64(W[i]);
526#  endif
527            F[0] = A;
528            F[4] = E;
529            F[8] = T;
530            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
531            E = F[3] + T;
532            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
533        }
534
535        for (; i < 80; i++, F--) {
536            T = sigma0(F[8 + 16 - 1]);
537            T += sigma1(F[8 + 16 - 14]);
538            T += F[8 + 16] + F[8 + 16 - 9];
539
540            F[0] = A;
541            F[4] = E;
542            F[8] = T;
543            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
544            E = F[3] + T;
545            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
546        }
547
548        ctx->h[0] += A;
549        ctx->h[1] += F[1];
550        ctx->h[2] += F[2];
551        ctx->h[3] += F[3];
552        ctx->h[4] += E;
553        ctx->h[5] += F[5];
554        ctx->h[6] += F[6];
555        ctx->h[7] += F[7];
556
557        W += SHA_LBLOCK;
558    }
559}
560
561# elif defined(OPENSSL_SMALL_FOOTPRINT)
562
563static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
564                                    size_t num)
565{
566    const SHA_LONG64 *W = in;
567    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
568    SHA_LONG64 X[16];
569    int i;
570
571    while (num--) {
572
573        a = ctx->h[0];
574        b = ctx->h[1];
575        c = ctx->h[2];
576        d = ctx->h[3];
577        e = ctx->h[4];
578        f = ctx->h[5];
579        g = ctx->h[6];
580        h = ctx->h[7];
581
582        for (i = 0; i < 16; i++) {
583#  ifdef B_ENDIAN
584            T1 = X[i] = W[i];
585#  else
586            T1 = X[i] = PULL64(W[i]);
587#  endif
588            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
589            T2 = Sigma0(a) + Maj(a, b, c);
590            h = g;
591            g = f;
592            f = e;
593            e = d + T1;
594            d = c;
595            c = b;
596            b = a;
597            a = T1 + T2;
598        }
599
600        for (; i < 80; i++) {
601            s0 = X[(i + 1) & 0x0f];
602            s0 = sigma0(s0);
603            s1 = X[(i + 14) & 0x0f];
604            s1 = sigma1(s1);
605
606            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
607            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
608            T2 = Sigma0(a) + Maj(a, b, c);
609            h = g;
610            g = f;
611            f = e;
612            e = d + T1;
613            d = c;
614            c = b;
615            b = a;
616            a = T1 + T2;
617        }
618
619        ctx->h[0] += a;
620        ctx->h[1] += b;
621        ctx->h[2] += c;
622        ctx->h[3] += d;
623        ctx->h[4] += e;
624        ctx->h[5] += f;
625        ctx->h[6] += g;
626        ctx->h[7] += h;
627
628        W += SHA_LBLOCK;
629    }
630}
631
632# else
633#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
634        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
635        h = Sigma0(a) + Maj(a,b,c);                     \
636        d += T1;        h += T1;                        } while (0)
637
638#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
639        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
640        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
641        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
642        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
643
644static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
645                                    size_t num)
646{
647    const SHA_LONG64 *W = in;
648    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
649    SHA_LONG64 X[16];
650    int i;
651
652    while (num--) {
653
654        a = ctx->h[0];
655        b = ctx->h[1];
656        c = ctx->h[2];
657        d = ctx->h[3];
658        e = ctx->h[4];
659        f = ctx->h[5];
660        g = ctx->h[6];
661        h = ctx->h[7];
662
663#  ifdef B_ENDIAN
664        T1 = X[0] = W[0];
665        ROUND_00_15(0, a, b, c, d, e, f, g, h);
666        T1 = X[1] = W[1];
667        ROUND_00_15(1, h, a, b, c, d, e, f, g);
668        T1 = X[2] = W[2];
669        ROUND_00_15(2, g, h, a, b, c, d, e, f);
670        T1 = X[3] = W[3];
671        ROUND_00_15(3, f, g, h, a, b, c, d, e);
672        T1 = X[4] = W[4];
673        ROUND_00_15(4, e, f, g, h, a, b, c, d);
674        T1 = X[5] = W[5];
675        ROUND_00_15(5, d, e, f, g, h, a, b, c);
676        T1 = X[6] = W[6];
677        ROUND_00_15(6, c, d, e, f, g, h, a, b);
678        T1 = X[7] = W[7];
679        ROUND_00_15(7, b, c, d, e, f, g, h, a);
680        T1 = X[8] = W[8];
681        ROUND_00_15(8, a, b, c, d, e, f, g, h);
682        T1 = X[9] = W[9];
683        ROUND_00_15(9, h, a, b, c, d, e, f, g);
684        T1 = X[10] = W[10];
685        ROUND_00_15(10, g, h, a, b, c, d, e, f);
686        T1 = X[11] = W[11];
687        ROUND_00_15(11, f, g, h, a, b, c, d, e);
688        T1 = X[12] = W[12];
689        ROUND_00_15(12, e, f, g, h, a, b, c, d);
690        T1 = X[13] = W[13];
691        ROUND_00_15(13, d, e, f, g, h, a, b, c);
692        T1 = X[14] = W[14];
693        ROUND_00_15(14, c, d, e, f, g, h, a, b);
694        T1 = X[15] = W[15];
695        ROUND_00_15(15, b, c, d, e, f, g, h, a);
696#  else
697        T1 = X[0] = PULL64(W[0]);
698        ROUND_00_15(0, a, b, c, d, e, f, g, h);
699        T1 = X[1] = PULL64(W[1]);
700        ROUND_00_15(1, h, a, b, c, d, e, f, g);
701        T1 = X[2] = PULL64(W[2]);
702        ROUND_00_15(2, g, h, a, b, c, d, e, f);
703        T1 = X[3] = PULL64(W[3]);
704        ROUND_00_15(3, f, g, h, a, b, c, d, e);
705        T1 = X[4] = PULL64(W[4]);
706        ROUND_00_15(4, e, f, g, h, a, b, c, d);
707        T1 = X[5] = PULL64(W[5]);
708        ROUND_00_15(5, d, e, f, g, h, a, b, c);
709        T1 = X[6] = PULL64(W[6]);
710        ROUND_00_15(6, c, d, e, f, g, h, a, b);
711        T1 = X[7] = PULL64(W[7]);
712        ROUND_00_15(7, b, c, d, e, f, g, h, a);
713        T1 = X[8] = PULL64(W[8]);
714        ROUND_00_15(8, a, b, c, d, e, f, g, h);
715        T1 = X[9] = PULL64(W[9]);
716        ROUND_00_15(9, h, a, b, c, d, e, f, g);
717        T1 = X[10] = PULL64(W[10]);
718        ROUND_00_15(10, g, h, a, b, c, d, e, f);
719        T1 = X[11] = PULL64(W[11]);
720        ROUND_00_15(11, f, g, h, a, b, c, d, e);
721        T1 = X[12] = PULL64(W[12]);
722        ROUND_00_15(12, e, f, g, h, a, b, c, d);
723        T1 = X[13] = PULL64(W[13]);
724        ROUND_00_15(13, d, e, f, g, h, a, b, c);
725        T1 = X[14] = PULL64(W[14]);
726        ROUND_00_15(14, c, d, e, f, g, h, a, b);
727        T1 = X[15] = PULL64(W[15]);
728        ROUND_00_15(15, b, c, d, e, f, g, h, a);
729#  endif
730
731        for (i = 16; i < 80; i += 16) {
732            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
733            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
734            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
735            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
736            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
737            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
738            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
739            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
740            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
741            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
742            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
743            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
744            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
745            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
746            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
747            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
748        }
749
750        ctx->h[0] += a;
751        ctx->h[1] += b;
752        ctx->h[2] += c;
753        ctx->h[3] += d;
754        ctx->h[4] += e;
755        ctx->h[5] += f;
756        ctx->h[6] += g;
757        ctx->h[7] += h;
758
759        W += SHA_LBLOCK;
760    }
761}
762
763# endif
764
765#endif                         /* SHA512_ASM */
766