17db96d56Sopenharmony_ci/*
27db96d56Sopenharmony_ci   BLAKE2 reference source code package - optimized C implementations
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ci   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ci   To the extent possible under law, the author(s) have dedicated all copyright
77db96d56Sopenharmony_ci   and related and neighboring rights to this software to the public domain
87db96d56Sopenharmony_ci   worldwide. This software is distributed without any warranty.
97db96d56Sopenharmony_ci
107db96d56Sopenharmony_ci   You should have received a copy of the CC0 Public Domain Dedication along with
117db96d56Sopenharmony_ci   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
127db96d56Sopenharmony_ci*/
137db96d56Sopenharmony_ci
147db96d56Sopenharmony_ci#include <stdint.h>
157db96d56Sopenharmony_ci#include <string.h>
167db96d56Sopenharmony_ci#include <stdio.h>
177db96d56Sopenharmony_ci
187db96d56Sopenharmony_ci#include "blake2.h"
197db96d56Sopenharmony_ci#include "blake2-impl.h"
207db96d56Sopenharmony_ci
217db96d56Sopenharmony_ci#include "blake2-config.h"
227db96d56Sopenharmony_ci
237db96d56Sopenharmony_ci#if defined(_MSC_VER)
247db96d56Sopenharmony_ci#include <intrin.h>
257db96d56Sopenharmony_ci#endif
267db96d56Sopenharmony_ci
277db96d56Sopenharmony_ci#if defined(HAVE_SSE2)
287db96d56Sopenharmony_ci#include <emmintrin.h>
297db96d56Sopenharmony_ci// MSVC only defines  _mm_set_epi64x for x86_64...
307db96d56Sopenharmony_ci#if defined(_MSC_VER) && !defined(_M_X64)
317db96d56Sopenharmony_cistatic inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
327db96d56Sopenharmony_ci{
337db96d56Sopenharmony_ci  return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
347db96d56Sopenharmony_ci}
357db96d56Sopenharmony_ci#endif
367db96d56Sopenharmony_ci#endif
377db96d56Sopenharmony_ci
387db96d56Sopenharmony_ci#if defined(HAVE_SSSE3)
397db96d56Sopenharmony_ci#include <tmmintrin.h>
407db96d56Sopenharmony_ci#endif
417db96d56Sopenharmony_ci#if defined(HAVE_SSE4_1)
427db96d56Sopenharmony_ci#include <smmintrin.h>
437db96d56Sopenharmony_ci#endif
447db96d56Sopenharmony_ci#if defined(HAVE_AVX)
457db96d56Sopenharmony_ci#include <immintrin.h>
467db96d56Sopenharmony_ci#endif
477db96d56Sopenharmony_ci#if defined(HAVE_XOP) && !defined(_MSC_VER)
487db96d56Sopenharmony_ci#include <x86intrin.h>
497db96d56Sopenharmony_ci#endif
507db96d56Sopenharmony_ci
517db96d56Sopenharmony_ci
527db96d56Sopenharmony_ci
537db96d56Sopenharmony_ci#include "blake2b-round.h"
547db96d56Sopenharmony_ci
/* BLAKE2b initialization vector: eight 64-bit words. In this file it seeds
   the chaining value S->h and fills rows 3 and 4 of the compression state. */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL,
  0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL,
  0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL,
  0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL,
  0x5be0cd19137e2179ULL
};
627db96d56Sopenharmony_ci
/* Message word permutation table: 12 rows (one per round) of 16 indices.
   Rows 10 and 11 repeat rows 0 and 1. */
static const uint8_t blake2b_sigma[12][16] =
{
  /* round  0 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* round  1 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  /* round  2 */ { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  /* round  3 */ {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  /* round  4 */ {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  /* round  5 */ {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  /* round  6 */ { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  /* round  7 */ { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  /* round  8 */ {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  /* round  9 */ { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
  /* round 10 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* round 11 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
787db96d56Sopenharmony_ci
797db96d56Sopenharmony_ci
/* Helpers that set or clear the finalization flags S->f[0] (last block) and S->f[1] (last node) */
817db96d56Sopenharmony_cistatic inline int blake2b_set_lastnode( blake2b_state *S )
827db96d56Sopenharmony_ci{
837db96d56Sopenharmony_ci  S->f[1] = ~0ULL;
847db96d56Sopenharmony_ci  return 0;
857db96d56Sopenharmony_ci}
867db96d56Sopenharmony_ci
877db96d56Sopenharmony_cistatic inline int blake2b_clear_lastnode( blake2b_state *S )
887db96d56Sopenharmony_ci{
897db96d56Sopenharmony_ci  S->f[1] = 0ULL;
907db96d56Sopenharmony_ci  return 0;
917db96d56Sopenharmony_ci}
927db96d56Sopenharmony_ci
937db96d56Sopenharmony_cistatic inline int blake2b_set_lastblock( blake2b_state *S )
947db96d56Sopenharmony_ci{
957db96d56Sopenharmony_ci  if( S->last_node ) blake2b_set_lastnode( S );
967db96d56Sopenharmony_ci
977db96d56Sopenharmony_ci  S->f[0] = ~0ULL;
987db96d56Sopenharmony_ci  return 0;
997db96d56Sopenharmony_ci}
1007db96d56Sopenharmony_ci
1017db96d56Sopenharmony_cistatic inline int blake2b_clear_lastblock( blake2b_state *S )
1027db96d56Sopenharmony_ci{
1037db96d56Sopenharmony_ci  if( S->last_node ) blake2b_clear_lastnode( S );
1047db96d56Sopenharmony_ci
1057db96d56Sopenharmony_ci  S->f[0] = 0ULL;
1067db96d56Sopenharmony_ci  return 0;
1077db96d56Sopenharmony_ci}
1087db96d56Sopenharmony_ci
1097db96d56Sopenharmony_ci
1107db96d56Sopenharmony_cistatic inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
1117db96d56Sopenharmony_ci{
1127db96d56Sopenharmony_ci#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
1137db96d56Sopenharmony_ci  // ADD/ADC chain
1147db96d56Sopenharmony_ci  __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
1157db96d56Sopenharmony_ci  t += inc;
1167db96d56Sopenharmony_ci  S->t[0] = ( uint64_t )( t >>  0 );
1177db96d56Sopenharmony_ci  S->t[1] = ( uint64_t )( t >> 64 );
1187db96d56Sopenharmony_ci#else
1197db96d56Sopenharmony_ci  S->t[0] += inc;
1207db96d56Sopenharmony_ci  S->t[1] += ( S->t[0] < inc );
1217db96d56Sopenharmony_ci#endif
1227db96d56Sopenharmony_ci  return 0;
1237db96d56Sopenharmony_ci}
1247db96d56Sopenharmony_ci
1257db96d56Sopenharmony_ci
1267db96d56Sopenharmony_ci// Parameter-related functions
1277db96d56Sopenharmony_cistatic inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
1287db96d56Sopenharmony_ci{
1297db96d56Sopenharmony_ci  P->digest_length = digest_length;
1307db96d56Sopenharmony_ci  return 0;
1317db96d56Sopenharmony_ci}
1327db96d56Sopenharmony_ci
1337db96d56Sopenharmony_cistatic inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
1347db96d56Sopenharmony_ci{
1357db96d56Sopenharmony_ci  P->fanout = fanout;
1367db96d56Sopenharmony_ci  return 0;
1377db96d56Sopenharmony_ci}
1387db96d56Sopenharmony_ci
1397db96d56Sopenharmony_cistatic inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
1407db96d56Sopenharmony_ci{
1417db96d56Sopenharmony_ci  P->depth = depth;
1427db96d56Sopenharmony_ci  return 0;
1437db96d56Sopenharmony_ci}
1447db96d56Sopenharmony_ci
1457db96d56Sopenharmony_cistatic inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
1467db96d56Sopenharmony_ci{
1477db96d56Sopenharmony_ci  P->leaf_length = leaf_length;
1487db96d56Sopenharmony_ci  return 0;
1497db96d56Sopenharmony_ci}
1507db96d56Sopenharmony_ci
1517db96d56Sopenharmony_cistatic inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
1527db96d56Sopenharmony_ci{
1537db96d56Sopenharmony_ci  P->node_offset = node_offset;
1547db96d56Sopenharmony_ci  return 0;
1557db96d56Sopenharmony_ci}
1567db96d56Sopenharmony_ci
1577db96d56Sopenharmony_cistatic inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
1587db96d56Sopenharmony_ci{
1597db96d56Sopenharmony_ci  P->node_depth = node_depth;
1607db96d56Sopenharmony_ci  return 0;
1617db96d56Sopenharmony_ci}
1627db96d56Sopenharmony_ci
1637db96d56Sopenharmony_cistatic inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
1647db96d56Sopenharmony_ci{
1657db96d56Sopenharmony_ci  P->inner_length = inner_length;
1667db96d56Sopenharmony_ci  return 0;
1677db96d56Sopenharmony_ci}
1687db96d56Sopenharmony_ci
1697db96d56Sopenharmony_cistatic inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
1707db96d56Sopenharmony_ci{
1717db96d56Sopenharmony_ci  memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
1727db96d56Sopenharmony_ci  return 0;
1737db96d56Sopenharmony_ci}
1747db96d56Sopenharmony_ci
1757db96d56Sopenharmony_cistatic inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
1767db96d56Sopenharmony_ci{
1777db96d56Sopenharmony_ci  memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
1787db96d56Sopenharmony_ci  return 0;
1797db96d56Sopenharmony_ci}
1807db96d56Sopenharmony_ci
1817db96d56Sopenharmony_cistatic inline int blake2b_init0( blake2b_state *S )
1827db96d56Sopenharmony_ci{
1837db96d56Sopenharmony_ci  memset( S, 0, sizeof( blake2b_state ) );
1847db96d56Sopenharmony_ci
1857db96d56Sopenharmony_ci  for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
1867db96d56Sopenharmony_ci
1877db96d56Sopenharmony_ci  return 0;
1887db96d56Sopenharmony_ci}
1897db96d56Sopenharmony_ci
1907db96d56Sopenharmony_ci
1917db96d56Sopenharmony_ci
1927db96d56Sopenharmony_ci#if defined(__cplusplus)
1937db96d56Sopenharmony_ciextern "C" {
1947db96d56Sopenharmony_ci#endif
1957db96d56Sopenharmony_ci  int blake2b_init( blake2b_state *S, size_t outlen );
1967db96d56Sopenharmony_ci  int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
1977db96d56Sopenharmony_ci  int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
1987db96d56Sopenharmony_ci  int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
1997db96d56Sopenharmony_ci  int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
2007db96d56Sopenharmony_ci  int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
2017db96d56Sopenharmony_ci#if defined(__cplusplus)
2027db96d56Sopenharmony_ci}
2037db96d56Sopenharmony_ci#endif
2047db96d56Sopenharmony_ci
2057db96d56Sopenharmony_ci/* init xors IV with input parameter block */
2067db96d56Sopenharmony_ciint blake2b_init_param( blake2b_state *S, const blake2b_param *P )
2077db96d56Sopenharmony_ci{
2087db96d56Sopenharmony_ci  uint8_t *p, *h, *v;
2097db96d56Sopenharmony_ci  //blake2b_init0( S );
2107db96d56Sopenharmony_ci  v = ( uint8_t * )( blake2b_IV );
2117db96d56Sopenharmony_ci  h = ( uint8_t * )( S->h );
2127db96d56Sopenharmony_ci  p = ( uint8_t * )( P );
2137db96d56Sopenharmony_ci  /* IV XOR ParamBlock */
2147db96d56Sopenharmony_ci  memset( S, 0, sizeof( blake2b_state ) );
2157db96d56Sopenharmony_ci
2167db96d56Sopenharmony_ci  for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
2177db96d56Sopenharmony_ci
2187db96d56Sopenharmony_ci  S->outlen = P->digest_length;
2197db96d56Sopenharmony_ci  return 0;
2207db96d56Sopenharmony_ci}
2217db96d56Sopenharmony_ci
2227db96d56Sopenharmony_ci
2237db96d56Sopenharmony_ci/* Some sort of default parameter block initialization, for sequential blake2b */
2247db96d56Sopenharmony_ci
2257db96d56Sopenharmony_ciint blake2b_init( blake2b_state *S, size_t outlen )
2267db96d56Sopenharmony_ci{
2277db96d56Sopenharmony_ci  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
2287db96d56Sopenharmony_ci
2297db96d56Sopenharmony_ci  const blake2b_param P =
2307db96d56Sopenharmony_ci  {
2317db96d56Sopenharmony_ci    ( uint8_t ) outlen,
2327db96d56Sopenharmony_ci    0,
2337db96d56Sopenharmony_ci    1,
2347db96d56Sopenharmony_ci    1,
2357db96d56Sopenharmony_ci    0,
2367db96d56Sopenharmony_ci    0,
2377db96d56Sopenharmony_ci    0,
2387db96d56Sopenharmony_ci    0,
2397db96d56Sopenharmony_ci    {0},
2407db96d56Sopenharmony_ci    {0},
2417db96d56Sopenharmony_ci    {0}
2427db96d56Sopenharmony_ci  };
2437db96d56Sopenharmony_ci  return blake2b_init_param( S, &P );
2447db96d56Sopenharmony_ci}
2457db96d56Sopenharmony_ci
2467db96d56Sopenharmony_ciint blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
2477db96d56Sopenharmony_ci{
2487db96d56Sopenharmony_ci  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
2497db96d56Sopenharmony_ci
2507db96d56Sopenharmony_ci  if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
2517db96d56Sopenharmony_ci
2527db96d56Sopenharmony_ci  const blake2b_param P =
2537db96d56Sopenharmony_ci  {
2547db96d56Sopenharmony_ci    ( uint8_t ) outlen,
2557db96d56Sopenharmony_ci    ( uint8_t ) keylen,
2567db96d56Sopenharmony_ci    1,
2577db96d56Sopenharmony_ci    1,
2587db96d56Sopenharmony_ci    0,
2597db96d56Sopenharmony_ci    0,
2607db96d56Sopenharmony_ci    0,
2617db96d56Sopenharmony_ci    0,
2627db96d56Sopenharmony_ci    {0},
2637db96d56Sopenharmony_ci    {0},
2647db96d56Sopenharmony_ci    {0}
2657db96d56Sopenharmony_ci  };
2667db96d56Sopenharmony_ci
2677db96d56Sopenharmony_ci  if( blake2b_init_param( S, &P ) < 0 )
2687db96d56Sopenharmony_ci    return 0;
2697db96d56Sopenharmony_ci
2707db96d56Sopenharmony_ci  {
2717db96d56Sopenharmony_ci    uint8_t block[BLAKE2B_BLOCKBYTES];
2727db96d56Sopenharmony_ci    memset( block, 0, BLAKE2B_BLOCKBYTES );
2737db96d56Sopenharmony_ci    memcpy( block, key, keylen );
2747db96d56Sopenharmony_ci    blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
2757db96d56Sopenharmony_ci    secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
2767db96d56Sopenharmony_ci  }
2777db96d56Sopenharmony_ci  return 0;
2787db96d56Sopenharmony_ci}
2797db96d56Sopenharmony_ci
2807db96d56Sopenharmony_cistatic inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
2817db96d56Sopenharmony_ci{
2827db96d56Sopenharmony_ci  __m128i row1l, row1h;
2837db96d56Sopenharmony_ci  __m128i row2l, row2h;
2847db96d56Sopenharmony_ci  __m128i row3l, row3h;
2857db96d56Sopenharmony_ci  __m128i row4l, row4h;
2867db96d56Sopenharmony_ci  __m128i b0, b1;
2877db96d56Sopenharmony_ci  __m128i t0, t1;
2887db96d56Sopenharmony_ci#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
2897db96d56Sopenharmony_ci  const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
2907db96d56Sopenharmony_ci  const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
2917db96d56Sopenharmony_ci#endif
2927db96d56Sopenharmony_ci#if defined(HAVE_SSE4_1)
2937db96d56Sopenharmony_ci  const __m128i m0 = LOADU( block + 00 );
2947db96d56Sopenharmony_ci  const __m128i m1 = LOADU( block + 16 );
2957db96d56Sopenharmony_ci  const __m128i m2 = LOADU( block + 32 );
2967db96d56Sopenharmony_ci  const __m128i m3 = LOADU( block + 48 );
2977db96d56Sopenharmony_ci  const __m128i m4 = LOADU( block + 64 );
2987db96d56Sopenharmony_ci  const __m128i m5 = LOADU( block + 80 );
2997db96d56Sopenharmony_ci  const __m128i m6 = LOADU( block + 96 );
3007db96d56Sopenharmony_ci  const __m128i m7 = LOADU( block + 112 );
3017db96d56Sopenharmony_ci#else
3027db96d56Sopenharmony_ci  const uint64_t  m0 = ( ( uint64_t * )block )[ 0];
3037db96d56Sopenharmony_ci  const uint64_t  m1 = ( ( uint64_t * )block )[ 1];
3047db96d56Sopenharmony_ci  const uint64_t  m2 = ( ( uint64_t * )block )[ 2];
3057db96d56Sopenharmony_ci  const uint64_t  m3 = ( ( uint64_t * )block )[ 3];
3067db96d56Sopenharmony_ci  const uint64_t  m4 = ( ( uint64_t * )block )[ 4];
3077db96d56Sopenharmony_ci  const uint64_t  m5 = ( ( uint64_t * )block )[ 5];
3087db96d56Sopenharmony_ci  const uint64_t  m6 = ( ( uint64_t * )block )[ 6];
3097db96d56Sopenharmony_ci  const uint64_t  m7 = ( ( uint64_t * )block )[ 7];
3107db96d56Sopenharmony_ci  const uint64_t  m8 = ( ( uint64_t * )block )[ 8];
3117db96d56Sopenharmony_ci  const uint64_t  m9 = ( ( uint64_t * )block )[ 9];
3127db96d56Sopenharmony_ci  const uint64_t m10 = ( ( uint64_t * )block )[10];
3137db96d56Sopenharmony_ci  const uint64_t m11 = ( ( uint64_t * )block )[11];
3147db96d56Sopenharmony_ci  const uint64_t m12 = ( ( uint64_t * )block )[12];
3157db96d56Sopenharmony_ci  const uint64_t m13 = ( ( uint64_t * )block )[13];
3167db96d56Sopenharmony_ci  const uint64_t m14 = ( ( uint64_t * )block )[14];
3177db96d56Sopenharmony_ci  const uint64_t m15 = ( ( uint64_t * )block )[15];
3187db96d56Sopenharmony_ci#endif
3197db96d56Sopenharmony_ci  row1l = LOADU( &S->h[0] );
3207db96d56Sopenharmony_ci  row1h = LOADU( &S->h[2] );
3217db96d56Sopenharmony_ci  row2l = LOADU( &S->h[4] );
3227db96d56Sopenharmony_ci  row2h = LOADU( &S->h[6] );
3237db96d56Sopenharmony_ci  row3l = LOADU( &blake2b_IV[0] );
3247db96d56Sopenharmony_ci  row3h = LOADU( &blake2b_IV[2] );
3257db96d56Sopenharmony_ci  row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
3267db96d56Sopenharmony_ci  row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
3277db96d56Sopenharmony_ci  ROUND( 0 );
3287db96d56Sopenharmony_ci  ROUND( 1 );
3297db96d56Sopenharmony_ci  ROUND( 2 );
3307db96d56Sopenharmony_ci  ROUND( 3 );
3317db96d56Sopenharmony_ci  ROUND( 4 );
3327db96d56Sopenharmony_ci  ROUND( 5 );
3337db96d56Sopenharmony_ci  ROUND( 6 );
3347db96d56Sopenharmony_ci  ROUND( 7 );
3357db96d56Sopenharmony_ci  ROUND( 8 );
3367db96d56Sopenharmony_ci  ROUND( 9 );
3377db96d56Sopenharmony_ci  ROUND( 10 );
3387db96d56Sopenharmony_ci  ROUND( 11 );
3397db96d56Sopenharmony_ci  row1l = _mm_xor_si128( row3l, row1l );
3407db96d56Sopenharmony_ci  row1h = _mm_xor_si128( row3h, row1h );
3417db96d56Sopenharmony_ci  STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
3427db96d56Sopenharmony_ci  STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
3437db96d56Sopenharmony_ci  row2l = _mm_xor_si128( row4l, row2l );
3447db96d56Sopenharmony_ci  row2h = _mm_xor_si128( row4h, row2h );
3457db96d56Sopenharmony_ci  STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
3467db96d56Sopenharmony_ci  STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
3477db96d56Sopenharmony_ci  return 0;
3487db96d56Sopenharmony_ci}
3497db96d56Sopenharmony_ci
3507db96d56Sopenharmony_ci
3517db96d56Sopenharmony_ciint blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
3527db96d56Sopenharmony_ci{
3537db96d56Sopenharmony_ci  while( inlen > 0 )
3547db96d56Sopenharmony_ci  {
3557db96d56Sopenharmony_ci    uint32_t left = S->buflen;
3567db96d56Sopenharmony_ci    uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
3577db96d56Sopenharmony_ci
3587db96d56Sopenharmony_ci    if( inlen > fill )
3597db96d56Sopenharmony_ci    {
3607db96d56Sopenharmony_ci      memcpy( S->buf + left, in, fill ); // Fill buffer
3617db96d56Sopenharmony_ci      S->buflen += fill;
3627db96d56Sopenharmony_ci      blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
3637db96d56Sopenharmony_ci      blake2b_compress( S, S->buf ); // Compress
3647db96d56Sopenharmony_ci      memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
3657db96d56Sopenharmony_ci      S->buflen -= BLAKE2B_BLOCKBYTES;
3667db96d56Sopenharmony_ci      in += fill;
3677db96d56Sopenharmony_ci      inlen -= fill;
3687db96d56Sopenharmony_ci    }
3697db96d56Sopenharmony_ci    else // inlen <= fill
3707db96d56Sopenharmony_ci    {
3717db96d56Sopenharmony_ci      memcpy( S->buf + left, in, inlen );
3727db96d56Sopenharmony_ci      S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress
3737db96d56Sopenharmony_ci      in += inlen;
3747db96d56Sopenharmony_ci      inlen -= inlen;
3757db96d56Sopenharmony_ci    }
3767db96d56Sopenharmony_ci  }
3777db96d56Sopenharmony_ci
3787db96d56Sopenharmony_ci  return 0;
3797db96d56Sopenharmony_ci}
3807db96d56Sopenharmony_ci
3817db96d56Sopenharmony_ci
3827db96d56Sopenharmony_ciint blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
3837db96d56Sopenharmony_ci{
3847db96d56Sopenharmony_ci  if(S->outlen != outlen) return -1;
3857db96d56Sopenharmony_ci
3867db96d56Sopenharmony_ci  if( S->buflen > BLAKE2B_BLOCKBYTES )
3877db96d56Sopenharmony_ci  {
3887db96d56Sopenharmony_ci    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
3897db96d56Sopenharmony_ci    blake2b_compress( S, S->buf );
3907db96d56Sopenharmony_ci    S->buflen -= BLAKE2B_BLOCKBYTES;
3917db96d56Sopenharmony_ci    memmove( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
3927db96d56Sopenharmony_ci  }
3937db96d56Sopenharmony_ci
3947db96d56Sopenharmony_ci  blake2b_increment_counter( S, S->buflen );
3957db96d56Sopenharmony_ci  blake2b_set_lastblock( S );
3967db96d56Sopenharmony_ci  memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
3977db96d56Sopenharmony_ci  blake2b_compress( S, S->buf );
3987db96d56Sopenharmony_ci  memcpy( out, &S->h[0], outlen );
3997db96d56Sopenharmony_ci  return 0;
4007db96d56Sopenharmony_ci}
4017db96d56Sopenharmony_ci
4027db96d56Sopenharmony_ci
4037db96d56Sopenharmony_ciint blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
4047db96d56Sopenharmony_ci{
4057db96d56Sopenharmony_ci  blake2b_state S[1];
4067db96d56Sopenharmony_ci
4077db96d56Sopenharmony_ci  /* Verify parameters */
4087db96d56Sopenharmony_ci  if ( NULL == in && inlen > 0 ) return -1;
4097db96d56Sopenharmony_ci
4107db96d56Sopenharmony_ci  if ( NULL == out ) return -1;
4117db96d56Sopenharmony_ci
4127db96d56Sopenharmony_ci  if( NULL == key && keylen > 0 ) return -1;
4137db96d56Sopenharmony_ci
4147db96d56Sopenharmony_ci  if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
4157db96d56Sopenharmony_ci
4167db96d56Sopenharmony_ci  if( keylen > BLAKE2B_KEYBYTES ) return -1;
4177db96d56Sopenharmony_ci
4187db96d56Sopenharmony_ci  if( keylen )
4197db96d56Sopenharmony_ci  {
4207db96d56Sopenharmony_ci    if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
4217db96d56Sopenharmony_ci  }
4227db96d56Sopenharmony_ci  else
4237db96d56Sopenharmony_ci  {
4247db96d56Sopenharmony_ci    if( blake2b_init( S, outlen ) < 0 ) return -1;
4257db96d56Sopenharmony_ci  }
4267db96d56Sopenharmony_ci
4277db96d56Sopenharmony_ci  if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
4287db96d56Sopenharmony_ci  return blake2b_final( S, out, outlen );
4297db96d56Sopenharmony_ci}
4307db96d56Sopenharmony_ci
4317db96d56Sopenharmony_ci#if defined(SUPERCOP)
4327db96d56Sopenharmony_ciint crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
4337db96d56Sopenharmony_ci{
4347db96d56Sopenharmony_ci  return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
4357db96d56Sopenharmony_ci}
4367db96d56Sopenharmony_ci#endif
437