1/* 2 BLAKE2 reference source code package - optimized C implementations 3 4 Written in 2012 by Samuel Neves <sneves@dei.uc.pt> 5 6 To the extent possible under law, the author(s) have dedicated all copyright 7 and related and neighboring rights to this software to the public domain 8 worldwide. This software is distributed without any warranty. 9 10 You should have received a copy of the CC0 Public Domain Dedication along with 11 this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. 12*/ 13 14#include <stdint.h> 15#include <string.h> 16#include <stdio.h> 17 18#include "blake2.h" 19#include "blake2-impl.h" 20 21#include "blake2-config.h" 22 23#if defined(_MSC_VER) 24#include <intrin.h> 25#endif 26 27#if defined(HAVE_SSE2) 28#include <emmintrin.h> 29// MSVC only defines _mm_set_epi64x for x86_64... 30#if defined(_MSC_VER) && !defined(_M_X64) 31static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) 32{ 33 return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); 34} 35#endif 36#endif 37 38#if defined(HAVE_SSSE3) 39#include <tmmintrin.h> 40#endif 41#if defined(HAVE_SSE4_1) 42#include <smmintrin.h> 43#endif 44#if defined(HAVE_AVX) 45#include <immintrin.h> 46#endif 47#if defined(HAVE_XOP) && !defined(_MSC_VER) 48#include <x86intrin.h> 49#endif 50 51 52 53#include "blake2b-round.h" 54 55static const uint64_t blake2b_IV[8] = 56{ 57 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 58 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, 59 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, 60 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL 61}; 62 63static const uint8_t blake2b_sigma[12][16] = 64{ 65 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 66 { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , 67 { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , 68 { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , 69 { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , 70 { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , 71 { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , 72 { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , 73 { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , 74 { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , 75 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 76 { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } 77}; 78 79 80/* Some helper functions, not necessarily useful */ 81static inline int blake2b_set_lastnode( blake2b_state *S ) 82{ 83 S->f[1] = ~0ULL; 84 return 0; 85} 86 87static inline int blake2b_clear_lastnode( blake2b_state *S ) 88{ 89 S->f[1] = 0ULL; 90 return 0; 91} 92 93static inline int blake2b_set_lastblock( blake2b_state *S ) 94{ 95 if( S->last_node ) blake2b_set_lastnode( S ); 96 97 S->f[0] = ~0ULL; 98 return 0; 99} 100 101static inline int blake2b_clear_lastblock( blake2b_state *S ) 102{ 103 if( S->last_node ) blake2b_clear_lastnode( S ); 104 105 S->f[0] = 0ULL; 106 return 0; 107} 108 109 110static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) 111{ 112#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) 113 // ADD/ADC chain 114 __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; 115 t += inc; 116 S->t[0] = ( uint64_t )( t >> 0 ); 117 S->t[1] = ( uint64_t )( t >> 64 ); 118#else 119 S->t[0] += inc; 120 S->t[1] += ( S->t[0] < inc ); 121#endif 122 return 0; 123} 124 125 126// Parameter-related functions 127static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) 128{ 129 P->digest_length = digest_length; 130 return 0; 131} 132 133static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) 134{ 135 P->fanout = fanout; 136 return 0; 137} 138 139static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) 140{ 141 P->depth = depth; 142 return 0; 143} 144 145static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) 146{ 147 P->leaf_length = leaf_length; 148 return 0; 149} 150 151static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) 152{ 153 P->node_offset = node_offset; 154 return 0; 155} 156 157static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) 158{ 159 P->node_depth = node_depth; 160 return 0; 161} 162 163static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) 164{ 165 P->inner_length = inner_length; 166 return 0; 167} 168 169static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) 170{ 171 memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); 172 return 0; 173} 174 175static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) 176{ 177 memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); 178 return 0; 179} 180 181static inline int blake2b_init0( blake2b_state *S ) 182{ 183 memset( S, 0, sizeof( blake2b_state ) ); 184 185 for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; 186 187 return 0; 188} 189 190 191 192#if defined(__cplusplus) 193extern "C" { 194#endif 195 int blake2b_init( blake2b_state *S, size_t outlen ); 196 int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); 197 int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); 198 int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); 199 int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); 200 int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); 201#if defined(__cplusplus) 202} 203#endif 204 205/* init xors IV with input parameter block */ 206int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) 207{ 208 uint8_t *p, *h, *v; 209 //blake2b_init0( S ); 210 v = ( uint8_t * )( blake2b_IV ); 211 h = ( uint8_t * )( S->h ); 212 p = ( uint8_t * )( P ); 213 /* IV XOR ParamBlock */ 214 memset( S, 0, sizeof( blake2b_state ) ); 215 216 for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; 217 218 S->outlen = P->digest_length; 219 return 0; 220} 221 222 223/* Some sort of default parameter block initialization, for sequential blake2b */ 224 225int blake2b_init( blake2b_state *S, size_t outlen ) 226{ 227 if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; 228 229 const blake2b_param P = 230 { 231 ( uint8_t ) outlen, 232 0, 233 1, 234 1, 235 0, 236 0, 237 0, 238 0, 239 {0}, 240 {0}, 241 {0} 242 }; 243 return blake2b_init_param( S, &P ); 244} 245 246int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) 247{ 248 if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; 249 250 if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; 251 252 const blake2b_param P = 253 { 254 ( uint8_t ) outlen, 255 ( uint8_t ) keylen, 256 1, 257 1, 258 0, 259 0, 260 0, 261 0, 262 {0}, 263 {0}, 264 {0} 265 }; 266 267 if( blake2b_init_param( S, &P ) < 0 ) 268 return 0; 269 270 { 271 uint8_t block[BLAKE2B_BLOCKBYTES]; 272 memset( block, 0, BLAKE2B_BLOCKBYTES ); 273 memcpy( block, key, keylen ); 274 blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); 275 secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ 276 } 277 return 0; 278} 279 280static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) 281{ 282 __m128i row1l, row1h; 283 __m128i row2l, row2h; 284 __m128i row3l, row3h; 285 __m128i row4l, row4h; 286 __m128i b0, b1; 287 __m128i t0, t1; 288#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) 289 const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); 290 const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); 291#endif 292#if defined(HAVE_SSE4_1) 293 const __m128i m0 = LOADU( block + 00 ); 294 const __m128i m1 = LOADU( block + 16 ); 295 const __m128i m2 = LOADU( block + 32 ); 296 const __m128i m3 = LOADU( block + 48 ); 297 const __m128i m4 = LOADU( block + 64 ); 298 const __m128i m5 = LOADU( block + 80 ); 299 const __m128i m6 = LOADU( block + 96 ); 300 const __m128i m7 = LOADU( block + 112 ); 301#else 302 const uint64_t m0 = ( ( uint64_t * )block )[ 0]; 303 const uint64_t m1 = ( ( uint64_t * )block )[ 1]; 304 const uint64_t m2 = ( ( uint64_t * )block )[ 2]; 305 const uint64_t m3 = ( ( uint64_t * )block )[ 3]; 306 const uint64_t m4 = ( ( uint64_t * )block )[ 4]; 307 const uint64_t m5 = ( ( uint64_t * )block )[ 5]; 308 const uint64_t m6 = ( ( uint64_t * )block )[ 6]; 309 const uint64_t m7 = ( ( uint64_t * )block )[ 7]; 310 const uint64_t m8 = ( ( uint64_t * )block )[ 8]; 311 const uint64_t m9 = ( ( uint64_t * )block )[ 9]; 312 const uint64_t m10 = ( ( uint64_t * )block )[10]; 313 const uint64_t m11 = ( ( uint64_t * )block )[11]; 314 const uint64_t m12 = ( ( uint64_t * )block )[12]; 315 const uint64_t m13 = ( ( uint64_t * )block )[13]; 316 const uint64_t m14 = ( ( uint64_t * )block )[14]; 317 const uint64_t m15 = ( ( uint64_t * )block )[15]; 318#endif 319 row1l = LOADU( &S->h[0] ); 320 row1h = LOADU( &S->h[2] ); 321 row2l = LOADU( &S->h[4] ); 322 row2h = LOADU( &S->h[6] ); 323 row3l = LOADU( &blake2b_IV[0] ); 324 row3h = LOADU( &blake2b_IV[2] ); 325 row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) ); 326 row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) ); 327 ROUND( 0 ); 328 ROUND( 1 ); 329 ROUND( 2 ); 330 ROUND( 3 ); 331 ROUND( 4 ); 332 ROUND( 5 ); 333 ROUND( 6 ); 334 ROUND( 7 ); 335 ROUND( 8 ); 336 ROUND( 9 ); 337 ROUND( 10 ); 338 ROUND( 11 ); 339 row1l = _mm_xor_si128( row3l, row1l ); 340 row1h = _mm_xor_si128( row3h, row1h ); 341 STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) ); 342 STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) ); 343 row2l = _mm_xor_si128( row4l, row2l ); 344 row2h = _mm_xor_si128( row4h, row2h ); 345 STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); 346 STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); 347 return 0; 348} 349 350 351int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) 352{ 353 while( inlen > 0 ) 354 { 355 uint32_t left = S->buflen; 356 uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; 357 358 if( inlen > fill ) 359 { 360 memcpy( S->buf + left, in, fill ); // Fill buffer 361 S->buflen += fill; 362 blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 363 blake2b_compress( S, S->buf ); // Compress 364 memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left 365 S->buflen -= BLAKE2B_BLOCKBYTES; 366 in += fill; 367 inlen -= fill; 368 } 369 else // inlen <= fill 370 { 371 memcpy( S->buf + left, in, inlen ); 372 S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress 373 in += inlen; 374 inlen -= inlen; 375 } 376 } 377 378 return 0; 379} 380 381 382int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) 383{ 384 if(S->outlen != outlen) return -1; 385 386 if( S->buflen > BLAKE2B_BLOCKBYTES ) 387 { 388 blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 389 blake2b_compress( S, S->buf ); 390 S->buflen -= BLAKE2B_BLOCKBYTES; 391 memmove( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); 392 } 393 394 blake2b_increment_counter( S, S->buflen ); 395 blake2b_set_lastblock( S ); 396 memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ 397 blake2b_compress( S, S->buf ); 398 memcpy( out, &S->h[0], outlen ); 399 return 0; 400} 401 402 403int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) 404{ 405 blake2b_state S[1]; 406 407 /* Verify parameters */ 408 if ( NULL == in && inlen > 0 ) return -1; 409 410 if ( NULL == out ) return -1; 411 412 if( NULL == key && keylen > 0 ) return -1; 413 414 if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; 415 416 if( keylen > BLAKE2B_KEYBYTES ) return -1; 417 418 if( keylen ) 419 { 420 if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; 421 } 422 else 423 { 424 if( blake2b_init( S, outlen ) < 0 ) return -1; 425 } 426 427 if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1; 428 return blake2b_final( S, out, outlen ); 429} 430 431#if defined(SUPERCOP) 432int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) 433{ 434 return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); 435} 436#endif 437