1/*
2   xxHash - Extremely Fast Hash algorithm
3   Header File
4   Copyright (C) 2012-2016, Yann Collet.
5
6   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions are
10   met:
11
12       * Redistributions of source code must retain the above copyright
13   notice, this list of conditions and the following disclaimer.
14       * Redistributions in binary form must reproduce the above
15   copyright notice, this list of conditions and the following disclaimer
16   in the documentation and/or other materials provided with the
17   distribution.
18
19   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31   You can contact the author at :
32   - xxHash source repository : https://github.com/Cyan4973/xxHash
33*/
34
35/* Notice extracted from xxHash homepage :
36
37xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38It also successfully passes all tests from the SMHasher suite.
39
40Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41
42Name            Speed       Q.Score   Author
43xxHash          5.4 GB/s     10
44CrapWow         3.2 GB/s      2       Andrew
45MurmurHash 3a   2.7 GB/s     10       Austin Appleby
46SpookyHash      2.0 GB/s     10       Bob Jenkins
47SBox            1.4 GB/s      9       Bret Mulvey
48Lookup3         1.2 GB/s      9       Bob Jenkins
49SuperFastHash   1.2 GB/s      1       Paul Hsieh
50CityHash64      1.05 GB/s    10       Pike & Alakuijala
51FNV             0.55 GB/s     5       Fowler, Noll, Vo
52CRC32           0.43 GB/s     9
53MD5-32          0.33 GB/s    10       Ronald L. Rivest
54SHA1-32         0.28 GB/s    10
55
56Q.Score is a measure of quality of the hash function.
57It depends on successfully passing the SMHasher test set.
5810 is a perfect score.
59
60Note : SMHasher's CRC32 implementation is not the fastest one.
61Other speed-oriented implementations can be faster,
62especially in combination with PCLMUL instruction :
63http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
64
65A 64-bit version, named XXH64, is available since r35.
66It offers much better speed, but for 64-bit applications only.
67Name     Speed on 64 bits    Speed on 32 bits
68XXH64       13.8 GB/s            1.9 GB/s
69XXH32        6.8 GB/s            6.0 GB/s
70*/
71
72/* Mesa leaves strict aliasing on in the compiler, and this code likes to
73 * dereference the passed-in data as u32*, which means that the compiler is
74 * free to move the u32 read before the write of the struct members being
75 * hashed, and in practice it did in freedreno.  Defining these two macros
76 * to 0 prevents it.
77 */
78#define XXH_FORCE_ALIGN_CHECK 0
79#define XXH_FORCE_MEMORY_ACCESS 0
80
81#include "util/compiler.h" /* for FALLTHROUGH */
82
83#if defined (__cplusplus)
84extern "C" {
85#endif
86
87
88#ifndef XXHASH_H_5627135585666179
89#define XXHASH_H_5627135585666179 1
90
91/* ****************************
92 *  API modifier
93 ******************************/
94/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
95 *  This build macro includes xxhash functions in `static` mode
96 *  in order to inline them, and remove their symbol from the public list.
97 *  Inlining offers great performance improvement on small keys,
98 *  and dramatic ones when length is expressed as a compile-time constant.
99 *  See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html .
100 *  Methodology :
101 *     #define XXH_INLINE_ALL
102 *     #include "xxhash.h"
103 * `xxhash.c` is automatically included.
104 *  It's not useful to compile and link it as a separate object.
105 */
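/* For illustration, a minimal sketch of the XXH_INLINE_ALL pattern described
 * above (the struct and function names are hypothetical):
 *
 *     #define XXH_INLINE_ALL
 *     #include "util/xxhash.h"
 *
 *     static uint32_t
 *     hash_key(const struct my_key *key)
 *     {
 *        // sizeof(*key) is a compile-time constant, so the inlined XXH32
 *        // can be specialized for this exact length.
 *        return XXH32(key, sizeof(*key), 0);
 *     }
 */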
106#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
107#  ifndef XXH_STATIC_LINKING_ONLY
108#    define XXH_STATIC_LINKING_ONLY
109#  endif
110#  if defined(__GNUC__)
111#    define XXH_PUBLIC_API static __inline __attribute__((unused))
112#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
113#    define XXH_PUBLIC_API static inline
114#  elif defined(_MSC_VER)
115#    define XXH_PUBLIC_API static __inline
116#  else
117     /* this version may generate warnings for unused static functions */
118#    define XXH_PUBLIC_API static
119#  endif
120#else
121#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
122#    ifdef XXH_EXPORT
123#      define XXH_PUBLIC_API __declspec(dllexport)
124#    elif XXH_IMPORT
125#      define XXH_PUBLIC_API __declspec(dllimport)
126#    endif
127#  else
128#    define XXH_PUBLIC_API   /* do nothing */
129#  endif
130#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
131
132/*! XXH_NAMESPACE, aka Namespace Emulation :
133 *
134 * If you want to include _and expose_ xxHash functions from within your own library,
135 * but also want to avoid symbol collisions with other libraries which may also include xxHash,
136 *
137 * you can use XXH_NAMESPACE to automatically prefix every public symbol from the xxHash library
138 * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
139 *
140 * Note that no change is required within the calling program as long as it includes `xxhash.h`:
141 * regular symbol names are automatically translated by this header.
142 */
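/* For example (the MYLIB_ prefix below is purely illustrative): building with
 * -DXXH_NAMESPACE=MYLIB_ makes this header translate the public symbol XXH32
 * into MYLIB_XXH32, while calling code keeps writing:
 *
 *     XXH32_hash_t h = XXH32(buf, len, 0);   // resolved to MYLIB_XXH32
 */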
143#ifdef XXH_NAMESPACE
144#  define XXH_CAT(A,B) A##B
145#  define XXH_NAME2(A,B) XXH_CAT(A,B)
146#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
147#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
148#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
149#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
150#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
151#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
152#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
153#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
154#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
155#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
156#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
157#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
158#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
159#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
160#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
161#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
162#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
163#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
164#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
165#endif
166
167
168/* *************************************
169*  Version
170***************************************/
171#define XXH_VERSION_MAJOR    0
172#define XXH_VERSION_MINOR    7
173#define XXH_VERSION_RELEASE  2
174#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
175XXH_PUBLIC_API unsigned XXH_versionNumber (void);
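/* For example, this release (0.7.2) yields 0*10000 + 7*100 + 2 = 702, which is
 * the value XXH_versionNumber() returns at run time. */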
176
177
178/* ****************************
179*  Definitions
180******************************/
181#include <stddef.h>   /* size_t */
182typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
183
184
185/*-**********************************************************************
186*  32-bit hash
187************************************************************************/
188#if !defined (__VMS) \
189  && (defined (__cplusplus) \
190  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
191#   include <stdint.h>
192    typedef uint32_t XXH32_hash_t;
193#else
194#   include <limits.h>
195#   if UINT_MAX == 0xFFFFFFFFUL
196      typedef unsigned int XXH32_hash_t;
197#   else
198#     if ULONG_MAX == 0xFFFFFFFFUL
199        typedef unsigned long XXH32_hash_t;
200#     else
201#       error "unsupported platform : need a 32-bit type"
202#     endif
203#   endif
204#endif
205
206/*! XXH32() :
207    Calculate the 32-bit hash of the sequence of "length" bytes stored at memory address "input".
208    The memory between input & input+length must be valid (allocated and read-accessible).
209    "seed" can be used to alter the result predictably.
210    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
211XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
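/* A minimal one-shot sketch (the buffer and seed below are illustrative):
 *
 *     static const char msg[] = "hello";
 *     XXH32_hash_t h = XXH32(msg, sizeof(msg) - 1, 0);   // hash 5 bytes with seed 0
 */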
212
213/*******   Streaming   *******/
214
215/*
216 * Streaming functions generate the xxHash value from an incremental input.
217 * This method is slower than single-call functions, due to state management.
218 * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
219 *
220 * XXH state must first be allocated, using XXH*_createState() .
221 *
222 * Start a new hash by initializing state with a seed, using XXH*_reset().
223 *
224 * Then, feed the hash state by calling XXH*_update() as many times as necessary.
225 * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
226 *
227 * Finally, a hash value can be produced anytime, by using XXH*_digest().
228 * This function returns the nn-bit hash value as an unsigned 32-bit or 64-bit integer.
229 *
230 * It's still possible to continue inserting input into the hash state after a digest,
231 * and generate new hash values later on, by invoking XXH*_digest() again.
232 *
233 * When done, release the state, using XXH*_freeState().
234 */
235
236typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
237XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
238XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
239XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
240
241XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
242XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
243XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
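/* A minimal streaming sketch for XXH32 (the two input buffers and the seed are
 * illustrative; error checking is elided for brevity):
 *
 *     XXH32_state_t* st = XXH32_createState();
 *     XXH32_reset(st, 0);
 *     XXH32_update(st, part1, part1_len);
 *     XXH32_update(st, part2, part2_len);
 *     XXH32_hash_t h = XXH32_digest(st);   // equals hashing both parts in one call
 *     XXH32_freeState(st);
 */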
244
245/*******   Canonical representation   *******/
246
247/* Default return values from XXH functions are basic unsigned 32 and 64 bits.
248 * This is the simplest and fastest format for further post-processing.
249 * However, this leaves open the question of byte order,
250 * since little-endian and big-endian conventions will write the same number differently.
251 *
252 * The canonical representation settles this issue,
253 * by mandating big-endian convention,
254 * aka, the same convention as human-readable numbers (large digits first).
255 * When writing hash values to storage, sending them over a network, or printing them,
256 * it's highly recommended to use the canonical representation,
257 * to ensure portability across a wider range of systems, present and future.
258 *
259 * The following functions allow transformation of hash values into and from canonical format.
260 */
261
262typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
263XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
264XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
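/* A sketch of writing a hash portably and reading it back (the data/len buffer
 * and the FILE* f are illustrative):
 *
 *     XXH32_canonical_t canon;
 *     XXH32_canonicalFromHash(&canon, XXH32(data, len, 0));
 *     fwrite(canon.digest, 1, sizeof(canon.digest), f);   // always big-endian bytes
 *     ...
 *     XXH32_hash_t h = XXH32_hashFromCanonical(&canon);   // back to a native value
 */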
265
266
267#ifndef XXH_NO_LONG_LONG
268/*-**********************************************************************
269*  64-bit hash
270************************************************************************/
271#if !defined (__VMS) \
272  && (defined (__cplusplus) \
273  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
274#   include <stdint.h>
275    typedef uint64_t XXH64_hash_t;
276#else
277    /* the following type must have a width of 64 bits */
278    typedef unsigned long long XXH64_hash_t;
279#endif
280
281/*! XXH64() :
282 *  Returns the 64-bit hash of the sequence of @length bytes stored at memory address @input.
283 *  @seed can be used to alter the result predictably.
284 *  This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
285 */
286XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed);
287
288/*******   Streaming   *******/
289typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
290XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
291XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
292XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
293
294XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
295XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
296XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
297
298/*******   Canonical representation   *******/
299typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
300XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
301XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
302
303
304#endif  /* XXH_NO_LONG_LONG */
305
306#endif /* XXHASH_H_5627135585666179 */
307
308
309
310#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
311#define XXHASH_H_STATIC_13879238742
312/* ************************************************************************************************
313   This section contains declarations which are not guaranteed to remain stable.
314   They may change in future versions, becoming incompatible with a different version of the library.
315   These declarations should only be used with static linking.
316   Never use them in association with dynamic linking !
317*************************************************************************************************** */
318
319/* These definitions are only present to allow
320 * static allocation of an XXH state, for example on the stack or inside a struct.
321 * Never **ever** access its members directly. */
322
323struct XXH32_state_s {
324   XXH32_hash_t total_len_32;
325   XXH32_hash_t large_len;
326   XXH32_hash_t v1;
327   XXH32_hash_t v2;
328   XXH32_hash_t v3;
329   XXH32_hash_t v4;
330   XXH32_hash_t mem32[4];
331   XXH32_hash_t memsize;
332   XXH32_hash_t reserved;   /* never read nor written; might be removed in a future version */
333};   /* typedef'd to XXH32_state_t */
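/* A sketch of the static-allocation pattern this enables (requires defining
 * XXH_STATIC_LINKING_ONLY before including this header; the seed is illustrative):
 *
 *     XXH32_state_t st;                    // on the stack, no XXH32_createState()
 *     XXH32_reset(&st, 0);
 *     XXH32_update(&st, data, len);
 *     XXH32_hash_t h = XXH32_digest(&st);
 */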
334
335
336#ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
337
338struct XXH64_state_s {
339   XXH64_hash_t total_len;
340   XXH64_hash_t v1;
341   XXH64_hash_t v2;
342   XXH64_hash_t v3;
343   XXH64_hash_t v4;
344   XXH64_hash_t mem64[4];
345   XXH32_hash_t memsize;
346   XXH32_hash_t reserved32;  /* required for padding anyway */
347   XXH64_hash_t reserved64;  /* never read nor written; might be removed in a future version */
348};   /* typedef'd to XXH64_state_t */
349
350#endif  /* XXH_NO_LONG_LONG */
351
352#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
353#  define XXH_IMPLEMENTATION
354#endif
355
356#endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
357
358
359
360/*-**********************************************************************
361*  xxHash implementation
362*  Functions implementation used to be hosted within xxhash.c .
363*  However, code inlining requires to place implementation in the header file.
364*  As a consequence, xxhash.c used to be included within xxhash.h .
365*  But some build systems don't like *.c inclusions.
366*  So the implementation is now directly integrated within xxhash.h .
367*  Another small advantage is that xxhash.c is no longer required in /includes .
368************************************************************************/
369
370#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
371   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
372#  define XXH_IMPLEM_13a8737387
373
374/* *************************************
375*  Tuning parameters
376***************************************/
377/*!XXH_FORCE_MEMORY_ACCESS :
378 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
379 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
380 * The switch below allows selecting a different access method for improved performance.
381 * Method 0 (default) : use `memcpy()`. Safe and portable.
382 * Method 1 : `__packed` attribute. It depends on a compiler extension (i.e. not portable).
383 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
384 * Method 2 : direct access. This method doesn't depend on the compiler, but violates the C standard.
385 *            It can generate buggy code on targets which do not support unaligned memory accesses.
386 *            But in some circumstances, it's the only known way to get the most performance (e.g. GCC + ARMv6).
387 * See http://stackoverflow.com/a/32095106/646947 for details.
388 * Prefer these methods in priority order (0 > 1 > 2)
389 */
390#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
391#  if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
392#    define XXH_FORCE_MEMORY_ACCESS 2
393#  elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
394  (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
395#    define XXH_FORCE_MEMORY_ACCESS 1
396#  endif
397#endif
398
399/*!XXH_ACCEPT_NULL_INPUT_POINTER :
400 * If the input pointer is NULL, xxHash's default behavior is to dereference it, triggering a segfault.
401 * When this macro is enabled, xxHash actively checks the input for a NULL pointer.
402 * If it is NULL, the result is the same as for a zero-length input.
403 */
404#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
405#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
406#endif
407
408/*!XXH_FORCE_ALIGN_CHECK :
409 * This is a minor performance trick, only useful with lots of very small keys.
410 * It means : check for aligned/unaligned input.
411 * The check costs one initial branch per hash;
412 * set it to 0 when the input is guaranteed to be aligned,
413 * or when alignment doesn't matter for performance.
414 */
415#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
416#  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
417#    define XXH_FORCE_ALIGN_CHECK 0
418#  else
419#    define XXH_FORCE_ALIGN_CHECK 1
420#  endif
421#endif
422
423/*!XXH_REROLL:
424 * Whether to reroll XXH32_finalize, and XXH64_finalize,
425 * instead of using an unrolled jump table/if statement loop.
426 *
427 * This is automatically defined on -Os/-Oz on GCC and Clang. */
428#ifndef XXH_REROLL
429#  if defined(__OPTIMIZE_SIZE__)
430#    define XXH_REROLL 1
431#  else
432#    define XXH_REROLL 0
433#  endif
434#endif
435
436
437/* *************************************
438*  Includes & Memory related functions
439***************************************/
440/*! Modify the local functions below should you wish to use some other memory routines
441*   for malloc(), free() */
442#include <stdlib.h>
443static void* XXH_malloc(size_t s) { return malloc(s); }
444static void  XXH_free  (void* p)  { free(p); }
445/*! and for memcpy() */
446#include <string.h>
447static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
448
449#include <limits.h>   /* ULLONG_MAX */
450
451
452/* *************************************
453*  Compiler Specific Options
454***************************************/
455#ifdef _MSC_VER    /* Visual Studio */
456#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
457#  define XXH_FORCE_INLINE static __forceinline
458#  define XXH_NO_INLINE static __declspec(noinline)
459#else
460#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
461#    ifdef __GNUC__
462#      define XXH_FORCE_INLINE static inline __attribute__((always_inline))
463#      define XXH_NO_INLINE static __attribute__((noinline))
464#    else
465#      define XXH_FORCE_INLINE static inline
466#      define XXH_NO_INLINE static
467#    endif
468#  else
469#    define XXH_FORCE_INLINE static
470#    define XXH_NO_INLINE static
471#  endif /* __STDC_VERSION__ */
472#endif
473
474
475
476/* *************************************
477*  Debug
478***************************************/
479/* DEBUGLEVEL is expected to be defined externally,
480 * typically through compiler command line.
481 * Value must be a number. */
482#ifndef DEBUGLEVEL
483#  define DEBUGLEVEL 0
484#endif
485
486#if (DEBUGLEVEL>=1)
487#  include <assert.h>   /* note : can still be disabled with NDEBUG */
488#  define XXH_ASSERT(c)   assert(c)
489#else
490#  define XXH_ASSERT(c)   ((void)0)
491#endif
492
493/* note : use after variable declarations */
494#define XXH_STATIC_ASSERT(c)  { enum { XXH_sa = 1/(int)(!!(c)) }; }
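/* For example, the canonical-representation helpers later in this file use it
 * like this, inside a function body:
 *
 *     XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
 */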
495
496
497/* *************************************
498*  Basic Types
499***************************************/
500#if !defined (__VMS) \
501 && (defined (__cplusplus) \
502 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
503# include <stdint.h>
504  typedef uint8_t  xxh_u8;
505#else
506  typedef unsigned char      xxh_u8;
507#endif
508typedef XXH32_hash_t xxh_u32;
509
510
511/* ***   Memory access   *** */
512
513#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
514
515/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
516static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
517
518#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
519
520/* __packed attributes are safer, but compiler specific, hence potentially problematic for some compilers */
521/* currently only defined for gcc and icc */
522typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
523static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
524
525#else
526
527/* portable and safe solution. Generally efficient.
528 * see : http://stackoverflow.com/a/32095106/646947
529 */
530static xxh_u32 XXH_read32(const void* memPtr)
531{
532    xxh_u32 val;
533    memcpy(&val, memPtr, sizeof(val));
534    return val;
535}
536
537#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
538
539
540/* ***   Endianness   *** */
541typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
542
543/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
544#ifndef XXH_CPU_LITTLE_ENDIAN
545#  if defined(_WIN32) /* Windows is always little endian */ \
546     || defined(__LITTLE_ENDIAN__) \
547     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
548#    define XXH_CPU_LITTLE_ENDIAN 1
549#  elif defined(__BIG_ENDIAN__) \
550     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
551#    define XXH_CPU_LITTLE_ENDIAN 0
552#  else
553static int XXH_isLittleEndian(void)
554{
555    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
556    return one.c[0];
557}
558#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
559#  endif
560#endif
561
562
563
564
565/* ****************************************
566*  Compiler-specific Functions and Macros
567******************************************/
568#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
569
570#ifndef __has_builtin
571#  define __has_builtin(x) 0
572#endif
573
574#if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
575#  define XXH_rotl32 __builtin_rotateleft32
576#  define XXH_rotl64 __builtin_rotateleft64
577/* Note : although _rotl exists for MinGW (GCC under Windows), performance seems poor */
578#elif defined(_MSC_VER)
579#  define XXH_rotl32(x,r) _rotl(x,r)
580#  define XXH_rotl64(x,r) _rotl64(x,r)
581#else
582#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
583#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
584#endif
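/* For example, XXH_rotl32(0x80000001, 1) == 0x00000003: bit 31 wraps around to
 * bit 0 while the remaining bits shift left by one. */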
585
586#if defined(_MSC_VER)     /* Visual Studio */
587#  define XXH_swap32 _byteswap_ulong
588#elif XXH_GCC_VERSION >= 403
589#  define XXH_swap32 __builtin_bswap32
590#else
591static xxh_u32 XXH_swap32 (xxh_u32 x)
592{
593    return  ((x << 24) & 0xff000000 ) |
594            ((x <<  8) & 0x00ff0000 ) |
595            ((x >>  8) & 0x0000ff00 ) |
596            ((x >> 24) & 0x000000ff );
597}
598#endif
599
600
601/* ***************************
602*  Memory reads
603*****************************/
604typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
605
606XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
607{
608    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
609}
610
611static xxh_u32 XXH_readBE32(const void* ptr)
612{
613    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
614}
615
616XXH_FORCE_INLINE xxh_u32
617XXH_readLE32_align(const void* ptr, XXH_alignment align)
618{
619    if (align==XXH_unaligned) {
620        return XXH_readLE32(ptr);
621    } else {
622        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
623    }
624}
625
626
627/* *************************************
628*  Misc
629***************************************/
630XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
631
632
633/* *******************************************************************
634*  32-bit hash functions
635*********************************************************************/
636static const xxh_u32 PRIME32_1 = 0x9E3779B1U;   /* 0b10011110001101110111100110110001 */
637static const xxh_u32 PRIME32_2 = 0x85EBCA77U;   /* 0b10000101111010111100101001110111 */
638static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU;   /* 0b11000010101100101010111000111101 */
639static const xxh_u32 PRIME32_4 = 0x27D4EB2FU;   /* 0b00100111110101001110101100101111 */
640static const xxh_u32 PRIME32_5 = 0x165667B1U;   /* 0b00010110010101100110011110110001 */
641
642static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
643{
644    acc += input * PRIME32_2;
645    acc  = XXH_rotl32(acc, 13);
646    acc *= PRIME32_1;
647#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
648    /* UGLY HACK:
649     * This inline assembly hack forces acc into a normal register. This is the
650     * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
651     * (pragmas and attributes don't work for some reason) without globally
652     * disabling SSE4.1.
653     *
654     * The reason we want to avoid vectorization is because despite working on
655     * 4 integers at a time, there are multiple factors slowing XXH32 down on
656     * SSE4:
657     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
658     *   making it slightly slower to multiply four integers at once compared to four
659     *   integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
660     *   still not worth it to go into SSE just to multiply unless doing a long operation.
661     *
662     * - Four instructions are required to rotate,
663     *      movdqa tmp,  v // not required with VEX encoding
664     *      pslld  tmp, 13 // tmp <<= 13
665     *      psrld  v,   19 // x >>= 19
666     *      por    v,  tmp // x |= tmp
667     *   compared to one for scalar:
668     *      roll   v, 13    // reliably fast across the board
669     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
670     *
671     * - Instruction level parallelism is actually more beneficial here because the
672     *   SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
673     *   while v3 can multiply. SSE forces them to operate together.
674     *
675     * How this hack works:
676     * __asm__(""       // Declare an assembly block but don't declare any instructions
677     *          :       // However, as an Input/Output Operand,
678     *          "+r"    // constrain a read/write operand (+) as a general purpose register (r).
679     *          (acc)   // and set acc as the operand
680     * );
681     *
682     * Because of the 'r', the compiler has promised that acc will be in a
683     * general purpose register and the '+' says that it will be 'read/write',
684     * so it has to assume it has changed. It is like volatile without all the
685     * loads and stores.
686     *
687     * Since the argument has to be in a normal register (not an SSE register),
688     * each time XXH32_round is called, it is impossible to vectorize. */
689    __asm__("" : "+r" (acc));
690#endif
691    return acc;
692}
693
694/* mix all bits */
695static xxh_u32 XXH32_avalanche(xxh_u32 h32)
696{
697    h32 ^= h32 >> 15;
698    h32 *= PRIME32_2;
699    h32 ^= h32 >> 13;
700    h32 *= PRIME32_3;
701    h32 ^= h32 >> 16;
702    return(h32);
703}
704
705#define XXH_get32bits(p) XXH_readLE32_align(p, align)
706
707static xxh_u32
708XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
709{
710#define PROCESS1               \
711    h32 += (*ptr++) * PRIME32_5; \
712    h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
713
714#define PROCESS4                         \
715    h32 += XXH_get32bits(ptr) * PRIME32_3; \
716    ptr+=4;                                \
717    h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
718
719    /* Compact rerolled version */
720    if (XXH_REROLL) {
721        len &= 15;
722        while (len >= 4) {
723            PROCESS4;
724            len -= 4;
725        }
726        while (len > 0) {
727            PROCESS1;
728            --len;
729        }
730        return XXH32_avalanche(h32);
731    } else {
732         switch(len&15) /* or switch(bEnd - p) */ {
733           case 12:      PROCESS4;
734                         FALLTHROUGH;
735           case 8:       PROCESS4;
736                         FALLTHROUGH;
737           case 4:       PROCESS4;
738                         return XXH32_avalanche(h32);
739
740           case 13:      PROCESS4;
741                         FALLTHROUGH;
742           case 9:       PROCESS4;
743                         FALLTHROUGH;
744           case 5:       PROCESS4;
745                         PROCESS1;
746                         return XXH32_avalanche(h32);
747
748           case 14:      PROCESS4;
749                         FALLTHROUGH;
750           case 10:      PROCESS4;
751                         FALLTHROUGH;
752           case 6:       PROCESS4;
753                         PROCESS1;
754                         PROCESS1;
755                         return XXH32_avalanche(h32);
756
757           case 15:      PROCESS4;
758                         FALLTHROUGH;
759           case 11:      PROCESS4;
760                         FALLTHROUGH;
761           case 7:       PROCESS4;
762                         FALLTHROUGH;
763           case 3:       PROCESS1;
764                         FALLTHROUGH;
765           case 2:       PROCESS1;
766                         FALLTHROUGH;
767           case 1:       PROCESS1;
768                         FALLTHROUGH;
769           case 0:       return XXH32_avalanche(h32);
770        }
771        XXH_ASSERT(0);
772        return h32;   /* reaching this point is deemed impossible */
773    }
774}
775
776XXH_FORCE_INLINE xxh_u32
777XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
778{
779    const xxh_u8* bEnd = input + len;
780    xxh_u32 h32;
781
782#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
783    if (input==NULL) {
784        len=0;
785        bEnd=input=(const xxh_u8*)(size_t)16;
786    }
787#endif
788
789    if (len>=16) {
790        const xxh_u8* const limit = bEnd - 15;
791        xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2;
792        xxh_u32 v2 = seed + PRIME32_2;
793        xxh_u32 v3 = seed + 0;
794        xxh_u32 v4 = seed - PRIME32_1;
795
796        do {
797            v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
798            v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
799            v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
800            v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
801        } while (input < limit);
802
803        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
804            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
805    } else {
806        h32  = seed + PRIME32_5;
807    }
808
809    h32 += (xxh_u32)len;
810
811    return XXH32_finalize(h32, input, len&15, align);
812}
813
814
815XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
816{
817#if 0
818    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
819    XXH32_state_t state;
820    XXH32_reset(&state, seed);
821    XXH32_update(&state, (const xxh_u8*)input, len);
822    return XXH32_digest(&state);
823
824#else
825
826    if (XXH_FORCE_ALIGN_CHECK) {
827        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
828            return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
829    }   }
830
831    return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
832#endif
833}
834
835
836
837/*******   Hash streaming   *******/
838
839XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
840{
841    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
842}
843XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
844{
845    XXH_free(statePtr);
846    return XXH_OK;
847}
848
849XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
850{
851    memcpy(dstState, srcState, sizeof(*dstState));
852}
853
854XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
855{
856    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
857    memset(&state, 0, sizeof(state));
858    state.v1 = seed + PRIME32_1 + PRIME32_2;
859    state.v2 = seed + PRIME32_2;
860    state.v3 = seed + 0;
861    state.v4 = seed - PRIME32_1;
862    /* do not write into reserved, planned to be removed in a future version */
863    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
864    return XXH_OK;
865}
866
867
868XXH_PUBLIC_API XXH_errorcode
869XXH32_update(XXH32_state_t* state, const void* input, size_t len)
870{
871    if (input==NULL)
872#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
873        return XXH_OK;
874#else
875        return XXH_ERROR;
876#endif
877
878    {   const xxh_u8* p = (const xxh_u8*)input;
879        const xxh_u8* const bEnd = p + len;
880
881        state->total_len_32 += (XXH32_hash_t)len;
882        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
883
884        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
885            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
886            state->memsize += (XXH32_hash_t)len;
887            return XXH_OK;
888        }
889
890        if (state->memsize) {   /* some data left from previous update */
891            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
892            {   const xxh_u32* p32 = state->mem32;
893                state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
894                state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
895                state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
896                state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
897            }
898            p += 16-state->memsize;
899            state->memsize = 0;
900        }
901
902        if (p <= bEnd-16) {
903            const xxh_u8* const limit = bEnd - 16;
904            xxh_u32 v1 = state->v1;
905            xxh_u32 v2 = state->v2;
906            xxh_u32 v3 = state->v3;
907            xxh_u32 v4 = state->v4;
908
909            do {
910                v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
911                v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
912                v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
913                v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
914            } while (p<=limit);
915
916            state->v1 = v1;
917            state->v2 = v2;
918            state->v3 = v3;
919            state->v4 = v4;
920        }
921
922        if (p < bEnd) {
923            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
924            state->memsize = (unsigned)(bEnd-p);
925        }
926    }
927
928    return XXH_OK;
929}
930
931
932XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
933{
934    xxh_u32 h32;
935
936    if (state->large_len) {
937        h32 = XXH_rotl32(state->v1, 1)
938            + XXH_rotl32(state->v2, 7)
939            + XXH_rotl32(state->v3, 12)
940            + XXH_rotl32(state->v4, 18);
941    } else {
942        h32 = state->v3 /* == seed */ + PRIME32_5;
943    }
944
945    h32 += state->total_len_32;
946
947    return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
948}
949
950
951/*******   Canonical representation   *******/
952
953/*! Default XXH result types are basic unsigned 32 and 64 bits.
954*   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
955*   These functions allow transformation of hash result into and from its canonical format.
956*   This way, hash values can be written into a file or buffer, remaining comparable across different systems.
957*/
958
959XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
960{
961    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
962    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
963    memcpy(dst, &hash, sizeof(*dst));
964}
965
966XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
967{
968    return XXH_readBE32(src);
969}
970
971
972#ifndef XXH_NO_LONG_LONG
973
974/* *******************************************************************
975*  64-bit hash functions
976*********************************************************************/
977
978/*******   Memory access   *******/
979
980typedef XXH64_hash_t xxh_u64;
981
982
983/*! XXH_REROLL_XXH64:
984 * Whether to reroll the XXH64_finalize() loop.
985 *
986 * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain
987 * on 64-bit hosts, as only one jump is required.
988 *
989 * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers,
990 * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes
991 * ridiculously large (the largest function in the binary on i386!), and rerolling it saves
992 * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better
993 * and is more likely to be inlined by the compiler.
994 *
995 * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */
996#ifndef XXH_REROLL_XXH64
997#  if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
998   || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
999     || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
1000     || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
1001     || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
1002   || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
1003#    define XXH_REROLL_XXH64 1
1004#  else
1005#    define XXH_REROLL_XXH64 0
1006#  endif
1007#endif /* !defined(XXH_REROLL_XXH64) */
1008
1009#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1010
1011/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
1012static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }
1013
1014#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1015
1016/* __packed attributes are safer, but compiler specific, hence potentially problematic for some compilers */
1017/* currently only defined for gcc and icc */
1018typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
1019static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
1020
1021#else
1022
1023/* portable and safe solution. Generally efficient.
1024 * see : http://stackoverflow.com/a/32095106/646947
1025 */
1026
1027static xxh_u64 XXH_read64(const void* memPtr)
1028{
1029    xxh_u64 val;
1030    memcpy(&val, memPtr, sizeof(val));
1031    return val;
1032}
1033
1034#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
1035
1036#if defined(_MSC_VER)     /* Visual Studio */
1037#  define XXH_swap64 _byteswap_uint64
1038#elif XXH_GCC_VERSION >= 403
1039#  define XXH_swap64 __builtin_bswap64
1040#else
1041static xxh_u64 XXH_swap64 (xxh_u64 x)
1042{
1043    return  ((x << 56) & 0xff00000000000000ULL) |
1044            ((x << 40) & 0x00ff000000000000ULL) |
1045            ((x << 24) & 0x0000ff0000000000ULL) |
1046            ((x << 8)  & 0x000000ff00000000ULL) |
1047            ((x >> 8)  & 0x00000000ff000000ULL) |
1048            ((x >> 24) & 0x0000000000ff0000ULL) |
1049            ((x >> 40) & 0x000000000000ff00ULL) |
1050            ((x >> 56) & 0x00000000000000ffULL);
1051}
1052#endif
1053
1054XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
1055{
1056    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
1057}
1058
1059static xxh_u64 XXH_readBE64(const void* ptr)
1060{
1061    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
1062}
1063
1064XXH_FORCE_INLINE xxh_u64
1065XXH_readLE64_align(const void* ptr, XXH_alignment align)
1066{
1067    if (align==XXH_unaligned)
1068        return XXH_readLE64(ptr);
1069    else
1070        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
1071}
1072
1073
1074/*******   xxh64   *******/
1075
1076static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL;   /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
1077static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;   /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
1078static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL;   /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
1079static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL;   /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
1080static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL;   /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
1081
1082static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
1083{
1084    acc += input * PRIME64_2;
1085    acc  = XXH_rotl64(acc, 31);
1086    acc *= PRIME64_1;
1087    return acc;
1088}
1089
1090static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
1091{
1092    val  = XXH64_round(0, val);
1093    acc ^= val;
1094    acc  = acc * PRIME64_1 + PRIME64_4;
1095    return acc;
1096}
1097
1098static xxh_u64 XXH64_avalanche(xxh_u64 h64)
1099{
1100    h64 ^= h64 >> 33;
1101    h64 *= PRIME64_2;
1102    h64 ^= h64 >> 29;
1103    h64 *= PRIME64_3;
1104    h64 ^= h64 >> 32;
1105    return h64;
1106}
1107
1108
1109#define XXH_get64bits(p) XXH_readLE64_align(p, align)
1110
1111static xxh_u64
1112XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
1113{
1114#define PROCESS1_64            \
1115    h64 ^= (*ptr++) * PRIME64_5; \
1116    h64 = XXH_rotl64(h64, 11) * PRIME64_1;
1117
1118#define PROCESS4_64          \
1119    h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \
1120    ptr+=4;                    \
1121    h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
1122
1123#define PROCESS8_64 {        \
1124    xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
1125    ptr+=8;                    \
1126    h64 ^= k1;               \
1127    h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
1128}
1129
1130    /* Rerolled version for 32-bit targets is faster and much smaller. */
1131    if (XXH_REROLL || XXH_REROLL_XXH64) {
1132        len &= 31;
1133        while (len >= 8) {
1134            PROCESS8_64;
1135            len -= 8;
1136        }
1137        if (len >= 4) {
1138            PROCESS4_64;
1139            len -= 4;
1140        }
1141        while (len > 0) {
1142            PROCESS1_64;
1143            --len;
1144        }
1145         return  XXH64_avalanche(h64);
1146    } else {
1147        switch(len & 31) {
1148           case 24: PROCESS8_64;
1149                    FALLTHROUGH;
1150           case 16: PROCESS8_64;
1151                    FALLTHROUGH;
1152           case  8: PROCESS8_64;
1153                    return XXH64_avalanche(h64);
1154
1155           case 28: PROCESS8_64;
1156                    FALLTHROUGH;
1157           case 20: PROCESS8_64;
1158                    FALLTHROUGH;
1159           case 12: PROCESS8_64;
1160                    FALLTHROUGH;
1161           case  4: PROCESS4_64;
1162                    return XXH64_avalanche(h64);
1163
1164           case 25: PROCESS8_64;
1165                    FALLTHROUGH;
1166           case 17: PROCESS8_64;
1167                    FALLTHROUGH;
1168           case  9: PROCESS8_64;
1169                    PROCESS1_64;
1170                    return XXH64_avalanche(h64);
1171
1172           case 29: PROCESS8_64;
1173                    FALLTHROUGH;
1174           case 21: PROCESS8_64;
1175                    FALLTHROUGH;
1176           case 13: PROCESS8_64;
1177                    FALLTHROUGH;
1178           case  5: PROCESS4_64;
1179                    PROCESS1_64;
1180                    return XXH64_avalanche(h64);
1181
1182           case 26: PROCESS8_64;
1183                    FALLTHROUGH;
1184           case 18: PROCESS8_64;
1185                    FALLTHROUGH;
1186           case 10: PROCESS8_64;
1187                    PROCESS1_64;
1188                    PROCESS1_64;
1189                    return XXH64_avalanche(h64);
1190
1191           case 30: PROCESS8_64;
1192                    FALLTHROUGH;
1193           case 22: PROCESS8_64;
1194                    FALLTHROUGH;
1195           case 14: PROCESS8_64;
1196                    FALLTHROUGH;
1197           case  6: PROCESS4_64;
1198                    PROCESS1_64;
1199                    PROCESS1_64;
1200                    return XXH64_avalanche(h64);
1201
1202           case 27: PROCESS8_64;
1203                    FALLTHROUGH;
1204           case 19: PROCESS8_64;
1205                    FALLTHROUGH;
1206           case 11: PROCESS8_64;
1207                    PROCESS1_64;
1208                    PROCESS1_64;
1209                    PROCESS1_64;
1210                    return XXH64_avalanche(h64);
1211
1212           case 31: PROCESS8_64;
1213                    FALLTHROUGH;
1214           case 23: PROCESS8_64;
1215                    FALLTHROUGH;
1216           case 15: PROCESS8_64;
1217                    FALLTHROUGH;
1218           case  7: PROCESS4_64;
1219                    FALLTHROUGH;
1220           case  3: PROCESS1_64;
1221                    FALLTHROUGH;
1222           case  2: PROCESS1_64;
1223                    FALLTHROUGH;
1224           case  1: PROCESS1_64;
1225                    FALLTHROUGH;
1226           case  0: return XXH64_avalanche(h64);
1227        }
1228    }
1229    /* impossible to reach */
1230    XXH_ASSERT(0);
1231    return 0;  /* unreachable, but some compilers complain without it */
1232}
1233
1234XXH_FORCE_INLINE xxh_u64
1235XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
1236{
1237    const xxh_u8* bEnd = input + len;
1238    xxh_u64 h64;
1239
1240#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1241    if (input==NULL) {
1242        len=0;
1243        bEnd=input=(const xxh_u8*)(size_t)32;
1244    }
1245#endif
1246
1247    if (len>=32) {
1248        const xxh_u8* const limit = bEnd - 32;
1249        xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2;
1250        xxh_u64 v2 = seed + PRIME64_2;
1251        xxh_u64 v3 = seed + 0;
1252        xxh_u64 v4 = seed - PRIME64_1;
1253
1254        do {
1255            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
1256            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
1257            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
1258            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
1259        } while (input<=limit);
1260
1261        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1262        h64 = XXH64_mergeRound(h64, v1);
1263        h64 = XXH64_mergeRound(h64, v2);
1264        h64 = XXH64_mergeRound(h64, v3);
1265        h64 = XXH64_mergeRound(h64, v4);
1266
1267    } else {
1268        h64  = seed + PRIME64_5;
1269    }
1270
1271    h64 += (xxh_u64) len;
1272
1273    return XXH64_finalize(h64, input, len, align);
1274}
1275
1276
1277XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
1278{
1279#if 0
1280    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
1281    XXH64_state_t state;
1282    XXH64_reset(&state, seed);
1283    XXH64_update(&state, (const xxh_u8*)input, len);
1284    return XXH64_digest(&state);
1285
1286#else
1287
1288    if (XXH_FORCE_ALIGN_CHECK) {
1289        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
1290            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
1291    }   }
1292
1293    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
1294
1295#endif
1296}
1297
1298/*******   Hash Streaming   *******/
1299
1300XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
1301{
1302    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
1303}
1304XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
1305{
1306    XXH_free(statePtr);
1307    return XXH_OK;
1308}
1309
1310XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
1311{
1312    memcpy(dstState, srcState, sizeof(*dstState));
1313}
1314
1315XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
1316{
1317    XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
1318    memset(&state, 0, sizeof(state));
1319    state.v1 = seed + PRIME64_1 + PRIME64_2;
1320    state.v2 = seed + PRIME64_2;
1321    state.v3 = seed + 0;
1322    state.v4 = seed - PRIME64_1;
1323     /* do not write into reserved64, might be removed in a future version */
1324    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
1325    return XXH_OK;
1326}
1327
1328XXH_PUBLIC_API XXH_errorcode
1329XXH64_update (XXH64_state_t* state, const void* input, size_t len)
1330{
1331    if (input==NULL)
1332#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1333        return XXH_OK;
1334#else
1335        return XXH_ERROR;
1336#endif
1337
1338    {   const xxh_u8* p = (const xxh_u8*)input;
1339        const xxh_u8* const bEnd = p + len;
1340
1341        state->total_len += len;
1342
1343        if (state->memsize + len < 32) {  /* fill in tmp buffer */
1344            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
1345            state->memsize += (xxh_u32)len;
1346            return XXH_OK;
1347        }
1348
1349        if (state->memsize) {   /* tmp buffer is full */
1350            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
1351            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
1352            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
1353            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
1354            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
1355            p += 32-state->memsize;
1356            state->memsize = 0;
1357        }
1358
1359        if (p+32 <= bEnd) {
1360            const xxh_u8* const limit = bEnd - 32;
1361            xxh_u64 v1 = state->v1;
1362            xxh_u64 v2 = state->v2;
1363            xxh_u64 v3 = state->v3;
1364            xxh_u64 v4 = state->v4;
1365
1366            do {
1367                v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
1368                v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
1369                v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
1370                v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
1371            } while (p<=limit);
1372
1373            state->v1 = v1;
1374            state->v2 = v2;
1375            state->v3 = v3;
1376            state->v4 = v4;
1377        }
1378
1379        if (p < bEnd) {
1380            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
1381            state->memsize = (unsigned)(bEnd-p);
1382        }
1383    }
1384
1385    return XXH_OK;
1386}
1387
1388
1389XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
1390{
1391    xxh_u64 h64;
1392
1393    if (state->total_len >= 32) {
1394        xxh_u64 const v1 = state->v1;
1395        xxh_u64 const v2 = state->v2;
1396        xxh_u64 const v3 = state->v3;
1397        xxh_u64 const v4 = state->v4;
1398
1399        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1400        h64 = XXH64_mergeRound(h64, v1);
1401        h64 = XXH64_mergeRound(h64, v2);
1402        h64 = XXH64_mergeRound(h64, v3);
1403        h64 = XXH64_mergeRound(h64, v4);
1404    } else {
1405        h64  = state->v3 /*seed*/ + PRIME64_5;
1406    }
1407
1408    h64 += (xxh_u64) state->total_len;
1409
1410    return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
1411}
1412
1413
1414/******* Canonical representation   *******/
1415
1416XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
1417{
1418    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
1419    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
1420    memcpy(dst, &hash, sizeof(*dst));
1421}
1422
1423XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
1424{
1425    return XXH_readBE64(src);
1426}
1427
1428
1429
1430/* *********************************************************************
1431*  XXH3
1432*  New generation hash designed for speed on small keys and vectorization
1433************************************************************************ */
1434
1435/* #include "xxh3.h" */
1436
1437
1438#endif  /* XXH_NO_LONG_LONG */
1439
1440
1441#endif  /* XXH_IMPLEMENTATION */
1442
1443
1444#if defined (__cplusplus)
1445}
1446#endif
1447