1 /*
2 xxHash - Extremely Fast Hash algorithm
3 Header File
4 Copyright (C) 2012-2016, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34
35 /* Notice extracted from xxHash homepage :
36
37 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38 It also successfully passes all tests from the SMHasher suite.
39
40 Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41
42 Name            Speed       Q.Score   Author
43 xxHash          5.4 GB/s     10
44 CrapWow         3.2 GB/s      2       Andrew
45 MurmurHash 3a   2.7 GB/s     10       Austin Appleby
46 SpookyHash      2.0 GB/s     10       Bob Jenkins
47 SBox            1.4 GB/s      9       Bret Mulvey
48 Lookup3         1.2 GB/s      9       Bob Jenkins
49 SuperFastHash   1.2 GB/s      1       Paul Hsieh
50 CityHash64      1.05 GB/s    10       Pike & Alakuijala
51 FNV             0.55 GB/s     5       Fowler, Noll, Vo
52 CRC32           0.43 GB/s     9
53 MD5-32          0.33 GB/s    10       Ronald L. Rivest
54 SHA1-32         0.28 GB/s    10
55
56 Q.Score is a measure of quality of the hash function.
57 It depends on successfully passing SMHasher test set.
58 10 is a perfect score.
59
60 Note : SMHasher's CRC32 implementation is not the fastest one.
61 Other speed-oriented implementations can be faster,
62 especially in combination with PCLMUL instruction :
63 http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
64
65 A 64-bit version, named XXH64, is available since r35.
66 It offers much better speed, but for 64-bit applications only.
67 Name     Speed on 64 bits    Speed on 32 bits
68 XXH64    13.8 GB/s            1.9 GB/s
69 XXH32     6.8 GB/s            6.0 GB/s
70 */
71
72 /* Mesa leaves strict aliasing on in the compiler, and this code likes to
73 * dereference the passed in data as u32*, which means that the compiler is
74 * free to move the u32 read before the write of the struct members being
75 * hashed, and in practice it did in freedreno. Forcing these two things
76 * prevents it.
77 */
78 #define XXH_FORCE_ALIGN_CHECK 0
79 #define XXH_FORCE_MEMORY_ACCESS 0
80
81 #include "util/compiler.h" /* for FALLTHROUGH */
82
83 #if defined (__cplusplus)
84 extern "C" {
85 #endif
86
87
88 #ifndef XXHASH_H_5627135585666179
89 #define XXHASH_H_5627135585666179 1
90
91 /* ****************************
92 * API modifier
93 ******************************/
94 /** XXH_INLINE_ALL (and XXH_PRIVATE_API)
95 * This build macro includes xxhash functions in `static` mode
96 * in order to inline them, and remove their symbol from the public list.
97 * Inlining offers great performance improvement on small keys,
98 * and dramatic ones when length is expressed as a compile-time constant.
99 * See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html .
100 * Methodology :
101 * #define XXH_INLINE_ALL
102 * #include "xxhash.h"
103 * `xxhash.c` is automatically included.
104 * It's not useful to compile and link it as a separate object.
105 */
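/* Example (illustrative sketch, not part of this header; the file and function
 * names below are hypothetical) :
 *
 *     // my_hash.c
 *     #define XXH_INLINE_ALL
 *     #include "xxhash.h"
 *
 *     static XXH32_hash_t hash_key(const char* key, size_t key_len)
 *     {
 *         // With XXH_INLINE_ALL, this call can be fully inlined,
 *         // which helps most when key_len is a compile-time constant.
 *         return XXH32(key, key_len, 0);
 *     }
 */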
106 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
107 # ifndef XXH_STATIC_LINKING_ONLY
108 # define XXH_STATIC_LINKING_ONLY
109 # endif
110 # if defined(__GNUC__)
111 # define XXH_PUBLIC_API static __inline __attribute__((unused))
112 # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
113 # define XXH_PUBLIC_API static inline
114 # elif defined(_MSC_VER)
115 # define XXH_PUBLIC_API static __inline
116 # else
117 /* this version may generate warnings for unused static functions */
118 # define XXH_PUBLIC_API static
119 # endif
120 #else
121 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
122 # ifdef XXH_EXPORT
123 # define XXH_PUBLIC_API __declspec(dllexport)
124 # elif XXH_IMPORT
125 # define XXH_PUBLIC_API __declspec(dllimport)
126 # endif
127 # else
128 # define XXH_PUBLIC_API /* do nothing */
129 # endif
130 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
131
132 /*! XXH_NAMESPACE, aka Namespace Emulation :
133 *
134 * If you want to include _and expose_ xxHash functions from within your own library,
135 * but also want to avoid symbol collisions with other libraries which may also include xxHash,
136 *
137 * you can use XXH_NAMESPACE to automatically prefix any public symbol from the xxhash library
138 * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
139 *
140 * Note that no change is required within the calling program as long as it includes `xxhash.h` :
141 * regular symbol names will be automatically translated by this header.
142 */
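/* Example (illustrative sketch) : a hypothetical build that compiles the xxHash
 * implementation as its own translation unit with a project-specific prefix.
 * The prefix "mylib_" is made up for illustration.
 *
 *     cc -DXXH_NAMESPACE=mylib_ -c xxhash.c
 *
 *     // Calling code is unchanged, as long as it includes xxhash.h :
 *     #include "xxhash.h"
 *     XXH32_hash_t h = XXH32(buf, len, 0);   // resolves to mylib_XXH32 at link time
 */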
143 #ifdef XXH_NAMESPACE
144 # define XXH_CAT(A,B) A##B
145 # define XXH_NAME2(A,B) XXH_CAT(A,B)
146 # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
147 # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
148 # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
149 # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
150 # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
151 # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
152 # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
153 # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
154 # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
155 # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
156 # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
157 # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
158 # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
159 # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
160 # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
161 # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
162 # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
163 # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
164 # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
165 #endif
166
167
168 /* *************************************
169 * Version
170 ***************************************/
171 #define XXH_VERSION_MAJOR 0
172 #define XXH_VERSION_MINOR 7
173 #define XXH_VERSION_RELEASE 2
174 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
175 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
176
177
178 /* ****************************
179 * Definitions
180 ******************************/
181 #include <stddef.h> /* size_t */
182 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
183
184
185 /*-**********************************************************************
186 * 32-bit hash
187 ************************************************************************/
188 #if !defined (__VMS) \
189 && (defined (__cplusplus) \
190 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
191 # include <stdint.h>
192 typedef uint32_t XXH32_hash_t;
193 #else
194 # include <limits.h>
195 # if UINT_MAX == 0xFFFFFFFFUL
196 typedef unsigned int XXH32_hash_t;
197 # else
198 # if ULONG_MAX == 0xFFFFFFFFUL
199 typedef unsigned long XXH32_hash_t;
200 # else
201 # error "unsupported platform : need a 32-bit type"
202 # endif
203 # endif
204 #endif
205
206 /*! XXH32() :
207 Calculate the 32-bit hash of the sequence of "length" bytes stored at memory address "input".
208 The memory between input & input+length must be valid (allocated and read-accessible).
209 "seed" can be used to alter the result predictably.
210 Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
211 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
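/* Example (minimal sketch) : hashing a small buffer in one call.
 *
 *     #include <string.h>
 *     const char msg[] = "xxhash";
 *     XXH32_hash_t const h = XXH32(msg, strlen(msg), 0);  // seed 0 ; same input + seed => same hash
 */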
212
213 /******* Streaming *******/
214
215 /*
216 * Streaming functions generate the xxHash value from an incremental input.
217 * This method is slower than single-call functions, due to state management.
218 * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
219 *
220 * XXH state must first be allocated, using XXH*_createState() .
221 *
222 * Start a new hash by initializing state with a seed, using XXH*_reset().
223 *
224 * Then, feed the hash state by calling XXH*_update() as many times as necessary.
225 * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
226 *
227 * Finally, a hash value can be produced anytime, by using XXH*_digest().
228 * This function returns the nn-bit hash value (an XXH32_hash_t or XXH64_hash_t).
229 *
230 * It's still possible to continue inserting input into the hash state after a digest,
231 * and to generate new hash values later on, by calling XXH*_digest() again.
232 *
233 * When done, release the state, using XXH*_freeState().
234 */
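/* Example (minimal sketch of the sequence described above; read_next_chunk(),
 * chunk and chunk_len are hypothetical) :
 *
 *     XXH32_state_t* const st = XXH32_createState();
 *     if (st == NULL) return XXH_ERROR;         // or any other error handling
 *     XXH32_reset(st, 0);                       // start a new hash with seed 0
 *     while (read_next_chunk(&chunk, &chunk_len))
 *         XXH32_update(st, chunk, chunk_len);
 *     XXH32_hash_t const h = XXH32_digest(st);  // can update and digest again afterwards
 *     XXH32_freeState(st);
 */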
235
236 typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
237 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
238 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
239 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
240
241 XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
242 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
243 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
244
245 /******* Canonical representation *******/
246
247 /* The default return values from XXH functions are basic unsigned 32 and 64 bit integers.
248 * This is the simplest and fastest format for further post-processing.
249 * However, this leaves open the question of byte order,
250 * since little-endian and big-endian conventions will store the same number differently.
251 *
252 * The canonical representation settles this issue,
253 * by mandating big-endian convention,
254 * aka, the same convention as human-readable numbers (large digits first).
255 * When writing hash values to storage, sending them over a network, or printing them,
256 * it's highly recommended to use the canonical representation,
257 * to ensure portability across a wider range of systems, present and future.
258 *
259 * The following functions allow transformation of hash values into and from canonical format.
260 */
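/* Example (minimal sketch) : writing a hash in canonical (big-endian) form and
 * reading it back, so the stored bytes compare equal across platforms.
 * `data`, `len` and the FILE* `f` are assumed to exist.
 *
 *     XXH32_hash_t const h = XXH32(data, len, 0);
 *     XXH32_canonical_t c;
 *     XXH32_canonicalFromHash(&c, h);                 // c.digest[] is big-endian
 *     fwrite(c.digest, 1, sizeof(c.digest), f);       // portable on-disk representation
 *     // ... later, after reading the 4 bytes back into c :
 *     XXH32_hash_t const h2 = XXH32_hashFromCanonical(&c);   // h2 == h
 */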
261
262 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
263 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
264 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
265
266
267 #ifndef XXH_NO_LONG_LONG
268 /*-**********************************************************************
269 * 64-bit hash
270 ************************************************************************/
271 #if !defined (__VMS) \
272 && (defined (__cplusplus) \
273 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
274 # include <stdint.h>
275 typedef uint64_t XXH64_hash_t;
276 #else
277 /* the following type must have a width of 64-bit */
278 typedef unsigned long long XXH64_hash_t;
279 #endif
280
281 /*! XXH64() :
282 * Returns the 64-bit hash of the sequence of @length bytes stored at memory address @input.
283 * @seed can be used to alter the result predictably.
284 * This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
285 */
286 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed);
287
288 /******* Streaming *******/
289 typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
290 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
291 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
292 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
293
294 XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed);
295 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
296 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
297
298 /******* Canonical representation *******/
299 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
300 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
301 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
302
303
304 #endif /* XXH_NO_LONG_LONG */
305
306 #endif /* XXHASH_H_5627135585666179 */
307
308
309
310 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
311 #define XXHASH_H_STATIC_13879238742
312 /* ************************************************************************************************
313 This section contains declarations which are not guaranteed to remain stable.
314 They may change in future versions, becoming incompatible with a different version of the library.
315 These declarations should only be used with static linking.
316 Never use them in association with dynamic linking !
317 *************************************************************************************************** */
318
319 /* These definitions are only present to allow
320 * static allocation of XXH state, on stack or in a struct for example.
321 * Never **ever** use members directly. */
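/* Example (minimal sketch) : the definitions below make static or stack
 * allocation possible, avoiding XXH32_createState()/XXH32_freeState().
 *
 *     #define XXH_STATIC_LINKING_ONLY
 *     #include "xxhash.h"
 *
 *     XXH32_state_t st;                 // on the stack; contents are opaque
 *     XXH32_reset(&st, 0);
 *     XXH32_update(&st, data, len);     // data/len assumed to exist
 *     XXH32_hash_t const h = XXH32_digest(&st);
 */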
322
323 struct XXH32_state_s {
324 XXH32_hash_t total_len_32;
325 XXH32_hash_t large_len;
326 XXH32_hash_t v1;
327 XXH32_hash_t v2;
328 XXH32_hash_t v3;
329 XXH32_hash_t v4;
330 XXH32_hash_t mem32[4];
331 XXH32_hash_t memsize;
332 XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */
333 }; /* typedef'd to XXH32_state_t */
334
335
336 #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
337
338 struct XXH64_state_s {
339 XXH64_hash_t total_len;
340 XXH64_hash_t v1;
341 XXH64_hash_t v2;
342 XXH64_hash_t v3;
343 XXH64_hash_t v4;
344 XXH64_hash_t mem64[4];
345 XXH32_hash_t memsize;
346 XXH32_hash_t reserved32; /* required for padding anyway */
347 XXH64_hash_t reserved64; /* never read nor write, might be removed in a future version */
348 }; /* typedef'd to XXH64_state_t */
349
350 #endif /* XXH_NO_LONG_LONG */
351
352 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
353 # define XXH_IMPLEMENTATION
354 #endif
355
356 #endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
357
358
359
360 /*-**********************************************************************
361 * xxHash implementation
362 * The implementation of these functions used to be hosted within xxhash.c .
363 * However, code inlining requires the implementation to be placed in the header file.
364 * As a consequence, xxhash.c used to be included within xxhash.h .
365 * But some build systems don't like *.c inclusions.
366 * So the implementation is now directly integrated within xxhash.h .
367 * Another small advantage is that xxhash.c is no longer required in /includes .
368 ************************************************************************/
369
370 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
371 || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
372 # define XXH_IMPLEM_13a8737387
373
374 /* *************************************
375 * Tuning parameters
376 ***************************************/
377 /*!XXH_FORCE_MEMORY_ACCESS :
378 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
379 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
380 * The switch below allows selecting a different access method for improved performance.
381 * Method 0 (default) : use `memcpy()`. Safe and portable.
382 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
383 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
384 * Method 2 : direct access. This method doesn't depend on the compiler, but violates the C standard.
385 * It can generate buggy code on targets which do not support unaligned memory accesses.
386 * But in some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6).
387 * See http://stackoverflow.com/a/32095106/646947 for details.
388 * Prefer these methods in priority order (0 > 1 > 2)
389 */
390 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
391 # if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6)
392 # define XXH_FORCE_MEMORY_ACCESS 2
393 # elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
394 (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
395 # define XXH_FORCE_MEMORY_ACCESS 1
396 # endif
397 #endif
398
399 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
400 * If the input pointer is NULL, xxHash's default behavior is to dereference it, triggering a segfault.
401 * When this macro is enabled, xxHash actively checks the input for a null pointer.
402 * If it is NULL, the result is the same as for a zero-length input.
403 */
404 #ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
405 # define XXH_ACCEPT_NULL_INPUT_POINTER 0
406 #endif
407
408 /*!XXH_FORCE_ALIGN_CHECK :
409 * This is a minor performance trick, only useful with lots of very small keys.
410 * It means : check for aligned/unaligned input.
411 * The check costs one initial branch per hash;
412 * set it to 0 when the input is guaranteed to be aligned,
413 * or when alignment doesn't matter for performance.
414 */
415 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
416 # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
417 # define XXH_FORCE_ALIGN_CHECK 0
418 # else
419 # define XXH_FORCE_ALIGN_CHECK 1
420 # endif
421 #endif
422
423 /*!XXH_REROLL:
424 * Whether to reroll XXH32_finalize, and XXH64_finalize,
425 * instead of using an unrolled jump table/if statement loop.
426 *
427 * This is automatically defined on -Os/-Oz on GCC and Clang. */
428 #ifndef XXH_REROLL
429 # if defined(__OPTIMIZE_SIZE__)
430 # define XXH_REROLL 1
431 # else
432 # define XXH_REROLL 0
433 # endif
434 #endif
435
436
437 /* *************************************
438 * Includes & Memory related functions
439 ***************************************/
440 /*! Modify the local functions below should you wish to use some other memory routines
441 * for malloc(), free() */
442 #include <stdlib.h>
443 static void* XXH_malloc(size_t s) { return malloc(s); }
444 static void XXH_free (void* p) { free(p); }
445 /*! and for memcpy() */
446 #include <string.h>
447 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
448
449 #include <limits.h> /* ULLONG_MAX */
450
451
452 /* *************************************
453 * Compiler Specific Options
454 ***************************************/
455 #ifdef _MSC_VER /* Visual Studio */
456 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
457 # define XXH_FORCE_INLINE static __forceinline
458 # define XXH_NO_INLINE static __declspec(noinline)
459 #else
460 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
461 # ifdef __GNUC__
462 # define XXH_FORCE_INLINE static inline __attribute__((always_inline))
463 # define XXH_NO_INLINE static __attribute__((noinline))
464 # else
465 # define XXH_FORCE_INLINE static inline
466 # define XXH_NO_INLINE static
467 # endif
468 # else
469 # define XXH_FORCE_INLINE static
470 # define XXH_NO_INLINE static
471 # endif /* __STDC_VERSION__ */
472 #endif
473
474
475
476 /* *************************************
477 * Debug
478 ***************************************/
479 /* DEBUGLEVEL is expected to be defined externally,
480 * typically through compiler command line.
481 * Value must be a number. */
482 #ifndef DEBUGLEVEL
483 # define DEBUGLEVEL 0
484 #endif
485
486 #if (DEBUGLEVEL>=1)
487 # include <assert.h> /* note : can still be disabled with NDEBUG */
488 # define XXH_ASSERT(c) assert(c)
489 #else
490 # define XXH_ASSERT(c) ((void)0)
491 #endif
492
493 /* note : use after variable declarations */
494 #define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; }
495
496
497 /* *************************************
498 * Basic Types
499 ***************************************/
500 #if !defined (__VMS) \
501 && (defined (__cplusplus) \
502 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
503 # include <stdint.h>
504 typedef uint8_t xxh_u8;
505 #else
506 typedef unsigned char xxh_u8;
507 #endif
508 typedef XXH32_hash_t xxh_u32;
509
510
511 /* *** Memory access *** */
512
513 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
514
515 /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
516 static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
517
518 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
519
520 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
521 /* currently only defined for gcc and icc */
522 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
523 static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
524
525 #else
526
527 /* portable and safe solution. Generally efficient.
528 * see : http://stackoverflow.com/a/32095106/646947
529 */
530 static xxh_u32 XXH_read32(const void* memPtr)
531 {
532 xxh_u32 val;
533 memcpy(&val, memPtr, sizeof(val));
534 return val;
535 }
536
537 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
538
539
540 /* *** Endianness *** */
541 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
542
543 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
544 #ifndef XXH_CPU_LITTLE_ENDIAN
545 # if defined(_WIN32) /* Windows is always little endian */ \
546 || defined(__LITTLE_ENDIAN__) \
547 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
548 # define XXH_CPU_LITTLE_ENDIAN 1
549 # elif defined(__BIG_ENDIAN__) \
550 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
551 # define XXH_CPU_LITTLE_ENDIAN 0
552 # else
553 static int XXH_isLittleEndian(void)
554 {
555 const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; /* don't use static : performance detrimental */
556 return one.c[0];
557 }
558 # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
559 # endif
560 #endif
561
562
563
564
565 /* ****************************************
566 * Compiler-specific Functions and Macros
567 ******************************************/
568 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
569
570 #ifndef __has_builtin
571 # define __has_builtin(x) 0
572 #endif
573
574 #if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64)
575 # define XXH_rotl32 __builtin_rotateleft32
576 # define XXH_rotl64 __builtin_rotateleft64
577 /* Note : although _rotl exists for MinGW (GCC under Windows), performance seems poor */
578 #elif defined(_MSC_VER)
579 # define XXH_rotl32(x,r) _rotl(x,r)
580 # define XXH_rotl64(x,r) _rotl64(x,r)
581 #else
582 # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
583 # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
584 #endif
585
586 #if defined(_MSC_VER) /* Visual Studio */
587 # define XXH_swap32 _byteswap_ulong
588 #elif XXH_GCC_VERSION >= 403
589 # define XXH_swap32 __builtin_bswap32
590 #else
591 static xxh_u32 XXH_swap32 (xxh_u32 x)
592 {
593 return ((x << 24) & 0xff000000 ) |
594 ((x << 8) & 0x00ff0000 ) |
595 ((x >> 8) & 0x0000ff00 ) |
596 ((x >> 24) & 0x000000ff );
597 }
598 #endif
599
600
601 /* ***************************
602 * Memory reads
603 *****************************/
604 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
605
606 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
607 {
608 return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
609 }
610
611 static xxh_u32 XXH_readBE32(const void* ptr)
612 {
613 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
614 }
615
616 XXH_FORCE_INLINE xxh_u32
617 XXH_readLE32_align(const void* ptr, XXH_alignment align)
618 {
619 if (align==XXH_unaligned) {
620 return XXH_readLE32(ptr);
621 } else {
622 return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
623 }
624 }
625
626
627 /* *************************************
628 * Misc
629 ***************************************/
630 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
631
632
633 /* *******************************************************************
634 * 32-bit hash functions
635 *********************************************************************/
636 static const xxh_u32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */
637 static const xxh_u32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */
638 static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */
639 static const xxh_u32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */
640 static const xxh_u32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */
641
642 static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
643 {
644 acc += input * PRIME32_2;
645 acc = XXH_rotl32(acc, 13);
646 acc *= PRIME32_1;
647 #if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
648 /* UGLY HACK:
649 * This inline assembly hack forces acc into a normal register. This is the
650 * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
651 * (pragmas and attributes don't work for some reason) without globally
652 * disabling SSE4.1.
653 *
654 * The reason we want to avoid vectorization is because despite working on
655 * 4 integers at a time, there are multiple factors slowing XXH32 down on
656 * SSE4:
657 * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
658 * making it slightly slower to multiply four integers at once compared to four
659 * integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
660 * still not worth it to go into SSE just to multiply unless doing a long operation.
661 *
662 * - Four instructions are required to rotate,
663 * movdqa tmp, v // not required with VEX encoding
664 * pslld tmp, 13 // tmp <<= 13
665 * psrld v, 19 // x >>= 19
666 * por v, tmp // x |= tmp
667 * compared to one for scalar:
668 * roll v, 13 // reliably fast across the board
669 * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
670 *
671 * - Instruction level parallelism is actually more beneficial here because the
672 * SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
673 * while v3 can multiply. SSE forces them to operate together.
674 *
675 * How this hack works:
676 * __asm__("" // Declare an assembly block but don't declare any instructions
677 * : // However, as an Input/Output Operand,
678 * "+r" // constrain a read/write operand (+) as a general purpose register (r).
679 * (acc) // and set acc as the operand
680 * );
681 *
682 * Because of the 'r', the compiler has promised that acc will be in a
683 * general purpose register and the '+' says that it will be 'read/write',
684 * so it has to assume it has changed. It is like volatile without all the
685 * loads and stores.
686 *
687 * Since the argument has to be in a normal register (not an SSE register),
688 * each time XXH32_round is called, it is impossible to vectorize. */
689 __asm__("" : "+r" (acc));
690 #endif
691 return acc;
692 }
693
694 /* mix all bits */
695 static xxh_u32 XXH32_avalanche(xxh_u32 h32)
696 {
697 h32 ^= h32 >> 15;
698 h32 *= PRIME32_2;
699 h32 ^= h32 >> 13;
700 h32 *= PRIME32_3;
701 h32 ^= h32 >> 16;
702 return(h32);
703 }
704
705 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
706
707 static xxh_u32
708 XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
709 {
710 #define PROCESS1 \
711 h32 += (*ptr++) * PRIME32_5; \
712 h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
713
714 #define PROCESS4 \
715 h32 += XXH_get32bits(ptr) * PRIME32_3; \
716 ptr+=4; \
717 h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
718
719 /* Compact rerolled version */
720 if (XXH_REROLL) {
721 len &= 15;
722 while (len >= 4) {
723 PROCESS4;
724 len -= 4;
725 }
726 while (len > 0) {
727 PROCESS1;
728 --len;
729 }
730 return XXH32_avalanche(h32);
731 } else {
732 switch(len&15) /* or switch(bEnd - p) */ {
733 case 12: PROCESS4;
734 FALLTHROUGH;
735 case 8: PROCESS4;
736 FALLTHROUGH;
737 case 4: PROCESS4;
738 return XXH32_avalanche(h32);
739
740 case 13: PROCESS4;
741 FALLTHROUGH;
742 case 9: PROCESS4;
743 FALLTHROUGH;
744 case 5: PROCESS4;
745 PROCESS1;
746 return XXH32_avalanche(h32);
747
748 case 14: PROCESS4;
749 FALLTHROUGH;
750 case 10: PROCESS4;
751 FALLTHROUGH;
752 case 6: PROCESS4;
753 PROCESS1;
754 PROCESS1;
755 return XXH32_avalanche(h32);
756
757 case 15: PROCESS4;
758 FALLTHROUGH;
759 case 11: PROCESS4;
760 FALLTHROUGH;
761 case 7: PROCESS4;
762 FALLTHROUGH;
763 case 3: PROCESS1;
764 FALLTHROUGH;
765 case 2: PROCESS1;
766 FALLTHROUGH;
767 case 1: PROCESS1;
768 FALLTHROUGH;
769 case 0: return XXH32_avalanche(h32);
770 }
771 XXH_ASSERT(0);
772 return h32; /* reaching this point is deemed impossible */
773 }
774 }
775
776 XXH_FORCE_INLINE xxh_u32
777 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
778 {
779 const xxh_u8* bEnd = input + len;
780 xxh_u32 h32;
781
782 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
783 if (input==NULL) {
784 len=0;
785 bEnd=input=(const xxh_u8*)(size_t)16;
786 }
787 #endif
788
789 if (len>=16) {
790 const xxh_u8* const limit = bEnd - 15;
791 xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2;
792 xxh_u32 v2 = seed + PRIME32_2;
793 xxh_u32 v3 = seed + 0;
794 xxh_u32 v4 = seed - PRIME32_1;
795
796 do {
797 v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
798 v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
799 v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
800 v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
801 } while (input < limit);
802
803 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
804 + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
805 } else {
806 h32 = seed + PRIME32_5;
807 }
808
809 h32 += (xxh_u32)len;
810
811 return XXH32_finalize(h32, input, len&15, align);
812 }
813
814
815 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
816 {
817 #if 0
818 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
819 XXH32_state_t state;
820 XXH32_reset(&state, seed);
821 XXH32_update(&state, (const xxh_u8*)input, len);
822 return XXH32_digest(&state);
823
824 #else
825
826 if (XXH_FORCE_ALIGN_CHECK) {
827 if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
828 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
829 } }
830
831 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
832 #endif
833 }
834
835
836
837 /******* Hash streaming *******/
838
839 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
840 {
841 return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
842 }
843 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
844 {
845 XXH_free(statePtr);
846 return XXH_OK;
847 }
848
849 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
850 {
851 memcpy(dstState, srcState, sizeof(*dstState));
852 }
853
854 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
855 {
856 XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
857 memset(&state, 0, sizeof(state));
858 state.v1 = seed + PRIME32_1 + PRIME32_2;
859 state.v2 = seed + PRIME32_2;
860 state.v3 = seed + 0;
861 state.v4 = seed - PRIME32_1;
862 /* do not write into reserved, planned to be removed in a future version */
863 memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
864 return XXH_OK;
865 }
866
867
868 XXH_PUBLIC_API XXH_errorcode
869 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
870 {
871 if (input==NULL)
872 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
873 return XXH_OK;
874 #else
875 return XXH_ERROR;
876 #endif
877
878 { const xxh_u8* p = (const xxh_u8*)input;
879 const xxh_u8* const bEnd = p + len;
880
881 state->total_len_32 += (XXH32_hash_t)len;
882 state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
883
884 if (state->memsize + len < 16) { /* fill in tmp buffer */
885 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
886 state->memsize += (XXH32_hash_t)len;
887 return XXH_OK;
888 }
889
890 if (state->memsize) { /* some data left from previous update */
891 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
892 { const xxh_u32* p32 = state->mem32;
893 state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
894 state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
895 state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
896 state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
897 }
898 p += 16-state->memsize;
899 state->memsize = 0;
900 }
901
902 if (p <= bEnd-16) {
903 const xxh_u8* const limit = bEnd - 16;
904 xxh_u32 v1 = state->v1;
905 xxh_u32 v2 = state->v2;
906 xxh_u32 v3 = state->v3;
907 xxh_u32 v4 = state->v4;
908
909 do {
910 v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
911 v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
912 v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
913 v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
914 } while (p<=limit);
915
916 state->v1 = v1;
917 state->v2 = v2;
918 state->v3 = v3;
919 state->v4 = v4;
920 }
921
922 if (p < bEnd) {
923 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
924 state->memsize = (unsigned)(bEnd-p);
925 }
926 }
927
928 return XXH_OK;
929 }
930
931
932 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
933 {
934 xxh_u32 h32;
935
936 if (state->large_len) {
937 h32 = XXH_rotl32(state->v1, 1)
938 + XXH_rotl32(state->v2, 7)
939 + XXH_rotl32(state->v3, 12)
940 + XXH_rotl32(state->v4, 18);
941 } else {
942 h32 = state->v3 /* == seed */ + PRIME32_5;
943 }
944
945 h32 += state->total_len_32;
946
947 return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
948 }
949
950
951 /******* Canonical representation *******/
952
953 /*! Default XXH result types are basic unsigned 32 and 64 bit integers.
954 * The canonical representation follows human-readable write convention, aka big-endian (large digits first).
955 * These functions allow transformation of hash result into and from its canonical format.
956 * This way, hash values can be written into a file or buffer, remaining comparable across different systems.
957 */
958
959 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
960 {
961 XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
962 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
963 memcpy(dst, &hash, sizeof(*dst));
964 }
965
966 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
967 {
968 return XXH_readBE32(src);
969 }
970
971
972 #ifndef XXH_NO_LONG_LONG
973
974 /* *******************************************************************
975 * 64-bit hash functions
976 *********************************************************************/
977
978 /******* Memory access *******/
979
980 typedef XXH64_hash_t xxh_u64;
981
982
983 /*! XXH_REROLL_XXH64:
984 * Whether to reroll the XXH64_finalize() loop.
985 *
986 * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance gain
987 * on 64-bit hosts, as only one jump is required.
988 *
989 * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit registers,
990 * and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll. The code becomes
991 * ridiculously large (the largest function in the binary on i386!), and rerolling it saves
992 * anywhere from 3kB to 20kB. It is also slightly faster because it fits into cache better
993 * and is more likely to be inlined by the compiler.
994 *
995 * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */
996 #ifndef XXH_REROLL_XXH64
997 # if (defined(__ILP32__) || defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
998 || !(defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) /* x86-64 */ \
999 || defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) /* aarch64 */ \
1000 || defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
1001 || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
1002 || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
1003 # define XXH_REROLL_XXH64 1
1004 # else
1005 # define XXH_REROLL_XXH64 0
1006 # endif
1007 #endif /* !defined(XXH_REROLL_XXH64) */
1008
1009 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1010
1011 /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
1012 static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }
1013
1014 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1015
1016 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
1017 /* currently only defined for gcc and icc */
1018 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
1019 static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
1020
1021 #else
1022
1023 /* portable and safe solution. Generally efficient.
1024 * see : http://stackoverflow.com/a/32095106/646947
1025 */
1026
1027 static xxh_u64 XXH_read64(const void* memPtr)
1028 {
1029 xxh_u64 val;
1030 memcpy(&val, memPtr, sizeof(val));
1031 return val;
1032 }
1033
1034 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
1035
1036 #if defined(_MSC_VER) /* Visual Studio */
1037 # define XXH_swap64 _byteswap_uint64
1038 #elif XXH_GCC_VERSION >= 403
1039 # define XXH_swap64 __builtin_bswap64
1040 #else
1041 static xxh_u64 XXH_swap64 (xxh_u64 x)
1042 {
1043 return ((x << 56) & 0xff00000000000000ULL) |
1044 ((x << 40) & 0x00ff000000000000ULL) |
1045 ((x << 24) & 0x0000ff0000000000ULL) |
1046 ((x << 8) & 0x000000ff00000000ULL) |
1047 ((x >> 8) & 0x00000000ff000000ULL) |
1048 ((x >> 24) & 0x0000000000ff0000ULL) |
1049 ((x >> 40) & 0x000000000000ff00ULL) |
1050 ((x >> 56) & 0x00000000000000ffULL);
1051 }
1052 #endif
1053
1054 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
1055 {
1056 return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
1057 }
1058
1059 static xxh_u64 XXH_readBE64(const void* ptr)
1060 {
1061 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
1062 }
1063
1064 XXH_FORCE_INLINE xxh_u64
1065 XXH_readLE64_align(const void* ptr, XXH_alignment align)
1066 {
1067 if (align==XXH_unaligned)
1068 return XXH_readLE64(ptr);
1069 else
1070 return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
1071 }
1072
1073
1074 /******* xxh64 *******/
1075
1076 static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
1077 static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
1078 static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
1079 static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
1080 static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
1081
1082 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
1083 {
1084 acc += input * PRIME64_2;
1085 acc = XXH_rotl64(acc, 31);
1086 acc *= PRIME64_1;
1087 return acc;
1088 }
1089
1090 static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
1091 {
1092 val = XXH64_round(0, val);
1093 acc ^= val;
1094 acc = acc * PRIME64_1 + PRIME64_4;
1095 return acc;
1096 }
1097
1098 static xxh_u64 XXH64_avalanche(xxh_u64 h64)
1099 {
1100 h64 ^= h64 >> 33;
1101 h64 *= PRIME64_2;
1102 h64 ^= h64 >> 29;
1103 h64 *= PRIME64_3;
1104 h64 ^= h64 >> 32;
1105 return h64;
1106 }
1107
1108
1109 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
1110
1111 static xxh_u64
1112 XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
1113 {
1114 #define PROCESS1_64 \
1115 h64 ^= (*ptr++) * PRIME64_5; \
1116 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
1117
1118 #define PROCESS4_64 \
1119 h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \
1120 ptr+=4; \
1121 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
1122
1123 #define PROCESS8_64 { \
1124 xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
1125 ptr+=8; \
1126 h64 ^= k1; \
1127 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
1128 }
1129
1130 /* Rerolled version for 32-bit targets is faster and much smaller. */
1131 if (XXH_REROLL || XXH_REROLL_XXH64) {
1132 len &= 31;
1133 while (len >= 8) {
1134 PROCESS8_64;
1135 len -= 8;
1136 }
1137 if (len >= 4) {
1138 PROCESS4_64;
1139 len -= 4;
1140 }
1141 while (len > 0) {
1142 PROCESS1_64;
1143 --len;
1144 }
1145 return XXH64_avalanche(h64);
1146 } else {
1147 switch(len & 31) {
1148 case 24: PROCESS8_64;
1149 FALLTHROUGH;
1150 case 16: PROCESS8_64;
1151 FALLTHROUGH;
1152 case 8: PROCESS8_64;
1153 return XXH64_avalanche(h64);
1154
1155 case 28: PROCESS8_64;
1156 FALLTHROUGH;
1157 case 20: PROCESS8_64;
1158 FALLTHROUGH;
1159 case 12: PROCESS8_64;
1160 FALLTHROUGH;
1161 case 4: PROCESS4_64;
1162 return XXH64_avalanche(h64);
1163
1164 case 25: PROCESS8_64;
1165 FALLTHROUGH;
1166 case 17: PROCESS8_64;
1167 FALLTHROUGH;
1168 case 9: PROCESS8_64;
1169 PROCESS1_64;
1170 return XXH64_avalanche(h64);
1171
1172 case 29: PROCESS8_64;
1173 FALLTHROUGH;
1174 case 21: PROCESS8_64;
1175 FALLTHROUGH;
1176 case 13: PROCESS8_64;
1177 FALLTHROUGH;
1178 case 5: PROCESS4_64;
1179 PROCESS1_64;
1180 return XXH64_avalanche(h64);
1181
1182 case 26: PROCESS8_64;
1183 FALLTHROUGH;
1184 case 18: PROCESS8_64;
1185 FALLTHROUGH;
1186 case 10: PROCESS8_64;
1187 PROCESS1_64;
1188 PROCESS1_64;
1189 return XXH64_avalanche(h64);
1190
1191 case 30: PROCESS8_64;
1192 FALLTHROUGH;
1193 case 22: PROCESS8_64;
1194 FALLTHROUGH;
1195 case 14: PROCESS8_64;
1196 FALLTHROUGH;
1197 case 6: PROCESS4_64;
1198 PROCESS1_64;
1199 PROCESS1_64;
1200 return XXH64_avalanche(h64);
1201
1202 case 27: PROCESS8_64;
1203 FALLTHROUGH;
1204 case 19: PROCESS8_64;
1205 FALLTHROUGH;
1206 case 11: PROCESS8_64;
1207 PROCESS1_64;
1208 PROCESS1_64;
1209 PROCESS1_64;
1210 return XXH64_avalanche(h64);
1211
1212 case 31: PROCESS8_64;
1213 FALLTHROUGH;
1214 case 23: PROCESS8_64;
1215 FALLTHROUGH;
1216 case 15: PROCESS8_64;
1217 FALLTHROUGH;
1218 case 7: PROCESS4_64;
1219 FALLTHROUGH;
1220 case 3: PROCESS1_64;
1221 FALLTHROUGH;
1222 case 2: PROCESS1_64;
1223 FALLTHROUGH;
1224 case 1: PROCESS1_64;
1225 FALLTHROUGH;
1226 case 0: return XXH64_avalanche(h64);
1227 }
1228 }
1229 /* impossible to reach */
1230 XXH_ASSERT(0);
1231 return 0; /* unreachable, but some compilers complain without it */
1232 }
1233
1234 XXH_FORCE_INLINE xxh_u64
1235 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
1236 {
1237 const xxh_u8* bEnd = input + len;
1238 xxh_u64 h64;
1239
1240 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1241 if (input==NULL) {
1242 len=0;
1243 bEnd=input=(const xxh_u8*)(size_t)32;
1244 }
1245 #endif
1246
1247 if (len>=32) {
1248 const xxh_u8* const limit = bEnd - 32;
1249 xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2;
1250 xxh_u64 v2 = seed + PRIME64_2;
1251 xxh_u64 v3 = seed + 0;
1252 xxh_u64 v4 = seed - PRIME64_1;
1253
1254 do {
1255 v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
1256 v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
1257 v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
1258 v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
1259 } while (input<=limit);
1260
1261 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1262 h64 = XXH64_mergeRound(h64, v1);
1263 h64 = XXH64_mergeRound(h64, v2);
1264 h64 = XXH64_mergeRound(h64, v3);
1265 h64 = XXH64_mergeRound(h64, v4);
1266
1267 } else {
1268 h64 = seed + PRIME64_5;
1269 }
1270
1271 h64 += (xxh_u64) len;
1272
1273 return XXH64_finalize(h64, input, len, align);
1274 }
1275
1276
1277 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
1278 {
1279 #if 0
1280 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
1281 XXH64_state_t state;
1282 XXH64_reset(&state, seed);
1283 XXH64_update(&state, (const xxh_u8*)input, len);
1284 return XXH64_digest(&state);
1285
1286 #else
1287
1288 if (XXH_FORCE_ALIGN_CHECK) {
1289 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
1290 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
1291 } }
1292
1293 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
1294
1295 #endif
1296 }
1297
1298 /******* Hash Streaming *******/
1299
1300 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
1301 {
1302 return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
1303 }
1304 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
1305 {
1306 XXH_free(statePtr);
1307 return XXH_OK;
1308 }
1309
1310 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
1311 {
1312 memcpy(dstState, srcState, sizeof(*dstState));
1313 }
1314
1315 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
1316 {
1317 XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
1318 memset(&state, 0, sizeof(state));
1319 state.v1 = seed + PRIME64_1 + PRIME64_2;
1320 state.v2 = seed + PRIME64_2;
1321 state.v3 = seed + 0;
1322 state.v4 = seed - PRIME64_1;
1323 /* do not write into reserved64, might be removed in a future version */
1324 memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
1325 return XXH_OK;
1326 }
1327
1328 XXH_PUBLIC_API XXH_errorcode
1329 XXH64_update (XXH64_state_t* state, const void* input, size_t len)
1330 {
1331 if (input==NULL)
1332 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
1333 return XXH_OK;
1334 #else
1335 return XXH_ERROR;
1336 #endif
1337
1338 { const xxh_u8* p = (const xxh_u8*)input;
1339 const xxh_u8* const bEnd = p + len;
1340
1341 state->total_len += len;
1342
1343 if (state->memsize + len < 32) { /* fill in tmp buffer */
1344 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
1345 state->memsize += (xxh_u32)len;
1346 return XXH_OK;
1347 }
1348
1349 if (state->memsize) { /* tmp buffer is full */
1350 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
1351 state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
1352 state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
1353 state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
1354 state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
1355 p += 32-state->memsize;
1356 state->memsize = 0;
1357 }
1358
1359 if (p+32 <= bEnd) {
1360 const xxh_u8* const limit = bEnd - 32;
1361 xxh_u64 v1 = state->v1;
1362 xxh_u64 v2 = state->v2;
1363 xxh_u64 v3 = state->v3;
1364 xxh_u64 v4 = state->v4;
1365
1366 do {
1367 v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
1368 v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
1369 v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
1370 v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
1371 } while (p<=limit);
1372
1373 state->v1 = v1;
1374 state->v2 = v2;
1375 state->v3 = v3;
1376 state->v4 = v4;
1377 }
1378
1379 if (p < bEnd) {
1380 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
1381 state->memsize = (unsigned)(bEnd-p);
1382 }
1383 }
1384
1385 return XXH_OK;
1386 }
1387
1388
1389 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
1390 {
1391 xxh_u64 h64;
1392
1393 if (state->total_len >= 32) {
1394 xxh_u64 const v1 = state->v1;
1395 xxh_u64 const v2 = state->v2;
1396 xxh_u64 const v3 = state->v3;
1397 xxh_u64 const v4 = state->v4;
1398
1399 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
1400 h64 = XXH64_mergeRound(h64, v1);
1401 h64 = XXH64_mergeRound(h64, v2);
1402 h64 = XXH64_mergeRound(h64, v3);
1403 h64 = XXH64_mergeRound(h64, v4);
1404 } else {
1405 h64 = state->v3 /*seed*/ + PRIME64_5;
1406 }
1407
1408 h64 += (xxh_u64) state->total_len;
1409
1410 return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
1411 }
1412
1413
1414 /******* Canonical representation *******/
1415
1416 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
1417 {
1418 XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
1419 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
1420 memcpy(dst, &hash, sizeof(*dst));
1421 }
1422
1423 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
1424 {
1425 return XXH_readBE64(src);
1426 }
1427
1428
1429
1430 /* *********************************************************************
1431 * XXH3
1432 * New generation hash designed for speed on small keys and vectorization
1433 ************************************************************************ */
1434
1435 /* #include "xxh3.h" */
1436
1437
1438 #endif /* XXH_NO_LONG_LONG */
1439
1440
1441 #endif /* XXH_IMPLEMENTATION */
1442
1443
1444 #if defined (__cplusplus)
1445 }
1446 #endif
1447