1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #ifndef LIBPANDABASE_UTILS_HASH_H
16 #define LIBPANDABASE_UTILS_HASH_H
17
18 #include "murmur3_hash.h"
19
20 namespace ark {
21
22 // Now, murmur3 hash is used by default
23 template <uint32_t SEED_VALUE>
24 using DefaultHash = MurmurHash32<SEED_VALUE>;
25
26 // Default seed which is used in hash functions.
27 // NOTE: To create different seed for your purposes,
28 // one must define it here and create new alias hash class
29 static constexpr uint32_t DEFAULT_SEED = 0x12345678U;
30
31 // Hash class alias with the default seed inside.
32 using Hash = DefaultHash<DEFAULT_SEED>;
33
34 /**
35 * @brief Create 32 bits Hash from @param key via @param seed.
36 * @param key - a key which should be hashed
37 * @param len - length of the key in bytes
38 * @param seed - seed which is used to calculate hash
39 * @return 32 bits hash
40 */
GetHash32WithSeed(const uint8_t *key, size_t len, uint32_t seed)41 inline uint32_t GetHash32WithSeed(const uint8_t *key, size_t len, uint32_t seed)
42 {
43 return Hash::GetHash32WithSeed(key, len, seed);
44 }
45
46 /**
47 * @brief Create 32 bits Hash from @param key.
48 * @param key - a key which should be hashed
49 * @param len - length of the key in bytes
50 * @return 32 bits hash
51 */
GetHash32(const uint8_t *key, size_t len)52 inline uint32_t GetHash32(const uint8_t *key, size_t len)
53 {
54 return Hash::GetHash32(key, len);
55 }
56
57 /**
58 * @brief Create 32 bits Hash from MUTF8 @param string.
59 * @param string - a pointer to the MUTF8 string
60 * @return 32 bits hash
61 */
GetHash32String(const uint8_t *mutf8String)62 inline uint32_t GetHash32String(const uint8_t *mutf8String)
63 {
64 return Hash::GetHash32String(mutf8String);
65 }
66
67 /**
68 * @brief Create 32 bits Hash from MUTF8 @param string.
69 * @param string - a pointer to the MUTF8 string
70 * @param seed - seed which is used to calculate hash
71 * @return 32 bits hash
72 */
GetHash32StringWithSeed(const uint8_t *mutf8String, uint32_t seed)73 inline uint32_t GetHash32StringWithSeed(const uint8_t *mutf8String, uint32_t seed)
74 {
75 return Hash::GetHash32StringWithSeed(mutf8String, seed);
76 }
77
78 constexpr uint32_t FNV_INITIAL_SEED = 0x811c9dc5;
79
80 /// Works like FNV hash but operates over 4-byte words at a time instead of single bytes
81 template <typename Item>
PseudoFnvHashItem(Item item, uint32_t seed = FNV_INITIAL_SEED)82 uint32_t PseudoFnvHashItem(Item item, uint32_t seed = FNV_INITIAL_SEED)
83 {
84 // NOLINTNEXTLINE(readability-braces-around-statements)
85 if constexpr (sizeof(Item) <= 4) {
86 constexpr uint32_t PRIME = 16777619U;
87 return (seed ^ static_cast<uint32_t>(item)) * PRIME;
88 } else if constexpr (sizeof(Item) == 8) { // NOLINT(readability-misleading-indentation)
89 auto item1 = static_cast<uint64_t>(item);
90 uint32_t hash = PseudoFnvHashItem(static_cast<uint32_t>(item1), seed);
91 constexpr uint32_t FOUR_BYTES = 32U;
92 return PseudoFnvHashItem(static_cast<uint32_t>(item1 >> FOUR_BYTES), hash);
93 } else {
94 static_assert(sizeof(Item *) == 0, "PseudoFnvHashItem can only be called on types of size 1, 2, 4 or 8");
95 }
96 }
97
98 /// Works like FNV hash but operates over 4-byte words at a time instead of single bytes
99 inline uint32_t PseudoFnvHashString(const uint8_t *str, uint32_t hash = FNV_INITIAL_SEED)
100 {
101 while (true) {
102 // NOLINTNEXTLINE(readability-implicit-bool-conversion, cppcoreguidelines-pro-bounds-pointer-arithmetic)
103 if (!str[0] || !str[1] || !str[2] || !str[3]) {
104 break;
105 }
106 constexpr uint32_t BYTE = 8U;
107 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
108 uint32_t word32 = str[0];
109 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
110 word32 |= static_cast<uint32_t>(str[1]) << BYTE;
111 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
112 word32 |= static_cast<uint32_t>(str[2U]) << (BYTE * 2U);
113 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
114 word32 |= static_cast<uint32_t>(str[3U]) << (BYTE * 3U);
115 hash = PseudoFnvHashItem(word32, hash);
116 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
117 str += sizeof(uint32_t);
118 }
119 while (*str != 0) {
120 hash = PseudoFnvHashItem(*str, hash);
121 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
122 ++str;
123 }
124 return hash;
125 }
126
127 // NOTE(romanov,dyadov) Either rename to PseudoFnvHash or implement and call original FNV
128 template <typename Container>
129 uint32_t FnvHash(const Container &data, uint32_t hash = FNV_INITIAL_SEED)
130 {
131 for (const auto value : data) {
132 hash = PseudoFnvHashItem(value, hash);
133 }
134 return hash;
135 }
136
137 /// Combine lhash and rhash
138 inline size_t MergeHashes(size_t lhash, size_t rhash)
139 {
140 constexpr const size_t MAGIC_CONSTANT = 0x9e3779b9;
141 size_t shl = lhash << 6U;
142 size_t shr = lhash >> 2U;
143 return lhash ^ (rhash + MAGIC_CONSTANT + shl + shr);
144 }
145
146 /// Combine lhash and rhash
147 // this overload is only enabled when it can't conflict with the size_t one
148 template <typename T = uint32_t, typename std::enable_if_t<(sizeof(size_t) != sizeof(T)), int> = 0>
149 inline uint32_t MergeHashes(uint32_t lhash, uint32_t rhash)
150 {
151 // note that the numbers are for 32 bits specifically, see https://github.com/HowardHinnant/hash_append/issues/7
152 constexpr const uint32_t MAGIC_CONSTANT = 0x9e3779b9;
153 uint32_t shl = lhash << 6U;
154 uint32_t shr = lhash >> 2U;
155 return lhash ^ (rhash + MAGIC_CONSTANT + shl + shr);
156 }
157
158 struct PairHash {
159 public:
160 template <typename T, typename U>
161 std::size_t operator()(const std::pair<T, U> &x) const
162 {
163 return MergeHashes(std::hash<T>()(x.first), std::hash<U>()(x.second));
164 }
165 };
166 } // namespace ark
167
168 #endif // LIBPANDABASE_UTILS_HASH_H
169