1/** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15// This is the murmur3 32 bit hash implementation 16// The main structure and constants were taken from Austin Appleby. 17// From his gitlab (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp): 18// - MurmurHash3 was written by Austin Appleby, and is placed in the public 19// - domain. The author hereby disclaims copyright to this source code. 20 21#ifndef LIBPANDABASE_UTILS_MURMUR3_HASH_H 22#define LIBPANDABASE_UTILS_MURMUR3_HASH_H 23 24#include "hash_base.h" 25#include "logger.h" 26 27namespace panda { 28 29struct MurmurHash32Const { 30 // Here are the main constants for hash counting: 31 // Many of them looks like some kind of magic 32 static constexpr uint32_t C1 = 0xCC9E2D51U; 33 static constexpr uint32_t C2 = 0x1B873593U; 34 static constexpr uint32_t MAX_BITS = 32; 35 static constexpr uint32_t FINALIZE_FIRST_SHIFT = 16; 36 static constexpr uint32_t FINALIZE_SECOND_SHIFT = 13; 37 static constexpr uint32_t FINALIZE_THIRD_SHIFT = 16; 38 static constexpr uint32_t FINALIZE_FIRST_MULTIPLICATOR = 0x85EBCA6BU; 39 static constexpr uint32_t FINALIZE_SECOND_MULTIPLICATOR = 0xC2BAE35U; 40 static constexpr uint32_t MAIN_FIRST_SHIFT = 15; 41 static constexpr uint32_t MAIN_SECOND_SHIFT = 13; 42 static constexpr uint32_t MAIN_CONSTANT = 0xE6546B64U; 43 static constexpr uint32_t MAIN_MULTIPLICATOR = 5; 44 static constexpr uint32_t TAIL_SHIFT = 8; 45 static constexpr uint32_t TAIL_LAST_SHIFT = 15; 46 static constexpr uint32_t BLOCK_SIZE = 4; 47}; 48 49// In general murmur hash looks like that: 50// key = |....|....|....|.|.|.| 51// 32 32 32 8 8 8 52// Firstly, we proceed each 32 bits block from key; 53// Secondly, we proceed last 8 bits block which were not covered in previous step. 54template <uint32_t seed_value> 55class MurmurHash32 final : public HashBase<MurmurHash32<seed_value>>, MurmurHash32Const { 56public: 57 static uint32_t GetHash32WithSeedImpl(const uint8_t *key, size_t len, uint32_t seed) 58 { 59 return MurmurHash3(key, len, seed); 60 } 61 static uint32_t GetHash32Impl(const uint8_t *key, size_t len) 62 { 63 return GetHash32WithSeedImpl(key, len, seed_value); 64 } 65 static uint32_t GetHash32StringImpl(const uint8_t *mutf8_string) 66 { 67 return MurmurHash3String(mutf8_string, seed_value); 68 } 69 static uint32_t GetHash32StringWithSeedImpl(const uint8_t *mutf8_string, uint32_t seed) 70 { 71 return MurmurHash3String(mutf8_string, seed); 72 } 73 74private: 75 static uint32_t Rotl(uint32_t word, uint8_t shift) 76 { 77 return (word << shift) | (word >> (MAX_BITS - shift)); 78 } 79 80 // Finilize the result of the hash function 81 static uint32_t Finalize(uint32_t h) 82 { 83 h ^= h >> FINALIZE_FIRST_SHIFT; 84 h *= FINALIZE_FIRST_MULTIPLICATOR; 85 h ^= h >> FINALIZE_SECOND_SHIFT; 86 h *= FINALIZE_SECOND_MULTIPLICATOR; 87 h ^= h >> FINALIZE_THIRD_SHIFT; 88 return h; 89 } 90 91 static uint32_t MurmurHash3(const uint8_t *key, size_t len, uint32_t seed) 92 { 93 // We start hashing from the seed 94 uint32_t hash = seed; 95 96 // Do the main part: 97 // Iterate for each 32bits 98 auto blocks = reinterpret_cast<uintptr_t>(key); 99 std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'}; 100 for (size_t i = len / BLOCK_SIZE; i != 0; i--) { 101 for (auto &j : memblock) { 102 j = *reinterpret_cast<uint8_t *>(blocks); 103 blocks += sizeof(uint8_t); 104 } 105 // Do this because we don't want to dispatch Big/Little endianness. 106 uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data()); 107 108 k1 *= C1; 109 k1 = Rotl(k1, MAIN_FIRST_SHIFT); 110 k1 *= C2; 111 112 hash ^= k1; 113 hash = Rotl(hash, MAIN_SECOND_SHIFT); 114 hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT; 115 } 116 117 // Proceed the tail: 118 // blocks is a pointer to the end of 32bits section 119 auto tail = blocks; 120 uint32_t k1 = 0; 121 for (size_t i = len & 3U; i > 0; i--) { 122 // Get ((uint8_t*)tail)[i - 1]: 123 uintptr_t block_pointer = tail + sizeof(uint8_t) * (i - 1); 124 uint8_t block = *reinterpret_cast<uint8_t *>(block_pointer); 125 uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U)); 126 k1 ^= temp; 127 if (i == 1) { 128 k1 = Rotl(k1, TAIL_LAST_SHIFT); 129 k1 *= C2; 130 hash ^= k1; 131 } 132 } 133 134 // Finalize the result 135 hash ^= len; 136 hash = Finalize(hash); 137 138 return hash; 139 } 140 141 static uint32_t MurmurHash3String(const uint8_t *mutf8_string, uint32_t seed) 142 { 143 // We start hashing from the seed 144 uint32_t hash = seed; 145 // We should still compute length of the string, we will need it later 146 size_t mutf8_length = 0; 147 // Do the main part: 148 // Iterate for each 32bits 149 auto blocks = reinterpret_cast<uintptr_t>(mutf8_string); 150 std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'}; 151 size_t tail_len = 0; 152 while (true) { 153 tail_len = 0; 154 for (unsigned char &i : memblock) { 155 i = *reinterpret_cast<uint8_t *>(blocks); 156 blocks += sizeof(uint8_t); 157 if (i == '\0') { 158 break; 159 } 160 tail_len++; 161 } 162 if (tail_len != BLOCK_SIZE) { 163 // We couldn't read four 8bytes value 164 break; 165 } 166 // Do this because we don't want to dispatch Big/Little endianness. 167 uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data()); 168 169 k1 *= C1; 170 k1 = Rotl(k1, MAIN_FIRST_SHIFT); 171 k1 *= C2; 172 173 hash ^= k1; 174 hash = Rotl(hash, MAIN_SECOND_SHIFT); 175 hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT; 176 mutf8_length += BLOCK_SIZE; 177 } 178 179 // Proceed the tail: 180 181 mutf8_length += tail_len; 182 uint32_t k1 = 0; 183 for (size_t i = tail_len; i > 0; i--) { 184 uint8_t block = memblock[i - 1U]; 185 uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U)); 186 k1 ^= temp; 187 if (i == 1) { 188 k1 = Rotl(k1, TAIL_LAST_SHIFT); 189 k1 *= C2; 190 hash ^= k1; 191 } 192 } 193 194 // Finalize the result 195 hash ^= mutf8_length; 196 hash = Finalize(hash); 197 198 return hash; 199 } 200}; 201 202} // namespace panda 203 204#endif // LIBPANDABASE_UTILS_MURMUR3_HASH_H 205