1/**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15// This is the murmur3 32 bit hash implementation
16// The main structure and constants were taken from Austin Appleby.
17// From his gitlab (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp):
18// - MurmurHash3 was written by Austin Appleby, and is placed in the public
19// - domain. The author hereby disclaims copyright to this source code.
20
21#ifndef LIBPANDABASE_UTILS_MURMUR3_HASH_H
22#define LIBPANDABASE_UTILS_MURMUR3_HASH_H
23
24#include "hash_base.h"
25#include "logger.h"
26
27namespace panda {
28
29struct MurmurHash32Const {
30    // Here are the main constants for hash counting:
31    // Many of them looks like some kind of magic
32    static constexpr uint32_t C1 = 0xCC9E2D51U;
33    static constexpr uint32_t C2 = 0x1B873593U;
34    static constexpr uint32_t MAX_BITS = 32;
35    static constexpr uint32_t FINALIZE_FIRST_SHIFT = 16;
36    static constexpr uint32_t FINALIZE_SECOND_SHIFT = 13;
37    static constexpr uint32_t FINALIZE_THIRD_SHIFT = 16;
38    static constexpr uint32_t FINALIZE_FIRST_MULTIPLICATOR = 0x85EBCA6BU;
39    static constexpr uint32_t FINALIZE_SECOND_MULTIPLICATOR = 0xC2BAE35U;
40    static constexpr uint32_t MAIN_FIRST_SHIFT = 15;
41    static constexpr uint32_t MAIN_SECOND_SHIFT = 13;
42    static constexpr uint32_t MAIN_CONSTANT = 0xE6546B64U;
43    static constexpr uint32_t MAIN_MULTIPLICATOR = 5;
44    static constexpr uint32_t TAIL_SHIFT = 8;
45    static constexpr uint32_t TAIL_LAST_SHIFT = 15;
46    static constexpr uint32_t BLOCK_SIZE = 4;
47};
48
49// In general murmur hash looks like that:
50// key = |....|....|....|.|.|.|
51//         32   32   32  8 8 8
52// Firstly, we proceed each 32 bits block from key;
53// Secondly, we proceed last 8 bits block which were not covered in previous step.
54template <uint32_t seed_value>
55class MurmurHash32 final : public HashBase<MurmurHash32<seed_value>>, MurmurHash32Const {
56public:
57    static uint32_t GetHash32WithSeedImpl(const uint8_t *key, size_t len, uint32_t seed)
58    {
59        return MurmurHash3(key, len, seed);
60    }
61    static uint32_t GetHash32Impl(const uint8_t *key, size_t len)
62    {
63        return GetHash32WithSeedImpl(key, len, seed_value);
64    }
65    static uint32_t GetHash32StringImpl(const uint8_t *mutf8_string)
66    {
67        return MurmurHash3String(mutf8_string, seed_value);
68    }
69    static uint32_t GetHash32StringWithSeedImpl(const uint8_t *mutf8_string, uint32_t seed)
70    {
71        return MurmurHash3String(mutf8_string, seed);
72    }
73
74private:
75    static uint32_t Rotl(uint32_t word, uint8_t shift)
76    {
77        return (word << shift) | (word >> (MAX_BITS - shift));
78    }
79
80    // Finilize the result of the hash function
81    static uint32_t Finalize(uint32_t h)
82    {
83        h ^= h >> FINALIZE_FIRST_SHIFT;
84        h *= FINALIZE_FIRST_MULTIPLICATOR;
85        h ^= h >> FINALIZE_SECOND_SHIFT;
86        h *= FINALIZE_SECOND_MULTIPLICATOR;
87        h ^= h >> FINALIZE_THIRD_SHIFT;
88        return h;
89    }
90
91    static uint32_t MurmurHash3(const uint8_t *key, size_t len, uint32_t seed)
92    {
93        // We start hashing from the seed
94        uint32_t hash = seed;
95
96        // Do the main part:
97        // Iterate for each 32bits
98        auto blocks = reinterpret_cast<uintptr_t>(key);
99        std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'};
100        for (size_t i = len / BLOCK_SIZE; i != 0; i--) {
101            for (auto &j : memblock) {
102                j = *reinterpret_cast<uint8_t *>(blocks);
103                blocks += sizeof(uint8_t);
104            }
105            // Do this because we don't want to dispatch Big/Little endianness.
106            uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data());
107
108            k1 *= C1;
109            k1 = Rotl(k1, MAIN_FIRST_SHIFT);
110            k1 *= C2;
111
112            hash ^= k1;
113            hash = Rotl(hash, MAIN_SECOND_SHIFT);
114            hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT;
115        }
116
117        // Proceed the tail:
118        // blocks is a pointer to the end of 32bits section
119        auto tail = blocks;
120        uint32_t k1 = 0;
121        for (size_t i = len & 3U; i > 0; i--) {
122            // Get ((uint8_t*)tail)[i - 1]:
123            uintptr_t block_pointer = tail + sizeof(uint8_t) * (i - 1);
124            uint8_t block = *reinterpret_cast<uint8_t *>(block_pointer);
125            uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U));
126            k1 ^= temp;
127            if (i == 1) {
128                k1 = Rotl(k1, TAIL_LAST_SHIFT);
129                k1 *= C2;
130                hash ^= k1;
131            }
132        }
133
134        // Finalize the result
135        hash ^= len;
136        hash = Finalize(hash);
137
138        return hash;
139    }
140
141    static uint32_t MurmurHash3String(const uint8_t *mutf8_string, uint32_t seed)
142    {
143        // We start hashing from the seed
144        uint32_t hash = seed;
145        // We should still compute length of the string, we will need it later
146        size_t mutf8_length = 0;
147        // Do the main part:
148        // Iterate for each 32bits
149        auto blocks = reinterpret_cast<uintptr_t>(mutf8_string);
150        std::array<uint8_t, BLOCK_SIZE> memblock = {'\0', '\0', '\0', '\0'};
151        size_t tail_len = 0;
152        while (true) {
153            tail_len = 0;
154            for (unsigned char &i : memblock) {
155                i = *reinterpret_cast<uint8_t *>(blocks);
156                blocks += sizeof(uint8_t);
157                if (i == '\0') {
158                    break;
159                }
160                tail_len++;
161            }
162            if (tail_len != BLOCK_SIZE) {
163                // We couldn't read four 8bytes value
164                break;
165            }
166            // Do this because we don't want to dispatch Big/Little endianness.
167            uint32_t k1 = *reinterpret_cast<uint32_t *>(memblock.data());
168
169            k1 *= C1;
170            k1 = Rotl(k1, MAIN_FIRST_SHIFT);
171            k1 *= C2;
172
173            hash ^= k1;
174            hash = Rotl(hash, MAIN_SECOND_SHIFT);
175            hash = hash * MAIN_MULTIPLICATOR + MAIN_CONSTANT;
176            mutf8_length += BLOCK_SIZE;
177        }
178
179        // Proceed the tail:
180
181        mutf8_length += tail_len;
182        uint32_t k1 = 0;
183        for (size_t i = tail_len; i > 0; i--) {
184            uint8_t block = memblock[i - 1U];
185            uint32_t temp = static_cast<uint32_t>(block) << (TAIL_SHIFT * (i - 1U));
186            k1 ^= temp;
187            if (i == 1) {
188                k1 = Rotl(k1, TAIL_LAST_SHIFT);
189                k1 *= C2;
190                hash ^= k1;
191            }
192        }
193
194        // Finalize the result
195        hash ^= mutf8_length;
196        hash = Finalize(hash);
197
198        return hash;
199    }
200};
201
202}  // namespace panda
203
204#endif  // LIBPANDABASE_UTILS_MURMUR3_HASH_H
205