/* NOLINT(build/header_guard) */
/* Copyright 2018 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
/* JUMP = number of bytes to skip between positions, for speed */

/* Rolling hash for long distance long string matches. Stores one position
   per bucket; the bucket key is computed over a long region. */

#define HashRolling HASHER()

static const uint32_t FN(kRollingHashMul32) = 69069;
static const uint32_t FN(kInvalidPos) = 0xffffffff;

/* This hasher uses a longer forward length, but returning a higher value here
   will hurt compression by the main hasher when combined with a composite
   hasher. The hasher tests for the forward match itself instead. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }

/* Computes a code from a single byte. A lookup table of 256 values could be
   used, but simply adding 1 works about as well. */
static uint32_t FN(HashByte)(uint8_t byte) {
  return (uint32_t)byte + 1u;
}

static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
                                               uint32_t factor) {
  return (uint32_t)(factor * state + FN(HashByte)(add));
}

static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
                                        uint8_t rem, uint32_t factor,
                                        uint32_t factor_remove) {
  return (uint32_t)(factor * state +
      FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
}

typedef struct HashRolling {
  uint32_t state;
  uint32_t* table;
  size_t next_ix;

  uint32_t chunk_len;
  uint32_t factor;
  uint32_t factor_remove;
} HashRolling;

static void FN(Initialize)(
    HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
    const BrotliEncoderParams* params) {
  size_t i;
  self->state = 0;
  self->next_ix = 0;

  self->factor = FN(kRollingHashMul32);

  /* Compute the factor of the oldest byte to remove: factor**steps modulo
     2**32 (the multiplications rely on 32-bit overflow). */
  self->factor_remove = 1;
  for (i = 0; i < CHUNKLEN; i += JUMP) {
    self->factor_remove *= self->factor;
  }

  self->table = (uint32_t*)common->extra;
  for (i = 0; i < NUMBUCKETS; i++) {
    self->table[i] = FN(kInvalidPos);
  }

  BROTLI_UNUSED(params);
}
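
/* Illustrative sketch, not part of the hasher: with k = CHUNKLEN / JUMP
   sampled bytes b[0], b[JUMP], ..., b[(k - 1) * JUMP], the functions above
   maintain the invariant

     state = sum over i of factor**(k - 1 - i) * HashByte(b[i * JUMP])

   modulo 2**32, and Initialize() leaves factor_remove == factor**k.
   Sliding the window forward by JUMP bytes is then a single update
   (hypothetical snippet; pos, data and self stand in for any caller):

     self->state = FN(HashRollingFunction)(
         self->state, data[pos + CHUNKLEN], data[pos],
         self->factor, self->factor_remove);

   which yields the same value as rehashing the shifted window from scratch
   with FN(HashRollingFunctionInitial). */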

static void FN(Prepare)(
    HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
  size_t i;
  /* Input is too small; this hasher cannot be used. */
  if (input_size < CHUNKLEN) return;
  self->state = 0;
  for (i = 0; i < CHUNKLEN; i += JUMP) {
    self->state = FN(HashRollingFunctionInitial)(
        self->state, data[i], self->factor);
  }
  BROTLI_UNUSED(one_shot);
}

static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  return NUMBUCKETS * sizeof(uint32_t);
  BROTLI_UNUSED(params);
  BROTLI_UNUSED(one_shot);
  BROTLI_UNUSED(input_size);
}

static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
  BROTLI_UNUSED(self);
  BROTLI_UNUSED(data);
  BROTLI_UNUSED(mask);
  BROTLI_UNUSED(ix);
}

static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
    const size_t ix_start, const size_t ix_end) {
  BROTLI_UNUSED(self);
  BROTLI_UNUSED(data);
  BROTLI_UNUSED(mask);
  BROTLI_UNUSED(ix_start);
  BROTLI_UNUSED(ix_end);
}

static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    HashRolling* BROTLI_RESTRICT self,
    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    size_t ring_buffer_mask) {
  /* In this case we must re-initialize the hasher from scratch at the
     current position. */
  size_t position_masked;
  size_t available = num_bytes;
  if ((position & (JUMP - 1)) != 0) {
    size_t diff = JUMP - (position & (JUMP - 1));
    available = (diff > available) ? 0 : (available - diff);
    position += diff;
  }
  position_masked = position & ring_buffer_mask;
  /* Wrapping around the ring buffer is not handled. */
  if (available > ring_buffer_mask - position_masked) {
    available = ring_buffer_mask - position_masked;
  }

  FN(Prepare)(self, BROTLI_FALSE, available,
      ringbuffer + (position & ring_buffer_mask));
  self->next_ix = position;
  BROTLI_UNUSED(num_bytes);
}

static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HashRolling* BROTLI_RESTRICT self,
    int* BROTLI_RESTRICT distance_cache) {
  BROTLI_UNUSED(self);
  BROTLI_UNUSED(distance_cache);
}
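
/* Note on the bucket filter used below (a reading of the parameter comment
   at the top of this file, not extra machinery): the rolling code is masked
   to [0, MASK] and a position is stored or looked up only when it lands
   below NUMBUCKETS:

     uint32_t code = self->state & MASK;
     if (code < NUMBUCKETS) ...

   so only about NUMBUCKETS / (MASK + 1) of all positions enter the table,
   which keeps the table small while still covering very long distances. */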

static BROTLI_INLINE void FN(FindLongestMatch)(
    HashRolling* BROTLI_RESTRICT self,
    const BrotliEncoderDictionary* dictionary,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward,
    const size_t dictionary_distance, const size_t max_distance,
    HasherSearchResult* BROTLI_RESTRICT out) {
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  size_t pos;

  if ((cur_ix & (JUMP - 1)) != 0) return;

  /* Not enough lookahead. */
  if (max_length < CHUNKLEN) return;

  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
    uint32_t code = self->state & MASK;

    uint8_t rem = data[pos & ring_buffer_mask];
    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
    size_t found_ix = FN(kInvalidPos);

    self->state = FN(HashRollingFunction)(
        self->state, add, rem, self->factor, self->factor_remove);

    if (code < NUMBUCKETS) {
      found_ix = self->table[code];
      self->table[code] = (uint32_t)pos;
      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
        /* The cast to 32-bit makes backward distances up to 4GB work even
           if cur_ix is above 4GB, despite using 32-bit values in the
           table. */
        size_t backward = (uint32_t)(cur_ix - found_ix);
        if (backward <= max_backward) {
          const size_t found_ix_masked = found_ix & ring_buffer_mask;
          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
                                                      &data[cur_ix_masked],
                                                      max_length);
          if (len >= 4 && len > out->len) {
            score_t score = BackwardReferenceScore(len, backward);
            if (score > out->score) {
              out->len = len;
              out->distance = backward;
              out->score = score;
              out->len_code_delta = 0;
            }
          }
        }
      }
    }
  }

  self->next_ix = cur_ix + JUMP;

  /* NOTE: this hasher does not search in the dictionary. It is used as a
     backup hasher; the main hasher already searches in it. */
  BROTLI_UNUSED(dictionary);
  BROTLI_UNUSED(distance_cache);
  BROTLI_UNUSED(dictionary_distance);
  BROTLI_UNUSED(max_distance);
}

#undef HashRolling
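
/* Example instantiation, as a sketch only; the actual parameter values and
   the surrounding #define/#undef pairs live in the including file (e.g.
   hash.h) and may differ. The include line names this file; the file name
   is an assumption:

     #define HASHER() HROLLING
     #define FN(X) EXPAND_CAT(X, HASHER())
     #define JUMP 1
     #define CHUNKLEN 32
     #define NUMBUCKETS 16777216
     #define MASK ((NUMBUCKETS * 64) - 1)
     #include "hash_rolling_inc.h"
     #undef MASK
     #undef NUMBUCKETS
     #undef CHUNKLEN
     #undef JUMP
     #undef FN
     #undef HASHER

   JUMP must be a power of two, since positions are aligned with
   (position & (JUMP - 1)) above. */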