11cb0ef41Sopenharmony_ci/* NOLINT(build/header_guard) */ 21cb0ef41Sopenharmony_ci/* Copyright 2018 Google Inc. All Rights Reserved. 31cb0ef41Sopenharmony_ci 41cb0ef41Sopenharmony_ci Distributed under MIT license. 51cb0ef41Sopenharmony_ci See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 61cb0ef41Sopenharmony_ci*/ 71cb0ef41Sopenharmony_ci 81cb0ef41Sopenharmony_ci/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */ 91cb0ef41Sopenharmony_ci/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */ 101cb0ef41Sopenharmony_ci/* JUMP = skip bytes for speedup */ 111cb0ef41Sopenharmony_ci 121cb0ef41Sopenharmony_ci/* Rolling hash for long distance long string matches. Stores one position 131cb0ef41Sopenharmony_ci per bucket, bucket key is computed over a long region. */ 141cb0ef41Sopenharmony_ci 151cb0ef41Sopenharmony_ci#define HashRolling HASHER() 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_cistatic const uint32_t FN(kRollingHashMul32) = 69069; 181cb0ef41Sopenharmony_cistatic const uint32_t FN(kInvalidPos) = 0xffffffff; 191cb0ef41Sopenharmony_ci 201cb0ef41Sopenharmony_ci/* This hasher uses a longer forward length, but returning a higher value here 211cb0ef41Sopenharmony_ci will hurt compression by the main hasher when combined with a composite 221cb0ef41Sopenharmony_ci hasher. The hasher tests for forward itself instead. */ 231cb0ef41Sopenharmony_cistatic BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } 241cb0ef41Sopenharmony_cistatic BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_ci/* Computes a code from a single byte. A lookup table of 256 values could be 271cb0ef41Sopenharmony_ci used, but simply adding 1 works about as good. */ 281cb0ef41Sopenharmony_cistatic uint32_t FN(HashByte)(uint8_t byte) { 291cb0ef41Sopenharmony_ci return (uint32_t)byte + 1u; 301cb0ef41Sopenharmony_ci} 311cb0ef41Sopenharmony_ci 321cb0ef41Sopenharmony_cistatic uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add, 331cb0ef41Sopenharmony_ci uint32_t factor) { 341cb0ef41Sopenharmony_ci return (uint32_t)(factor * state + FN(HashByte)(add)); 351cb0ef41Sopenharmony_ci} 361cb0ef41Sopenharmony_ci 371cb0ef41Sopenharmony_cistatic uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add, 381cb0ef41Sopenharmony_ci uint8_t rem, uint32_t factor, 391cb0ef41Sopenharmony_ci uint32_t factor_remove) { 401cb0ef41Sopenharmony_ci return (uint32_t)(factor * state + 411cb0ef41Sopenharmony_ci FN(HashByte)(add) - factor_remove * FN(HashByte)(rem)); 421cb0ef41Sopenharmony_ci} 431cb0ef41Sopenharmony_ci 441cb0ef41Sopenharmony_citypedef struct HashRolling { 451cb0ef41Sopenharmony_ci uint32_t state; 461cb0ef41Sopenharmony_ci uint32_t* table; 471cb0ef41Sopenharmony_ci size_t next_ix; 481cb0ef41Sopenharmony_ci 491cb0ef41Sopenharmony_ci uint32_t chunk_len; 501cb0ef41Sopenharmony_ci uint32_t factor; 511cb0ef41Sopenharmony_ci uint32_t factor_remove; 521cb0ef41Sopenharmony_ci} HashRolling; 531cb0ef41Sopenharmony_ci 541cb0ef41Sopenharmony_cistatic void FN(Initialize)( 551cb0ef41Sopenharmony_ci HasherCommon* common, HashRolling* BROTLI_RESTRICT self, 561cb0ef41Sopenharmony_ci const BrotliEncoderParams* params) { 571cb0ef41Sopenharmony_ci size_t i; 581cb0ef41Sopenharmony_ci self->state = 0; 591cb0ef41Sopenharmony_ci self->next_ix = 0; 601cb0ef41Sopenharmony_ci 611cb0ef41Sopenharmony_ci self->factor = FN(kRollingHashMul32); 621cb0ef41Sopenharmony_ci 631cb0ef41Sopenharmony_ci /* Compute the factor of the oldest byte to remove: factor**steps modulo 641cb0ef41Sopenharmony_ci 0xffffffff (the multiplications rely on 32-bit overflow) */ 651cb0ef41Sopenharmony_ci self->factor_remove = 1; 661cb0ef41Sopenharmony_ci for (i = 0; i < CHUNKLEN; i += JUMP) { 671cb0ef41Sopenharmony_ci self->factor_remove *= self->factor; 681cb0ef41Sopenharmony_ci } 691cb0ef41Sopenharmony_ci 701cb0ef41Sopenharmony_ci self->table = (uint32_t*)common->extra; 711cb0ef41Sopenharmony_ci for (i = 0; i < NUMBUCKETS; i++) { 721cb0ef41Sopenharmony_ci self->table[i] = FN(kInvalidPos); 731cb0ef41Sopenharmony_ci } 741cb0ef41Sopenharmony_ci 751cb0ef41Sopenharmony_ci BROTLI_UNUSED(params); 761cb0ef41Sopenharmony_ci} 771cb0ef41Sopenharmony_ci 781cb0ef41Sopenharmony_cistatic void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot, 791cb0ef41Sopenharmony_ci size_t input_size, const uint8_t* BROTLI_RESTRICT data) { 801cb0ef41Sopenharmony_ci size_t i; 811cb0ef41Sopenharmony_ci /* Too small size, cannot use this hasher. */ 821cb0ef41Sopenharmony_ci if (input_size < CHUNKLEN) return; 831cb0ef41Sopenharmony_ci self->state = 0; 841cb0ef41Sopenharmony_ci for (i = 0; i < CHUNKLEN; i += JUMP) { 851cb0ef41Sopenharmony_ci self->state = FN(HashRollingFunctionInitial)( 861cb0ef41Sopenharmony_ci self->state, data[i], self->factor); 871cb0ef41Sopenharmony_ci } 881cb0ef41Sopenharmony_ci BROTLI_UNUSED(one_shot); 891cb0ef41Sopenharmony_ci} 901cb0ef41Sopenharmony_ci 911cb0ef41Sopenharmony_cistatic BROTLI_INLINE size_t FN(HashMemAllocInBytes)( 921cb0ef41Sopenharmony_ci const BrotliEncoderParams* params, BROTLI_BOOL one_shot, 931cb0ef41Sopenharmony_ci size_t input_size) { 941cb0ef41Sopenharmony_ci return NUMBUCKETS * sizeof(uint32_t); 951cb0ef41Sopenharmony_ci BROTLI_UNUSED(params); 961cb0ef41Sopenharmony_ci BROTLI_UNUSED(one_shot); 971cb0ef41Sopenharmony_ci BROTLI_UNUSED(input_size); 981cb0ef41Sopenharmony_ci} 991cb0ef41Sopenharmony_ci 1001cb0ef41Sopenharmony_cistatic BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self, 1011cb0ef41Sopenharmony_ci const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) { 1021cb0ef41Sopenharmony_ci BROTLI_UNUSED(self); 1031cb0ef41Sopenharmony_ci BROTLI_UNUSED(data); 1041cb0ef41Sopenharmony_ci BROTLI_UNUSED(mask); 1051cb0ef41Sopenharmony_ci BROTLI_UNUSED(ix); 1061cb0ef41Sopenharmony_ci} 1071cb0ef41Sopenharmony_ci 1081cb0ef41Sopenharmony_cistatic BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self, 1091cb0ef41Sopenharmony_ci const uint8_t* BROTLI_RESTRICT data, const size_t mask, 1101cb0ef41Sopenharmony_ci const size_t ix_start, const size_t ix_end) { 1111cb0ef41Sopenharmony_ci BROTLI_UNUSED(self); 1121cb0ef41Sopenharmony_ci BROTLI_UNUSED(data); 1131cb0ef41Sopenharmony_ci BROTLI_UNUSED(mask); 1141cb0ef41Sopenharmony_ci BROTLI_UNUSED(ix_start); 1151cb0ef41Sopenharmony_ci BROTLI_UNUSED(ix_end); 1161cb0ef41Sopenharmony_ci} 1171cb0ef41Sopenharmony_ci 1181cb0ef41Sopenharmony_cistatic BROTLI_INLINE void FN(StitchToPreviousBlock)( 1191cb0ef41Sopenharmony_ci HashRolling* BROTLI_RESTRICT self, 1201cb0ef41Sopenharmony_ci size_t num_bytes, size_t position, const uint8_t* ringbuffer, 1211cb0ef41Sopenharmony_ci size_t ring_buffer_mask) { 1221cb0ef41Sopenharmony_ci /* In this case we must re-initialize the hasher from scratch from the 1231cb0ef41Sopenharmony_ci current position. */ 1241cb0ef41Sopenharmony_ci size_t position_masked; 1251cb0ef41Sopenharmony_ci size_t available = num_bytes; 1261cb0ef41Sopenharmony_ci if ((position & (JUMP - 1)) != 0) { 1271cb0ef41Sopenharmony_ci size_t diff = JUMP - (position & (JUMP - 1)); 1281cb0ef41Sopenharmony_ci available = (diff > available) ? 0 : (available - diff); 1291cb0ef41Sopenharmony_ci position += diff; 1301cb0ef41Sopenharmony_ci } 1311cb0ef41Sopenharmony_ci position_masked = position & ring_buffer_mask; 1321cb0ef41Sopenharmony_ci /* wrapping around ringbuffer not handled. */ 1331cb0ef41Sopenharmony_ci if (available > ring_buffer_mask - position_masked) { 1341cb0ef41Sopenharmony_ci available = ring_buffer_mask - position_masked; 1351cb0ef41Sopenharmony_ci } 1361cb0ef41Sopenharmony_ci 1371cb0ef41Sopenharmony_ci FN(Prepare)(self, BROTLI_FALSE, available, 1381cb0ef41Sopenharmony_ci ringbuffer + (position & ring_buffer_mask)); 1391cb0ef41Sopenharmony_ci self->next_ix = position; 1401cb0ef41Sopenharmony_ci BROTLI_UNUSED(num_bytes); 1411cb0ef41Sopenharmony_ci} 1421cb0ef41Sopenharmony_ci 1431cb0ef41Sopenharmony_cistatic BROTLI_INLINE void FN(PrepareDistanceCache)( 1441cb0ef41Sopenharmony_ci HashRolling* BROTLI_RESTRICT self, 1451cb0ef41Sopenharmony_ci int* BROTLI_RESTRICT distance_cache) { 1461cb0ef41Sopenharmony_ci BROTLI_UNUSED(self); 1471cb0ef41Sopenharmony_ci BROTLI_UNUSED(distance_cache); 1481cb0ef41Sopenharmony_ci} 1491cb0ef41Sopenharmony_ci 1501cb0ef41Sopenharmony_cistatic BROTLI_INLINE void FN(FindLongestMatch)( 1511cb0ef41Sopenharmony_ci HashRolling* BROTLI_RESTRICT self, 1521cb0ef41Sopenharmony_ci const BrotliEncoderDictionary* dictionary, 1531cb0ef41Sopenharmony_ci const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, 1541cb0ef41Sopenharmony_ci const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, 1551cb0ef41Sopenharmony_ci const size_t max_length, const size_t max_backward, 1561cb0ef41Sopenharmony_ci const size_t dictionary_distance, const size_t max_distance, 1571cb0ef41Sopenharmony_ci HasherSearchResult* BROTLI_RESTRICT out) { 1581cb0ef41Sopenharmony_ci const size_t cur_ix_masked = cur_ix & ring_buffer_mask; 1591cb0ef41Sopenharmony_ci size_t pos; 1601cb0ef41Sopenharmony_ci 1611cb0ef41Sopenharmony_ci if ((cur_ix & (JUMP - 1)) != 0) return; 1621cb0ef41Sopenharmony_ci 1631cb0ef41Sopenharmony_ci /* Not enough lookahead */ 1641cb0ef41Sopenharmony_ci if (max_length < CHUNKLEN) return; 1651cb0ef41Sopenharmony_ci 1661cb0ef41Sopenharmony_ci for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) { 1671cb0ef41Sopenharmony_ci uint32_t code = self->state & MASK; 1681cb0ef41Sopenharmony_ci 1691cb0ef41Sopenharmony_ci uint8_t rem = data[pos & ring_buffer_mask]; 1701cb0ef41Sopenharmony_ci uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask]; 1711cb0ef41Sopenharmony_ci size_t found_ix = FN(kInvalidPos); 1721cb0ef41Sopenharmony_ci 1731cb0ef41Sopenharmony_ci self->state = FN(HashRollingFunction)( 1741cb0ef41Sopenharmony_ci self->state, add, rem, self->factor, self->factor_remove); 1751cb0ef41Sopenharmony_ci 1761cb0ef41Sopenharmony_ci if (code < NUMBUCKETS) { 1771cb0ef41Sopenharmony_ci found_ix = self->table[code]; 1781cb0ef41Sopenharmony_ci self->table[code] = (uint32_t)pos; 1791cb0ef41Sopenharmony_ci if (pos == cur_ix && found_ix != FN(kInvalidPos)) { 1801cb0ef41Sopenharmony_ci /* The cast to 32-bit makes backward distances up to 4GB work even 1811cb0ef41Sopenharmony_ci if cur_ix is above 4GB, despite using 32-bit values in the table. */ 1821cb0ef41Sopenharmony_ci size_t backward = (uint32_t)(cur_ix - found_ix); 1831cb0ef41Sopenharmony_ci if (backward <= max_backward) { 1841cb0ef41Sopenharmony_ci const size_t found_ix_masked = found_ix & ring_buffer_mask; 1851cb0ef41Sopenharmony_ci const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked], 1861cb0ef41Sopenharmony_ci &data[cur_ix_masked], 1871cb0ef41Sopenharmony_ci max_length); 1881cb0ef41Sopenharmony_ci if (len >= 4 && len > out->len) { 1891cb0ef41Sopenharmony_ci score_t score = BackwardReferenceScore(len, backward); 1901cb0ef41Sopenharmony_ci if (score > out->score) { 1911cb0ef41Sopenharmony_ci out->len = len; 1921cb0ef41Sopenharmony_ci out->distance = backward; 1931cb0ef41Sopenharmony_ci out->score = score; 1941cb0ef41Sopenharmony_ci out->len_code_delta = 0; 1951cb0ef41Sopenharmony_ci } 1961cb0ef41Sopenharmony_ci } 1971cb0ef41Sopenharmony_ci } 1981cb0ef41Sopenharmony_ci } 1991cb0ef41Sopenharmony_ci } 2001cb0ef41Sopenharmony_ci } 2011cb0ef41Sopenharmony_ci 2021cb0ef41Sopenharmony_ci self->next_ix = cur_ix + JUMP; 2031cb0ef41Sopenharmony_ci 2041cb0ef41Sopenharmony_ci /* NOTE: this hasher does not search in the dictionary. It is used as 2051cb0ef41Sopenharmony_ci backup-hasher, the main hasher already searches in it. */ 2061cb0ef41Sopenharmony_ci BROTLI_UNUSED(dictionary); 2071cb0ef41Sopenharmony_ci BROTLI_UNUSED(distance_cache); 2081cb0ef41Sopenharmony_ci BROTLI_UNUSED(dictionary_distance); 2091cb0ef41Sopenharmony_ci BROTLI_UNUSED(max_distance); 2101cb0ef41Sopenharmony_ci} 2111cb0ef41Sopenharmony_ci 2121cb0ef41Sopenharmony_ci#undef HashRolling 213