1/* Copyright 2010 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5*/ 6 7/* A (forgetful) hash table to the data seen by the compressor, to 8 help create backward references to previous data. */ 9 10#ifndef BROTLI_ENC_HASH_H_ 11#define BROTLI_ENC_HASH_H_ 12 13#include <string.h> /* memcmp, memset */ 14 15#include "../common/constants.h" 16#include "../common/dictionary.h" 17#include "../common/platform.h" 18#include <brotli/types.h> 19#include "./encoder_dict.h" 20#include "./fast_log.h" 21#include "./find_match_length.h" 22#include "./memory.h" 23#include "./quality.h" 24#include "./static_dict.h" 25 26#if defined(__cplusplus) || defined(c_plusplus) 27extern "C" { 28#endif 29 30typedef struct { 31 /* Dynamically allocated area; first member for quickest access. */ 32 void* extra; 33 34 size_t dict_num_lookups; 35 size_t dict_num_matches; 36 37 BrotliHasherParams params; 38 39 /* False if hasher needs to be "prepared" before use. */ 40 BROTLI_BOOL is_prepared_; 41} HasherCommon; 42 43#define score_t size_t 44 45static const uint32_t kCutoffTransformsCount = 10; 46/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */ 47/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */ 48static const uint64_t kCutoffTransforms = 49 BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200); 50 51typedef struct HasherSearchResult { 52 size_t len; 53 size_t distance; 54 score_t score; 55 int len_code_delta; /* == len_code - len */ 56} HasherSearchResult; 57 58/* kHashMul32 multiplier has these properties: 59 * The multiplier must be odd. Otherwise we may lose the highest bit. 60 * No long streaks of ones or zeros. 61 * There is no effort to ensure that it is a prime, the oddity is enough 62 for this use. 63 * The number has been tuned heuristically against compression benchmarks. */ 64static const uint32_t kHashMul32 = 0x1E35A7BD; 65static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD); 66static const uint64_t kHashMul64Long = 67 BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); 68 69static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { 70 uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; 71 /* The higher bits contain more mixture from the multiplication, 72 so we take our results from there. */ 73 return h >> (32 - 14); 74} 75 76static BROTLI_INLINE void PrepareDistanceCache( 77 int* BROTLI_RESTRICT distance_cache, const int num_distances) { 78 if (num_distances > 4) { 79 int last_distance = distance_cache[0]; 80 distance_cache[4] = last_distance - 1; 81 distance_cache[5] = last_distance + 1; 82 distance_cache[6] = last_distance - 2; 83 distance_cache[7] = last_distance + 2; 84 distance_cache[8] = last_distance - 3; 85 distance_cache[9] = last_distance + 3; 86 if (num_distances > 10) { 87 int next_last_distance = distance_cache[1]; 88 distance_cache[10] = next_last_distance - 1; 89 distance_cache[11] = next_last_distance + 1; 90 distance_cache[12] = next_last_distance - 2; 91 distance_cache[13] = next_last_distance + 2; 92 distance_cache[14] = next_last_distance - 3; 93 distance_cache[15] = next_last_distance + 3; 94 } 95 } 96} 97 98#define BROTLI_LITERAL_BYTE_SCORE 135 99#define BROTLI_DISTANCE_BIT_PENALTY 30 100/* Score must be positive after applying maximal penalty. */ 101#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t)) 102 103/* Usually, we always choose the longest backward reference. This function 104 allows for the exception of that rule. 105 106 If we choose a backward reference that is further away, it will 107 usually be coded with more bits. We approximate this by assuming 108 log2(distance). If the distance can be expressed in terms of the 109 last four distances, we use some heuristic constants to estimate 110 the bits cost. For the first up to four literals we use the bit 111 cost of the literals from the literal cost model, after that we 112 use the average bit cost of the cost model. 113 114 This function is used to sometimes discard a longer backward reference 115 when it is not much longer and the bit cost for encoding it is more 116 than the saved literals. 117 118 backward_reference_offset MUST be positive. */ 119static BROTLI_INLINE score_t BackwardReferenceScore( 120 size_t copy_length, size_t backward_reference_offset) { 121 return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length - 122 BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset); 123} 124 125static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance( 126 size_t copy_length) { 127 return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length + 128 BROTLI_SCORE_BASE + 15; 129} 130 131static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance( 132 size_t distance_short_code) { 133 return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE); 134} 135 136static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( 137 const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx, 138 const uint8_t* data, size_t max_length, size_t max_backward, 139 size_t max_distance, HasherSearchResult* out) { 140 size_t offset; 141 size_t matchlen; 142 size_t backward; 143 score_t score; 144 offset = dictionary->words->offsets_by_length[len] + len * word_idx; 145 if (len > max_length) { 146 return BROTLI_FALSE; 147 } 148 149 matchlen = 150 FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len); 151 if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) { 152 return BROTLI_FALSE; 153 } 154 { 155 size_t cut = len - matchlen; 156 size_t transform_id = (cut << 2) + 157 (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); 158 backward = max_backward + 1 + word_idx + 159 (transform_id << dictionary->words->size_bits_by_length[len]); 160 } 161 if (backward > max_distance) { 162 return BROTLI_FALSE; 163 } 164 score = BackwardReferenceScore(matchlen, backward); 165 if (score < out->score) { 166 return BROTLI_FALSE; 167 } 168 out->len = matchlen; 169 out->len_code_delta = (int)len - (int)matchlen; 170 out->distance = backward; 171 out->score = score; 172 return BROTLI_TRUE; 173} 174 175static BROTLI_INLINE void SearchInStaticDictionary( 176 const BrotliEncoderDictionary* dictionary, 177 HasherCommon* common, const uint8_t* data, size_t max_length, 178 size_t max_backward, size_t max_distance, 179 HasherSearchResult* out, BROTLI_BOOL shallow) { 180 size_t key; 181 size_t i; 182 if (common->dict_num_matches < (common->dict_num_lookups >> 7)) { 183 return; 184 } 185 key = Hash14(data) << 1; 186 for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { 187 common->dict_num_lookups++; 188 if (dictionary->hash_table_lengths[key] != 0) { 189 BROTLI_BOOL item_matches = TestStaticDictionaryItem( 190 dictionary, dictionary->hash_table_lengths[key], 191 dictionary->hash_table_words[key], data, 192 max_length, max_backward, max_distance, out); 193 if (item_matches) { 194 common->dict_num_matches++; 195 } 196 } 197 } 198} 199 200typedef struct BackwardMatch { 201 uint32_t distance; 202 uint32_t length_and_code; 203} BackwardMatch; 204 205static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self, 206 size_t dist, size_t len) { 207 self->distance = (uint32_t)dist; 208 self->length_and_code = (uint32_t)(len << 5); 209} 210 211static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self, 212 size_t dist, size_t len, size_t len_code) { 213 self->distance = (uint32_t)dist; 214 self->length_and_code = 215 (uint32_t)((len << 5) | (len == len_code ? 0 : len_code)); 216} 217 218static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) { 219 return self->length_and_code >> 5; 220} 221 222static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { 223 size_t code = self->length_and_code & 31; 224 return code ? code : BackwardMatchLength(self); 225} 226 227#define EXPAND_CAT(a, b) CAT(a, b) 228#define CAT(a, b) a ## b 229#define FN(X) EXPAND_CAT(X, HASHER()) 230 231#define HASHER() H10 232#define BUCKET_BITS 17 233#define MAX_TREE_SEARCH_DEPTH 64 234#define MAX_TREE_COMP_LENGTH 128 235#include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */ 236#undef MAX_TREE_SEARCH_DEPTH 237#undef MAX_TREE_COMP_LENGTH 238#undef BUCKET_BITS 239#undef HASHER 240/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */ 241#define MAX_NUM_MATCHES_H10 128 242 243/* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression 244 a little faster (0.5% - 1%) and it compresses 0.15% better on small text 245 and HTML inputs. */ 246 247#define HASHER() H2 248#define BUCKET_BITS 16 249#define BUCKET_SWEEP_BITS 0 250#define HASH_LEN 5 251#define USE_DICTIONARY 1 252#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ 253#undef BUCKET_SWEEP_BITS 254#undef USE_DICTIONARY 255#undef HASHER 256 257#define HASHER() H3 258#define BUCKET_SWEEP_BITS 1 259#define USE_DICTIONARY 0 260#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ 261#undef USE_DICTIONARY 262#undef BUCKET_SWEEP_BITS 263#undef BUCKET_BITS 264#undef HASHER 265 266#define HASHER() H4 267#define BUCKET_BITS 17 268#define BUCKET_SWEEP_BITS 2 269#define USE_DICTIONARY 1 270#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ 271#undef USE_DICTIONARY 272#undef HASH_LEN 273#undef BUCKET_SWEEP_BITS 274#undef BUCKET_BITS 275#undef HASHER 276 277#define HASHER() H5 278#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ 279#undef HASHER 280 281#define HASHER() H6 282#include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */ 283#undef HASHER 284 285#define BUCKET_BITS 15 286 287#define NUM_LAST_DISTANCES_TO_CHECK 4 288#define NUM_BANKS 1 289#define BANK_BITS 16 290#define HASHER() H40 291#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ 292#undef HASHER 293#undef NUM_LAST_DISTANCES_TO_CHECK 294 295#define NUM_LAST_DISTANCES_TO_CHECK 10 296#define HASHER() H41 297#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ 298#undef HASHER 299#undef NUM_LAST_DISTANCES_TO_CHECK 300#undef NUM_BANKS 301#undef BANK_BITS 302 303#define NUM_LAST_DISTANCES_TO_CHECK 16 304#define NUM_BANKS 512 305#define BANK_BITS 9 306#define HASHER() H42 307#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ 308#undef HASHER 309#undef NUM_LAST_DISTANCES_TO_CHECK 310#undef NUM_BANKS 311#undef BANK_BITS 312 313#undef BUCKET_BITS 314 315#define HASHER() H54 316#define BUCKET_BITS 20 317#define BUCKET_SWEEP_BITS 2 318#define HASH_LEN 7 319#define USE_DICTIONARY 0 320#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ 321#undef USE_DICTIONARY 322#undef HASH_LEN 323#undef BUCKET_SWEEP_BITS 324#undef BUCKET_BITS 325#undef HASHER 326 327/* fast large window hashers */ 328 329#define HASHER() HROLLING_FAST 330#define CHUNKLEN 32 331#define JUMP 4 332#define NUMBUCKETS 16777216 333#define MASK ((NUMBUCKETS * 64) - 1) 334#include "./hash_rolling_inc.h" /* NOLINT(build/include) */ 335#undef JUMP 336#undef HASHER 337 338 339#define HASHER() HROLLING 340#define JUMP 1 341#include "./hash_rolling_inc.h" /* NOLINT(build/include) */ 342#undef MASK 343#undef NUMBUCKETS 344#undef JUMP 345#undef CHUNKLEN 346#undef HASHER 347 348#define HASHER() H35 349#define HASHER_A H3 350#define HASHER_B HROLLING_FAST 351#include "./hash_composite_inc.h" /* NOLINT(build/include) */ 352#undef HASHER_A 353#undef HASHER_B 354#undef HASHER 355 356#define HASHER() H55 357#define HASHER_A H54 358#define HASHER_B HROLLING_FAST 359#include "./hash_composite_inc.h" /* NOLINT(build/include) */ 360#undef HASHER_A 361#undef HASHER_B 362#undef HASHER 363 364#define HASHER() H65 365#define HASHER_A H6 366#define HASHER_B HROLLING 367#include "./hash_composite_inc.h" /* NOLINT(build/include) */ 368#undef HASHER_A 369#undef HASHER_B 370#undef HASHER 371 372#undef FN 373#undef CAT 374#undef EXPAND_CAT 375 376#define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54) 377#define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65) 378#define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H) 379#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10) 380 381typedef struct { 382 HasherCommon common; 383 384 union { 385#define MEMBER_(N) \ 386 H ## N _H ## N; 387 FOR_ALL_HASHERS(MEMBER_) 388#undef MEMBER_ 389 } privat; 390} Hasher; 391 392/* MUST be invoked before any other method. */ 393static BROTLI_INLINE void HasherInit(Hasher* hasher) { 394 hasher->common.extra = NULL; 395} 396 397static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) { 398 if (hasher->common.extra == NULL) return; 399 BROTLI_FREE(m, hasher->common.extra); 400} 401 402static BROTLI_INLINE void HasherReset(Hasher* hasher) { 403 hasher->common.is_prepared_ = BROTLI_FALSE; 404} 405 406static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params, 407 BROTLI_BOOL one_shot, const size_t input_size) { 408 switch (params->hasher.type) { 409#define SIZE_(N) \ 410 case N: \ 411 return HashMemAllocInBytesH ## N(params, one_shot, input_size); 412 FOR_ALL_HASHERS(SIZE_) 413#undef SIZE_ 414 default: 415 break; 416 } 417 return 0; /* Default case. */ 418} 419 420static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher, 421 BrotliEncoderParams* params, const uint8_t* data, size_t position, 422 size_t input_size, BROTLI_BOOL is_last) { 423 BROTLI_BOOL one_shot = (position == 0 && is_last); 424 if (hasher->common.extra == NULL) { 425 size_t alloc_size; 426 ChooseHasher(params, ¶ms->hasher); 427 alloc_size = HasherSize(params, one_shot, input_size); 428 hasher->common.extra = BROTLI_ALLOC(m, uint8_t, alloc_size); 429 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra)) return; 430 hasher->common.params = params->hasher; 431 switch (hasher->common.params.type) { 432#define INITIALIZE_(N) \ 433 case N: \ 434 InitializeH ## N(&hasher->common, \ 435 &hasher->privat._H ## N, params); \ 436 break; 437 FOR_ALL_HASHERS(INITIALIZE_); 438#undef INITIALIZE_ 439 default: 440 break; 441 } 442 HasherReset(hasher); 443 } 444 445 if (!hasher->common.is_prepared_) { 446 switch (hasher->common.params.type) { 447#define PREPARE_(N) \ 448 case N: \ 449 PrepareH ## N( \ 450 &hasher->privat._H ## N, \ 451 one_shot, input_size, data); \ 452 break; 453 FOR_ALL_HASHERS(PREPARE_) 454#undef PREPARE_ 455 default: break; 456 } 457 if (position == 0) { 458 hasher->common.dict_num_lookups = 0; 459 hasher->common.dict_num_matches = 0; 460 } 461 hasher->common.is_prepared_ = BROTLI_TRUE; 462 } 463} 464 465static BROTLI_INLINE void InitOrStitchToPreviousBlock( 466 MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask, 467 BrotliEncoderParams* params, size_t position, size_t input_size, 468 BROTLI_BOOL is_last) { 469 HasherSetup(m, hasher, params, data, position, input_size, is_last); 470 if (BROTLI_IS_OOM(m)) return; 471 switch (hasher->common.params.type) { 472#define INIT_(N) \ 473 case N: \ 474 StitchToPreviousBlockH ## N( \ 475 &hasher->privat._H ## N, \ 476 input_size, position, data, mask); \ 477 break; 478 FOR_ALL_HASHERS(INIT_) 479#undef INIT_ 480 default: break; 481 } 482} 483 484#if defined(__cplusplus) || defined(c_plusplus) 485} /* extern "C" */ 486#endif 487 488#endif /* BROTLI_ENC_HASH_H_ */ 489