11cb0ef41Sopenharmony_ci/* Copyright 2013 Google Inc. All Rights Reserved. 21cb0ef41Sopenharmony_ci 31cb0ef41Sopenharmony_ci Distributed under MIT license. 41cb0ef41Sopenharmony_ci See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 51cb0ef41Sopenharmony_ci*/ 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ci#include "./static_dict.h" 81cb0ef41Sopenharmony_ci 91cb0ef41Sopenharmony_ci#include "../common/dictionary.h" 101cb0ef41Sopenharmony_ci#include "../common/platform.h" 111cb0ef41Sopenharmony_ci#include "../common/transform.h" 121cb0ef41Sopenharmony_ci#include "./encoder_dict.h" 131cb0ef41Sopenharmony_ci#include "./find_match_length.h" 141cb0ef41Sopenharmony_ci 151cb0ef41Sopenharmony_ci#if defined(__cplusplus) || defined(c_plusplus) 161cb0ef41Sopenharmony_ciextern "C" { 171cb0ef41Sopenharmony_ci#endif 181cb0ef41Sopenharmony_ci 191cb0ef41Sopenharmony_cistatic BROTLI_INLINE uint32_t Hash(const uint8_t* data) { 201cb0ef41Sopenharmony_ci uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32; 211cb0ef41Sopenharmony_ci /* The higher bits contain more mixture from the multiplication, 221cb0ef41Sopenharmony_ci so we take our results from there. */ 231cb0ef41Sopenharmony_ci return h >> (32 - kDictNumBits); 241cb0ef41Sopenharmony_ci} 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_cistatic BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code, 271cb0ef41Sopenharmony_ci uint32_t* matches) { 281cb0ef41Sopenharmony_ci uint32_t match = (uint32_t)((distance << 5) + len_code); 291cb0ef41Sopenharmony_ci matches[len] = BROTLI_MIN(uint32_t, matches[len], match); 301cb0ef41Sopenharmony_ci} 311cb0ef41Sopenharmony_ci 321cb0ef41Sopenharmony_cistatic BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary, 331cb0ef41Sopenharmony_ci const uint8_t* data, 341cb0ef41Sopenharmony_ci size_t id, 351cb0ef41Sopenharmony_ci size_t len, 361cb0ef41Sopenharmony_ci size_t maxlen) { 371cb0ef41Sopenharmony_ci const size_t offset = dictionary->offsets_by_length[len] + len * id; 381cb0ef41Sopenharmony_ci return FindMatchLengthWithLimit(&dictionary->data[offset], data, 391cb0ef41Sopenharmony_ci BROTLI_MIN(size_t, len, maxlen)); 401cb0ef41Sopenharmony_ci} 411cb0ef41Sopenharmony_ci 421cb0ef41Sopenharmony_cistatic BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary, 431cb0ef41Sopenharmony_ci DictWord w, const uint8_t* data, size_t max_length) { 441cb0ef41Sopenharmony_ci if (w.len > max_length) { 451cb0ef41Sopenharmony_ci return BROTLI_FALSE; 461cb0ef41Sopenharmony_ci } else { 471cb0ef41Sopenharmony_ci const size_t offset = dictionary->offsets_by_length[w.len] + 481cb0ef41Sopenharmony_ci (size_t)w.len * (size_t)w.idx; 491cb0ef41Sopenharmony_ci const uint8_t* dict = &dictionary->data[offset]; 501cb0ef41Sopenharmony_ci if (w.transform == 0) { 511cb0ef41Sopenharmony_ci /* Match against base dictionary word. */ 521cb0ef41Sopenharmony_ci return 531cb0ef41Sopenharmony_ci TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len); 541cb0ef41Sopenharmony_ci } else if (w.transform == 10) { 551cb0ef41Sopenharmony_ci /* Match against uppercase first transform. 561cb0ef41Sopenharmony_ci Note that there are only ASCII uppercase words in the lookup table. */ 571cb0ef41Sopenharmony_ci return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' && 581cb0ef41Sopenharmony_ci (dict[0] ^ 32) == data[0] && 591cb0ef41Sopenharmony_ci FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) == 601cb0ef41Sopenharmony_ci w.len - 1u); 611cb0ef41Sopenharmony_ci } else { 621cb0ef41Sopenharmony_ci /* Match against uppercase all transform. 631cb0ef41Sopenharmony_ci Note that there are only ASCII uppercase words in the lookup table. */ 641cb0ef41Sopenharmony_ci size_t i; 651cb0ef41Sopenharmony_ci for (i = 0; i < w.len; ++i) { 661cb0ef41Sopenharmony_ci if (dict[i] >= 'a' && dict[i] <= 'z') { 671cb0ef41Sopenharmony_ci if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE; 681cb0ef41Sopenharmony_ci } else { 691cb0ef41Sopenharmony_ci if (dict[i] != data[i]) return BROTLI_FALSE; 701cb0ef41Sopenharmony_ci } 711cb0ef41Sopenharmony_ci } 721cb0ef41Sopenharmony_ci return BROTLI_TRUE; 731cb0ef41Sopenharmony_ci } 741cb0ef41Sopenharmony_ci } 751cb0ef41Sopenharmony_ci} 761cb0ef41Sopenharmony_ci 771cb0ef41Sopenharmony_ciBROTLI_BOOL BrotliFindAllStaticDictionaryMatches( 781cb0ef41Sopenharmony_ci const BrotliEncoderDictionary* dictionary, const uint8_t* data, 791cb0ef41Sopenharmony_ci size_t min_length, size_t max_length, uint32_t* matches) { 801cb0ef41Sopenharmony_ci BROTLI_BOOL has_found_match = BROTLI_FALSE; 811cb0ef41Sopenharmony_ci { 821cb0ef41Sopenharmony_ci size_t offset = dictionary->buckets[Hash(data)]; 831cb0ef41Sopenharmony_ci BROTLI_BOOL end = !offset; 841cb0ef41Sopenharmony_ci while (!end) { 851cb0ef41Sopenharmony_ci DictWord w = dictionary->dict_words[offset++]; 861cb0ef41Sopenharmony_ci const size_t l = w.len & 0x1F; 871cb0ef41Sopenharmony_ci const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; 881cb0ef41Sopenharmony_ci const size_t id = w.idx; 891cb0ef41Sopenharmony_ci end = !!(w.len & 0x80); 901cb0ef41Sopenharmony_ci w.len = (uint8_t)l; 911cb0ef41Sopenharmony_ci if (w.transform == 0) { 921cb0ef41Sopenharmony_ci const size_t matchlen = 931cb0ef41Sopenharmony_ci DictMatchLength(dictionary->words, data, id, l, max_length); 941cb0ef41Sopenharmony_ci const uint8_t* s; 951cb0ef41Sopenharmony_ci size_t minlen; 961cb0ef41Sopenharmony_ci size_t maxlen; 971cb0ef41Sopenharmony_ci size_t len; 981cb0ef41Sopenharmony_ci /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */ 991cb0ef41Sopenharmony_ci if (matchlen == l) { 1001cb0ef41Sopenharmony_ci AddMatch(id, l, l, matches); 1011cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 1021cb0ef41Sopenharmony_ci } 1031cb0ef41Sopenharmony_ci /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and 1041cb0ef41Sopenharmony_ci "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */ 1051cb0ef41Sopenharmony_ci if (matchlen >= l - 1) { 1061cb0ef41Sopenharmony_ci AddMatch(id + 12 * n, l - 1, l, matches); 1071cb0ef41Sopenharmony_ci if (l + 2 < max_length && 1081cb0ef41Sopenharmony_ci data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' && 1091cb0ef41Sopenharmony_ci data[l + 2] == ' ') { 1101cb0ef41Sopenharmony_ci AddMatch(id + 49 * n, l + 3, l, matches); 1111cb0ef41Sopenharmony_ci } 1121cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 1131cb0ef41Sopenharmony_ci } 1141cb0ef41Sopenharmony_ci /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */ 1151cb0ef41Sopenharmony_ci minlen = min_length; 1161cb0ef41Sopenharmony_ci if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); 1171cb0ef41Sopenharmony_ci maxlen = BROTLI_MIN(size_t, matchlen, l - 2); 1181cb0ef41Sopenharmony_ci for (len = minlen; len <= maxlen; ++len) { 1191cb0ef41Sopenharmony_ci size_t cut = l - len; 1201cb0ef41Sopenharmony_ci size_t transform_id = (cut << 2) + 1211cb0ef41Sopenharmony_ci (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); 1221cb0ef41Sopenharmony_ci AddMatch(id + transform_id * n, len, l, matches); 1231cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 1241cb0ef41Sopenharmony_ci } 1251cb0ef41Sopenharmony_ci if (matchlen < l || l + 6 >= max_length) { 1261cb0ef41Sopenharmony_ci continue; 1271cb0ef41Sopenharmony_ci } 1281cb0ef41Sopenharmony_ci s = &data[l]; 1291cb0ef41Sopenharmony_ci /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */ 1301cb0ef41Sopenharmony_ci if (s[0] == ' ') { 1311cb0ef41Sopenharmony_ci AddMatch(id + n, l + 1, l, matches); 1321cb0ef41Sopenharmony_ci if (s[1] == 'a') { 1331cb0ef41Sopenharmony_ci if (s[2] == ' ') { 1341cb0ef41Sopenharmony_ci AddMatch(id + 28 * n, l + 3, l, matches); 1351cb0ef41Sopenharmony_ci } else if (s[2] == 's') { 1361cb0ef41Sopenharmony_ci if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches); 1371cb0ef41Sopenharmony_ci } else if (s[2] == 't') { 1381cb0ef41Sopenharmony_ci if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches); 1391cb0ef41Sopenharmony_ci } else if (s[2] == 'n') { 1401cb0ef41Sopenharmony_ci if (s[3] == 'd' && s[4] == ' ') { 1411cb0ef41Sopenharmony_ci AddMatch(id + 10 * n, l + 5, l, matches); 1421cb0ef41Sopenharmony_ci } 1431cb0ef41Sopenharmony_ci } 1441cb0ef41Sopenharmony_ci } else if (s[1] == 'b') { 1451cb0ef41Sopenharmony_ci if (s[2] == 'y' && s[3] == ' ') { 1461cb0ef41Sopenharmony_ci AddMatch(id + 38 * n, l + 4, l, matches); 1471cb0ef41Sopenharmony_ci } 1481cb0ef41Sopenharmony_ci } else if (s[1] == 'i') { 1491cb0ef41Sopenharmony_ci if (s[2] == 'n') { 1501cb0ef41Sopenharmony_ci if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); 1511cb0ef41Sopenharmony_ci } else if (s[2] == 's') { 1521cb0ef41Sopenharmony_ci if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches); 1531cb0ef41Sopenharmony_ci } 1541cb0ef41Sopenharmony_ci } else if (s[1] == 'f') { 1551cb0ef41Sopenharmony_ci if (s[2] == 'o') { 1561cb0ef41Sopenharmony_ci if (s[3] == 'r' && s[4] == ' ') { 1571cb0ef41Sopenharmony_ci AddMatch(id + 25 * n, l + 5, l, matches); 1581cb0ef41Sopenharmony_ci } 1591cb0ef41Sopenharmony_ci } else if (s[2] == 'r') { 1601cb0ef41Sopenharmony_ci if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') { 1611cb0ef41Sopenharmony_ci AddMatch(id + 37 * n, l + 6, l, matches); 1621cb0ef41Sopenharmony_ci } 1631cb0ef41Sopenharmony_ci } 1641cb0ef41Sopenharmony_ci } else if (s[1] == 'o') { 1651cb0ef41Sopenharmony_ci if (s[2] == 'f') { 1661cb0ef41Sopenharmony_ci if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches); 1671cb0ef41Sopenharmony_ci } else if (s[2] == 'n') { 1681cb0ef41Sopenharmony_ci if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches); 1691cb0ef41Sopenharmony_ci } 1701cb0ef41Sopenharmony_ci } else if (s[1] == 'n') { 1711cb0ef41Sopenharmony_ci if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') { 1721cb0ef41Sopenharmony_ci AddMatch(id + 80 * n, l + 5, l, matches); 1731cb0ef41Sopenharmony_ci } 1741cb0ef41Sopenharmony_ci } else if (s[1] == 't') { 1751cb0ef41Sopenharmony_ci if (s[2] == 'h') { 1761cb0ef41Sopenharmony_ci if (s[3] == 'e') { 1771cb0ef41Sopenharmony_ci if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches); 1781cb0ef41Sopenharmony_ci } else if (s[3] == 'a') { 1791cb0ef41Sopenharmony_ci if (s[4] == 't' && s[5] == ' ') { 1801cb0ef41Sopenharmony_ci AddMatch(id + 29 * n, l + 6, l, matches); 1811cb0ef41Sopenharmony_ci } 1821cb0ef41Sopenharmony_ci } 1831cb0ef41Sopenharmony_ci } else if (s[2] == 'o') { 1841cb0ef41Sopenharmony_ci if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches); 1851cb0ef41Sopenharmony_ci } 1861cb0ef41Sopenharmony_ci } else if (s[1] == 'w') { 1871cb0ef41Sopenharmony_ci if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') { 1881cb0ef41Sopenharmony_ci AddMatch(id + 35 * n, l + 6, l, matches); 1891cb0ef41Sopenharmony_ci } 1901cb0ef41Sopenharmony_ci } 1911cb0ef41Sopenharmony_ci } else if (s[0] == '"') { 1921cb0ef41Sopenharmony_ci AddMatch(id + 19 * n, l + 1, l, matches); 1931cb0ef41Sopenharmony_ci if (s[1] == '>') { 1941cb0ef41Sopenharmony_ci AddMatch(id + 21 * n, l + 2, l, matches); 1951cb0ef41Sopenharmony_ci } 1961cb0ef41Sopenharmony_ci } else if (s[0] == '.') { 1971cb0ef41Sopenharmony_ci AddMatch(id + 20 * n, l + 1, l, matches); 1981cb0ef41Sopenharmony_ci if (s[1] == ' ') { 1991cb0ef41Sopenharmony_ci AddMatch(id + 31 * n, l + 2, l, matches); 2001cb0ef41Sopenharmony_ci if (s[2] == 'T' && s[3] == 'h') { 2011cb0ef41Sopenharmony_ci if (s[4] == 'e') { 2021cb0ef41Sopenharmony_ci if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches); 2031cb0ef41Sopenharmony_ci } else if (s[4] == 'i') { 2041cb0ef41Sopenharmony_ci if (s[5] == 's' && s[6] == ' ') { 2051cb0ef41Sopenharmony_ci AddMatch(id + 75 * n, l + 7, l, matches); 2061cb0ef41Sopenharmony_ci } 2071cb0ef41Sopenharmony_ci } 2081cb0ef41Sopenharmony_ci } 2091cb0ef41Sopenharmony_ci } 2101cb0ef41Sopenharmony_ci } else if (s[0] == ',') { 2111cb0ef41Sopenharmony_ci AddMatch(id + 76 * n, l + 1, l, matches); 2121cb0ef41Sopenharmony_ci if (s[1] == ' ') { 2131cb0ef41Sopenharmony_ci AddMatch(id + 14 * n, l + 2, l, matches); 2141cb0ef41Sopenharmony_ci } 2151cb0ef41Sopenharmony_ci } else if (s[0] == '\n') { 2161cb0ef41Sopenharmony_ci AddMatch(id + 22 * n, l + 1, l, matches); 2171cb0ef41Sopenharmony_ci if (s[1] == '\t') { 2181cb0ef41Sopenharmony_ci AddMatch(id + 50 * n, l + 2, l, matches); 2191cb0ef41Sopenharmony_ci } 2201cb0ef41Sopenharmony_ci } else if (s[0] == ']') { 2211cb0ef41Sopenharmony_ci AddMatch(id + 24 * n, l + 1, l, matches); 2221cb0ef41Sopenharmony_ci } else if (s[0] == '\'') { 2231cb0ef41Sopenharmony_ci AddMatch(id + 36 * n, l + 1, l, matches); 2241cb0ef41Sopenharmony_ci } else if (s[0] == ':') { 2251cb0ef41Sopenharmony_ci AddMatch(id + 51 * n, l + 1, l, matches); 2261cb0ef41Sopenharmony_ci } else if (s[0] == '(') { 2271cb0ef41Sopenharmony_ci AddMatch(id + 57 * n, l + 1, l, matches); 2281cb0ef41Sopenharmony_ci } else if (s[0] == '=') { 2291cb0ef41Sopenharmony_ci if (s[1] == '"') { 2301cb0ef41Sopenharmony_ci AddMatch(id + 70 * n, l + 2, l, matches); 2311cb0ef41Sopenharmony_ci } else if (s[1] == '\'') { 2321cb0ef41Sopenharmony_ci AddMatch(id + 86 * n, l + 2, l, matches); 2331cb0ef41Sopenharmony_ci } 2341cb0ef41Sopenharmony_ci } else if (s[0] == 'a') { 2351cb0ef41Sopenharmony_ci if (s[1] == 'l' && s[2] == ' ') { 2361cb0ef41Sopenharmony_ci AddMatch(id + 84 * n, l + 3, l, matches); 2371cb0ef41Sopenharmony_ci } 2381cb0ef41Sopenharmony_ci } else if (s[0] == 'e') { 2391cb0ef41Sopenharmony_ci if (s[1] == 'd') { 2401cb0ef41Sopenharmony_ci if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches); 2411cb0ef41Sopenharmony_ci } else if (s[1] == 'r') { 2421cb0ef41Sopenharmony_ci if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches); 2431cb0ef41Sopenharmony_ci } else if (s[1] == 's') { 2441cb0ef41Sopenharmony_ci if (s[2] == 't' && s[3] == ' ') { 2451cb0ef41Sopenharmony_ci AddMatch(id + 95 * n, l + 4, l, matches); 2461cb0ef41Sopenharmony_ci } 2471cb0ef41Sopenharmony_ci } 2481cb0ef41Sopenharmony_ci } else if (s[0] == 'f') { 2491cb0ef41Sopenharmony_ci if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') { 2501cb0ef41Sopenharmony_ci AddMatch(id + 90 * n, l + 4, l, matches); 2511cb0ef41Sopenharmony_ci } 2521cb0ef41Sopenharmony_ci } else if (s[0] == 'i') { 2531cb0ef41Sopenharmony_ci if (s[1] == 'v') { 2541cb0ef41Sopenharmony_ci if (s[2] == 'e' && s[3] == ' ') { 2551cb0ef41Sopenharmony_ci AddMatch(id + 92 * n, l + 4, l, matches); 2561cb0ef41Sopenharmony_ci } 2571cb0ef41Sopenharmony_ci } else if (s[1] == 'z') { 2581cb0ef41Sopenharmony_ci if (s[2] == 'e' && s[3] == ' ') { 2591cb0ef41Sopenharmony_ci AddMatch(id + 100 * n, l + 4, l, matches); 2601cb0ef41Sopenharmony_ci } 2611cb0ef41Sopenharmony_ci } 2621cb0ef41Sopenharmony_ci } else if (s[0] == 'l') { 2631cb0ef41Sopenharmony_ci if (s[1] == 'e') { 2641cb0ef41Sopenharmony_ci if (s[2] == 's' && s[3] == 's' && s[4] == ' ') { 2651cb0ef41Sopenharmony_ci AddMatch(id + 93 * n, l + 5, l, matches); 2661cb0ef41Sopenharmony_ci } 2671cb0ef41Sopenharmony_ci } else if (s[1] == 'y') { 2681cb0ef41Sopenharmony_ci if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches); 2691cb0ef41Sopenharmony_ci } 2701cb0ef41Sopenharmony_ci } else if (s[0] == 'o') { 2711cb0ef41Sopenharmony_ci if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') { 2721cb0ef41Sopenharmony_ci AddMatch(id + 106 * n, l + 4, l, matches); 2731cb0ef41Sopenharmony_ci } 2741cb0ef41Sopenharmony_ci } 2751cb0ef41Sopenharmony_ci } else { 2761cb0ef41Sopenharmony_ci /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and 2771cb0ef41Sopenharmony_ci is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) 2781cb0ef41Sopenharmony_ci transform. */ 2791cb0ef41Sopenharmony_ci const BROTLI_BOOL is_all_caps = 2801cb0ef41Sopenharmony_ci TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); 2811cb0ef41Sopenharmony_ci const uint8_t* s; 2821cb0ef41Sopenharmony_ci if (!IsMatch(dictionary->words, w, data, max_length)) { 2831cb0ef41Sopenharmony_ci continue; 2841cb0ef41Sopenharmony_ci } 2851cb0ef41Sopenharmony_ci /* Transform "" + kUppercase{First,All} + "" */ 2861cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches); 2871cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 2881cb0ef41Sopenharmony_ci if (l + 1 >= max_length) { 2891cb0ef41Sopenharmony_ci continue; 2901cb0ef41Sopenharmony_ci } 2911cb0ef41Sopenharmony_ci /* Transforms "" + kUppercase{First,All} + <suffix> */ 2921cb0ef41Sopenharmony_ci s = &data[l]; 2931cb0ef41Sopenharmony_ci if (s[0] == ' ') { 2941cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches); 2951cb0ef41Sopenharmony_ci } else if (s[0] == '"') { 2961cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches); 2971cb0ef41Sopenharmony_ci if (s[1] == '>') { 2981cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches); 2991cb0ef41Sopenharmony_ci } 3001cb0ef41Sopenharmony_ci } else if (s[0] == '.') { 3011cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches); 3021cb0ef41Sopenharmony_ci if (s[1] == ' ') { 3031cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches); 3041cb0ef41Sopenharmony_ci } 3051cb0ef41Sopenharmony_ci } else if (s[0] == ',') { 3061cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches); 3071cb0ef41Sopenharmony_ci if (s[1] == ' ') { 3081cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches); 3091cb0ef41Sopenharmony_ci } 3101cb0ef41Sopenharmony_ci } else if (s[0] == '\'') { 3111cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches); 3121cb0ef41Sopenharmony_ci } else if (s[0] == '(') { 3131cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches); 3141cb0ef41Sopenharmony_ci } else if (s[0] == '=') { 3151cb0ef41Sopenharmony_ci if (s[1] == '"') { 3161cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches); 3171cb0ef41Sopenharmony_ci } else if (s[1] == '\'') { 3181cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches); 3191cb0ef41Sopenharmony_ci } 3201cb0ef41Sopenharmony_ci } 3211cb0ef41Sopenharmony_ci } 3221cb0ef41Sopenharmony_ci } 3231cb0ef41Sopenharmony_ci } 3241cb0ef41Sopenharmony_ci /* Transforms with prefixes " " and "." */ 3251cb0ef41Sopenharmony_ci if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { 3261cb0ef41Sopenharmony_ci BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); 3271cb0ef41Sopenharmony_ci size_t offset = dictionary->buckets[Hash(&data[1])]; 3281cb0ef41Sopenharmony_ci BROTLI_BOOL end = !offset; 3291cb0ef41Sopenharmony_ci while (!end) { 3301cb0ef41Sopenharmony_ci DictWord w = dictionary->dict_words[offset++]; 3311cb0ef41Sopenharmony_ci const size_t l = w.len & 0x1F; 3321cb0ef41Sopenharmony_ci const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; 3331cb0ef41Sopenharmony_ci const size_t id = w.idx; 3341cb0ef41Sopenharmony_ci end = !!(w.len & 0x80); 3351cb0ef41Sopenharmony_ci w.len = (uint8_t)l; 3361cb0ef41Sopenharmony_ci if (w.transform == 0) { 3371cb0ef41Sopenharmony_ci const uint8_t* s; 3381cb0ef41Sopenharmony_ci if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { 3391cb0ef41Sopenharmony_ci continue; 3401cb0ef41Sopenharmony_ci } 3411cb0ef41Sopenharmony_ci /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and 3421cb0ef41Sopenharmony_ci "." + BROTLI_TRANSFORM_IDENTITY + "" */ 3431cb0ef41Sopenharmony_ci AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); 3441cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 3451cb0ef41Sopenharmony_ci if (l + 2 >= max_length) { 3461cb0ef41Sopenharmony_ci continue; 3471cb0ef41Sopenharmony_ci } 3481cb0ef41Sopenharmony_ci /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and 3491cb0ef41Sopenharmony_ci "." + BROTLI_TRANSFORM_IDENTITY + <suffix> 3501cb0ef41Sopenharmony_ci */ 3511cb0ef41Sopenharmony_ci s = &data[l + 1]; 3521cb0ef41Sopenharmony_ci if (s[0] == ' ') { 3531cb0ef41Sopenharmony_ci AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); 3541cb0ef41Sopenharmony_ci } else if (s[0] == '(') { 3551cb0ef41Sopenharmony_ci AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches); 3561cb0ef41Sopenharmony_ci } else if (is_space) { 3571cb0ef41Sopenharmony_ci if (s[0] == ',') { 3581cb0ef41Sopenharmony_ci AddMatch(id + 103 * n, l + 2, l, matches); 3591cb0ef41Sopenharmony_ci if (s[1] == ' ') { 3601cb0ef41Sopenharmony_ci AddMatch(id + 33 * n, l + 3, l, matches); 3611cb0ef41Sopenharmony_ci } 3621cb0ef41Sopenharmony_ci } else if (s[0] == '.') { 3631cb0ef41Sopenharmony_ci AddMatch(id + 71 * n, l + 2, l, matches); 3641cb0ef41Sopenharmony_ci if (s[1] == ' ') { 3651cb0ef41Sopenharmony_ci AddMatch(id + 52 * n, l + 3, l, matches); 3661cb0ef41Sopenharmony_ci } 3671cb0ef41Sopenharmony_ci } else if (s[0] == '=') { 3681cb0ef41Sopenharmony_ci if (s[1] == '"') { 3691cb0ef41Sopenharmony_ci AddMatch(id + 81 * n, l + 3, l, matches); 3701cb0ef41Sopenharmony_ci } else if (s[1] == '\'') { 3711cb0ef41Sopenharmony_ci AddMatch(id + 98 * n, l + 3, l, matches); 3721cb0ef41Sopenharmony_ci } 3731cb0ef41Sopenharmony_ci } 3741cb0ef41Sopenharmony_ci } 3751cb0ef41Sopenharmony_ci } else if (is_space) { 3761cb0ef41Sopenharmony_ci /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and 3771cb0ef41Sopenharmony_ci is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) 3781cb0ef41Sopenharmony_ci transform. */ 3791cb0ef41Sopenharmony_ci const BROTLI_BOOL is_all_caps = 3801cb0ef41Sopenharmony_ci TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); 3811cb0ef41Sopenharmony_ci const uint8_t* s; 3821cb0ef41Sopenharmony_ci if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { 3831cb0ef41Sopenharmony_ci continue; 3841cb0ef41Sopenharmony_ci } 3851cb0ef41Sopenharmony_ci /* Transforms " " + kUppercase{First,All} + "" */ 3861cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches); 3871cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 3881cb0ef41Sopenharmony_ci if (l + 2 >= max_length) { 3891cb0ef41Sopenharmony_ci continue; 3901cb0ef41Sopenharmony_ci } 3911cb0ef41Sopenharmony_ci /* Transforms " " + kUppercase{First,All} + <suffix> */ 3921cb0ef41Sopenharmony_ci s = &data[l + 1]; 3931cb0ef41Sopenharmony_ci if (s[0] == ' ') { 3941cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches); 3951cb0ef41Sopenharmony_ci } else if (s[0] == ',') { 3961cb0ef41Sopenharmony_ci if (!is_all_caps) { 3971cb0ef41Sopenharmony_ci AddMatch(id + 109 * n, l + 2, l, matches); 3981cb0ef41Sopenharmony_ci } 3991cb0ef41Sopenharmony_ci if (s[1] == ' ') { 4001cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches); 4011cb0ef41Sopenharmony_ci } 4021cb0ef41Sopenharmony_ci } else if (s[0] == '.') { 4031cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches); 4041cb0ef41Sopenharmony_ci if (s[1] == ' ') { 4051cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches); 4061cb0ef41Sopenharmony_ci } 4071cb0ef41Sopenharmony_ci } else if (s[0] == '=') { 4081cb0ef41Sopenharmony_ci if (s[1] == '"') { 4091cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches); 4101cb0ef41Sopenharmony_ci } else if (s[1] == '\'') { 4111cb0ef41Sopenharmony_ci AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches); 4121cb0ef41Sopenharmony_ci } 4131cb0ef41Sopenharmony_ci } 4141cb0ef41Sopenharmony_ci } 4151cb0ef41Sopenharmony_ci } 4161cb0ef41Sopenharmony_ci } 4171cb0ef41Sopenharmony_ci if (max_length >= 6) { 4181cb0ef41Sopenharmony_ci /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */ 4191cb0ef41Sopenharmony_ci if ((data[1] == ' ' && 4201cb0ef41Sopenharmony_ci (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || 4211cb0ef41Sopenharmony_ci (data[0] == 0xC2 && data[1] == 0xA0)) { 4221cb0ef41Sopenharmony_ci size_t offset = dictionary->buckets[Hash(&data[2])]; 4231cb0ef41Sopenharmony_ci BROTLI_BOOL end = !offset; 4241cb0ef41Sopenharmony_ci while (!end) { 4251cb0ef41Sopenharmony_ci DictWord w = dictionary->dict_words[offset++]; 4261cb0ef41Sopenharmony_ci const size_t l = w.len & 0x1F; 4271cb0ef41Sopenharmony_ci const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; 4281cb0ef41Sopenharmony_ci const size_t id = w.idx; 4291cb0ef41Sopenharmony_ci end = !!(w.len & 0x80); 4301cb0ef41Sopenharmony_ci w.len = (uint8_t)l; 4311cb0ef41Sopenharmony_ci if (w.transform == 0 && 4321cb0ef41Sopenharmony_ci IsMatch(dictionary->words, w, &data[2], max_length - 2)) { 4331cb0ef41Sopenharmony_ci if (data[0] == 0xC2) { 4341cb0ef41Sopenharmony_ci AddMatch(id + 102 * n, l + 2, l, matches); 4351cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 4361cb0ef41Sopenharmony_ci } else if (l + 2 < max_length && data[l + 2] == ' ') { 4371cb0ef41Sopenharmony_ci size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13); 4381cb0ef41Sopenharmony_ci AddMatch(id + t * n, l + 3, l, matches); 4391cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 4401cb0ef41Sopenharmony_ci } 4411cb0ef41Sopenharmony_ci } 4421cb0ef41Sopenharmony_ci } 4431cb0ef41Sopenharmony_ci } 4441cb0ef41Sopenharmony_ci } 4451cb0ef41Sopenharmony_ci if (max_length >= 9) { 4461cb0ef41Sopenharmony_ci /* Transforms with prefixes " the " and ".com/" */ 4471cb0ef41Sopenharmony_ci if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && 4481cb0ef41Sopenharmony_ci data[3] == 'e' && data[4] == ' ') || 4491cb0ef41Sopenharmony_ci (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && 4501cb0ef41Sopenharmony_ci data[3] == 'm' && data[4] == '/')) { 4511cb0ef41Sopenharmony_ci size_t offset = dictionary->buckets[Hash(&data[5])]; 4521cb0ef41Sopenharmony_ci BROTLI_BOOL end = !offset; 4531cb0ef41Sopenharmony_ci while (!end) { 4541cb0ef41Sopenharmony_ci DictWord w = dictionary->dict_words[offset++]; 4551cb0ef41Sopenharmony_ci const size_t l = w.len & 0x1F; 4561cb0ef41Sopenharmony_ci const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; 4571cb0ef41Sopenharmony_ci const size_t id = w.idx; 4581cb0ef41Sopenharmony_ci end = !!(w.len & 0x80); 4591cb0ef41Sopenharmony_ci w.len = (uint8_t)l; 4601cb0ef41Sopenharmony_ci if (w.transform == 0 && 4611cb0ef41Sopenharmony_ci IsMatch(dictionary->words, w, &data[5], max_length - 5)) { 4621cb0ef41Sopenharmony_ci AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); 4631cb0ef41Sopenharmony_ci has_found_match = BROTLI_TRUE; 4641cb0ef41Sopenharmony_ci if (l + 5 < max_length) { 4651cb0ef41Sopenharmony_ci const uint8_t* s = &data[l + 5]; 4661cb0ef41Sopenharmony_ci if (data[0] == ' ') { 4671cb0ef41Sopenharmony_ci if (l + 8 < max_length && 4681cb0ef41Sopenharmony_ci s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') { 4691cb0ef41Sopenharmony_ci AddMatch(id + 62 * n, l + 9, l, matches); 4701cb0ef41Sopenharmony_ci if (l + 12 < max_length && 4711cb0ef41Sopenharmony_ci s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') { 4721cb0ef41Sopenharmony_ci AddMatch(id + 73 * n, l + 13, l, matches); 4731cb0ef41Sopenharmony_ci } 4741cb0ef41Sopenharmony_ci } 4751cb0ef41Sopenharmony_ci } 4761cb0ef41Sopenharmony_ci } 4771cb0ef41Sopenharmony_ci } 4781cb0ef41Sopenharmony_ci } 4791cb0ef41Sopenharmony_ci } 4801cb0ef41Sopenharmony_ci } 4811cb0ef41Sopenharmony_ci return has_found_match; 4821cb0ef41Sopenharmony_ci} 4831cb0ef41Sopenharmony_ci 4841cb0ef41Sopenharmony_ci#if defined(__cplusplus) || defined(c_plusplus) 4851cb0ef41Sopenharmony_ci} /* extern "C" */ 4861cb0ef41Sopenharmony_ci#endif 487