11cb0ef41Sopenharmony_ci/* Copyright 2013 Google Inc. All Rights Reserved.
21cb0ef41Sopenharmony_ci
31cb0ef41Sopenharmony_ci   Distributed under MIT license.
41cb0ef41Sopenharmony_ci   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
51cb0ef41Sopenharmony_ci*/
61cb0ef41Sopenharmony_ci
71cb0ef41Sopenharmony_ci#include "./static_dict.h"
81cb0ef41Sopenharmony_ci
91cb0ef41Sopenharmony_ci#include "../common/dictionary.h"
101cb0ef41Sopenharmony_ci#include "../common/platform.h"
111cb0ef41Sopenharmony_ci#include "../common/transform.h"
121cb0ef41Sopenharmony_ci#include "./encoder_dict.h"
131cb0ef41Sopenharmony_ci#include "./find_match_length.h"
141cb0ef41Sopenharmony_ci
151cb0ef41Sopenharmony_ci#if defined(__cplusplus) || defined(c_plusplus)
161cb0ef41Sopenharmony_ciextern "C" {
171cb0ef41Sopenharmony_ci#endif
181cb0ef41Sopenharmony_ci
191cb0ef41Sopenharmony_cistatic BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
201cb0ef41Sopenharmony_ci  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
211cb0ef41Sopenharmony_ci  /* The higher bits contain more mixture from the multiplication,
221cb0ef41Sopenharmony_ci     so we take our results from there. */
231cb0ef41Sopenharmony_ci  return h >> (32 - kDictNumBits);
241cb0ef41Sopenharmony_ci}
251cb0ef41Sopenharmony_ci
261cb0ef41Sopenharmony_cistatic BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
271cb0ef41Sopenharmony_ci                                   uint32_t* matches) {
281cb0ef41Sopenharmony_ci  uint32_t match = (uint32_t)((distance << 5) + len_code);
291cb0ef41Sopenharmony_ci  matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
301cb0ef41Sopenharmony_ci}
311cb0ef41Sopenharmony_ci
321cb0ef41Sopenharmony_cistatic BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
331cb0ef41Sopenharmony_ci                                            const uint8_t* data,
341cb0ef41Sopenharmony_ci                                            size_t id,
351cb0ef41Sopenharmony_ci                                            size_t len,
361cb0ef41Sopenharmony_ci                                            size_t maxlen) {
371cb0ef41Sopenharmony_ci  const size_t offset = dictionary->offsets_by_length[len] + len * id;
381cb0ef41Sopenharmony_ci  return FindMatchLengthWithLimit(&dictionary->data[offset], data,
391cb0ef41Sopenharmony_ci                                  BROTLI_MIN(size_t, len, maxlen));
401cb0ef41Sopenharmony_ci}
411cb0ef41Sopenharmony_ci
421cb0ef41Sopenharmony_cistatic BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
431cb0ef41Sopenharmony_ci    DictWord w, const uint8_t* data, size_t max_length) {
441cb0ef41Sopenharmony_ci  if (w.len > max_length) {
451cb0ef41Sopenharmony_ci    return BROTLI_FALSE;
461cb0ef41Sopenharmony_ci  } else {
471cb0ef41Sopenharmony_ci    const size_t offset = dictionary->offsets_by_length[w.len] +
481cb0ef41Sopenharmony_ci        (size_t)w.len * (size_t)w.idx;
491cb0ef41Sopenharmony_ci    const uint8_t* dict = &dictionary->data[offset];
501cb0ef41Sopenharmony_ci    if (w.transform == 0) {
511cb0ef41Sopenharmony_ci      /* Match against base dictionary word. */
521cb0ef41Sopenharmony_ci      return
531cb0ef41Sopenharmony_ci          TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
541cb0ef41Sopenharmony_ci    } else if (w.transform == 10) {
551cb0ef41Sopenharmony_ci      /* Match against uppercase first transform.
561cb0ef41Sopenharmony_ci         Note that there are only ASCII uppercase words in the lookup table. */
571cb0ef41Sopenharmony_ci      return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
581cb0ef41Sopenharmony_ci              (dict[0] ^ 32) == data[0] &&
591cb0ef41Sopenharmony_ci              FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
601cb0ef41Sopenharmony_ci              w.len - 1u);
611cb0ef41Sopenharmony_ci    } else {
621cb0ef41Sopenharmony_ci      /* Match against uppercase all transform.
631cb0ef41Sopenharmony_ci         Note that there are only ASCII uppercase words in the lookup table. */
641cb0ef41Sopenharmony_ci      size_t i;
651cb0ef41Sopenharmony_ci      for (i = 0; i < w.len; ++i) {
661cb0ef41Sopenharmony_ci        if (dict[i] >= 'a' && dict[i] <= 'z') {
671cb0ef41Sopenharmony_ci          if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
681cb0ef41Sopenharmony_ci        } else {
691cb0ef41Sopenharmony_ci          if (dict[i] != data[i]) return BROTLI_FALSE;
701cb0ef41Sopenharmony_ci        }
711cb0ef41Sopenharmony_ci      }
721cb0ef41Sopenharmony_ci      return BROTLI_TRUE;
731cb0ef41Sopenharmony_ci    }
741cb0ef41Sopenharmony_ci  }
751cb0ef41Sopenharmony_ci}
761cb0ef41Sopenharmony_ci
771cb0ef41Sopenharmony_ciBROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
781cb0ef41Sopenharmony_ci    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
791cb0ef41Sopenharmony_ci    size_t min_length, size_t max_length, uint32_t* matches) {
801cb0ef41Sopenharmony_ci  BROTLI_BOOL has_found_match = BROTLI_FALSE;
811cb0ef41Sopenharmony_ci  {
821cb0ef41Sopenharmony_ci    size_t offset = dictionary->buckets[Hash(data)];
831cb0ef41Sopenharmony_ci    BROTLI_BOOL end = !offset;
841cb0ef41Sopenharmony_ci    while (!end) {
851cb0ef41Sopenharmony_ci      DictWord w = dictionary->dict_words[offset++];
861cb0ef41Sopenharmony_ci      const size_t l = w.len & 0x1F;
871cb0ef41Sopenharmony_ci      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
881cb0ef41Sopenharmony_ci      const size_t id = w.idx;
891cb0ef41Sopenharmony_ci      end = !!(w.len & 0x80);
901cb0ef41Sopenharmony_ci      w.len = (uint8_t)l;
911cb0ef41Sopenharmony_ci      if (w.transform == 0) {
921cb0ef41Sopenharmony_ci        const size_t matchlen =
931cb0ef41Sopenharmony_ci            DictMatchLength(dictionary->words, data, id, l, max_length);
941cb0ef41Sopenharmony_ci        const uint8_t* s;
951cb0ef41Sopenharmony_ci        size_t minlen;
961cb0ef41Sopenharmony_ci        size_t maxlen;
971cb0ef41Sopenharmony_ci        size_t len;
981cb0ef41Sopenharmony_ci        /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
991cb0ef41Sopenharmony_ci        if (matchlen == l) {
1001cb0ef41Sopenharmony_ci          AddMatch(id, l, l, matches);
1011cb0ef41Sopenharmony_ci          has_found_match = BROTLI_TRUE;
1021cb0ef41Sopenharmony_ci        }
1031cb0ef41Sopenharmony_ci        /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
1041cb0ef41Sopenharmony_ci                      "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
1051cb0ef41Sopenharmony_ci        if (matchlen >= l - 1) {
1061cb0ef41Sopenharmony_ci          AddMatch(id + 12 * n, l - 1, l, matches);
1071cb0ef41Sopenharmony_ci          if (l + 2 < max_length &&
1081cb0ef41Sopenharmony_ci              data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
1091cb0ef41Sopenharmony_ci              data[l + 2] == ' ') {
1101cb0ef41Sopenharmony_ci            AddMatch(id + 49 * n, l + 3, l, matches);
1111cb0ef41Sopenharmony_ci          }
1121cb0ef41Sopenharmony_ci          has_found_match = BROTLI_TRUE;
1131cb0ef41Sopenharmony_ci        }
1141cb0ef41Sopenharmony_ci        /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
1151cb0ef41Sopenharmony_ci        minlen = min_length;
1161cb0ef41Sopenharmony_ci        if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
1171cb0ef41Sopenharmony_ci        maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
1181cb0ef41Sopenharmony_ci        for (len = minlen; len <= maxlen; ++len) {
1191cb0ef41Sopenharmony_ci          size_t cut = l - len;
1201cb0ef41Sopenharmony_ci          size_t transform_id = (cut << 2) +
1211cb0ef41Sopenharmony_ci              (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
1221cb0ef41Sopenharmony_ci          AddMatch(id + transform_id * n, len, l, matches);
1231cb0ef41Sopenharmony_ci          has_found_match = BROTLI_TRUE;
1241cb0ef41Sopenharmony_ci        }
1251cb0ef41Sopenharmony_ci        if (matchlen < l || l + 6 >= max_length) {
1261cb0ef41Sopenharmony_ci          continue;
1271cb0ef41Sopenharmony_ci        }
1281cb0ef41Sopenharmony_ci        s = &data[l];
1291cb0ef41Sopenharmony_ci        /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
1301cb0ef41Sopenharmony_ci        if (s[0] == ' ') {
1311cb0ef41Sopenharmony_ci          AddMatch(id + n, l + 1, l, matches);
1321cb0ef41Sopenharmony_ci          if (s[1] == 'a') {
1331cb0ef41Sopenharmony_ci            if (s[2] == ' ') {
1341cb0ef41Sopenharmony_ci              AddMatch(id + 28 * n, l + 3, l, matches);
1351cb0ef41Sopenharmony_ci            } else if (s[2] == 's') {
1361cb0ef41Sopenharmony_ci              if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
1371cb0ef41Sopenharmony_ci            } else if (s[2] == 't') {
1381cb0ef41Sopenharmony_ci              if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
1391cb0ef41Sopenharmony_ci            } else if (s[2] == 'n') {
1401cb0ef41Sopenharmony_ci              if (s[3] == 'd' && s[4] == ' ') {
1411cb0ef41Sopenharmony_ci                AddMatch(id + 10 * n, l + 5, l, matches);
1421cb0ef41Sopenharmony_ci              }
1431cb0ef41Sopenharmony_ci            }
1441cb0ef41Sopenharmony_ci          } else if (s[1] == 'b') {
1451cb0ef41Sopenharmony_ci            if (s[2] == 'y' && s[3] == ' ') {
1461cb0ef41Sopenharmony_ci              AddMatch(id + 38 * n, l + 4, l, matches);
1471cb0ef41Sopenharmony_ci            }
1481cb0ef41Sopenharmony_ci          } else if (s[1] == 'i') {
1491cb0ef41Sopenharmony_ci            if (s[2] == 'n') {
1501cb0ef41Sopenharmony_ci              if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
1511cb0ef41Sopenharmony_ci            } else if (s[2] == 's') {
1521cb0ef41Sopenharmony_ci              if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
1531cb0ef41Sopenharmony_ci            }
1541cb0ef41Sopenharmony_ci          } else if (s[1] == 'f') {
1551cb0ef41Sopenharmony_ci            if (s[2] == 'o') {
1561cb0ef41Sopenharmony_ci              if (s[3] == 'r' && s[4] == ' ') {
1571cb0ef41Sopenharmony_ci                AddMatch(id + 25 * n, l + 5, l, matches);
1581cb0ef41Sopenharmony_ci              }
1591cb0ef41Sopenharmony_ci            } else if (s[2] == 'r') {
1601cb0ef41Sopenharmony_ci              if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
1611cb0ef41Sopenharmony_ci                AddMatch(id + 37 * n, l + 6, l, matches);
1621cb0ef41Sopenharmony_ci              }
1631cb0ef41Sopenharmony_ci            }
1641cb0ef41Sopenharmony_ci          } else if (s[1] == 'o') {
1651cb0ef41Sopenharmony_ci            if (s[2] == 'f') {
1661cb0ef41Sopenharmony_ci              if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
1671cb0ef41Sopenharmony_ci            } else if (s[2] == 'n') {
1681cb0ef41Sopenharmony_ci              if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
1691cb0ef41Sopenharmony_ci            }
1701cb0ef41Sopenharmony_ci          } else if (s[1] == 'n') {
1711cb0ef41Sopenharmony_ci            if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
1721cb0ef41Sopenharmony_ci              AddMatch(id + 80 * n, l + 5, l, matches);
1731cb0ef41Sopenharmony_ci            }
1741cb0ef41Sopenharmony_ci          } else if (s[1] == 't') {
1751cb0ef41Sopenharmony_ci            if (s[2] == 'h') {
1761cb0ef41Sopenharmony_ci              if (s[3] == 'e') {
1771cb0ef41Sopenharmony_ci                if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
1781cb0ef41Sopenharmony_ci              } else if (s[3] == 'a') {
1791cb0ef41Sopenharmony_ci                if (s[4] == 't' && s[5] == ' ') {
1801cb0ef41Sopenharmony_ci                  AddMatch(id + 29 * n, l + 6, l, matches);
1811cb0ef41Sopenharmony_ci                }
1821cb0ef41Sopenharmony_ci              }
1831cb0ef41Sopenharmony_ci            } else if (s[2] == 'o') {
1841cb0ef41Sopenharmony_ci              if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
1851cb0ef41Sopenharmony_ci            }
1861cb0ef41Sopenharmony_ci          } else if (s[1] == 'w') {
1871cb0ef41Sopenharmony_ci            if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
1881cb0ef41Sopenharmony_ci              AddMatch(id + 35 * n, l + 6, l, matches);
1891cb0ef41Sopenharmony_ci            }
1901cb0ef41Sopenharmony_ci          }
1911cb0ef41Sopenharmony_ci        } else if (s[0] == '"') {
1921cb0ef41Sopenharmony_ci          AddMatch(id + 19 * n, l + 1, l, matches);
1931cb0ef41Sopenharmony_ci          if (s[1] == '>') {
1941cb0ef41Sopenharmony_ci            AddMatch(id + 21 * n, l + 2, l, matches);
1951cb0ef41Sopenharmony_ci          }
1961cb0ef41Sopenharmony_ci        } else if (s[0] == '.') {
1971cb0ef41Sopenharmony_ci          AddMatch(id + 20 * n, l + 1, l, matches);
1981cb0ef41Sopenharmony_ci          if (s[1] == ' ') {
1991cb0ef41Sopenharmony_ci            AddMatch(id + 31 * n, l + 2, l, matches);
2001cb0ef41Sopenharmony_ci            if (s[2] == 'T' && s[3] == 'h') {
2011cb0ef41Sopenharmony_ci              if (s[4] == 'e') {
2021cb0ef41Sopenharmony_ci                if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
2031cb0ef41Sopenharmony_ci              } else if (s[4] == 'i') {
2041cb0ef41Sopenharmony_ci                if (s[5] == 's' && s[6] == ' ') {
2051cb0ef41Sopenharmony_ci                  AddMatch(id + 75 * n, l + 7, l, matches);
2061cb0ef41Sopenharmony_ci                }
2071cb0ef41Sopenharmony_ci              }
2081cb0ef41Sopenharmony_ci            }
2091cb0ef41Sopenharmony_ci          }
2101cb0ef41Sopenharmony_ci        } else if (s[0] == ',') {
2111cb0ef41Sopenharmony_ci          AddMatch(id + 76 * n, l + 1, l, matches);
2121cb0ef41Sopenharmony_ci          if (s[1] == ' ') {
2131cb0ef41Sopenharmony_ci            AddMatch(id + 14 * n, l + 2, l, matches);
2141cb0ef41Sopenharmony_ci          }
2151cb0ef41Sopenharmony_ci        } else if (s[0] == '\n') {
2161cb0ef41Sopenharmony_ci          AddMatch(id + 22 * n, l + 1, l, matches);
2171cb0ef41Sopenharmony_ci          if (s[1] == '\t') {
2181cb0ef41Sopenharmony_ci            AddMatch(id + 50 * n, l + 2, l, matches);
2191cb0ef41Sopenharmony_ci          }
2201cb0ef41Sopenharmony_ci        } else if (s[0] == ']') {
2211cb0ef41Sopenharmony_ci          AddMatch(id + 24 * n, l + 1, l, matches);
2221cb0ef41Sopenharmony_ci        } else if (s[0] == '\'') {
2231cb0ef41Sopenharmony_ci          AddMatch(id + 36 * n, l + 1, l, matches);
2241cb0ef41Sopenharmony_ci        } else if (s[0] == ':') {
2251cb0ef41Sopenharmony_ci          AddMatch(id + 51 * n, l + 1, l, matches);
2261cb0ef41Sopenharmony_ci        } else if (s[0] == '(') {
2271cb0ef41Sopenharmony_ci          AddMatch(id + 57 * n, l + 1, l, matches);
2281cb0ef41Sopenharmony_ci        } else if (s[0] == '=') {
2291cb0ef41Sopenharmony_ci          if (s[1] == '"') {
2301cb0ef41Sopenharmony_ci            AddMatch(id + 70 * n, l + 2, l, matches);
2311cb0ef41Sopenharmony_ci          } else if (s[1] == '\'') {
2321cb0ef41Sopenharmony_ci            AddMatch(id + 86 * n, l + 2, l, matches);
2331cb0ef41Sopenharmony_ci          }
2341cb0ef41Sopenharmony_ci        } else if (s[0] == 'a') {
2351cb0ef41Sopenharmony_ci          if (s[1] == 'l' && s[2] == ' ') {
2361cb0ef41Sopenharmony_ci            AddMatch(id + 84 * n, l + 3, l, matches);
2371cb0ef41Sopenharmony_ci          }
2381cb0ef41Sopenharmony_ci        } else if (s[0] == 'e') {
2391cb0ef41Sopenharmony_ci          if (s[1] == 'd') {
2401cb0ef41Sopenharmony_ci            if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
2411cb0ef41Sopenharmony_ci          } else if (s[1] == 'r') {
2421cb0ef41Sopenharmony_ci            if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
2431cb0ef41Sopenharmony_ci          } else if (s[1] == 's') {
2441cb0ef41Sopenharmony_ci            if (s[2] == 't' && s[3] == ' ') {
2451cb0ef41Sopenharmony_ci              AddMatch(id + 95 * n, l + 4, l, matches);
2461cb0ef41Sopenharmony_ci            }
2471cb0ef41Sopenharmony_ci          }
2481cb0ef41Sopenharmony_ci        } else if (s[0] == 'f') {
2491cb0ef41Sopenharmony_ci          if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
2501cb0ef41Sopenharmony_ci            AddMatch(id + 90 * n, l + 4, l, matches);
2511cb0ef41Sopenharmony_ci          }
2521cb0ef41Sopenharmony_ci        } else if (s[0] == 'i') {
2531cb0ef41Sopenharmony_ci          if (s[1] == 'v') {
2541cb0ef41Sopenharmony_ci            if (s[2] == 'e' && s[3] == ' ') {
2551cb0ef41Sopenharmony_ci              AddMatch(id + 92 * n, l + 4, l, matches);
2561cb0ef41Sopenharmony_ci            }
2571cb0ef41Sopenharmony_ci          } else if (s[1] == 'z') {
2581cb0ef41Sopenharmony_ci            if (s[2] == 'e' && s[3] == ' ') {
2591cb0ef41Sopenharmony_ci              AddMatch(id + 100 * n, l + 4, l, matches);
2601cb0ef41Sopenharmony_ci            }
2611cb0ef41Sopenharmony_ci          }
2621cb0ef41Sopenharmony_ci        } else if (s[0] == 'l') {
2631cb0ef41Sopenharmony_ci          if (s[1] == 'e') {
2641cb0ef41Sopenharmony_ci            if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
2651cb0ef41Sopenharmony_ci              AddMatch(id + 93 * n, l + 5, l, matches);
2661cb0ef41Sopenharmony_ci            }
2671cb0ef41Sopenharmony_ci          } else if (s[1] == 'y') {
2681cb0ef41Sopenharmony_ci            if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
2691cb0ef41Sopenharmony_ci          }
2701cb0ef41Sopenharmony_ci        } else if (s[0] == 'o') {
2711cb0ef41Sopenharmony_ci          if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
2721cb0ef41Sopenharmony_ci            AddMatch(id + 106 * n, l + 4, l, matches);
2731cb0ef41Sopenharmony_ci          }
2741cb0ef41Sopenharmony_ci        }
2751cb0ef41Sopenharmony_ci      } else {
2761cb0ef41Sopenharmony_ci        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
2771cb0ef41Sopenharmony_ci               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
2781cb0ef41Sopenharmony_ci           transform. */
2791cb0ef41Sopenharmony_ci        const BROTLI_BOOL is_all_caps =
2801cb0ef41Sopenharmony_ci            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
2811cb0ef41Sopenharmony_ci        const uint8_t* s;
2821cb0ef41Sopenharmony_ci        if (!IsMatch(dictionary->words, w, data, max_length)) {
2831cb0ef41Sopenharmony_ci          continue;
2841cb0ef41Sopenharmony_ci        }
2851cb0ef41Sopenharmony_ci        /* Transform "" + kUppercase{First,All} + "" */
2861cb0ef41Sopenharmony_ci        AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
2871cb0ef41Sopenharmony_ci        has_found_match = BROTLI_TRUE;
2881cb0ef41Sopenharmony_ci        if (l + 1 >= max_length) {
2891cb0ef41Sopenharmony_ci          continue;
2901cb0ef41Sopenharmony_ci        }
2911cb0ef41Sopenharmony_ci        /* Transforms "" + kUppercase{First,All} + <suffix> */
2921cb0ef41Sopenharmony_ci        s = &data[l];
2931cb0ef41Sopenharmony_ci        if (s[0] == ' ') {
2941cb0ef41Sopenharmony_ci          AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
2951cb0ef41Sopenharmony_ci        } else if (s[0] == '"') {
2961cb0ef41Sopenharmony_ci          AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
2971cb0ef41Sopenharmony_ci          if (s[1] == '>') {
2981cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
2991cb0ef41Sopenharmony_ci          }
3001cb0ef41Sopenharmony_ci        } else if (s[0] == '.') {
3011cb0ef41Sopenharmony_ci          AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
3021cb0ef41Sopenharmony_ci          if (s[1] == ' ') {
3031cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
3041cb0ef41Sopenharmony_ci          }
3051cb0ef41Sopenharmony_ci        } else if (s[0] == ',') {
3061cb0ef41Sopenharmony_ci          AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
3071cb0ef41Sopenharmony_ci          if (s[1] == ' ') {
3081cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
3091cb0ef41Sopenharmony_ci          }
3101cb0ef41Sopenharmony_ci        } else if (s[0] == '\'') {
3111cb0ef41Sopenharmony_ci          AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
3121cb0ef41Sopenharmony_ci        } else if (s[0] == '(') {
3131cb0ef41Sopenharmony_ci          AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
3141cb0ef41Sopenharmony_ci        } else if (s[0] == '=') {
3151cb0ef41Sopenharmony_ci          if (s[1] == '"') {
3161cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
3171cb0ef41Sopenharmony_ci          } else if (s[1] == '\'') {
3181cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
3191cb0ef41Sopenharmony_ci          }
3201cb0ef41Sopenharmony_ci        }
3211cb0ef41Sopenharmony_ci      }
3221cb0ef41Sopenharmony_ci    }
3231cb0ef41Sopenharmony_ci  }
3241cb0ef41Sopenharmony_ci  /* Transforms with prefixes " " and "." */
3251cb0ef41Sopenharmony_ci  if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
3261cb0ef41Sopenharmony_ci    BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
3271cb0ef41Sopenharmony_ci    size_t offset = dictionary->buckets[Hash(&data[1])];
3281cb0ef41Sopenharmony_ci    BROTLI_BOOL end = !offset;
3291cb0ef41Sopenharmony_ci    while (!end) {
3301cb0ef41Sopenharmony_ci      DictWord w = dictionary->dict_words[offset++];
3311cb0ef41Sopenharmony_ci      const size_t l = w.len & 0x1F;
3321cb0ef41Sopenharmony_ci      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
3331cb0ef41Sopenharmony_ci      const size_t id = w.idx;
3341cb0ef41Sopenharmony_ci      end = !!(w.len & 0x80);
3351cb0ef41Sopenharmony_ci      w.len = (uint8_t)l;
3361cb0ef41Sopenharmony_ci      if (w.transform == 0) {
3371cb0ef41Sopenharmony_ci        const uint8_t* s;
3381cb0ef41Sopenharmony_ci        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
3391cb0ef41Sopenharmony_ci          continue;
3401cb0ef41Sopenharmony_ci        }
3411cb0ef41Sopenharmony_ci        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
3421cb0ef41Sopenharmony_ci                      "." + BROTLI_TRANSFORM_IDENTITY + "" */
3431cb0ef41Sopenharmony_ci        AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
3441cb0ef41Sopenharmony_ci        has_found_match = BROTLI_TRUE;
3451cb0ef41Sopenharmony_ci        if (l + 2 >= max_length) {
3461cb0ef41Sopenharmony_ci          continue;
3471cb0ef41Sopenharmony_ci        }
3481cb0ef41Sopenharmony_ci        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
3491cb0ef41Sopenharmony_ci                      "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
3501cb0ef41Sopenharmony_ci        */
3511cb0ef41Sopenharmony_ci        s = &data[l + 1];
3521cb0ef41Sopenharmony_ci        if (s[0] == ' ') {
3531cb0ef41Sopenharmony_ci          AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
3541cb0ef41Sopenharmony_ci        } else if (s[0] == '(') {
3551cb0ef41Sopenharmony_ci          AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
3561cb0ef41Sopenharmony_ci        } else if (is_space) {
3571cb0ef41Sopenharmony_ci          if (s[0] == ',') {
3581cb0ef41Sopenharmony_ci            AddMatch(id + 103 * n, l + 2, l, matches);
3591cb0ef41Sopenharmony_ci            if (s[1] == ' ') {
3601cb0ef41Sopenharmony_ci              AddMatch(id + 33 * n, l + 3, l, matches);
3611cb0ef41Sopenharmony_ci            }
3621cb0ef41Sopenharmony_ci          } else if (s[0] == '.') {
3631cb0ef41Sopenharmony_ci            AddMatch(id + 71 * n, l + 2, l, matches);
3641cb0ef41Sopenharmony_ci            if (s[1] == ' ') {
3651cb0ef41Sopenharmony_ci              AddMatch(id + 52 * n, l + 3, l, matches);
3661cb0ef41Sopenharmony_ci            }
3671cb0ef41Sopenharmony_ci          } else if (s[0] == '=') {
3681cb0ef41Sopenharmony_ci            if (s[1] == '"') {
3691cb0ef41Sopenharmony_ci              AddMatch(id + 81 * n, l + 3, l, matches);
3701cb0ef41Sopenharmony_ci            } else if (s[1] == '\'') {
3711cb0ef41Sopenharmony_ci              AddMatch(id + 98 * n, l + 3, l, matches);
3721cb0ef41Sopenharmony_ci            }
3731cb0ef41Sopenharmony_ci          }
3741cb0ef41Sopenharmony_ci        }
3751cb0ef41Sopenharmony_ci      } else if (is_space) {
3761cb0ef41Sopenharmony_ci        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
3771cb0ef41Sopenharmony_ci               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
3781cb0ef41Sopenharmony_ci           transform. */
3791cb0ef41Sopenharmony_ci        const BROTLI_BOOL is_all_caps =
3801cb0ef41Sopenharmony_ci            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
3811cb0ef41Sopenharmony_ci        const uint8_t* s;
3821cb0ef41Sopenharmony_ci        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
3831cb0ef41Sopenharmony_ci          continue;
3841cb0ef41Sopenharmony_ci        }
3851cb0ef41Sopenharmony_ci        /* Transforms " " + kUppercase{First,All} + "" */
3861cb0ef41Sopenharmony_ci        AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
3871cb0ef41Sopenharmony_ci        has_found_match = BROTLI_TRUE;
3881cb0ef41Sopenharmony_ci        if (l + 2 >= max_length) {
3891cb0ef41Sopenharmony_ci          continue;
3901cb0ef41Sopenharmony_ci        }
3911cb0ef41Sopenharmony_ci        /* Transforms " " + kUppercase{First,All} + <suffix> */
3921cb0ef41Sopenharmony_ci        s = &data[l + 1];
3931cb0ef41Sopenharmony_ci        if (s[0] == ' ') {
3941cb0ef41Sopenharmony_ci          AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
3951cb0ef41Sopenharmony_ci        } else if (s[0] == ',') {
3961cb0ef41Sopenharmony_ci          if (!is_all_caps) {
3971cb0ef41Sopenharmony_ci            AddMatch(id + 109 * n, l + 2, l, matches);
3981cb0ef41Sopenharmony_ci          }
3991cb0ef41Sopenharmony_ci          if (s[1] == ' ') {
4001cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
4011cb0ef41Sopenharmony_ci          }
4021cb0ef41Sopenharmony_ci        } else if (s[0] == '.') {
4031cb0ef41Sopenharmony_ci          AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
4041cb0ef41Sopenharmony_ci          if (s[1] == ' ') {
4051cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
4061cb0ef41Sopenharmony_ci          }
4071cb0ef41Sopenharmony_ci        } else if (s[0] == '=') {
4081cb0ef41Sopenharmony_ci          if (s[1] == '"') {
4091cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
4101cb0ef41Sopenharmony_ci          } else if (s[1] == '\'') {
4111cb0ef41Sopenharmony_ci            AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
4121cb0ef41Sopenharmony_ci          }
4131cb0ef41Sopenharmony_ci        }
4141cb0ef41Sopenharmony_ci      }
4151cb0ef41Sopenharmony_ci    }
4161cb0ef41Sopenharmony_ci  }
4171cb0ef41Sopenharmony_ci  if (max_length >= 6) {
4181cb0ef41Sopenharmony_ci    /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
4191cb0ef41Sopenharmony_ci    if ((data[1] == ' ' &&
4201cb0ef41Sopenharmony_ci         (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
4211cb0ef41Sopenharmony_ci        (data[0] == 0xC2 && data[1] == 0xA0)) {
4221cb0ef41Sopenharmony_ci      size_t offset = dictionary->buckets[Hash(&data[2])];
4231cb0ef41Sopenharmony_ci      BROTLI_BOOL end = !offset;
4241cb0ef41Sopenharmony_ci      while (!end) {
4251cb0ef41Sopenharmony_ci        DictWord w = dictionary->dict_words[offset++];
4261cb0ef41Sopenharmony_ci        const size_t l = w.len & 0x1F;
4271cb0ef41Sopenharmony_ci        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
4281cb0ef41Sopenharmony_ci        const size_t id = w.idx;
4291cb0ef41Sopenharmony_ci        end = !!(w.len & 0x80);
4301cb0ef41Sopenharmony_ci        w.len = (uint8_t)l;
4311cb0ef41Sopenharmony_ci        if (w.transform == 0 &&
4321cb0ef41Sopenharmony_ci            IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
4331cb0ef41Sopenharmony_ci          if (data[0] == 0xC2) {
4341cb0ef41Sopenharmony_ci            AddMatch(id + 102 * n, l + 2, l, matches);
4351cb0ef41Sopenharmony_ci            has_found_match = BROTLI_TRUE;
4361cb0ef41Sopenharmony_ci          } else if (l + 2 < max_length && data[l + 2] == ' ') {
4371cb0ef41Sopenharmony_ci            size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
4381cb0ef41Sopenharmony_ci            AddMatch(id + t * n, l + 3, l, matches);
4391cb0ef41Sopenharmony_ci            has_found_match = BROTLI_TRUE;
4401cb0ef41Sopenharmony_ci          }
4411cb0ef41Sopenharmony_ci        }
4421cb0ef41Sopenharmony_ci      }
4431cb0ef41Sopenharmony_ci    }
4441cb0ef41Sopenharmony_ci  }
4451cb0ef41Sopenharmony_ci  if (max_length >= 9) {
4461cb0ef41Sopenharmony_ci    /* Transforms with prefixes " the " and ".com/" */
4471cb0ef41Sopenharmony_ci    if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
4481cb0ef41Sopenharmony_ci         data[3] == 'e' && data[4] == ' ') ||
4491cb0ef41Sopenharmony_ci        (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
4501cb0ef41Sopenharmony_ci         data[3] == 'm' && data[4] == '/')) {
4511cb0ef41Sopenharmony_ci      size_t offset = dictionary->buckets[Hash(&data[5])];
4521cb0ef41Sopenharmony_ci      BROTLI_BOOL end = !offset;
4531cb0ef41Sopenharmony_ci      while (!end) {
4541cb0ef41Sopenharmony_ci        DictWord w = dictionary->dict_words[offset++];
4551cb0ef41Sopenharmony_ci        const size_t l = w.len & 0x1F;
4561cb0ef41Sopenharmony_ci        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
4571cb0ef41Sopenharmony_ci        const size_t id = w.idx;
4581cb0ef41Sopenharmony_ci        end = !!(w.len & 0x80);
4591cb0ef41Sopenharmony_ci        w.len = (uint8_t)l;
4601cb0ef41Sopenharmony_ci        if (w.transform == 0 &&
4611cb0ef41Sopenharmony_ci            IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
4621cb0ef41Sopenharmony_ci          AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
4631cb0ef41Sopenharmony_ci          has_found_match = BROTLI_TRUE;
4641cb0ef41Sopenharmony_ci          if (l + 5 < max_length) {
4651cb0ef41Sopenharmony_ci            const uint8_t* s = &data[l + 5];
4661cb0ef41Sopenharmony_ci            if (data[0] == ' ') {
4671cb0ef41Sopenharmony_ci              if (l + 8 < max_length &&
4681cb0ef41Sopenharmony_ci                  s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
4691cb0ef41Sopenharmony_ci                AddMatch(id + 62 * n, l + 9, l, matches);
4701cb0ef41Sopenharmony_ci                if (l + 12 < max_length &&
4711cb0ef41Sopenharmony_ci                    s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
4721cb0ef41Sopenharmony_ci                  AddMatch(id + 73 * n, l + 13, l, matches);
4731cb0ef41Sopenharmony_ci                }
4741cb0ef41Sopenharmony_ci              }
4751cb0ef41Sopenharmony_ci            }
4761cb0ef41Sopenharmony_ci          }
4771cb0ef41Sopenharmony_ci        }
4781cb0ef41Sopenharmony_ci      }
4791cb0ef41Sopenharmony_ci    }
4801cb0ef41Sopenharmony_ci  }
4811cb0ef41Sopenharmony_ci  return has_found_match;
4821cb0ef41Sopenharmony_ci}
4831cb0ef41Sopenharmony_ci
4841cb0ef41Sopenharmony_ci#if defined(__cplusplus) || defined(c_plusplus)
4851cb0ef41Sopenharmony_ci}  /* extern "C" */
4861cb0ef41Sopenharmony_ci#endif
487