11cb0ef41Sopenharmony_ci// Copyright 2011 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#ifndef V8_STRINGS_STRING_SEARCH_H_
61cb0ef41Sopenharmony_ci#define V8_STRINGS_STRING_SEARCH_H_
71cb0ef41Sopenharmony_ci
81cb0ef41Sopenharmony_ci#include "src/base/strings.h"
91cb0ef41Sopenharmony_ci#include "src/base/vector.h"
101cb0ef41Sopenharmony_ci#include "src/execution/isolate.h"
111cb0ef41Sopenharmony_ci
121cb0ef41Sopenharmony_cinamespace v8 {
131cb0ef41Sopenharmony_cinamespace internal {
141cb0ef41Sopenharmony_ci
151cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
161cb0ef41Sopenharmony_ci// String Search object.
171cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
181cb0ef41Sopenharmony_ci
191cb0ef41Sopenharmony_ci// Class holding constants and methods that apply to all string search variants,
201cb0ef41Sopenharmony_ci// independently of subject and pattern char size.
211cb0ef41Sopenharmony_ciclass StringSearchBase {
221cb0ef41Sopenharmony_ci protected:
231cb0ef41Sopenharmony_ci  // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
241cb0ef41Sopenharmony_ci  // limit, we can fix the size of tables. For a needle longer than this limit,
251cb0ef41Sopenharmony_ci  // search will not be optimal, since we only build tables for a suffix
261cb0ef41Sopenharmony_ci  // of the string, but it is a safe approximation.
271cb0ef41Sopenharmony_ci  static const int kBMMaxShift = Isolate::kBMMaxShift;
281cb0ef41Sopenharmony_ci
291cb0ef41Sopenharmony_ci  // Reduce alphabet to this size.
301cb0ef41Sopenharmony_ci  // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
311cb0ef41Sopenharmony_ci  // proportional to the input alphabet. We reduce the alphabet size by
321cb0ef41Sopenharmony_ci  // equating input characters modulo a smaller alphabet size. This gives
331cb0ef41Sopenharmony_ci  // a potentially less efficient searching, but is a safe approximation.
341cb0ef41Sopenharmony_ci  // For needles using only characters in the same Unicode 256-code point page,
351cb0ef41Sopenharmony_ci  // there is no search speed degradation.
361cb0ef41Sopenharmony_ci  static const int kLatin1AlphabetSize = 256;
371cb0ef41Sopenharmony_ci  static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize;
381cb0ef41Sopenharmony_ci
391cb0ef41Sopenharmony_ci  // Bad-char shift table stored in the state. It's length is the alphabet size.
401cb0ef41Sopenharmony_ci  // For patterns below this length, the skip length of Boyer-Moore is too short
411cb0ef41Sopenharmony_ci  // to compensate for the algorithmic overhead compared to simple brute force.
421cb0ef41Sopenharmony_ci  static const int kBMMinPatternLength = 7;
431cb0ef41Sopenharmony_ci
441cb0ef41Sopenharmony_ci  static inline bool IsOneByteString(base::Vector<const uint8_t> string) {
451cb0ef41Sopenharmony_ci    return true;
461cb0ef41Sopenharmony_ci  }
471cb0ef41Sopenharmony_ci
481cb0ef41Sopenharmony_ci  static inline bool IsOneByteString(base::Vector<const base::uc16> string) {
491cb0ef41Sopenharmony_ci    return String::IsOneByte(string.begin(), string.length());
501cb0ef41Sopenharmony_ci  }
511cb0ef41Sopenharmony_ci
521cb0ef41Sopenharmony_ci  friend class Isolate;
531cb0ef41Sopenharmony_ci};
541cb0ef41Sopenharmony_ci
551cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
561cb0ef41Sopenharmony_ciclass StringSearch : private StringSearchBase {
571cb0ef41Sopenharmony_ci public:
581cb0ef41Sopenharmony_ci  StringSearch(Isolate* isolate, base::Vector<const PatternChar> pattern)
591cb0ef41Sopenharmony_ci      : isolate_(isolate),
601cb0ef41Sopenharmony_ci        pattern_(pattern),
611cb0ef41Sopenharmony_ci        start_(std::max(0, pattern.length() - kBMMaxShift)) {
621cb0ef41Sopenharmony_ci    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
631cb0ef41Sopenharmony_ci      if (!IsOneByteString(pattern_)) {
641cb0ef41Sopenharmony_ci        strategy_ = &FailSearch;
651cb0ef41Sopenharmony_ci        return;
661cb0ef41Sopenharmony_ci      }
671cb0ef41Sopenharmony_ci    }
681cb0ef41Sopenharmony_ci    int pattern_length = pattern_.length();
691cb0ef41Sopenharmony_ci    if (pattern_length < kBMMinPatternLength) {
701cb0ef41Sopenharmony_ci      if (pattern_length == 1) {
711cb0ef41Sopenharmony_ci        strategy_ = &SingleCharSearch;
721cb0ef41Sopenharmony_ci        return;
731cb0ef41Sopenharmony_ci      }
741cb0ef41Sopenharmony_ci      strategy_ = &LinearSearch;
751cb0ef41Sopenharmony_ci      return;
761cb0ef41Sopenharmony_ci    }
771cb0ef41Sopenharmony_ci    strategy_ = &InitialSearch;
781cb0ef41Sopenharmony_ci  }
791cb0ef41Sopenharmony_ci
801cb0ef41Sopenharmony_ci  int Search(base::Vector<const SubjectChar> subject, int index) {
811cb0ef41Sopenharmony_ci    return strategy_(this, subject, index);
821cb0ef41Sopenharmony_ci  }
831cb0ef41Sopenharmony_ci
841cb0ef41Sopenharmony_ci  static inline int AlphabetSize() {
851cb0ef41Sopenharmony_ci    if (sizeof(PatternChar) == 1) {
861cb0ef41Sopenharmony_ci      // Latin1 needle.
871cb0ef41Sopenharmony_ci      return kLatin1AlphabetSize;
881cb0ef41Sopenharmony_ci    } else {
891cb0ef41Sopenharmony_ci      DCHECK_EQ(sizeof(PatternChar), 2);
901cb0ef41Sopenharmony_ci      // UC16 needle.
911cb0ef41Sopenharmony_ci      return kUC16AlphabetSize;
921cb0ef41Sopenharmony_ci    }
931cb0ef41Sopenharmony_ci  }
941cb0ef41Sopenharmony_ci
951cb0ef41Sopenharmony_ci private:
961cb0ef41Sopenharmony_ci  using SearchFunction = int (*)(StringSearch<PatternChar, SubjectChar>*,
971cb0ef41Sopenharmony_ci                                 base::Vector<const SubjectChar>, int);
981cb0ef41Sopenharmony_ci
991cb0ef41Sopenharmony_ci  static int FailSearch(StringSearch<PatternChar, SubjectChar>*,
1001cb0ef41Sopenharmony_ci                        base::Vector<const SubjectChar>, int) {
1011cb0ef41Sopenharmony_ci    return -1;
1021cb0ef41Sopenharmony_ci  }
1031cb0ef41Sopenharmony_ci
1041cb0ef41Sopenharmony_ci  static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
1051cb0ef41Sopenharmony_ci                              base::Vector<const SubjectChar> subject,
1061cb0ef41Sopenharmony_ci                              int start_index);
1071cb0ef41Sopenharmony_ci
1081cb0ef41Sopenharmony_ci  static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
1091cb0ef41Sopenharmony_ci                          base::Vector<const SubjectChar> subject,
1101cb0ef41Sopenharmony_ci                          int start_index);
1111cb0ef41Sopenharmony_ci
1121cb0ef41Sopenharmony_ci  static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
1131cb0ef41Sopenharmony_ci                           base::Vector<const SubjectChar> subject,
1141cb0ef41Sopenharmony_ci                           int start_index);
1151cb0ef41Sopenharmony_ci
1161cb0ef41Sopenharmony_ci  static int BoyerMooreHorspoolSearch(
1171cb0ef41Sopenharmony_ci      StringSearch<PatternChar, SubjectChar>* search,
1181cb0ef41Sopenharmony_ci      base::Vector<const SubjectChar> subject, int start_index);
1191cb0ef41Sopenharmony_ci
1201cb0ef41Sopenharmony_ci  static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
1211cb0ef41Sopenharmony_ci                              base::Vector<const SubjectChar> subject,
1221cb0ef41Sopenharmony_ci                              int start_index);
1231cb0ef41Sopenharmony_ci
1241cb0ef41Sopenharmony_ci  void PopulateBoyerMooreHorspoolTable();
1251cb0ef41Sopenharmony_ci
1261cb0ef41Sopenharmony_ci  void PopulateBoyerMooreTable();
1271cb0ef41Sopenharmony_ci
1281cb0ef41Sopenharmony_ci  static inline bool exceedsOneByte(uint8_t c) { return false; }
1291cb0ef41Sopenharmony_ci
1301cb0ef41Sopenharmony_ci  static inline bool exceedsOneByte(uint16_t c) {
1311cb0ef41Sopenharmony_ci    return c > String::kMaxOneByteCharCodeU;
1321cb0ef41Sopenharmony_ci  }
1331cb0ef41Sopenharmony_ci
1341cb0ef41Sopenharmony_ci  static inline int CharOccurrence(int* bad_char_occurrence,
1351cb0ef41Sopenharmony_ci                                   SubjectChar char_code) {
1361cb0ef41Sopenharmony_ci    if (sizeof(SubjectChar) == 1) {
1371cb0ef41Sopenharmony_ci      return bad_char_occurrence[static_cast<int>(char_code)];
1381cb0ef41Sopenharmony_ci    }
1391cb0ef41Sopenharmony_ci    if (sizeof(PatternChar) == 1) {
1401cb0ef41Sopenharmony_ci      if (exceedsOneByte(char_code)) {
1411cb0ef41Sopenharmony_ci        return -1;
1421cb0ef41Sopenharmony_ci      }
1431cb0ef41Sopenharmony_ci      return bad_char_occurrence[static_cast<unsigned int>(char_code)];
1441cb0ef41Sopenharmony_ci    }
1451cb0ef41Sopenharmony_ci    // Both pattern and subject are UC16. Reduce character to equivalence
1461cb0ef41Sopenharmony_ci    // class.
1471cb0ef41Sopenharmony_ci    int equiv_class = char_code % kUC16AlphabetSize;
1481cb0ef41Sopenharmony_ci    return bad_char_occurrence[equiv_class];
1491cb0ef41Sopenharmony_ci  }
1501cb0ef41Sopenharmony_ci
1511cb0ef41Sopenharmony_ci  // The following tables are shared by all searches.
1521cb0ef41Sopenharmony_ci  // TODO(lrn): Introduce a way for a pattern to keep its tables
1531cb0ef41Sopenharmony_ci  // between searches (e.g., for an Atom RegExp).
1541cb0ef41Sopenharmony_ci
1551cb0ef41Sopenharmony_ci  // Store for the BoyerMoore(Horspool) bad char shift table.
1561cb0ef41Sopenharmony_ci  // Return a table covering the last kBMMaxShift+1 positions of
1571cb0ef41Sopenharmony_ci  // pattern.
1581cb0ef41Sopenharmony_ci  int* bad_char_table() { return isolate_->bad_char_shift_table(); }
1591cb0ef41Sopenharmony_ci
1601cb0ef41Sopenharmony_ci  // Store for the BoyerMoore good suffix shift table.
1611cb0ef41Sopenharmony_ci  int* good_suffix_shift_table() {
1621cb0ef41Sopenharmony_ci    // Return biased pointer that maps the range  [start_..pattern_.length()
1631cb0ef41Sopenharmony_ci    // to the kGoodSuffixShiftTable array.
1641cb0ef41Sopenharmony_ci    return isolate_->good_suffix_shift_table() - start_;
1651cb0ef41Sopenharmony_ci  }
1661cb0ef41Sopenharmony_ci
1671cb0ef41Sopenharmony_ci  // Table used temporarily while building the BoyerMoore good suffix
1681cb0ef41Sopenharmony_ci  // shift table.
1691cb0ef41Sopenharmony_ci  int* suffix_table() {
1701cb0ef41Sopenharmony_ci    // Return biased pointer that maps the range  [start_..pattern_.length()
1711cb0ef41Sopenharmony_ci    // to the kSuffixTable array.
1721cb0ef41Sopenharmony_ci    return isolate_->suffix_table() - start_;
1731cb0ef41Sopenharmony_ci  }
1741cb0ef41Sopenharmony_ci
1751cb0ef41Sopenharmony_ci  Isolate* isolate_;
1761cb0ef41Sopenharmony_ci  // The pattern to search for.
1771cb0ef41Sopenharmony_ci  base::Vector<const PatternChar> pattern_;
1781cb0ef41Sopenharmony_ci  // Pointer to implementation of the search.
1791cb0ef41Sopenharmony_ci  SearchFunction strategy_;
1801cb0ef41Sopenharmony_ci  // Cache value of max(0, pattern_length() - kBMMaxShift)
1811cb0ef41Sopenharmony_ci  int start_;
1821cb0ef41Sopenharmony_ci};
1831cb0ef41Sopenharmony_ci
1841cb0ef41Sopenharmony_citemplate <typename T, typename U>
1851cb0ef41Sopenharmony_ciinline T AlignDown(T value, U alignment) {
1861cb0ef41Sopenharmony_ci  return reinterpret_cast<T>(
1871cb0ef41Sopenharmony_ci      (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1)));
1881cb0ef41Sopenharmony_ci}
1891cb0ef41Sopenharmony_ci
1901cb0ef41Sopenharmony_ciinline uint8_t GetHighestValueByte(base::uc16 character) {
1911cb0ef41Sopenharmony_ci  return std::max(static_cast<uint8_t>(character & 0xFF),
1921cb0ef41Sopenharmony_ci                  static_cast<uint8_t>(character >> 8));
1931cb0ef41Sopenharmony_ci}
1941cb0ef41Sopenharmony_ci
1951cb0ef41Sopenharmony_ciinline uint8_t GetHighestValueByte(uint8_t character) { return character; }
1961cb0ef41Sopenharmony_ci
1971cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
1981cb0ef41Sopenharmony_ciinline int FindFirstCharacter(base::Vector<const PatternChar> pattern,
1991cb0ef41Sopenharmony_ci                              base::Vector<const SubjectChar> subject,
2001cb0ef41Sopenharmony_ci                              int index) {
2011cb0ef41Sopenharmony_ci  const PatternChar pattern_first_char = pattern[0];
2021cb0ef41Sopenharmony_ci  const int max_n = (subject.length() - pattern.length() + 1);
2031cb0ef41Sopenharmony_ci
2041cb0ef41Sopenharmony_ci  if (sizeof(SubjectChar) == 2 && pattern_first_char == 0) {
2051cb0ef41Sopenharmony_ci    // Special-case looking for the 0 char in other than one-byte strings.
2061cb0ef41Sopenharmony_ci    // memchr mostly fails in this case due to every other byte being 0 in text
2071cb0ef41Sopenharmony_ci    // that is mostly ascii characters.
2081cb0ef41Sopenharmony_ci    for (int i = index; i < max_n; ++i) {
2091cb0ef41Sopenharmony_ci      if (subject[i] == 0) return i;
2101cb0ef41Sopenharmony_ci    }
2111cb0ef41Sopenharmony_ci    return -1;
2121cb0ef41Sopenharmony_ci  }
2131cb0ef41Sopenharmony_ci  const uint8_t search_byte = GetHighestValueByte(pattern_first_char);
2141cb0ef41Sopenharmony_ci  const SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
2151cb0ef41Sopenharmony_ci  int pos = index;
2161cb0ef41Sopenharmony_ci  do {
2171cb0ef41Sopenharmony_ci    DCHECK_GE(max_n - pos, 0);
2181cb0ef41Sopenharmony_ci    const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>(
2191cb0ef41Sopenharmony_ci        memchr(subject.begin() + pos, search_byte,
2201cb0ef41Sopenharmony_ci               (max_n - pos) * sizeof(SubjectChar)));
2211cb0ef41Sopenharmony_ci    if (char_pos == nullptr) return -1;
2221cb0ef41Sopenharmony_ci    char_pos = AlignDown(char_pos, sizeof(SubjectChar));
2231cb0ef41Sopenharmony_ci    pos = static_cast<int>(char_pos - subject.begin());
2241cb0ef41Sopenharmony_ci    if (subject[pos] == search_char) return pos;
2251cb0ef41Sopenharmony_ci  } while (++pos < max_n);
2261cb0ef41Sopenharmony_ci
2271cb0ef41Sopenharmony_ci  return -1;
2281cb0ef41Sopenharmony_ci}
2291cb0ef41Sopenharmony_ci
2301cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
2311cb0ef41Sopenharmony_ci// Single Character Pattern Search Strategy
2321cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
2331cb0ef41Sopenharmony_ci
2341cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
2351cb0ef41Sopenharmony_ciint StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
2361cb0ef41Sopenharmony_ci    StringSearch<PatternChar, SubjectChar>* search,
2371cb0ef41Sopenharmony_ci    base::Vector<const SubjectChar> subject, int index) {
2381cb0ef41Sopenharmony_ci  DCHECK_EQ(1, search->pattern_.length());
2391cb0ef41Sopenharmony_ci  PatternChar pattern_first_char = search->pattern_[0];
2401cb0ef41Sopenharmony_ci  if (sizeof(PatternChar) > sizeof(SubjectChar)) {
2411cb0ef41Sopenharmony_ci    if (exceedsOneByte(pattern_first_char)) {
2421cb0ef41Sopenharmony_ci      return -1;
2431cb0ef41Sopenharmony_ci    }
2441cb0ef41Sopenharmony_ci  }
2451cb0ef41Sopenharmony_ci  return FindFirstCharacter(search->pattern_, subject, index);
2461cb0ef41Sopenharmony_ci}
2471cb0ef41Sopenharmony_ci
2481cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
2491cb0ef41Sopenharmony_ci// Linear Search Strategy
2501cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
2511cb0ef41Sopenharmony_ci
2521cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
2531cb0ef41Sopenharmony_ciinline bool CharCompare(const PatternChar* pattern, const SubjectChar* subject,
2541cb0ef41Sopenharmony_ci                        int length) {
2551cb0ef41Sopenharmony_ci  DCHECK_GT(length, 0);
2561cb0ef41Sopenharmony_ci  int pos = 0;
2571cb0ef41Sopenharmony_ci  do {
2581cb0ef41Sopenharmony_ci    if (pattern[pos] != subject[pos]) {
2591cb0ef41Sopenharmony_ci      return false;
2601cb0ef41Sopenharmony_ci    }
2611cb0ef41Sopenharmony_ci    pos++;
2621cb0ef41Sopenharmony_ci  } while (pos < length);
2631cb0ef41Sopenharmony_ci  return true;
2641cb0ef41Sopenharmony_ci}
2651cb0ef41Sopenharmony_ci
2661cb0ef41Sopenharmony_ci// Simple linear search for short patterns. Never bails out.
2671cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
2681cb0ef41Sopenharmony_ciint StringSearch<PatternChar, SubjectChar>::LinearSearch(
2691cb0ef41Sopenharmony_ci    StringSearch<PatternChar, SubjectChar>* search,
2701cb0ef41Sopenharmony_ci    base::Vector<const SubjectChar> subject, int index) {
2711cb0ef41Sopenharmony_ci  base::Vector<const PatternChar> pattern = search->pattern_;
2721cb0ef41Sopenharmony_ci  DCHECK_GT(pattern.length(), 1);
2731cb0ef41Sopenharmony_ci  int pattern_length = pattern.length();
2741cb0ef41Sopenharmony_ci  int i = index;
2751cb0ef41Sopenharmony_ci  int n = subject.length() - pattern_length;
2761cb0ef41Sopenharmony_ci  while (i <= n) {
2771cb0ef41Sopenharmony_ci    i = FindFirstCharacter(pattern, subject, i);
2781cb0ef41Sopenharmony_ci    if (i == -1) return -1;
2791cb0ef41Sopenharmony_ci    DCHECK_LE(i, n);
2801cb0ef41Sopenharmony_ci    i++;
2811cb0ef41Sopenharmony_ci    // Loop extracted to separate function to allow using return to do
2821cb0ef41Sopenharmony_ci    // a deeper break.
2831cb0ef41Sopenharmony_ci    if (CharCompare(pattern.begin() + 1, subject.begin() + i,
2841cb0ef41Sopenharmony_ci                    pattern_length - 1)) {
2851cb0ef41Sopenharmony_ci      return i - 1;
2861cb0ef41Sopenharmony_ci    }
2871cb0ef41Sopenharmony_ci  }
2881cb0ef41Sopenharmony_ci  return -1;
2891cb0ef41Sopenharmony_ci}
2901cb0ef41Sopenharmony_ci
2911cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
2921cb0ef41Sopenharmony_ci// Boyer-Moore string search
2931cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
2941cb0ef41Sopenharmony_ci
2951cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
2961cb0ef41Sopenharmony_ciint StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
2971cb0ef41Sopenharmony_ci    StringSearch<PatternChar, SubjectChar>* search,
2981cb0ef41Sopenharmony_ci    base::Vector<const SubjectChar> subject, int start_index) {
2991cb0ef41Sopenharmony_ci  base::Vector<const PatternChar> pattern = search->pattern_;
3001cb0ef41Sopenharmony_ci  int subject_length = subject.length();
3011cb0ef41Sopenharmony_ci  int pattern_length = pattern.length();
3021cb0ef41Sopenharmony_ci  // Only preprocess at most kBMMaxShift last characters of pattern.
3031cb0ef41Sopenharmony_ci  int start = search->start_;
3041cb0ef41Sopenharmony_ci
3051cb0ef41Sopenharmony_ci  int* bad_char_occurence = search->bad_char_table();
3061cb0ef41Sopenharmony_ci  int* good_suffix_shift = search->good_suffix_shift_table();
3071cb0ef41Sopenharmony_ci
3081cb0ef41Sopenharmony_ci  PatternChar last_char = pattern[pattern_length - 1];
3091cb0ef41Sopenharmony_ci  int index = start_index;
3101cb0ef41Sopenharmony_ci  // Continue search from i.
3111cb0ef41Sopenharmony_ci  while (index <= subject_length - pattern_length) {
3121cb0ef41Sopenharmony_ci    int j = pattern_length - 1;
3131cb0ef41Sopenharmony_ci    int c;
3141cb0ef41Sopenharmony_ci    while (last_char != (c = subject[index + j])) {
3151cb0ef41Sopenharmony_ci      int shift = j - CharOccurrence(bad_char_occurence, c);
3161cb0ef41Sopenharmony_ci      index += shift;
3171cb0ef41Sopenharmony_ci      if (index > subject_length - pattern_length) {
3181cb0ef41Sopenharmony_ci        return -1;
3191cb0ef41Sopenharmony_ci      }
3201cb0ef41Sopenharmony_ci    }
3211cb0ef41Sopenharmony_ci    while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
3221cb0ef41Sopenharmony_ci    if (j < 0) {
3231cb0ef41Sopenharmony_ci      return index;
3241cb0ef41Sopenharmony_ci    } else if (j < start) {
3251cb0ef41Sopenharmony_ci      // we have matched more than our tables allow us to be smart about.
3261cb0ef41Sopenharmony_ci      // Fall back on BMH shift.
3271cb0ef41Sopenharmony_ci      index += pattern_length - 1 -
3281cb0ef41Sopenharmony_ci               CharOccurrence(bad_char_occurence,
3291cb0ef41Sopenharmony_ci                              static_cast<SubjectChar>(last_char));
3301cb0ef41Sopenharmony_ci    } else {
3311cb0ef41Sopenharmony_ci      int gs_shift = good_suffix_shift[j + 1];
3321cb0ef41Sopenharmony_ci      int bc_occ = CharOccurrence(bad_char_occurence, c);
3331cb0ef41Sopenharmony_ci      int shift = j - bc_occ;
3341cb0ef41Sopenharmony_ci      if (gs_shift > shift) {
3351cb0ef41Sopenharmony_ci        shift = gs_shift;
3361cb0ef41Sopenharmony_ci      }
3371cb0ef41Sopenharmony_ci      index += shift;
3381cb0ef41Sopenharmony_ci    }
3391cb0ef41Sopenharmony_ci  }
3401cb0ef41Sopenharmony_ci
3411cb0ef41Sopenharmony_ci  return -1;
3421cb0ef41Sopenharmony_ci}
3431cb0ef41Sopenharmony_ci
3441cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
3451cb0ef41Sopenharmony_civoid StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
3461cb0ef41Sopenharmony_ci  int pattern_length = pattern_.length();
3471cb0ef41Sopenharmony_ci  const PatternChar* pattern = pattern_.begin();
3481cb0ef41Sopenharmony_ci  // Only look at the last kBMMaxShift characters of pattern (from start_
3491cb0ef41Sopenharmony_ci  // to pattern_length).
3501cb0ef41Sopenharmony_ci  int start = start_;
3511cb0ef41Sopenharmony_ci  int length = pattern_length - start;
3521cb0ef41Sopenharmony_ci
3531cb0ef41Sopenharmony_ci  // Biased tables so that we can use pattern indices as table indices,
3541cb0ef41Sopenharmony_ci  // even if we only cover the part of the pattern from offset start.
3551cb0ef41Sopenharmony_ci  int* shift_table = good_suffix_shift_table();
3561cb0ef41Sopenharmony_ci  int* suffix_table = this->suffix_table();
3571cb0ef41Sopenharmony_ci
3581cb0ef41Sopenharmony_ci  // Initialize table.
3591cb0ef41Sopenharmony_ci  for (int i = start; i < pattern_length; i++) {
3601cb0ef41Sopenharmony_ci    shift_table[i] = length;
3611cb0ef41Sopenharmony_ci  }
3621cb0ef41Sopenharmony_ci  shift_table[pattern_length] = 1;
3631cb0ef41Sopenharmony_ci  suffix_table[pattern_length] = pattern_length + 1;
3641cb0ef41Sopenharmony_ci
3651cb0ef41Sopenharmony_ci  if (pattern_length <= start) {
3661cb0ef41Sopenharmony_ci    return;
3671cb0ef41Sopenharmony_ci  }
3681cb0ef41Sopenharmony_ci
3691cb0ef41Sopenharmony_ci  // Find suffixes.
3701cb0ef41Sopenharmony_ci  PatternChar last_char = pattern[pattern_length - 1];
3711cb0ef41Sopenharmony_ci  int suffix = pattern_length + 1;
3721cb0ef41Sopenharmony_ci  {
3731cb0ef41Sopenharmony_ci    int i = pattern_length;
3741cb0ef41Sopenharmony_ci    while (i > start) {
3751cb0ef41Sopenharmony_ci      PatternChar c = pattern[i - 1];
3761cb0ef41Sopenharmony_ci      while (suffix <= pattern_length && c != pattern[suffix - 1]) {
3771cb0ef41Sopenharmony_ci        if (shift_table[suffix] == length) {
3781cb0ef41Sopenharmony_ci          shift_table[suffix] = suffix - i;
3791cb0ef41Sopenharmony_ci        }
3801cb0ef41Sopenharmony_ci        suffix = suffix_table[suffix];
3811cb0ef41Sopenharmony_ci      }
3821cb0ef41Sopenharmony_ci      suffix_table[--i] = --suffix;
3831cb0ef41Sopenharmony_ci      if (suffix == pattern_length) {
3841cb0ef41Sopenharmony_ci        // No suffix to extend, so we check against last_char only.
3851cb0ef41Sopenharmony_ci        while ((i > start) && (pattern[i - 1] != last_char)) {
3861cb0ef41Sopenharmony_ci          if (shift_table[pattern_length] == length) {
3871cb0ef41Sopenharmony_ci            shift_table[pattern_length] = pattern_length - i;
3881cb0ef41Sopenharmony_ci          }
3891cb0ef41Sopenharmony_ci          suffix_table[--i] = pattern_length;
3901cb0ef41Sopenharmony_ci        }
3911cb0ef41Sopenharmony_ci        if (i > start) {
3921cb0ef41Sopenharmony_ci          suffix_table[--i] = --suffix;
3931cb0ef41Sopenharmony_ci        }
3941cb0ef41Sopenharmony_ci      }
3951cb0ef41Sopenharmony_ci    }
3961cb0ef41Sopenharmony_ci  }
3971cb0ef41Sopenharmony_ci  // Build shift table using suffixes.
3981cb0ef41Sopenharmony_ci  if (suffix < pattern_length) {
3991cb0ef41Sopenharmony_ci    for (int i = start; i <= pattern_length; i++) {
4001cb0ef41Sopenharmony_ci      if (shift_table[i] == length) {
4011cb0ef41Sopenharmony_ci        shift_table[i] = suffix - start;
4021cb0ef41Sopenharmony_ci      }
4031cb0ef41Sopenharmony_ci      if (i == suffix) {
4041cb0ef41Sopenharmony_ci        suffix = suffix_table[suffix];
4051cb0ef41Sopenharmony_ci      }
4061cb0ef41Sopenharmony_ci    }
4071cb0ef41Sopenharmony_ci  }
4081cb0ef41Sopenharmony_ci}
4091cb0ef41Sopenharmony_ci
4101cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
4111cb0ef41Sopenharmony_ci// Boyer-Moore-Horspool string search.
4121cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
4131cb0ef41Sopenharmony_ci
4141cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
4151cb0ef41Sopenharmony_ciint StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
4161cb0ef41Sopenharmony_ci    StringSearch<PatternChar, SubjectChar>* search,
4171cb0ef41Sopenharmony_ci    base::Vector<const SubjectChar> subject, int start_index) {
4181cb0ef41Sopenharmony_ci  base::Vector<const PatternChar> pattern = search->pattern_;
4191cb0ef41Sopenharmony_ci  int subject_length = subject.length();
4201cb0ef41Sopenharmony_ci  int pattern_length = pattern.length();
4211cb0ef41Sopenharmony_ci  int* char_occurrences = search->bad_char_table();
4221cb0ef41Sopenharmony_ci  int badness = -pattern_length;
4231cb0ef41Sopenharmony_ci
4241cb0ef41Sopenharmony_ci  // How bad we are doing without a good-suffix table.
4251cb0ef41Sopenharmony_ci  PatternChar last_char = pattern[pattern_length - 1];
4261cb0ef41Sopenharmony_ci  int last_char_shift =
4271cb0ef41Sopenharmony_ci      pattern_length - 1 -
4281cb0ef41Sopenharmony_ci      CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
4291cb0ef41Sopenharmony_ci  // Perform search
4301cb0ef41Sopenharmony_ci  int index = start_index;  // No matches found prior to this index.
4311cb0ef41Sopenharmony_ci  while (index <= subject_length - pattern_length) {
4321cb0ef41Sopenharmony_ci    int j = pattern_length - 1;
4331cb0ef41Sopenharmony_ci    int subject_char;
4341cb0ef41Sopenharmony_ci    while (last_char != (subject_char = subject[index + j])) {
4351cb0ef41Sopenharmony_ci      int bc_occ = CharOccurrence(char_occurrences, subject_char);
4361cb0ef41Sopenharmony_ci      int shift = j - bc_occ;
4371cb0ef41Sopenharmony_ci      index += shift;
4381cb0ef41Sopenharmony_ci      badness += 1 - shift;  // at most zero, so badness cannot increase.
4391cb0ef41Sopenharmony_ci      if (index > subject_length - pattern_length) {
4401cb0ef41Sopenharmony_ci        return -1;
4411cb0ef41Sopenharmony_ci      }
4421cb0ef41Sopenharmony_ci    }
4431cb0ef41Sopenharmony_ci    j--;
4441cb0ef41Sopenharmony_ci    while (j >= 0 && pattern[j] == (subject[index + j])) j--;
4451cb0ef41Sopenharmony_ci    if (j < 0) {
4461cb0ef41Sopenharmony_ci      return index;
4471cb0ef41Sopenharmony_ci    } else {
4481cb0ef41Sopenharmony_ci      index += last_char_shift;
4491cb0ef41Sopenharmony_ci      // Badness increases by the number of characters we have
4501cb0ef41Sopenharmony_ci      // checked, and decreases by the number of characters we
4511cb0ef41Sopenharmony_ci      // can skip by shifting. It's a measure of how we are doing
4521cb0ef41Sopenharmony_ci      // compared to reading each character exactly once.
4531cb0ef41Sopenharmony_ci      badness += (pattern_length - j) - last_char_shift;
4541cb0ef41Sopenharmony_ci      if (badness > 0) {
4551cb0ef41Sopenharmony_ci        search->PopulateBoyerMooreTable();
4561cb0ef41Sopenharmony_ci        search->strategy_ = &BoyerMooreSearch;
4571cb0ef41Sopenharmony_ci        return BoyerMooreSearch(search, subject, index);
4581cb0ef41Sopenharmony_ci      }
4591cb0ef41Sopenharmony_ci    }
4601cb0ef41Sopenharmony_ci  }
4611cb0ef41Sopenharmony_ci  return -1;
4621cb0ef41Sopenharmony_ci}
4631cb0ef41Sopenharmony_ci
4641cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
4651cb0ef41Sopenharmony_civoid StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
4661cb0ef41Sopenharmony_ci  int pattern_length = pattern_.length();
4671cb0ef41Sopenharmony_ci
4681cb0ef41Sopenharmony_ci  int* bad_char_occurrence = bad_char_table();
4691cb0ef41Sopenharmony_ci
4701cb0ef41Sopenharmony_ci  // Only preprocess at most kBMMaxShift last characters of pattern.
4711cb0ef41Sopenharmony_ci  int start = start_;
4721cb0ef41Sopenharmony_ci  // Run forwards to populate bad_char_table, so that *last* instance
4731cb0ef41Sopenharmony_ci  // of character equivalence class is the one registered.
4741cb0ef41Sopenharmony_ci  // Notice: Doesn't include the last character.
4751cb0ef41Sopenharmony_ci  int table_size = AlphabetSize();
4761cb0ef41Sopenharmony_ci  if (start == 0) {  // All patterns less than kBMMaxShift in length.
4771cb0ef41Sopenharmony_ci    memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence));
4781cb0ef41Sopenharmony_ci  } else {
4791cb0ef41Sopenharmony_ci    for (int i = 0; i < table_size; i++) {
4801cb0ef41Sopenharmony_ci      bad_char_occurrence[i] = start - 1;
4811cb0ef41Sopenharmony_ci    }
4821cb0ef41Sopenharmony_ci  }
4831cb0ef41Sopenharmony_ci  for (int i = start; i < pattern_length - 1; i++) {
4841cb0ef41Sopenharmony_ci    PatternChar c = pattern_[i];
4851cb0ef41Sopenharmony_ci    int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
4861cb0ef41Sopenharmony_ci    bad_char_occurrence[bucket] = i;
4871cb0ef41Sopenharmony_ci  }
4881cb0ef41Sopenharmony_ci}
4891cb0ef41Sopenharmony_ci
4901cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
4911cb0ef41Sopenharmony_ci// Linear string search with bailout to BMH.
4921cb0ef41Sopenharmony_ci//---------------------------------------------------------------------
4931cb0ef41Sopenharmony_ci
4941cb0ef41Sopenharmony_ci// Simple linear search for short patterns, which bails out if the string
4951cb0ef41Sopenharmony_ci// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
4961cb0ef41Sopenharmony_citemplate <typename PatternChar, typename SubjectChar>
4971cb0ef41Sopenharmony_ciint StringSearch<PatternChar, SubjectChar>::InitialSearch(
4981cb0ef41Sopenharmony_ci    StringSearch<PatternChar, SubjectChar>* search,
4991cb0ef41Sopenharmony_ci    base::Vector<const SubjectChar> subject, int index) {
5001cb0ef41Sopenharmony_ci  base::Vector<const PatternChar> pattern = search->pattern_;
5011cb0ef41Sopenharmony_ci  int pattern_length = pattern.length();
5021cb0ef41Sopenharmony_ci  // Badness is a count of how much work we have done.  When we have
5031cb0ef41Sopenharmony_ci  // done enough work we decide it's probably worth switching to a better
5041cb0ef41Sopenharmony_ci  // algorithm.
5051cb0ef41Sopenharmony_ci  int badness = -10 - (pattern_length << 2);
5061cb0ef41Sopenharmony_ci
5071cb0ef41Sopenharmony_ci  // We know our pattern is at least 2 characters, we cache the first so
5081cb0ef41Sopenharmony_ci  // the common case of the first character not matching is faster.
5091cb0ef41Sopenharmony_ci  for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
5101cb0ef41Sopenharmony_ci    badness++;
5111cb0ef41Sopenharmony_ci    if (badness <= 0) {
5121cb0ef41Sopenharmony_ci      i = FindFirstCharacter(pattern, subject, i);
5131cb0ef41Sopenharmony_ci      if (i == -1) return -1;
5141cb0ef41Sopenharmony_ci      DCHECK_LE(i, n);
5151cb0ef41Sopenharmony_ci      int j = 1;
5161cb0ef41Sopenharmony_ci      do {
5171cb0ef41Sopenharmony_ci        if (pattern[j] != subject[i + j]) {
5181cb0ef41Sopenharmony_ci          break;
5191cb0ef41Sopenharmony_ci        }
5201cb0ef41Sopenharmony_ci        j++;
5211cb0ef41Sopenharmony_ci      } while (j < pattern_length);
5221cb0ef41Sopenharmony_ci      if (j == pattern_length) {
5231cb0ef41Sopenharmony_ci        return i;
5241cb0ef41Sopenharmony_ci      }
5251cb0ef41Sopenharmony_ci      badness += j;
5261cb0ef41Sopenharmony_ci    } else {
5271cb0ef41Sopenharmony_ci      search->PopulateBoyerMooreHorspoolTable();
5281cb0ef41Sopenharmony_ci      search->strategy_ = &BoyerMooreHorspoolSearch;
5291cb0ef41Sopenharmony_ci      return BoyerMooreHorspoolSearch(search, subject, i);
5301cb0ef41Sopenharmony_ci    }
5311cb0ef41Sopenharmony_ci  }
5321cb0ef41Sopenharmony_ci  return -1;
5331cb0ef41Sopenharmony_ci}
5341cb0ef41Sopenharmony_ci
5351cb0ef41Sopenharmony_ci// Perform a a single stand-alone search.
5361cb0ef41Sopenharmony_ci// If searching multiple times for the same pattern, a search
5371cb0ef41Sopenharmony_ci// object should be constructed once and the Search function then called
5381cb0ef41Sopenharmony_ci// for each search.
5391cb0ef41Sopenharmony_citemplate <typename SubjectChar, typename PatternChar>
5401cb0ef41Sopenharmony_ciint SearchString(Isolate* isolate, base::Vector<const SubjectChar> subject,
5411cb0ef41Sopenharmony_ci                 base::Vector<const PatternChar> pattern, int start_index) {
5421cb0ef41Sopenharmony_ci  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
5431cb0ef41Sopenharmony_ci  return search.Search(subject, start_index);
5441cb0ef41Sopenharmony_ci}
5451cb0ef41Sopenharmony_ci
5461cb0ef41Sopenharmony_ci// A wrapper function around SearchString that wraps raw pointers to the subject
5471cb0ef41Sopenharmony_ci// and pattern as vectors before calling SearchString. Used from the
5481cb0ef41Sopenharmony_ci// StringIndexOf builtin.
5491cb0ef41Sopenharmony_citemplate <typename SubjectChar, typename PatternChar>
5501cb0ef41Sopenharmony_ciintptr_t SearchStringRaw(Isolate* isolate, const SubjectChar* subject_ptr,
5511cb0ef41Sopenharmony_ci                         int subject_length, const PatternChar* pattern_ptr,
5521cb0ef41Sopenharmony_ci                         int pattern_length, int start_index) {
5531cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
5541cb0ef41Sopenharmony_ci  base::Vector<const SubjectChar> subject(subject_ptr, subject_length);
5551cb0ef41Sopenharmony_ci  base::Vector<const PatternChar> pattern(pattern_ptr, pattern_length);
5561cb0ef41Sopenharmony_ci  return SearchString(isolate, subject, pattern, start_index);
5571cb0ef41Sopenharmony_ci}
5581cb0ef41Sopenharmony_ci
5591cb0ef41Sopenharmony_ci}  // namespace internal
5601cb0ef41Sopenharmony_ci}  // namespace v8
5611cb0ef41Sopenharmony_ci
5621cb0ef41Sopenharmony_ci#endif  // V8_STRINGS_STRING_SEARCH_H_
563