11cb0ef41Sopenharmony_ci// Copyright 2019 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#ifndef V8_REGEXP_REGEXP_COMPILER_H_
61cb0ef41Sopenharmony_ci#define V8_REGEXP_REGEXP_COMPILER_H_
71cb0ef41Sopenharmony_ci
81cb0ef41Sopenharmony_ci#include <bitset>
91cb0ef41Sopenharmony_ci
101cb0ef41Sopenharmony_ci#include "src/base/small-vector.h"
111cb0ef41Sopenharmony_ci#include "src/base/strings.h"
121cb0ef41Sopenharmony_ci#include "src/regexp/regexp-flags.h"
131cb0ef41Sopenharmony_ci#include "src/regexp/regexp-nodes.h"
141cb0ef41Sopenharmony_ci
151cb0ef41Sopenharmony_cinamespace v8 {
161cb0ef41Sopenharmony_cinamespace internal {
171cb0ef41Sopenharmony_ci
181cb0ef41Sopenharmony_ciclass DynamicBitSet;
191cb0ef41Sopenharmony_ciclass Isolate;
201cb0ef41Sopenharmony_ci
211cb0ef41Sopenharmony_cinamespace regexp_compiler_constants {
221cb0ef41Sopenharmony_ci
231cb0ef41Sopenharmony_ci// The '2' variant is has inclusive from and exclusive to.
241cb0ef41Sopenharmony_ci// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
251cb0ef41Sopenharmony_ci// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
261cb0ef41Sopenharmony_ciconstexpr base::uc32 kRangeEndMarker = 0x110000;
271cb0ef41Sopenharmony_ciconstexpr int kSpaceRanges[] = {
281cb0ef41Sopenharmony_ci    '\t',   '\r' + 1, ' ',    ' ' + 1, 0x00A0, 0x00A1, 0x1680,
291cb0ef41Sopenharmony_ci    0x1681, 0x2000,   0x200B, 0x2028,  0x202A, 0x202F, 0x2030,
301cb0ef41Sopenharmony_ci    0x205F, 0x2060,   0x3000, 0x3001,  0xFEFF, 0xFF00, kRangeEndMarker};
311cb0ef41Sopenharmony_ciconstexpr int kSpaceRangeCount = arraysize(kSpaceRanges);
321cb0ef41Sopenharmony_ci
331cb0ef41Sopenharmony_ciconstexpr int kWordRanges[] = {'0',     '9' + 1, 'A',     'Z' + 1,        '_',
341cb0ef41Sopenharmony_ci                               '_' + 1, 'a',     'z' + 1, kRangeEndMarker};
351cb0ef41Sopenharmony_ciconstexpr int kWordRangeCount = arraysize(kWordRanges);
361cb0ef41Sopenharmony_ciconstexpr int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker};
371cb0ef41Sopenharmony_ciconstexpr int kDigitRangeCount = arraysize(kDigitRanges);
381cb0ef41Sopenharmony_ciconstexpr int kSurrogateRanges[] = {kLeadSurrogateStart,
391cb0ef41Sopenharmony_ci                                    kLeadSurrogateStart + 1, kRangeEndMarker};
401cb0ef41Sopenharmony_ciconstexpr int kSurrogateRangeCount = arraysize(kSurrogateRanges);
411cb0ef41Sopenharmony_ciconstexpr int kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D,         0x000E,
421cb0ef41Sopenharmony_ci                                         0x2028, 0x202A, kRangeEndMarker};
431cb0ef41Sopenharmony_ciconstexpr int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
441cb0ef41Sopenharmony_ci
451cb0ef41Sopenharmony_ci// More makes code generation slower, less makes V8 benchmark score lower.
461cb0ef41Sopenharmony_ciconstexpr int kMaxLookaheadForBoyerMoore = 8;
471cb0ef41Sopenharmony_ci// In a 3-character pattern you can maximally step forwards 3 characters
481cb0ef41Sopenharmony_ci// at a time, which is not always enough to pay for the extra logic.
491cb0ef41Sopenharmony_ciconstexpr int kPatternTooShortForBoyerMoore = 2;
501cb0ef41Sopenharmony_ci
511cb0ef41Sopenharmony_ci}  // namespace regexp_compiler_constants
521cb0ef41Sopenharmony_ci
531cb0ef41Sopenharmony_ciinline bool NeedsUnicodeCaseEquivalents(RegExpFlags flags) {
541cb0ef41Sopenharmony_ci  // Both unicode and ignore_case flags are set. We need to use ICU to find
551cb0ef41Sopenharmony_ci  // the closure over case equivalents.
561cb0ef41Sopenharmony_ci  return IsUnicode(flags) && IsIgnoreCase(flags);
571cb0ef41Sopenharmony_ci}
581cb0ef41Sopenharmony_ci
591cb0ef41Sopenharmony_ci// Details of a quick mask-compare check that can look ahead in the
601cb0ef41Sopenharmony_ci// input stream.
611cb0ef41Sopenharmony_ciclass QuickCheckDetails {
621cb0ef41Sopenharmony_ci public:
631cb0ef41Sopenharmony_ci  QuickCheckDetails()
641cb0ef41Sopenharmony_ci      : characters_(0), mask_(0), value_(0), cannot_match_(false) {}
651cb0ef41Sopenharmony_ci  explicit QuickCheckDetails(int characters)
661cb0ef41Sopenharmony_ci      : characters_(characters), mask_(0), value_(0), cannot_match_(false) {}
671cb0ef41Sopenharmony_ci  bool Rationalize(bool one_byte);
681cb0ef41Sopenharmony_ci  // Merge in the information from another branch of an alternation.
691cb0ef41Sopenharmony_ci  void Merge(QuickCheckDetails* other, int from_index);
701cb0ef41Sopenharmony_ci  // Advance the current position by some amount.
711cb0ef41Sopenharmony_ci  void Advance(int by, bool one_byte);
721cb0ef41Sopenharmony_ci  void Clear();
731cb0ef41Sopenharmony_ci  bool cannot_match() { return cannot_match_; }
741cb0ef41Sopenharmony_ci  void set_cannot_match() { cannot_match_ = true; }
751cb0ef41Sopenharmony_ci  struct Position {
761cb0ef41Sopenharmony_ci    Position() : mask(0), value(0), determines_perfectly(false) {}
771cb0ef41Sopenharmony_ci    base::uc32 mask;
781cb0ef41Sopenharmony_ci    base::uc32 value;
791cb0ef41Sopenharmony_ci    bool determines_perfectly;
801cb0ef41Sopenharmony_ci  };
811cb0ef41Sopenharmony_ci  int characters() { return characters_; }
821cb0ef41Sopenharmony_ci  void set_characters(int characters) { characters_ = characters; }
831cb0ef41Sopenharmony_ci  Position* positions(int index) {
841cb0ef41Sopenharmony_ci    DCHECK_LE(0, index);
851cb0ef41Sopenharmony_ci    DCHECK_GT(characters_, index);
861cb0ef41Sopenharmony_ci    return positions_ + index;
871cb0ef41Sopenharmony_ci  }
881cb0ef41Sopenharmony_ci  uint32_t mask() { return mask_; }
891cb0ef41Sopenharmony_ci  uint32_t value() { return value_; }
901cb0ef41Sopenharmony_ci
911cb0ef41Sopenharmony_ci private:
921cb0ef41Sopenharmony_ci  // How many characters do we have quick check information from.  This is
931cb0ef41Sopenharmony_ci  // the same for all branches of a choice node.
941cb0ef41Sopenharmony_ci  int characters_;
951cb0ef41Sopenharmony_ci  Position positions_[4];
961cb0ef41Sopenharmony_ci  // These values are the condensate of the above array after Rationalize().
971cb0ef41Sopenharmony_ci  uint32_t mask_;
981cb0ef41Sopenharmony_ci  uint32_t value_;
991cb0ef41Sopenharmony_ci  // If set to true, there is no way this quick check can match at all.
1001cb0ef41Sopenharmony_ci  // E.g., if it requires to be at the start of the input, and isn't.
1011cb0ef41Sopenharmony_ci  bool cannot_match_;
1021cb0ef41Sopenharmony_ci};
1031cb0ef41Sopenharmony_ci
1041cb0ef41Sopenharmony_ci// Improve the speed that we scan for an initial point where a non-anchored
1051cb0ef41Sopenharmony_ci// regexp can match by using a Boyer-Moore-like table. This is done by
1061cb0ef41Sopenharmony_ci// identifying non-greedy non-capturing loops in the nodes that eat any
1071cb0ef41Sopenharmony_ci// character one at a time.  For example in the middle of the regexp
1081cb0ef41Sopenharmony_ci// /foo[\s\S]*?bar/ we find such a loop.  There is also such a loop implicitly
1091cb0ef41Sopenharmony_ci// inserted at the start of any non-anchored regexp.
1101cb0ef41Sopenharmony_ci//
1111cb0ef41Sopenharmony_ci// When we have found such a loop we look ahead in the nodes to find the set of
1121cb0ef41Sopenharmony_ci// characters that can come at given distances. For example for the regexp
1131cb0ef41Sopenharmony_ci// /.?foo/ we know that there are at least 3 characters ahead of us, and the
1141cb0ef41Sopenharmony_ci// sets of characters that can occur are [any, [f, o], [o]]. We find a range in
1151cb0ef41Sopenharmony_ci// the lookahead info where the set of characters is reasonably constrained. In
1161cb0ef41Sopenharmony_ci// our example this is from index 1 to 2 (0 is not constrained). We can now
1171cb0ef41Sopenharmony_ci// look 3 characters ahead and if we don't find one of [f, o] (the union of
1181cb0ef41Sopenharmony_ci// [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
1191cb0ef41Sopenharmony_ci//
1201cb0ef41Sopenharmony_ci// For Unicode input strings we do the same, but modulo 128.
1211cb0ef41Sopenharmony_ci//
1221cb0ef41Sopenharmony_ci// We also look at the first string fed to the regexp and use that to get a hint
1231cb0ef41Sopenharmony_ci// of the character frequencies in the inputs. This affects the assessment of
1241cb0ef41Sopenharmony_ci// whether the set of characters is 'reasonably constrained'.
1251cb0ef41Sopenharmony_ci//
1261cb0ef41Sopenharmony_ci// We also have another lookahead mechanism (called quick check in the code),
1271cb0ef41Sopenharmony_ci// which uses a wide load of multiple characters followed by a mask and compare
1281cb0ef41Sopenharmony_ci// to determine whether a match is possible at this point.
1291cb0ef41Sopenharmony_cienum ContainedInLattice {
1301cb0ef41Sopenharmony_ci  kNotYet = 0,
1311cb0ef41Sopenharmony_ci  kLatticeIn = 1,
1321cb0ef41Sopenharmony_ci  kLatticeOut = 2,
1331cb0ef41Sopenharmony_ci  kLatticeUnknown = 3  // Can also mean both in and out.
1341cb0ef41Sopenharmony_ci};
1351cb0ef41Sopenharmony_ci
1361cb0ef41Sopenharmony_ciinline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
1371cb0ef41Sopenharmony_ci  return static_cast<ContainedInLattice>(a | b);
1381cb0ef41Sopenharmony_ci}
1391cb0ef41Sopenharmony_ci
1401cb0ef41Sopenharmony_ciclass BoyerMoorePositionInfo : public ZoneObject {
1411cb0ef41Sopenharmony_ci public:
1421cb0ef41Sopenharmony_ci  bool at(int i) const { return map_[i]; }
1431cb0ef41Sopenharmony_ci
1441cb0ef41Sopenharmony_ci  static constexpr int kMapSize = 128;
1451cb0ef41Sopenharmony_ci  static constexpr int kMask = kMapSize - 1;
1461cb0ef41Sopenharmony_ci
1471cb0ef41Sopenharmony_ci  int map_count() const { return map_count_; }
1481cb0ef41Sopenharmony_ci
1491cb0ef41Sopenharmony_ci  void Set(int character);
1501cb0ef41Sopenharmony_ci  void SetInterval(const Interval& interval);
1511cb0ef41Sopenharmony_ci  void SetAll();
1521cb0ef41Sopenharmony_ci
1531cb0ef41Sopenharmony_ci  bool is_non_word() { return w_ == kLatticeOut; }
1541cb0ef41Sopenharmony_ci  bool is_word() { return w_ == kLatticeIn; }
1551cb0ef41Sopenharmony_ci
1561cb0ef41Sopenharmony_ci  using Bitset = std::bitset<kMapSize>;
1571cb0ef41Sopenharmony_ci  Bitset raw_bitset() const { return map_; }
1581cb0ef41Sopenharmony_ci
1591cb0ef41Sopenharmony_ci private:
1601cb0ef41Sopenharmony_ci  Bitset map_;
1611cb0ef41Sopenharmony_ci  int map_count_ = 0;               // Number of set bits in the map.
1621cb0ef41Sopenharmony_ci  ContainedInLattice w_ = kNotYet;  // The \w character class.
1631cb0ef41Sopenharmony_ci};
1641cb0ef41Sopenharmony_ci
1651cb0ef41Sopenharmony_ciclass BoyerMooreLookahead : public ZoneObject {
1661cb0ef41Sopenharmony_ci public:
1671cb0ef41Sopenharmony_ci  BoyerMooreLookahead(int length, RegExpCompiler* compiler, Zone* zone);
1681cb0ef41Sopenharmony_ci
1691cb0ef41Sopenharmony_ci  int length() { return length_; }
1701cb0ef41Sopenharmony_ci  int max_char() { return max_char_; }
1711cb0ef41Sopenharmony_ci  RegExpCompiler* compiler() { return compiler_; }
1721cb0ef41Sopenharmony_ci
1731cb0ef41Sopenharmony_ci  int Count(int map_number) { return bitmaps_->at(map_number)->map_count(); }
1741cb0ef41Sopenharmony_ci
1751cb0ef41Sopenharmony_ci  BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); }
1761cb0ef41Sopenharmony_ci
1771cb0ef41Sopenharmony_ci  void Set(int map_number, int character) {
1781cb0ef41Sopenharmony_ci    if (character > max_char_) return;
1791cb0ef41Sopenharmony_ci    BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1801cb0ef41Sopenharmony_ci    info->Set(character);
1811cb0ef41Sopenharmony_ci  }
1821cb0ef41Sopenharmony_ci
1831cb0ef41Sopenharmony_ci  void SetInterval(int map_number, const Interval& interval) {
1841cb0ef41Sopenharmony_ci    if (interval.from() > max_char_) return;
1851cb0ef41Sopenharmony_ci    BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1861cb0ef41Sopenharmony_ci    if (interval.to() > max_char_) {
1871cb0ef41Sopenharmony_ci      info->SetInterval(Interval(interval.from(), max_char_));
1881cb0ef41Sopenharmony_ci    } else {
1891cb0ef41Sopenharmony_ci      info->SetInterval(interval);
1901cb0ef41Sopenharmony_ci    }
1911cb0ef41Sopenharmony_ci  }
1921cb0ef41Sopenharmony_ci
1931cb0ef41Sopenharmony_ci  void SetAll(int map_number) { bitmaps_->at(map_number)->SetAll(); }
1941cb0ef41Sopenharmony_ci
1951cb0ef41Sopenharmony_ci  void SetRest(int from_map) {
1961cb0ef41Sopenharmony_ci    for (int i = from_map; i < length_; i++) SetAll(i);
1971cb0ef41Sopenharmony_ci  }
1981cb0ef41Sopenharmony_ci  void EmitSkipInstructions(RegExpMacroAssembler* masm);
1991cb0ef41Sopenharmony_ci
2001cb0ef41Sopenharmony_ci private:
2011cb0ef41Sopenharmony_ci  // This is the value obtained by EatsAtLeast.  If we do not have at least this
2021cb0ef41Sopenharmony_ci  // many characters left in the sample string then the match is bound to fail.
2031cb0ef41Sopenharmony_ci  // Therefore it is OK to read a character this far ahead of the current match
2041cb0ef41Sopenharmony_ci  // point.
2051cb0ef41Sopenharmony_ci  int length_;
2061cb0ef41Sopenharmony_ci  RegExpCompiler* compiler_;
2071cb0ef41Sopenharmony_ci  // 0xff for Latin1, 0xffff for UTF-16.
2081cb0ef41Sopenharmony_ci  int max_char_;
2091cb0ef41Sopenharmony_ci  ZoneList<BoyerMoorePositionInfo*>* bitmaps_;
2101cb0ef41Sopenharmony_ci
2111cb0ef41Sopenharmony_ci  int GetSkipTable(int min_lookahead, int max_lookahead,
2121cb0ef41Sopenharmony_ci                   Handle<ByteArray> boolean_skip_table);
2131cb0ef41Sopenharmony_ci  bool FindWorthwhileInterval(int* from, int* to);
2141cb0ef41Sopenharmony_ci  int FindBestInterval(int max_number_of_chars, int old_biggest_points,
2151cb0ef41Sopenharmony_ci                       int* from, int* to);
2161cb0ef41Sopenharmony_ci};
2171cb0ef41Sopenharmony_ci
2181cb0ef41Sopenharmony_ci// There are many ways to generate code for a node.  This class encapsulates
2191cb0ef41Sopenharmony_ci// the current way we should be generating.  In other words it encapsulates
2201cb0ef41Sopenharmony_ci// the current state of the code generator.  The effect of this is that we
2211cb0ef41Sopenharmony_ci// generate code for paths that the matcher can take through the regular
2221cb0ef41Sopenharmony_ci// expression.  A given node in the regexp can be code-generated several times
2231cb0ef41Sopenharmony_ci// as it can be part of several traces.  For example for the regexp:
2241cb0ef41Sopenharmony_ci// /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
2251cb0ef41Sopenharmony_ci// of the foo-bar-baz trace and once as part of the foo-ip-baz trace.  The code
2261cb0ef41Sopenharmony_ci// to match foo is generated only once (the traces have a common prefix).  The
2271cb0ef41Sopenharmony_ci// code to store the capture is deferred and generated (twice) after the places
2281cb0ef41Sopenharmony_ci// where baz has been matched.
2291cb0ef41Sopenharmony_ciclass Trace {
2301cb0ef41Sopenharmony_ci public:
2311cb0ef41Sopenharmony_ci  // A value for a property that is either known to be true, know to be false,
2321cb0ef41Sopenharmony_ci  // or not known.
2331cb0ef41Sopenharmony_ci  enum TriBool { UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1 };
2341cb0ef41Sopenharmony_ci
2351cb0ef41Sopenharmony_ci  class DeferredAction {
2361cb0ef41Sopenharmony_ci   public:
2371cb0ef41Sopenharmony_ci    DeferredAction(ActionNode::ActionType action_type, int reg)
2381cb0ef41Sopenharmony_ci        : action_type_(action_type), reg_(reg), next_(nullptr) {}
2391cb0ef41Sopenharmony_ci    DeferredAction* next() { return next_; }
2401cb0ef41Sopenharmony_ci    bool Mentions(int reg);
2411cb0ef41Sopenharmony_ci    int reg() { return reg_; }
2421cb0ef41Sopenharmony_ci    ActionNode::ActionType action_type() { return action_type_; }
2431cb0ef41Sopenharmony_ci
2441cb0ef41Sopenharmony_ci   private:
2451cb0ef41Sopenharmony_ci    ActionNode::ActionType action_type_;
2461cb0ef41Sopenharmony_ci    int reg_;
2471cb0ef41Sopenharmony_ci    DeferredAction* next_;
2481cb0ef41Sopenharmony_ci    friend class Trace;
2491cb0ef41Sopenharmony_ci  };
2501cb0ef41Sopenharmony_ci
2511cb0ef41Sopenharmony_ci  class DeferredCapture : public DeferredAction {
2521cb0ef41Sopenharmony_ci   public:
2531cb0ef41Sopenharmony_ci    DeferredCapture(int reg, bool is_capture, Trace* trace)
2541cb0ef41Sopenharmony_ci        : DeferredAction(ActionNode::STORE_POSITION, reg),
2551cb0ef41Sopenharmony_ci          cp_offset_(trace->cp_offset()),
2561cb0ef41Sopenharmony_ci          is_capture_(is_capture) {}
2571cb0ef41Sopenharmony_ci    int cp_offset() { return cp_offset_; }
2581cb0ef41Sopenharmony_ci    bool is_capture() { return is_capture_; }
2591cb0ef41Sopenharmony_ci
2601cb0ef41Sopenharmony_ci   private:
2611cb0ef41Sopenharmony_ci    int cp_offset_;
2621cb0ef41Sopenharmony_ci    bool is_capture_;
2631cb0ef41Sopenharmony_ci    void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
2641cb0ef41Sopenharmony_ci  };
2651cb0ef41Sopenharmony_ci
2661cb0ef41Sopenharmony_ci  class DeferredSetRegisterForLoop : public DeferredAction {
2671cb0ef41Sopenharmony_ci   public:
2681cb0ef41Sopenharmony_ci    DeferredSetRegisterForLoop(int reg, int value)
2691cb0ef41Sopenharmony_ci        : DeferredAction(ActionNode::SET_REGISTER_FOR_LOOP, reg),
2701cb0ef41Sopenharmony_ci          value_(value) {}
2711cb0ef41Sopenharmony_ci    int value() { return value_; }
2721cb0ef41Sopenharmony_ci
2731cb0ef41Sopenharmony_ci   private:
2741cb0ef41Sopenharmony_ci    int value_;
2751cb0ef41Sopenharmony_ci  };
2761cb0ef41Sopenharmony_ci
2771cb0ef41Sopenharmony_ci  class DeferredClearCaptures : public DeferredAction {
2781cb0ef41Sopenharmony_ci   public:
2791cb0ef41Sopenharmony_ci    explicit DeferredClearCaptures(Interval range)
2801cb0ef41Sopenharmony_ci        : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), range_(range) {}
2811cb0ef41Sopenharmony_ci    Interval range() { return range_; }
2821cb0ef41Sopenharmony_ci
2831cb0ef41Sopenharmony_ci   private:
2841cb0ef41Sopenharmony_ci    Interval range_;
2851cb0ef41Sopenharmony_ci  };
2861cb0ef41Sopenharmony_ci
2871cb0ef41Sopenharmony_ci  class DeferredIncrementRegister : public DeferredAction {
2881cb0ef41Sopenharmony_ci   public:
2891cb0ef41Sopenharmony_ci    explicit DeferredIncrementRegister(int reg)
2901cb0ef41Sopenharmony_ci        : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) {}
2911cb0ef41Sopenharmony_ci  };
2921cb0ef41Sopenharmony_ci
2931cb0ef41Sopenharmony_ci  Trace()
2941cb0ef41Sopenharmony_ci      : cp_offset_(0),
2951cb0ef41Sopenharmony_ci        actions_(nullptr),
2961cb0ef41Sopenharmony_ci        backtrack_(nullptr),
2971cb0ef41Sopenharmony_ci        stop_node_(nullptr),
2981cb0ef41Sopenharmony_ci        loop_label_(nullptr),
2991cb0ef41Sopenharmony_ci        characters_preloaded_(0),
3001cb0ef41Sopenharmony_ci        bound_checked_up_to_(0),
3011cb0ef41Sopenharmony_ci        flush_budget_(100),
3021cb0ef41Sopenharmony_ci        at_start_(UNKNOWN) {}
3031cb0ef41Sopenharmony_ci
3041cb0ef41Sopenharmony_ci  // End the trace.  This involves flushing the deferred actions in the trace
3051cb0ef41Sopenharmony_ci  // and pushing a backtrack location onto the backtrack stack.  Once this is
3061cb0ef41Sopenharmony_ci  // done we can start a new trace or go to one that has already been
3071cb0ef41Sopenharmony_ci  // generated.
3081cb0ef41Sopenharmony_ci  void Flush(RegExpCompiler* compiler, RegExpNode* successor);
3091cb0ef41Sopenharmony_ci  int cp_offset() { return cp_offset_; }
3101cb0ef41Sopenharmony_ci  DeferredAction* actions() { return actions_; }
3111cb0ef41Sopenharmony_ci  // A trivial trace is one that has no deferred actions or other state that
3121cb0ef41Sopenharmony_ci  // affects the assumptions used when generating code.  There is no recorded
3131cb0ef41Sopenharmony_ci  // backtrack location in a trivial trace, so with a trivial trace we will
3141cb0ef41Sopenharmony_ci  // generate code that, on a failure to match, gets the backtrack location
3151cb0ef41Sopenharmony_ci  // from the backtrack stack rather than using a direct jump instruction.  We
3161cb0ef41Sopenharmony_ci  // always start code generation with a trivial trace and non-trivial traces
3171cb0ef41Sopenharmony_ci  // are created as we emit code for nodes or add to the list of deferred
3181cb0ef41Sopenharmony_ci  // actions in the trace.  The location of the code generated for a node using
3191cb0ef41Sopenharmony_ci  // a trivial trace is recorded in a label in the node so that gotos can be
3201cb0ef41Sopenharmony_ci  // generated to that code.
3211cb0ef41Sopenharmony_ci  bool is_trivial() {
3221cb0ef41Sopenharmony_ci    return backtrack_ == nullptr && actions_ == nullptr && cp_offset_ == 0 &&
3231cb0ef41Sopenharmony_ci           characters_preloaded_ == 0 && bound_checked_up_to_ == 0 &&
3241cb0ef41Sopenharmony_ci           quick_check_performed_.characters() == 0 && at_start_ == UNKNOWN;
3251cb0ef41Sopenharmony_ci  }
3261cb0ef41Sopenharmony_ci  TriBool at_start() { return at_start_; }
3271cb0ef41Sopenharmony_ci  void set_at_start(TriBool at_start) { at_start_ = at_start; }
3281cb0ef41Sopenharmony_ci  Label* backtrack() { return backtrack_; }
3291cb0ef41Sopenharmony_ci  Label* loop_label() { return loop_label_; }
3301cb0ef41Sopenharmony_ci  RegExpNode* stop_node() { return stop_node_; }
3311cb0ef41Sopenharmony_ci  int characters_preloaded() { return characters_preloaded_; }
3321cb0ef41Sopenharmony_ci  int bound_checked_up_to() { return bound_checked_up_to_; }
3331cb0ef41Sopenharmony_ci  int flush_budget() { return flush_budget_; }
3341cb0ef41Sopenharmony_ci  QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
3351cb0ef41Sopenharmony_ci  bool mentions_reg(int reg);
3361cb0ef41Sopenharmony_ci  // Returns true if a deferred position store exists to the specified
3371cb0ef41Sopenharmony_ci  // register and stores the offset in the out-parameter.  Otherwise
3381cb0ef41Sopenharmony_ci  // returns false.
3391cb0ef41Sopenharmony_ci  bool GetStoredPosition(int reg, int* cp_offset);
3401cb0ef41Sopenharmony_ci  // These set methods and AdvanceCurrentPositionInTrace should be used only on
3411cb0ef41Sopenharmony_ci  // new traces - the intention is that traces are immutable after creation.
3421cb0ef41Sopenharmony_ci  void add_action(DeferredAction* new_action) {
3431cb0ef41Sopenharmony_ci    DCHECK(new_action->next_ == nullptr);
3441cb0ef41Sopenharmony_ci    new_action->next_ = actions_;
3451cb0ef41Sopenharmony_ci    actions_ = new_action;
3461cb0ef41Sopenharmony_ci  }
3471cb0ef41Sopenharmony_ci  void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
3481cb0ef41Sopenharmony_ci  void set_stop_node(RegExpNode* node) { stop_node_ = node; }
3491cb0ef41Sopenharmony_ci  void set_loop_label(Label* label) { loop_label_ = label; }
3501cb0ef41Sopenharmony_ci  void set_characters_preloaded(int count) { characters_preloaded_ = count; }
3511cb0ef41Sopenharmony_ci  void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
3521cb0ef41Sopenharmony_ci  void set_flush_budget(int to) { flush_budget_ = to; }
3531cb0ef41Sopenharmony_ci  void set_quick_check_performed(QuickCheckDetails* d) {
3541cb0ef41Sopenharmony_ci    quick_check_performed_ = *d;
3551cb0ef41Sopenharmony_ci  }
3561cb0ef41Sopenharmony_ci  void InvalidateCurrentCharacter();
3571cb0ef41Sopenharmony_ci  void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
3581cb0ef41Sopenharmony_ci
3591cb0ef41Sopenharmony_ci private:
3601cb0ef41Sopenharmony_ci  int FindAffectedRegisters(DynamicBitSet* affected_registers, Zone* zone);
3611cb0ef41Sopenharmony_ci  void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register,
3621cb0ef41Sopenharmony_ci                              const DynamicBitSet& affected_registers,
3631cb0ef41Sopenharmony_ci                              DynamicBitSet* registers_to_pop,
3641cb0ef41Sopenharmony_ci                              DynamicBitSet* registers_to_clear, Zone* zone);
3651cb0ef41Sopenharmony_ci  void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register,
3661cb0ef41Sopenharmony_ci                                const DynamicBitSet& registers_to_pop,
3671cb0ef41Sopenharmony_ci                                const DynamicBitSet& registers_to_clear);
3681cb0ef41Sopenharmony_ci  int cp_offset_;
3691cb0ef41Sopenharmony_ci  DeferredAction* actions_;
3701cb0ef41Sopenharmony_ci  Label* backtrack_;
3711cb0ef41Sopenharmony_ci  RegExpNode* stop_node_;
3721cb0ef41Sopenharmony_ci  Label* loop_label_;
3731cb0ef41Sopenharmony_ci  int characters_preloaded_;
3741cb0ef41Sopenharmony_ci  int bound_checked_up_to_;
3751cb0ef41Sopenharmony_ci  QuickCheckDetails quick_check_performed_;
3761cb0ef41Sopenharmony_ci  int flush_budget_;
3771cb0ef41Sopenharmony_ci  TriBool at_start_;
3781cb0ef41Sopenharmony_ci};
3791cb0ef41Sopenharmony_ci
3801cb0ef41Sopenharmony_ciclass GreedyLoopState {
3811cb0ef41Sopenharmony_ci public:
3821cb0ef41Sopenharmony_ci  explicit GreedyLoopState(bool not_at_start);
3831cb0ef41Sopenharmony_ci
3841cb0ef41Sopenharmony_ci  Label* label() { return &label_; }
3851cb0ef41Sopenharmony_ci  Trace* counter_backtrack_trace() { return &counter_backtrack_trace_; }
3861cb0ef41Sopenharmony_ci
3871cb0ef41Sopenharmony_ci private:
3881cb0ef41Sopenharmony_ci  Label label_;
3891cb0ef41Sopenharmony_ci  Trace counter_backtrack_trace_;
3901cb0ef41Sopenharmony_ci};
3911cb0ef41Sopenharmony_ci
3921cb0ef41Sopenharmony_cistruct PreloadState {
3931cb0ef41Sopenharmony_ci  static const int kEatsAtLeastNotYetInitialized = -1;
3941cb0ef41Sopenharmony_ci  bool preload_is_current_;
3951cb0ef41Sopenharmony_ci  bool preload_has_checked_bounds_;
3961cb0ef41Sopenharmony_ci  int preload_characters_;
3971cb0ef41Sopenharmony_ci  int eats_at_least_;
3981cb0ef41Sopenharmony_ci  void init() { eats_at_least_ = kEatsAtLeastNotYetInitialized; }
3991cb0ef41Sopenharmony_ci};
4001cb0ef41Sopenharmony_ci
4011cb0ef41Sopenharmony_ci// Analysis performs assertion propagation and computes eats_at_least_ values.
4021cb0ef41Sopenharmony_ci// See the comments on AssertionPropagator and EatsAtLeastPropagator for more
4031cb0ef41Sopenharmony_ci// details.
4041cb0ef41Sopenharmony_ciRegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpFlags flags,
4051cb0ef41Sopenharmony_ci                          RegExpNode* node);
4061cb0ef41Sopenharmony_ci
4071cb0ef41Sopenharmony_ciclass FrequencyCollator {
4081cb0ef41Sopenharmony_ci public:
4091cb0ef41Sopenharmony_ci  FrequencyCollator() : total_samples_(0) {
4101cb0ef41Sopenharmony_ci    for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
4111cb0ef41Sopenharmony_ci      frequencies_[i] = CharacterFrequency(i);
4121cb0ef41Sopenharmony_ci    }
4131cb0ef41Sopenharmony_ci  }
4141cb0ef41Sopenharmony_ci
4151cb0ef41Sopenharmony_ci  void CountCharacter(int character) {
4161cb0ef41Sopenharmony_ci    int index = (character & RegExpMacroAssembler::kTableMask);
4171cb0ef41Sopenharmony_ci    frequencies_[index].Increment();
4181cb0ef41Sopenharmony_ci    total_samples_++;
4191cb0ef41Sopenharmony_ci  }
4201cb0ef41Sopenharmony_ci
4211cb0ef41Sopenharmony_ci  // Does not measure in percent, but rather per-128 (the table size from the
4221cb0ef41Sopenharmony_ci  // regexp macro assembler).
4231cb0ef41Sopenharmony_ci  int Frequency(int in_character) {
4241cb0ef41Sopenharmony_ci    DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
4251cb0ef41Sopenharmony_ci    if (total_samples_ < 1) return 1;  // Division by zero.
4261cb0ef41Sopenharmony_ci    int freq_in_per128 =
4271cb0ef41Sopenharmony_ci        (frequencies_[in_character].counter() * 128) / total_samples_;
4281cb0ef41Sopenharmony_ci    return freq_in_per128;
4291cb0ef41Sopenharmony_ci  }
4301cb0ef41Sopenharmony_ci
4311cb0ef41Sopenharmony_ci private:
4321cb0ef41Sopenharmony_ci  class CharacterFrequency {
4331cb0ef41Sopenharmony_ci   public:
4341cb0ef41Sopenharmony_ci    CharacterFrequency() : counter_(0), character_(-1) {}
4351cb0ef41Sopenharmony_ci    explicit CharacterFrequency(int character)
4361cb0ef41Sopenharmony_ci        : counter_(0), character_(character) {}
4371cb0ef41Sopenharmony_ci
4381cb0ef41Sopenharmony_ci    void Increment() { counter_++; }
4391cb0ef41Sopenharmony_ci    int counter() { return counter_; }
4401cb0ef41Sopenharmony_ci    int character() { return character_; }
4411cb0ef41Sopenharmony_ci
4421cb0ef41Sopenharmony_ci   private:
4431cb0ef41Sopenharmony_ci    int counter_;
4441cb0ef41Sopenharmony_ci    int character_;
4451cb0ef41Sopenharmony_ci  };
4461cb0ef41Sopenharmony_ci
4471cb0ef41Sopenharmony_ci private:
4481cb0ef41Sopenharmony_ci  CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
4491cb0ef41Sopenharmony_ci  int total_samples_;
4501cb0ef41Sopenharmony_ci};
4511cb0ef41Sopenharmony_ci
4521cb0ef41Sopenharmony_ciclass RegExpCompiler {
4531cb0ef41Sopenharmony_ci public:
4541cb0ef41Sopenharmony_ci  RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
4551cb0ef41Sopenharmony_ci                 RegExpFlags flags, bool is_one_byte);
4561cb0ef41Sopenharmony_ci
4571cb0ef41Sopenharmony_ci  int AllocateRegister() {
4581cb0ef41Sopenharmony_ci    if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
4591cb0ef41Sopenharmony_ci      reg_exp_too_big_ = true;
4601cb0ef41Sopenharmony_ci      return next_register_;
4611cb0ef41Sopenharmony_ci    }
4621cb0ef41Sopenharmony_ci    return next_register_++;
4631cb0ef41Sopenharmony_ci  }
4641cb0ef41Sopenharmony_ci
4651cb0ef41Sopenharmony_ci  // Lookarounds to match lone surrogates for unicode character class matches
4661cb0ef41Sopenharmony_ci  // are never nested. We can therefore reuse registers.
4671cb0ef41Sopenharmony_ci  int UnicodeLookaroundStackRegister() {
4681cb0ef41Sopenharmony_ci    if (unicode_lookaround_stack_register_ == kNoRegister) {
4691cb0ef41Sopenharmony_ci      unicode_lookaround_stack_register_ = AllocateRegister();
4701cb0ef41Sopenharmony_ci    }
4711cb0ef41Sopenharmony_ci    return unicode_lookaround_stack_register_;
4721cb0ef41Sopenharmony_ci  }
4731cb0ef41Sopenharmony_ci
4741cb0ef41Sopenharmony_ci  int UnicodeLookaroundPositionRegister() {
4751cb0ef41Sopenharmony_ci    if (unicode_lookaround_position_register_ == kNoRegister) {
4761cb0ef41Sopenharmony_ci      unicode_lookaround_position_register_ = AllocateRegister();
4771cb0ef41Sopenharmony_ci    }
4781cb0ef41Sopenharmony_ci    return unicode_lookaround_position_register_;
4791cb0ef41Sopenharmony_ci  }
4801cb0ef41Sopenharmony_ci
4811cb0ef41Sopenharmony_ci  struct CompilationResult final {
4821cb0ef41Sopenharmony_ci    explicit CompilationResult(RegExpError err) : error(err) {}
4831cb0ef41Sopenharmony_ci    CompilationResult(Handle<Object> code, int registers)
4841cb0ef41Sopenharmony_ci        : code(code), num_registers(registers) {}
4851cb0ef41Sopenharmony_ci
4861cb0ef41Sopenharmony_ci    static CompilationResult RegExpTooBig() {
4871cb0ef41Sopenharmony_ci      return CompilationResult(RegExpError::kTooLarge);
4881cb0ef41Sopenharmony_ci    }
4891cb0ef41Sopenharmony_ci
4901cb0ef41Sopenharmony_ci    bool Succeeded() const { return error == RegExpError::kNone; }
4911cb0ef41Sopenharmony_ci
4921cb0ef41Sopenharmony_ci    const RegExpError error = RegExpError::kNone;
4931cb0ef41Sopenharmony_ci    Handle<Object> code;
4941cb0ef41Sopenharmony_ci    int num_registers = 0;
4951cb0ef41Sopenharmony_ci  };
4961cb0ef41Sopenharmony_ci
4971cb0ef41Sopenharmony_ci  CompilationResult Assemble(Isolate* isolate, RegExpMacroAssembler* assembler,
4981cb0ef41Sopenharmony_ci                             RegExpNode* start, int capture_count,
4991cb0ef41Sopenharmony_ci                             Handle<String> pattern);
5001cb0ef41Sopenharmony_ci
5011cb0ef41Sopenharmony_ci  // Preprocessing is the final step of node creation before analysis
5021cb0ef41Sopenharmony_ci  // and assembly. It includes:
5031cb0ef41Sopenharmony_ci  // - Wrapping the body of the regexp in capture 0.
5041cb0ef41Sopenharmony_ci  // - Inserting the implicit .* before/after the regexp if necessary.
5051cb0ef41Sopenharmony_ci  // - If the input is a one-byte string, filtering out nodes that can't match.
5061cb0ef41Sopenharmony_ci  // - Fixing up regexp matches that start within a surrogate pair.
5071cb0ef41Sopenharmony_ci  RegExpNode* PreprocessRegExp(RegExpCompileData* data, RegExpFlags flags,
5081cb0ef41Sopenharmony_ci                               bool is_one_byte);
5091cb0ef41Sopenharmony_ci
5101cb0ef41Sopenharmony_ci  // If the regexp matching starts within a surrogate pair, step back to the
5111cb0ef41Sopenharmony_ci  // lead surrogate and start matching from there.
5121cb0ef41Sopenharmony_ci  RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpNode* on_success);
5131cb0ef41Sopenharmony_ci
5141cb0ef41Sopenharmony_ci  inline void AddWork(RegExpNode* node) {
5151cb0ef41Sopenharmony_ci    if (!node->on_work_list() && !node->label()->is_bound()) {
5161cb0ef41Sopenharmony_ci      node->set_on_work_list(true);
5171cb0ef41Sopenharmony_ci      work_list_->push_back(node);
5181cb0ef41Sopenharmony_ci    }
5191cb0ef41Sopenharmony_ci  }
5201cb0ef41Sopenharmony_ci
5211cb0ef41Sopenharmony_ci  static const int kImplementationOffset = 0;
5221cb0ef41Sopenharmony_ci  static const int kNumberOfRegistersOffset = 0;
5231cb0ef41Sopenharmony_ci  static const int kCodeOffset = 1;
5241cb0ef41Sopenharmony_ci
5251cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
5261cb0ef41Sopenharmony_ci  EndNode* accept() { return accept_; }
5271cb0ef41Sopenharmony_ci
5281cb0ef41Sopenharmony_ci  static const int kMaxRecursion = 100;
5291cb0ef41Sopenharmony_ci  inline int recursion_depth() { return recursion_depth_; }
5301cb0ef41Sopenharmony_ci  inline void IncrementRecursionDepth() { recursion_depth_++; }
5311cb0ef41Sopenharmony_ci  inline void DecrementRecursionDepth() { recursion_depth_--; }
5321cb0ef41Sopenharmony_ci
5331cb0ef41Sopenharmony_ci  RegExpFlags flags() const { return flags_; }
5341cb0ef41Sopenharmony_ci
5351cb0ef41Sopenharmony_ci  void SetRegExpTooBig() { reg_exp_too_big_ = true; }
5361cb0ef41Sopenharmony_ci
5371cb0ef41Sopenharmony_ci  inline bool one_byte() { return one_byte_; }
5381cb0ef41Sopenharmony_ci  inline bool optimize() { return optimize_; }
5391cb0ef41Sopenharmony_ci  inline void set_optimize(bool value) { optimize_ = value; }
5401cb0ef41Sopenharmony_ci  inline bool limiting_recursion() { return limiting_recursion_; }
5411cb0ef41Sopenharmony_ci  inline void set_limiting_recursion(bool value) {
5421cb0ef41Sopenharmony_ci    limiting_recursion_ = value;
5431cb0ef41Sopenharmony_ci  }
5441cb0ef41Sopenharmony_ci  bool read_backward() { return read_backward_; }
5451cb0ef41Sopenharmony_ci  void set_read_backward(bool value) { read_backward_ = value; }
5461cb0ef41Sopenharmony_ci  FrequencyCollator* frequency_collator() { return &frequency_collator_; }
5471cb0ef41Sopenharmony_ci
5481cb0ef41Sopenharmony_ci  int current_expansion_factor() { return current_expansion_factor_; }
5491cb0ef41Sopenharmony_ci  void set_current_expansion_factor(int value) {
5501cb0ef41Sopenharmony_ci    current_expansion_factor_ = value;
5511cb0ef41Sopenharmony_ci  }
5521cb0ef41Sopenharmony_ci
5531cb0ef41Sopenharmony_ci  // The recursive nature of ToNode node generation means we may run into stack
5541cb0ef41Sopenharmony_ci  // overflow issues. We introduce periodic checks to detect these, and the
5551cb0ef41Sopenharmony_ci  // tick counter helps limit overhead of these checks.
5561cb0ef41Sopenharmony_ci  // TODO(jgruber): This is super hacky and should be replaced by an abort
5571cb0ef41Sopenharmony_ci  // mechanism or iterative node generation.
5581cb0ef41Sopenharmony_ci  void ToNodeMaybeCheckForStackOverflow() {
5591cb0ef41Sopenharmony_ci    if ((to_node_overflow_check_ticks_++ % 16 == 0)) {
5601cb0ef41Sopenharmony_ci      ToNodeCheckForStackOverflow();
5611cb0ef41Sopenharmony_ci    }
5621cb0ef41Sopenharmony_ci  }
5631cb0ef41Sopenharmony_ci  void ToNodeCheckForStackOverflow();
5641cb0ef41Sopenharmony_ci
5651cb0ef41Sopenharmony_ci  Isolate* isolate() const { return isolate_; }
5661cb0ef41Sopenharmony_ci  Zone* zone() const { return zone_; }
5671cb0ef41Sopenharmony_ci
5681cb0ef41Sopenharmony_ci  static const int kNoRegister = -1;
5691cb0ef41Sopenharmony_ci
5701cb0ef41Sopenharmony_ci private:
5711cb0ef41Sopenharmony_ci  EndNode* accept_;
5721cb0ef41Sopenharmony_ci  int next_register_;
5731cb0ef41Sopenharmony_ci  int unicode_lookaround_stack_register_;
5741cb0ef41Sopenharmony_ci  int unicode_lookaround_position_register_;
5751cb0ef41Sopenharmony_ci  ZoneVector<RegExpNode*>* work_list_;
5761cb0ef41Sopenharmony_ci  int recursion_depth_;
5771cb0ef41Sopenharmony_ci  const RegExpFlags flags_;
5781cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler_;
5791cb0ef41Sopenharmony_ci  bool one_byte_;
5801cb0ef41Sopenharmony_ci  bool reg_exp_too_big_;
5811cb0ef41Sopenharmony_ci  bool limiting_recursion_;
5821cb0ef41Sopenharmony_ci  int to_node_overflow_check_ticks_ = 0;
5831cb0ef41Sopenharmony_ci  bool optimize_;
5841cb0ef41Sopenharmony_ci  bool read_backward_;
5851cb0ef41Sopenharmony_ci  int current_expansion_factor_;
5861cb0ef41Sopenharmony_ci  FrequencyCollator frequency_collator_;
5871cb0ef41Sopenharmony_ci  Isolate* isolate_;
5881cb0ef41Sopenharmony_ci  Zone* zone_;
5891cb0ef41Sopenharmony_ci};
5901cb0ef41Sopenharmony_ci
5911cb0ef41Sopenharmony_ci// Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
5921cb0ef41Sopenharmony_ciclass UnicodeRangeSplitter {
5931cb0ef41Sopenharmony_ci public:
5941cb0ef41Sopenharmony_ci  V8_EXPORT_PRIVATE UnicodeRangeSplitter(ZoneList<CharacterRange>* base);
5951cb0ef41Sopenharmony_ci
5961cb0ef41Sopenharmony_ci  static constexpr int kInitialSize = 8;
5971cb0ef41Sopenharmony_ci  using CharacterRangeVector = base::SmallVector<CharacterRange, kInitialSize>;
5981cb0ef41Sopenharmony_ci
5991cb0ef41Sopenharmony_ci  const CharacterRangeVector* bmp() const { return &bmp_; }
6001cb0ef41Sopenharmony_ci  const CharacterRangeVector* lead_surrogates() const {
6011cb0ef41Sopenharmony_ci    return &lead_surrogates_;
6021cb0ef41Sopenharmony_ci  }
6031cb0ef41Sopenharmony_ci  const CharacterRangeVector* trail_surrogates() const {
6041cb0ef41Sopenharmony_ci    return &trail_surrogates_;
6051cb0ef41Sopenharmony_ci  }
6061cb0ef41Sopenharmony_ci  const CharacterRangeVector* non_bmp() const { return &non_bmp_; }
6071cb0ef41Sopenharmony_ci
6081cb0ef41Sopenharmony_ci private:
6091cb0ef41Sopenharmony_ci  void AddRange(CharacterRange range);
6101cb0ef41Sopenharmony_ci
6111cb0ef41Sopenharmony_ci  CharacterRangeVector bmp_;
6121cb0ef41Sopenharmony_ci  CharacterRangeVector lead_surrogates_;
6131cb0ef41Sopenharmony_ci  CharacterRangeVector trail_surrogates_;
6141cb0ef41Sopenharmony_ci  CharacterRangeVector non_bmp_;
6151cb0ef41Sopenharmony_ci};
6161cb0ef41Sopenharmony_ci
6171cb0ef41Sopenharmony_ci// We need to check for the following characters: 0x39C 0x3BC 0x178.
6181cb0ef41Sopenharmony_ci// TODO(jgruber): Move to CharacterRange.
6191cb0ef41Sopenharmony_cibool RangeContainsLatin1Equivalents(CharacterRange range);
6201cb0ef41Sopenharmony_ci
6211cb0ef41Sopenharmony_ci}  // namespace internal
6221cb0ef41Sopenharmony_ci}  // namespace v8
6231cb0ef41Sopenharmony_ci
6241cb0ef41Sopenharmony_ci#endif  // V8_REGEXP_REGEXP_COMPILER_H_
625