11cb0ef41Sopenharmony_ci// Copyright 2016 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-parser.h"
61cb0ef41Sopenharmony_ci
71cb0ef41Sopenharmony_ci#include "src/base/small-vector.h"
81cb0ef41Sopenharmony_ci#include "src/execution/isolate.h"
91cb0ef41Sopenharmony_ci#include "src/objects/string-inl.h"
101cb0ef41Sopenharmony_ci#include "src/regexp/property-sequences.h"
111cb0ef41Sopenharmony_ci#include "src/regexp/regexp-ast.h"
121cb0ef41Sopenharmony_ci#include "src/regexp/regexp-macro-assembler.h"
131cb0ef41Sopenharmony_ci#include "src/regexp/regexp.h"
141cb0ef41Sopenharmony_ci#include "src/strings/char-predicates-inl.h"
151cb0ef41Sopenharmony_ci#include "src/utils/ostreams.h"
161cb0ef41Sopenharmony_ci#include "src/utils/utils.h"
171cb0ef41Sopenharmony_ci#include "src/zone/zone-allocator.h"
181cb0ef41Sopenharmony_ci#include "src/zone/zone-list-inl.h"
191cb0ef41Sopenharmony_ci
201cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
211cb0ef41Sopenharmony_ci#include "unicode/uniset.h"
221cb0ef41Sopenharmony_ci#endif  // V8_INTL_SUPPORT
231cb0ef41Sopenharmony_ci
241cb0ef41Sopenharmony_cinamespace v8 {
251cb0ef41Sopenharmony_cinamespace internal {
261cb0ef41Sopenharmony_ci
271cb0ef41Sopenharmony_cinamespace {
281cb0ef41Sopenharmony_ci
// Tells escape-parsing helpers whether they are currently operating inside
// the ClassEscape production
// (tc39.es/ecma262/#prod-annexB-CharacterEscape) or outside of it.
enum class InClassEscapeState {
  kInClass = 0,
  kNotInClass = 1,
};
351cb0ef41Sopenharmony_ci
361cb0ef41Sopenharmony_ci// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
371cb0ef41Sopenharmony_ciclass RegExpBuilder {
381cb0ef41Sopenharmony_ci public:
391cb0ef41Sopenharmony_ci  RegExpBuilder(Zone* zone, RegExpFlags flags)
401cb0ef41Sopenharmony_ci      : zone_(zone),
411cb0ef41Sopenharmony_ci        flags_(flags),
421cb0ef41Sopenharmony_ci        terms_(ZoneAllocator<RegExpTree*>{zone}),
431cb0ef41Sopenharmony_ci        text_(ZoneAllocator<RegExpTree*>{zone}),
441cb0ef41Sopenharmony_ci        alternatives_(ZoneAllocator<RegExpTree*>{zone}) {}
451cb0ef41Sopenharmony_ci  void AddCharacter(base::uc16 character);
461cb0ef41Sopenharmony_ci  void AddUnicodeCharacter(base::uc32 character);
471cb0ef41Sopenharmony_ci  void AddEscapedUnicodeCharacter(base::uc32 character);
481cb0ef41Sopenharmony_ci  // "Adds" an empty expression. Does nothing except consume a
491cb0ef41Sopenharmony_ci  // following quantifier
501cb0ef41Sopenharmony_ci  void AddEmpty();
511cb0ef41Sopenharmony_ci  void AddCharacterClass(RegExpCharacterClass* cc);
521cb0ef41Sopenharmony_ci  void AddCharacterClassForDesugaring(base::uc32 c);
531cb0ef41Sopenharmony_ci  void AddAtom(RegExpTree* tree);
541cb0ef41Sopenharmony_ci  void AddTerm(RegExpTree* tree);
551cb0ef41Sopenharmony_ci  void AddAssertion(RegExpTree* tree);
561cb0ef41Sopenharmony_ci  void NewAlternative();  // '|'
571cb0ef41Sopenharmony_ci  bool AddQuantifierToAtom(int min, int max,
581cb0ef41Sopenharmony_ci                           RegExpQuantifier::QuantifierType type);
591cb0ef41Sopenharmony_ci  void FlushText();
601cb0ef41Sopenharmony_ci  RegExpTree* ToRegExp();
611cb0ef41Sopenharmony_ci  RegExpFlags flags() const { return flags_; }
621cb0ef41Sopenharmony_ci
631cb0ef41Sopenharmony_ci  bool ignore_case() const { return IsIgnoreCase(flags_); }
641cb0ef41Sopenharmony_ci  bool multiline() const { return IsMultiline(flags_); }
651cb0ef41Sopenharmony_ci  bool dotall() const { return IsDotAll(flags_); }
661cb0ef41Sopenharmony_ci
671cb0ef41Sopenharmony_ci private:
681cb0ef41Sopenharmony_ci  static const base::uc16 kNoPendingSurrogate = 0;
691cb0ef41Sopenharmony_ci  void AddLeadSurrogate(base::uc16 lead_surrogate);
701cb0ef41Sopenharmony_ci  void AddTrailSurrogate(base::uc16 trail_surrogate);
711cb0ef41Sopenharmony_ci  void FlushPendingSurrogate();
721cb0ef41Sopenharmony_ci  void FlushCharacters();
731cb0ef41Sopenharmony_ci  void FlushTerms();
741cb0ef41Sopenharmony_ci  bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
751cb0ef41Sopenharmony_ci  bool NeedsDesugaringForIgnoreCase(base::uc32 c);
761cb0ef41Sopenharmony_ci  Zone* zone() const { return zone_; }
771cb0ef41Sopenharmony_ci  bool unicode() const { return IsUnicode(flags_); }
781cb0ef41Sopenharmony_ci
791cb0ef41Sopenharmony_ci  Zone* const zone_;
801cb0ef41Sopenharmony_ci  bool pending_empty_ = false;
811cb0ef41Sopenharmony_ci  const RegExpFlags flags_;
821cb0ef41Sopenharmony_ci  ZoneList<base::uc16>* characters_ = nullptr;
831cb0ef41Sopenharmony_ci  base::uc16 pending_surrogate_ = kNoPendingSurrogate;
841cb0ef41Sopenharmony_ci
851cb0ef41Sopenharmony_ci  using SmallRegExpTreeVector =
861cb0ef41Sopenharmony_ci      base::SmallVector<RegExpTree*, 8, ZoneAllocator<RegExpTree*>>;
871cb0ef41Sopenharmony_ci  SmallRegExpTreeVector terms_;
881cb0ef41Sopenharmony_ci  SmallRegExpTreeVector text_;
891cb0ef41Sopenharmony_ci  SmallRegExpTreeVector alternatives_;
901cb0ef41Sopenharmony_ci#ifdef DEBUG
911cb0ef41Sopenharmony_ci  enum {
921cb0ef41Sopenharmony_ci    ADD_NONE,
931cb0ef41Sopenharmony_ci    ADD_CHAR,
941cb0ef41Sopenharmony_ci    ADD_TERM,
951cb0ef41Sopenharmony_ci    ADD_ASSERT,
961cb0ef41Sopenharmony_ci    ADD_ATOM
971cb0ef41Sopenharmony_ci  } last_added_ = ADD_NONE;
981cb0ef41Sopenharmony_ci#define LAST(x) last_added_ = x;
991cb0ef41Sopenharmony_ci#else
1001cb0ef41Sopenharmony_ci#define LAST(x)
1011cb0ef41Sopenharmony_ci#endif
1021cb0ef41Sopenharmony_ci};
1031cb0ef41Sopenharmony_ci
// Kind of (sub)expression a parser state belongs to: the whole pattern
// (INITIAL) or one of the parenthesized group forms.
enum SubexpressionType {
  INITIAL = 0,
  // Original note: "All positive values represent captures."
  // NOTE(review): nothing in the visible code relies on that; confirm before
  // depending on it.
  CAPTURE = 1,
  POSITIVE_LOOKAROUND = 2,
  NEGATIVE_LOOKAROUND = 3,
  GROUPING = 4
};
1111cb0ef41Sopenharmony_ci
1121cb0ef41Sopenharmony_ciclass RegExpParserState : public ZoneObject {
1131cb0ef41Sopenharmony_ci public:
1141cb0ef41Sopenharmony_ci  // Push a state on the stack.
1151cb0ef41Sopenharmony_ci  RegExpParserState(RegExpParserState* previous_state,
1161cb0ef41Sopenharmony_ci                    SubexpressionType group_type,
1171cb0ef41Sopenharmony_ci                    RegExpLookaround::Type lookaround_type,
1181cb0ef41Sopenharmony_ci                    int disjunction_capture_index,
1191cb0ef41Sopenharmony_ci                    const ZoneVector<base::uc16>* capture_name,
1201cb0ef41Sopenharmony_ci                    RegExpFlags flags, Zone* zone)
1211cb0ef41Sopenharmony_ci      : previous_state_(previous_state),
1221cb0ef41Sopenharmony_ci        builder_(zone, flags),
1231cb0ef41Sopenharmony_ci        group_type_(group_type),
1241cb0ef41Sopenharmony_ci        lookaround_type_(lookaround_type),
1251cb0ef41Sopenharmony_ci        disjunction_capture_index_(disjunction_capture_index),
1261cb0ef41Sopenharmony_ci        capture_name_(capture_name) {}
1271cb0ef41Sopenharmony_ci  // Parser state of containing expression, if any.
1281cb0ef41Sopenharmony_ci  RegExpParserState* previous_state() const { return previous_state_; }
1291cb0ef41Sopenharmony_ci  bool IsSubexpression() { return previous_state_ != nullptr; }
1301cb0ef41Sopenharmony_ci  // RegExpBuilder building this regexp's AST.
1311cb0ef41Sopenharmony_ci  RegExpBuilder* builder() { return &builder_; }
1321cb0ef41Sopenharmony_ci  // Type of regexp being parsed (parenthesized group or entire regexp).
1331cb0ef41Sopenharmony_ci  SubexpressionType group_type() const { return group_type_; }
1341cb0ef41Sopenharmony_ci  // Lookahead or Lookbehind.
1351cb0ef41Sopenharmony_ci  RegExpLookaround::Type lookaround_type() const { return lookaround_type_; }
1361cb0ef41Sopenharmony_ci  // Index in captures array of first capture in this sub-expression, if any.
1371cb0ef41Sopenharmony_ci  // Also the capture index of this sub-expression itself, if group_type
1381cb0ef41Sopenharmony_ci  // is CAPTURE.
1391cb0ef41Sopenharmony_ci  int capture_index() const { return disjunction_capture_index_; }
1401cb0ef41Sopenharmony_ci  // The name of the current sub-expression, if group_type is CAPTURE. Only
1411cb0ef41Sopenharmony_ci  // used for named captures.
1421cb0ef41Sopenharmony_ci  const ZoneVector<base::uc16>* capture_name() const { return capture_name_; }
1431cb0ef41Sopenharmony_ci
1441cb0ef41Sopenharmony_ci  bool IsNamedCapture() const { return capture_name_ != nullptr; }
1451cb0ef41Sopenharmony_ci
1461cb0ef41Sopenharmony_ci  // Check whether the parser is inside a capture group with the given index.
1471cb0ef41Sopenharmony_ci  bool IsInsideCaptureGroup(int index) const {
1481cb0ef41Sopenharmony_ci    for (const RegExpParserState* s = this; s != nullptr;
1491cb0ef41Sopenharmony_ci         s = s->previous_state()) {
1501cb0ef41Sopenharmony_ci      if (s->group_type() != CAPTURE) continue;
1511cb0ef41Sopenharmony_ci      // Return true if we found the matching capture index.
1521cb0ef41Sopenharmony_ci      if (index == s->capture_index()) return true;
1531cb0ef41Sopenharmony_ci      // Abort if index is larger than what has been parsed up till this state.
1541cb0ef41Sopenharmony_ci      if (index > s->capture_index()) return false;
1551cb0ef41Sopenharmony_ci    }
1561cb0ef41Sopenharmony_ci    return false;
1571cb0ef41Sopenharmony_ci  }
1581cb0ef41Sopenharmony_ci
1591cb0ef41Sopenharmony_ci  // Check whether the parser is inside a capture group with the given name.
1601cb0ef41Sopenharmony_ci  bool IsInsideCaptureGroup(const ZoneVector<base::uc16>* name) const {
1611cb0ef41Sopenharmony_ci    DCHECK_NOT_NULL(name);
1621cb0ef41Sopenharmony_ci    for (const RegExpParserState* s = this; s != nullptr;
1631cb0ef41Sopenharmony_ci         s = s->previous_state()) {
1641cb0ef41Sopenharmony_ci      if (s->capture_name() == nullptr) continue;
1651cb0ef41Sopenharmony_ci      if (*s->capture_name() == *name) return true;
1661cb0ef41Sopenharmony_ci    }
1671cb0ef41Sopenharmony_ci    return false;
1681cb0ef41Sopenharmony_ci  }
1691cb0ef41Sopenharmony_ci
1701cb0ef41Sopenharmony_ci private:
1711cb0ef41Sopenharmony_ci  // Linked list implementation of stack of states.
1721cb0ef41Sopenharmony_ci  RegExpParserState* const previous_state_;
1731cb0ef41Sopenharmony_ci  // Builder for the stored disjunction.
1741cb0ef41Sopenharmony_ci  RegExpBuilder builder_;
1751cb0ef41Sopenharmony_ci  // Stored disjunction type (capture, look-ahead or grouping), if any.
1761cb0ef41Sopenharmony_ci  const SubexpressionType group_type_;
1771cb0ef41Sopenharmony_ci  // Stored read direction.
1781cb0ef41Sopenharmony_ci  const RegExpLookaround::Type lookaround_type_;
1791cb0ef41Sopenharmony_ci  // Stored disjunction's capture index (if any).
1801cb0ef41Sopenharmony_ci  const int disjunction_capture_index_;
1811cb0ef41Sopenharmony_ci  // Stored capture name (if any).
1821cb0ef41Sopenharmony_ci  const ZoneVector<base::uc16>* const capture_name_;
1831cb0ef41Sopenharmony_ci};
1841cb0ef41Sopenharmony_ci
1851cb0ef41Sopenharmony_citemplate <class CharT>
1861cb0ef41Sopenharmony_ciclass RegExpParserImpl final {
1871cb0ef41Sopenharmony_ci private:
1881cb0ef41Sopenharmony_ci  RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags,
1891cb0ef41Sopenharmony_ci                   uintptr_t stack_limit, Zone* zone,
1901cb0ef41Sopenharmony_ci                   const DisallowGarbageCollection& no_gc);
1911cb0ef41Sopenharmony_ci
1921cb0ef41Sopenharmony_ci  bool Parse(RegExpCompileData* result);
1931cb0ef41Sopenharmony_ci
1941cb0ef41Sopenharmony_ci  RegExpTree* ParsePattern();
1951cb0ef41Sopenharmony_ci  RegExpTree* ParseDisjunction();
1961cb0ef41Sopenharmony_ci  RegExpTree* ParseGroup();
1971cb0ef41Sopenharmony_ci
1981cb0ef41Sopenharmony_ci  // Parses a {...,...} quantifier and stores the range in the given
1991cb0ef41Sopenharmony_ci  // out parameters.
2001cb0ef41Sopenharmony_ci  bool ParseIntervalQuantifier(int* min_out, int* max_out);
2011cb0ef41Sopenharmony_ci
2021cb0ef41Sopenharmony_ci  // Checks whether the following is a length-digit hexadecimal number,
2031cb0ef41Sopenharmony_ci  // and sets the value if it is.
2041cb0ef41Sopenharmony_ci  bool ParseHexEscape(int length, base::uc32* value);
2051cb0ef41Sopenharmony_ci  bool ParseUnicodeEscape(base::uc32* value);
2061cb0ef41Sopenharmony_ci  bool ParseUnlimitedLengthHexNumber(int max_value, base::uc32* value);
2071cb0ef41Sopenharmony_ci
2081cb0ef41Sopenharmony_ci  bool ParsePropertyClassName(ZoneVector<char>* name_1,
2091cb0ef41Sopenharmony_ci                              ZoneVector<char>* name_2);
2101cb0ef41Sopenharmony_ci  bool AddPropertyClassRange(ZoneList<CharacterRange>* add_to, bool negate,
2111cb0ef41Sopenharmony_ci                             const ZoneVector<char>& name_1,
2121cb0ef41Sopenharmony_ci                             const ZoneVector<char>& name_2);
2131cb0ef41Sopenharmony_ci
2141cb0ef41Sopenharmony_ci  RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
2151cb0ef41Sopenharmony_ci
2161cb0ef41Sopenharmony_ci  base::uc32 ParseOctalLiteral();
2171cb0ef41Sopenharmony_ci
2181cb0ef41Sopenharmony_ci  // Tries to parse the input as a back reference.  If successful it
2191cb0ef41Sopenharmony_ci  // stores the result in the output parameter and returns true.  If
2201cb0ef41Sopenharmony_ci  // it fails it will push back the characters read so the same characters
2211cb0ef41Sopenharmony_ci  // can be reparsed.
2221cb0ef41Sopenharmony_ci  bool ParseBackReferenceIndex(int* index_out);
2231cb0ef41Sopenharmony_ci
2241cb0ef41Sopenharmony_ci  // Parse inside a class. Either add escaped class to the range, or return
2251cb0ef41Sopenharmony_ci  // false and pass parsed single character through |char_out|.
2261cb0ef41Sopenharmony_ci  void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone,
2271cb0ef41Sopenharmony_ci                        bool add_unicode_case_equivalents, base::uc32* char_out,
2281cb0ef41Sopenharmony_ci                        bool* is_class_escape);
2291cb0ef41Sopenharmony_ci  // Returns true iff parsing was successful.
2301cb0ef41Sopenharmony_ci  bool TryParseCharacterClassEscape(base::uc32 next,
2311cb0ef41Sopenharmony_ci                                    InClassEscapeState in_class_escape_state,
2321cb0ef41Sopenharmony_ci                                    ZoneList<CharacterRange>* ranges,
2331cb0ef41Sopenharmony_ci                                    Zone* zone,
2341cb0ef41Sopenharmony_ci                                    bool add_unicode_case_equivalents);
2351cb0ef41Sopenharmony_ci  // Parses and returns a single escaped character.
2361cb0ef41Sopenharmony_ci  base::uc32 ParseCharacterEscape(InClassEscapeState in_class_escape_state,
2371cb0ef41Sopenharmony_ci                                  bool* is_escaped_unicode_character);
2381cb0ef41Sopenharmony_ci
2391cb0ef41Sopenharmony_ci  RegExpTree* ReportError(RegExpError error);
2401cb0ef41Sopenharmony_ci  void Advance();
2411cb0ef41Sopenharmony_ci  void Advance(int dist);
2421cb0ef41Sopenharmony_ci  void RewindByOneCodepoint();  // Rewinds to before the previous Advance().
2431cb0ef41Sopenharmony_ci  void Reset(int pos);
2441cb0ef41Sopenharmony_ci
2451cb0ef41Sopenharmony_ci  // Reports whether the pattern might be used as a literal search string.
2461cb0ef41Sopenharmony_ci  // Only use if the result of the parse is a single atom node.
2471cb0ef41Sopenharmony_ci  bool simple() const { return simple_; }
2481cb0ef41Sopenharmony_ci  bool contains_anchor() const { return contains_anchor_; }
2491cb0ef41Sopenharmony_ci  void set_contains_anchor() { contains_anchor_ = true; }
2501cb0ef41Sopenharmony_ci  int captures_started() const { return captures_started_; }
2511cb0ef41Sopenharmony_ci  int position() const { return next_pos_ - 1; }
2521cb0ef41Sopenharmony_ci  bool failed() const { return failed_; }
2531cb0ef41Sopenharmony_ci  bool unicode() const { return IsUnicode(top_level_flags_) || force_unicode_; }
2541cb0ef41Sopenharmony_ci
2551cb0ef41Sopenharmony_ci  static bool IsSyntaxCharacterOrSlash(base::uc32 c);
2561cb0ef41Sopenharmony_ci
2571cb0ef41Sopenharmony_ci  static const base::uc32 kEndMarker = (1 << 21);
2581cb0ef41Sopenharmony_ci
2591cb0ef41Sopenharmony_ci private:
2601cb0ef41Sopenharmony_ci  // Return the 1-indexed RegExpCapture object, allocate if necessary.
2611cb0ef41Sopenharmony_ci  RegExpCapture* GetCapture(int index);
2621cb0ef41Sopenharmony_ci
2631cb0ef41Sopenharmony_ci  // Creates a new named capture at the specified index. Must be called exactly
2641cb0ef41Sopenharmony_ci  // once for each named capture. Fails if a capture with the same name is
2651cb0ef41Sopenharmony_ci  // encountered.
2661cb0ef41Sopenharmony_ci  bool CreateNamedCaptureAtIndex(const ZoneVector<base::uc16>* name, int index);
2671cb0ef41Sopenharmony_ci
2681cb0ef41Sopenharmony_ci  // Parses the name of a capture group (?<name>pattern). The name must adhere
2691cb0ef41Sopenharmony_ci  // to IdentifierName in the ECMAScript standard.
2701cb0ef41Sopenharmony_ci  const ZoneVector<base::uc16>* ParseCaptureGroupName();
2711cb0ef41Sopenharmony_ci
2721cb0ef41Sopenharmony_ci  bool ParseNamedBackReference(RegExpBuilder* builder,
2731cb0ef41Sopenharmony_ci                               RegExpParserState* state);
2741cb0ef41Sopenharmony_ci  RegExpParserState* ParseOpenParenthesis(RegExpParserState* state);
2751cb0ef41Sopenharmony_ci
2761cb0ef41Sopenharmony_ci  // After the initial parsing pass, patch corresponding RegExpCapture objects
2771cb0ef41Sopenharmony_ci  // into all RegExpBackReferences. This is done after initial parsing in order
2781cb0ef41Sopenharmony_ci  // to avoid complicating cases in which references comes before the capture.
2791cb0ef41Sopenharmony_ci  void PatchNamedBackReferences();
2801cb0ef41Sopenharmony_ci
2811cb0ef41Sopenharmony_ci  ZoneVector<RegExpCapture*>* GetNamedCaptures() const;
2821cb0ef41Sopenharmony_ci
2831cb0ef41Sopenharmony_ci  // Returns true iff the pattern contains named captures. May call
2841cb0ef41Sopenharmony_ci  // ScanForCaptures to look ahead at the remaining pattern.
2851cb0ef41Sopenharmony_ci  bool HasNamedCaptures(InClassEscapeState in_class_escape_state);
2861cb0ef41Sopenharmony_ci
2871cb0ef41Sopenharmony_ci  Zone* zone() const { return zone_; }
2881cb0ef41Sopenharmony_ci
2891cb0ef41Sopenharmony_ci  base::uc32 current() const { return current_; }
2901cb0ef41Sopenharmony_ci  bool has_more() const { return has_more_; }
2911cb0ef41Sopenharmony_ci  bool has_next() const { return next_pos_ < input_length(); }
2921cb0ef41Sopenharmony_ci  base::uc32 Next();
2931cb0ef41Sopenharmony_ci  template <bool update_position>
2941cb0ef41Sopenharmony_ci  base::uc32 ReadNext();
2951cb0ef41Sopenharmony_ci  CharT InputAt(int index) const {
2961cb0ef41Sopenharmony_ci    DCHECK(0 <= index && index < input_length());
2971cb0ef41Sopenharmony_ci    return input_[index];
2981cb0ef41Sopenharmony_ci  }
2991cb0ef41Sopenharmony_ci  int input_length() const { return input_length_; }
3001cb0ef41Sopenharmony_ci  void ScanForCaptures(InClassEscapeState in_class_escape_state);
3011cb0ef41Sopenharmony_ci
3021cb0ef41Sopenharmony_ci  struct RegExpCaptureNameLess {
3031cb0ef41Sopenharmony_ci    bool operator()(const RegExpCapture* lhs, const RegExpCapture* rhs) const {
3041cb0ef41Sopenharmony_ci      DCHECK_NOT_NULL(lhs);
3051cb0ef41Sopenharmony_ci      DCHECK_NOT_NULL(rhs);
3061cb0ef41Sopenharmony_ci      return *lhs->name() < *rhs->name();
3071cb0ef41Sopenharmony_ci    }
3081cb0ef41Sopenharmony_ci  };
3091cb0ef41Sopenharmony_ci
3101cb0ef41Sopenharmony_ci  class ForceUnicodeScope final {
3111cb0ef41Sopenharmony_ci   public:
3121cb0ef41Sopenharmony_ci    explicit ForceUnicodeScope(RegExpParserImpl<CharT>* parser)
3131cb0ef41Sopenharmony_ci        : parser_(parser) {
3141cb0ef41Sopenharmony_ci      DCHECK(!parser_->force_unicode_);
3151cb0ef41Sopenharmony_ci      parser_->force_unicode_ = true;
3161cb0ef41Sopenharmony_ci    }
3171cb0ef41Sopenharmony_ci    ~ForceUnicodeScope() {
3181cb0ef41Sopenharmony_ci      DCHECK(parser_->force_unicode_);
3191cb0ef41Sopenharmony_ci      parser_->force_unicode_ = false;
3201cb0ef41Sopenharmony_ci    }
3211cb0ef41Sopenharmony_ci
3221cb0ef41Sopenharmony_ci   private:
3231cb0ef41Sopenharmony_ci    RegExpParserImpl<CharT>* const parser_;
3241cb0ef41Sopenharmony_ci  };
3251cb0ef41Sopenharmony_ci
3261cb0ef41Sopenharmony_ci  const DisallowGarbageCollection no_gc_;
3271cb0ef41Sopenharmony_ci  Zone* const zone_;
3281cb0ef41Sopenharmony_ci  RegExpError error_ = RegExpError::kNone;
3291cb0ef41Sopenharmony_ci  int error_pos_ = 0;
3301cb0ef41Sopenharmony_ci  ZoneList<RegExpCapture*>* captures_;
3311cb0ef41Sopenharmony_ci  ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
3321cb0ef41Sopenharmony_ci  ZoneList<RegExpBackReference*>* named_back_references_;
3331cb0ef41Sopenharmony_ci  const CharT* const input_;
3341cb0ef41Sopenharmony_ci  const int input_length_;
3351cb0ef41Sopenharmony_ci  base::uc32 current_;
3361cb0ef41Sopenharmony_ci  const RegExpFlags top_level_flags_;
3371cb0ef41Sopenharmony_ci  bool force_unicode_ = false;  // Force parser to act as if unicode were set.
3381cb0ef41Sopenharmony_ci  int next_pos_;
3391cb0ef41Sopenharmony_ci  int captures_started_;
3401cb0ef41Sopenharmony_ci  int capture_count_;  // Only valid after we have scanned for captures.
3411cb0ef41Sopenharmony_ci  bool has_more_;
3421cb0ef41Sopenharmony_ci  bool simple_;
3431cb0ef41Sopenharmony_ci  bool contains_anchor_;
3441cb0ef41Sopenharmony_ci  bool is_scanned_for_captures_;
3451cb0ef41Sopenharmony_ci  bool has_named_captures_;  // Only valid after we have scanned for captures.
3461cb0ef41Sopenharmony_ci  bool failed_;
3471cb0ef41Sopenharmony_ci  const uintptr_t stack_limit_;
3481cb0ef41Sopenharmony_ci
3491cb0ef41Sopenharmony_ci  friend bool RegExpParser::ParseRegExpFromHeapString(Isolate*, Zone*,
3501cb0ef41Sopenharmony_ci                                                      Handle<String>,
3511cb0ef41Sopenharmony_ci                                                      RegExpFlags,
3521cb0ef41Sopenharmony_ci                                                      RegExpCompileData*);
3531cb0ef41Sopenharmony_ci  friend bool RegExpParser::VerifyRegExpSyntax<CharT>(
3541cb0ef41Sopenharmony_ci      Zone*, uintptr_t, const CharT*, int, RegExpFlags, RegExpCompileData*,
3551cb0ef41Sopenharmony_ci      const DisallowGarbageCollection&);
3561cb0ef41Sopenharmony_ci};
3571cb0ef41Sopenharmony_ci
3581cb0ef41Sopenharmony_citemplate <class CharT>
3591cb0ef41Sopenharmony_ciRegExpParserImpl<CharT>::RegExpParserImpl(
3601cb0ef41Sopenharmony_ci    const CharT* input, int input_length, RegExpFlags flags,
3611cb0ef41Sopenharmony_ci    uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc)
3621cb0ef41Sopenharmony_ci    : zone_(zone),
3631cb0ef41Sopenharmony_ci      captures_(nullptr),
3641cb0ef41Sopenharmony_ci      named_captures_(nullptr),
3651cb0ef41Sopenharmony_ci      named_back_references_(nullptr),
3661cb0ef41Sopenharmony_ci      input_(input),
3671cb0ef41Sopenharmony_ci      input_length_(input_length),
3681cb0ef41Sopenharmony_ci      current_(kEndMarker),
3691cb0ef41Sopenharmony_ci      top_level_flags_(flags),
3701cb0ef41Sopenharmony_ci      next_pos_(0),
3711cb0ef41Sopenharmony_ci      captures_started_(0),
3721cb0ef41Sopenharmony_ci      capture_count_(0),
3731cb0ef41Sopenharmony_ci      has_more_(true),
3741cb0ef41Sopenharmony_ci      simple_(false),
3751cb0ef41Sopenharmony_ci      contains_anchor_(false),
3761cb0ef41Sopenharmony_ci      is_scanned_for_captures_(false),
3771cb0ef41Sopenharmony_ci      has_named_captures_(false),
3781cb0ef41Sopenharmony_ci      failed_(false),
3791cb0ef41Sopenharmony_ci      stack_limit_(stack_limit) {
3801cb0ef41Sopenharmony_ci  Advance();
3811cb0ef41Sopenharmony_ci}
3821cb0ef41Sopenharmony_ci
3831cb0ef41Sopenharmony_citemplate <>
3841cb0ef41Sopenharmony_citemplate <bool update_position>
3851cb0ef41Sopenharmony_ciinline base::uc32 RegExpParserImpl<uint8_t>::ReadNext() {
3861cb0ef41Sopenharmony_ci  int position = next_pos_;
3871cb0ef41Sopenharmony_ci  base::uc16 c0 = InputAt(position);
3881cb0ef41Sopenharmony_ci  position++;
3891cb0ef41Sopenharmony_ci  DCHECK(!unibrow::Utf16::IsLeadSurrogate(c0));
3901cb0ef41Sopenharmony_ci  if (update_position) next_pos_ = position;
3911cb0ef41Sopenharmony_ci  return c0;
3921cb0ef41Sopenharmony_ci}
3931cb0ef41Sopenharmony_ci
3941cb0ef41Sopenharmony_citemplate <>
3951cb0ef41Sopenharmony_citemplate <bool update_position>
3961cb0ef41Sopenharmony_ciinline base::uc32 RegExpParserImpl<base::uc16>::ReadNext() {
3971cb0ef41Sopenharmony_ci  int position = next_pos_;
3981cb0ef41Sopenharmony_ci  base::uc16 c0 = InputAt(position);
3991cb0ef41Sopenharmony_ci  base::uc32 result = c0;
4001cb0ef41Sopenharmony_ci  position++;
4011cb0ef41Sopenharmony_ci  // Read the whole surrogate pair in case of unicode flag, if possible.
4021cb0ef41Sopenharmony_ci  if (unicode() && position < input_length() &&
4031cb0ef41Sopenharmony_ci      unibrow::Utf16::IsLeadSurrogate(c0)) {
4041cb0ef41Sopenharmony_ci    base::uc16 c1 = InputAt(position);
4051cb0ef41Sopenharmony_ci    if (unibrow::Utf16::IsTrailSurrogate(c1)) {
4061cb0ef41Sopenharmony_ci      result = unibrow::Utf16::CombineSurrogatePair(c0, c1);
4071cb0ef41Sopenharmony_ci      position++;
4081cb0ef41Sopenharmony_ci    }
4091cb0ef41Sopenharmony_ci  }
4101cb0ef41Sopenharmony_ci  if (update_position) next_pos_ = position;
4111cb0ef41Sopenharmony_ci  return result;
4121cb0ef41Sopenharmony_ci}
4131cb0ef41Sopenharmony_ci
4141cb0ef41Sopenharmony_citemplate <class CharT>
4151cb0ef41Sopenharmony_cibase::uc32 RegExpParserImpl<CharT>::Next() {
4161cb0ef41Sopenharmony_ci  if (has_next()) {
4171cb0ef41Sopenharmony_ci    return ReadNext<false>();
4181cb0ef41Sopenharmony_ci  } else {
4191cb0ef41Sopenharmony_ci    return kEndMarker;
4201cb0ef41Sopenharmony_ci  }
4211cb0ef41Sopenharmony_ci}
4221cb0ef41Sopenharmony_ci
4231cb0ef41Sopenharmony_citemplate <class CharT>
4241cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::Advance() {
4251cb0ef41Sopenharmony_ci  if (has_next()) {
4261cb0ef41Sopenharmony_ci    if (GetCurrentStackPosition() < stack_limit_) {
4271cb0ef41Sopenharmony_ci      if (FLAG_correctness_fuzzer_suppressions) {
4281cb0ef41Sopenharmony_ci        FATAL("Aborting on stack overflow");
4291cb0ef41Sopenharmony_ci      }
4301cb0ef41Sopenharmony_ci      ReportError(RegExpError::kStackOverflow);
4311cb0ef41Sopenharmony_ci    } else {
4321cb0ef41Sopenharmony_ci      current_ = ReadNext<true>();
4331cb0ef41Sopenharmony_ci    }
4341cb0ef41Sopenharmony_ci  } else {
4351cb0ef41Sopenharmony_ci    current_ = kEndMarker;
4361cb0ef41Sopenharmony_ci    // Advance so that position() points to 1-after-the-last-character. This is
4371cb0ef41Sopenharmony_ci    // important so that Reset() to this position works correctly.
4381cb0ef41Sopenharmony_ci    next_pos_ = input_length() + 1;
4391cb0ef41Sopenharmony_ci    has_more_ = false;
4401cb0ef41Sopenharmony_ci  }
4411cb0ef41Sopenharmony_ci}
4421cb0ef41Sopenharmony_ci
4431cb0ef41Sopenharmony_citemplate <class CharT>
4441cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::RewindByOneCodepoint() {
4451cb0ef41Sopenharmony_ci  if (current() == kEndMarker) return;
4461cb0ef41Sopenharmony_ci  // Rewinds by one code point, i.e.: two code units if `current` is outside
4471cb0ef41Sopenharmony_ci  // the basic multilingual plane (= composed of a lead and trail surrogate),
4481cb0ef41Sopenharmony_ci  // or one code unit otherwise.
4491cb0ef41Sopenharmony_ci  const int rewind_by =
4501cb0ef41Sopenharmony_ci      current() > unibrow::Utf16::kMaxNonSurrogateCharCode ? -2 : -1;
4511cb0ef41Sopenharmony_ci  Advance(rewind_by);  // Undo the last Advance.
4521cb0ef41Sopenharmony_ci}
4531cb0ef41Sopenharmony_ci
4541cb0ef41Sopenharmony_citemplate <class CharT>
4551cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::Reset(int pos) {
4561cb0ef41Sopenharmony_ci  next_pos_ = pos;
4571cb0ef41Sopenharmony_ci  has_more_ = (pos < input_length());
4581cb0ef41Sopenharmony_ci  Advance();
4591cb0ef41Sopenharmony_ci}
4601cb0ef41Sopenharmony_ci
4611cb0ef41Sopenharmony_citemplate <class CharT>
4621cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::Advance(int dist) {
4631cb0ef41Sopenharmony_ci  next_pos_ += dist - 1;
4641cb0ef41Sopenharmony_ci  Advance();
4651cb0ef41Sopenharmony_ci}
4661cb0ef41Sopenharmony_ci
4671cb0ef41Sopenharmony_citemplate <class CharT>
4681cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::IsSyntaxCharacterOrSlash(base::uc32 c) {
4691cb0ef41Sopenharmony_ci  switch (c) {
4701cb0ef41Sopenharmony_ci    case '^':
4711cb0ef41Sopenharmony_ci    case '$':
4721cb0ef41Sopenharmony_ci    case '\\':
4731cb0ef41Sopenharmony_ci    case '.':
4741cb0ef41Sopenharmony_ci    case '*':
4751cb0ef41Sopenharmony_ci    case '+':
4761cb0ef41Sopenharmony_ci    case '?':
4771cb0ef41Sopenharmony_ci    case '(':
4781cb0ef41Sopenharmony_ci    case ')':
4791cb0ef41Sopenharmony_ci    case '[':
4801cb0ef41Sopenharmony_ci    case ']':
4811cb0ef41Sopenharmony_ci    case '{':
4821cb0ef41Sopenharmony_ci    case '}':
4831cb0ef41Sopenharmony_ci    case '|':
4841cb0ef41Sopenharmony_ci    case '/':
4851cb0ef41Sopenharmony_ci      return true;
4861cb0ef41Sopenharmony_ci    default:
4871cb0ef41Sopenharmony_ci      break;
4881cb0ef41Sopenharmony_ci  }
4891cb0ef41Sopenharmony_ci  return false;
4901cb0ef41Sopenharmony_ci}
4911cb0ef41Sopenharmony_ci
4921cb0ef41Sopenharmony_citemplate <class CharT>
4931cb0ef41Sopenharmony_ciRegExpTree* RegExpParserImpl<CharT>::ReportError(RegExpError error) {
4941cb0ef41Sopenharmony_ci  if (failed_) return nullptr;  // Do not overwrite any existing error.
4951cb0ef41Sopenharmony_ci  failed_ = true;
4961cb0ef41Sopenharmony_ci  error_ = error;
4971cb0ef41Sopenharmony_ci  error_pos_ = position();
4981cb0ef41Sopenharmony_ci  // Zip to the end to make sure no more input is read.
4991cb0ef41Sopenharmony_ci  current_ = kEndMarker;
5001cb0ef41Sopenharmony_ci  next_pos_ = input_length();
5011cb0ef41Sopenharmony_ci  return nullptr;
5021cb0ef41Sopenharmony_ci}
5031cb0ef41Sopenharmony_ci
// Bail-out helper, written as the LAST "argument" of a call statement:
//
//   Foo(CHECK_FAILED);   // expands to:  Foo(); if (failed_) return nullptr;
//
// The macro closes the call's parenthesis itself, tests |failed_|, and then
// opens a dummy parenthesized expression that the call site's own ");"
// terminates. It may only be used as a full statement inside functions
// whose return type accepts nullptr.
#define CHECK_FAILED \
  );                 \
  if (failed_) {     \
    return nullptr;  \
  }                  \
  (static_cast<void>(0)
5071cb0ef41Sopenharmony_ci
5081cb0ef41Sopenharmony_ci// Pattern ::
5091cb0ef41Sopenharmony_ci//   Disjunction
5101cb0ef41Sopenharmony_citemplate <class CharT>
5111cb0ef41Sopenharmony_ciRegExpTree* RegExpParserImpl<CharT>::ParsePattern() {
5121cb0ef41Sopenharmony_ci  RegExpTree* result = ParseDisjunction(CHECK_FAILED);
5131cb0ef41Sopenharmony_ci  PatchNamedBackReferences(CHECK_FAILED);
5141cb0ef41Sopenharmony_ci  DCHECK(!has_more());
5151cb0ef41Sopenharmony_ci  // If the result of parsing is a literal string atom, and it has the
5161cb0ef41Sopenharmony_ci  // same length as the input, then the atom is identical to the input.
5171cb0ef41Sopenharmony_ci  if (result->IsAtom() && result->AsAtom()->length() == input_length()) {
5181cb0ef41Sopenharmony_ci    simple_ = true;
5191cb0ef41Sopenharmony_ci  }
5201cb0ef41Sopenharmony_ci  return result;
5211cb0ef41Sopenharmony_ci}
5221cb0ef41Sopenharmony_ci
5231cb0ef41Sopenharmony_ci// Disjunction ::
5241cb0ef41Sopenharmony_ci//   Alternative
5251cb0ef41Sopenharmony_ci//   Alternative | Disjunction
5261cb0ef41Sopenharmony_ci// Alternative ::
5271cb0ef41Sopenharmony_ci//   [empty]
5281cb0ef41Sopenharmony_ci//   Term Alternative
5291cb0ef41Sopenharmony_ci// Term ::
5301cb0ef41Sopenharmony_ci//   Assertion
5311cb0ef41Sopenharmony_ci//   Atom
5321cb0ef41Sopenharmony_ci//   Atom Quantifier
//
// Parses a disjunction and every group nested inside it in a single loop
// rather than recursively. Nesting is tracked by a chain of RegExpParserState
// objects: '(' switches to a new state obtained from ParseOpenParenthesis and
// ')' switches back to state->previous_state(). Returns builder->ToRegExp()
// of the outermost state when the end of the input is reached; on failure it
// returns nullptr via CHECK_FAILED or the result of ReportError().
5331cb0ef41Sopenharmony_citemplate <class CharT>
5341cb0ef41Sopenharmony_ciRegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
5351cb0ef41Sopenharmony_ci  // Used to store current state while parsing subexpressions.
5361cb0ef41Sopenharmony_ci  RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
5371cb0ef41Sopenharmony_ci                                  0, nullptr, top_level_flags_, zone());
5381cb0ef41Sopenharmony_ci  RegExpParserState* state = &initial_state;
5391cb0ef41Sopenharmony_ci  // Cache the builder in a local variable for quick access.
5401cb0ef41Sopenharmony_ci  RegExpBuilder* builder = initial_state.builder();
5411cb0ef41Sopenharmony_ci  while (true) {
    // First switch: parse one assertion or atom. Cases that `continue` start
    // the next term right away; cases that `break` proceed to the quantifier
    // switch further below.
5421cb0ef41Sopenharmony_ci    switch (current()) {
5431cb0ef41Sopenharmony_ci      case kEndMarker:
5441cb0ef41Sopenharmony_ci        if (failed()) return nullptr;  // E.g. the initial Advance failed.
5451cb0ef41Sopenharmony_ci        if (state->IsSubexpression()) {
5461cb0ef41Sopenharmony_ci          // Inside a parenthesized group when hitting end of input.
5471cb0ef41Sopenharmony_ci          return ReportError(RegExpError::kUnterminatedGroup);
5481cb0ef41Sopenharmony_ci        }
5491cb0ef41Sopenharmony_ci        DCHECK_EQ(INITIAL, state->group_type());
5501cb0ef41Sopenharmony_ci        // Parsing completed successfully.
5511cb0ef41Sopenharmony_ci        return builder->ToRegExp();
5521cb0ef41Sopenharmony_ci      case ')': {
5531cb0ef41Sopenharmony_ci        if (!state->IsSubexpression()) {
5541cb0ef41Sopenharmony_ci          return ReportError(RegExpError::kUnmatchedParen);
5551cb0ef41Sopenharmony_ci        }
5561cb0ef41Sopenharmony_ci        DCHECK_NE(INITIAL, state->group_type());
5571cb0ef41Sopenharmony_ci
5581cb0ef41Sopenharmony_ci        Advance();
5591cb0ef41Sopenharmony_ci        // End disjunction parsing and convert builder content to new single
5601cb0ef41Sopenharmony_ci        // regexp atom.
5611cb0ef41Sopenharmony_ci        RegExpTree* body = builder->ToRegExp();
5621cb0ef41Sopenharmony_ci
5631cb0ef41Sopenharmony_ci        int end_capture_index = captures_started();
5641cb0ef41Sopenharmony_ci
5651cb0ef41Sopenharmony_ci        int capture_index = state->capture_index();
5661cb0ef41Sopenharmony_ci        SubexpressionType group_type = state->group_type();
5671cb0ef41Sopenharmony_ci
5681cb0ef41Sopenharmony_ci        // Build result of subexpression.
5691cb0ef41Sopenharmony_ci        if (group_type == CAPTURE) {
5701cb0ef41Sopenharmony_ci          if (state->IsNamedCapture()) {
5711cb0ef41Sopenharmony_ci            CreateNamedCaptureAtIndex(state->capture_name(),
5721cb0ef41Sopenharmony_ci                                      capture_index CHECK_FAILED);
5731cb0ef41Sopenharmony_ci          }
5741cb0ef41Sopenharmony_ci          RegExpCapture* capture = GetCapture(capture_index);
5751cb0ef41Sopenharmony_ci          capture->set_body(body);
5761cb0ef41Sopenharmony_ci          body = capture;
5771cb0ef41Sopenharmony_ci        } else if (group_type == GROUPING) {
5781cb0ef41Sopenharmony_ci          body = zone()->template New<RegExpGroup>(body);
5791cb0ef41Sopenharmony_ci        } else {
5801cb0ef41Sopenharmony_ci          DCHECK(group_type == POSITIVE_LOOKAROUND ||
5811cb0ef41Sopenharmony_ci                 group_type == NEGATIVE_LOOKAROUND);
5821cb0ef41Sopenharmony_ci          bool is_positive = (group_type == POSITIVE_LOOKAROUND);
          // The third argument is the number of captures started while this
          // lookaround was being parsed.
5831cb0ef41Sopenharmony_ci          body = zone()->template New<RegExpLookaround>(
5841cb0ef41Sopenharmony_ci              body, is_positive, end_capture_index - capture_index,
5851cb0ef41Sopenharmony_ci              capture_index, state->lookaround_type());
5861cb0ef41Sopenharmony_ci        }
5871cb0ef41Sopenharmony_ci
5881cb0ef41Sopenharmony_ci        // Restore previous state.
5891cb0ef41Sopenharmony_ci        state = state->previous_state();
5901cb0ef41Sopenharmony_ci        builder = state->builder();
5911cb0ef41Sopenharmony_ci
5921cb0ef41Sopenharmony_ci        builder->AddAtom(body);
5931cb0ef41Sopenharmony_ci        // For compatibility with JSC and ES3, we allow quantifiers after
5941cb0ef41Sopenharmony_ci        // lookaheads, and break in all cases.
5951cb0ef41Sopenharmony_ci        break;
5961cb0ef41Sopenharmony_ci      }
5971cb0ef41Sopenharmony_ci      case '|': {
5981cb0ef41Sopenharmony_ci        Advance();
5991cb0ef41Sopenharmony_ci        builder->NewAlternative();
6001cb0ef41Sopenharmony_ci        continue;
6011cb0ef41Sopenharmony_ci      }
      // A quantifier character at the start of a term has no atom to apply to.
6021cb0ef41Sopenharmony_ci      case '*':
6031cb0ef41Sopenharmony_ci      case '+':
6041cb0ef41Sopenharmony_ci      case '?':
6051cb0ef41Sopenharmony_ci        return ReportError(RegExpError::kNothingToRepeat);
6061cb0ef41Sopenharmony_ci      case '^': {
6071cb0ef41Sopenharmony_ci        Advance();
6081cb0ef41Sopenharmony_ci        builder->AddAssertion(zone()->template New<RegExpAssertion>(
6091cb0ef41Sopenharmony_ci            builder->multiline() ? RegExpAssertion::Type::START_OF_LINE
6101cb0ef41Sopenharmony_ci                                 : RegExpAssertion::Type::START_OF_INPUT));
6111cb0ef41Sopenharmony_ci        set_contains_anchor();
6121cb0ef41Sopenharmony_ci        continue;
6131cb0ef41Sopenharmony_ci      }
6141cb0ef41Sopenharmony_ci      case '$': {
6151cb0ef41Sopenharmony_ci        Advance();
6161cb0ef41Sopenharmony_ci        RegExpAssertion::Type assertion_type =
6171cb0ef41Sopenharmony_ci            builder->multiline() ? RegExpAssertion::Type::END_OF_LINE
6181cb0ef41Sopenharmony_ci                                 : RegExpAssertion::Type::END_OF_INPUT;
6191cb0ef41Sopenharmony_ci        builder->AddAssertion(
6201cb0ef41Sopenharmony_ci            zone()->template New<RegExpAssertion>(assertion_type));
6211cb0ef41Sopenharmony_ci        continue;
6221cb0ef41Sopenharmony_ci      }
6231cb0ef41Sopenharmony_ci      case '.': {
6241cb0ef41Sopenharmony_ci        Advance();
6251cb0ef41Sopenharmony_ci        ZoneList<CharacterRange>* ranges =
6261cb0ef41Sopenharmony_ci            zone()->template New<ZoneList<CharacterRange>>(2, zone());
6271cb0ef41Sopenharmony_ci
6281cb0ef41Sopenharmony_ci        if (builder->dotall()) {
6291cb0ef41Sopenharmony_ci          // Everything.
6301cb0ef41Sopenharmony_ci          CharacterRange::AddClassEscape(StandardCharacterSet::kEverything,
6311cb0ef41Sopenharmony_ci                                         ranges, false, zone());
6321cb0ef41Sopenharmony_ci        } else {
6331cb0ef41Sopenharmony_ci          // Everything except \x0A, \x0D, \u2028 and \u2029.
6341cb0ef41Sopenharmony_ci          CharacterRange::AddClassEscape(
6351cb0ef41Sopenharmony_ci              StandardCharacterSet::kNotLineTerminator, ranges, false, zone());
6361cb0ef41Sopenharmony_ci        }
6371cb0ef41Sopenharmony_ci
6381cb0ef41Sopenharmony_ci        RegExpCharacterClass* cc =
6391cb0ef41Sopenharmony_ci            zone()->template New<RegExpCharacterClass>(zone(), ranges);
6401cb0ef41Sopenharmony_ci        builder->AddCharacterClass(cc);
6411cb0ef41Sopenharmony_ci        break;
6421cb0ef41Sopenharmony_ci      }
6431cb0ef41Sopenharmony_ci      case '(': {
        // Enter a nested group: subsequent terms are added to the builder of
        // the state returned by ParseOpenParenthesis.
6441cb0ef41Sopenharmony_ci        state = ParseOpenParenthesis(state CHECK_FAILED);
6451cb0ef41Sopenharmony_ci        builder = state->builder();
6461cb0ef41Sopenharmony_ci        continue;
6471cb0ef41Sopenharmony_ci      }
6481cb0ef41Sopenharmony_ci      case '[': {
6491cb0ef41Sopenharmony_ci        RegExpTree* cc = ParseCharacterClass(builder CHECK_FAILED);
6501cb0ef41Sopenharmony_ci        builder->AddCharacterClass(cc->AsCharacterClass());
6511cb0ef41Sopenharmony_ci        break;
6521cb0ef41Sopenharmony_ci      }
6531cb0ef41Sopenharmony_ci      // Atom ::
6541cb0ef41Sopenharmony_ci      //   \ AtomEscape
6551cb0ef41Sopenharmony_ci      case '\\':
6561cb0ef41Sopenharmony_ci        switch (Next()) {
6571cb0ef41Sopenharmony_ci          case kEndMarker:
6581cb0ef41Sopenharmony_ci            return ReportError(RegExpError::kEscapeAtEndOfPattern);
6591cb0ef41Sopenharmony_ci          // AtomEscape ::
6601cb0ef41Sopenharmony_ci          //   [+UnicodeMode] DecimalEscape
6611cb0ef41Sopenharmony_ci          //   [~UnicodeMode] DecimalEscape but only if the CapturingGroupNumber
6621cb0ef41Sopenharmony_ci          //                  of DecimalEscape is ≤ NcapturingParens
6631cb0ef41Sopenharmony_ci          //   CharacterEscape (some cases of this mixed in too)
6641cb0ef41Sopenharmony_ci          //
6651cb0ef41Sopenharmony_ci          // TODO(jgruber): It may make sense to disentangle all the different
6661cb0ef41Sopenharmony_ci          // cases and make the structure mirror the spec, e.g. for AtomEscape:
6671cb0ef41Sopenharmony_ci          //
6681cb0ef41Sopenharmony_ci          //  if (TryParseDecimalEscape(...)) return;
6691cb0ef41Sopenharmony_ci          //  if (TryParseCharacterClassEscape(...)) return;
6701cb0ef41Sopenharmony_ci          //  if (TryParseCharacterEscape(...)) return;
6711cb0ef41Sopenharmony_ci          //  if (TryParseGroupName(...)) return;
6721cb0ef41Sopenharmony_ci          case '1':
6731cb0ef41Sopenharmony_ci          case '2':
6741cb0ef41Sopenharmony_ci          case '3':
6751cb0ef41Sopenharmony_ci          case '4':
6761cb0ef41Sopenharmony_ci          case '5':
6771cb0ef41Sopenharmony_ci          case '6':
6781cb0ef41Sopenharmony_ci          case '7':
6791cb0ef41Sopenharmony_ci          case '8':
6801cb0ef41Sopenharmony_ci          case '9': {
6811cb0ef41Sopenharmony_ci            int index = 0;
6821cb0ef41Sopenharmony_ci            const bool is_backref =
6831cb0ef41Sopenharmony_ci                ParseBackReferenceIndex(&index CHECK_FAILED);
6841cb0ef41Sopenharmony_ci            if (is_backref) {
6851cb0ef41Sopenharmony_ci              if (state->IsInsideCaptureGroup(index)) {
6861cb0ef41Sopenharmony_ci                // The back reference is inside the capture group it refers to.
6871cb0ef41Sopenharmony_ci                // Nothing can possibly have been captured yet, so we use empty
6881cb0ef41Sopenharmony_ci                // instead. This ensures that, when checking a back reference,
6891cb0ef41Sopenharmony_ci                // the capture registers of the referenced capture are either
6901cb0ef41Sopenharmony_ci                // both set or both cleared.
6911cb0ef41Sopenharmony_ci                builder->AddEmpty();
6921cb0ef41Sopenharmony_ci              } else {
6931cb0ef41Sopenharmony_ci                RegExpCapture* capture = GetCapture(index);
6941cb0ef41Sopenharmony_ci                RegExpTree* atom = zone()->template New<RegExpBackReference>(
6951cb0ef41Sopenharmony_ci                    capture, builder->flags());
6961cb0ef41Sopenharmony_ci                builder->AddAtom(atom);
6971cb0ef41Sopenharmony_ci              }
6981cb0ef41Sopenharmony_ci              break;
6991cb0ef41Sopenharmony_ci            }
7001cb0ef41Sopenharmony_ci            // With /u, no identity escapes except for syntax characters
7011cb0ef41Sopenharmony_ci            // are allowed. Otherwise, all identity escapes are allowed.
7021cb0ef41Sopenharmony_ci            if (unicode()) {
7031cb0ef41Sopenharmony_ci              return ReportError(RegExpError::kInvalidEscape);
7041cb0ef41Sopenharmony_ci            }
7051cb0ef41Sopenharmony_ci            base::uc32 first_digit = Next();
7061cb0ef41Sopenharmony_ci            if (first_digit == '8' || first_digit == '9') {
7071cb0ef41Sopenharmony_ci              builder->AddCharacter(first_digit);
7081cb0ef41Sopenharmony_ci              Advance(2);
7091cb0ef41Sopenharmony_ci              break;
7101cb0ef41Sopenharmony_ci            }
            // Digits 1-7 that are not a back reference are handled by the
            // octal path shared with '\0' below.
7111cb0ef41Sopenharmony_ci            V8_FALLTHROUGH;
7121cb0ef41Sopenharmony_ci          }
7131cb0ef41Sopenharmony_ci          case '0': {
7141cb0ef41Sopenharmony_ci            Advance();
7151cb0ef41Sopenharmony_ci            if (unicode() && Next() >= '0' && Next() <= '9') {
7161cb0ef41Sopenharmony_ci              // With /u, decimal escape with leading 0 are not parsed as octal.
7171cb0ef41Sopenharmony_ci              return ReportError(RegExpError::kInvalidDecimalEscape);
7181cb0ef41Sopenharmony_ci            }
7191cb0ef41Sopenharmony_ci            base::uc32 octal = ParseOctalLiteral();
7201cb0ef41Sopenharmony_ci            builder->AddCharacter(octal);
7211cb0ef41Sopenharmony_ci            break;
7221cb0ef41Sopenharmony_ci          }
7231cb0ef41Sopenharmony_ci          case 'b':
7241cb0ef41Sopenharmony_ci            Advance(2);
7251cb0ef41Sopenharmony_ci            builder->AddAssertion(zone()->template New<RegExpAssertion>(
7261cb0ef41Sopenharmony_ci                RegExpAssertion::Type::BOUNDARY));
7271cb0ef41Sopenharmony_ci            continue;
7281cb0ef41Sopenharmony_ci          case 'B':
7291cb0ef41Sopenharmony_ci            Advance(2);
7301cb0ef41Sopenharmony_ci            builder->AddAssertion(zone()->template New<RegExpAssertion>(
7311cb0ef41Sopenharmony_ci                RegExpAssertion::Type::NON_BOUNDARY));
7321cb0ef41Sopenharmony_ci            continue;
7331cb0ef41Sopenharmony_ci          // AtomEscape ::
7341cb0ef41Sopenharmony_ci          //   CharacterClassEscape
7351cb0ef41Sopenharmony_ci          case 'd':
7361cb0ef41Sopenharmony_ci          case 'D':
7371cb0ef41Sopenharmony_ci          case 's':
7381cb0ef41Sopenharmony_ci          case 'S':
7391cb0ef41Sopenharmony_ci          case 'w':
7401cb0ef41Sopenharmony_ci          case 'W':
7411cb0ef41Sopenharmony_ci          case 'p':
7421cb0ef41Sopenharmony_ci          case 'P': {
7431cb0ef41Sopenharmony_ci            base::uc32 next = Next();
7441cb0ef41Sopenharmony_ci            ZoneList<CharacterRange>* ranges =
7451cb0ef41Sopenharmony_ci                zone()->template New<ZoneList<CharacterRange>>(2, zone());
7461cb0ef41Sopenharmony_ci            bool add_unicode_case_equivalents =
7471cb0ef41Sopenharmony_ci                unicode() && builder->ignore_case();
7481cb0ef41Sopenharmony_ci            bool parsed_character_class_escape = TryParseCharacterClassEscape(
7491cb0ef41Sopenharmony_ci                next, InClassEscapeState::kNotInClass, ranges, zone(),
7501cb0ef41Sopenharmony_ci                add_unicode_case_equivalents CHECK_FAILED);
7511cb0ef41Sopenharmony_ci
7521cb0ef41Sopenharmony_ci            if (parsed_character_class_escape) {
7531cb0ef41Sopenharmony_ci              RegExpCharacterClass* cc =
7541cb0ef41Sopenharmony_ci                  zone()->template New<RegExpCharacterClass>(zone(), ranges);
7551cb0ef41Sopenharmony_ci              builder->AddCharacterClass(cc);
7561cb0ef41Sopenharmony_ci            } else {
7571cb0ef41Sopenharmony_ci              CHECK(!unicode());
7581cb0ef41Sopenharmony_ci              Advance(2);
7591cb0ef41Sopenharmony_ci              builder->AddCharacter(next);  // IdentityEscape.
7601cb0ef41Sopenharmony_ci            }
7611cb0ef41Sopenharmony_ci            break;
7621cb0ef41Sopenharmony_ci          }
7631cb0ef41Sopenharmony_ci          // AtomEscape ::
7641cb0ef41Sopenharmony_ci          //   k GroupName
7651cb0ef41Sopenharmony_ci          case 'k': {
7661cb0ef41Sopenharmony_ci            // Either an identity escape or a named back-reference.  The two
7671cb0ef41Sopenharmony_ci            // interpretations are mutually exclusive: '\k' is interpreted as
7681cb0ef41Sopenharmony_ci            // an identity escape for non-Unicode patterns without named
7691cb0ef41Sopenharmony_ci            // capture groups, and as the beginning of a named back-reference
7701cb0ef41Sopenharmony_ci            // in all other cases.
7711cb0ef41Sopenharmony_ci            const bool has_named_captures =
7721cb0ef41Sopenharmony_ci                HasNamedCaptures(InClassEscapeState::kNotInClass CHECK_FAILED);
7731cb0ef41Sopenharmony_ci            if (unicode() || has_named_captures) {
7741cb0ef41Sopenharmony_ci              Advance(2);
7751cb0ef41Sopenharmony_ci              ParseNamedBackReference(builder, state CHECK_FAILED);
7761cb0ef41Sopenharmony_ci              break;
7771cb0ef41Sopenharmony_ci            }
7781cb0ef41Sopenharmony_ci          }
7791cb0ef41Sopenharmony_ci            V8_FALLTHROUGH;
7801cb0ef41Sopenharmony_ci          // AtomEscape ::
7811cb0ef41Sopenharmony_ci          //   CharacterEscape
7821cb0ef41Sopenharmony_ci          default: {
7831cb0ef41Sopenharmony_ci            bool is_escaped_unicode_character = false;
7841cb0ef41Sopenharmony_ci            base::uc32 c = ParseCharacterEscape(
7851cb0ef41Sopenharmony_ci                InClassEscapeState::kNotInClass,
7861cb0ef41Sopenharmony_ci                &is_escaped_unicode_character CHECK_FAILED);
7871cb0ef41Sopenharmony_ci            if (is_escaped_unicode_character) {
7881cb0ef41Sopenharmony_ci              builder->AddEscapedUnicodeCharacter(c);
7891cb0ef41Sopenharmony_ci            } else {
7901cb0ef41Sopenharmony_ci              builder->AddCharacter(c);
7911cb0ef41Sopenharmony_ci            }
7921cb0ef41Sopenharmony_ci            break;
7931cb0ef41Sopenharmony_ci          }
7941cb0ef41Sopenharmony_ci        }
7951cb0ef41Sopenharmony_ci        break;
7961cb0ef41Sopenharmony_ci      case '{': {
        // A complete interval quantifier at the start of a term has nothing
        // to repeat; otherwise '{' is handled like '}' and ']' below.
7971cb0ef41Sopenharmony_ci        int dummy;
7981cb0ef41Sopenharmony_ci        bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);
7991cb0ef41Sopenharmony_ci        if (parsed) return ReportError(RegExpError::kNothingToRepeat);
8001cb0ef41Sopenharmony_ci        V8_FALLTHROUGH;
8011cb0ef41Sopenharmony_ci      }
8021cb0ef41Sopenharmony_ci      case '}':
8031cb0ef41Sopenharmony_ci      case ']':
8041cb0ef41Sopenharmony_ci        if (unicode()) {
8051cb0ef41Sopenharmony_ci          return ReportError(RegExpError::kLoneQuantifierBrackets);
8061cb0ef41Sopenharmony_ci        }
8071cb0ef41Sopenharmony_ci        V8_FALLTHROUGH;
8081cb0ef41Sopenharmony_ci      default:
8091cb0ef41Sopenharmony_ci        builder->AddUnicodeCharacter(current());
8101cb0ef41Sopenharmony_ci        Advance();
8111cb0ef41Sopenharmony_ci        break;
8121cb0ef41Sopenharmony_ci    }  // end switch(current())
8131cb0ef41Sopenharmony_ci
    // Second switch: parse an optional quantifier following the atom that was
    // just added. min and max are only read after a case has assigned both.
8141cb0ef41Sopenharmony_ci    int min;
8151cb0ef41Sopenharmony_ci    int max;
8161cb0ef41Sopenharmony_ci    switch (current()) {
8171cb0ef41Sopenharmony_ci      // QuantifierPrefix ::
8181cb0ef41Sopenharmony_ci      //   *
8191cb0ef41Sopenharmony_ci      //   +
8201cb0ef41Sopenharmony_ci      //   ?
8211cb0ef41Sopenharmony_ci      //   {
8221cb0ef41Sopenharmony_ci      case '*':
8231cb0ef41Sopenharmony_ci        min = 0;
8241cb0ef41Sopenharmony_ci        max = RegExpTree::kInfinity;
8251cb0ef41Sopenharmony_ci        Advance();
8261cb0ef41Sopenharmony_ci        break;
8271cb0ef41Sopenharmony_ci      case '+':
8281cb0ef41Sopenharmony_ci        min = 1;
8291cb0ef41Sopenharmony_ci        max = RegExpTree::kInfinity;
8301cb0ef41Sopenharmony_ci        Advance();
8311cb0ef41Sopenharmony_ci        break;
8321cb0ef41Sopenharmony_ci      case '?':
8331cb0ef41Sopenharmony_ci        min = 0;
8341cb0ef41Sopenharmony_ci        max = 1;
8351cb0ef41Sopenharmony_ci        Advance();
8361cb0ef41Sopenharmony_ci        break;
8371cb0ef41Sopenharmony_ci      case '{':
8381cb0ef41Sopenharmony_ci        if (ParseIntervalQuantifier(&min, &max)) {
8391cb0ef41Sopenharmony_ci          if (max < min) {
8401cb0ef41Sopenharmony_ci            return ReportError(RegExpError::kRangeOutOfOrder);
8411cb0ef41Sopenharmony_ci          }
8421cb0ef41Sopenharmony_ci          break;
8431cb0ef41Sopenharmony_ci        } else if (unicode()) {
8441cb0ef41Sopenharmony_ci          // With /u, incomplete quantifiers are not allowed.
8451cb0ef41Sopenharmony_ci          return ReportError(RegExpError::kIncompleteQuantifier);
8461cb0ef41Sopenharmony_ci        }
8471cb0ef41Sopenharmony_ci        continue;
8481cb0ef41Sopenharmony_ci      default:
        // No quantifier follows; go on with the next term.
8491cb0ef41Sopenharmony_ci        continue;
8501cb0ef41Sopenharmony_ci    }
    // A quantifier was parsed; an optional suffix selects its type.
8511cb0ef41Sopenharmony_ci    RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;
8521cb0ef41Sopenharmony_ci    if (current() == '?') {
8531cb0ef41Sopenharmony_ci      quantifier_type = RegExpQuantifier::NON_GREEDY;
8541cb0ef41Sopenharmony_ci      Advance();
8551cb0ef41Sopenharmony_ci    } else if (FLAG_regexp_possessive_quantifier && current() == '+') {
8561cb0ef41Sopenharmony_ci      // FLAG_regexp_possessive_quantifier is a debug-only flag.
8571cb0ef41Sopenharmony_ci      quantifier_type = RegExpQuantifier::POSSESSIVE;
8581cb0ef41Sopenharmony_ci      Advance();
8591cb0ef41Sopenharmony_ci    }
8601cb0ef41Sopenharmony_ci    if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {
8611cb0ef41Sopenharmony_ci      return ReportError(RegExpError::kInvalidQuantifier);
8621cb0ef41Sopenharmony_ci    }
8631cb0ef41Sopenharmony_ci  }
8641cb0ef41Sopenharmony_ci}
8651cb0ef41Sopenharmony_ci
8661cb0ef41Sopenharmony_citemplate <class CharT>
8671cb0ef41Sopenharmony_ciRegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
8681cb0ef41Sopenharmony_ci    RegExpParserState* state) {
8691cb0ef41Sopenharmony_ci  RegExpLookaround::Type lookaround_type = state->lookaround_type();
8701cb0ef41Sopenharmony_ci  bool is_named_capture = false;
8711cb0ef41Sopenharmony_ci  const ZoneVector<base::uc16>* capture_name = nullptr;
8721cb0ef41Sopenharmony_ci  SubexpressionType subexpr_type = CAPTURE;
8731cb0ef41Sopenharmony_ci  Advance();
8741cb0ef41Sopenharmony_ci  if (current() == '?') {
8751cb0ef41Sopenharmony_ci    switch (Next()) {
8761cb0ef41Sopenharmony_ci      case ':':
8771cb0ef41Sopenharmony_ci        Advance(2);
8781cb0ef41Sopenharmony_ci        subexpr_type = GROUPING;
8791cb0ef41Sopenharmony_ci        break;
8801cb0ef41Sopenharmony_ci      case '=':
8811cb0ef41Sopenharmony_ci        Advance(2);
8821cb0ef41Sopenharmony_ci        lookaround_type = RegExpLookaround::LOOKAHEAD;
8831cb0ef41Sopenharmony_ci        subexpr_type = POSITIVE_LOOKAROUND;
8841cb0ef41Sopenharmony_ci        break;
8851cb0ef41Sopenharmony_ci      case '!':
8861cb0ef41Sopenharmony_ci        Advance(2);
8871cb0ef41Sopenharmony_ci        lookaround_type = RegExpLookaround::LOOKAHEAD;
8881cb0ef41Sopenharmony_ci        subexpr_type = NEGATIVE_LOOKAROUND;
8891cb0ef41Sopenharmony_ci        break;
8901cb0ef41Sopenharmony_ci      case '<':
8911cb0ef41Sopenharmony_ci        Advance();
8921cb0ef41Sopenharmony_ci        if (Next() == '=') {
8931cb0ef41Sopenharmony_ci          Advance(2);
8941cb0ef41Sopenharmony_ci          lookaround_type = RegExpLookaround::LOOKBEHIND;
8951cb0ef41Sopenharmony_ci          subexpr_type = POSITIVE_LOOKAROUND;
8961cb0ef41Sopenharmony_ci          break;
8971cb0ef41Sopenharmony_ci        } else if (Next() == '!') {
8981cb0ef41Sopenharmony_ci          Advance(2);
8991cb0ef41Sopenharmony_ci          lookaround_type = RegExpLookaround::LOOKBEHIND;
9001cb0ef41Sopenharmony_ci          subexpr_type = NEGATIVE_LOOKAROUND;
9011cb0ef41Sopenharmony_ci          break;
9021cb0ef41Sopenharmony_ci        }
9031cb0ef41Sopenharmony_ci        is_named_capture = true;
9041cb0ef41Sopenharmony_ci        has_named_captures_ = true;
9051cb0ef41Sopenharmony_ci        Advance();
9061cb0ef41Sopenharmony_ci        break;
9071cb0ef41Sopenharmony_ci      default:
9081cb0ef41Sopenharmony_ci        ReportError(RegExpError::kInvalidGroup);
9091cb0ef41Sopenharmony_ci        return nullptr;
9101cb0ef41Sopenharmony_ci    }
9111cb0ef41Sopenharmony_ci  }
9121cb0ef41Sopenharmony_ci  if (subexpr_type == CAPTURE) {
9131cb0ef41Sopenharmony_ci    if (captures_started_ >= RegExpMacroAssembler::kMaxRegisterCount) {
9141cb0ef41Sopenharmony_ci      ReportError(RegExpError::kTooManyCaptures);
9151cb0ef41Sopenharmony_ci      return nullptr;
9161cb0ef41Sopenharmony_ci    }
9171cb0ef41Sopenharmony_ci    captures_started_++;
9181cb0ef41Sopenharmony_ci
9191cb0ef41Sopenharmony_ci    if (is_named_capture) {
9201cb0ef41Sopenharmony_ci      capture_name = ParseCaptureGroupName(CHECK_FAILED);
9211cb0ef41Sopenharmony_ci    }
9221cb0ef41Sopenharmony_ci  }
9231cb0ef41Sopenharmony_ci  // Store current state and begin new disjunction parsing.
9241cb0ef41Sopenharmony_ci  return zone()->template New<RegExpParserState>(
9251cb0ef41Sopenharmony_ci      state, subexpr_type, lookaround_type, captures_started_, capture_name,
9261cb0ef41Sopenharmony_ci      state->builder()->flags(), zone());
9271cb0ef41Sopenharmony_ci}
9281cb0ef41Sopenharmony_ci
9291cb0ef41Sopenharmony_ci#ifdef DEBUG
9301cb0ef41Sopenharmony_cinamespace {
9311cb0ef41Sopenharmony_ci
9321cb0ef41Sopenharmony_cibool IsSpecialClassEscape(base::uc32 c) {
9331cb0ef41Sopenharmony_ci  switch (c) {
9341cb0ef41Sopenharmony_ci    case 'd':
9351cb0ef41Sopenharmony_ci    case 'D':
9361cb0ef41Sopenharmony_ci    case 's':
9371cb0ef41Sopenharmony_ci    case 'S':
9381cb0ef41Sopenharmony_ci    case 'w':
9391cb0ef41Sopenharmony_ci    case 'W':
9401cb0ef41Sopenharmony_ci      return true;
9411cb0ef41Sopenharmony_ci    default:
9421cb0ef41Sopenharmony_ci      return false;
9431cb0ef41Sopenharmony_ci  }
9441cb0ef41Sopenharmony_ci}
9451cb0ef41Sopenharmony_ci
9461cb0ef41Sopenharmony_ci}  // namespace
9471cb0ef41Sopenharmony_ci#endif
9481cb0ef41Sopenharmony_ci
9491cb0ef41Sopenharmony_ci// In order to know whether an escape is a backreference or not we have to scan
9501cb0ef41Sopenharmony_ci// the entire regexp and find the number of capturing parentheses.  However we
9511cb0ef41Sopenharmony_ci// don't want to scan the regexp twice unless it is necessary.  This mini-parser
9521cb0ef41Sopenharmony_ci// is called when needed.  It can see the difference between capturing and
9531cb0ef41Sopenharmony_ci// noncapturing parentheses and can skip character classes and backslash-escaped
9541cb0ef41Sopenharmony_ci// characters.
9551cb0ef41Sopenharmony_ci//
9561cb0ef41Sopenharmony_ci// Important: The scanner has to be in a consistent state when calling
9571cb0ef41Sopenharmony_ci// ScanForCaptures, e.g. not in the middle of an escape sequence '\['.
9581cb0ef41Sopenharmony_citemplate <class CharT>
9591cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::ScanForCaptures(
9601cb0ef41Sopenharmony_ci    InClassEscapeState in_class_escape_state) {
9611cb0ef41Sopenharmony_ci  DCHECK(!is_scanned_for_captures_);
9621cb0ef41Sopenharmony_ci  const int saved_position = position();
9631cb0ef41Sopenharmony_ci  // Start with captures started previous to current position
9641cb0ef41Sopenharmony_ci  int capture_count = captures_started();
9651cb0ef41Sopenharmony_ci  // When we start inside a character class, skip everything inside the class.
9661cb0ef41Sopenharmony_ci  if (in_class_escape_state == InClassEscapeState::kInClass) {
9671cb0ef41Sopenharmony_ci    int c;
9681cb0ef41Sopenharmony_ci    while ((c = current()) != kEndMarker) {
9691cb0ef41Sopenharmony_ci      Advance();
9701cb0ef41Sopenharmony_ci      if (c == '\\') {
9711cb0ef41Sopenharmony_ci        Advance();
9721cb0ef41Sopenharmony_ci      } else {
9731cb0ef41Sopenharmony_ci        if (c == ']') break;
9741cb0ef41Sopenharmony_ci      }
9751cb0ef41Sopenharmony_ci    }
9761cb0ef41Sopenharmony_ci  }
9771cb0ef41Sopenharmony_ci  // Add count of captures after this position.
9781cb0ef41Sopenharmony_ci  int n;
9791cb0ef41Sopenharmony_ci  while ((n = current()) != kEndMarker) {
9801cb0ef41Sopenharmony_ci    Advance();
9811cb0ef41Sopenharmony_ci    switch (n) {
9821cb0ef41Sopenharmony_ci      case '\\':
9831cb0ef41Sopenharmony_ci        Advance();
9841cb0ef41Sopenharmony_ci        break;
9851cb0ef41Sopenharmony_ci      case '[': {
9861cb0ef41Sopenharmony_ci        int c;
9871cb0ef41Sopenharmony_ci        while ((c = current()) != kEndMarker) {
9881cb0ef41Sopenharmony_ci          Advance();
9891cb0ef41Sopenharmony_ci          if (c == '\\') {
9901cb0ef41Sopenharmony_ci            Advance();
9911cb0ef41Sopenharmony_ci          } else {
9921cb0ef41Sopenharmony_ci            if (c == ']') break;
9931cb0ef41Sopenharmony_ci          }
9941cb0ef41Sopenharmony_ci        }
9951cb0ef41Sopenharmony_ci        break;
9961cb0ef41Sopenharmony_ci      }
9971cb0ef41Sopenharmony_ci      case '(':
9981cb0ef41Sopenharmony_ci        if (current() == '?') {
9991cb0ef41Sopenharmony_ci          // At this point we could be in
10001cb0ef41Sopenharmony_ci          // * a non-capturing group '(:',
10011cb0ef41Sopenharmony_ci          // * a lookbehind assertion '(?<=' '(?<!'
10021cb0ef41Sopenharmony_ci          // * or a named capture '(?<'.
10031cb0ef41Sopenharmony_ci          //
10041cb0ef41Sopenharmony_ci          // Of these, only named captures are capturing groups.
10051cb0ef41Sopenharmony_ci
10061cb0ef41Sopenharmony_ci          Advance();
10071cb0ef41Sopenharmony_ci          if (current() != '<') break;
10081cb0ef41Sopenharmony_ci
10091cb0ef41Sopenharmony_ci          Advance();
10101cb0ef41Sopenharmony_ci          if (current() == '=' || current() == '!') break;
10111cb0ef41Sopenharmony_ci
10121cb0ef41Sopenharmony_ci          // Found a possible named capture. It could turn out to be a syntax
10131cb0ef41Sopenharmony_ci          // error (e.g. an unterminated or invalid name), but that distinction
10141cb0ef41Sopenharmony_ci          // does not matter for our purposes.
10151cb0ef41Sopenharmony_ci          has_named_captures_ = true;
10161cb0ef41Sopenharmony_ci        }
10171cb0ef41Sopenharmony_ci        capture_count++;
10181cb0ef41Sopenharmony_ci        break;
10191cb0ef41Sopenharmony_ci    }
10201cb0ef41Sopenharmony_ci  }
10211cb0ef41Sopenharmony_ci  capture_count_ = capture_count;
10221cb0ef41Sopenharmony_ci  is_scanned_for_captures_ = true;
10231cb0ef41Sopenharmony_ci  Reset(saved_position);
10241cb0ef41Sopenharmony_ci}
10251cb0ef41Sopenharmony_ci
10261cb0ef41Sopenharmony_citemplate <class CharT>
10271cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseBackReferenceIndex(int* index_out) {
10281cb0ef41Sopenharmony_ci  DCHECK_EQ('\\', current());
10291cb0ef41Sopenharmony_ci  DCHECK('1' <= Next() && Next() <= '9');
10301cb0ef41Sopenharmony_ci  // Try to parse a decimal literal that is no greater than the total number
10311cb0ef41Sopenharmony_ci  // of left capturing parentheses in the input.
10321cb0ef41Sopenharmony_ci  int start = position();
10331cb0ef41Sopenharmony_ci  int value = Next() - '0';
10341cb0ef41Sopenharmony_ci  Advance(2);
10351cb0ef41Sopenharmony_ci  while (true) {
10361cb0ef41Sopenharmony_ci    base::uc32 c = current();
10371cb0ef41Sopenharmony_ci    if (IsDecimalDigit(c)) {
10381cb0ef41Sopenharmony_ci      value = 10 * value + (c - '0');
10391cb0ef41Sopenharmony_ci      if (value > RegExpMacroAssembler::kMaxRegisterCount) {
10401cb0ef41Sopenharmony_ci        Reset(start);
10411cb0ef41Sopenharmony_ci        return false;
10421cb0ef41Sopenharmony_ci      }
10431cb0ef41Sopenharmony_ci      Advance();
10441cb0ef41Sopenharmony_ci    } else {
10451cb0ef41Sopenharmony_ci      break;
10461cb0ef41Sopenharmony_ci    }
10471cb0ef41Sopenharmony_ci  }
10481cb0ef41Sopenharmony_ci  if (value > captures_started()) {
10491cb0ef41Sopenharmony_ci    if (!is_scanned_for_captures_)
10501cb0ef41Sopenharmony_ci      ScanForCaptures(InClassEscapeState::kNotInClass);
10511cb0ef41Sopenharmony_ci    if (value > capture_count_) {
10521cb0ef41Sopenharmony_ci      Reset(start);
10531cb0ef41Sopenharmony_ci      return false;
10541cb0ef41Sopenharmony_ci    }
10551cb0ef41Sopenharmony_ci  }
10561cb0ef41Sopenharmony_ci  *index_out = value;
10571cb0ef41Sopenharmony_ci  return true;
10581cb0ef41Sopenharmony_ci}
10591cb0ef41Sopenharmony_ci
10601cb0ef41Sopenharmony_cinamespace {
10611cb0ef41Sopenharmony_ci
10621cb0ef41Sopenharmony_civoid push_code_unit(ZoneVector<base::uc16>* v, uint32_t code_unit) {
10631cb0ef41Sopenharmony_ci  if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
10641cb0ef41Sopenharmony_ci    v->push_back(code_unit);
10651cb0ef41Sopenharmony_ci  } else {
10661cb0ef41Sopenharmony_ci    v->push_back(unibrow::Utf16::LeadSurrogate(code_unit));
10671cb0ef41Sopenharmony_ci    v->push_back(unibrow::Utf16::TrailSurrogate(code_unit));
10681cb0ef41Sopenharmony_ci  }
10691cb0ef41Sopenharmony_ci}
10701cb0ef41Sopenharmony_ci
10711cb0ef41Sopenharmony_ci}  // namespace
10721cb0ef41Sopenharmony_ci
10731cb0ef41Sopenharmony_citemplate <class CharT>
10741cb0ef41Sopenharmony_ciconst ZoneVector<base::uc16>* RegExpParserImpl<CharT>::ParseCaptureGroupName() {
10751cb0ef41Sopenharmony_ci  // Due to special Advance requirements (see the next comment), rewind by one
10761cb0ef41Sopenharmony_ci  // such that names starting with a surrogate pair are parsed correctly for
10771cb0ef41Sopenharmony_ci  // patterns where the unicode flag is unset.
10781cb0ef41Sopenharmony_ci  //
10791cb0ef41Sopenharmony_ci  // Note that we use this odd pattern of rewinding the last advance in order
10801cb0ef41Sopenharmony_ci  // to adhere to the common parser behavior of expecting `current` to point at
10811cb0ef41Sopenharmony_ci  // the first candidate character for a function (e.g. when entering ParseFoo,
10821cb0ef41Sopenharmony_ci  // `current` should point at the first character of Foo).
10831cb0ef41Sopenharmony_ci  RewindByOneCodepoint();
10841cb0ef41Sopenharmony_ci
10851cb0ef41Sopenharmony_ci  ZoneVector<base::uc16>* name =
10861cb0ef41Sopenharmony_ci      zone()->template New<ZoneVector<base::uc16>>(zone());
10871cb0ef41Sopenharmony_ci
10881cb0ef41Sopenharmony_ci  {
10891cb0ef41Sopenharmony_ci    // Advance behavior inside this function is tricky since
10901cb0ef41Sopenharmony_ci    // RegExpIdentifierName explicitly enables unicode (in spec terms, sets +U)
10911cb0ef41Sopenharmony_ci    // and thus allows surrogate pairs and \u{}-style escapes even in
10921cb0ef41Sopenharmony_ci    // non-unicode patterns. Therefore Advance within the capture group name
10931cb0ef41Sopenharmony_ci    // has to force-enable unicode, and outside the name revert to default
10941cb0ef41Sopenharmony_ci    // behavior.
10951cb0ef41Sopenharmony_ci    ForceUnicodeScope force_unicode(this);
10961cb0ef41Sopenharmony_ci
10971cb0ef41Sopenharmony_ci    bool at_start = true;
10981cb0ef41Sopenharmony_ci    while (true) {
10991cb0ef41Sopenharmony_ci      Advance();
11001cb0ef41Sopenharmony_ci      base::uc32 c = current();
11011cb0ef41Sopenharmony_ci
11021cb0ef41Sopenharmony_ci      // Convert unicode escapes.
11031cb0ef41Sopenharmony_ci      if (c == '\\' && Next() == 'u') {
11041cb0ef41Sopenharmony_ci        Advance(2);
11051cb0ef41Sopenharmony_ci        if (!ParseUnicodeEscape(&c)) {
11061cb0ef41Sopenharmony_ci          ReportError(RegExpError::kInvalidUnicodeEscape);
11071cb0ef41Sopenharmony_ci          return nullptr;
11081cb0ef41Sopenharmony_ci        }
11091cb0ef41Sopenharmony_ci        RewindByOneCodepoint();
11101cb0ef41Sopenharmony_ci      }
11111cb0ef41Sopenharmony_ci
11121cb0ef41Sopenharmony_ci      // The backslash char is misclassified as both ID_Start and ID_Continue.
11131cb0ef41Sopenharmony_ci      if (c == '\\') {
11141cb0ef41Sopenharmony_ci        ReportError(RegExpError::kInvalidCaptureGroupName);
11151cb0ef41Sopenharmony_ci        return nullptr;
11161cb0ef41Sopenharmony_ci      }
11171cb0ef41Sopenharmony_ci
11181cb0ef41Sopenharmony_ci      if (at_start) {
11191cb0ef41Sopenharmony_ci        if (!IsIdentifierStart(c)) {
11201cb0ef41Sopenharmony_ci          ReportError(RegExpError::kInvalidCaptureGroupName);
11211cb0ef41Sopenharmony_ci          return nullptr;
11221cb0ef41Sopenharmony_ci        }
11231cb0ef41Sopenharmony_ci        push_code_unit(name, c);
11241cb0ef41Sopenharmony_ci        at_start = false;
11251cb0ef41Sopenharmony_ci      } else {
11261cb0ef41Sopenharmony_ci        if (c == '>') {
11271cb0ef41Sopenharmony_ci          break;
11281cb0ef41Sopenharmony_ci        } else if (IsIdentifierPart(c)) {
11291cb0ef41Sopenharmony_ci          push_code_unit(name, c);
11301cb0ef41Sopenharmony_ci        } else {
11311cb0ef41Sopenharmony_ci          ReportError(RegExpError::kInvalidCaptureGroupName);
11321cb0ef41Sopenharmony_ci          return nullptr;
11331cb0ef41Sopenharmony_ci        }
11341cb0ef41Sopenharmony_ci      }
11351cb0ef41Sopenharmony_ci    }
11361cb0ef41Sopenharmony_ci  }
11371cb0ef41Sopenharmony_ci
11381cb0ef41Sopenharmony_ci  // This final advance goes back into the state of pointing at the next
11391cb0ef41Sopenharmony_ci  // relevant char, which the rest of the parser expects. See also the previous
11401cb0ef41Sopenharmony_ci  // comments in this function.
11411cb0ef41Sopenharmony_ci  Advance();
11421cb0ef41Sopenharmony_ci  return name;
11431cb0ef41Sopenharmony_ci}
11441cb0ef41Sopenharmony_ci
11451cb0ef41Sopenharmony_citemplate <class CharT>
11461cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::CreateNamedCaptureAtIndex(
11471cb0ef41Sopenharmony_ci    const ZoneVector<base::uc16>* name, int index) {
11481cb0ef41Sopenharmony_ci  DCHECK(0 < index && index <= captures_started_);
11491cb0ef41Sopenharmony_ci  DCHECK_NOT_NULL(name);
11501cb0ef41Sopenharmony_ci
11511cb0ef41Sopenharmony_ci  RegExpCapture* capture = GetCapture(index);
11521cb0ef41Sopenharmony_ci  DCHECK_NULL(capture->name());
11531cb0ef41Sopenharmony_ci
11541cb0ef41Sopenharmony_ci  capture->set_name(name);
11551cb0ef41Sopenharmony_ci
11561cb0ef41Sopenharmony_ci  if (named_captures_ == nullptr) {
11571cb0ef41Sopenharmony_ci    named_captures_ =
11581cb0ef41Sopenharmony_ci        zone_->template New<ZoneSet<RegExpCapture*, RegExpCaptureNameLess>>(
11591cb0ef41Sopenharmony_ci            zone());
11601cb0ef41Sopenharmony_ci  } else {
11611cb0ef41Sopenharmony_ci    // Check for duplicates and bail if we find any.
11621cb0ef41Sopenharmony_ci
11631cb0ef41Sopenharmony_ci    const auto& named_capture_it = named_captures_->find(capture);
11641cb0ef41Sopenharmony_ci    if (named_capture_it != named_captures_->end()) {
11651cb0ef41Sopenharmony_ci      ReportError(RegExpError::kDuplicateCaptureGroupName);
11661cb0ef41Sopenharmony_ci      return false;
11671cb0ef41Sopenharmony_ci    }
11681cb0ef41Sopenharmony_ci  }
11691cb0ef41Sopenharmony_ci
11701cb0ef41Sopenharmony_ci  named_captures_->emplace(capture);
11711cb0ef41Sopenharmony_ci
11721cb0ef41Sopenharmony_ci  return true;
11731cb0ef41Sopenharmony_ci}
11741cb0ef41Sopenharmony_ci
11751cb0ef41Sopenharmony_citemplate <class CharT>
11761cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseNamedBackReference(
11771cb0ef41Sopenharmony_ci    RegExpBuilder* builder, RegExpParserState* state) {
11781cb0ef41Sopenharmony_ci  // The parser is assumed to be on the '<' in \k<name>.
11791cb0ef41Sopenharmony_ci  if (current() != '<') {
11801cb0ef41Sopenharmony_ci    ReportError(RegExpError::kInvalidNamedReference);
11811cb0ef41Sopenharmony_ci    return false;
11821cb0ef41Sopenharmony_ci  }
11831cb0ef41Sopenharmony_ci
11841cb0ef41Sopenharmony_ci  Advance();
11851cb0ef41Sopenharmony_ci  const ZoneVector<base::uc16>* name = ParseCaptureGroupName();
11861cb0ef41Sopenharmony_ci  if (name == nullptr) {
11871cb0ef41Sopenharmony_ci    return false;
11881cb0ef41Sopenharmony_ci  }
11891cb0ef41Sopenharmony_ci
11901cb0ef41Sopenharmony_ci  if (state->IsInsideCaptureGroup(name)) {
11911cb0ef41Sopenharmony_ci    builder->AddEmpty();
11921cb0ef41Sopenharmony_ci  } else {
11931cb0ef41Sopenharmony_ci    RegExpBackReference* atom =
11941cb0ef41Sopenharmony_ci        zone()->template New<RegExpBackReference>(builder->flags());
11951cb0ef41Sopenharmony_ci    atom->set_name(name);
11961cb0ef41Sopenharmony_ci
11971cb0ef41Sopenharmony_ci    builder->AddAtom(atom);
11981cb0ef41Sopenharmony_ci
11991cb0ef41Sopenharmony_ci    if (named_back_references_ == nullptr) {
12001cb0ef41Sopenharmony_ci      named_back_references_ =
12011cb0ef41Sopenharmony_ci          zone()->template New<ZoneList<RegExpBackReference*>>(1, zone());
12021cb0ef41Sopenharmony_ci    }
12031cb0ef41Sopenharmony_ci    named_back_references_->Add(atom, zone());
12041cb0ef41Sopenharmony_ci  }
12051cb0ef41Sopenharmony_ci
12061cb0ef41Sopenharmony_ci  return true;
12071cb0ef41Sopenharmony_ci}
12081cb0ef41Sopenharmony_ci
12091cb0ef41Sopenharmony_citemplate <class CharT>
12101cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::PatchNamedBackReferences() {
12111cb0ef41Sopenharmony_ci  if (named_back_references_ == nullptr) return;
12121cb0ef41Sopenharmony_ci
12131cb0ef41Sopenharmony_ci  if (named_captures_ == nullptr) {
12141cb0ef41Sopenharmony_ci    ReportError(RegExpError::kInvalidNamedCaptureReference);
12151cb0ef41Sopenharmony_ci    return;
12161cb0ef41Sopenharmony_ci  }
12171cb0ef41Sopenharmony_ci
12181cb0ef41Sopenharmony_ci  // Look up and patch the actual capture for each named back reference.
12191cb0ef41Sopenharmony_ci
12201cb0ef41Sopenharmony_ci  for (int i = 0; i < named_back_references_->length(); i++) {
12211cb0ef41Sopenharmony_ci    RegExpBackReference* ref = named_back_references_->at(i);
12221cb0ef41Sopenharmony_ci
12231cb0ef41Sopenharmony_ci    // Capture used to search the named_captures_ by name, index of the
12241cb0ef41Sopenharmony_ci    // capture is never used.
12251cb0ef41Sopenharmony_ci    static const int kInvalidIndex = 0;
12261cb0ef41Sopenharmony_ci    RegExpCapture* search_capture =
12271cb0ef41Sopenharmony_ci        zone()->template New<RegExpCapture>(kInvalidIndex);
12281cb0ef41Sopenharmony_ci    DCHECK_NULL(search_capture->name());
12291cb0ef41Sopenharmony_ci    search_capture->set_name(ref->name());
12301cb0ef41Sopenharmony_ci
12311cb0ef41Sopenharmony_ci    int index = -1;
12321cb0ef41Sopenharmony_ci    const auto& capture_it = named_captures_->find(search_capture);
12331cb0ef41Sopenharmony_ci    if (capture_it != named_captures_->end()) {
12341cb0ef41Sopenharmony_ci      index = (*capture_it)->index();
12351cb0ef41Sopenharmony_ci    } else {
12361cb0ef41Sopenharmony_ci      ReportError(RegExpError::kInvalidNamedCaptureReference);
12371cb0ef41Sopenharmony_ci      return;
12381cb0ef41Sopenharmony_ci    }
12391cb0ef41Sopenharmony_ci
12401cb0ef41Sopenharmony_ci    ref->set_capture(GetCapture(index));
12411cb0ef41Sopenharmony_ci  }
12421cb0ef41Sopenharmony_ci}
12431cb0ef41Sopenharmony_ci
12441cb0ef41Sopenharmony_citemplate <class CharT>
12451cb0ef41Sopenharmony_ciRegExpCapture* RegExpParserImpl<CharT>::GetCapture(int index) {
12461cb0ef41Sopenharmony_ci  // The index for the capture groups are one-based. Its index in the list is
12471cb0ef41Sopenharmony_ci  // zero-based.
12481cb0ef41Sopenharmony_ci  const int known_captures =
12491cb0ef41Sopenharmony_ci      is_scanned_for_captures_ ? capture_count_ : captures_started_;
12501cb0ef41Sopenharmony_ci  DCHECK(index <= known_captures);
12511cb0ef41Sopenharmony_ci  if (captures_ == nullptr) {
12521cb0ef41Sopenharmony_ci    captures_ =
12531cb0ef41Sopenharmony_ci        zone()->template New<ZoneList<RegExpCapture*>>(known_captures, zone());
12541cb0ef41Sopenharmony_ci  }
12551cb0ef41Sopenharmony_ci  while (captures_->length() < known_captures) {
12561cb0ef41Sopenharmony_ci    captures_->Add(zone()->template New<RegExpCapture>(captures_->length() + 1),
12571cb0ef41Sopenharmony_ci                   zone());
12581cb0ef41Sopenharmony_ci  }
12591cb0ef41Sopenharmony_ci  return captures_->at(index - 1);
12601cb0ef41Sopenharmony_ci}
12611cb0ef41Sopenharmony_ci
12621cb0ef41Sopenharmony_citemplate <class CharT>
12631cb0ef41Sopenharmony_ciZoneVector<RegExpCapture*>* RegExpParserImpl<CharT>::GetNamedCaptures() const {
12641cb0ef41Sopenharmony_ci  if (named_captures_ == nullptr || named_captures_->empty()) {
12651cb0ef41Sopenharmony_ci    return nullptr;
12661cb0ef41Sopenharmony_ci  }
12671cb0ef41Sopenharmony_ci
12681cb0ef41Sopenharmony_ci  return zone()->template New<ZoneVector<RegExpCapture*>>(
12691cb0ef41Sopenharmony_ci      named_captures_->begin(), named_captures_->end(), zone());
12701cb0ef41Sopenharmony_ci}
12711cb0ef41Sopenharmony_ci
12721cb0ef41Sopenharmony_citemplate <class CharT>
12731cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::HasNamedCaptures(
12741cb0ef41Sopenharmony_ci    InClassEscapeState in_class_escape_state) {
12751cb0ef41Sopenharmony_ci  if (has_named_captures_ || is_scanned_for_captures_) {
12761cb0ef41Sopenharmony_ci    return has_named_captures_;
12771cb0ef41Sopenharmony_ci  }
12781cb0ef41Sopenharmony_ci
12791cb0ef41Sopenharmony_ci  ScanForCaptures(in_class_escape_state);
12801cb0ef41Sopenharmony_ci  DCHECK(is_scanned_for_captures_);
12811cb0ef41Sopenharmony_ci  return has_named_captures_;
12821cb0ef41Sopenharmony_ci}
12831cb0ef41Sopenharmony_ci
12841cb0ef41Sopenharmony_ci// QuantifierPrefix ::
12851cb0ef41Sopenharmony_ci//   { DecimalDigits }
12861cb0ef41Sopenharmony_ci//   { DecimalDigits , }
12871cb0ef41Sopenharmony_ci//   { DecimalDigits , DecimalDigits }
12881cb0ef41Sopenharmony_ci//
12891cb0ef41Sopenharmony_ci// Returns true if parsing succeeds, and set the min_out and max_out
12901cb0ef41Sopenharmony_ci// values. Values are truncated to RegExpTree::kInfinity if they overflow.
12911cb0ef41Sopenharmony_citemplate <class CharT>
12921cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseIntervalQuantifier(int* min_out,
12931cb0ef41Sopenharmony_ci                                                      int* max_out) {
12941cb0ef41Sopenharmony_ci  DCHECK_EQ(current(), '{');
12951cb0ef41Sopenharmony_ci  int start = position();
12961cb0ef41Sopenharmony_ci  Advance();
12971cb0ef41Sopenharmony_ci  int min = 0;
12981cb0ef41Sopenharmony_ci  if (!IsDecimalDigit(current())) {
12991cb0ef41Sopenharmony_ci    Reset(start);
13001cb0ef41Sopenharmony_ci    return false;
13011cb0ef41Sopenharmony_ci  }
13021cb0ef41Sopenharmony_ci  while (IsDecimalDigit(current())) {
13031cb0ef41Sopenharmony_ci    int next = current() - '0';
13041cb0ef41Sopenharmony_ci    if (min > (RegExpTree::kInfinity - next) / 10) {
13051cb0ef41Sopenharmony_ci      // Overflow. Skip past remaining decimal digits and return -1.
13061cb0ef41Sopenharmony_ci      do {
13071cb0ef41Sopenharmony_ci        Advance();
13081cb0ef41Sopenharmony_ci      } while (IsDecimalDigit(current()));
13091cb0ef41Sopenharmony_ci      min = RegExpTree::kInfinity;
13101cb0ef41Sopenharmony_ci      break;
13111cb0ef41Sopenharmony_ci    }
13121cb0ef41Sopenharmony_ci    min = 10 * min + next;
13131cb0ef41Sopenharmony_ci    Advance();
13141cb0ef41Sopenharmony_ci  }
13151cb0ef41Sopenharmony_ci  int max = 0;
13161cb0ef41Sopenharmony_ci  if (current() == '}') {
13171cb0ef41Sopenharmony_ci    max = min;
13181cb0ef41Sopenharmony_ci    Advance();
13191cb0ef41Sopenharmony_ci  } else if (current() == ',') {
13201cb0ef41Sopenharmony_ci    Advance();
13211cb0ef41Sopenharmony_ci    if (current() == '}') {
13221cb0ef41Sopenharmony_ci      max = RegExpTree::kInfinity;
13231cb0ef41Sopenharmony_ci      Advance();
13241cb0ef41Sopenharmony_ci    } else {
13251cb0ef41Sopenharmony_ci      while (IsDecimalDigit(current())) {
13261cb0ef41Sopenharmony_ci        int next = current() - '0';
13271cb0ef41Sopenharmony_ci        if (max > (RegExpTree::kInfinity - next) / 10) {
13281cb0ef41Sopenharmony_ci          do {
13291cb0ef41Sopenharmony_ci            Advance();
13301cb0ef41Sopenharmony_ci          } while (IsDecimalDigit(current()));
13311cb0ef41Sopenharmony_ci          max = RegExpTree::kInfinity;
13321cb0ef41Sopenharmony_ci          break;
13331cb0ef41Sopenharmony_ci        }
13341cb0ef41Sopenharmony_ci        max = 10 * max + next;
13351cb0ef41Sopenharmony_ci        Advance();
13361cb0ef41Sopenharmony_ci      }
13371cb0ef41Sopenharmony_ci      if (current() != '}') {
13381cb0ef41Sopenharmony_ci        Reset(start);
13391cb0ef41Sopenharmony_ci        return false;
13401cb0ef41Sopenharmony_ci      }
13411cb0ef41Sopenharmony_ci      Advance();
13421cb0ef41Sopenharmony_ci    }
13431cb0ef41Sopenharmony_ci  } else {
13441cb0ef41Sopenharmony_ci    Reset(start);
13451cb0ef41Sopenharmony_ci    return false;
13461cb0ef41Sopenharmony_ci  }
13471cb0ef41Sopenharmony_ci  *min_out = min;
13481cb0ef41Sopenharmony_ci  *max_out = max;
13491cb0ef41Sopenharmony_ci  return true;
13501cb0ef41Sopenharmony_ci}
13511cb0ef41Sopenharmony_ci
13521cb0ef41Sopenharmony_citemplate <class CharT>
13531cb0ef41Sopenharmony_cibase::uc32 RegExpParserImpl<CharT>::ParseOctalLiteral() {
13541cb0ef41Sopenharmony_ci  DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker);
13551cb0ef41Sopenharmony_ci  // For compatibility with some other browsers (not all), we parse
13561cb0ef41Sopenharmony_ci  // up to three octal digits with a value below 256.
13571cb0ef41Sopenharmony_ci  // ES#prod-annexB-LegacyOctalEscapeSequence
13581cb0ef41Sopenharmony_ci  base::uc32 value = current() - '0';
13591cb0ef41Sopenharmony_ci  Advance();
13601cb0ef41Sopenharmony_ci  if ('0' <= current() && current() <= '7') {
13611cb0ef41Sopenharmony_ci    value = value * 8 + current() - '0';
13621cb0ef41Sopenharmony_ci    Advance();
13631cb0ef41Sopenharmony_ci    if (value < 32 && '0' <= current() && current() <= '7') {
13641cb0ef41Sopenharmony_ci      value = value * 8 + current() - '0';
13651cb0ef41Sopenharmony_ci      Advance();
13661cb0ef41Sopenharmony_ci    }
13671cb0ef41Sopenharmony_ci  }
13681cb0ef41Sopenharmony_ci  return value;
13691cb0ef41Sopenharmony_ci}
13701cb0ef41Sopenharmony_ci
13711cb0ef41Sopenharmony_citemplate <class CharT>
13721cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseHexEscape(int length, base::uc32* value) {
13731cb0ef41Sopenharmony_ci  int start = position();
13741cb0ef41Sopenharmony_ci  base::uc32 val = 0;
13751cb0ef41Sopenharmony_ci  for (int i = 0; i < length; ++i) {
13761cb0ef41Sopenharmony_ci    base::uc32 c = current();
13771cb0ef41Sopenharmony_ci    int d = base::HexValue(c);
13781cb0ef41Sopenharmony_ci    if (d < 0) {
13791cb0ef41Sopenharmony_ci      Reset(start);
13801cb0ef41Sopenharmony_ci      return false;
13811cb0ef41Sopenharmony_ci    }
13821cb0ef41Sopenharmony_ci    val = val * 16 + d;
13831cb0ef41Sopenharmony_ci    Advance();
13841cb0ef41Sopenharmony_ci  }
13851cb0ef41Sopenharmony_ci  *value = val;
13861cb0ef41Sopenharmony_ci  return true;
13871cb0ef41Sopenharmony_ci}
13881cb0ef41Sopenharmony_ci
13891cb0ef41Sopenharmony_ci// This parses RegExpUnicodeEscapeSequence as described in ECMA262.
13901cb0ef41Sopenharmony_citemplate <class CharT>
13911cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseUnicodeEscape(base::uc32* value) {
13921cb0ef41Sopenharmony_ci  // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
13931cb0ef41Sopenharmony_ci  // allowed). In the latter case, the number of hex digits between { } is
13941cb0ef41Sopenharmony_ci  // arbitrary. \ and u have already been read.
13951cb0ef41Sopenharmony_ci  if (current() == '{' && unicode()) {
13961cb0ef41Sopenharmony_ci    int start = position();
13971cb0ef41Sopenharmony_ci    Advance();
13981cb0ef41Sopenharmony_ci    if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) {
13991cb0ef41Sopenharmony_ci      if (current() == '}') {
14001cb0ef41Sopenharmony_ci        Advance();
14011cb0ef41Sopenharmony_ci        return true;
14021cb0ef41Sopenharmony_ci      }
14031cb0ef41Sopenharmony_ci    }
14041cb0ef41Sopenharmony_ci    Reset(start);
14051cb0ef41Sopenharmony_ci    return false;
14061cb0ef41Sopenharmony_ci  }
14071cb0ef41Sopenharmony_ci  // \u but no {, or \u{...} escapes not allowed.
14081cb0ef41Sopenharmony_ci  bool result = ParseHexEscape(4, value);
14091cb0ef41Sopenharmony_ci  if (result && unicode() && unibrow::Utf16::IsLeadSurrogate(*value) &&
14101cb0ef41Sopenharmony_ci      current() == '\\') {
14111cb0ef41Sopenharmony_ci    // Attempt to read trail surrogate.
14121cb0ef41Sopenharmony_ci    int start = position();
14131cb0ef41Sopenharmony_ci    if (Next() == 'u') {
14141cb0ef41Sopenharmony_ci      Advance(2);
14151cb0ef41Sopenharmony_ci      base::uc32 trail;
14161cb0ef41Sopenharmony_ci      if (ParseHexEscape(4, &trail) &&
14171cb0ef41Sopenharmony_ci          unibrow::Utf16::IsTrailSurrogate(trail)) {
14181cb0ef41Sopenharmony_ci        *value = unibrow::Utf16::CombineSurrogatePair(
14191cb0ef41Sopenharmony_ci            static_cast<base::uc16>(*value), static_cast<base::uc16>(trail));
14201cb0ef41Sopenharmony_ci        return true;
14211cb0ef41Sopenharmony_ci      }
14221cb0ef41Sopenharmony_ci    }
14231cb0ef41Sopenharmony_ci    Reset(start);
14241cb0ef41Sopenharmony_ci  }
14251cb0ef41Sopenharmony_ci  return result;
14261cb0ef41Sopenharmony_ci}
14271cb0ef41Sopenharmony_ci
14281cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
14291cb0ef41Sopenharmony_ci
14301cb0ef41Sopenharmony_cinamespace {
14311cb0ef41Sopenharmony_ci
14321cb0ef41Sopenharmony_cibool IsExactPropertyAlias(const char* property_name, UProperty property) {
14331cb0ef41Sopenharmony_ci  const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
14341cb0ef41Sopenharmony_ci  if (short_name != nullptr && strcmp(property_name, short_name) == 0)
14351cb0ef41Sopenharmony_ci    return true;
14361cb0ef41Sopenharmony_ci  for (int i = 0;; i++) {
14371cb0ef41Sopenharmony_ci    const char* long_name = u_getPropertyName(
14381cb0ef41Sopenharmony_ci        property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
14391cb0ef41Sopenharmony_ci    if (long_name == nullptr) break;
14401cb0ef41Sopenharmony_ci    if (strcmp(property_name, long_name) == 0) return true;
14411cb0ef41Sopenharmony_ci  }
14421cb0ef41Sopenharmony_ci  return false;
14431cb0ef41Sopenharmony_ci}
14441cb0ef41Sopenharmony_ci
14451cb0ef41Sopenharmony_cibool IsExactPropertyValueAlias(const char* property_value_name,
14461cb0ef41Sopenharmony_ci                               UProperty property, int32_t property_value) {
14471cb0ef41Sopenharmony_ci  const char* short_name =
14481cb0ef41Sopenharmony_ci      u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
14491cb0ef41Sopenharmony_ci  if (short_name != nullptr && strcmp(property_value_name, short_name) == 0) {
14501cb0ef41Sopenharmony_ci    return true;
14511cb0ef41Sopenharmony_ci  }
14521cb0ef41Sopenharmony_ci  for (int i = 0;; i++) {
14531cb0ef41Sopenharmony_ci    const char* long_name = u_getPropertyValueName(
14541cb0ef41Sopenharmony_ci        property, property_value,
14551cb0ef41Sopenharmony_ci        static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
14561cb0ef41Sopenharmony_ci    if (long_name == nullptr) break;
14571cb0ef41Sopenharmony_ci    if (strcmp(property_value_name, long_name) == 0) return true;
14581cb0ef41Sopenharmony_ci  }
14591cb0ef41Sopenharmony_ci  return false;
14601cb0ef41Sopenharmony_ci}
14611cb0ef41Sopenharmony_ci
14621cb0ef41Sopenharmony_cibool LookupPropertyValueName(UProperty property,
14631cb0ef41Sopenharmony_ci                             const char* property_value_name, bool negate,
14641cb0ef41Sopenharmony_ci                             ZoneList<CharacterRange>* result, Zone* zone) {
14651cb0ef41Sopenharmony_ci  UProperty property_for_lookup = property;
14661cb0ef41Sopenharmony_ci  if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) {
14671cb0ef41Sopenharmony_ci    // For the property Script_Extensions, we have to do the property value
14681cb0ef41Sopenharmony_ci    // name lookup as if the property is Script.
14691cb0ef41Sopenharmony_ci    property_for_lookup = UCHAR_SCRIPT;
14701cb0ef41Sopenharmony_ci  }
14711cb0ef41Sopenharmony_ci  int32_t property_value =
14721cb0ef41Sopenharmony_ci      u_getPropertyValueEnum(property_for_lookup, property_value_name);
14731cb0ef41Sopenharmony_ci  if (property_value == UCHAR_INVALID_CODE) return false;
14741cb0ef41Sopenharmony_ci
14751cb0ef41Sopenharmony_ci  // We require the property name to match exactly to one of the property value
14761cb0ef41Sopenharmony_ci  // aliases. However, u_getPropertyValueEnum uses loose matching.
14771cb0ef41Sopenharmony_ci  if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup,
14781cb0ef41Sopenharmony_ci                                 property_value)) {
14791cb0ef41Sopenharmony_ci    return false;
14801cb0ef41Sopenharmony_ci  }
14811cb0ef41Sopenharmony_ci
14821cb0ef41Sopenharmony_ci  UErrorCode ec = U_ZERO_ERROR;
14831cb0ef41Sopenharmony_ci  icu::UnicodeSet set;
14841cb0ef41Sopenharmony_ci  set.applyIntPropertyValue(property, property_value, ec);
14851cb0ef41Sopenharmony_ci  bool success = ec == U_ZERO_ERROR && !set.isEmpty();
14861cb0ef41Sopenharmony_ci
14871cb0ef41Sopenharmony_ci  if (success) {
14881cb0ef41Sopenharmony_ci    set.removeAllStrings();
14891cb0ef41Sopenharmony_ci    if (negate) set.complement();
14901cb0ef41Sopenharmony_ci    for (int i = 0; i < set.getRangeCount(); i++) {
14911cb0ef41Sopenharmony_ci      result->Add(
14921cb0ef41Sopenharmony_ci          CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
14931cb0ef41Sopenharmony_ci          zone);
14941cb0ef41Sopenharmony_ci    }
14951cb0ef41Sopenharmony_ci  }
14961cb0ef41Sopenharmony_ci  return success;
14971cb0ef41Sopenharmony_ci}
14981cb0ef41Sopenharmony_ci
// Returns true if the null-terminated string |name| equals the string literal
// |literal|. N is the size of the literal's array and therefore already
// includes the terminating '\0', so comparing N characters covers the whole
// literal plus its terminator without ever exceeding the array bounds.
template <size_t N>
inline bool NameEquals(const char* name, const char (&literal)[N]) {
  return strncmp(name, literal, N) == 0;
}
15031cb0ef41Sopenharmony_ci
15041cb0ef41Sopenharmony_cibool LookupSpecialPropertyValueName(const char* name,
15051cb0ef41Sopenharmony_ci                                    ZoneList<CharacterRange>* result,
15061cb0ef41Sopenharmony_ci                                    bool negate, Zone* zone) {
15071cb0ef41Sopenharmony_ci  if (NameEquals(name, "Any")) {
15081cb0ef41Sopenharmony_ci    if (negate) {
15091cb0ef41Sopenharmony_ci      // Leave the list of character ranges empty, since the negation of 'Any'
15101cb0ef41Sopenharmony_ci      // is the empty set.
15111cb0ef41Sopenharmony_ci    } else {
15121cb0ef41Sopenharmony_ci      result->Add(CharacterRange::Everything(), zone);
15131cb0ef41Sopenharmony_ci    }
15141cb0ef41Sopenharmony_ci  } else if (NameEquals(name, "ASCII")) {
15151cb0ef41Sopenharmony_ci    result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
15161cb0ef41Sopenharmony_ci                       : CharacterRange::Range(0x0, 0x7F),
15171cb0ef41Sopenharmony_ci                zone);
15181cb0ef41Sopenharmony_ci  } else if (NameEquals(name, "Assigned")) {
15191cb0ef41Sopenharmony_ci    return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
15201cb0ef41Sopenharmony_ci                                   !negate, result, zone);
15211cb0ef41Sopenharmony_ci  } else {
15221cb0ef41Sopenharmony_ci    return false;
15231cb0ef41Sopenharmony_ci  }
15241cb0ef41Sopenharmony_ci  return true;
15251cb0ef41Sopenharmony_ci}
15261cb0ef41Sopenharmony_ci
15271cb0ef41Sopenharmony_ci// Explicitly allowlist supported binary properties. The spec forbids supporting
15281cb0ef41Sopenharmony_ci// properties outside of this set to ensure interoperability.
15291cb0ef41Sopenharmony_cibool IsSupportedBinaryProperty(UProperty property) {
15301cb0ef41Sopenharmony_ci  switch (property) {
15311cb0ef41Sopenharmony_ci    case UCHAR_ALPHABETIC:
15321cb0ef41Sopenharmony_ci    // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName.
15331cb0ef41Sopenharmony_ci    // 'ASCII' is not supported by ICU. See LookupSpecialPropertyValueName.
15341cb0ef41Sopenharmony_ci    case UCHAR_ASCII_HEX_DIGIT:
15351cb0ef41Sopenharmony_ci    // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName.
15361cb0ef41Sopenharmony_ci    case UCHAR_BIDI_CONTROL:
15371cb0ef41Sopenharmony_ci    case UCHAR_BIDI_MIRRORED:
15381cb0ef41Sopenharmony_ci    case UCHAR_CASE_IGNORABLE:
15391cb0ef41Sopenharmony_ci    case UCHAR_CASED:
15401cb0ef41Sopenharmony_ci    case UCHAR_CHANGES_WHEN_CASEFOLDED:
15411cb0ef41Sopenharmony_ci    case UCHAR_CHANGES_WHEN_CASEMAPPED:
15421cb0ef41Sopenharmony_ci    case UCHAR_CHANGES_WHEN_LOWERCASED:
15431cb0ef41Sopenharmony_ci    case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
15441cb0ef41Sopenharmony_ci    case UCHAR_CHANGES_WHEN_TITLECASED:
15451cb0ef41Sopenharmony_ci    case UCHAR_CHANGES_WHEN_UPPERCASED:
15461cb0ef41Sopenharmony_ci    case UCHAR_DASH:
15471cb0ef41Sopenharmony_ci    case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
15481cb0ef41Sopenharmony_ci    case UCHAR_DEPRECATED:
15491cb0ef41Sopenharmony_ci    case UCHAR_DIACRITIC:
15501cb0ef41Sopenharmony_ci    case UCHAR_EMOJI:
15511cb0ef41Sopenharmony_ci    case UCHAR_EMOJI_COMPONENT:
15521cb0ef41Sopenharmony_ci    case UCHAR_EMOJI_MODIFIER_BASE:
15531cb0ef41Sopenharmony_ci    case UCHAR_EMOJI_MODIFIER:
15541cb0ef41Sopenharmony_ci    case UCHAR_EMOJI_PRESENTATION:
15551cb0ef41Sopenharmony_ci    case UCHAR_EXTENDED_PICTOGRAPHIC:
15561cb0ef41Sopenharmony_ci    case UCHAR_EXTENDER:
15571cb0ef41Sopenharmony_ci    case UCHAR_GRAPHEME_BASE:
15581cb0ef41Sopenharmony_ci    case UCHAR_GRAPHEME_EXTEND:
15591cb0ef41Sopenharmony_ci    case UCHAR_HEX_DIGIT:
15601cb0ef41Sopenharmony_ci    case UCHAR_ID_CONTINUE:
15611cb0ef41Sopenharmony_ci    case UCHAR_ID_START:
15621cb0ef41Sopenharmony_ci    case UCHAR_IDEOGRAPHIC:
15631cb0ef41Sopenharmony_ci    case UCHAR_IDS_BINARY_OPERATOR:
15641cb0ef41Sopenharmony_ci    case UCHAR_IDS_TRINARY_OPERATOR:
15651cb0ef41Sopenharmony_ci    case UCHAR_JOIN_CONTROL:
15661cb0ef41Sopenharmony_ci    case UCHAR_LOGICAL_ORDER_EXCEPTION:
15671cb0ef41Sopenharmony_ci    case UCHAR_LOWERCASE:
15681cb0ef41Sopenharmony_ci    case UCHAR_MATH:
15691cb0ef41Sopenharmony_ci    case UCHAR_NONCHARACTER_CODE_POINT:
15701cb0ef41Sopenharmony_ci    case UCHAR_PATTERN_SYNTAX:
15711cb0ef41Sopenharmony_ci    case UCHAR_PATTERN_WHITE_SPACE:
15721cb0ef41Sopenharmony_ci    case UCHAR_QUOTATION_MARK:
15731cb0ef41Sopenharmony_ci    case UCHAR_RADICAL:
15741cb0ef41Sopenharmony_ci    case UCHAR_REGIONAL_INDICATOR:
15751cb0ef41Sopenharmony_ci    case UCHAR_S_TERM:
15761cb0ef41Sopenharmony_ci    case UCHAR_SOFT_DOTTED:
15771cb0ef41Sopenharmony_ci    case UCHAR_TERMINAL_PUNCTUATION:
15781cb0ef41Sopenharmony_ci    case UCHAR_UNIFIED_IDEOGRAPH:
15791cb0ef41Sopenharmony_ci    case UCHAR_UPPERCASE:
15801cb0ef41Sopenharmony_ci    case UCHAR_VARIATION_SELECTOR:
15811cb0ef41Sopenharmony_ci    case UCHAR_WHITE_SPACE:
15821cb0ef41Sopenharmony_ci    case UCHAR_XID_CONTINUE:
15831cb0ef41Sopenharmony_ci    case UCHAR_XID_START:
15841cb0ef41Sopenharmony_ci      return true;
15851cb0ef41Sopenharmony_ci    default:
15861cb0ef41Sopenharmony_ci      break;
15871cb0ef41Sopenharmony_ci  }
15881cb0ef41Sopenharmony_ci  return false;
15891cb0ef41Sopenharmony_ci}
15901cb0ef41Sopenharmony_ci
// Returns true if |c| may appear in a unicode property name or value, i.e. if
// it is an ASCII letter, an ASCII digit or '_'.
bool IsUnicodePropertyValueCharacter(char c) {
  // https://tc39.github.io/proposal-regexp-unicode-property-escapes/
  //
  // Validating every parsed char with this predicate is quite conservative.
  // A possible alternative would be to only make sure the parsed property
  // name/value candidate string contains no '\0' characters and to let the
  // ICU lookups trigger the final failure.
  const bool is_lower = 'a' <= c && c <= 'z';
  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_digit = '0' <= c && c <= '9';
  return is_lower || is_upper || is_digit || c == '_';
}
16031cb0ef41Sopenharmony_ci
16041cb0ef41Sopenharmony_ci}  // namespace
16051cb0ef41Sopenharmony_ci
16061cb0ef41Sopenharmony_citemplate <class CharT>
16071cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParsePropertyClassName(ZoneVector<char>* name_1,
16081cb0ef41Sopenharmony_ci                                                     ZoneVector<char>* name_2) {
16091cb0ef41Sopenharmony_ci  DCHECK(name_1->empty());
16101cb0ef41Sopenharmony_ci  DCHECK(name_2->empty());
16111cb0ef41Sopenharmony_ci  // Parse the property class as follows:
16121cb0ef41Sopenharmony_ci  // - In \p{name}, 'name' is interpreted
16131cb0ef41Sopenharmony_ci  //   - either as a general category property value name.
16141cb0ef41Sopenharmony_ci  //   - or as a binary property name.
16151cb0ef41Sopenharmony_ci  // - In \p{name=value}, 'name' is interpreted as an enumerated property name,
16161cb0ef41Sopenharmony_ci  //   and 'value' is interpreted as one of the available property value names.
16171cb0ef41Sopenharmony_ci  // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
16181cb0ef41Sopenharmony_ci  // - Loose matching is not applied.
16191cb0ef41Sopenharmony_ci  if (current() == '{') {
16201cb0ef41Sopenharmony_ci    // Parse \p{[PropertyName=]PropertyNameValue}
16211cb0ef41Sopenharmony_ci    for (Advance(); current() != '}' && current() != '='; Advance()) {
16221cb0ef41Sopenharmony_ci      if (!IsUnicodePropertyValueCharacter(current())) return false;
16231cb0ef41Sopenharmony_ci      if (!has_next()) return false;
16241cb0ef41Sopenharmony_ci      name_1->push_back(static_cast<char>(current()));
16251cb0ef41Sopenharmony_ci    }
16261cb0ef41Sopenharmony_ci    if (current() == '=') {
16271cb0ef41Sopenharmony_ci      for (Advance(); current() != '}'; Advance()) {
16281cb0ef41Sopenharmony_ci        if (!IsUnicodePropertyValueCharacter(current())) return false;
16291cb0ef41Sopenharmony_ci        if (!has_next()) return false;
16301cb0ef41Sopenharmony_ci        name_2->push_back(static_cast<char>(current()));
16311cb0ef41Sopenharmony_ci      }
16321cb0ef41Sopenharmony_ci      name_2->push_back(0);  // null-terminate string.
16331cb0ef41Sopenharmony_ci    }
16341cb0ef41Sopenharmony_ci  } else {
16351cb0ef41Sopenharmony_ci    return false;
16361cb0ef41Sopenharmony_ci  }
16371cb0ef41Sopenharmony_ci  Advance();
16381cb0ef41Sopenharmony_ci  name_1->push_back(0);  // null-terminate string.
16391cb0ef41Sopenharmony_ci
16401cb0ef41Sopenharmony_ci  DCHECK(name_1->size() - 1 == std::strlen(name_1->data()));
16411cb0ef41Sopenharmony_ci  DCHECK(name_2->empty() || name_2->size() - 1 == std::strlen(name_2->data()));
16421cb0ef41Sopenharmony_ci  return true;
16431cb0ef41Sopenharmony_ci}
16441cb0ef41Sopenharmony_ci
16451cb0ef41Sopenharmony_citemplate <class CharT>
16461cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::AddPropertyClassRange(
16471cb0ef41Sopenharmony_ci    ZoneList<CharacterRange>* add_to, bool negate,
16481cb0ef41Sopenharmony_ci    const ZoneVector<char>& name_1, const ZoneVector<char>& name_2) {
16491cb0ef41Sopenharmony_ci  if (name_2.empty()) {
16501cb0ef41Sopenharmony_ci    // First attempt to interpret as general category property value name.
16511cb0ef41Sopenharmony_ci    const char* name = name_1.data();
16521cb0ef41Sopenharmony_ci    if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
16531cb0ef41Sopenharmony_ci                                add_to, zone())) {
16541cb0ef41Sopenharmony_ci      return true;
16551cb0ef41Sopenharmony_ci    }
16561cb0ef41Sopenharmony_ci    // Interpret "Any", "ASCII", and "Assigned".
16571cb0ef41Sopenharmony_ci    if (LookupSpecialPropertyValueName(name, add_to, negate, zone())) {
16581cb0ef41Sopenharmony_ci      return true;
16591cb0ef41Sopenharmony_ci    }
16601cb0ef41Sopenharmony_ci    // Then attempt to interpret as binary property name with value name 'Y'.
16611cb0ef41Sopenharmony_ci    UProperty property = u_getPropertyEnum(name);
16621cb0ef41Sopenharmony_ci    if (!IsSupportedBinaryProperty(property)) return false;
16631cb0ef41Sopenharmony_ci    if (!IsExactPropertyAlias(name, property)) return false;
16641cb0ef41Sopenharmony_ci    return LookupPropertyValueName(property, negate ? "N" : "Y", false, add_to,
16651cb0ef41Sopenharmony_ci                                   zone());
16661cb0ef41Sopenharmony_ci  } else {
16671cb0ef41Sopenharmony_ci    // Both property name and value name are specified. Attempt to interpret
16681cb0ef41Sopenharmony_ci    // the property name as enumerated property.
16691cb0ef41Sopenharmony_ci    const char* property_name = name_1.data();
16701cb0ef41Sopenharmony_ci    const char* value_name = name_2.data();
16711cb0ef41Sopenharmony_ci    UProperty property = u_getPropertyEnum(property_name);
16721cb0ef41Sopenharmony_ci    if (!IsExactPropertyAlias(property_name, property)) return false;
16731cb0ef41Sopenharmony_ci    if (property == UCHAR_GENERAL_CATEGORY) {
16741cb0ef41Sopenharmony_ci      // We want to allow aggregate value names such as "Letter".
16751cb0ef41Sopenharmony_ci      property = UCHAR_GENERAL_CATEGORY_MASK;
16761cb0ef41Sopenharmony_ci    } else if (property != UCHAR_SCRIPT &&
16771cb0ef41Sopenharmony_ci               property != UCHAR_SCRIPT_EXTENSIONS) {
16781cb0ef41Sopenharmony_ci      return false;
16791cb0ef41Sopenharmony_ci    }
16801cb0ef41Sopenharmony_ci    return LookupPropertyValueName(property, value_name, negate, add_to,
16811cb0ef41Sopenharmony_ci                                   zone());
16821cb0ef41Sopenharmony_ci  }
16831cb0ef41Sopenharmony_ci}
16841cb0ef41Sopenharmony_ci
16851cb0ef41Sopenharmony_ci#else  // V8_INTL_SUPPORT
16861cb0ef41Sopenharmony_ci
16871cb0ef41Sopenharmony_citemplate <class CharT>
16881cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParsePropertyClassName(ZoneVector<char>* name_1,
16891cb0ef41Sopenharmony_ci                                                     ZoneVector<char>* name_2) {
16901cb0ef41Sopenharmony_ci  return false;
16911cb0ef41Sopenharmony_ci}
16921cb0ef41Sopenharmony_ci
16931cb0ef41Sopenharmony_citemplate <class CharT>
16941cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::AddPropertyClassRange(
16951cb0ef41Sopenharmony_ci    ZoneList<CharacterRange>* add_to, bool negate,
16961cb0ef41Sopenharmony_ci    const ZoneVector<char>& name_1, const ZoneVector<char>& name_2) {
16971cb0ef41Sopenharmony_ci  return false;
16981cb0ef41Sopenharmony_ci}
16991cb0ef41Sopenharmony_ci
17001cb0ef41Sopenharmony_ci#endif  // V8_INTL_SUPPORT
17011cb0ef41Sopenharmony_ci
17021cb0ef41Sopenharmony_citemplate <class CharT>
17031cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseUnlimitedLengthHexNumber(int max_value,
17041cb0ef41Sopenharmony_ci                                                            base::uc32* value) {
17051cb0ef41Sopenharmony_ci  base::uc32 x = 0;
17061cb0ef41Sopenharmony_ci  int d = base::HexValue(current());
17071cb0ef41Sopenharmony_ci  if (d < 0) {
17081cb0ef41Sopenharmony_ci    return false;
17091cb0ef41Sopenharmony_ci  }
17101cb0ef41Sopenharmony_ci  while (d >= 0) {
17111cb0ef41Sopenharmony_ci    x = x * 16 + d;
17121cb0ef41Sopenharmony_ci    if (x > static_cast<base::uc32>(max_value)) {
17131cb0ef41Sopenharmony_ci      return false;
17141cb0ef41Sopenharmony_ci    }
17151cb0ef41Sopenharmony_ci    Advance();
17161cb0ef41Sopenharmony_ci    d = base::HexValue(current());
17171cb0ef41Sopenharmony_ci  }
17181cb0ef41Sopenharmony_ci  *value = x;
17191cb0ef41Sopenharmony_ci  return true;
17201cb0ef41Sopenharmony_ci}
17211cb0ef41Sopenharmony_ci
17221cb0ef41Sopenharmony_ci// https://tc39.es/ecma262/#prod-CharacterEscape
17231cb0ef41Sopenharmony_citemplate <class CharT>
17241cb0ef41Sopenharmony_cibase::uc32 RegExpParserImpl<CharT>::ParseCharacterEscape(
17251cb0ef41Sopenharmony_ci    InClassEscapeState in_class_escape_state,
17261cb0ef41Sopenharmony_ci    bool* is_escaped_unicode_character) {
17271cb0ef41Sopenharmony_ci  DCHECK_EQ('\\', current());
17281cb0ef41Sopenharmony_ci  DCHECK(has_next() && !IsSpecialClassEscape(Next()));
17291cb0ef41Sopenharmony_ci
17301cb0ef41Sopenharmony_ci  Advance();
17311cb0ef41Sopenharmony_ci
17321cb0ef41Sopenharmony_ci  const base::uc32 c = current();
17331cb0ef41Sopenharmony_ci  switch (c) {
17341cb0ef41Sopenharmony_ci    // CharacterEscape ::
17351cb0ef41Sopenharmony_ci    //   ControlEscape :: one of
17361cb0ef41Sopenharmony_ci    //     f n r t v
17371cb0ef41Sopenharmony_ci    case 'f':
17381cb0ef41Sopenharmony_ci      Advance();
17391cb0ef41Sopenharmony_ci      return '\f';
17401cb0ef41Sopenharmony_ci    case 'n':
17411cb0ef41Sopenharmony_ci      Advance();
17421cb0ef41Sopenharmony_ci      return '\n';
17431cb0ef41Sopenharmony_ci    case 'r':
17441cb0ef41Sopenharmony_ci      Advance();
17451cb0ef41Sopenharmony_ci      return '\r';
17461cb0ef41Sopenharmony_ci    case 't':
17471cb0ef41Sopenharmony_ci      Advance();
17481cb0ef41Sopenharmony_ci      return '\t';
17491cb0ef41Sopenharmony_ci    case 'v':
17501cb0ef41Sopenharmony_ci      Advance();
17511cb0ef41Sopenharmony_ci      return '\v';
17521cb0ef41Sopenharmony_ci    // CharacterEscape ::
17531cb0ef41Sopenharmony_ci    //   c ControlLetter
17541cb0ef41Sopenharmony_ci    case 'c': {
17551cb0ef41Sopenharmony_ci      base::uc32 controlLetter = Next();
17561cb0ef41Sopenharmony_ci      base::uc32 letter = controlLetter & ~('A' ^ 'a');
17571cb0ef41Sopenharmony_ci      if (letter >= 'A' && letter <= 'Z') {
17581cb0ef41Sopenharmony_ci        Advance(2);
17591cb0ef41Sopenharmony_ci        // Control letters mapped to ASCII control characters in the range
17601cb0ef41Sopenharmony_ci        // 0x00-0x1F.
17611cb0ef41Sopenharmony_ci        return controlLetter & 0x1F;
17621cb0ef41Sopenharmony_ci      }
17631cb0ef41Sopenharmony_ci      if (unicode()) {
17641cb0ef41Sopenharmony_ci        // With /u, invalid escapes are not treated as identity escapes.
17651cb0ef41Sopenharmony_ci        ReportError(RegExpError::kInvalidUnicodeEscape);
17661cb0ef41Sopenharmony_ci        return 0;
17671cb0ef41Sopenharmony_ci      }
17681cb0ef41Sopenharmony_ci      if (in_class_escape_state == InClassEscapeState::kInClass) {
17691cb0ef41Sopenharmony_ci        // Inside a character class, we also accept digits and underscore as
17701cb0ef41Sopenharmony_ci        // control characters, unless with /u. See Annex B:
17711cb0ef41Sopenharmony_ci        // ES#prod-annexB-ClassControlLetter
17721cb0ef41Sopenharmony_ci        if ((controlLetter >= '0' && controlLetter <= '9') ||
17731cb0ef41Sopenharmony_ci            controlLetter == '_') {
17741cb0ef41Sopenharmony_ci          Advance(2);
17751cb0ef41Sopenharmony_ci          return controlLetter & 0x1F;
17761cb0ef41Sopenharmony_ci        }
17771cb0ef41Sopenharmony_ci      }
17781cb0ef41Sopenharmony_ci      // We match JSC in reading the backslash as a literal
17791cb0ef41Sopenharmony_ci      // character instead of as starting an escape.
17801cb0ef41Sopenharmony_ci      return '\\';
17811cb0ef41Sopenharmony_ci    }
17821cb0ef41Sopenharmony_ci    // CharacterEscape ::
17831cb0ef41Sopenharmony_ci    //   0 [lookahead ∉ DecimalDigit]
17841cb0ef41Sopenharmony_ci    //   [~UnicodeMode] LegacyOctalEscapeSequence
17851cb0ef41Sopenharmony_ci    case '0':
17861cb0ef41Sopenharmony_ci      // \0 is interpreted as NUL if not followed by another digit.
17871cb0ef41Sopenharmony_ci      if (Next() < '0' || Next() > '9') {
17881cb0ef41Sopenharmony_ci        Advance();
17891cb0ef41Sopenharmony_ci        return 0;
17901cb0ef41Sopenharmony_ci      }
17911cb0ef41Sopenharmony_ci      V8_FALLTHROUGH;
17921cb0ef41Sopenharmony_ci    case '1':
17931cb0ef41Sopenharmony_ci    case '2':
17941cb0ef41Sopenharmony_ci    case '3':
17951cb0ef41Sopenharmony_ci    case '4':
17961cb0ef41Sopenharmony_ci    case '5':
17971cb0ef41Sopenharmony_ci    case '6':
17981cb0ef41Sopenharmony_ci    case '7':
17991cb0ef41Sopenharmony_ci      // For compatibility, we interpret a decimal escape that isn't
18001cb0ef41Sopenharmony_ci      // a back reference (and therefore either \0 or not valid according
18011cb0ef41Sopenharmony_ci      // to the specification) as a 1..3 digit octal character code.
18021cb0ef41Sopenharmony_ci      // ES#prod-annexB-LegacyOctalEscapeSequence
18031cb0ef41Sopenharmony_ci      if (unicode()) {
18041cb0ef41Sopenharmony_ci        // With /u, decimal escape is not interpreted as octal character code.
18051cb0ef41Sopenharmony_ci        ReportError(RegExpError::kInvalidClassEscape);
18061cb0ef41Sopenharmony_ci        return 0;
18071cb0ef41Sopenharmony_ci      }
18081cb0ef41Sopenharmony_ci      return ParseOctalLiteral();
18091cb0ef41Sopenharmony_ci    // CharacterEscape ::
18101cb0ef41Sopenharmony_ci    //   HexEscapeSequence
18111cb0ef41Sopenharmony_ci    case 'x': {
18121cb0ef41Sopenharmony_ci      Advance();
18131cb0ef41Sopenharmony_ci      base::uc32 value;
18141cb0ef41Sopenharmony_ci      if (ParseHexEscape(2, &value)) return value;
18151cb0ef41Sopenharmony_ci      if (unicode()) {
18161cb0ef41Sopenharmony_ci        // With /u, invalid escapes are not treated as identity escapes.
18171cb0ef41Sopenharmony_ci        ReportError(RegExpError::kInvalidEscape);
18181cb0ef41Sopenharmony_ci        return 0;
18191cb0ef41Sopenharmony_ci      }
18201cb0ef41Sopenharmony_ci      // If \x is not followed by a two-digit hexadecimal, treat it
18211cb0ef41Sopenharmony_ci      // as an identity escape.
18221cb0ef41Sopenharmony_ci      return 'x';
18231cb0ef41Sopenharmony_ci    }
18241cb0ef41Sopenharmony_ci    // CharacterEscape ::
18251cb0ef41Sopenharmony_ci    //   RegExpUnicodeEscapeSequence [?UnicodeMode]
18261cb0ef41Sopenharmony_ci    case 'u': {
18271cb0ef41Sopenharmony_ci      Advance();
18281cb0ef41Sopenharmony_ci      base::uc32 value;
18291cb0ef41Sopenharmony_ci      if (ParseUnicodeEscape(&value)) {
18301cb0ef41Sopenharmony_ci        *is_escaped_unicode_character = true;
18311cb0ef41Sopenharmony_ci        return value;
18321cb0ef41Sopenharmony_ci      }
18331cb0ef41Sopenharmony_ci      if (unicode()) {
18341cb0ef41Sopenharmony_ci        // With /u, invalid escapes are not treated as identity escapes.
18351cb0ef41Sopenharmony_ci        ReportError(RegExpError::kInvalidUnicodeEscape);
18361cb0ef41Sopenharmony_ci        return 0;
18371cb0ef41Sopenharmony_ci      }
18381cb0ef41Sopenharmony_ci      // If \u is not followed by a two-digit hexadecimal, treat it
18391cb0ef41Sopenharmony_ci      // as an identity escape.
18401cb0ef41Sopenharmony_ci      return 'u';
18411cb0ef41Sopenharmony_ci    }
18421cb0ef41Sopenharmony_ci    default:
18431cb0ef41Sopenharmony_ci      break;
18441cb0ef41Sopenharmony_ci  }
18451cb0ef41Sopenharmony_ci
18461cb0ef41Sopenharmony_ci  // CharacterEscape ::
18471cb0ef41Sopenharmony_ci  //   IdentityEscape[?UnicodeMode, ?N]
18481cb0ef41Sopenharmony_ci  //
18491cb0ef41Sopenharmony_ci  // * With /u, no identity escapes except for syntax characters are
18501cb0ef41Sopenharmony_ci  //   allowed.
18511cb0ef41Sopenharmony_ci  // * Without /u:
18521cb0ef41Sopenharmony_ci  //   * '\c' is not an IdentityEscape.
18531cb0ef41Sopenharmony_ci  //   * '\k' is not an IdentityEscape when named captures exist.
18541cb0ef41Sopenharmony_ci  //   * Otherwise, all identity escapes are allowed.
18551cb0ef41Sopenharmony_ci  if (unicode()) {
18561cb0ef41Sopenharmony_ci    if (!IsSyntaxCharacterOrSlash(c)) {
18571cb0ef41Sopenharmony_ci      ReportError(RegExpError::kInvalidEscape);
18581cb0ef41Sopenharmony_ci      return 0;
18591cb0ef41Sopenharmony_ci    }
18601cb0ef41Sopenharmony_ci    Advance();
18611cb0ef41Sopenharmony_ci    return c;
18621cb0ef41Sopenharmony_ci  }
18631cb0ef41Sopenharmony_ci  DCHECK(!unicode());
18641cb0ef41Sopenharmony_ci  if (c == 'c') {
18651cb0ef41Sopenharmony_ci    ReportError(RegExpError::kInvalidEscape);
18661cb0ef41Sopenharmony_ci    return 0;
18671cb0ef41Sopenharmony_ci  }
18681cb0ef41Sopenharmony_ci  Advance();
18691cb0ef41Sopenharmony_ci  // Note: It's important to Advance before the HasNamedCaptures call s.t. we
18701cb0ef41Sopenharmony_ci  // don't start scanning in the middle of an escape.
18711cb0ef41Sopenharmony_ci  if (c == 'k' && HasNamedCaptures(in_class_escape_state)) {
18721cb0ef41Sopenharmony_ci    ReportError(RegExpError::kInvalidEscape);
18731cb0ef41Sopenharmony_ci    return 0;
18741cb0ef41Sopenharmony_ci  }
18751cb0ef41Sopenharmony_ci  return c;
18761cb0ef41Sopenharmony_ci}
18771cb0ef41Sopenharmony_ci
18781cb0ef41Sopenharmony_ci// https://tc39.es/ecma262/#prod-ClassEscape
18791cb0ef41Sopenharmony_citemplate <class CharT>
18801cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::ParseClassEscape(
18811cb0ef41Sopenharmony_ci    ZoneList<CharacterRange>* ranges, Zone* zone,
18821cb0ef41Sopenharmony_ci    bool add_unicode_case_equivalents, base::uc32* char_out,
18831cb0ef41Sopenharmony_ci    bool* is_class_escape) {
18841cb0ef41Sopenharmony_ci  *is_class_escape = false;
18851cb0ef41Sopenharmony_ci
18861cb0ef41Sopenharmony_ci  if (current() != '\\') {
18871cb0ef41Sopenharmony_ci    // Not a ClassEscape.
18881cb0ef41Sopenharmony_ci    *char_out = current();
18891cb0ef41Sopenharmony_ci    Advance();
18901cb0ef41Sopenharmony_ci    return;
18911cb0ef41Sopenharmony_ci  }
18921cb0ef41Sopenharmony_ci
18931cb0ef41Sopenharmony_ci  const base::uc32 next = Next();
18941cb0ef41Sopenharmony_ci  switch (next) {
18951cb0ef41Sopenharmony_ci    case 'b':
18961cb0ef41Sopenharmony_ci      *char_out = '\b';
18971cb0ef41Sopenharmony_ci      Advance(2);
18981cb0ef41Sopenharmony_ci      return;
18991cb0ef41Sopenharmony_ci    case '-':
19001cb0ef41Sopenharmony_ci      if (unicode()) {
19011cb0ef41Sopenharmony_ci        *char_out = next;
19021cb0ef41Sopenharmony_ci        Advance(2);
19031cb0ef41Sopenharmony_ci        return;
19041cb0ef41Sopenharmony_ci      }
19051cb0ef41Sopenharmony_ci      break;
19061cb0ef41Sopenharmony_ci    case kEndMarker:
19071cb0ef41Sopenharmony_ci      ReportError(RegExpError::kEscapeAtEndOfPattern);
19081cb0ef41Sopenharmony_ci      return;
19091cb0ef41Sopenharmony_ci    default:
19101cb0ef41Sopenharmony_ci      break;
19111cb0ef41Sopenharmony_ci  }
19121cb0ef41Sopenharmony_ci
19131cb0ef41Sopenharmony_ci  static constexpr InClassEscapeState kInClassEscape =
19141cb0ef41Sopenharmony_ci      InClassEscapeState::kInClass;
19151cb0ef41Sopenharmony_ci  *is_class_escape = TryParseCharacterClassEscape(
19161cb0ef41Sopenharmony_ci      next, kInClassEscape, ranges, zone, add_unicode_case_equivalents);
19171cb0ef41Sopenharmony_ci  if (*is_class_escape) return;
19181cb0ef41Sopenharmony_ci
19191cb0ef41Sopenharmony_ci  bool dummy = false;  // Unused.
19201cb0ef41Sopenharmony_ci  *char_out = ParseCharacterEscape(kInClassEscape, &dummy);
19211cb0ef41Sopenharmony_ci}
19221cb0ef41Sopenharmony_ci
19231cb0ef41Sopenharmony_ci// https://tc39.es/ecma262/#prod-CharacterClassEscape
19241cb0ef41Sopenharmony_citemplate <class CharT>
19251cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::TryParseCharacterClassEscape(
19261cb0ef41Sopenharmony_ci    base::uc32 next, InClassEscapeState in_class_escape_state,
19271cb0ef41Sopenharmony_ci    ZoneList<CharacterRange>* ranges, Zone* zone,
19281cb0ef41Sopenharmony_ci    bool add_unicode_case_equivalents) {
19291cb0ef41Sopenharmony_ci  DCHECK_EQ(current(), '\\');
19301cb0ef41Sopenharmony_ci  DCHECK_EQ(Next(), next);
19311cb0ef41Sopenharmony_ci
19321cb0ef41Sopenharmony_ci  switch (next) {
19331cb0ef41Sopenharmony_ci    case 'd':
19341cb0ef41Sopenharmony_ci    case 'D':
19351cb0ef41Sopenharmony_ci    case 's':
19361cb0ef41Sopenharmony_ci    case 'S':
19371cb0ef41Sopenharmony_ci    case 'w':
19381cb0ef41Sopenharmony_ci    case 'W':
19391cb0ef41Sopenharmony_ci      CharacterRange::AddClassEscape(static_cast<StandardCharacterSet>(next),
19401cb0ef41Sopenharmony_ci                                     ranges, add_unicode_case_equivalents,
19411cb0ef41Sopenharmony_ci                                     zone);
19421cb0ef41Sopenharmony_ci      Advance(2);
19431cb0ef41Sopenharmony_ci      return true;
19441cb0ef41Sopenharmony_ci    case 'p':
19451cb0ef41Sopenharmony_ci    case 'P': {
19461cb0ef41Sopenharmony_ci      if (!unicode()) return false;
19471cb0ef41Sopenharmony_ci      bool negate = next == 'P';
19481cb0ef41Sopenharmony_ci      Advance(2);
19491cb0ef41Sopenharmony_ci      ZoneVector<char> name_1(zone);
19501cb0ef41Sopenharmony_ci      ZoneVector<char> name_2(zone);
19511cb0ef41Sopenharmony_ci      if (!ParsePropertyClassName(&name_1, &name_2) ||
19521cb0ef41Sopenharmony_ci          !AddPropertyClassRange(ranges, negate, name_1, name_2)) {
19531cb0ef41Sopenharmony_ci        ReportError(in_class_escape_state == InClassEscapeState::kInClass
19541cb0ef41Sopenharmony_ci                        ? RegExpError::kInvalidClassPropertyName
19551cb0ef41Sopenharmony_ci                        : RegExpError::kInvalidPropertyName);
19561cb0ef41Sopenharmony_ci      }
19571cb0ef41Sopenharmony_ci      return true;
19581cb0ef41Sopenharmony_ci    }
19591cb0ef41Sopenharmony_ci    default:
19601cb0ef41Sopenharmony_ci      return false;
19611cb0ef41Sopenharmony_ci  }
19621cb0ef41Sopenharmony_ci}
19631cb0ef41Sopenharmony_ci
19641cb0ef41Sopenharmony_citemplate <class CharT>
19651cb0ef41Sopenharmony_ciRegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
19661cb0ef41Sopenharmony_ci    const RegExpBuilder* builder) {
19671cb0ef41Sopenharmony_ci  DCHECK_EQ(current(), '[');
19681cb0ef41Sopenharmony_ci  Advance();
19691cb0ef41Sopenharmony_ci  bool is_negated = false;
19701cb0ef41Sopenharmony_ci  if (current() == '^') {
19711cb0ef41Sopenharmony_ci    is_negated = true;
19721cb0ef41Sopenharmony_ci    Advance();
19731cb0ef41Sopenharmony_ci  }
19741cb0ef41Sopenharmony_ci  ZoneList<CharacterRange>* ranges =
19751cb0ef41Sopenharmony_ci      zone()->template New<ZoneList<CharacterRange>>(2, zone());
19761cb0ef41Sopenharmony_ci  bool add_unicode_case_equivalents = unicode() && builder->ignore_case();
19771cb0ef41Sopenharmony_ci  while (has_more() && current() != ']') {
19781cb0ef41Sopenharmony_ci    base::uc32 char_1, char_2;
19791cb0ef41Sopenharmony_ci    bool is_class_1, is_class_2;
19801cb0ef41Sopenharmony_ci    ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_1,
19811cb0ef41Sopenharmony_ci                     &is_class_1 CHECK_FAILED);
19821cb0ef41Sopenharmony_ci    if (current() == '-') {
19831cb0ef41Sopenharmony_ci      Advance();
19841cb0ef41Sopenharmony_ci      if (current() == kEndMarker) {
19851cb0ef41Sopenharmony_ci        // If we reach the end we break out of the loop and let the
19861cb0ef41Sopenharmony_ci        // following code report an error.
19871cb0ef41Sopenharmony_ci        break;
19881cb0ef41Sopenharmony_ci      } else if (current() == ']') {
19891cb0ef41Sopenharmony_ci        if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
19901cb0ef41Sopenharmony_ci        ranges->Add(CharacterRange::Singleton('-'), zone());
19911cb0ef41Sopenharmony_ci        break;
19921cb0ef41Sopenharmony_ci      }
19931cb0ef41Sopenharmony_ci      ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_2,
19941cb0ef41Sopenharmony_ci                       &is_class_2 CHECK_FAILED);
19951cb0ef41Sopenharmony_ci      if (is_class_1 || is_class_2) {
19961cb0ef41Sopenharmony_ci        // Either end is an escaped character class. Treat the '-' verbatim.
19971cb0ef41Sopenharmony_ci        if (unicode()) {
19981cb0ef41Sopenharmony_ci          // ES2015 21.2.2.15.1 step 1.
19991cb0ef41Sopenharmony_ci          return ReportError(RegExpError::kInvalidCharacterClass);
20001cb0ef41Sopenharmony_ci        }
20011cb0ef41Sopenharmony_ci        if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
20021cb0ef41Sopenharmony_ci        ranges->Add(CharacterRange::Singleton('-'), zone());
20031cb0ef41Sopenharmony_ci        if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2), zone());
20041cb0ef41Sopenharmony_ci        continue;
20051cb0ef41Sopenharmony_ci      }
20061cb0ef41Sopenharmony_ci      // ES2015 21.2.2.15.1 step 6.
20071cb0ef41Sopenharmony_ci      if (char_1 > char_2) {
20081cb0ef41Sopenharmony_ci        return ReportError(RegExpError::kOutOfOrderCharacterClass);
20091cb0ef41Sopenharmony_ci      }
20101cb0ef41Sopenharmony_ci      ranges->Add(CharacterRange::Range(char_1, char_2), zone());
20111cb0ef41Sopenharmony_ci    } else {
20121cb0ef41Sopenharmony_ci      if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
20131cb0ef41Sopenharmony_ci    }
20141cb0ef41Sopenharmony_ci  }
20151cb0ef41Sopenharmony_ci  if (!has_more()) {
20161cb0ef41Sopenharmony_ci    return ReportError(RegExpError::kUnterminatedCharacterClass);
20171cb0ef41Sopenharmony_ci  }
20181cb0ef41Sopenharmony_ci  Advance();
20191cb0ef41Sopenharmony_ci  RegExpCharacterClass::CharacterClassFlags character_class_flags;
20201cb0ef41Sopenharmony_ci  if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED;
20211cb0ef41Sopenharmony_ci  return zone()->template New<RegExpCharacterClass>(zone(), ranges,
20221cb0ef41Sopenharmony_ci                                                    character_class_flags);
20231cb0ef41Sopenharmony_ci}
20241cb0ef41Sopenharmony_ci
20251cb0ef41Sopenharmony_ci#undef CHECK_FAILED
20261cb0ef41Sopenharmony_ci
20271cb0ef41Sopenharmony_citemplate <class CharT>
20281cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) {
20291cb0ef41Sopenharmony_ci  DCHECK_NOT_NULL(result);
20301cb0ef41Sopenharmony_ci  RegExpTree* tree = ParsePattern();
20311cb0ef41Sopenharmony_ci
20321cb0ef41Sopenharmony_ci  if (failed()) {
20331cb0ef41Sopenharmony_ci    DCHECK_NULL(tree);
20341cb0ef41Sopenharmony_ci    DCHECK_NE(error_, RegExpError::kNone);
20351cb0ef41Sopenharmony_ci    result->error = error_;
20361cb0ef41Sopenharmony_ci    result->error_pos = error_pos_;
20371cb0ef41Sopenharmony_ci    return false;
20381cb0ef41Sopenharmony_ci  }
20391cb0ef41Sopenharmony_ci
20401cb0ef41Sopenharmony_ci  DCHECK_NOT_NULL(tree);
20411cb0ef41Sopenharmony_ci  DCHECK_EQ(error_, RegExpError::kNone);
20421cb0ef41Sopenharmony_ci  if (FLAG_trace_regexp_parser) {
20431cb0ef41Sopenharmony_ci    StdoutStream os;
20441cb0ef41Sopenharmony_ci    tree->Print(os, zone());
20451cb0ef41Sopenharmony_ci    os << "\n";
20461cb0ef41Sopenharmony_ci  }
20471cb0ef41Sopenharmony_ci
20481cb0ef41Sopenharmony_ci  result->tree = tree;
20491cb0ef41Sopenharmony_ci  const int capture_count = captures_started();
20501cb0ef41Sopenharmony_ci  result->simple = tree->IsAtom() && simple() && capture_count == 0;
20511cb0ef41Sopenharmony_ci  result->contains_anchor = contains_anchor();
20521cb0ef41Sopenharmony_ci  result->capture_count = capture_count;
20531cb0ef41Sopenharmony_ci  result->named_captures = GetNamedCaptures();
20541cb0ef41Sopenharmony_ci  return true;
20551cb0ef41Sopenharmony_ci}
20561cb0ef41Sopenharmony_ci
20571cb0ef41Sopenharmony_civoid RegExpBuilder::AddLeadSurrogate(base::uc16 lead_surrogate) {
20581cb0ef41Sopenharmony_ci  DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
20591cb0ef41Sopenharmony_ci  FlushPendingSurrogate();
20601cb0ef41Sopenharmony_ci  // Hold onto the lead surrogate, waiting for a trail surrogate to follow.
20611cb0ef41Sopenharmony_ci  pending_surrogate_ = lead_surrogate;
20621cb0ef41Sopenharmony_ci}
20631cb0ef41Sopenharmony_ci
20641cb0ef41Sopenharmony_civoid RegExpBuilder::AddTrailSurrogate(base::uc16 trail_surrogate) {
20651cb0ef41Sopenharmony_ci  DCHECK(unibrow::Utf16::IsTrailSurrogate(trail_surrogate));
20661cb0ef41Sopenharmony_ci  if (pending_surrogate_ != kNoPendingSurrogate) {
20671cb0ef41Sopenharmony_ci    base::uc16 lead_surrogate = pending_surrogate_;
20681cb0ef41Sopenharmony_ci    pending_surrogate_ = kNoPendingSurrogate;
20691cb0ef41Sopenharmony_ci    DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
20701cb0ef41Sopenharmony_ci    base::uc32 combined =
20711cb0ef41Sopenharmony_ci        unibrow::Utf16::CombineSurrogatePair(lead_surrogate, trail_surrogate);
20721cb0ef41Sopenharmony_ci    if (NeedsDesugaringForIgnoreCase(combined)) {
20731cb0ef41Sopenharmony_ci      AddCharacterClassForDesugaring(combined);
20741cb0ef41Sopenharmony_ci    } else {
20751cb0ef41Sopenharmony_ci      ZoneList<base::uc16> surrogate_pair(2, zone());
20761cb0ef41Sopenharmony_ci      surrogate_pair.Add(lead_surrogate, zone());
20771cb0ef41Sopenharmony_ci      surrogate_pair.Add(trail_surrogate, zone());
20781cb0ef41Sopenharmony_ci      RegExpAtom* atom =
20791cb0ef41Sopenharmony_ci          zone()->New<RegExpAtom>(surrogate_pair.ToConstVector());
20801cb0ef41Sopenharmony_ci      AddAtom(atom);
20811cb0ef41Sopenharmony_ci    }
20821cb0ef41Sopenharmony_ci  } else {
20831cb0ef41Sopenharmony_ci    pending_surrogate_ = trail_surrogate;
20841cb0ef41Sopenharmony_ci    FlushPendingSurrogate();
20851cb0ef41Sopenharmony_ci  }
20861cb0ef41Sopenharmony_ci}
20871cb0ef41Sopenharmony_ci
20881cb0ef41Sopenharmony_civoid RegExpBuilder::FlushPendingSurrogate() {
20891cb0ef41Sopenharmony_ci  if (pending_surrogate_ != kNoPendingSurrogate) {
20901cb0ef41Sopenharmony_ci    DCHECK(unicode());
20911cb0ef41Sopenharmony_ci    base::uc32 c = pending_surrogate_;
20921cb0ef41Sopenharmony_ci    pending_surrogate_ = kNoPendingSurrogate;
20931cb0ef41Sopenharmony_ci    AddCharacterClassForDesugaring(c);
20941cb0ef41Sopenharmony_ci  }
20951cb0ef41Sopenharmony_ci}
20961cb0ef41Sopenharmony_ci
20971cb0ef41Sopenharmony_civoid RegExpBuilder::FlushCharacters() {
20981cb0ef41Sopenharmony_ci  FlushPendingSurrogate();
20991cb0ef41Sopenharmony_ci  pending_empty_ = false;
21001cb0ef41Sopenharmony_ci  if (characters_ != nullptr) {
21011cb0ef41Sopenharmony_ci    RegExpTree* atom = zone()->New<RegExpAtom>(characters_->ToConstVector());
21021cb0ef41Sopenharmony_ci    characters_ = nullptr;
21031cb0ef41Sopenharmony_ci    text_.emplace_back(atom);
21041cb0ef41Sopenharmony_ci    LAST(ADD_ATOM);
21051cb0ef41Sopenharmony_ci  }
21061cb0ef41Sopenharmony_ci}
21071cb0ef41Sopenharmony_ci
21081cb0ef41Sopenharmony_civoid RegExpBuilder::FlushText() {
21091cb0ef41Sopenharmony_ci  FlushCharacters();
21101cb0ef41Sopenharmony_ci  size_t num_text = text_.size();
21111cb0ef41Sopenharmony_ci  if (num_text == 0) {
21121cb0ef41Sopenharmony_ci    return;
21131cb0ef41Sopenharmony_ci  } else if (num_text == 1) {
21141cb0ef41Sopenharmony_ci    terms_.emplace_back(text_.back());
21151cb0ef41Sopenharmony_ci  } else {
21161cb0ef41Sopenharmony_ci    RegExpText* text = zone()->New<RegExpText>(zone());
21171cb0ef41Sopenharmony_ci    for (size_t i = 0; i < num_text; i++) {
21181cb0ef41Sopenharmony_ci      text_[i]->AppendToText(text, zone());
21191cb0ef41Sopenharmony_ci    }
21201cb0ef41Sopenharmony_ci    terms_.emplace_back(text);
21211cb0ef41Sopenharmony_ci  }
21221cb0ef41Sopenharmony_ci  text_.clear();
21231cb0ef41Sopenharmony_ci}
21241cb0ef41Sopenharmony_ci
21251cb0ef41Sopenharmony_civoid RegExpBuilder::AddCharacter(base::uc16 c) {
21261cb0ef41Sopenharmony_ci  FlushPendingSurrogate();
21271cb0ef41Sopenharmony_ci  pending_empty_ = false;
21281cb0ef41Sopenharmony_ci  if (NeedsDesugaringForIgnoreCase(c)) {
21291cb0ef41Sopenharmony_ci    AddCharacterClassForDesugaring(c);
21301cb0ef41Sopenharmony_ci  } else {
21311cb0ef41Sopenharmony_ci    if (characters_ == nullptr) {
21321cb0ef41Sopenharmony_ci      characters_ = zone()->New<ZoneList<base::uc16>>(4, zone());
21331cb0ef41Sopenharmony_ci    }
21341cb0ef41Sopenharmony_ci    characters_->Add(c, zone());
21351cb0ef41Sopenharmony_ci    LAST(ADD_CHAR);
21361cb0ef41Sopenharmony_ci  }
21371cb0ef41Sopenharmony_ci}
21381cb0ef41Sopenharmony_ci
21391cb0ef41Sopenharmony_civoid RegExpBuilder::AddUnicodeCharacter(base::uc32 c) {
21401cb0ef41Sopenharmony_ci  if (c > static_cast<base::uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
21411cb0ef41Sopenharmony_ci    DCHECK(unicode());
21421cb0ef41Sopenharmony_ci    AddLeadSurrogate(unibrow::Utf16::LeadSurrogate(c));
21431cb0ef41Sopenharmony_ci    AddTrailSurrogate(unibrow::Utf16::TrailSurrogate(c));
21441cb0ef41Sopenharmony_ci  } else if (unicode() && unibrow::Utf16::IsLeadSurrogate(c)) {
21451cb0ef41Sopenharmony_ci    AddLeadSurrogate(c);
21461cb0ef41Sopenharmony_ci  } else if (unicode() && unibrow::Utf16::IsTrailSurrogate(c)) {
21471cb0ef41Sopenharmony_ci    AddTrailSurrogate(c);
21481cb0ef41Sopenharmony_ci  } else {
21491cb0ef41Sopenharmony_ci    AddCharacter(static_cast<base::uc16>(c));
21501cb0ef41Sopenharmony_ci  }
21511cb0ef41Sopenharmony_ci}
21521cb0ef41Sopenharmony_ci
21531cb0ef41Sopenharmony_civoid RegExpBuilder::AddEscapedUnicodeCharacter(base::uc32 character) {
21541cb0ef41Sopenharmony_ci  // A lead or trail surrogate parsed via escape sequence will not
21551cb0ef41Sopenharmony_ci  // pair up with any preceding lead or following trail surrogate.
21561cb0ef41Sopenharmony_ci  FlushPendingSurrogate();
21571cb0ef41Sopenharmony_ci  AddUnicodeCharacter(character);
21581cb0ef41Sopenharmony_ci  FlushPendingSurrogate();
21591cb0ef41Sopenharmony_ci}
21601cb0ef41Sopenharmony_ci
21611cb0ef41Sopenharmony_civoid RegExpBuilder::AddEmpty() { pending_empty_ = true; }
21621cb0ef41Sopenharmony_ci
21631cb0ef41Sopenharmony_civoid RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
21641cb0ef41Sopenharmony_ci  if (NeedsDesugaringForUnicode(cc)) {
21651cb0ef41Sopenharmony_ci    // With /u, character class needs to be desugared, so it
21661cb0ef41Sopenharmony_ci    // must be a standalone term instead of being part of a RegExpText.
21671cb0ef41Sopenharmony_ci    AddTerm(cc);
21681cb0ef41Sopenharmony_ci  } else {
21691cb0ef41Sopenharmony_ci    AddAtom(cc);
21701cb0ef41Sopenharmony_ci  }
21711cb0ef41Sopenharmony_ci}
21721cb0ef41Sopenharmony_ci
21731cb0ef41Sopenharmony_civoid RegExpBuilder::AddCharacterClassForDesugaring(base::uc32 c) {
21741cb0ef41Sopenharmony_ci  AddTerm(zone()->New<RegExpCharacterClass>(
21751cb0ef41Sopenharmony_ci      zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c))));
21761cb0ef41Sopenharmony_ci}
21771cb0ef41Sopenharmony_ci
21781cb0ef41Sopenharmony_civoid RegExpBuilder::AddAtom(RegExpTree* term) {
21791cb0ef41Sopenharmony_ci  if (term->IsEmpty()) {
21801cb0ef41Sopenharmony_ci    AddEmpty();
21811cb0ef41Sopenharmony_ci    return;
21821cb0ef41Sopenharmony_ci  }
21831cb0ef41Sopenharmony_ci  if (term->IsTextElement()) {
21841cb0ef41Sopenharmony_ci    FlushCharacters();
21851cb0ef41Sopenharmony_ci    text_.emplace_back(term);
21861cb0ef41Sopenharmony_ci  } else {
21871cb0ef41Sopenharmony_ci    FlushText();
21881cb0ef41Sopenharmony_ci    terms_.emplace_back(term);
21891cb0ef41Sopenharmony_ci  }
21901cb0ef41Sopenharmony_ci  LAST(ADD_ATOM);
21911cb0ef41Sopenharmony_ci}
21921cb0ef41Sopenharmony_ci
21931cb0ef41Sopenharmony_civoid RegExpBuilder::AddTerm(RegExpTree* term) {
21941cb0ef41Sopenharmony_ci  FlushText();
21951cb0ef41Sopenharmony_ci  terms_.emplace_back(term);
21961cb0ef41Sopenharmony_ci  LAST(ADD_ATOM);
21971cb0ef41Sopenharmony_ci}
21981cb0ef41Sopenharmony_ci
21991cb0ef41Sopenharmony_civoid RegExpBuilder::AddAssertion(RegExpTree* assert) {
22001cb0ef41Sopenharmony_ci  FlushText();
22011cb0ef41Sopenharmony_ci  terms_.emplace_back(assert);
22021cb0ef41Sopenharmony_ci  LAST(ADD_ASSERT);
22031cb0ef41Sopenharmony_ci}
22041cb0ef41Sopenharmony_ci
22051cb0ef41Sopenharmony_civoid RegExpBuilder::NewAlternative() { FlushTerms(); }
22061cb0ef41Sopenharmony_ci
22071cb0ef41Sopenharmony_civoid RegExpBuilder::FlushTerms() {
22081cb0ef41Sopenharmony_ci  FlushText();
22091cb0ef41Sopenharmony_ci  size_t num_terms = terms_.size();
22101cb0ef41Sopenharmony_ci  RegExpTree* alternative;
22111cb0ef41Sopenharmony_ci  if (num_terms == 0) {
22121cb0ef41Sopenharmony_ci    alternative = zone()->New<RegExpEmpty>();
22131cb0ef41Sopenharmony_ci  } else if (num_terms == 1) {
22141cb0ef41Sopenharmony_ci    alternative = terms_.back();
22151cb0ef41Sopenharmony_ci  } else {
22161cb0ef41Sopenharmony_ci    alternative =
22171cb0ef41Sopenharmony_ci        zone()->New<RegExpAlternative>(zone()->New<ZoneList<RegExpTree*>>(
22181cb0ef41Sopenharmony_ci            base::VectorOf(terms_.begin(), terms_.size()), zone()));
22191cb0ef41Sopenharmony_ci  }
22201cb0ef41Sopenharmony_ci  alternatives_.emplace_back(alternative);
22211cb0ef41Sopenharmony_ci  terms_.clear();
22221cb0ef41Sopenharmony_ci  LAST(ADD_NONE);
22231cb0ef41Sopenharmony_ci}
22241cb0ef41Sopenharmony_ci
22251cb0ef41Sopenharmony_cibool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
22261cb0ef41Sopenharmony_ci  if (!unicode()) return false;
22271cb0ef41Sopenharmony_ci  // TODO(yangguo): we could be smarter than this. Case-insensitivity does not
22281cb0ef41Sopenharmony_ci  // necessarily mean that we need to desugar. It's probably nicer to have a
22291cb0ef41Sopenharmony_ci  // separate pass to figure out unicode desugarings.
22301cb0ef41Sopenharmony_ci  if (ignore_case()) return true;
22311cb0ef41Sopenharmony_ci  ZoneList<CharacterRange>* ranges = cc->ranges(zone());
22321cb0ef41Sopenharmony_ci  CharacterRange::Canonicalize(ranges);
22331cb0ef41Sopenharmony_ci  for (int i = ranges->length() - 1; i >= 0; i--) {
22341cb0ef41Sopenharmony_ci    base::uc32 from = ranges->at(i).from();
22351cb0ef41Sopenharmony_ci    base::uc32 to = ranges->at(i).to();
22361cb0ef41Sopenharmony_ci    // Check for non-BMP characters.
22371cb0ef41Sopenharmony_ci    if (to >= kNonBmpStart) return true;
22381cb0ef41Sopenharmony_ci    // Check for lone surrogates.
22391cb0ef41Sopenharmony_ci    if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
22401cb0ef41Sopenharmony_ci  }
22411cb0ef41Sopenharmony_ci  return false;
22421cb0ef41Sopenharmony_ci}
22431cb0ef41Sopenharmony_ci
22441cb0ef41Sopenharmony_cibool RegExpBuilder::NeedsDesugaringForIgnoreCase(base::uc32 c) {
22451cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
22461cb0ef41Sopenharmony_ci  if (unicode() && ignore_case()) {
22471cb0ef41Sopenharmony_ci    icu::UnicodeSet set(c, c);
22481cb0ef41Sopenharmony_ci    set.closeOver(USET_CASE_INSENSITIVE);
22491cb0ef41Sopenharmony_ci    set.removeAllStrings();
22501cb0ef41Sopenharmony_ci    return set.size() > 1;
22511cb0ef41Sopenharmony_ci  }
22521cb0ef41Sopenharmony_ci  // In the case where ICU is not included, we act as if the unicode flag is
22531cb0ef41Sopenharmony_ci  // not set, and do not desugar.
22541cb0ef41Sopenharmony_ci#endif  // V8_INTL_SUPPORT
22551cb0ef41Sopenharmony_ci  return false;
22561cb0ef41Sopenharmony_ci}
22571cb0ef41Sopenharmony_ci
22581cb0ef41Sopenharmony_ciRegExpTree* RegExpBuilder::ToRegExp() {
22591cb0ef41Sopenharmony_ci  FlushTerms();
22601cb0ef41Sopenharmony_ci  size_t num_alternatives = alternatives_.size();
22611cb0ef41Sopenharmony_ci  if (num_alternatives == 0) return zone()->New<RegExpEmpty>();
22621cb0ef41Sopenharmony_ci  if (num_alternatives == 1) return alternatives_.back();
22631cb0ef41Sopenharmony_ci  return zone()->New<RegExpDisjunction>(zone()->New<ZoneList<RegExpTree*>>(
22641cb0ef41Sopenharmony_ci      base::VectorOf(alternatives_.begin(), alternatives_.size()), zone()));
22651cb0ef41Sopenharmony_ci}
22661cb0ef41Sopenharmony_ci
22671cb0ef41Sopenharmony_cibool RegExpBuilder::AddQuantifierToAtom(
22681cb0ef41Sopenharmony_ci    int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {
22691cb0ef41Sopenharmony_ci  FlushPendingSurrogate();
22701cb0ef41Sopenharmony_ci  if (pending_empty_) {
22711cb0ef41Sopenharmony_ci    pending_empty_ = false;
22721cb0ef41Sopenharmony_ci    return true;
22731cb0ef41Sopenharmony_ci  }
22741cb0ef41Sopenharmony_ci  RegExpTree* atom;
22751cb0ef41Sopenharmony_ci  if (characters_ != nullptr) {
22761cb0ef41Sopenharmony_ci    DCHECK(last_added_ == ADD_CHAR);
22771cb0ef41Sopenharmony_ci    // Last atom was character.
22781cb0ef41Sopenharmony_ci    base::Vector<const base::uc16> char_vector = characters_->ToConstVector();
22791cb0ef41Sopenharmony_ci    int num_chars = char_vector.length();
22801cb0ef41Sopenharmony_ci    if (num_chars > 1) {
22811cb0ef41Sopenharmony_ci      base::Vector<const base::uc16> prefix =
22821cb0ef41Sopenharmony_ci          char_vector.SubVector(0, num_chars - 1);
22831cb0ef41Sopenharmony_ci      text_.emplace_back(zone()->New<RegExpAtom>(prefix));
22841cb0ef41Sopenharmony_ci      char_vector = char_vector.SubVector(num_chars - 1, num_chars);
22851cb0ef41Sopenharmony_ci    }
22861cb0ef41Sopenharmony_ci    characters_ = nullptr;
22871cb0ef41Sopenharmony_ci    atom = zone()->New<RegExpAtom>(char_vector);
22881cb0ef41Sopenharmony_ci    FlushText();
22891cb0ef41Sopenharmony_ci  } else if (text_.size() > 0) {
22901cb0ef41Sopenharmony_ci    DCHECK(last_added_ == ADD_ATOM);
22911cb0ef41Sopenharmony_ci    atom = text_.back();
22921cb0ef41Sopenharmony_ci    text_.pop_back();
22931cb0ef41Sopenharmony_ci    FlushText();
22941cb0ef41Sopenharmony_ci  } else if (terms_.size() > 0) {
22951cb0ef41Sopenharmony_ci    DCHECK(last_added_ == ADD_ATOM);
22961cb0ef41Sopenharmony_ci    atom = terms_.back();
22971cb0ef41Sopenharmony_ci    terms_.pop_back();
22981cb0ef41Sopenharmony_ci    if (atom->IsLookaround()) {
22991cb0ef41Sopenharmony_ci      // With /u, lookarounds are not quantifiable.
23001cb0ef41Sopenharmony_ci      if (unicode()) return false;
23011cb0ef41Sopenharmony_ci      // Lookbehinds are not quantifiable.
23021cb0ef41Sopenharmony_ci      if (atom->AsLookaround()->type() == RegExpLookaround::LOOKBEHIND) {
23031cb0ef41Sopenharmony_ci        return false;
23041cb0ef41Sopenharmony_ci      }
23051cb0ef41Sopenharmony_ci    }
23061cb0ef41Sopenharmony_ci    if (atom->max_match() == 0) {
23071cb0ef41Sopenharmony_ci      // Guaranteed to only match an empty string.
23081cb0ef41Sopenharmony_ci      LAST(ADD_TERM);
23091cb0ef41Sopenharmony_ci      if (min == 0) {
23101cb0ef41Sopenharmony_ci        return true;
23111cb0ef41Sopenharmony_ci      }
23121cb0ef41Sopenharmony_ci      terms_.emplace_back(atom);
23131cb0ef41Sopenharmony_ci      return true;
23141cb0ef41Sopenharmony_ci    }
23151cb0ef41Sopenharmony_ci  } else {
23161cb0ef41Sopenharmony_ci    // Only call immediately after adding an atom or character!
23171cb0ef41Sopenharmony_ci    UNREACHABLE();
23181cb0ef41Sopenharmony_ci  }
23191cb0ef41Sopenharmony_ci  terms_.emplace_back(
23201cb0ef41Sopenharmony_ci      zone()->New<RegExpQuantifier>(min, max, quantifier_type, atom));
23211cb0ef41Sopenharmony_ci  LAST(ADD_TERM);
23221cb0ef41Sopenharmony_ci  return true;
23231cb0ef41Sopenharmony_ci}
23241cb0ef41Sopenharmony_ci
23251cb0ef41Sopenharmony_citemplate class RegExpParserImpl<uint8_t>;
23261cb0ef41Sopenharmony_citemplate class RegExpParserImpl<base::uc16>;
23271cb0ef41Sopenharmony_ci
23281cb0ef41Sopenharmony_ci}  // namespace
23291cb0ef41Sopenharmony_ci
23301cb0ef41Sopenharmony_ci// static
23311cb0ef41Sopenharmony_cibool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
23321cb0ef41Sopenharmony_ci                                             Handle<String> input,
23331cb0ef41Sopenharmony_ci                                             RegExpFlags flags,
23341cb0ef41Sopenharmony_ci                                             RegExpCompileData* result) {
23351cb0ef41Sopenharmony_ci  DisallowGarbageCollection no_gc;
23361cb0ef41Sopenharmony_ci  uintptr_t stack_limit = isolate->stack_guard()->real_climit();
23371cb0ef41Sopenharmony_ci  String::FlatContent content = input->GetFlatContent(no_gc);
23381cb0ef41Sopenharmony_ci  if (content.IsOneByte()) {
23391cb0ef41Sopenharmony_ci    base::Vector<const uint8_t> v = content.ToOneByteVector();
23401cb0ef41Sopenharmony_ci    return RegExpParserImpl<uint8_t>{v.begin(),   v.length(), flags,
23411cb0ef41Sopenharmony_ci                                     stack_limit, zone,       no_gc}
23421cb0ef41Sopenharmony_ci        .Parse(result);
23431cb0ef41Sopenharmony_ci  } else {
23441cb0ef41Sopenharmony_ci    base::Vector<const base::uc16> v = content.ToUC16Vector();
23451cb0ef41Sopenharmony_ci    return RegExpParserImpl<base::uc16>{v.begin(),   v.length(), flags,
23461cb0ef41Sopenharmony_ci                                        stack_limit, zone,       no_gc}
23471cb0ef41Sopenharmony_ci        .Parse(result);
23481cb0ef41Sopenharmony_ci  }
23491cb0ef41Sopenharmony_ci}
23501cb0ef41Sopenharmony_ci
23511cb0ef41Sopenharmony_ci// static
23521cb0ef41Sopenharmony_citemplate <class CharT>
23531cb0ef41Sopenharmony_cibool RegExpParser::VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit,
23541cb0ef41Sopenharmony_ci                                      const CharT* input, int input_length,
23551cb0ef41Sopenharmony_ci                                      RegExpFlags flags,
23561cb0ef41Sopenharmony_ci                                      RegExpCompileData* result,
23571cb0ef41Sopenharmony_ci                                      const DisallowGarbageCollection& no_gc) {
23581cb0ef41Sopenharmony_ci  return RegExpParserImpl<CharT>{input,       input_length, flags,
23591cb0ef41Sopenharmony_ci                                 stack_limit, zone,         no_gc}
23601cb0ef41Sopenharmony_ci      .Parse(result);
23611cb0ef41Sopenharmony_ci}
23621cb0ef41Sopenharmony_ci
23631cb0ef41Sopenharmony_citemplate bool RegExpParser::VerifyRegExpSyntax<uint8_t>(
23641cb0ef41Sopenharmony_ci    Zone*, uintptr_t, const uint8_t*, int, RegExpFlags, RegExpCompileData*,
23651cb0ef41Sopenharmony_ci    const DisallowGarbageCollection&);
23661cb0ef41Sopenharmony_citemplate bool RegExpParser::VerifyRegExpSyntax<base::uc16>(
23671cb0ef41Sopenharmony_ci    Zone*, uintptr_t, const base::uc16*, int, RegExpFlags, RegExpCompileData*,
23681cb0ef41Sopenharmony_ci    const DisallowGarbageCollection&);
23691cb0ef41Sopenharmony_ci
23701cb0ef41Sopenharmony_ci// static
23711cb0ef41Sopenharmony_cibool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
23721cb0ef41Sopenharmony_ci                                      Handle<String> input, RegExpFlags flags,
23731cb0ef41Sopenharmony_ci                                      RegExpCompileData* result,
23741cb0ef41Sopenharmony_ci                                      const DisallowGarbageCollection&) {
23751cb0ef41Sopenharmony_ci  return ParseRegExpFromHeapString(isolate, zone, input, flags, result);
23761cb0ef41Sopenharmony_ci}
23771cb0ef41Sopenharmony_ci
23781cb0ef41Sopenharmony_ci#undef LAST
23791cb0ef41Sopenharmony_ci
23801cb0ef41Sopenharmony_ci}  // namespace internal
23811cb0ef41Sopenharmony_ci}  // namespace v8
2382