11cb0ef41Sopenharmony_ci// Copyright 2016 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-parser.h" 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ci#include "src/base/small-vector.h" 81cb0ef41Sopenharmony_ci#include "src/execution/isolate.h" 91cb0ef41Sopenharmony_ci#include "src/objects/string-inl.h" 101cb0ef41Sopenharmony_ci#include "src/regexp/property-sequences.h" 111cb0ef41Sopenharmony_ci#include "src/regexp/regexp-ast.h" 121cb0ef41Sopenharmony_ci#include "src/regexp/regexp-macro-assembler.h" 131cb0ef41Sopenharmony_ci#include "src/regexp/regexp.h" 141cb0ef41Sopenharmony_ci#include "src/strings/char-predicates-inl.h" 151cb0ef41Sopenharmony_ci#include "src/utils/ostreams.h" 161cb0ef41Sopenharmony_ci#include "src/utils/utils.h" 171cb0ef41Sopenharmony_ci#include "src/zone/zone-allocator.h" 181cb0ef41Sopenharmony_ci#include "src/zone/zone-list-inl.h" 191cb0ef41Sopenharmony_ci 201cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 211cb0ef41Sopenharmony_ci#include "unicode/uniset.h" 221cb0ef41Sopenharmony_ci#endif // V8_INTL_SUPPORT 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_cinamespace v8 { 251cb0ef41Sopenharmony_cinamespace internal { 261cb0ef41Sopenharmony_ci 271cb0ef41Sopenharmony_cinamespace { 281cb0ef41Sopenharmony_ci 291cb0ef41Sopenharmony_ci// Whether we're currently inside the ClassEscape production 301cb0ef41Sopenharmony_ci// (tc39.es/ecma262/#prod-annexB-CharacterEscape). 311cb0ef41Sopenharmony_cienum class InClassEscapeState { 321cb0ef41Sopenharmony_ci kInClass, 331cb0ef41Sopenharmony_ci kNotInClass, 341cb0ef41Sopenharmony_ci}; 351cb0ef41Sopenharmony_ci 361cb0ef41Sopenharmony_ci// Accumulates RegExp atoms and assertions into lists of terms and alternatives. 371cb0ef41Sopenharmony_ciclass RegExpBuilder { 381cb0ef41Sopenharmony_ci public: 391cb0ef41Sopenharmony_ci RegExpBuilder(Zone* zone, RegExpFlags flags) 401cb0ef41Sopenharmony_ci : zone_(zone), 411cb0ef41Sopenharmony_ci flags_(flags), 421cb0ef41Sopenharmony_ci terms_(ZoneAllocator<RegExpTree*>{zone}), 431cb0ef41Sopenharmony_ci text_(ZoneAllocator<RegExpTree*>{zone}), 441cb0ef41Sopenharmony_ci alternatives_(ZoneAllocator<RegExpTree*>{zone}) {} 451cb0ef41Sopenharmony_ci void AddCharacter(base::uc16 character); 461cb0ef41Sopenharmony_ci void AddUnicodeCharacter(base::uc32 character); 471cb0ef41Sopenharmony_ci void AddEscapedUnicodeCharacter(base::uc32 character); 481cb0ef41Sopenharmony_ci // "Adds" an empty expression. Does nothing except consume a 491cb0ef41Sopenharmony_ci // following quantifier 501cb0ef41Sopenharmony_ci void AddEmpty(); 511cb0ef41Sopenharmony_ci void AddCharacterClass(RegExpCharacterClass* cc); 521cb0ef41Sopenharmony_ci void AddCharacterClassForDesugaring(base::uc32 c); 531cb0ef41Sopenharmony_ci void AddAtom(RegExpTree* tree); 541cb0ef41Sopenharmony_ci void AddTerm(RegExpTree* tree); 551cb0ef41Sopenharmony_ci void AddAssertion(RegExpTree* tree); 561cb0ef41Sopenharmony_ci void NewAlternative(); // '|' 571cb0ef41Sopenharmony_ci bool AddQuantifierToAtom(int min, int max, 581cb0ef41Sopenharmony_ci RegExpQuantifier::QuantifierType type); 591cb0ef41Sopenharmony_ci void FlushText(); 601cb0ef41Sopenharmony_ci RegExpTree* ToRegExp(); 611cb0ef41Sopenharmony_ci RegExpFlags flags() const { return flags_; } 621cb0ef41Sopenharmony_ci 631cb0ef41Sopenharmony_ci bool ignore_case() const { return IsIgnoreCase(flags_); } 641cb0ef41Sopenharmony_ci bool multiline() const { return IsMultiline(flags_); } 651cb0ef41Sopenharmony_ci bool dotall() const { return IsDotAll(flags_); } 661cb0ef41Sopenharmony_ci 671cb0ef41Sopenharmony_ci private: 681cb0ef41Sopenharmony_ci static const base::uc16 kNoPendingSurrogate = 0; 691cb0ef41Sopenharmony_ci void AddLeadSurrogate(base::uc16 lead_surrogate); 701cb0ef41Sopenharmony_ci void AddTrailSurrogate(base::uc16 trail_surrogate); 711cb0ef41Sopenharmony_ci void FlushPendingSurrogate(); 721cb0ef41Sopenharmony_ci void FlushCharacters(); 731cb0ef41Sopenharmony_ci void FlushTerms(); 741cb0ef41Sopenharmony_ci bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc); 751cb0ef41Sopenharmony_ci bool NeedsDesugaringForIgnoreCase(base::uc32 c); 761cb0ef41Sopenharmony_ci Zone* zone() const { return zone_; } 771cb0ef41Sopenharmony_ci bool unicode() const { return IsUnicode(flags_); } 781cb0ef41Sopenharmony_ci 791cb0ef41Sopenharmony_ci Zone* const zone_; 801cb0ef41Sopenharmony_ci bool pending_empty_ = false; 811cb0ef41Sopenharmony_ci const RegExpFlags flags_; 821cb0ef41Sopenharmony_ci ZoneList<base::uc16>* characters_ = nullptr; 831cb0ef41Sopenharmony_ci base::uc16 pending_surrogate_ = kNoPendingSurrogate; 841cb0ef41Sopenharmony_ci 851cb0ef41Sopenharmony_ci using SmallRegExpTreeVector = 861cb0ef41Sopenharmony_ci base::SmallVector<RegExpTree*, 8, ZoneAllocator<RegExpTree*>>; 871cb0ef41Sopenharmony_ci SmallRegExpTreeVector terms_; 881cb0ef41Sopenharmony_ci SmallRegExpTreeVector text_; 891cb0ef41Sopenharmony_ci SmallRegExpTreeVector alternatives_; 901cb0ef41Sopenharmony_ci#ifdef DEBUG 911cb0ef41Sopenharmony_ci enum { 921cb0ef41Sopenharmony_ci ADD_NONE, 931cb0ef41Sopenharmony_ci ADD_CHAR, 941cb0ef41Sopenharmony_ci ADD_TERM, 951cb0ef41Sopenharmony_ci ADD_ASSERT, 961cb0ef41Sopenharmony_ci ADD_ATOM 971cb0ef41Sopenharmony_ci } last_added_ = ADD_NONE; 981cb0ef41Sopenharmony_ci#define LAST(x) last_added_ = x; 991cb0ef41Sopenharmony_ci#else 1001cb0ef41Sopenharmony_ci#define LAST(x) 1011cb0ef41Sopenharmony_ci#endif 1021cb0ef41Sopenharmony_ci}; 1031cb0ef41Sopenharmony_ci 1041cb0ef41Sopenharmony_cienum SubexpressionType { 1051cb0ef41Sopenharmony_ci INITIAL, 1061cb0ef41Sopenharmony_ci CAPTURE, // All positive values represent captures. 1071cb0ef41Sopenharmony_ci POSITIVE_LOOKAROUND, 1081cb0ef41Sopenharmony_ci NEGATIVE_LOOKAROUND, 1091cb0ef41Sopenharmony_ci GROUPING 1101cb0ef41Sopenharmony_ci}; 1111cb0ef41Sopenharmony_ci 1121cb0ef41Sopenharmony_ciclass RegExpParserState : public ZoneObject { 1131cb0ef41Sopenharmony_ci public: 1141cb0ef41Sopenharmony_ci // Push a state on the stack. 1151cb0ef41Sopenharmony_ci RegExpParserState(RegExpParserState* previous_state, 1161cb0ef41Sopenharmony_ci SubexpressionType group_type, 1171cb0ef41Sopenharmony_ci RegExpLookaround::Type lookaround_type, 1181cb0ef41Sopenharmony_ci int disjunction_capture_index, 1191cb0ef41Sopenharmony_ci const ZoneVector<base::uc16>* capture_name, 1201cb0ef41Sopenharmony_ci RegExpFlags flags, Zone* zone) 1211cb0ef41Sopenharmony_ci : previous_state_(previous_state), 1221cb0ef41Sopenharmony_ci builder_(zone, flags), 1231cb0ef41Sopenharmony_ci group_type_(group_type), 1241cb0ef41Sopenharmony_ci lookaround_type_(lookaround_type), 1251cb0ef41Sopenharmony_ci disjunction_capture_index_(disjunction_capture_index), 1261cb0ef41Sopenharmony_ci capture_name_(capture_name) {} 1271cb0ef41Sopenharmony_ci // Parser state of containing expression, if any. 1281cb0ef41Sopenharmony_ci RegExpParserState* previous_state() const { return previous_state_; } 1291cb0ef41Sopenharmony_ci bool IsSubexpression() { return previous_state_ != nullptr; } 1301cb0ef41Sopenharmony_ci // RegExpBuilder building this regexp's AST. 1311cb0ef41Sopenharmony_ci RegExpBuilder* builder() { return &builder_; } 1321cb0ef41Sopenharmony_ci // Type of regexp being parsed (parenthesized group or entire regexp). 1331cb0ef41Sopenharmony_ci SubexpressionType group_type() const { return group_type_; } 1341cb0ef41Sopenharmony_ci // Lookahead or Lookbehind. 1351cb0ef41Sopenharmony_ci RegExpLookaround::Type lookaround_type() const { return lookaround_type_; } 1361cb0ef41Sopenharmony_ci // Index in captures array of first capture in this sub-expression, if any. 1371cb0ef41Sopenharmony_ci // Also the capture index of this sub-expression itself, if group_type 1381cb0ef41Sopenharmony_ci // is CAPTURE. 1391cb0ef41Sopenharmony_ci int capture_index() const { return disjunction_capture_index_; } 1401cb0ef41Sopenharmony_ci // The name of the current sub-expression, if group_type is CAPTURE. Only 1411cb0ef41Sopenharmony_ci // used for named captures. 1421cb0ef41Sopenharmony_ci const ZoneVector<base::uc16>* capture_name() const { return capture_name_; } 1431cb0ef41Sopenharmony_ci 1441cb0ef41Sopenharmony_ci bool IsNamedCapture() const { return capture_name_ != nullptr; } 1451cb0ef41Sopenharmony_ci 1461cb0ef41Sopenharmony_ci // Check whether the parser is inside a capture group with the given index. 1471cb0ef41Sopenharmony_ci bool IsInsideCaptureGroup(int index) const { 1481cb0ef41Sopenharmony_ci for (const RegExpParserState* s = this; s != nullptr; 1491cb0ef41Sopenharmony_ci s = s->previous_state()) { 1501cb0ef41Sopenharmony_ci if (s->group_type() != CAPTURE) continue; 1511cb0ef41Sopenharmony_ci // Return true if we found the matching capture index. 1521cb0ef41Sopenharmony_ci if (index == s->capture_index()) return true; 1531cb0ef41Sopenharmony_ci // Abort if index is larger than what has been parsed up till this state. 1541cb0ef41Sopenharmony_ci if (index > s->capture_index()) return false; 1551cb0ef41Sopenharmony_ci } 1561cb0ef41Sopenharmony_ci return false; 1571cb0ef41Sopenharmony_ci } 1581cb0ef41Sopenharmony_ci 1591cb0ef41Sopenharmony_ci // Check whether the parser is inside a capture group with the given name. 1601cb0ef41Sopenharmony_ci bool IsInsideCaptureGroup(const ZoneVector<base::uc16>* name) const { 1611cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(name); 1621cb0ef41Sopenharmony_ci for (const RegExpParserState* s = this; s != nullptr; 1631cb0ef41Sopenharmony_ci s = s->previous_state()) { 1641cb0ef41Sopenharmony_ci if (s->capture_name() == nullptr) continue; 1651cb0ef41Sopenharmony_ci if (*s->capture_name() == *name) return true; 1661cb0ef41Sopenharmony_ci } 1671cb0ef41Sopenharmony_ci return false; 1681cb0ef41Sopenharmony_ci } 1691cb0ef41Sopenharmony_ci 1701cb0ef41Sopenharmony_ci private: 1711cb0ef41Sopenharmony_ci // Linked list implementation of stack of states. 1721cb0ef41Sopenharmony_ci RegExpParserState* const previous_state_; 1731cb0ef41Sopenharmony_ci // Builder for the stored disjunction. 1741cb0ef41Sopenharmony_ci RegExpBuilder builder_; 1751cb0ef41Sopenharmony_ci // Stored disjunction type (capture, look-ahead or grouping), if any. 1761cb0ef41Sopenharmony_ci const SubexpressionType group_type_; 1771cb0ef41Sopenharmony_ci // Stored read direction. 1781cb0ef41Sopenharmony_ci const RegExpLookaround::Type lookaround_type_; 1791cb0ef41Sopenharmony_ci // Stored disjunction's capture index (if any). 1801cb0ef41Sopenharmony_ci const int disjunction_capture_index_; 1811cb0ef41Sopenharmony_ci // Stored capture name (if any). 1821cb0ef41Sopenharmony_ci const ZoneVector<base::uc16>* const capture_name_; 1831cb0ef41Sopenharmony_ci}; 1841cb0ef41Sopenharmony_ci 1851cb0ef41Sopenharmony_citemplate <class CharT> 1861cb0ef41Sopenharmony_ciclass RegExpParserImpl final { 1871cb0ef41Sopenharmony_ci private: 1881cb0ef41Sopenharmony_ci RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags, 1891cb0ef41Sopenharmony_ci uintptr_t stack_limit, Zone* zone, 1901cb0ef41Sopenharmony_ci const DisallowGarbageCollection& no_gc); 1911cb0ef41Sopenharmony_ci 1921cb0ef41Sopenharmony_ci bool Parse(RegExpCompileData* result); 1931cb0ef41Sopenharmony_ci 1941cb0ef41Sopenharmony_ci RegExpTree* ParsePattern(); 1951cb0ef41Sopenharmony_ci RegExpTree* ParseDisjunction(); 1961cb0ef41Sopenharmony_ci RegExpTree* ParseGroup(); 1971cb0ef41Sopenharmony_ci 1981cb0ef41Sopenharmony_ci // Parses a {...,...} quantifier and stores the range in the given 1991cb0ef41Sopenharmony_ci // out parameters. 2001cb0ef41Sopenharmony_ci bool ParseIntervalQuantifier(int* min_out, int* max_out); 2011cb0ef41Sopenharmony_ci 2021cb0ef41Sopenharmony_ci // Checks whether the following is a length-digit hexadecimal number, 2031cb0ef41Sopenharmony_ci // and sets the value if it is. 2041cb0ef41Sopenharmony_ci bool ParseHexEscape(int length, base::uc32* value); 2051cb0ef41Sopenharmony_ci bool ParseUnicodeEscape(base::uc32* value); 2061cb0ef41Sopenharmony_ci bool ParseUnlimitedLengthHexNumber(int max_value, base::uc32* value); 2071cb0ef41Sopenharmony_ci 2081cb0ef41Sopenharmony_ci bool ParsePropertyClassName(ZoneVector<char>* name_1, 2091cb0ef41Sopenharmony_ci ZoneVector<char>* name_2); 2101cb0ef41Sopenharmony_ci bool AddPropertyClassRange(ZoneList<CharacterRange>* add_to, bool negate, 2111cb0ef41Sopenharmony_ci const ZoneVector<char>& name_1, 2121cb0ef41Sopenharmony_ci const ZoneVector<char>& name_2); 2131cb0ef41Sopenharmony_ci 2141cb0ef41Sopenharmony_ci RegExpTree* ParseCharacterClass(const RegExpBuilder* state); 2151cb0ef41Sopenharmony_ci 2161cb0ef41Sopenharmony_ci base::uc32 ParseOctalLiteral(); 2171cb0ef41Sopenharmony_ci 2181cb0ef41Sopenharmony_ci // Tries to parse the input as a back reference. If successful it 2191cb0ef41Sopenharmony_ci // stores the result in the output parameter and returns true. If 2201cb0ef41Sopenharmony_ci // it fails it will push back the characters read so the same characters 2211cb0ef41Sopenharmony_ci // can be reparsed. 2221cb0ef41Sopenharmony_ci bool ParseBackReferenceIndex(int* index_out); 2231cb0ef41Sopenharmony_ci 2241cb0ef41Sopenharmony_ci // Parse inside a class. Either add escaped class to the range, or return 2251cb0ef41Sopenharmony_ci // false and pass parsed single character through |char_out|. 2261cb0ef41Sopenharmony_ci void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone, 2271cb0ef41Sopenharmony_ci bool add_unicode_case_equivalents, base::uc32* char_out, 2281cb0ef41Sopenharmony_ci bool* is_class_escape); 2291cb0ef41Sopenharmony_ci // Returns true iff parsing was successful. 2301cb0ef41Sopenharmony_ci bool TryParseCharacterClassEscape(base::uc32 next, 2311cb0ef41Sopenharmony_ci InClassEscapeState in_class_escape_state, 2321cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges, 2331cb0ef41Sopenharmony_ci Zone* zone, 2341cb0ef41Sopenharmony_ci bool add_unicode_case_equivalents); 2351cb0ef41Sopenharmony_ci // Parses and returns a single escaped character. 2361cb0ef41Sopenharmony_ci base::uc32 ParseCharacterEscape(InClassEscapeState in_class_escape_state, 2371cb0ef41Sopenharmony_ci bool* is_escaped_unicode_character); 2381cb0ef41Sopenharmony_ci 2391cb0ef41Sopenharmony_ci RegExpTree* ReportError(RegExpError error); 2401cb0ef41Sopenharmony_ci void Advance(); 2411cb0ef41Sopenharmony_ci void Advance(int dist); 2421cb0ef41Sopenharmony_ci void RewindByOneCodepoint(); // Rewinds to before the previous Advance(). 2431cb0ef41Sopenharmony_ci void Reset(int pos); 2441cb0ef41Sopenharmony_ci 2451cb0ef41Sopenharmony_ci // Reports whether the pattern might be used as a literal search string. 2461cb0ef41Sopenharmony_ci // Only use if the result of the parse is a single atom node. 2471cb0ef41Sopenharmony_ci bool simple() const { return simple_; } 2481cb0ef41Sopenharmony_ci bool contains_anchor() const { return contains_anchor_; } 2491cb0ef41Sopenharmony_ci void set_contains_anchor() { contains_anchor_ = true; } 2501cb0ef41Sopenharmony_ci int captures_started() const { return captures_started_; } 2511cb0ef41Sopenharmony_ci int position() const { return next_pos_ - 1; } 2521cb0ef41Sopenharmony_ci bool failed() const { return failed_; } 2531cb0ef41Sopenharmony_ci bool unicode() const { return IsUnicode(top_level_flags_) || force_unicode_; } 2541cb0ef41Sopenharmony_ci 2551cb0ef41Sopenharmony_ci static bool IsSyntaxCharacterOrSlash(base::uc32 c); 2561cb0ef41Sopenharmony_ci 2571cb0ef41Sopenharmony_ci static const base::uc32 kEndMarker = (1 << 21); 2581cb0ef41Sopenharmony_ci 2591cb0ef41Sopenharmony_ci private: 2601cb0ef41Sopenharmony_ci // Return the 1-indexed RegExpCapture object, allocate if necessary. 2611cb0ef41Sopenharmony_ci RegExpCapture* GetCapture(int index); 2621cb0ef41Sopenharmony_ci 2631cb0ef41Sopenharmony_ci // Creates a new named capture at the specified index. Must be called exactly 2641cb0ef41Sopenharmony_ci // once for each named capture. Fails if a capture with the same name is 2651cb0ef41Sopenharmony_ci // encountered. 2661cb0ef41Sopenharmony_ci bool CreateNamedCaptureAtIndex(const ZoneVector<base::uc16>* name, int index); 2671cb0ef41Sopenharmony_ci 2681cb0ef41Sopenharmony_ci // Parses the name of a capture group (?<name>pattern). The name must adhere 2691cb0ef41Sopenharmony_ci // to IdentifierName in the ECMAScript standard. 2701cb0ef41Sopenharmony_ci const ZoneVector<base::uc16>* ParseCaptureGroupName(); 2711cb0ef41Sopenharmony_ci 2721cb0ef41Sopenharmony_ci bool ParseNamedBackReference(RegExpBuilder* builder, 2731cb0ef41Sopenharmony_ci RegExpParserState* state); 2741cb0ef41Sopenharmony_ci RegExpParserState* ParseOpenParenthesis(RegExpParserState* state); 2751cb0ef41Sopenharmony_ci 2761cb0ef41Sopenharmony_ci // After the initial parsing pass, patch corresponding RegExpCapture objects 2771cb0ef41Sopenharmony_ci // into all RegExpBackReferences. This is done after initial parsing in order 2781cb0ef41Sopenharmony_ci // to avoid complicating cases in which references comes before the capture. 2791cb0ef41Sopenharmony_ci void PatchNamedBackReferences(); 2801cb0ef41Sopenharmony_ci 2811cb0ef41Sopenharmony_ci ZoneVector<RegExpCapture*>* GetNamedCaptures() const; 2821cb0ef41Sopenharmony_ci 2831cb0ef41Sopenharmony_ci // Returns true iff the pattern contains named captures. May call 2841cb0ef41Sopenharmony_ci // ScanForCaptures to look ahead at the remaining pattern. 2851cb0ef41Sopenharmony_ci bool HasNamedCaptures(InClassEscapeState in_class_escape_state); 2861cb0ef41Sopenharmony_ci 2871cb0ef41Sopenharmony_ci Zone* zone() const { return zone_; } 2881cb0ef41Sopenharmony_ci 2891cb0ef41Sopenharmony_ci base::uc32 current() const { return current_; } 2901cb0ef41Sopenharmony_ci bool has_more() const { return has_more_; } 2911cb0ef41Sopenharmony_ci bool has_next() const { return next_pos_ < input_length(); } 2921cb0ef41Sopenharmony_ci base::uc32 Next(); 2931cb0ef41Sopenharmony_ci template <bool update_position> 2941cb0ef41Sopenharmony_ci base::uc32 ReadNext(); 2951cb0ef41Sopenharmony_ci CharT InputAt(int index) const { 2961cb0ef41Sopenharmony_ci DCHECK(0 <= index && index < input_length()); 2971cb0ef41Sopenharmony_ci return input_[index]; 2981cb0ef41Sopenharmony_ci } 2991cb0ef41Sopenharmony_ci int input_length() const { return input_length_; } 3001cb0ef41Sopenharmony_ci void ScanForCaptures(InClassEscapeState in_class_escape_state); 3011cb0ef41Sopenharmony_ci 3021cb0ef41Sopenharmony_ci struct RegExpCaptureNameLess { 3031cb0ef41Sopenharmony_ci bool operator()(const RegExpCapture* lhs, const RegExpCapture* rhs) const { 3041cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(lhs); 3051cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(rhs); 3061cb0ef41Sopenharmony_ci return *lhs->name() < *rhs->name(); 3071cb0ef41Sopenharmony_ci } 3081cb0ef41Sopenharmony_ci }; 3091cb0ef41Sopenharmony_ci 3101cb0ef41Sopenharmony_ci class ForceUnicodeScope final { 3111cb0ef41Sopenharmony_ci public: 3121cb0ef41Sopenharmony_ci explicit ForceUnicodeScope(RegExpParserImpl<CharT>* parser) 3131cb0ef41Sopenharmony_ci : parser_(parser) { 3141cb0ef41Sopenharmony_ci DCHECK(!parser_->force_unicode_); 3151cb0ef41Sopenharmony_ci parser_->force_unicode_ = true; 3161cb0ef41Sopenharmony_ci } 3171cb0ef41Sopenharmony_ci ~ForceUnicodeScope() { 3181cb0ef41Sopenharmony_ci DCHECK(parser_->force_unicode_); 3191cb0ef41Sopenharmony_ci parser_->force_unicode_ = false; 3201cb0ef41Sopenharmony_ci } 3211cb0ef41Sopenharmony_ci 3221cb0ef41Sopenharmony_ci private: 3231cb0ef41Sopenharmony_ci RegExpParserImpl<CharT>* const parser_; 3241cb0ef41Sopenharmony_ci }; 3251cb0ef41Sopenharmony_ci 3261cb0ef41Sopenharmony_ci const DisallowGarbageCollection no_gc_; 3271cb0ef41Sopenharmony_ci Zone* const zone_; 3281cb0ef41Sopenharmony_ci RegExpError error_ = RegExpError::kNone; 3291cb0ef41Sopenharmony_ci int error_pos_ = 0; 3301cb0ef41Sopenharmony_ci ZoneList<RegExpCapture*>* captures_; 3311cb0ef41Sopenharmony_ci ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_; 3321cb0ef41Sopenharmony_ci ZoneList<RegExpBackReference*>* named_back_references_; 3331cb0ef41Sopenharmony_ci const CharT* const input_; 3341cb0ef41Sopenharmony_ci const int input_length_; 3351cb0ef41Sopenharmony_ci base::uc32 current_; 3361cb0ef41Sopenharmony_ci const RegExpFlags top_level_flags_; 3371cb0ef41Sopenharmony_ci bool force_unicode_ = false; // Force parser to act as if unicode were set. 3381cb0ef41Sopenharmony_ci int next_pos_; 3391cb0ef41Sopenharmony_ci int captures_started_; 3401cb0ef41Sopenharmony_ci int capture_count_; // Only valid after we have scanned for captures. 3411cb0ef41Sopenharmony_ci bool has_more_; 3421cb0ef41Sopenharmony_ci bool simple_; 3431cb0ef41Sopenharmony_ci bool contains_anchor_; 3441cb0ef41Sopenharmony_ci bool is_scanned_for_captures_; 3451cb0ef41Sopenharmony_ci bool has_named_captures_; // Only valid after we have scanned for captures. 3461cb0ef41Sopenharmony_ci bool failed_; 3471cb0ef41Sopenharmony_ci const uintptr_t stack_limit_; 3481cb0ef41Sopenharmony_ci 3491cb0ef41Sopenharmony_ci friend bool RegExpParser::ParseRegExpFromHeapString(Isolate*, Zone*, 3501cb0ef41Sopenharmony_ci Handle<String>, 3511cb0ef41Sopenharmony_ci RegExpFlags, 3521cb0ef41Sopenharmony_ci RegExpCompileData*); 3531cb0ef41Sopenharmony_ci friend bool RegExpParser::VerifyRegExpSyntax<CharT>( 3541cb0ef41Sopenharmony_ci Zone*, uintptr_t, const CharT*, int, RegExpFlags, RegExpCompileData*, 3551cb0ef41Sopenharmony_ci const DisallowGarbageCollection&); 3561cb0ef41Sopenharmony_ci}; 3571cb0ef41Sopenharmony_ci 3581cb0ef41Sopenharmony_citemplate <class CharT> 3591cb0ef41Sopenharmony_ciRegExpParserImpl<CharT>::RegExpParserImpl( 3601cb0ef41Sopenharmony_ci const CharT* input, int input_length, RegExpFlags flags, 3611cb0ef41Sopenharmony_ci uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc) 3621cb0ef41Sopenharmony_ci : zone_(zone), 3631cb0ef41Sopenharmony_ci captures_(nullptr), 3641cb0ef41Sopenharmony_ci named_captures_(nullptr), 3651cb0ef41Sopenharmony_ci named_back_references_(nullptr), 3661cb0ef41Sopenharmony_ci input_(input), 3671cb0ef41Sopenharmony_ci input_length_(input_length), 3681cb0ef41Sopenharmony_ci current_(kEndMarker), 3691cb0ef41Sopenharmony_ci top_level_flags_(flags), 3701cb0ef41Sopenharmony_ci next_pos_(0), 3711cb0ef41Sopenharmony_ci captures_started_(0), 3721cb0ef41Sopenharmony_ci capture_count_(0), 3731cb0ef41Sopenharmony_ci has_more_(true), 3741cb0ef41Sopenharmony_ci simple_(false), 3751cb0ef41Sopenharmony_ci contains_anchor_(false), 3761cb0ef41Sopenharmony_ci is_scanned_for_captures_(false), 3771cb0ef41Sopenharmony_ci has_named_captures_(false), 3781cb0ef41Sopenharmony_ci failed_(false), 3791cb0ef41Sopenharmony_ci stack_limit_(stack_limit) { 3801cb0ef41Sopenharmony_ci Advance(); 3811cb0ef41Sopenharmony_ci} 3821cb0ef41Sopenharmony_ci 3831cb0ef41Sopenharmony_citemplate <> 3841cb0ef41Sopenharmony_citemplate <bool update_position> 3851cb0ef41Sopenharmony_ciinline base::uc32 RegExpParserImpl<uint8_t>::ReadNext() { 3861cb0ef41Sopenharmony_ci int position = next_pos_; 3871cb0ef41Sopenharmony_ci base::uc16 c0 = InputAt(position); 3881cb0ef41Sopenharmony_ci position++; 3891cb0ef41Sopenharmony_ci DCHECK(!unibrow::Utf16::IsLeadSurrogate(c0)); 3901cb0ef41Sopenharmony_ci if (update_position) next_pos_ = position; 3911cb0ef41Sopenharmony_ci return c0; 3921cb0ef41Sopenharmony_ci} 3931cb0ef41Sopenharmony_ci 3941cb0ef41Sopenharmony_citemplate <> 3951cb0ef41Sopenharmony_citemplate <bool update_position> 3961cb0ef41Sopenharmony_ciinline base::uc32 RegExpParserImpl<base::uc16>::ReadNext() { 3971cb0ef41Sopenharmony_ci int position = next_pos_; 3981cb0ef41Sopenharmony_ci base::uc16 c0 = InputAt(position); 3991cb0ef41Sopenharmony_ci base::uc32 result = c0; 4001cb0ef41Sopenharmony_ci position++; 4011cb0ef41Sopenharmony_ci // Read the whole surrogate pair in case of unicode flag, if possible. 4021cb0ef41Sopenharmony_ci if (unicode() && position < input_length() && 4031cb0ef41Sopenharmony_ci unibrow::Utf16::IsLeadSurrogate(c0)) { 4041cb0ef41Sopenharmony_ci base::uc16 c1 = InputAt(position); 4051cb0ef41Sopenharmony_ci if (unibrow::Utf16::IsTrailSurrogate(c1)) { 4061cb0ef41Sopenharmony_ci result = unibrow::Utf16::CombineSurrogatePair(c0, c1); 4071cb0ef41Sopenharmony_ci position++; 4081cb0ef41Sopenharmony_ci } 4091cb0ef41Sopenharmony_ci } 4101cb0ef41Sopenharmony_ci if (update_position) next_pos_ = position; 4111cb0ef41Sopenharmony_ci return result; 4121cb0ef41Sopenharmony_ci} 4131cb0ef41Sopenharmony_ci 4141cb0ef41Sopenharmony_citemplate <class CharT> 4151cb0ef41Sopenharmony_cibase::uc32 RegExpParserImpl<CharT>::Next() { 4161cb0ef41Sopenharmony_ci if (has_next()) { 4171cb0ef41Sopenharmony_ci return ReadNext<false>(); 4181cb0ef41Sopenharmony_ci } else { 4191cb0ef41Sopenharmony_ci return kEndMarker; 4201cb0ef41Sopenharmony_ci } 4211cb0ef41Sopenharmony_ci} 4221cb0ef41Sopenharmony_ci 4231cb0ef41Sopenharmony_citemplate <class CharT> 4241cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::Advance() { 4251cb0ef41Sopenharmony_ci if (has_next()) { 4261cb0ef41Sopenharmony_ci if (GetCurrentStackPosition() < stack_limit_) { 4271cb0ef41Sopenharmony_ci if (FLAG_correctness_fuzzer_suppressions) { 4281cb0ef41Sopenharmony_ci FATAL("Aborting on stack overflow"); 4291cb0ef41Sopenharmony_ci } 4301cb0ef41Sopenharmony_ci ReportError(RegExpError::kStackOverflow); 4311cb0ef41Sopenharmony_ci } else { 4321cb0ef41Sopenharmony_ci current_ = ReadNext<true>(); 4331cb0ef41Sopenharmony_ci } 4341cb0ef41Sopenharmony_ci } else { 4351cb0ef41Sopenharmony_ci current_ = kEndMarker; 4361cb0ef41Sopenharmony_ci // Advance so that position() points to 1-after-the-last-character. This is 4371cb0ef41Sopenharmony_ci // important so that Reset() to this position works correctly. 4381cb0ef41Sopenharmony_ci next_pos_ = input_length() + 1; 4391cb0ef41Sopenharmony_ci has_more_ = false; 4401cb0ef41Sopenharmony_ci } 4411cb0ef41Sopenharmony_ci} 4421cb0ef41Sopenharmony_ci 4431cb0ef41Sopenharmony_citemplate <class CharT> 4441cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::RewindByOneCodepoint() { 4451cb0ef41Sopenharmony_ci if (current() == kEndMarker) return; 4461cb0ef41Sopenharmony_ci // Rewinds by one code point, i.e.: two code units if `current` is outside 4471cb0ef41Sopenharmony_ci // the basic multilingual plane (= composed of a lead and trail surrogate), 4481cb0ef41Sopenharmony_ci // or one code unit otherwise. 4491cb0ef41Sopenharmony_ci const int rewind_by = 4501cb0ef41Sopenharmony_ci current() > unibrow::Utf16::kMaxNonSurrogateCharCode ? -2 : -1; 4511cb0ef41Sopenharmony_ci Advance(rewind_by); // Undo the last Advance. 4521cb0ef41Sopenharmony_ci} 4531cb0ef41Sopenharmony_ci 4541cb0ef41Sopenharmony_citemplate <class CharT> 4551cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::Reset(int pos) { 4561cb0ef41Sopenharmony_ci next_pos_ = pos; 4571cb0ef41Sopenharmony_ci has_more_ = (pos < input_length()); 4581cb0ef41Sopenharmony_ci Advance(); 4591cb0ef41Sopenharmony_ci} 4601cb0ef41Sopenharmony_ci 4611cb0ef41Sopenharmony_citemplate <class CharT> 4621cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::Advance(int dist) { 4631cb0ef41Sopenharmony_ci next_pos_ += dist - 1; 4641cb0ef41Sopenharmony_ci Advance(); 4651cb0ef41Sopenharmony_ci} 4661cb0ef41Sopenharmony_ci 4671cb0ef41Sopenharmony_citemplate <class CharT> 4681cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::IsSyntaxCharacterOrSlash(base::uc32 c) { 4691cb0ef41Sopenharmony_ci switch (c) { 4701cb0ef41Sopenharmony_ci case '^': 4711cb0ef41Sopenharmony_ci case '$': 4721cb0ef41Sopenharmony_ci case '\\': 4731cb0ef41Sopenharmony_ci case '.': 4741cb0ef41Sopenharmony_ci case '*': 4751cb0ef41Sopenharmony_ci case '+': 4761cb0ef41Sopenharmony_ci case '?': 4771cb0ef41Sopenharmony_ci case '(': 4781cb0ef41Sopenharmony_ci case ')': 4791cb0ef41Sopenharmony_ci case '[': 4801cb0ef41Sopenharmony_ci case ']': 4811cb0ef41Sopenharmony_ci case '{': 4821cb0ef41Sopenharmony_ci case '}': 4831cb0ef41Sopenharmony_ci case '|': 4841cb0ef41Sopenharmony_ci case '/': 4851cb0ef41Sopenharmony_ci return true; 4861cb0ef41Sopenharmony_ci default: 4871cb0ef41Sopenharmony_ci break; 4881cb0ef41Sopenharmony_ci } 4891cb0ef41Sopenharmony_ci return false; 4901cb0ef41Sopenharmony_ci} 4911cb0ef41Sopenharmony_ci 4921cb0ef41Sopenharmony_citemplate <class CharT> 4931cb0ef41Sopenharmony_ciRegExpTree* RegExpParserImpl<CharT>::ReportError(RegExpError error) { 4941cb0ef41Sopenharmony_ci if (failed_) return nullptr; // Do not overwrite any existing error. 4951cb0ef41Sopenharmony_ci failed_ = true; 4961cb0ef41Sopenharmony_ci error_ = error; 4971cb0ef41Sopenharmony_ci error_pos_ = position(); 4981cb0ef41Sopenharmony_ci // Zip to the end to make sure no more input is read. 4991cb0ef41Sopenharmony_ci current_ = kEndMarker; 5001cb0ef41Sopenharmony_ci next_pos_ = input_length(); 5011cb0ef41Sopenharmony_ci return nullptr; 5021cb0ef41Sopenharmony_ci} 5031cb0ef41Sopenharmony_ci 5041cb0ef41Sopenharmony_ci#define CHECK_FAILED /**/); \ 5051cb0ef41Sopenharmony_ci if (failed_) return nullptr; \ 5061cb0ef41Sopenharmony_ci ((void)0 5071cb0ef41Sopenharmony_ci 5081cb0ef41Sopenharmony_ci// Pattern :: 5091cb0ef41Sopenharmony_ci// Disjunction 5101cb0ef41Sopenharmony_citemplate <class CharT> 5111cb0ef41Sopenharmony_ciRegExpTree* RegExpParserImpl<CharT>::ParsePattern() { 5121cb0ef41Sopenharmony_ci RegExpTree* result = ParseDisjunction(CHECK_FAILED); 5131cb0ef41Sopenharmony_ci PatchNamedBackReferences(CHECK_FAILED); 5141cb0ef41Sopenharmony_ci DCHECK(!has_more()); 5151cb0ef41Sopenharmony_ci // If the result of parsing is a literal string atom, and it has the 5161cb0ef41Sopenharmony_ci // same length as the input, then the atom is identical to the input. 5171cb0ef41Sopenharmony_ci if (result->IsAtom() && result->AsAtom()->length() == input_length()) { 5181cb0ef41Sopenharmony_ci simple_ = true; 5191cb0ef41Sopenharmony_ci } 5201cb0ef41Sopenharmony_ci return result; 5211cb0ef41Sopenharmony_ci} 5221cb0ef41Sopenharmony_ci 5231cb0ef41Sopenharmony_ci// Disjunction :: 5241cb0ef41Sopenharmony_ci// Alternative 5251cb0ef41Sopenharmony_ci// Alternative | Disjunction 5261cb0ef41Sopenharmony_ci// Alternative :: 5271cb0ef41Sopenharmony_ci// [empty] 5281cb0ef41Sopenharmony_ci// Term Alternative 5291cb0ef41Sopenharmony_ci// Term :: 5301cb0ef41Sopenharmony_ci// Assertion 5311cb0ef41Sopenharmony_ci// Atom 5321cb0ef41Sopenharmony_ci// Atom Quantifier 5331cb0ef41Sopenharmony_citemplate <class CharT> 5341cb0ef41Sopenharmony_ciRegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() { 5351cb0ef41Sopenharmony_ci // Used to store current state while parsing subexpressions. 5361cb0ef41Sopenharmony_ci RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, 5371cb0ef41Sopenharmony_ci 0, nullptr, top_level_flags_, zone()); 5381cb0ef41Sopenharmony_ci RegExpParserState* state = &initial_state; 5391cb0ef41Sopenharmony_ci // Cache the builder in a local variable for quick access. 5401cb0ef41Sopenharmony_ci RegExpBuilder* builder = initial_state.builder(); 5411cb0ef41Sopenharmony_ci while (true) { 5421cb0ef41Sopenharmony_ci switch (current()) { 5431cb0ef41Sopenharmony_ci case kEndMarker: 5441cb0ef41Sopenharmony_ci if (failed()) return nullptr; // E.g. the initial Advance failed. 5451cb0ef41Sopenharmony_ci if (state->IsSubexpression()) { 5461cb0ef41Sopenharmony_ci // Inside a parenthesized group when hitting end of input. 5471cb0ef41Sopenharmony_ci return ReportError(RegExpError::kUnterminatedGroup); 5481cb0ef41Sopenharmony_ci } 5491cb0ef41Sopenharmony_ci DCHECK_EQ(INITIAL, state->group_type()); 5501cb0ef41Sopenharmony_ci // Parsing completed successfully. 5511cb0ef41Sopenharmony_ci return builder->ToRegExp(); 5521cb0ef41Sopenharmony_ci case ')': { 5531cb0ef41Sopenharmony_ci if (!state->IsSubexpression()) { 5541cb0ef41Sopenharmony_ci return ReportError(RegExpError::kUnmatchedParen); 5551cb0ef41Sopenharmony_ci } 5561cb0ef41Sopenharmony_ci DCHECK_NE(INITIAL, state->group_type()); 5571cb0ef41Sopenharmony_ci 5581cb0ef41Sopenharmony_ci Advance(); 5591cb0ef41Sopenharmony_ci // End disjunction parsing and convert builder content to new single 5601cb0ef41Sopenharmony_ci // regexp atom. 5611cb0ef41Sopenharmony_ci RegExpTree* body = builder->ToRegExp(); 5621cb0ef41Sopenharmony_ci 5631cb0ef41Sopenharmony_ci int end_capture_index = captures_started(); 5641cb0ef41Sopenharmony_ci 5651cb0ef41Sopenharmony_ci int capture_index = state->capture_index(); 5661cb0ef41Sopenharmony_ci SubexpressionType group_type = state->group_type(); 5671cb0ef41Sopenharmony_ci 5681cb0ef41Sopenharmony_ci // Build result of subexpression. 5691cb0ef41Sopenharmony_ci if (group_type == CAPTURE) { 5701cb0ef41Sopenharmony_ci if (state->IsNamedCapture()) { 5711cb0ef41Sopenharmony_ci CreateNamedCaptureAtIndex(state->capture_name(), 5721cb0ef41Sopenharmony_ci capture_index CHECK_FAILED); 5731cb0ef41Sopenharmony_ci } 5741cb0ef41Sopenharmony_ci RegExpCapture* capture = GetCapture(capture_index); 5751cb0ef41Sopenharmony_ci capture->set_body(body); 5761cb0ef41Sopenharmony_ci body = capture; 5771cb0ef41Sopenharmony_ci } else if (group_type == GROUPING) { 5781cb0ef41Sopenharmony_ci body = zone()->template New<RegExpGroup>(body); 5791cb0ef41Sopenharmony_ci } else { 5801cb0ef41Sopenharmony_ci DCHECK(group_type == POSITIVE_LOOKAROUND || 5811cb0ef41Sopenharmony_ci group_type == NEGATIVE_LOOKAROUND); 5821cb0ef41Sopenharmony_ci bool is_positive = (group_type == POSITIVE_LOOKAROUND); 5831cb0ef41Sopenharmony_ci body = zone()->template New<RegExpLookaround>( 5841cb0ef41Sopenharmony_ci body, is_positive, end_capture_index - capture_index, 5851cb0ef41Sopenharmony_ci capture_index, state->lookaround_type()); 5861cb0ef41Sopenharmony_ci } 5871cb0ef41Sopenharmony_ci 5881cb0ef41Sopenharmony_ci // Restore previous state. 5891cb0ef41Sopenharmony_ci state = state->previous_state(); 5901cb0ef41Sopenharmony_ci builder = state->builder(); 5911cb0ef41Sopenharmony_ci 5921cb0ef41Sopenharmony_ci builder->AddAtom(body); 5931cb0ef41Sopenharmony_ci // For compatibility with JSC and ES3, we allow quantifiers after 5941cb0ef41Sopenharmony_ci // lookaheads, and break in all cases. 5951cb0ef41Sopenharmony_ci break; 5961cb0ef41Sopenharmony_ci } 5971cb0ef41Sopenharmony_ci case '|': { 5981cb0ef41Sopenharmony_ci Advance(); 5991cb0ef41Sopenharmony_ci builder->NewAlternative(); 6001cb0ef41Sopenharmony_ci continue; 6011cb0ef41Sopenharmony_ci } 6021cb0ef41Sopenharmony_ci case '*': 6031cb0ef41Sopenharmony_ci case '+': 6041cb0ef41Sopenharmony_ci case '?': 6051cb0ef41Sopenharmony_ci return ReportError(RegExpError::kNothingToRepeat); 6061cb0ef41Sopenharmony_ci case '^': { 6071cb0ef41Sopenharmony_ci Advance(); 6081cb0ef41Sopenharmony_ci builder->AddAssertion(zone()->template New<RegExpAssertion>( 6091cb0ef41Sopenharmony_ci builder->multiline() ? RegExpAssertion::Type::START_OF_LINE 6101cb0ef41Sopenharmony_ci : RegExpAssertion::Type::START_OF_INPUT)); 6111cb0ef41Sopenharmony_ci set_contains_anchor(); 6121cb0ef41Sopenharmony_ci continue; 6131cb0ef41Sopenharmony_ci } 6141cb0ef41Sopenharmony_ci case '$': { 6151cb0ef41Sopenharmony_ci Advance(); 6161cb0ef41Sopenharmony_ci RegExpAssertion::Type assertion_type = 6171cb0ef41Sopenharmony_ci builder->multiline() ? RegExpAssertion::Type::END_OF_LINE 6181cb0ef41Sopenharmony_ci : RegExpAssertion::Type::END_OF_INPUT; 6191cb0ef41Sopenharmony_ci builder->AddAssertion( 6201cb0ef41Sopenharmony_ci zone()->template New<RegExpAssertion>(assertion_type)); 6211cb0ef41Sopenharmony_ci continue; 6221cb0ef41Sopenharmony_ci } 6231cb0ef41Sopenharmony_ci case '.': { 6241cb0ef41Sopenharmony_ci Advance(); 6251cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = 6261cb0ef41Sopenharmony_ci zone()->template New<ZoneList<CharacterRange>>(2, zone()); 6271cb0ef41Sopenharmony_ci 6281cb0ef41Sopenharmony_ci if (builder->dotall()) { 6291cb0ef41Sopenharmony_ci // Everything. 6301cb0ef41Sopenharmony_ci CharacterRange::AddClassEscape(StandardCharacterSet::kEverything, 6311cb0ef41Sopenharmony_ci ranges, false, zone()); 6321cb0ef41Sopenharmony_ci } else { 6331cb0ef41Sopenharmony_ci // Everything except \x0A, \x0D, \u2028 and \u2029. 6341cb0ef41Sopenharmony_ci CharacterRange::AddClassEscape( 6351cb0ef41Sopenharmony_ci StandardCharacterSet::kNotLineTerminator, ranges, false, zone()); 6361cb0ef41Sopenharmony_ci } 6371cb0ef41Sopenharmony_ci 6381cb0ef41Sopenharmony_ci RegExpCharacterClass* cc = 6391cb0ef41Sopenharmony_ci zone()->template New<RegExpCharacterClass>(zone(), ranges); 6401cb0ef41Sopenharmony_ci builder->AddCharacterClass(cc); 6411cb0ef41Sopenharmony_ci break; 6421cb0ef41Sopenharmony_ci } 6431cb0ef41Sopenharmony_ci case '(': { 6441cb0ef41Sopenharmony_ci state = ParseOpenParenthesis(state CHECK_FAILED); 6451cb0ef41Sopenharmony_ci builder = state->builder(); 6461cb0ef41Sopenharmony_ci continue; 6471cb0ef41Sopenharmony_ci } 6481cb0ef41Sopenharmony_ci case '[': { 6491cb0ef41Sopenharmony_ci RegExpTree* cc = ParseCharacterClass(builder CHECK_FAILED); 6501cb0ef41Sopenharmony_ci builder->AddCharacterClass(cc->AsCharacterClass()); 6511cb0ef41Sopenharmony_ci break; 6521cb0ef41Sopenharmony_ci } 6531cb0ef41Sopenharmony_ci // Atom :: 6541cb0ef41Sopenharmony_ci // \ AtomEscape 6551cb0ef41Sopenharmony_ci case '\\': 6561cb0ef41Sopenharmony_ci switch (Next()) { 6571cb0ef41Sopenharmony_ci case kEndMarker: 6581cb0ef41Sopenharmony_ci return ReportError(RegExpError::kEscapeAtEndOfPattern); 6591cb0ef41Sopenharmony_ci // AtomEscape :: 6601cb0ef41Sopenharmony_ci // [+UnicodeMode] DecimalEscape 6611cb0ef41Sopenharmony_ci // [~UnicodeMode] DecimalEscape but only if the CapturingGroupNumber 6621cb0ef41Sopenharmony_ci // of DecimalEscape is ≤ NcapturingParens 6631cb0ef41Sopenharmony_ci // CharacterEscape (some cases of this mixed in too) 6641cb0ef41Sopenharmony_ci // 6651cb0ef41Sopenharmony_ci // TODO(jgruber): It may make sense to disentangle all the different 6661cb0ef41Sopenharmony_ci // cases and make the structure mirror the spec, e.g. for AtomEscape: 6671cb0ef41Sopenharmony_ci // 6681cb0ef41Sopenharmony_ci // if (TryParseDecimalEscape(...)) return; 6691cb0ef41Sopenharmony_ci // if (TryParseCharacterClassEscape(...)) return; 6701cb0ef41Sopenharmony_ci // if (TryParseCharacterEscape(...)) return; 6711cb0ef41Sopenharmony_ci // if (TryParseGroupName(...)) return; 6721cb0ef41Sopenharmony_ci case '1': 6731cb0ef41Sopenharmony_ci case '2': 6741cb0ef41Sopenharmony_ci case '3': 6751cb0ef41Sopenharmony_ci case '4': 6761cb0ef41Sopenharmony_ci case '5': 6771cb0ef41Sopenharmony_ci case '6': 6781cb0ef41Sopenharmony_ci case '7': 6791cb0ef41Sopenharmony_ci case '8': 6801cb0ef41Sopenharmony_ci case '9': { 6811cb0ef41Sopenharmony_ci int index = 0; 6821cb0ef41Sopenharmony_ci const bool is_backref = 6831cb0ef41Sopenharmony_ci ParseBackReferenceIndex(&index CHECK_FAILED); 6841cb0ef41Sopenharmony_ci if (is_backref) { 6851cb0ef41Sopenharmony_ci if (state->IsInsideCaptureGroup(index)) { 6861cb0ef41Sopenharmony_ci // The back reference is inside the capture group it refers to. 6871cb0ef41Sopenharmony_ci // Nothing can possibly have been captured yet, so we use empty 6881cb0ef41Sopenharmony_ci // instead. This ensures that, when checking a back reference, 6891cb0ef41Sopenharmony_ci // the capture registers of the referenced capture are either 6901cb0ef41Sopenharmony_ci // both set or both cleared. 6911cb0ef41Sopenharmony_ci builder->AddEmpty(); 6921cb0ef41Sopenharmony_ci } else { 6931cb0ef41Sopenharmony_ci RegExpCapture* capture = GetCapture(index); 6941cb0ef41Sopenharmony_ci RegExpTree* atom = zone()->template New<RegExpBackReference>( 6951cb0ef41Sopenharmony_ci capture, builder->flags()); 6961cb0ef41Sopenharmony_ci builder->AddAtom(atom); 6971cb0ef41Sopenharmony_ci } 6981cb0ef41Sopenharmony_ci break; 6991cb0ef41Sopenharmony_ci } 7001cb0ef41Sopenharmony_ci // With /u, no identity escapes except for syntax characters 7011cb0ef41Sopenharmony_ci // are allowed. Otherwise, all identity escapes are allowed. 7021cb0ef41Sopenharmony_ci if (unicode()) { 7031cb0ef41Sopenharmony_ci return ReportError(RegExpError::kInvalidEscape); 7041cb0ef41Sopenharmony_ci } 7051cb0ef41Sopenharmony_ci base::uc32 first_digit = Next(); 7061cb0ef41Sopenharmony_ci if (first_digit == '8' || first_digit == '9') { 7071cb0ef41Sopenharmony_ci builder->AddCharacter(first_digit); 7081cb0ef41Sopenharmony_ci Advance(2); 7091cb0ef41Sopenharmony_ci break; 7101cb0ef41Sopenharmony_ci } 7111cb0ef41Sopenharmony_ci V8_FALLTHROUGH; 7121cb0ef41Sopenharmony_ci } 7131cb0ef41Sopenharmony_ci case '0': { 7141cb0ef41Sopenharmony_ci Advance(); 7151cb0ef41Sopenharmony_ci if (unicode() && Next() >= '0' && Next() <= '9') { 7161cb0ef41Sopenharmony_ci // With /u, decimal escape with leading 0 are not parsed as octal. 7171cb0ef41Sopenharmony_ci return ReportError(RegExpError::kInvalidDecimalEscape); 7181cb0ef41Sopenharmony_ci } 7191cb0ef41Sopenharmony_ci base::uc32 octal = ParseOctalLiteral(); 7201cb0ef41Sopenharmony_ci builder->AddCharacter(octal); 7211cb0ef41Sopenharmony_ci break; 7221cb0ef41Sopenharmony_ci } 7231cb0ef41Sopenharmony_ci case 'b': 7241cb0ef41Sopenharmony_ci Advance(2); 7251cb0ef41Sopenharmony_ci builder->AddAssertion(zone()->template New<RegExpAssertion>( 7261cb0ef41Sopenharmony_ci RegExpAssertion::Type::BOUNDARY)); 7271cb0ef41Sopenharmony_ci continue; 7281cb0ef41Sopenharmony_ci case 'B': 7291cb0ef41Sopenharmony_ci Advance(2); 7301cb0ef41Sopenharmony_ci builder->AddAssertion(zone()->template New<RegExpAssertion>( 7311cb0ef41Sopenharmony_ci RegExpAssertion::Type::NON_BOUNDARY)); 7321cb0ef41Sopenharmony_ci continue; 7331cb0ef41Sopenharmony_ci // AtomEscape :: 7341cb0ef41Sopenharmony_ci // CharacterClassEscape 7351cb0ef41Sopenharmony_ci case 'd': 7361cb0ef41Sopenharmony_ci case 'D': 7371cb0ef41Sopenharmony_ci case 's': 7381cb0ef41Sopenharmony_ci case 'S': 7391cb0ef41Sopenharmony_ci case 'w': 7401cb0ef41Sopenharmony_ci case 'W': 7411cb0ef41Sopenharmony_ci case 'p': 7421cb0ef41Sopenharmony_ci case 'P': { 7431cb0ef41Sopenharmony_ci base::uc32 next = Next(); 7441cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = 7451cb0ef41Sopenharmony_ci zone()->template New<ZoneList<CharacterRange>>(2, zone()); 7461cb0ef41Sopenharmony_ci bool add_unicode_case_equivalents = 7471cb0ef41Sopenharmony_ci unicode() && builder->ignore_case(); 7481cb0ef41Sopenharmony_ci bool parsed_character_class_escape = TryParseCharacterClassEscape( 7491cb0ef41Sopenharmony_ci next, InClassEscapeState::kNotInClass, ranges, zone(), 7501cb0ef41Sopenharmony_ci add_unicode_case_equivalents CHECK_FAILED); 7511cb0ef41Sopenharmony_ci 7521cb0ef41Sopenharmony_ci if (parsed_character_class_escape) { 7531cb0ef41Sopenharmony_ci RegExpCharacterClass* cc = 7541cb0ef41Sopenharmony_ci zone()->template New<RegExpCharacterClass>(zone(), ranges); 7551cb0ef41Sopenharmony_ci builder->AddCharacterClass(cc); 7561cb0ef41Sopenharmony_ci } else { 7571cb0ef41Sopenharmony_ci CHECK(!unicode()); 7581cb0ef41Sopenharmony_ci Advance(2); 7591cb0ef41Sopenharmony_ci builder->AddCharacter(next); // IdentityEscape. 7601cb0ef41Sopenharmony_ci } 7611cb0ef41Sopenharmony_ci break; 7621cb0ef41Sopenharmony_ci } 7631cb0ef41Sopenharmony_ci // AtomEscape :: 7641cb0ef41Sopenharmony_ci // k GroupName 7651cb0ef41Sopenharmony_ci case 'k': { 7661cb0ef41Sopenharmony_ci // Either an identity escape or a named back-reference. The two 7671cb0ef41Sopenharmony_ci // interpretations are mutually exclusive: '\k' is interpreted as 7681cb0ef41Sopenharmony_ci // an identity escape for non-Unicode patterns without named 7691cb0ef41Sopenharmony_ci // capture groups, and as the beginning of a named back-reference 7701cb0ef41Sopenharmony_ci // in all other cases. 7711cb0ef41Sopenharmony_ci const bool has_named_captures = 7721cb0ef41Sopenharmony_ci HasNamedCaptures(InClassEscapeState::kNotInClass CHECK_FAILED); 7731cb0ef41Sopenharmony_ci if (unicode() || has_named_captures) { 7741cb0ef41Sopenharmony_ci Advance(2); 7751cb0ef41Sopenharmony_ci ParseNamedBackReference(builder, state CHECK_FAILED); 7761cb0ef41Sopenharmony_ci break; 7771cb0ef41Sopenharmony_ci } 7781cb0ef41Sopenharmony_ci } 7791cb0ef41Sopenharmony_ci V8_FALLTHROUGH; 7801cb0ef41Sopenharmony_ci // AtomEscape :: 7811cb0ef41Sopenharmony_ci // CharacterEscape 7821cb0ef41Sopenharmony_ci default: { 7831cb0ef41Sopenharmony_ci bool is_escaped_unicode_character = false; 7841cb0ef41Sopenharmony_ci base::uc32 c = ParseCharacterEscape( 7851cb0ef41Sopenharmony_ci InClassEscapeState::kNotInClass, 7861cb0ef41Sopenharmony_ci &is_escaped_unicode_character CHECK_FAILED); 7871cb0ef41Sopenharmony_ci if (is_escaped_unicode_character) { 7881cb0ef41Sopenharmony_ci builder->AddEscapedUnicodeCharacter(c); 7891cb0ef41Sopenharmony_ci } else { 7901cb0ef41Sopenharmony_ci builder->AddCharacter(c); 7911cb0ef41Sopenharmony_ci } 7921cb0ef41Sopenharmony_ci break; 7931cb0ef41Sopenharmony_ci } 7941cb0ef41Sopenharmony_ci } 7951cb0ef41Sopenharmony_ci break; 7961cb0ef41Sopenharmony_ci case '{': { 7971cb0ef41Sopenharmony_ci int dummy; 7981cb0ef41Sopenharmony_ci bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED); 7991cb0ef41Sopenharmony_ci if (parsed) return ReportError(RegExpError::kNothingToRepeat); 8001cb0ef41Sopenharmony_ci V8_FALLTHROUGH; 8011cb0ef41Sopenharmony_ci } 8021cb0ef41Sopenharmony_ci case '}': 8031cb0ef41Sopenharmony_ci case ']': 8041cb0ef41Sopenharmony_ci if (unicode()) { 8051cb0ef41Sopenharmony_ci return ReportError(RegExpError::kLoneQuantifierBrackets); 8061cb0ef41Sopenharmony_ci } 8071cb0ef41Sopenharmony_ci V8_FALLTHROUGH; 8081cb0ef41Sopenharmony_ci default: 8091cb0ef41Sopenharmony_ci builder->AddUnicodeCharacter(current()); 8101cb0ef41Sopenharmony_ci Advance(); 8111cb0ef41Sopenharmony_ci break; 8121cb0ef41Sopenharmony_ci } // end switch(current()) 8131cb0ef41Sopenharmony_ci 8141cb0ef41Sopenharmony_ci int min; 8151cb0ef41Sopenharmony_ci int max; 8161cb0ef41Sopenharmony_ci switch (current()) { 8171cb0ef41Sopenharmony_ci // QuantifierPrefix :: 8181cb0ef41Sopenharmony_ci // * 8191cb0ef41Sopenharmony_ci // + 8201cb0ef41Sopenharmony_ci // ? 8211cb0ef41Sopenharmony_ci // { 8221cb0ef41Sopenharmony_ci case '*': 8231cb0ef41Sopenharmony_ci min = 0; 8241cb0ef41Sopenharmony_ci max = RegExpTree::kInfinity; 8251cb0ef41Sopenharmony_ci Advance(); 8261cb0ef41Sopenharmony_ci break; 8271cb0ef41Sopenharmony_ci case '+': 8281cb0ef41Sopenharmony_ci min = 1; 8291cb0ef41Sopenharmony_ci max = RegExpTree::kInfinity; 8301cb0ef41Sopenharmony_ci Advance(); 8311cb0ef41Sopenharmony_ci break; 8321cb0ef41Sopenharmony_ci case '?': 8331cb0ef41Sopenharmony_ci min = 0; 8341cb0ef41Sopenharmony_ci max = 1; 8351cb0ef41Sopenharmony_ci Advance(); 8361cb0ef41Sopenharmony_ci break; 8371cb0ef41Sopenharmony_ci case '{': 8381cb0ef41Sopenharmony_ci if (ParseIntervalQuantifier(&min, &max)) { 8391cb0ef41Sopenharmony_ci if (max < min) { 8401cb0ef41Sopenharmony_ci return ReportError(RegExpError::kRangeOutOfOrder); 8411cb0ef41Sopenharmony_ci } 8421cb0ef41Sopenharmony_ci break; 8431cb0ef41Sopenharmony_ci } else if (unicode()) { 8441cb0ef41Sopenharmony_ci // With /u, incomplete quantifiers are not allowed. 8451cb0ef41Sopenharmony_ci return ReportError(RegExpError::kIncompleteQuantifier); 8461cb0ef41Sopenharmony_ci } 8471cb0ef41Sopenharmony_ci continue; 8481cb0ef41Sopenharmony_ci default: 8491cb0ef41Sopenharmony_ci continue; 8501cb0ef41Sopenharmony_ci } 8511cb0ef41Sopenharmony_ci RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY; 8521cb0ef41Sopenharmony_ci if (current() == '?') { 8531cb0ef41Sopenharmony_ci quantifier_type = RegExpQuantifier::NON_GREEDY; 8541cb0ef41Sopenharmony_ci Advance(); 8551cb0ef41Sopenharmony_ci } else if (FLAG_regexp_possessive_quantifier && current() == '+') { 8561cb0ef41Sopenharmony_ci // FLAG_regexp_possessive_quantifier is a debug-only flag. 8571cb0ef41Sopenharmony_ci quantifier_type = RegExpQuantifier::POSSESSIVE; 8581cb0ef41Sopenharmony_ci Advance(); 8591cb0ef41Sopenharmony_ci } 8601cb0ef41Sopenharmony_ci if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) { 8611cb0ef41Sopenharmony_ci return ReportError(RegExpError::kInvalidQuantifier); 8621cb0ef41Sopenharmony_ci } 8631cb0ef41Sopenharmony_ci } 8641cb0ef41Sopenharmony_ci} 8651cb0ef41Sopenharmony_ci 8661cb0ef41Sopenharmony_citemplate <class CharT> 8671cb0ef41Sopenharmony_ciRegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis( 8681cb0ef41Sopenharmony_ci RegExpParserState* state) { 8691cb0ef41Sopenharmony_ci RegExpLookaround::Type lookaround_type = state->lookaround_type(); 8701cb0ef41Sopenharmony_ci bool is_named_capture = false; 8711cb0ef41Sopenharmony_ci const ZoneVector<base::uc16>* capture_name = nullptr; 8721cb0ef41Sopenharmony_ci SubexpressionType subexpr_type = CAPTURE; 8731cb0ef41Sopenharmony_ci Advance(); 8741cb0ef41Sopenharmony_ci if (current() == '?') { 8751cb0ef41Sopenharmony_ci switch (Next()) { 8761cb0ef41Sopenharmony_ci case ':': 8771cb0ef41Sopenharmony_ci Advance(2); 8781cb0ef41Sopenharmony_ci subexpr_type = GROUPING; 8791cb0ef41Sopenharmony_ci break; 8801cb0ef41Sopenharmony_ci case '=': 8811cb0ef41Sopenharmony_ci Advance(2); 8821cb0ef41Sopenharmony_ci lookaround_type = RegExpLookaround::LOOKAHEAD; 8831cb0ef41Sopenharmony_ci subexpr_type = POSITIVE_LOOKAROUND; 8841cb0ef41Sopenharmony_ci break; 8851cb0ef41Sopenharmony_ci case '!': 8861cb0ef41Sopenharmony_ci Advance(2); 8871cb0ef41Sopenharmony_ci lookaround_type = RegExpLookaround::LOOKAHEAD; 8881cb0ef41Sopenharmony_ci subexpr_type = NEGATIVE_LOOKAROUND; 8891cb0ef41Sopenharmony_ci break; 8901cb0ef41Sopenharmony_ci case '<': 8911cb0ef41Sopenharmony_ci Advance(); 8921cb0ef41Sopenharmony_ci if (Next() == '=') { 8931cb0ef41Sopenharmony_ci Advance(2); 8941cb0ef41Sopenharmony_ci lookaround_type = RegExpLookaround::LOOKBEHIND; 8951cb0ef41Sopenharmony_ci subexpr_type = POSITIVE_LOOKAROUND; 8961cb0ef41Sopenharmony_ci break; 8971cb0ef41Sopenharmony_ci } else if (Next() == '!') { 8981cb0ef41Sopenharmony_ci Advance(2); 8991cb0ef41Sopenharmony_ci lookaround_type = RegExpLookaround::LOOKBEHIND; 9001cb0ef41Sopenharmony_ci subexpr_type = NEGATIVE_LOOKAROUND; 9011cb0ef41Sopenharmony_ci break; 9021cb0ef41Sopenharmony_ci } 9031cb0ef41Sopenharmony_ci is_named_capture = true; 9041cb0ef41Sopenharmony_ci has_named_captures_ = true; 9051cb0ef41Sopenharmony_ci Advance(); 9061cb0ef41Sopenharmony_ci break; 9071cb0ef41Sopenharmony_ci default: 9081cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidGroup); 9091cb0ef41Sopenharmony_ci return nullptr; 9101cb0ef41Sopenharmony_ci } 9111cb0ef41Sopenharmony_ci } 9121cb0ef41Sopenharmony_ci if (subexpr_type == CAPTURE) { 9131cb0ef41Sopenharmony_ci if (captures_started_ >= RegExpMacroAssembler::kMaxRegisterCount) { 9141cb0ef41Sopenharmony_ci ReportError(RegExpError::kTooManyCaptures); 9151cb0ef41Sopenharmony_ci return nullptr; 9161cb0ef41Sopenharmony_ci } 9171cb0ef41Sopenharmony_ci captures_started_++; 9181cb0ef41Sopenharmony_ci 9191cb0ef41Sopenharmony_ci if (is_named_capture) { 9201cb0ef41Sopenharmony_ci capture_name = ParseCaptureGroupName(CHECK_FAILED); 9211cb0ef41Sopenharmony_ci } 9221cb0ef41Sopenharmony_ci } 9231cb0ef41Sopenharmony_ci // Store current state and begin new disjunction parsing. 9241cb0ef41Sopenharmony_ci return zone()->template New<RegExpParserState>( 9251cb0ef41Sopenharmony_ci state, subexpr_type, lookaround_type, captures_started_, capture_name, 9261cb0ef41Sopenharmony_ci state->builder()->flags(), zone()); 9271cb0ef41Sopenharmony_ci} 9281cb0ef41Sopenharmony_ci 9291cb0ef41Sopenharmony_ci#ifdef DEBUG 9301cb0ef41Sopenharmony_cinamespace { 9311cb0ef41Sopenharmony_ci 9321cb0ef41Sopenharmony_cibool IsSpecialClassEscape(base::uc32 c) { 9331cb0ef41Sopenharmony_ci switch (c) { 9341cb0ef41Sopenharmony_ci case 'd': 9351cb0ef41Sopenharmony_ci case 'D': 9361cb0ef41Sopenharmony_ci case 's': 9371cb0ef41Sopenharmony_ci case 'S': 9381cb0ef41Sopenharmony_ci case 'w': 9391cb0ef41Sopenharmony_ci case 'W': 9401cb0ef41Sopenharmony_ci return true; 9411cb0ef41Sopenharmony_ci default: 9421cb0ef41Sopenharmony_ci return false; 9431cb0ef41Sopenharmony_ci } 9441cb0ef41Sopenharmony_ci} 9451cb0ef41Sopenharmony_ci 9461cb0ef41Sopenharmony_ci} // namespace 9471cb0ef41Sopenharmony_ci#endif 9481cb0ef41Sopenharmony_ci 9491cb0ef41Sopenharmony_ci// In order to know whether an escape is a backreference or not we have to scan 9501cb0ef41Sopenharmony_ci// the entire regexp and find the number of capturing parentheses. However we 9511cb0ef41Sopenharmony_ci// don't want to scan the regexp twice unless it is necessary. This mini-parser 9521cb0ef41Sopenharmony_ci// is called when needed. It can see the difference between capturing and 9531cb0ef41Sopenharmony_ci// noncapturing parentheses and can skip character classes and backslash-escaped 9541cb0ef41Sopenharmony_ci// characters. 9551cb0ef41Sopenharmony_ci// 9561cb0ef41Sopenharmony_ci// Important: The scanner has to be in a consistent state when calling 9571cb0ef41Sopenharmony_ci// ScanForCaptures, e.g. not in the middle of an escape sequence '\['. 9581cb0ef41Sopenharmony_citemplate <class CharT> 9591cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::ScanForCaptures( 9601cb0ef41Sopenharmony_ci InClassEscapeState in_class_escape_state) { 9611cb0ef41Sopenharmony_ci DCHECK(!is_scanned_for_captures_); 9621cb0ef41Sopenharmony_ci const int saved_position = position(); 9631cb0ef41Sopenharmony_ci // Start with captures started previous to current position 9641cb0ef41Sopenharmony_ci int capture_count = captures_started(); 9651cb0ef41Sopenharmony_ci // When we start inside a character class, skip everything inside the class. 9661cb0ef41Sopenharmony_ci if (in_class_escape_state == InClassEscapeState::kInClass) { 9671cb0ef41Sopenharmony_ci int c; 9681cb0ef41Sopenharmony_ci while ((c = current()) != kEndMarker) { 9691cb0ef41Sopenharmony_ci Advance(); 9701cb0ef41Sopenharmony_ci if (c == '\\') { 9711cb0ef41Sopenharmony_ci Advance(); 9721cb0ef41Sopenharmony_ci } else { 9731cb0ef41Sopenharmony_ci if (c == ']') break; 9741cb0ef41Sopenharmony_ci } 9751cb0ef41Sopenharmony_ci } 9761cb0ef41Sopenharmony_ci } 9771cb0ef41Sopenharmony_ci // Add count of captures after this position. 9781cb0ef41Sopenharmony_ci int n; 9791cb0ef41Sopenharmony_ci while ((n = current()) != kEndMarker) { 9801cb0ef41Sopenharmony_ci Advance(); 9811cb0ef41Sopenharmony_ci switch (n) { 9821cb0ef41Sopenharmony_ci case '\\': 9831cb0ef41Sopenharmony_ci Advance(); 9841cb0ef41Sopenharmony_ci break; 9851cb0ef41Sopenharmony_ci case '[': { 9861cb0ef41Sopenharmony_ci int c; 9871cb0ef41Sopenharmony_ci while ((c = current()) != kEndMarker) { 9881cb0ef41Sopenharmony_ci Advance(); 9891cb0ef41Sopenharmony_ci if (c == '\\') { 9901cb0ef41Sopenharmony_ci Advance(); 9911cb0ef41Sopenharmony_ci } else { 9921cb0ef41Sopenharmony_ci if (c == ']') break; 9931cb0ef41Sopenharmony_ci } 9941cb0ef41Sopenharmony_ci } 9951cb0ef41Sopenharmony_ci break; 9961cb0ef41Sopenharmony_ci } 9971cb0ef41Sopenharmony_ci case '(': 9981cb0ef41Sopenharmony_ci if (current() == '?') { 9991cb0ef41Sopenharmony_ci // At this point we could be in 10001cb0ef41Sopenharmony_ci // * a non-capturing group '(:', 10011cb0ef41Sopenharmony_ci // * a lookbehind assertion '(?<=' '(?<!' 10021cb0ef41Sopenharmony_ci // * or a named capture '(?<'. 10031cb0ef41Sopenharmony_ci // 10041cb0ef41Sopenharmony_ci // Of these, only named captures are capturing groups. 10051cb0ef41Sopenharmony_ci 10061cb0ef41Sopenharmony_ci Advance(); 10071cb0ef41Sopenharmony_ci if (current() != '<') break; 10081cb0ef41Sopenharmony_ci 10091cb0ef41Sopenharmony_ci Advance(); 10101cb0ef41Sopenharmony_ci if (current() == '=' || current() == '!') break; 10111cb0ef41Sopenharmony_ci 10121cb0ef41Sopenharmony_ci // Found a possible named capture. It could turn out to be a syntax 10131cb0ef41Sopenharmony_ci // error (e.g. an unterminated or invalid name), but that distinction 10141cb0ef41Sopenharmony_ci // does not matter for our purposes. 10151cb0ef41Sopenharmony_ci has_named_captures_ = true; 10161cb0ef41Sopenharmony_ci } 10171cb0ef41Sopenharmony_ci capture_count++; 10181cb0ef41Sopenharmony_ci break; 10191cb0ef41Sopenharmony_ci } 10201cb0ef41Sopenharmony_ci } 10211cb0ef41Sopenharmony_ci capture_count_ = capture_count; 10221cb0ef41Sopenharmony_ci is_scanned_for_captures_ = true; 10231cb0ef41Sopenharmony_ci Reset(saved_position); 10241cb0ef41Sopenharmony_ci} 10251cb0ef41Sopenharmony_ci 10261cb0ef41Sopenharmony_citemplate <class CharT> 10271cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseBackReferenceIndex(int* index_out) { 10281cb0ef41Sopenharmony_ci DCHECK_EQ('\\', current()); 10291cb0ef41Sopenharmony_ci DCHECK('1' <= Next() && Next() <= '9'); 10301cb0ef41Sopenharmony_ci // Try to parse a decimal literal that is no greater than the total number 10311cb0ef41Sopenharmony_ci // of left capturing parentheses in the input. 10321cb0ef41Sopenharmony_ci int start = position(); 10331cb0ef41Sopenharmony_ci int value = Next() - '0'; 10341cb0ef41Sopenharmony_ci Advance(2); 10351cb0ef41Sopenharmony_ci while (true) { 10361cb0ef41Sopenharmony_ci base::uc32 c = current(); 10371cb0ef41Sopenharmony_ci if (IsDecimalDigit(c)) { 10381cb0ef41Sopenharmony_ci value = 10 * value + (c - '0'); 10391cb0ef41Sopenharmony_ci if (value > RegExpMacroAssembler::kMaxRegisterCount) { 10401cb0ef41Sopenharmony_ci Reset(start); 10411cb0ef41Sopenharmony_ci return false; 10421cb0ef41Sopenharmony_ci } 10431cb0ef41Sopenharmony_ci Advance(); 10441cb0ef41Sopenharmony_ci } else { 10451cb0ef41Sopenharmony_ci break; 10461cb0ef41Sopenharmony_ci } 10471cb0ef41Sopenharmony_ci } 10481cb0ef41Sopenharmony_ci if (value > captures_started()) { 10491cb0ef41Sopenharmony_ci if (!is_scanned_for_captures_) 10501cb0ef41Sopenharmony_ci ScanForCaptures(InClassEscapeState::kNotInClass); 10511cb0ef41Sopenharmony_ci if (value > capture_count_) { 10521cb0ef41Sopenharmony_ci Reset(start); 10531cb0ef41Sopenharmony_ci return false; 10541cb0ef41Sopenharmony_ci } 10551cb0ef41Sopenharmony_ci } 10561cb0ef41Sopenharmony_ci *index_out = value; 10571cb0ef41Sopenharmony_ci return true; 10581cb0ef41Sopenharmony_ci} 10591cb0ef41Sopenharmony_ci 10601cb0ef41Sopenharmony_cinamespace { 10611cb0ef41Sopenharmony_ci 10621cb0ef41Sopenharmony_civoid push_code_unit(ZoneVector<base::uc16>* v, uint32_t code_unit) { 10631cb0ef41Sopenharmony_ci if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { 10641cb0ef41Sopenharmony_ci v->push_back(code_unit); 10651cb0ef41Sopenharmony_ci } else { 10661cb0ef41Sopenharmony_ci v->push_back(unibrow::Utf16::LeadSurrogate(code_unit)); 10671cb0ef41Sopenharmony_ci v->push_back(unibrow::Utf16::TrailSurrogate(code_unit)); 10681cb0ef41Sopenharmony_ci } 10691cb0ef41Sopenharmony_ci} 10701cb0ef41Sopenharmony_ci 10711cb0ef41Sopenharmony_ci} // namespace 10721cb0ef41Sopenharmony_ci 10731cb0ef41Sopenharmony_citemplate <class CharT> 10741cb0ef41Sopenharmony_ciconst ZoneVector<base::uc16>* RegExpParserImpl<CharT>::ParseCaptureGroupName() { 10751cb0ef41Sopenharmony_ci // Due to special Advance requirements (see the next comment), rewind by one 10761cb0ef41Sopenharmony_ci // such that names starting with a surrogate pair are parsed correctly for 10771cb0ef41Sopenharmony_ci // patterns where the unicode flag is unset. 10781cb0ef41Sopenharmony_ci // 10791cb0ef41Sopenharmony_ci // Note that we use this odd pattern of rewinding the last advance in order 10801cb0ef41Sopenharmony_ci // to adhere to the common parser behavior of expecting `current` to point at 10811cb0ef41Sopenharmony_ci // the first candidate character for a function (e.g. when entering ParseFoo, 10821cb0ef41Sopenharmony_ci // `current` should point at the first character of Foo). 10831cb0ef41Sopenharmony_ci RewindByOneCodepoint(); 10841cb0ef41Sopenharmony_ci 10851cb0ef41Sopenharmony_ci ZoneVector<base::uc16>* name = 10861cb0ef41Sopenharmony_ci zone()->template New<ZoneVector<base::uc16>>(zone()); 10871cb0ef41Sopenharmony_ci 10881cb0ef41Sopenharmony_ci { 10891cb0ef41Sopenharmony_ci // Advance behavior inside this function is tricky since 10901cb0ef41Sopenharmony_ci // RegExpIdentifierName explicitly enables unicode (in spec terms, sets +U) 10911cb0ef41Sopenharmony_ci // and thus allows surrogate pairs and \u{}-style escapes even in 10921cb0ef41Sopenharmony_ci // non-unicode patterns. Therefore Advance within the capture group name 10931cb0ef41Sopenharmony_ci // has to force-enable unicode, and outside the name revert to default 10941cb0ef41Sopenharmony_ci // behavior. 10951cb0ef41Sopenharmony_ci ForceUnicodeScope force_unicode(this); 10961cb0ef41Sopenharmony_ci 10971cb0ef41Sopenharmony_ci bool at_start = true; 10981cb0ef41Sopenharmony_ci while (true) { 10991cb0ef41Sopenharmony_ci Advance(); 11001cb0ef41Sopenharmony_ci base::uc32 c = current(); 11011cb0ef41Sopenharmony_ci 11021cb0ef41Sopenharmony_ci // Convert unicode escapes. 11031cb0ef41Sopenharmony_ci if (c == '\\' && Next() == 'u') { 11041cb0ef41Sopenharmony_ci Advance(2); 11051cb0ef41Sopenharmony_ci if (!ParseUnicodeEscape(&c)) { 11061cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidUnicodeEscape); 11071cb0ef41Sopenharmony_ci return nullptr; 11081cb0ef41Sopenharmony_ci } 11091cb0ef41Sopenharmony_ci RewindByOneCodepoint(); 11101cb0ef41Sopenharmony_ci } 11111cb0ef41Sopenharmony_ci 11121cb0ef41Sopenharmony_ci // The backslash char is misclassified as both ID_Start and ID_Continue. 11131cb0ef41Sopenharmony_ci if (c == '\\') { 11141cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidCaptureGroupName); 11151cb0ef41Sopenharmony_ci return nullptr; 11161cb0ef41Sopenharmony_ci } 11171cb0ef41Sopenharmony_ci 11181cb0ef41Sopenharmony_ci if (at_start) { 11191cb0ef41Sopenharmony_ci if (!IsIdentifierStart(c)) { 11201cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidCaptureGroupName); 11211cb0ef41Sopenharmony_ci return nullptr; 11221cb0ef41Sopenharmony_ci } 11231cb0ef41Sopenharmony_ci push_code_unit(name, c); 11241cb0ef41Sopenharmony_ci at_start = false; 11251cb0ef41Sopenharmony_ci } else { 11261cb0ef41Sopenharmony_ci if (c == '>') { 11271cb0ef41Sopenharmony_ci break; 11281cb0ef41Sopenharmony_ci } else if (IsIdentifierPart(c)) { 11291cb0ef41Sopenharmony_ci push_code_unit(name, c); 11301cb0ef41Sopenharmony_ci } else { 11311cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidCaptureGroupName); 11321cb0ef41Sopenharmony_ci return nullptr; 11331cb0ef41Sopenharmony_ci } 11341cb0ef41Sopenharmony_ci } 11351cb0ef41Sopenharmony_ci } 11361cb0ef41Sopenharmony_ci } 11371cb0ef41Sopenharmony_ci 11381cb0ef41Sopenharmony_ci // This final advance goes back into the state of pointing at the next 11391cb0ef41Sopenharmony_ci // relevant char, which the rest of the parser expects. See also the previous 11401cb0ef41Sopenharmony_ci // comments in this function. 11411cb0ef41Sopenharmony_ci Advance(); 11421cb0ef41Sopenharmony_ci return name; 11431cb0ef41Sopenharmony_ci} 11441cb0ef41Sopenharmony_ci 11451cb0ef41Sopenharmony_citemplate <class CharT> 11461cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::CreateNamedCaptureAtIndex( 11471cb0ef41Sopenharmony_ci const ZoneVector<base::uc16>* name, int index) { 11481cb0ef41Sopenharmony_ci DCHECK(0 < index && index <= captures_started_); 11491cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(name); 11501cb0ef41Sopenharmony_ci 11511cb0ef41Sopenharmony_ci RegExpCapture* capture = GetCapture(index); 11521cb0ef41Sopenharmony_ci DCHECK_NULL(capture->name()); 11531cb0ef41Sopenharmony_ci 11541cb0ef41Sopenharmony_ci capture->set_name(name); 11551cb0ef41Sopenharmony_ci 11561cb0ef41Sopenharmony_ci if (named_captures_ == nullptr) { 11571cb0ef41Sopenharmony_ci named_captures_ = 11581cb0ef41Sopenharmony_ci zone_->template New<ZoneSet<RegExpCapture*, RegExpCaptureNameLess>>( 11591cb0ef41Sopenharmony_ci zone()); 11601cb0ef41Sopenharmony_ci } else { 11611cb0ef41Sopenharmony_ci // Check for duplicates and bail if we find any. 11621cb0ef41Sopenharmony_ci 11631cb0ef41Sopenharmony_ci const auto& named_capture_it = named_captures_->find(capture); 11641cb0ef41Sopenharmony_ci if (named_capture_it != named_captures_->end()) { 11651cb0ef41Sopenharmony_ci ReportError(RegExpError::kDuplicateCaptureGroupName); 11661cb0ef41Sopenharmony_ci return false; 11671cb0ef41Sopenharmony_ci } 11681cb0ef41Sopenharmony_ci } 11691cb0ef41Sopenharmony_ci 11701cb0ef41Sopenharmony_ci named_captures_->emplace(capture); 11711cb0ef41Sopenharmony_ci 11721cb0ef41Sopenharmony_ci return true; 11731cb0ef41Sopenharmony_ci} 11741cb0ef41Sopenharmony_ci 11751cb0ef41Sopenharmony_citemplate <class CharT> 11761cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseNamedBackReference( 11771cb0ef41Sopenharmony_ci RegExpBuilder* builder, RegExpParserState* state) { 11781cb0ef41Sopenharmony_ci // The parser is assumed to be on the '<' in \k<name>. 11791cb0ef41Sopenharmony_ci if (current() != '<') { 11801cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidNamedReference); 11811cb0ef41Sopenharmony_ci return false; 11821cb0ef41Sopenharmony_ci } 11831cb0ef41Sopenharmony_ci 11841cb0ef41Sopenharmony_ci Advance(); 11851cb0ef41Sopenharmony_ci const ZoneVector<base::uc16>* name = ParseCaptureGroupName(); 11861cb0ef41Sopenharmony_ci if (name == nullptr) { 11871cb0ef41Sopenharmony_ci return false; 11881cb0ef41Sopenharmony_ci } 11891cb0ef41Sopenharmony_ci 11901cb0ef41Sopenharmony_ci if (state->IsInsideCaptureGroup(name)) { 11911cb0ef41Sopenharmony_ci builder->AddEmpty(); 11921cb0ef41Sopenharmony_ci } else { 11931cb0ef41Sopenharmony_ci RegExpBackReference* atom = 11941cb0ef41Sopenharmony_ci zone()->template New<RegExpBackReference>(builder->flags()); 11951cb0ef41Sopenharmony_ci atom->set_name(name); 11961cb0ef41Sopenharmony_ci 11971cb0ef41Sopenharmony_ci builder->AddAtom(atom); 11981cb0ef41Sopenharmony_ci 11991cb0ef41Sopenharmony_ci if (named_back_references_ == nullptr) { 12001cb0ef41Sopenharmony_ci named_back_references_ = 12011cb0ef41Sopenharmony_ci zone()->template New<ZoneList<RegExpBackReference*>>(1, zone()); 12021cb0ef41Sopenharmony_ci } 12031cb0ef41Sopenharmony_ci named_back_references_->Add(atom, zone()); 12041cb0ef41Sopenharmony_ci } 12051cb0ef41Sopenharmony_ci 12061cb0ef41Sopenharmony_ci return true; 12071cb0ef41Sopenharmony_ci} 12081cb0ef41Sopenharmony_ci 12091cb0ef41Sopenharmony_citemplate <class CharT> 12101cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::PatchNamedBackReferences() { 12111cb0ef41Sopenharmony_ci if (named_back_references_ == nullptr) return; 12121cb0ef41Sopenharmony_ci 12131cb0ef41Sopenharmony_ci if (named_captures_ == nullptr) { 12141cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidNamedCaptureReference); 12151cb0ef41Sopenharmony_ci return; 12161cb0ef41Sopenharmony_ci } 12171cb0ef41Sopenharmony_ci 12181cb0ef41Sopenharmony_ci // Look up and patch the actual capture for each named back reference. 12191cb0ef41Sopenharmony_ci 12201cb0ef41Sopenharmony_ci for (int i = 0; i < named_back_references_->length(); i++) { 12211cb0ef41Sopenharmony_ci RegExpBackReference* ref = named_back_references_->at(i); 12221cb0ef41Sopenharmony_ci 12231cb0ef41Sopenharmony_ci // Capture used to search the named_captures_ by name, index of the 12241cb0ef41Sopenharmony_ci // capture is never used. 12251cb0ef41Sopenharmony_ci static const int kInvalidIndex = 0; 12261cb0ef41Sopenharmony_ci RegExpCapture* search_capture = 12271cb0ef41Sopenharmony_ci zone()->template New<RegExpCapture>(kInvalidIndex); 12281cb0ef41Sopenharmony_ci DCHECK_NULL(search_capture->name()); 12291cb0ef41Sopenharmony_ci search_capture->set_name(ref->name()); 12301cb0ef41Sopenharmony_ci 12311cb0ef41Sopenharmony_ci int index = -1; 12321cb0ef41Sopenharmony_ci const auto& capture_it = named_captures_->find(search_capture); 12331cb0ef41Sopenharmony_ci if (capture_it != named_captures_->end()) { 12341cb0ef41Sopenharmony_ci index = (*capture_it)->index(); 12351cb0ef41Sopenharmony_ci } else { 12361cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidNamedCaptureReference); 12371cb0ef41Sopenharmony_ci return; 12381cb0ef41Sopenharmony_ci } 12391cb0ef41Sopenharmony_ci 12401cb0ef41Sopenharmony_ci ref->set_capture(GetCapture(index)); 12411cb0ef41Sopenharmony_ci } 12421cb0ef41Sopenharmony_ci} 12431cb0ef41Sopenharmony_ci 12441cb0ef41Sopenharmony_citemplate <class CharT> 12451cb0ef41Sopenharmony_ciRegExpCapture* RegExpParserImpl<CharT>::GetCapture(int index) { 12461cb0ef41Sopenharmony_ci // The index for the capture groups are one-based. Its index in the list is 12471cb0ef41Sopenharmony_ci // zero-based. 12481cb0ef41Sopenharmony_ci const int known_captures = 12491cb0ef41Sopenharmony_ci is_scanned_for_captures_ ? capture_count_ : captures_started_; 12501cb0ef41Sopenharmony_ci DCHECK(index <= known_captures); 12511cb0ef41Sopenharmony_ci if (captures_ == nullptr) { 12521cb0ef41Sopenharmony_ci captures_ = 12531cb0ef41Sopenharmony_ci zone()->template New<ZoneList<RegExpCapture*>>(known_captures, zone()); 12541cb0ef41Sopenharmony_ci } 12551cb0ef41Sopenharmony_ci while (captures_->length() < known_captures) { 12561cb0ef41Sopenharmony_ci captures_->Add(zone()->template New<RegExpCapture>(captures_->length() + 1), 12571cb0ef41Sopenharmony_ci zone()); 12581cb0ef41Sopenharmony_ci } 12591cb0ef41Sopenharmony_ci return captures_->at(index - 1); 12601cb0ef41Sopenharmony_ci} 12611cb0ef41Sopenharmony_ci 12621cb0ef41Sopenharmony_citemplate <class CharT> 12631cb0ef41Sopenharmony_ciZoneVector<RegExpCapture*>* RegExpParserImpl<CharT>::GetNamedCaptures() const { 12641cb0ef41Sopenharmony_ci if (named_captures_ == nullptr || named_captures_->empty()) { 12651cb0ef41Sopenharmony_ci return nullptr; 12661cb0ef41Sopenharmony_ci } 12671cb0ef41Sopenharmony_ci 12681cb0ef41Sopenharmony_ci return zone()->template New<ZoneVector<RegExpCapture*>>( 12691cb0ef41Sopenharmony_ci named_captures_->begin(), named_captures_->end(), zone()); 12701cb0ef41Sopenharmony_ci} 12711cb0ef41Sopenharmony_ci 12721cb0ef41Sopenharmony_citemplate <class CharT> 12731cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::HasNamedCaptures( 12741cb0ef41Sopenharmony_ci InClassEscapeState in_class_escape_state) { 12751cb0ef41Sopenharmony_ci if (has_named_captures_ || is_scanned_for_captures_) { 12761cb0ef41Sopenharmony_ci return has_named_captures_; 12771cb0ef41Sopenharmony_ci } 12781cb0ef41Sopenharmony_ci 12791cb0ef41Sopenharmony_ci ScanForCaptures(in_class_escape_state); 12801cb0ef41Sopenharmony_ci DCHECK(is_scanned_for_captures_); 12811cb0ef41Sopenharmony_ci return has_named_captures_; 12821cb0ef41Sopenharmony_ci} 12831cb0ef41Sopenharmony_ci 12841cb0ef41Sopenharmony_ci// QuantifierPrefix :: 12851cb0ef41Sopenharmony_ci// { DecimalDigits } 12861cb0ef41Sopenharmony_ci// { DecimalDigits , } 12871cb0ef41Sopenharmony_ci// { DecimalDigits , DecimalDigits } 12881cb0ef41Sopenharmony_ci// 12891cb0ef41Sopenharmony_ci// Returns true if parsing succeeds, and set the min_out and max_out 12901cb0ef41Sopenharmony_ci// values. Values are truncated to RegExpTree::kInfinity if they overflow. 12911cb0ef41Sopenharmony_citemplate <class CharT> 12921cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseIntervalQuantifier(int* min_out, 12931cb0ef41Sopenharmony_ci int* max_out) { 12941cb0ef41Sopenharmony_ci DCHECK_EQ(current(), '{'); 12951cb0ef41Sopenharmony_ci int start = position(); 12961cb0ef41Sopenharmony_ci Advance(); 12971cb0ef41Sopenharmony_ci int min = 0; 12981cb0ef41Sopenharmony_ci if (!IsDecimalDigit(current())) { 12991cb0ef41Sopenharmony_ci Reset(start); 13001cb0ef41Sopenharmony_ci return false; 13011cb0ef41Sopenharmony_ci } 13021cb0ef41Sopenharmony_ci while (IsDecimalDigit(current())) { 13031cb0ef41Sopenharmony_ci int next = current() - '0'; 13041cb0ef41Sopenharmony_ci if (min > (RegExpTree::kInfinity - next) / 10) { 13051cb0ef41Sopenharmony_ci // Overflow. Skip past remaining decimal digits and return -1. 13061cb0ef41Sopenharmony_ci do { 13071cb0ef41Sopenharmony_ci Advance(); 13081cb0ef41Sopenharmony_ci } while (IsDecimalDigit(current())); 13091cb0ef41Sopenharmony_ci min = RegExpTree::kInfinity; 13101cb0ef41Sopenharmony_ci break; 13111cb0ef41Sopenharmony_ci } 13121cb0ef41Sopenharmony_ci min = 10 * min + next; 13131cb0ef41Sopenharmony_ci Advance(); 13141cb0ef41Sopenharmony_ci } 13151cb0ef41Sopenharmony_ci int max = 0; 13161cb0ef41Sopenharmony_ci if (current() == '}') { 13171cb0ef41Sopenharmony_ci max = min; 13181cb0ef41Sopenharmony_ci Advance(); 13191cb0ef41Sopenharmony_ci } else if (current() == ',') { 13201cb0ef41Sopenharmony_ci Advance(); 13211cb0ef41Sopenharmony_ci if (current() == '}') { 13221cb0ef41Sopenharmony_ci max = RegExpTree::kInfinity; 13231cb0ef41Sopenharmony_ci Advance(); 13241cb0ef41Sopenharmony_ci } else { 13251cb0ef41Sopenharmony_ci while (IsDecimalDigit(current())) { 13261cb0ef41Sopenharmony_ci int next = current() - '0'; 13271cb0ef41Sopenharmony_ci if (max > (RegExpTree::kInfinity - next) / 10) { 13281cb0ef41Sopenharmony_ci do { 13291cb0ef41Sopenharmony_ci Advance(); 13301cb0ef41Sopenharmony_ci } while (IsDecimalDigit(current())); 13311cb0ef41Sopenharmony_ci max = RegExpTree::kInfinity; 13321cb0ef41Sopenharmony_ci break; 13331cb0ef41Sopenharmony_ci } 13341cb0ef41Sopenharmony_ci max = 10 * max + next; 13351cb0ef41Sopenharmony_ci Advance(); 13361cb0ef41Sopenharmony_ci } 13371cb0ef41Sopenharmony_ci if (current() != '}') { 13381cb0ef41Sopenharmony_ci Reset(start); 13391cb0ef41Sopenharmony_ci return false; 13401cb0ef41Sopenharmony_ci } 13411cb0ef41Sopenharmony_ci Advance(); 13421cb0ef41Sopenharmony_ci } 13431cb0ef41Sopenharmony_ci } else { 13441cb0ef41Sopenharmony_ci Reset(start); 13451cb0ef41Sopenharmony_ci return false; 13461cb0ef41Sopenharmony_ci } 13471cb0ef41Sopenharmony_ci *min_out = min; 13481cb0ef41Sopenharmony_ci *max_out = max; 13491cb0ef41Sopenharmony_ci return true; 13501cb0ef41Sopenharmony_ci} 13511cb0ef41Sopenharmony_ci 13521cb0ef41Sopenharmony_citemplate <class CharT> 13531cb0ef41Sopenharmony_cibase::uc32 RegExpParserImpl<CharT>::ParseOctalLiteral() { 13541cb0ef41Sopenharmony_ci DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); 13551cb0ef41Sopenharmony_ci // For compatibility with some other browsers (not all), we parse 13561cb0ef41Sopenharmony_ci // up to three octal digits with a value below 256. 13571cb0ef41Sopenharmony_ci // ES#prod-annexB-LegacyOctalEscapeSequence 13581cb0ef41Sopenharmony_ci base::uc32 value = current() - '0'; 13591cb0ef41Sopenharmony_ci Advance(); 13601cb0ef41Sopenharmony_ci if ('0' <= current() && current() <= '7') { 13611cb0ef41Sopenharmony_ci value = value * 8 + current() - '0'; 13621cb0ef41Sopenharmony_ci Advance(); 13631cb0ef41Sopenharmony_ci if (value < 32 && '0' <= current() && current() <= '7') { 13641cb0ef41Sopenharmony_ci value = value * 8 + current() - '0'; 13651cb0ef41Sopenharmony_ci Advance(); 13661cb0ef41Sopenharmony_ci } 13671cb0ef41Sopenharmony_ci } 13681cb0ef41Sopenharmony_ci return value; 13691cb0ef41Sopenharmony_ci} 13701cb0ef41Sopenharmony_ci 13711cb0ef41Sopenharmony_citemplate <class CharT> 13721cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseHexEscape(int length, base::uc32* value) { 13731cb0ef41Sopenharmony_ci int start = position(); 13741cb0ef41Sopenharmony_ci base::uc32 val = 0; 13751cb0ef41Sopenharmony_ci for (int i = 0; i < length; ++i) { 13761cb0ef41Sopenharmony_ci base::uc32 c = current(); 13771cb0ef41Sopenharmony_ci int d = base::HexValue(c); 13781cb0ef41Sopenharmony_ci if (d < 0) { 13791cb0ef41Sopenharmony_ci Reset(start); 13801cb0ef41Sopenharmony_ci return false; 13811cb0ef41Sopenharmony_ci } 13821cb0ef41Sopenharmony_ci val = val * 16 + d; 13831cb0ef41Sopenharmony_ci Advance(); 13841cb0ef41Sopenharmony_ci } 13851cb0ef41Sopenharmony_ci *value = val; 13861cb0ef41Sopenharmony_ci return true; 13871cb0ef41Sopenharmony_ci} 13881cb0ef41Sopenharmony_ci 13891cb0ef41Sopenharmony_ci// This parses RegExpUnicodeEscapeSequence as described in ECMA262. 13901cb0ef41Sopenharmony_citemplate <class CharT> 13911cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseUnicodeEscape(base::uc32* value) { 13921cb0ef41Sopenharmony_ci // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are 13931cb0ef41Sopenharmony_ci // allowed). In the latter case, the number of hex digits between { } is 13941cb0ef41Sopenharmony_ci // arbitrary. \ and u have already been read. 13951cb0ef41Sopenharmony_ci if (current() == '{' && unicode()) { 13961cb0ef41Sopenharmony_ci int start = position(); 13971cb0ef41Sopenharmony_ci Advance(); 13981cb0ef41Sopenharmony_ci if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) { 13991cb0ef41Sopenharmony_ci if (current() == '}') { 14001cb0ef41Sopenharmony_ci Advance(); 14011cb0ef41Sopenharmony_ci return true; 14021cb0ef41Sopenharmony_ci } 14031cb0ef41Sopenharmony_ci } 14041cb0ef41Sopenharmony_ci Reset(start); 14051cb0ef41Sopenharmony_ci return false; 14061cb0ef41Sopenharmony_ci } 14071cb0ef41Sopenharmony_ci // \u but no {, or \u{...} escapes not allowed. 14081cb0ef41Sopenharmony_ci bool result = ParseHexEscape(4, value); 14091cb0ef41Sopenharmony_ci if (result && unicode() && unibrow::Utf16::IsLeadSurrogate(*value) && 14101cb0ef41Sopenharmony_ci current() == '\\') { 14111cb0ef41Sopenharmony_ci // Attempt to read trail surrogate. 14121cb0ef41Sopenharmony_ci int start = position(); 14131cb0ef41Sopenharmony_ci if (Next() == 'u') { 14141cb0ef41Sopenharmony_ci Advance(2); 14151cb0ef41Sopenharmony_ci base::uc32 trail; 14161cb0ef41Sopenharmony_ci if (ParseHexEscape(4, &trail) && 14171cb0ef41Sopenharmony_ci unibrow::Utf16::IsTrailSurrogate(trail)) { 14181cb0ef41Sopenharmony_ci *value = unibrow::Utf16::CombineSurrogatePair( 14191cb0ef41Sopenharmony_ci static_cast<base::uc16>(*value), static_cast<base::uc16>(trail)); 14201cb0ef41Sopenharmony_ci return true; 14211cb0ef41Sopenharmony_ci } 14221cb0ef41Sopenharmony_ci } 14231cb0ef41Sopenharmony_ci Reset(start); 14241cb0ef41Sopenharmony_ci } 14251cb0ef41Sopenharmony_ci return result; 14261cb0ef41Sopenharmony_ci} 14271cb0ef41Sopenharmony_ci 14281cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 14291cb0ef41Sopenharmony_ci 14301cb0ef41Sopenharmony_cinamespace { 14311cb0ef41Sopenharmony_ci 14321cb0ef41Sopenharmony_cibool IsExactPropertyAlias(const char* property_name, UProperty property) { 14331cb0ef41Sopenharmony_ci const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); 14341cb0ef41Sopenharmony_ci if (short_name != nullptr && strcmp(property_name, short_name) == 0) 14351cb0ef41Sopenharmony_ci return true; 14361cb0ef41Sopenharmony_ci for (int i = 0;; i++) { 14371cb0ef41Sopenharmony_ci const char* long_name = u_getPropertyName( 14381cb0ef41Sopenharmony_ci property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); 14391cb0ef41Sopenharmony_ci if (long_name == nullptr) break; 14401cb0ef41Sopenharmony_ci if (strcmp(property_name, long_name) == 0) return true; 14411cb0ef41Sopenharmony_ci } 14421cb0ef41Sopenharmony_ci return false; 14431cb0ef41Sopenharmony_ci} 14441cb0ef41Sopenharmony_ci 14451cb0ef41Sopenharmony_cibool IsExactPropertyValueAlias(const char* property_value_name, 14461cb0ef41Sopenharmony_ci UProperty property, int32_t property_value) { 14471cb0ef41Sopenharmony_ci const char* short_name = 14481cb0ef41Sopenharmony_ci u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME); 14491cb0ef41Sopenharmony_ci if (short_name != nullptr && strcmp(property_value_name, short_name) == 0) { 14501cb0ef41Sopenharmony_ci return true; 14511cb0ef41Sopenharmony_ci } 14521cb0ef41Sopenharmony_ci for (int i = 0;; i++) { 14531cb0ef41Sopenharmony_ci const char* long_name = u_getPropertyValueName( 14541cb0ef41Sopenharmony_ci property, property_value, 14551cb0ef41Sopenharmony_ci static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); 14561cb0ef41Sopenharmony_ci if (long_name == nullptr) break; 14571cb0ef41Sopenharmony_ci if (strcmp(property_value_name, long_name) == 0) return true; 14581cb0ef41Sopenharmony_ci } 14591cb0ef41Sopenharmony_ci return false; 14601cb0ef41Sopenharmony_ci} 14611cb0ef41Sopenharmony_ci 14621cb0ef41Sopenharmony_cibool LookupPropertyValueName(UProperty property, 14631cb0ef41Sopenharmony_ci const char* property_value_name, bool negate, 14641cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* result, Zone* zone) { 14651cb0ef41Sopenharmony_ci UProperty property_for_lookup = property; 14661cb0ef41Sopenharmony_ci if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { 14671cb0ef41Sopenharmony_ci // For the property Script_Extensions, we have to do the property value 14681cb0ef41Sopenharmony_ci // name lookup as if the property is Script. 14691cb0ef41Sopenharmony_ci property_for_lookup = UCHAR_SCRIPT; 14701cb0ef41Sopenharmony_ci } 14711cb0ef41Sopenharmony_ci int32_t property_value = 14721cb0ef41Sopenharmony_ci u_getPropertyValueEnum(property_for_lookup, property_value_name); 14731cb0ef41Sopenharmony_ci if (property_value == UCHAR_INVALID_CODE) return false; 14741cb0ef41Sopenharmony_ci 14751cb0ef41Sopenharmony_ci // We require the property name to match exactly to one of the property value 14761cb0ef41Sopenharmony_ci // aliases. However, u_getPropertyValueEnum uses loose matching. 14771cb0ef41Sopenharmony_ci if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup, 14781cb0ef41Sopenharmony_ci property_value)) { 14791cb0ef41Sopenharmony_ci return false; 14801cb0ef41Sopenharmony_ci } 14811cb0ef41Sopenharmony_ci 14821cb0ef41Sopenharmony_ci UErrorCode ec = U_ZERO_ERROR; 14831cb0ef41Sopenharmony_ci icu::UnicodeSet set; 14841cb0ef41Sopenharmony_ci set.applyIntPropertyValue(property, property_value, ec); 14851cb0ef41Sopenharmony_ci bool success = ec == U_ZERO_ERROR && !set.isEmpty(); 14861cb0ef41Sopenharmony_ci 14871cb0ef41Sopenharmony_ci if (success) { 14881cb0ef41Sopenharmony_ci set.removeAllStrings(); 14891cb0ef41Sopenharmony_ci if (negate) set.complement(); 14901cb0ef41Sopenharmony_ci for (int i = 0; i < set.getRangeCount(); i++) { 14911cb0ef41Sopenharmony_ci result->Add( 14921cb0ef41Sopenharmony_ci CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)), 14931cb0ef41Sopenharmony_ci zone); 14941cb0ef41Sopenharmony_ci } 14951cb0ef41Sopenharmony_ci } 14961cb0ef41Sopenharmony_ci return success; 14971cb0ef41Sopenharmony_ci} 14981cb0ef41Sopenharmony_ci 14991cb0ef41Sopenharmony_citemplate <size_t N> 15001cb0ef41Sopenharmony_ciinline bool NameEquals(const char* name, const char (&literal)[N]) { 15011cb0ef41Sopenharmony_ci return strncmp(name, literal, N + 1) == 0; 15021cb0ef41Sopenharmony_ci} 15031cb0ef41Sopenharmony_ci 15041cb0ef41Sopenharmony_cibool LookupSpecialPropertyValueName(const char* name, 15051cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* result, 15061cb0ef41Sopenharmony_ci bool negate, Zone* zone) { 15071cb0ef41Sopenharmony_ci if (NameEquals(name, "Any")) { 15081cb0ef41Sopenharmony_ci if (negate) { 15091cb0ef41Sopenharmony_ci // Leave the list of character ranges empty, since the negation of 'Any' 15101cb0ef41Sopenharmony_ci // is the empty set. 15111cb0ef41Sopenharmony_ci } else { 15121cb0ef41Sopenharmony_ci result->Add(CharacterRange::Everything(), zone); 15131cb0ef41Sopenharmony_ci } 15141cb0ef41Sopenharmony_ci } else if (NameEquals(name, "ASCII")) { 15151cb0ef41Sopenharmony_ci result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) 15161cb0ef41Sopenharmony_ci : CharacterRange::Range(0x0, 0x7F), 15171cb0ef41Sopenharmony_ci zone); 15181cb0ef41Sopenharmony_ci } else if (NameEquals(name, "Assigned")) { 15191cb0ef41Sopenharmony_ci return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", 15201cb0ef41Sopenharmony_ci !negate, result, zone); 15211cb0ef41Sopenharmony_ci } else { 15221cb0ef41Sopenharmony_ci return false; 15231cb0ef41Sopenharmony_ci } 15241cb0ef41Sopenharmony_ci return true; 15251cb0ef41Sopenharmony_ci} 15261cb0ef41Sopenharmony_ci 15271cb0ef41Sopenharmony_ci// Explicitly allowlist supported binary properties. The spec forbids supporting 15281cb0ef41Sopenharmony_ci// properties outside of this set to ensure interoperability. 15291cb0ef41Sopenharmony_cibool IsSupportedBinaryProperty(UProperty property) { 15301cb0ef41Sopenharmony_ci switch (property) { 15311cb0ef41Sopenharmony_ci case UCHAR_ALPHABETIC: 15321cb0ef41Sopenharmony_ci // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName. 15331cb0ef41Sopenharmony_ci // 'ASCII' is not supported by ICU. See LookupSpecialPropertyValueName. 15341cb0ef41Sopenharmony_ci case UCHAR_ASCII_HEX_DIGIT: 15351cb0ef41Sopenharmony_ci // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName. 15361cb0ef41Sopenharmony_ci case UCHAR_BIDI_CONTROL: 15371cb0ef41Sopenharmony_ci case UCHAR_BIDI_MIRRORED: 15381cb0ef41Sopenharmony_ci case UCHAR_CASE_IGNORABLE: 15391cb0ef41Sopenharmony_ci case UCHAR_CASED: 15401cb0ef41Sopenharmony_ci case UCHAR_CHANGES_WHEN_CASEFOLDED: 15411cb0ef41Sopenharmony_ci case UCHAR_CHANGES_WHEN_CASEMAPPED: 15421cb0ef41Sopenharmony_ci case UCHAR_CHANGES_WHEN_LOWERCASED: 15431cb0ef41Sopenharmony_ci case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED: 15441cb0ef41Sopenharmony_ci case UCHAR_CHANGES_WHEN_TITLECASED: 15451cb0ef41Sopenharmony_ci case UCHAR_CHANGES_WHEN_UPPERCASED: 15461cb0ef41Sopenharmony_ci case UCHAR_DASH: 15471cb0ef41Sopenharmony_ci case UCHAR_DEFAULT_IGNORABLE_CODE_POINT: 15481cb0ef41Sopenharmony_ci case UCHAR_DEPRECATED: 15491cb0ef41Sopenharmony_ci case UCHAR_DIACRITIC: 15501cb0ef41Sopenharmony_ci case UCHAR_EMOJI: 15511cb0ef41Sopenharmony_ci case UCHAR_EMOJI_COMPONENT: 15521cb0ef41Sopenharmony_ci case UCHAR_EMOJI_MODIFIER_BASE: 15531cb0ef41Sopenharmony_ci case UCHAR_EMOJI_MODIFIER: 15541cb0ef41Sopenharmony_ci case UCHAR_EMOJI_PRESENTATION: 15551cb0ef41Sopenharmony_ci case UCHAR_EXTENDED_PICTOGRAPHIC: 15561cb0ef41Sopenharmony_ci case UCHAR_EXTENDER: 15571cb0ef41Sopenharmony_ci case UCHAR_GRAPHEME_BASE: 15581cb0ef41Sopenharmony_ci case UCHAR_GRAPHEME_EXTEND: 15591cb0ef41Sopenharmony_ci case UCHAR_HEX_DIGIT: 15601cb0ef41Sopenharmony_ci case UCHAR_ID_CONTINUE: 15611cb0ef41Sopenharmony_ci case UCHAR_ID_START: 15621cb0ef41Sopenharmony_ci case UCHAR_IDEOGRAPHIC: 15631cb0ef41Sopenharmony_ci case UCHAR_IDS_BINARY_OPERATOR: 15641cb0ef41Sopenharmony_ci case UCHAR_IDS_TRINARY_OPERATOR: 15651cb0ef41Sopenharmony_ci case UCHAR_JOIN_CONTROL: 15661cb0ef41Sopenharmony_ci case UCHAR_LOGICAL_ORDER_EXCEPTION: 15671cb0ef41Sopenharmony_ci case UCHAR_LOWERCASE: 15681cb0ef41Sopenharmony_ci case UCHAR_MATH: 15691cb0ef41Sopenharmony_ci case UCHAR_NONCHARACTER_CODE_POINT: 15701cb0ef41Sopenharmony_ci case UCHAR_PATTERN_SYNTAX: 15711cb0ef41Sopenharmony_ci case UCHAR_PATTERN_WHITE_SPACE: 15721cb0ef41Sopenharmony_ci case UCHAR_QUOTATION_MARK: 15731cb0ef41Sopenharmony_ci case UCHAR_RADICAL: 15741cb0ef41Sopenharmony_ci case UCHAR_REGIONAL_INDICATOR: 15751cb0ef41Sopenharmony_ci case UCHAR_S_TERM: 15761cb0ef41Sopenharmony_ci case UCHAR_SOFT_DOTTED: 15771cb0ef41Sopenharmony_ci case UCHAR_TERMINAL_PUNCTUATION: 15781cb0ef41Sopenharmony_ci case UCHAR_UNIFIED_IDEOGRAPH: 15791cb0ef41Sopenharmony_ci case UCHAR_UPPERCASE: 15801cb0ef41Sopenharmony_ci case UCHAR_VARIATION_SELECTOR: 15811cb0ef41Sopenharmony_ci case UCHAR_WHITE_SPACE: 15821cb0ef41Sopenharmony_ci case UCHAR_XID_CONTINUE: 15831cb0ef41Sopenharmony_ci case UCHAR_XID_START: 15841cb0ef41Sopenharmony_ci return true; 15851cb0ef41Sopenharmony_ci default: 15861cb0ef41Sopenharmony_ci break; 15871cb0ef41Sopenharmony_ci } 15881cb0ef41Sopenharmony_ci return false; 15891cb0ef41Sopenharmony_ci} 15901cb0ef41Sopenharmony_ci 15911cb0ef41Sopenharmony_cibool IsUnicodePropertyValueCharacter(char c) { 15921cb0ef41Sopenharmony_ci // https://tc39.github.io/proposal-regexp-unicode-property-escapes/ 15931cb0ef41Sopenharmony_ci // 15941cb0ef41Sopenharmony_ci // Note that using this to validate each parsed char is quite conservative. 15951cb0ef41Sopenharmony_ci // A possible alternative solution would be to only ensure the parsed 15961cb0ef41Sopenharmony_ci // property name/value candidate string does not contain '\0' characters and 15971cb0ef41Sopenharmony_ci // let ICU lookups trigger the final failure. 15981cb0ef41Sopenharmony_ci if ('a' <= c && c <= 'z') return true; 15991cb0ef41Sopenharmony_ci if ('A' <= c && c <= 'Z') return true; 16001cb0ef41Sopenharmony_ci if ('0' <= c && c <= '9') return true; 16011cb0ef41Sopenharmony_ci return (c == '_'); 16021cb0ef41Sopenharmony_ci} 16031cb0ef41Sopenharmony_ci 16041cb0ef41Sopenharmony_ci} // namespace 16051cb0ef41Sopenharmony_ci 16061cb0ef41Sopenharmony_citemplate <class CharT> 16071cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParsePropertyClassName(ZoneVector<char>* name_1, 16081cb0ef41Sopenharmony_ci ZoneVector<char>* name_2) { 16091cb0ef41Sopenharmony_ci DCHECK(name_1->empty()); 16101cb0ef41Sopenharmony_ci DCHECK(name_2->empty()); 16111cb0ef41Sopenharmony_ci // Parse the property class as follows: 16121cb0ef41Sopenharmony_ci // - In \p{name}, 'name' is interpreted 16131cb0ef41Sopenharmony_ci // - either as a general category property value name. 16141cb0ef41Sopenharmony_ci // - or as a binary property name. 16151cb0ef41Sopenharmony_ci // - In \p{name=value}, 'name' is interpreted as an enumerated property name, 16161cb0ef41Sopenharmony_ci // and 'value' is interpreted as one of the available property value names. 16171cb0ef41Sopenharmony_ci // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used. 16181cb0ef41Sopenharmony_ci // - Loose matching is not applied. 16191cb0ef41Sopenharmony_ci if (current() == '{') { 16201cb0ef41Sopenharmony_ci // Parse \p{[PropertyName=]PropertyNameValue} 16211cb0ef41Sopenharmony_ci for (Advance(); current() != '}' && current() != '='; Advance()) { 16221cb0ef41Sopenharmony_ci if (!IsUnicodePropertyValueCharacter(current())) return false; 16231cb0ef41Sopenharmony_ci if (!has_next()) return false; 16241cb0ef41Sopenharmony_ci name_1->push_back(static_cast<char>(current())); 16251cb0ef41Sopenharmony_ci } 16261cb0ef41Sopenharmony_ci if (current() == '=') { 16271cb0ef41Sopenharmony_ci for (Advance(); current() != '}'; Advance()) { 16281cb0ef41Sopenharmony_ci if (!IsUnicodePropertyValueCharacter(current())) return false; 16291cb0ef41Sopenharmony_ci if (!has_next()) return false; 16301cb0ef41Sopenharmony_ci name_2->push_back(static_cast<char>(current())); 16311cb0ef41Sopenharmony_ci } 16321cb0ef41Sopenharmony_ci name_2->push_back(0); // null-terminate string. 16331cb0ef41Sopenharmony_ci } 16341cb0ef41Sopenharmony_ci } else { 16351cb0ef41Sopenharmony_ci return false; 16361cb0ef41Sopenharmony_ci } 16371cb0ef41Sopenharmony_ci Advance(); 16381cb0ef41Sopenharmony_ci name_1->push_back(0); // null-terminate string. 16391cb0ef41Sopenharmony_ci 16401cb0ef41Sopenharmony_ci DCHECK(name_1->size() - 1 == std::strlen(name_1->data())); 16411cb0ef41Sopenharmony_ci DCHECK(name_2->empty() || name_2->size() - 1 == std::strlen(name_2->data())); 16421cb0ef41Sopenharmony_ci return true; 16431cb0ef41Sopenharmony_ci} 16441cb0ef41Sopenharmony_ci 16451cb0ef41Sopenharmony_citemplate <class CharT> 16461cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::AddPropertyClassRange( 16471cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* add_to, bool negate, 16481cb0ef41Sopenharmony_ci const ZoneVector<char>& name_1, const ZoneVector<char>& name_2) { 16491cb0ef41Sopenharmony_ci if (name_2.empty()) { 16501cb0ef41Sopenharmony_ci // First attempt to interpret as general category property value name. 16511cb0ef41Sopenharmony_ci const char* name = name_1.data(); 16521cb0ef41Sopenharmony_ci if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, 16531cb0ef41Sopenharmony_ci add_to, zone())) { 16541cb0ef41Sopenharmony_ci return true; 16551cb0ef41Sopenharmony_ci } 16561cb0ef41Sopenharmony_ci // Interpret "Any", "ASCII", and "Assigned". 16571cb0ef41Sopenharmony_ci if (LookupSpecialPropertyValueName(name, add_to, negate, zone())) { 16581cb0ef41Sopenharmony_ci return true; 16591cb0ef41Sopenharmony_ci } 16601cb0ef41Sopenharmony_ci // Then attempt to interpret as binary property name with value name 'Y'. 16611cb0ef41Sopenharmony_ci UProperty property = u_getPropertyEnum(name); 16621cb0ef41Sopenharmony_ci if (!IsSupportedBinaryProperty(property)) return false; 16631cb0ef41Sopenharmony_ci if (!IsExactPropertyAlias(name, property)) return false; 16641cb0ef41Sopenharmony_ci return LookupPropertyValueName(property, negate ? "N" : "Y", false, add_to, 16651cb0ef41Sopenharmony_ci zone()); 16661cb0ef41Sopenharmony_ci } else { 16671cb0ef41Sopenharmony_ci // Both property name and value name are specified. Attempt to interpret 16681cb0ef41Sopenharmony_ci // the property name as enumerated property. 16691cb0ef41Sopenharmony_ci const char* property_name = name_1.data(); 16701cb0ef41Sopenharmony_ci const char* value_name = name_2.data(); 16711cb0ef41Sopenharmony_ci UProperty property = u_getPropertyEnum(property_name); 16721cb0ef41Sopenharmony_ci if (!IsExactPropertyAlias(property_name, property)) return false; 16731cb0ef41Sopenharmony_ci if (property == UCHAR_GENERAL_CATEGORY) { 16741cb0ef41Sopenharmony_ci // We want to allow aggregate value names such as "Letter". 16751cb0ef41Sopenharmony_ci property = UCHAR_GENERAL_CATEGORY_MASK; 16761cb0ef41Sopenharmony_ci } else if (property != UCHAR_SCRIPT && 16771cb0ef41Sopenharmony_ci property != UCHAR_SCRIPT_EXTENSIONS) { 16781cb0ef41Sopenharmony_ci return false; 16791cb0ef41Sopenharmony_ci } 16801cb0ef41Sopenharmony_ci return LookupPropertyValueName(property, value_name, negate, add_to, 16811cb0ef41Sopenharmony_ci zone()); 16821cb0ef41Sopenharmony_ci } 16831cb0ef41Sopenharmony_ci} 16841cb0ef41Sopenharmony_ci 16851cb0ef41Sopenharmony_ci#else // V8_INTL_SUPPORT 16861cb0ef41Sopenharmony_ci 16871cb0ef41Sopenharmony_citemplate <class CharT> 16881cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParsePropertyClassName(ZoneVector<char>* name_1, 16891cb0ef41Sopenharmony_ci ZoneVector<char>* name_2) { 16901cb0ef41Sopenharmony_ci return false; 16911cb0ef41Sopenharmony_ci} 16921cb0ef41Sopenharmony_ci 16931cb0ef41Sopenharmony_citemplate <class CharT> 16941cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::AddPropertyClassRange( 16951cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* add_to, bool negate, 16961cb0ef41Sopenharmony_ci const ZoneVector<char>& name_1, const ZoneVector<char>& name_2) { 16971cb0ef41Sopenharmony_ci return false; 16981cb0ef41Sopenharmony_ci} 16991cb0ef41Sopenharmony_ci 17001cb0ef41Sopenharmony_ci#endif // V8_INTL_SUPPORT 17011cb0ef41Sopenharmony_ci 17021cb0ef41Sopenharmony_citemplate <class CharT> 17031cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::ParseUnlimitedLengthHexNumber(int max_value, 17041cb0ef41Sopenharmony_ci base::uc32* value) { 17051cb0ef41Sopenharmony_ci base::uc32 x = 0; 17061cb0ef41Sopenharmony_ci int d = base::HexValue(current()); 17071cb0ef41Sopenharmony_ci if (d < 0) { 17081cb0ef41Sopenharmony_ci return false; 17091cb0ef41Sopenharmony_ci } 17101cb0ef41Sopenharmony_ci while (d >= 0) { 17111cb0ef41Sopenharmony_ci x = x * 16 + d; 17121cb0ef41Sopenharmony_ci if (x > static_cast<base::uc32>(max_value)) { 17131cb0ef41Sopenharmony_ci return false; 17141cb0ef41Sopenharmony_ci } 17151cb0ef41Sopenharmony_ci Advance(); 17161cb0ef41Sopenharmony_ci d = base::HexValue(current()); 17171cb0ef41Sopenharmony_ci } 17181cb0ef41Sopenharmony_ci *value = x; 17191cb0ef41Sopenharmony_ci return true; 17201cb0ef41Sopenharmony_ci} 17211cb0ef41Sopenharmony_ci 17221cb0ef41Sopenharmony_ci// https://tc39.es/ecma262/#prod-CharacterEscape 17231cb0ef41Sopenharmony_citemplate <class CharT> 17241cb0ef41Sopenharmony_cibase::uc32 RegExpParserImpl<CharT>::ParseCharacterEscape( 17251cb0ef41Sopenharmony_ci InClassEscapeState in_class_escape_state, 17261cb0ef41Sopenharmony_ci bool* is_escaped_unicode_character) { 17271cb0ef41Sopenharmony_ci DCHECK_EQ('\\', current()); 17281cb0ef41Sopenharmony_ci DCHECK(has_next() && !IsSpecialClassEscape(Next())); 17291cb0ef41Sopenharmony_ci 17301cb0ef41Sopenharmony_ci Advance(); 17311cb0ef41Sopenharmony_ci 17321cb0ef41Sopenharmony_ci const base::uc32 c = current(); 17331cb0ef41Sopenharmony_ci switch (c) { 17341cb0ef41Sopenharmony_ci // CharacterEscape :: 17351cb0ef41Sopenharmony_ci // ControlEscape :: one of 17361cb0ef41Sopenharmony_ci // f n r t v 17371cb0ef41Sopenharmony_ci case 'f': 17381cb0ef41Sopenharmony_ci Advance(); 17391cb0ef41Sopenharmony_ci return '\f'; 17401cb0ef41Sopenharmony_ci case 'n': 17411cb0ef41Sopenharmony_ci Advance(); 17421cb0ef41Sopenharmony_ci return '\n'; 17431cb0ef41Sopenharmony_ci case 'r': 17441cb0ef41Sopenharmony_ci Advance(); 17451cb0ef41Sopenharmony_ci return '\r'; 17461cb0ef41Sopenharmony_ci case 't': 17471cb0ef41Sopenharmony_ci Advance(); 17481cb0ef41Sopenharmony_ci return '\t'; 17491cb0ef41Sopenharmony_ci case 'v': 17501cb0ef41Sopenharmony_ci Advance(); 17511cb0ef41Sopenharmony_ci return '\v'; 17521cb0ef41Sopenharmony_ci // CharacterEscape :: 17531cb0ef41Sopenharmony_ci // c ControlLetter 17541cb0ef41Sopenharmony_ci case 'c': { 17551cb0ef41Sopenharmony_ci base::uc32 controlLetter = Next(); 17561cb0ef41Sopenharmony_ci base::uc32 letter = controlLetter & ~('A' ^ 'a'); 17571cb0ef41Sopenharmony_ci if (letter >= 'A' && letter <= 'Z') { 17581cb0ef41Sopenharmony_ci Advance(2); 17591cb0ef41Sopenharmony_ci // Control letters mapped to ASCII control characters in the range 17601cb0ef41Sopenharmony_ci // 0x00-0x1F. 17611cb0ef41Sopenharmony_ci return controlLetter & 0x1F; 17621cb0ef41Sopenharmony_ci } 17631cb0ef41Sopenharmony_ci if (unicode()) { 17641cb0ef41Sopenharmony_ci // With /u, invalid escapes are not treated as identity escapes. 17651cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidUnicodeEscape); 17661cb0ef41Sopenharmony_ci return 0; 17671cb0ef41Sopenharmony_ci } 17681cb0ef41Sopenharmony_ci if (in_class_escape_state == InClassEscapeState::kInClass) { 17691cb0ef41Sopenharmony_ci // Inside a character class, we also accept digits and underscore as 17701cb0ef41Sopenharmony_ci // control characters, unless with /u. See Annex B: 17711cb0ef41Sopenharmony_ci // ES#prod-annexB-ClassControlLetter 17721cb0ef41Sopenharmony_ci if ((controlLetter >= '0' && controlLetter <= '9') || 17731cb0ef41Sopenharmony_ci controlLetter == '_') { 17741cb0ef41Sopenharmony_ci Advance(2); 17751cb0ef41Sopenharmony_ci return controlLetter & 0x1F; 17761cb0ef41Sopenharmony_ci } 17771cb0ef41Sopenharmony_ci } 17781cb0ef41Sopenharmony_ci // We match JSC in reading the backslash as a literal 17791cb0ef41Sopenharmony_ci // character instead of as starting an escape. 17801cb0ef41Sopenharmony_ci return '\\'; 17811cb0ef41Sopenharmony_ci } 17821cb0ef41Sopenharmony_ci // CharacterEscape :: 17831cb0ef41Sopenharmony_ci // 0 [lookahead ∉ DecimalDigit] 17841cb0ef41Sopenharmony_ci // [~UnicodeMode] LegacyOctalEscapeSequence 17851cb0ef41Sopenharmony_ci case '0': 17861cb0ef41Sopenharmony_ci // \0 is interpreted as NUL if not followed by another digit. 17871cb0ef41Sopenharmony_ci if (Next() < '0' || Next() > '9') { 17881cb0ef41Sopenharmony_ci Advance(); 17891cb0ef41Sopenharmony_ci return 0; 17901cb0ef41Sopenharmony_ci } 17911cb0ef41Sopenharmony_ci V8_FALLTHROUGH; 17921cb0ef41Sopenharmony_ci case '1': 17931cb0ef41Sopenharmony_ci case '2': 17941cb0ef41Sopenharmony_ci case '3': 17951cb0ef41Sopenharmony_ci case '4': 17961cb0ef41Sopenharmony_ci case '5': 17971cb0ef41Sopenharmony_ci case '6': 17981cb0ef41Sopenharmony_ci case '7': 17991cb0ef41Sopenharmony_ci // For compatibility, we interpret a decimal escape that isn't 18001cb0ef41Sopenharmony_ci // a back reference (and therefore either \0 or not valid according 18011cb0ef41Sopenharmony_ci // to the specification) as a 1..3 digit octal character code. 18021cb0ef41Sopenharmony_ci // ES#prod-annexB-LegacyOctalEscapeSequence 18031cb0ef41Sopenharmony_ci if (unicode()) { 18041cb0ef41Sopenharmony_ci // With /u, decimal escape is not interpreted as octal character code. 18051cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidClassEscape); 18061cb0ef41Sopenharmony_ci return 0; 18071cb0ef41Sopenharmony_ci } 18081cb0ef41Sopenharmony_ci return ParseOctalLiteral(); 18091cb0ef41Sopenharmony_ci // CharacterEscape :: 18101cb0ef41Sopenharmony_ci // HexEscapeSequence 18111cb0ef41Sopenharmony_ci case 'x': { 18121cb0ef41Sopenharmony_ci Advance(); 18131cb0ef41Sopenharmony_ci base::uc32 value; 18141cb0ef41Sopenharmony_ci if (ParseHexEscape(2, &value)) return value; 18151cb0ef41Sopenharmony_ci if (unicode()) { 18161cb0ef41Sopenharmony_ci // With /u, invalid escapes are not treated as identity escapes. 18171cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidEscape); 18181cb0ef41Sopenharmony_ci return 0; 18191cb0ef41Sopenharmony_ci } 18201cb0ef41Sopenharmony_ci // If \x is not followed by a two-digit hexadecimal, treat it 18211cb0ef41Sopenharmony_ci // as an identity escape. 18221cb0ef41Sopenharmony_ci return 'x'; 18231cb0ef41Sopenharmony_ci } 18241cb0ef41Sopenharmony_ci // CharacterEscape :: 18251cb0ef41Sopenharmony_ci // RegExpUnicodeEscapeSequence [?UnicodeMode] 18261cb0ef41Sopenharmony_ci case 'u': { 18271cb0ef41Sopenharmony_ci Advance(); 18281cb0ef41Sopenharmony_ci base::uc32 value; 18291cb0ef41Sopenharmony_ci if (ParseUnicodeEscape(&value)) { 18301cb0ef41Sopenharmony_ci *is_escaped_unicode_character = true; 18311cb0ef41Sopenharmony_ci return value; 18321cb0ef41Sopenharmony_ci } 18331cb0ef41Sopenharmony_ci if (unicode()) { 18341cb0ef41Sopenharmony_ci // With /u, invalid escapes are not treated as identity escapes. 18351cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidUnicodeEscape); 18361cb0ef41Sopenharmony_ci return 0; 18371cb0ef41Sopenharmony_ci } 18381cb0ef41Sopenharmony_ci // If \u is not followed by a two-digit hexadecimal, treat it 18391cb0ef41Sopenharmony_ci // as an identity escape. 18401cb0ef41Sopenharmony_ci return 'u'; 18411cb0ef41Sopenharmony_ci } 18421cb0ef41Sopenharmony_ci default: 18431cb0ef41Sopenharmony_ci break; 18441cb0ef41Sopenharmony_ci } 18451cb0ef41Sopenharmony_ci 18461cb0ef41Sopenharmony_ci // CharacterEscape :: 18471cb0ef41Sopenharmony_ci // IdentityEscape[?UnicodeMode, ?N] 18481cb0ef41Sopenharmony_ci // 18491cb0ef41Sopenharmony_ci // * With /u, no identity escapes except for syntax characters are 18501cb0ef41Sopenharmony_ci // allowed. 18511cb0ef41Sopenharmony_ci // * Without /u: 18521cb0ef41Sopenharmony_ci // * '\c' is not an IdentityEscape. 18531cb0ef41Sopenharmony_ci // * '\k' is not an IdentityEscape when named captures exist. 18541cb0ef41Sopenharmony_ci // * Otherwise, all identity escapes are allowed. 18551cb0ef41Sopenharmony_ci if (unicode()) { 18561cb0ef41Sopenharmony_ci if (!IsSyntaxCharacterOrSlash(c)) { 18571cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidEscape); 18581cb0ef41Sopenharmony_ci return 0; 18591cb0ef41Sopenharmony_ci } 18601cb0ef41Sopenharmony_ci Advance(); 18611cb0ef41Sopenharmony_ci return c; 18621cb0ef41Sopenharmony_ci } 18631cb0ef41Sopenharmony_ci DCHECK(!unicode()); 18641cb0ef41Sopenharmony_ci if (c == 'c') { 18651cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidEscape); 18661cb0ef41Sopenharmony_ci return 0; 18671cb0ef41Sopenharmony_ci } 18681cb0ef41Sopenharmony_ci Advance(); 18691cb0ef41Sopenharmony_ci // Note: It's important to Advance before the HasNamedCaptures call s.t. we 18701cb0ef41Sopenharmony_ci // don't start scanning in the middle of an escape. 18711cb0ef41Sopenharmony_ci if (c == 'k' && HasNamedCaptures(in_class_escape_state)) { 18721cb0ef41Sopenharmony_ci ReportError(RegExpError::kInvalidEscape); 18731cb0ef41Sopenharmony_ci return 0; 18741cb0ef41Sopenharmony_ci } 18751cb0ef41Sopenharmony_ci return c; 18761cb0ef41Sopenharmony_ci} 18771cb0ef41Sopenharmony_ci 18781cb0ef41Sopenharmony_ci// https://tc39.es/ecma262/#prod-ClassEscape 18791cb0ef41Sopenharmony_citemplate <class CharT> 18801cb0ef41Sopenharmony_civoid RegExpParserImpl<CharT>::ParseClassEscape( 18811cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges, Zone* zone, 18821cb0ef41Sopenharmony_ci bool add_unicode_case_equivalents, base::uc32* char_out, 18831cb0ef41Sopenharmony_ci bool* is_class_escape) { 18841cb0ef41Sopenharmony_ci *is_class_escape = false; 18851cb0ef41Sopenharmony_ci 18861cb0ef41Sopenharmony_ci if (current() != '\\') { 18871cb0ef41Sopenharmony_ci // Not a ClassEscape. 18881cb0ef41Sopenharmony_ci *char_out = current(); 18891cb0ef41Sopenharmony_ci Advance(); 18901cb0ef41Sopenharmony_ci return; 18911cb0ef41Sopenharmony_ci } 18921cb0ef41Sopenharmony_ci 18931cb0ef41Sopenharmony_ci const base::uc32 next = Next(); 18941cb0ef41Sopenharmony_ci switch (next) { 18951cb0ef41Sopenharmony_ci case 'b': 18961cb0ef41Sopenharmony_ci *char_out = '\b'; 18971cb0ef41Sopenharmony_ci Advance(2); 18981cb0ef41Sopenharmony_ci return; 18991cb0ef41Sopenharmony_ci case '-': 19001cb0ef41Sopenharmony_ci if (unicode()) { 19011cb0ef41Sopenharmony_ci *char_out = next; 19021cb0ef41Sopenharmony_ci Advance(2); 19031cb0ef41Sopenharmony_ci return; 19041cb0ef41Sopenharmony_ci } 19051cb0ef41Sopenharmony_ci break; 19061cb0ef41Sopenharmony_ci case kEndMarker: 19071cb0ef41Sopenharmony_ci ReportError(RegExpError::kEscapeAtEndOfPattern); 19081cb0ef41Sopenharmony_ci return; 19091cb0ef41Sopenharmony_ci default: 19101cb0ef41Sopenharmony_ci break; 19111cb0ef41Sopenharmony_ci } 19121cb0ef41Sopenharmony_ci 19131cb0ef41Sopenharmony_ci static constexpr InClassEscapeState kInClassEscape = 19141cb0ef41Sopenharmony_ci InClassEscapeState::kInClass; 19151cb0ef41Sopenharmony_ci *is_class_escape = TryParseCharacterClassEscape( 19161cb0ef41Sopenharmony_ci next, kInClassEscape, ranges, zone, add_unicode_case_equivalents); 19171cb0ef41Sopenharmony_ci if (*is_class_escape) return; 19181cb0ef41Sopenharmony_ci 19191cb0ef41Sopenharmony_ci bool dummy = false; // Unused. 19201cb0ef41Sopenharmony_ci *char_out = ParseCharacterEscape(kInClassEscape, &dummy); 19211cb0ef41Sopenharmony_ci} 19221cb0ef41Sopenharmony_ci 19231cb0ef41Sopenharmony_ci// https://tc39.es/ecma262/#prod-CharacterClassEscape 19241cb0ef41Sopenharmony_citemplate <class CharT> 19251cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::TryParseCharacterClassEscape( 19261cb0ef41Sopenharmony_ci base::uc32 next, InClassEscapeState in_class_escape_state, 19271cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges, Zone* zone, 19281cb0ef41Sopenharmony_ci bool add_unicode_case_equivalents) { 19291cb0ef41Sopenharmony_ci DCHECK_EQ(current(), '\\'); 19301cb0ef41Sopenharmony_ci DCHECK_EQ(Next(), next); 19311cb0ef41Sopenharmony_ci 19321cb0ef41Sopenharmony_ci switch (next) { 19331cb0ef41Sopenharmony_ci case 'd': 19341cb0ef41Sopenharmony_ci case 'D': 19351cb0ef41Sopenharmony_ci case 's': 19361cb0ef41Sopenharmony_ci case 'S': 19371cb0ef41Sopenharmony_ci case 'w': 19381cb0ef41Sopenharmony_ci case 'W': 19391cb0ef41Sopenharmony_ci CharacterRange::AddClassEscape(static_cast<StandardCharacterSet>(next), 19401cb0ef41Sopenharmony_ci ranges, add_unicode_case_equivalents, 19411cb0ef41Sopenharmony_ci zone); 19421cb0ef41Sopenharmony_ci Advance(2); 19431cb0ef41Sopenharmony_ci return true; 19441cb0ef41Sopenharmony_ci case 'p': 19451cb0ef41Sopenharmony_ci case 'P': { 19461cb0ef41Sopenharmony_ci if (!unicode()) return false; 19471cb0ef41Sopenharmony_ci bool negate = next == 'P'; 19481cb0ef41Sopenharmony_ci Advance(2); 19491cb0ef41Sopenharmony_ci ZoneVector<char> name_1(zone); 19501cb0ef41Sopenharmony_ci ZoneVector<char> name_2(zone); 19511cb0ef41Sopenharmony_ci if (!ParsePropertyClassName(&name_1, &name_2) || 19521cb0ef41Sopenharmony_ci !AddPropertyClassRange(ranges, negate, name_1, name_2)) { 19531cb0ef41Sopenharmony_ci ReportError(in_class_escape_state == InClassEscapeState::kInClass 19541cb0ef41Sopenharmony_ci ? RegExpError::kInvalidClassPropertyName 19551cb0ef41Sopenharmony_ci : RegExpError::kInvalidPropertyName); 19561cb0ef41Sopenharmony_ci } 19571cb0ef41Sopenharmony_ci return true; 19581cb0ef41Sopenharmony_ci } 19591cb0ef41Sopenharmony_ci default: 19601cb0ef41Sopenharmony_ci return false; 19611cb0ef41Sopenharmony_ci } 19621cb0ef41Sopenharmony_ci} 19631cb0ef41Sopenharmony_ci 19641cb0ef41Sopenharmony_citemplate <class CharT> 19651cb0ef41Sopenharmony_ciRegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass( 19661cb0ef41Sopenharmony_ci const RegExpBuilder* builder) { 19671cb0ef41Sopenharmony_ci DCHECK_EQ(current(), '['); 19681cb0ef41Sopenharmony_ci Advance(); 19691cb0ef41Sopenharmony_ci bool is_negated = false; 19701cb0ef41Sopenharmony_ci if (current() == '^') { 19711cb0ef41Sopenharmony_ci is_negated = true; 19721cb0ef41Sopenharmony_ci Advance(); 19731cb0ef41Sopenharmony_ci } 19741cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = 19751cb0ef41Sopenharmony_ci zone()->template New<ZoneList<CharacterRange>>(2, zone()); 19761cb0ef41Sopenharmony_ci bool add_unicode_case_equivalents = unicode() && builder->ignore_case(); 19771cb0ef41Sopenharmony_ci while (has_more() && current() != ']') { 19781cb0ef41Sopenharmony_ci base::uc32 char_1, char_2; 19791cb0ef41Sopenharmony_ci bool is_class_1, is_class_2; 19801cb0ef41Sopenharmony_ci ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_1, 19811cb0ef41Sopenharmony_ci &is_class_1 CHECK_FAILED); 19821cb0ef41Sopenharmony_ci if (current() == '-') { 19831cb0ef41Sopenharmony_ci Advance(); 19841cb0ef41Sopenharmony_ci if (current() == kEndMarker) { 19851cb0ef41Sopenharmony_ci // If we reach the end we break out of the loop and let the 19861cb0ef41Sopenharmony_ci // following code report an error. 19871cb0ef41Sopenharmony_ci break; 19881cb0ef41Sopenharmony_ci } else if (current() == ']') { 19891cb0ef41Sopenharmony_ci if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone()); 19901cb0ef41Sopenharmony_ci ranges->Add(CharacterRange::Singleton('-'), zone()); 19911cb0ef41Sopenharmony_ci break; 19921cb0ef41Sopenharmony_ci } 19931cb0ef41Sopenharmony_ci ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_2, 19941cb0ef41Sopenharmony_ci &is_class_2 CHECK_FAILED); 19951cb0ef41Sopenharmony_ci if (is_class_1 || is_class_2) { 19961cb0ef41Sopenharmony_ci // Either end is an escaped character class. Treat the '-' verbatim. 19971cb0ef41Sopenharmony_ci if (unicode()) { 19981cb0ef41Sopenharmony_ci // ES2015 21.2.2.15.1 step 1. 19991cb0ef41Sopenharmony_ci return ReportError(RegExpError::kInvalidCharacterClass); 20001cb0ef41Sopenharmony_ci } 20011cb0ef41Sopenharmony_ci if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone()); 20021cb0ef41Sopenharmony_ci ranges->Add(CharacterRange::Singleton('-'), zone()); 20031cb0ef41Sopenharmony_ci if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2), zone()); 20041cb0ef41Sopenharmony_ci continue; 20051cb0ef41Sopenharmony_ci } 20061cb0ef41Sopenharmony_ci // ES2015 21.2.2.15.1 step 6. 20071cb0ef41Sopenharmony_ci if (char_1 > char_2) { 20081cb0ef41Sopenharmony_ci return ReportError(RegExpError::kOutOfOrderCharacterClass); 20091cb0ef41Sopenharmony_ci } 20101cb0ef41Sopenharmony_ci ranges->Add(CharacterRange::Range(char_1, char_2), zone()); 20111cb0ef41Sopenharmony_ci } else { 20121cb0ef41Sopenharmony_ci if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone()); 20131cb0ef41Sopenharmony_ci } 20141cb0ef41Sopenharmony_ci } 20151cb0ef41Sopenharmony_ci if (!has_more()) { 20161cb0ef41Sopenharmony_ci return ReportError(RegExpError::kUnterminatedCharacterClass); 20171cb0ef41Sopenharmony_ci } 20181cb0ef41Sopenharmony_ci Advance(); 20191cb0ef41Sopenharmony_ci RegExpCharacterClass::CharacterClassFlags character_class_flags; 20201cb0ef41Sopenharmony_ci if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED; 20211cb0ef41Sopenharmony_ci return zone()->template New<RegExpCharacterClass>(zone(), ranges, 20221cb0ef41Sopenharmony_ci character_class_flags); 20231cb0ef41Sopenharmony_ci} 20241cb0ef41Sopenharmony_ci 20251cb0ef41Sopenharmony_ci#undef CHECK_FAILED 20261cb0ef41Sopenharmony_ci 20271cb0ef41Sopenharmony_citemplate <class CharT> 20281cb0ef41Sopenharmony_cibool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) { 20291cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(result); 20301cb0ef41Sopenharmony_ci RegExpTree* tree = ParsePattern(); 20311cb0ef41Sopenharmony_ci 20321cb0ef41Sopenharmony_ci if (failed()) { 20331cb0ef41Sopenharmony_ci DCHECK_NULL(tree); 20341cb0ef41Sopenharmony_ci DCHECK_NE(error_, RegExpError::kNone); 20351cb0ef41Sopenharmony_ci result->error = error_; 20361cb0ef41Sopenharmony_ci result->error_pos = error_pos_; 20371cb0ef41Sopenharmony_ci return false; 20381cb0ef41Sopenharmony_ci } 20391cb0ef41Sopenharmony_ci 20401cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(tree); 20411cb0ef41Sopenharmony_ci DCHECK_EQ(error_, RegExpError::kNone); 20421cb0ef41Sopenharmony_ci if (FLAG_trace_regexp_parser) { 20431cb0ef41Sopenharmony_ci StdoutStream os; 20441cb0ef41Sopenharmony_ci tree->Print(os, zone()); 20451cb0ef41Sopenharmony_ci os << "\n"; 20461cb0ef41Sopenharmony_ci } 20471cb0ef41Sopenharmony_ci 20481cb0ef41Sopenharmony_ci result->tree = tree; 20491cb0ef41Sopenharmony_ci const int capture_count = captures_started(); 20501cb0ef41Sopenharmony_ci result->simple = tree->IsAtom() && simple() && capture_count == 0; 20511cb0ef41Sopenharmony_ci result->contains_anchor = contains_anchor(); 20521cb0ef41Sopenharmony_ci result->capture_count = capture_count; 20531cb0ef41Sopenharmony_ci result->named_captures = GetNamedCaptures(); 20541cb0ef41Sopenharmony_ci return true; 20551cb0ef41Sopenharmony_ci} 20561cb0ef41Sopenharmony_ci 20571cb0ef41Sopenharmony_civoid RegExpBuilder::AddLeadSurrogate(base::uc16 lead_surrogate) { 20581cb0ef41Sopenharmony_ci DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate)); 20591cb0ef41Sopenharmony_ci FlushPendingSurrogate(); 20601cb0ef41Sopenharmony_ci // Hold onto the lead surrogate, waiting for a trail surrogate to follow. 20611cb0ef41Sopenharmony_ci pending_surrogate_ = lead_surrogate; 20621cb0ef41Sopenharmony_ci} 20631cb0ef41Sopenharmony_ci 20641cb0ef41Sopenharmony_civoid RegExpBuilder::AddTrailSurrogate(base::uc16 trail_surrogate) { 20651cb0ef41Sopenharmony_ci DCHECK(unibrow::Utf16::IsTrailSurrogate(trail_surrogate)); 20661cb0ef41Sopenharmony_ci if (pending_surrogate_ != kNoPendingSurrogate) { 20671cb0ef41Sopenharmony_ci base::uc16 lead_surrogate = pending_surrogate_; 20681cb0ef41Sopenharmony_ci pending_surrogate_ = kNoPendingSurrogate; 20691cb0ef41Sopenharmony_ci DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate)); 20701cb0ef41Sopenharmony_ci base::uc32 combined = 20711cb0ef41Sopenharmony_ci unibrow::Utf16::CombineSurrogatePair(lead_surrogate, trail_surrogate); 20721cb0ef41Sopenharmony_ci if (NeedsDesugaringForIgnoreCase(combined)) { 20731cb0ef41Sopenharmony_ci AddCharacterClassForDesugaring(combined); 20741cb0ef41Sopenharmony_ci } else { 20751cb0ef41Sopenharmony_ci ZoneList<base::uc16> surrogate_pair(2, zone()); 20761cb0ef41Sopenharmony_ci surrogate_pair.Add(lead_surrogate, zone()); 20771cb0ef41Sopenharmony_ci surrogate_pair.Add(trail_surrogate, zone()); 20781cb0ef41Sopenharmony_ci RegExpAtom* atom = 20791cb0ef41Sopenharmony_ci zone()->New<RegExpAtom>(surrogate_pair.ToConstVector()); 20801cb0ef41Sopenharmony_ci AddAtom(atom); 20811cb0ef41Sopenharmony_ci } 20821cb0ef41Sopenharmony_ci } else { 20831cb0ef41Sopenharmony_ci pending_surrogate_ = trail_surrogate; 20841cb0ef41Sopenharmony_ci FlushPendingSurrogate(); 20851cb0ef41Sopenharmony_ci } 20861cb0ef41Sopenharmony_ci} 20871cb0ef41Sopenharmony_ci 20881cb0ef41Sopenharmony_civoid RegExpBuilder::FlushPendingSurrogate() { 20891cb0ef41Sopenharmony_ci if (pending_surrogate_ != kNoPendingSurrogate) { 20901cb0ef41Sopenharmony_ci DCHECK(unicode()); 20911cb0ef41Sopenharmony_ci base::uc32 c = pending_surrogate_; 20921cb0ef41Sopenharmony_ci pending_surrogate_ = kNoPendingSurrogate; 20931cb0ef41Sopenharmony_ci AddCharacterClassForDesugaring(c); 20941cb0ef41Sopenharmony_ci } 20951cb0ef41Sopenharmony_ci} 20961cb0ef41Sopenharmony_ci 20971cb0ef41Sopenharmony_civoid RegExpBuilder::FlushCharacters() { 20981cb0ef41Sopenharmony_ci FlushPendingSurrogate(); 20991cb0ef41Sopenharmony_ci pending_empty_ = false; 21001cb0ef41Sopenharmony_ci if (characters_ != nullptr) { 21011cb0ef41Sopenharmony_ci RegExpTree* atom = zone()->New<RegExpAtom>(characters_->ToConstVector()); 21021cb0ef41Sopenharmony_ci characters_ = nullptr; 21031cb0ef41Sopenharmony_ci text_.emplace_back(atom); 21041cb0ef41Sopenharmony_ci LAST(ADD_ATOM); 21051cb0ef41Sopenharmony_ci } 21061cb0ef41Sopenharmony_ci} 21071cb0ef41Sopenharmony_ci 21081cb0ef41Sopenharmony_civoid RegExpBuilder::FlushText() { 21091cb0ef41Sopenharmony_ci FlushCharacters(); 21101cb0ef41Sopenharmony_ci size_t num_text = text_.size(); 21111cb0ef41Sopenharmony_ci if (num_text == 0) { 21121cb0ef41Sopenharmony_ci return; 21131cb0ef41Sopenharmony_ci } else if (num_text == 1) { 21141cb0ef41Sopenharmony_ci terms_.emplace_back(text_.back()); 21151cb0ef41Sopenharmony_ci } else { 21161cb0ef41Sopenharmony_ci RegExpText* text = zone()->New<RegExpText>(zone()); 21171cb0ef41Sopenharmony_ci for (size_t i = 0; i < num_text; i++) { 21181cb0ef41Sopenharmony_ci text_[i]->AppendToText(text, zone()); 21191cb0ef41Sopenharmony_ci } 21201cb0ef41Sopenharmony_ci terms_.emplace_back(text); 21211cb0ef41Sopenharmony_ci } 21221cb0ef41Sopenharmony_ci text_.clear(); 21231cb0ef41Sopenharmony_ci} 21241cb0ef41Sopenharmony_ci 21251cb0ef41Sopenharmony_civoid RegExpBuilder::AddCharacter(base::uc16 c) { 21261cb0ef41Sopenharmony_ci FlushPendingSurrogate(); 21271cb0ef41Sopenharmony_ci pending_empty_ = false; 21281cb0ef41Sopenharmony_ci if (NeedsDesugaringForIgnoreCase(c)) { 21291cb0ef41Sopenharmony_ci AddCharacterClassForDesugaring(c); 21301cb0ef41Sopenharmony_ci } else { 21311cb0ef41Sopenharmony_ci if (characters_ == nullptr) { 21321cb0ef41Sopenharmony_ci characters_ = zone()->New<ZoneList<base::uc16>>(4, zone()); 21331cb0ef41Sopenharmony_ci } 21341cb0ef41Sopenharmony_ci characters_->Add(c, zone()); 21351cb0ef41Sopenharmony_ci LAST(ADD_CHAR); 21361cb0ef41Sopenharmony_ci } 21371cb0ef41Sopenharmony_ci} 21381cb0ef41Sopenharmony_ci 21391cb0ef41Sopenharmony_civoid RegExpBuilder::AddUnicodeCharacter(base::uc32 c) { 21401cb0ef41Sopenharmony_ci if (c > static_cast<base::uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { 21411cb0ef41Sopenharmony_ci DCHECK(unicode()); 21421cb0ef41Sopenharmony_ci AddLeadSurrogate(unibrow::Utf16::LeadSurrogate(c)); 21431cb0ef41Sopenharmony_ci AddTrailSurrogate(unibrow::Utf16::TrailSurrogate(c)); 21441cb0ef41Sopenharmony_ci } else if (unicode() && unibrow::Utf16::IsLeadSurrogate(c)) { 21451cb0ef41Sopenharmony_ci AddLeadSurrogate(c); 21461cb0ef41Sopenharmony_ci } else if (unicode() && unibrow::Utf16::IsTrailSurrogate(c)) { 21471cb0ef41Sopenharmony_ci AddTrailSurrogate(c); 21481cb0ef41Sopenharmony_ci } else { 21491cb0ef41Sopenharmony_ci AddCharacter(static_cast<base::uc16>(c)); 21501cb0ef41Sopenharmony_ci } 21511cb0ef41Sopenharmony_ci} 21521cb0ef41Sopenharmony_ci 21531cb0ef41Sopenharmony_civoid RegExpBuilder::AddEscapedUnicodeCharacter(base::uc32 character) { 21541cb0ef41Sopenharmony_ci // A lead or trail surrogate parsed via escape sequence will not 21551cb0ef41Sopenharmony_ci // pair up with any preceding lead or following trail surrogate. 21561cb0ef41Sopenharmony_ci FlushPendingSurrogate(); 21571cb0ef41Sopenharmony_ci AddUnicodeCharacter(character); 21581cb0ef41Sopenharmony_ci FlushPendingSurrogate(); 21591cb0ef41Sopenharmony_ci} 21601cb0ef41Sopenharmony_ci 21611cb0ef41Sopenharmony_civoid RegExpBuilder::AddEmpty() { pending_empty_ = true; } 21621cb0ef41Sopenharmony_ci 21631cb0ef41Sopenharmony_civoid RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) { 21641cb0ef41Sopenharmony_ci if (NeedsDesugaringForUnicode(cc)) { 21651cb0ef41Sopenharmony_ci // With /u, character class needs to be desugared, so it 21661cb0ef41Sopenharmony_ci // must be a standalone term instead of being part of a RegExpText. 21671cb0ef41Sopenharmony_ci AddTerm(cc); 21681cb0ef41Sopenharmony_ci } else { 21691cb0ef41Sopenharmony_ci AddAtom(cc); 21701cb0ef41Sopenharmony_ci } 21711cb0ef41Sopenharmony_ci} 21721cb0ef41Sopenharmony_ci 21731cb0ef41Sopenharmony_civoid RegExpBuilder::AddCharacterClassForDesugaring(base::uc32 c) { 21741cb0ef41Sopenharmony_ci AddTerm(zone()->New<RegExpCharacterClass>( 21751cb0ef41Sopenharmony_ci zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c)))); 21761cb0ef41Sopenharmony_ci} 21771cb0ef41Sopenharmony_ci 21781cb0ef41Sopenharmony_civoid RegExpBuilder::AddAtom(RegExpTree* term) { 21791cb0ef41Sopenharmony_ci if (term->IsEmpty()) { 21801cb0ef41Sopenharmony_ci AddEmpty(); 21811cb0ef41Sopenharmony_ci return; 21821cb0ef41Sopenharmony_ci } 21831cb0ef41Sopenharmony_ci if (term->IsTextElement()) { 21841cb0ef41Sopenharmony_ci FlushCharacters(); 21851cb0ef41Sopenharmony_ci text_.emplace_back(term); 21861cb0ef41Sopenharmony_ci } else { 21871cb0ef41Sopenharmony_ci FlushText(); 21881cb0ef41Sopenharmony_ci terms_.emplace_back(term); 21891cb0ef41Sopenharmony_ci } 21901cb0ef41Sopenharmony_ci LAST(ADD_ATOM); 21911cb0ef41Sopenharmony_ci} 21921cb0ef41Sopenharmony_ci 21931cb0ef41Sopenharmony_civoid RegExpBuilder::AddTerm(RegExpTree* term) { 21941cb0ef41Sopenharmony_ci FlushText(); 21951cb0ef41Sopenharmony_ci terms_.emplace_back(term); 21961cb0ef41Sopenharmony_ci LAST(ADD_ATOM); 21971cb0ef41Sopenharmony_ci} 21981cb0ef41Sopenharmony_ci 21991cb0ef41Sopenharmony_civoid RegExpBuilder::AddAssertion(RegExpTree* assert) { 22001cb0ef41Sopenharmony_ci FlushText(); 22011cb0ef41Sopenharmony_ci terms_.emplace_back(assert); 22021cb0ef41Sopenharmony_ci LAST(ADD_ASSERT); 22031cb0ef41Sopenharmony_ci} 22041cb0ef41Sopenharmony_ci 22051cb0ef41Sopenharmony_civoid RegExpBuilder::NewAlternative() { FlushTerms(); } 22061cb0ef41Sopenharmony_ci 22071cb0ef41Sopenharmony_civoid RegExpBuilder::FlushTerms() { 22081cb0ef41Sopenharmony_ci FlushText(); 22091cb0ef41Sopenharmony_ci size_t num_terms = terms_.size(); 22101cb0ef41Sopenharmony_ci RegExpTree* alternative; 22111cb0ef41Sopenharmony_ci if (num_terms == 0) { 22121cb0ef41Sopenharmony_ci alternative = zone()->New<RegExpEmpty>(); 22131cb0ef41Sopenharmony_ci } else if (num_terms == 1) { 22141cb0ef41Sopenharmony_ci alternative = terms_.back(); 22151cb0ef41Sopenharmony_ci } else { 22161cb0ef41Sopenharmony_ci alternative = 22171cb0ef41Sopenharmony_ci zone()->New<RegExpAlternative>(zone()->New<ZoneList<RegExpTree*>>( 22181cb0ef41Sopenharmony_ci base::VectorOf(terms_.begin(), terms_.size()), zone())); 22191cb0ef41Sopenharmony_ci } 22201cb0ef41Sopenharmony_ci alternatives_.emplace_back(alternative); 22211cb0ef41Sopenharmony_ci terms_.clear(); 22221cb0ef41Sopenharmony_ci LAST(ADD_NONE); 22231cb0ef41Sopenharmony_ci} 22241cb0ef41Sopenharmony_ci 22251cb0ef41Sopenharmony_cibool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) { 22261cb0ef41Sopenharmony_ci if (!unicode()) return false; 22271cb0ef41Sopenharmony_ci // TODO(yangguo): we could be smarter than this. Case-insensitivity does not 22281cb0ef41Sopenharmony_ci // necessarily mean that we need to desugar. It's probably nicer to have a 22291cb0ef41Sopenharmony_ci // separate pass to figure out unicode desugarings. 22301cb0ef41Sopenharmony_ci if (ignore_case()) return true; 22311cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 22321cb0ef41Sopenharmony_ci CharacterRange::Canonicalize(ranges); 22331cb0ef41Sopenharmony_ci for (int i = ranges->length() - 1; i >= 0; i--) { 22341cb0ef41Sopenharmony_ci base::uc32 from = ranges->at(i).from(); 22351cb0ef41Sopenharmony_ci base::uc32 to = ranges->at(i).to(); 22361cb0ef41Sopenharmony_ci // Check for non-BMP characters. 22371cb0ef41Sopenharmony_ci if (to >= kNonBmpStart) return true; 22381cb0ef41Sopenharmony_ci // Check for lone surrogates. 22391cb0ef41Sopenharmony_ci if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true; 22401cb0ef41Sopenharmony_ci } 22411cb0ef41Sopenharmony_ci return false; 22421cb0ef41Sopenharmony_ci} 22431cb0ef41Sopenharmony_ci 22441cb0ef41Sopenharmony_cibool RegExpBuilder::NeedsDesugaringForIgnoreCase(base::uc32 c) { 22451cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 22461cb0ef41Sopenharmony_ci if (unicode() && ignore_case()) { 22471cb0ef41Sopenharmony_ci icu::UnicodeSet set(c, c); 22481cb0ef41Sopenharmony_ci set.closeOver(USET_CASE_INSENSITIVE); 22491cb0ef41Sopenharmony_ci set.removeAllStrings(); 22501cb0ef41Sopenharmony_ci return set.size() > 1; 22511cb0ef41Sopenharmony_ci } 22521cb0ef41Sopenharmony_ci // In the case where ICU is not included, we act as if the unicode flag is 22531cb0ef41Sopenharmony_ci // not set, and do not desugar. 22541cb0ef41Sopenharmony_ci#endif // V8_INTL_SUPPORT 22551cb0ef41Sopenharmony_ci return false; 22561cb0ef41Sopenharmony_ci} 22571cb0ef41Sopenharmony_ci 22581cb0ef41Sopenharmony_ciRegExpTree* RegExpBuilder::ToRegExp() { 22591cb0ef41Sopenharmony_ci FlushTerms(); 22601cb0ef41Sopenharmony_ci size_t num_alternatives = alternatives_.size(); 22611cb0ef41Sopenharmony_ci if (num_alternatives == 0) return zone()->New<RegExpEmpty>(); 22621cb0ef41Sopenharmony_ci if (num_alternatives == 1) return alternatives_.back(); 22631cb0ef41Sopenharmony_ci return zone()->New<RegExpDisjunction>(zone()->New<ZoneList<RegExpTree*>>( 22641cb0ef41Sopenharmony_ci base::VectorOf(alternatives_.begin(), alternatives_.size()), zone())); 22651cb0ef41Sopenharmony_ci} 22661cb0ef41Sopenharmony_ci 22671cb0ef41Sopenharmony_cibool RegExpBuilder::AddQuantifierToAtom( 22681cb0ef41Sopenharmony_ci int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { 22691cb0ef41Sopenharmony_ci FlushPendingSurrogate(); 22701cb0ef41Sopenharmony_ci if (pending_empty_) { 22711cb0ef41Sopenharmony_ci pending_empty_ = false; 22721cb0ef41Sopenharmony_ci return true; 22731cb0ef41Sopenharmony_ci } 22741cb0ef41Sopenharmony_ci RegExpTree* atom; 22751cb0ef41Sopenharmony_ci if (characters_ != nullptr) { 22761cb0ef41Sopenharmony_ci DCHECK(last_added_ == ADD_CHAR); 22771cb0ef41Sopenharmony_ci // Last atom was character. 22781cb0ef41Sopenharmony_ci base::Vector<const base::uc16> char_vector = characters_->ToConstVector(); 22791cb0ef41Sopenharmony_ci int num_chars = char_vector.length(); 22801cb0ef41Sopenharmony_ci if (num_chars > 1) { 22811cb0ef41Sopenharmony_ci base::Vector<const base::uc16> prefix = 22821cb0ef41Sopenharmony_ci char_vector.SubVector(0, num_chars - 1); 22831cb0ef41Sopenharmony_ci text_.emplace_back(zone()->New<RegExpAtom>(prefix)); 22841cb0ef41Sopenharmony_ci char_vector = char_vector.SubVector(num_chars - 1, num_chars); 22851cb0ef41Sopenharmony_ci } 22861cb0ef41Sopenharmony_ci characters_ = nullptr; 22871cb0ef41Sopenharmony_ci atom = zone()->New<RegExpAtom>(char_vector); 22881cb0ef41Sopenharmony_ci FlushText(); 22891cb0ef41Sopenharmony_ci } else if (text_.size() > 0) { 22901cb0ef41Sopenharmony_ci DCHECK(last_added_ == ADD_ATOM); 22911cb0ef41Sopenharmony_ci atom = text_.back(); 22921cb0ef41Sopenharmony_ci text_.pop_back(); 22931cb0ef41Sopenharmony_ci FlushText(); 22941cb0ef41Sopenharmony_ci } else if (terms_.size() > 0) { 22951cb0ef41Sopenharmony_ci DCHECK(last_added_ == ADD_ATOM); 22961cb0ef41Sopenharmony_ci atom = terms_.back(); 22971cb0ef41Sopenharmony_ci terms_.pop_back(); 22981cb0ef41Sopenharmony_ci if (atom->IsLookaround()) { 22991cb0ef41Sopenharmony_ci // With /u, lookarounds are not quantifiable. 23001cb0ef41Sopenharmony_ci if (unicode()) return false; 23011cb0ef41Sopenharmony_ci // Lookbehinds are not quantifiable. 23021cb0ef41Sopenharmony_ci if (atom->AsLookaround()->type() == RegExpLookaround::LOOKBEHIND) { 23031cb0ef41Sopenharmony_ci return false; 23041cb0ef41Sopenharmony_ci } 23051cb0ef41Sopenharmony_ci } 23061cb0ef41Sopenharmony_ci if (atom->max_match() == 0) { 23071cb0ef41Sopenharmony_ci // Guaranteed to only match an empty string. 23081cb0ef41Sopenharmony_ci LAST(ADD_TERM); 23091cb0ef41Sopenharmony_ci if (min == 0) { 23101cb0ef41Sopenharmony_ci return true; 23111cb0ef41Sopenharmony_ci } 23121cb0ef41Sopenharmony_ci terms_.emplace_back(atom); 23131cb0ef41Sopenharmony_ci return true; 23141cb0ef41Sopenharmony_ci } 23151cb0ef41Sopenharmony_ci } else { 23161cb0ef41Sopenharmony_ci // Only call immediately after adding an atom or character! 23171cb0ef41Sopenharmony_ci UNREACHABLE(); 23181cb0ef41Sopenharmony_ci } 23191cb0ef41Sopenharmony_ci terms_.emplace_back( 23201cb0ef41Sopenharmony_ci zone()->New<RegExpQuantifier>(min, max, quantifier_type, atom)); 23211cb0ef41Sopenharmony_ci LAST(ADD_TERM); 23221cb0ef41Sopenharmony_ci return true; 23231cb0ef41Sopenharmony_ci} 23241cb0ef41Sopenharmony_ci 23251cb0ef41Sopenharmony_citemplate class RegExpParserImpl<uint8_t>; 23261cb0ef41Sopenharmony_citemplate class RegExpParserImpl<base::uc16>; 23271cb0ef41Sopenharmony_ci 23281cb0ef41Sopenharmony_ci} // namespace 23291cb0ef41Sopenharmony_ci 23301cb0ef41Sopenharmony_ci// static 23311cb0ef41Sopenharmony_cibool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone, 23321cb0ef41Sopenharmony_ci Handle<String> input, 23331cb0ef41Sopenharmony_ci RegExpFlags flags, 23341cb0ef41Sopenharmony_ci RegExpCompileData* result) { 23351cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 23361cb0ef41Sopenharmony_ci uintptr_t stack_limit = isolate->stack_guard()->real_climit(); 23371cb0ef41Sopenharmony_ci String::FlatContent content = input->GetFlatContent(no_gc); 23381cb0ef41Sopenharmony_ci if (content.IsOneByte()) { 23391cb0ef41Sopenharmony_ci base::Vector<const uint8_t> v = content.ToOneByteVector(); 23401cb0ef41Sopenharmony_ci return RegExpParserImpl<uint8_t>{v.begin(), v.length(), flags, 23411cb0ef41Sopenharmony_ci stack_limit, zone, no_gc} 23421cb0ef41Sopenharmony_ci .Parse(result); 23431cb0ef41Sopenharmony_ci } else { 23441cb0ef41Sopenharmony_ci base::Vector<const base::uc16> v = content.ToUC16Vector(); 23451cb0ef41Sopenharmony_ci return RegExpParserImpl<base::uc16>{v.begin(), v.length(), flags, 23461cb0ef41Sopenharmony_ci stack_limit, zone, no_gc} 23471cb0ef41Sopenharmony_ci .Parse(result); 23481cb0ef41Sopenharmony_ci } 23491cb0ef41Sopenharmony_ci} 23501cb0ef41Sopenharmony_ci 23511cb0ef41Sopenharmony_ci// static 23521cb0ef41Sopenharmony_citemplate <class CharT> 23531cb0ef41Sopenharmony_cibool RegExpParser::VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit, 23541cb0ef41Sopenharmony_ci const CharT* input, int input_length, 23551cb0ef41Sopenharmony_ci RegExpFlags flags, 23561cb0ef41Sopenharmony_ci RegExpCompileData* result, 23571cb0ef41Sopenharmony_ci const DisallowGarbageCollection& no_gc) { 23581cb0ef41Sopenharmony_ci return RegExpParserImpl<CharT>{input, input_length, flags, 23591cb0ef41Sopenharmony_ci stack_limit, zone, no_gc} 23601cb0ef41Sopenharmony_ci .Parse(result); 23611cb0ef41Sopenharmony_ci} 23621cb0ef41Sopenharmony_ci 23631cb0ef41Sopenharmony_citemplate bool RegExpParser::VerifyRegExpSyntax<uint8_t>( 23641cb0ef41Sopenharmony_ci Zone*, uintptr_t, const uint8_t*, int, RegExpFlags, RegExpCompileData*, 23651cb0ef41Sopenharmony_ci const DisallowGarbageCollection&); 23661cb0ef41Sopenharmony_citemplate bool RegExpParser::VerifyRegExpSyntax<base::uc16>( 23671cb0ef41Sopenharmony_ci Zone*, uintptr_t, const base::uc16*, int, RegExpFlags, RegExpCompileData*, 23681cb0ef41Sopenharmony_ci const DisallowGarbageCollection&); 23691cb0ef41Sopenharmony_ci 23701cb0ef41Sopenharmony_ci// static 23711cb0ef41Sopenharmony_cibool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone, 23721cb0ef41Sopenharmony_ci Handle<String> input, RegExpFlags flags, 23731cb0ef41Sopenharmony_ci RegExpCompileData* result, 23741cb0ef41Sopenharmony_ci const DisallowGarbageCollection&) { 23751cb0ef41Sopenharmony_ci return ParseRegExpFromHeapString(isolate, zone, input, flags, result); 23761cb0ef41Sopenharmony_ci} 23771cb0ef41Sopenharmony_ci 23781cb0ef41Sopenharmony_ci#undef LAST 23791cb0ef41Sopenharmony_ci 23801cb0ef41Sopenharmony_ci} // namespace internal 23811cb0ef41Sopenharmony_ci} // namespace v8 2382