11cb0ef41Sopenharmony_ci// Copyright 2019 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci#ifndef V8_REGEXP_REGEXP_NODES_H_ 61cb0ef41Sopenharmony_ci#define V8_REGEXP_REGEXP_NODES_H_ 71cb0ef41Sopenharmony_ci 81cb0ef41Sopenharmony_ci#include "src/codegen/label.h" 91cb0ef41Sopenharmony_ci#include "src/regexp/regexp-macro-assembler.h" 101cb0ef41Sopenharmony_ci#include "src/zone/zone.h" 111cb0ef41Sopenharmony_ci 121cb0ef41Sopenharmony_cinamespace v8 { 131cb0ef41Sopenharmony_cinamespace internal { 141cb0ef41Sopenharmony_ci 151cb0ef41Sopenharmony_ciclass AlternativeGenerationList; 161cb0ef41Sopenharmony_ciclass BoyerMooreLookahead; 171cb0ef41Sopenharmony_ciclass GreedyLoopState; 181cb0ef41Sopenharmony_ciclass NodeVisitor; 191cb0ef41Sopenharmony_ciclass QuickCheckDetails; 201cb0ef41Sopenharmony_ciclass RegExpCompiler; 211cb0ef41Sopenharmony_ciclass Trace; 221cb0ef41Sopenharmony_cistruct PreloadState; 231cb0ef41Sopenharmony_ciclass ChoiceNode; 241cb0ef41Sopenharmony_ci 251cb0ef41Sopenharmony_ci#define FOR_EACH_NODE_TYPE(VISIT) \ 261cb0ef41Sopenharmony_ci VISIT(End) \ 271cb0ef41Sopenharmony_ci VISIT(Action) \ 281cb0ef41Sopenharmony_ci VISIT(Choice) \ 291cb0ef41Sopenharmony_ci VISIT(LoopChoice) \ 301cb0ef41Sopenharmony_ci VISIT(NegativeLookaroundChoice) \ 311cb0ef41Sopenharmony_ci VISIT(BackReference) \ 321cb0ef41Sopenharmony_ci VISIT(Assertion) \ 331cb0ef41Sopenharmony_ci VISIT(Text) 341cb0ef41Sopenharmony_ci 351cb0ef41Sopenharmony_cistruct NodeInfo final { 361cb0ef41Sopenharmony_ci NodeInfo() 371cb0ef41Sopenharmony_ci : being_analyzed(false), 381cb0ef41Sopenharmony_ci been_analyzed(false), 391cb0ef41Sopenharmony_ci follows_word_interest(false), 401cb0ef41Sopenharmony_ci follows_newline_interest(false), 411cb0ef41Sopenharmony_ci follows_start_interest(false), 421cb0ef41Sopenharmony_ci at_end(false), 431cb0ef41Sopenharmony_ci visited(false), 441cb0ef41Sopenharmony_ci replacement_calculated(false) {} 451cb0ef41Sopenharmony_ci 461cb0ef41Sopenharmony_ci // Returns true if the interests and assumptions of this node 471cb0ef41Sopenharmony_ci // matches the given one. 481cb0ef41Sopenharmony_ci bool Matches(NodeInfo* that) { 491cb0ef41Sopenharmony_ci return (at_end == that->at_end) && 501cb0ef41Sopenharmony_ci (follows_word_interest == that->follows_word_interest) && 511cb0ef41Sopenharmony_ci (follows_newline_interest == that->follows_newline_interest) && 521cb0ef41Sopenharmony_ci (follows_start_interest == that->follows_start_interest); 531cb0ef41Sopenharmony_ci } 541cb0ef41Sopenharmony_ci 551cb0ef41Sopenharmony_ci // Updates the interests of this node given the interests of the 561cb0ef41Sopenharmony_ci // node preceding it. 571cb0ef41Sopenharmony_ci void AddFromPreceding(NodeInfo* that) { 581cb0ef41Sopenharmony_ci at_end |= that->at_end; 591cb0ef41Sopenharmony_ci follows_word_interest |= that->follows_word_interest; 601cb0ef41Sopenharmony_ci follows_newline_interest |= that->follows_newline_interest; 611cb0ef41Sopenharmony_ci follows_start_interest |= that->follows_start_interest; 621cb0ef41Sopenharmony_ci } 631cb0ef41Sopenharmony_ci 641cb0ef41Sopenharmony_ci bool HasLookbehind() { 651cb0ef41Sopenharmony_ci return follows_word_interest || follows_newline_interest || 661cb0ef41Sopenharmony_ci follows_start_interest; 671cb0ef41Sopenharmony_ci } 681cb0ef41Sopenharmony_ci 691cb0ef41Sopenharmony_ci // Sets the interests of this node to include the interests of the 701cb0ef41Sopenharmony_ci // following node. 711cb0ef41Sopenharmony_ci void AddFromFollowing(NodeInfo* that) { 721cb0ef41Sopenharmony_ci follows_word_interest |= that->follows_word_interest; 731cb0ef41Sopenharmony_ci follows_newline_interest |= that->follows_newline_interest; 741cb0ef41Sopenharmony_ci follows_start_interest |= that->follows_start_interest; 751cb0ef41Sopenharmony_ci } 761cb0ef41Sopenharmony_ci 771cb0ef41Sopenharmony_ci void ResetCompilationState() { 781cb0ef41Sopenharmony_ci being_analyzed = false; 791cb0ef41Sopenharmony_ci been_analyzed = false; 801cb0ef41Sopenharmony_ci } 811cb0ef41Sopenharmony_ci 821cb0ef41Sopenharmony_ci bool being_analyzed : 1; 831cb0ef41Sopenharmony_ci bool been_analyzed : 1; 841cb0ef41Sopenharmony_ci 851cb0ef41Sopenharmony_ci // These bits are set of this node has to know what the preceding 861cb0ef41Sopenharmony_ci // character was. 871cb0ef41Sopenharmony_ci bool follows_word_interest : 1; 881cb0ef41Sopenharmony_ci bool follows_newline_interest : 1; 891cb0ef41Sopenharmony_ci bool follows_start_interest : 1; 901cb0ef41Sopenharmony_ci 911cb0ef41Sopenharmony_ci bool at_end : 1; 921cb0ef41Sopenharmony_ci bool visited : 1; 931cb0ef41Sopenharmony_ci bool replacement_calculated : 1; 941cb0ef41Sopenharmony_ci}; 951cb0ef41Sopenharmony_ci 961cb0ef41Sopenharmony_cistruct EatsAtLeastInfo final { 971cb0ef41Sopenharmony_ci EatsAtLeastInfo() : EatsAtLeastInfo(0) {} 981cb0ef41Sopenharmony_ci explicit EatsAtLeastInfo(uint8_t eats) 991cb0ef41Sopenharmony_ci : eats_at_least_from_possibly_start(eats), 1001cb0ef41Sopenharmony_ci eats_at_least_from_not_start(eats) {} 1011cb0ef41Sopenharmony_ci void SetMin(const EatsAtLeastInfo& other) { 1021cb0ef41Sopenharmony_ci if (other.eats_at_least_from_possibly_start < 1031cb0ef41Sopenharmony_ci eats_at_least_from_possibly_start) { 1041cb0ef41Sopenharmony_ci eats_at_least_from_possibly_start = 1051cb0ef41Sopenharmony_ci other.eats_at_least_from_possibly_start; 1061cb0ef41Sopenharmony_ci } 1071cb0ef41Sopenharmony_ci if (other.eats_at_least_from_not_start < eats_at_least_from_not_start) { 1081cb0ef41Sopenharmony_ci eats_at_least_from_not_start = other.eats_at_least_from_not_start; 1091cb0ef41Sopenharmony_ci } 1101cb0ef41Sopenharmony_ci } 1111cb0ef41Sopenharmony_ci 1121cb0ef41Sopenharmony_ci bool IsZero() const { 1131cb0ef41Sopenharmony_ci return eats_at_least_from_possibly_start == 0 && 1141cb0ef41Sopenharmony_ci eats_at_least_from_not_start == 0; 1151cb0ef41Sopenharmony_ci } 1161cb0ef41Sopenharmony_ci 1171cb0ef41Sopenharmony_ci // Any successful match starting from the current node will consume at least 1181cb0ef41Sopenharmony_ci // this many characters. This does not necessarily mean that there is a 1191cb0ef41Sopenharmony_ci // possible match with exactly this many characters, but we generally try to 1201cb0ef41Sopenharmony_ci // get this number as high as possible to allow for early exit on failure. 1211cb0ef41Sopenharmony_ci uint8_t eats_at_least_from_possibly_start; 1221cb0ef41Sopenharmony_ci 1231cb0ef41Sopenharmony_ci // Like eats_at_least_from_possibly_start, but with the additional assumption 1241cb0ef41Sopenharmony_ci // that start-of-string assertions (^) can't match. This value is greater than 1251cb0ef41Sopenharmony_ci // or equal to eats_at_least_from_possibly_start. 1261cb0ef41Sopenharmony_ci uint8_t eats_at_least_from_not_start; 1271cb0ef41Sopenharmony_ci}; 1281cb0ef41Sopenharmony_ci 1291cb0ef41Sopenharmony_ciclass RegExpNode : public ZoneObject { 1301cb0ef41Sopenharmony_ci public: 1311cb0ef41Sopenharmony_ci explicit RegExpNode(Zone* zone) 1321cb0ef41Sopenharmony_ci : replacement_(nullptr), 1331cb0ef41Sopenharmony_ci on_work_list_(false), 1341cb0ef41Sopenharmony_ci trace_count_(0), 1351cb0ef41Sopenharmony_ci zone_(zone) { 1361cb0ef41Sopenharmony_ci bm_info_[0] = bm_info_[1] = nullptr; 1371cb0ef41Sopenharmony_ci } 1381cb0ef41Sopenharmony_ci virtual ~RegExpNode(); 1391cb0ef41Sopenharmony_ci virtual void Accept(NodeVisitor* visitor) = 0; 1401cb0ef41Sopenharmony_ci // Generates a goto to this node or actually generates the code at this point. 1411cb0ef41Sopenharmony_ci virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; 1421cb0ef41Sopenharmony_ci // How many characters must this node consume at a minimum in order to 1431cb0ef41Sopenharmony_ci // succeed. The not_at_start argument is used to indicate that we know we are 1441cb0ef41Sopenharmony_ci // not at the start of the input. In this case anchored branches will always 1451cb0ef41Sopenharmony_ci // fail and can be ignored when determining how many characters are consumed 1461cb0ef41Sopenharmony_ci // on success. If this node has not been analyzed yet, EatsAtLeast returns 0. 1471cb0ef41Sopenharmony_ci int EatsAtLeast(bool not_at_start); 1481cb0ef41Sopenharmony_ci // Returns how many characters this node must consume in order to succeed, 1491cb0ef41Sopenharmony_ci // given that this is a LoopChoiceNode whose counter register is in a 1501cb0ef41Sopenharmony_ci // newly-initialized state at the current position in the generated code. For 1511cb0ef41Sopenharmony_ci // example, consider /a{6,8}/. Absent any extra information, the 1521cb0ef41Sopenharmony_ci // LoopChoiceNode for the repetition must report that it consumes at least 1531cb0ef41Sopenharmony_ci // zero characters, because it may have already looped several times. However, 1541cb0ef41Sopenharmony_ci // with a newly-initialized counter, it can report that it consumes at least 1551cb0ef41Sopenharmony_ci // six characters. 1561cb0ef41Sopenharmony_ci virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry(); 1571cb0ef41Sopenharmony_ci // Emits some quick code that checks whether the preloaded characters match. 1581cb0ef41Sopenharmony_ci // Falls through on certain failure, jumps to the label on possible success. 1591cb0ef41Sopenharmony_ci // If the node cannot make a quick check it does nothing and returns false. 1601cb0ef41Sopenharmony_ci bool EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace, 1611cb0ef41Sopenharmony_ci Trace* trace, bool preload_has_checked_bounds, 1621cb0ef41Sopenharmony_ci Label* on_possible_success, 1631cb0ef41Sopenharmony_ci QuickCheckDetails* details_return, 1641cb0ef41Sopenharmony_ci bool fall_through_on_failure, ChoiceNode* predecessor); 1651cb0ef41Sopenharmony_ci // For a given number of characters this returns a mask and a value. The 1661cb0ef41Sopenharmony_ci // next n characters are anded with the mask and compared with the value. 1671cb0ef41Sopenharmony_ci // A comparison failure indicates the node cannot match the next n characters. 1681cb0ef41Sopenharmony_ci // A comparison success indicates the node may match. 1691cb0ef41Sopenharmony_ci virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1701cb0ef41Sopenharmony_ci RegExpCompiler* compiler, 1711cb0ef41Sopenharmony_ci int characters_filled_in, 1721cb0ef41Sopenharmony_ci bool not_at_start) = 0; 1731cb0ef41Sopenharmony_ci // Fills in quick check details for this node, given that this is a 1741cb0ef41Sopenharmony_ci // LoopChoiceNode whose counter register is in a newly-initialized state at 1751cb0ef41Sopenharmony_ci // the current position in the generated code. For example, consider /a{6,8}/. 1761cb0ef41Sopenharmony_ci // Absent any extra information, the LoopChoiceNode for the repetition cannot 1771cb0ef41Sopenharmony_ci // generate any useful quick check because a match might be the (empty) 1781cb0ef41Sopenharmony_ci // continuation node. However, with a newly-initialized counter, it can 1791cb0ef41Sopenharmony_ci // generate a quick check for several 'a' characters at once. 1801cb0ef41Sopenharmony_ci virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, 1811cb0ef41Sopenharmony_ci RegExpCompiler* compiler, 1821cb0ef41Sopenharmony_ci int characters_filled_in, 1831cb0ef41Sopenharmony_ci bool not_at_start); 1841cb0ef41Sopenharmony_ci static const int kNodeIsTooComplexForGreedyLoops = kMinInt; 1851cb0ef41Sopenharmony_ci virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 1861cb0ef41Sopenharmony_ci // Only returns the successor for a text node of length 1 that matches any 1871cb0ef41Sopenharmony_ci // character and that has no guards on it. 1881cb0ef41Sopenharmony_ci virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 1891cb0ef41Sopenharmony_ci RegExpCompiler* compiler) { 1901cb0ef41Sopenharmony_ci return nullptr; 1911cb0ef41Sopenharmony_ci } 1921cb0ef41Sopenharmony_ci 1931cb0ef41Sopenharmony_ci // Collects information on the possible code units (mod 128) that can match if 1941cb0ef41Sopenharmony_ci // we look forward. This is used for a Boyer-Moore-like string searching 1951cb0ef41Sopenharmony_ci // implementation. TODO(erikcorry): This should share more code with 1961cb0ef41Sopenharmony_ci // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit 1971cb0ef41Sopenharmony_ci // the number of nodes we are willing to look at in order to create this data. 1981cb0ef41Sopenharmony_ci static const int kRecursionBudget = 200; 1991cb0ef41Sopenharmony_ci bool KeepRecursing(RegExpCompiler* compiler); 2001cb0ef41Sopenharmony_ci virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, 2011cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) { 2021cb0ef41Sopenharmony_ci UNREACHABLE(); 2031cb0ef41Sopenharmony_ci } 2041cb0ef41Sopenharmony_ci 2051cb0ef41Sopenharmony_ci // If we know that the input is one-byte then there are some nodes that can 2061cb0ef41Sopenharmony_ci // never match. This method returns a node that can be substituted for 2071cb0ef41Sopenharmony_ci // itself, or nullptr if the node can never match. 2081cb0ef41Sopenharmony_ci virtual RegExpNode* FilterOneByte(int depth, RegExpFlags flags) { 2091cb0ef41Sopenharmony_ci return this; 2101cb0ef41Sopenharmony_ci } 2111cb0ef41Sopenharmony_ci // Helper for FilterOneByte. 2121cb0ef41Sopenharmony_ci RegExpNode* replacement() { 2131cb0ef41Sopenharmony_ci DCHECK(info()->replacement_calculated); 2141cb0ef41Sopenharmony_ci return replacement_; 2151cb0ef41Sopenharmony_ci } 2161cb0ef41Sopenharmony_ci RegExpNode* set_replacement(RegExpNode* replacement) { 2171cb0ef41Sopenharmony_ci info()->replacement_calculated = true; 2181cb0ef41Sopenharmony_ci replacement_ = replacement; 2191cb0ef41Sopenharmony_ci return replacement; // For convenience. 2201cb0ef41Sopenharmony_ci } 2211cb0ef41Sopenharmony_ci 2221cb0ef41Sopenharmony_ci // We want to avoid recalculating the lookahead info, so we store it on the 2231cb0ef41Sopenharmony_ci // node. Only info that is for this node is stored. We can tell that the 2241cb0ef41Sopenharmony_ci // info is for this node when offset == 0, so the information is calculated 2251cb0ef41Sopenharmony_ci // relative to this node. 2261cb0ef41Sopenharmony_ci void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) { 2271cb0ef41Sopenharmony_ci if (offset == 0) set_bm_info(not_at_start, bm); 2281cb0ef41Sopenharmony_ci } 2291cb0ef41Sopenharmony_ci 2301cb0ef41Sopenharmony_ci Label* label() { return &label_; } 2311cb0ef41Sopenharmony_ci // If non-generic code is generated for a node (i.e. the node is not at the 2321cb0ef41Sopenharmony_ci // start of the trace) then it cannot be reused. This variable sets a limit 2331cb0ef41Sopenharmony_ci // on how often we allow that to happen before we insist on starting a new 2341cb0ef41Sopenharmony_ci // trace and generating generic code for a node that can be reused by flushing 2351cb0ef41Sopenharmony_ci // the deferred actions in the current trace and generating a goto. 2361cb0ef41Sopenharmony_ci static const int kMaxCopiesCodeGenerated = 10; 2371cb0ef41Sopenharmony_ci 2381cb0ef41Sopenharmony_ci bool on_work_list() { return on_work_list_; } 2391cb0ef41Sopenharmony_ci void set_on_work_list(bool value) { on_work_list_ = value; } 2401cb0ef41Sopenharmony_ci 2411cb0ef41Sopenharmony_ci NodeInfo* info() { return &info_; } 2421cb0ef41Sopenharmony_ci const EatsAtLeastInfo* eats_at_least_info() const { return &eats_at_least_; } 2431cb0ef41Sopenharmony_ci void set_eats_at_least_info(const EatsAtLeastInfo& eats_at_least) { 2441cb0ef41Sopenharmony_ci eats_at_least_ = eats_at_least; 2451cb0ef41Sopenharmony_ci } 2461cb0ef41Sopenharmony_ci 2471cb0ef41Sopenharmony_ci // TODO(v8:10441): This is a hacky way to avoid exponential code size growth 2481cb0ef41Sopenharmony_ci // for very large choice nodes that can be generated by unicode property 2491cb0ef41Sopenharmony_ci // escapes. In order to avoid inlining (i.e. trace recursion), we pretend to 2501cb0ef41Sopenharmony_ci // have generated the maximum count of code copies already. 2511cb0ef41Sopenharmony_ci // We should instead fix this properly, e.g. by using the code size budget 2521cb0ef41Sopenharmony_ci // (flush_budget) or by generating property escape matches as calls to a C 2531cb0ef41Sopenharmony_ci // function. 2541cb0ef41Sopenharmony_ci void SetDoNotInline() { trace_count_ = kMaxCopiesCodeGenerated; } 2551cb0ef41Sopenharmony_ci 2561cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm_info(bool not_at_start) { 2571cb0ef41Sopenharmony_ci return bm_info_[not_at_start ? 1 : 0]; 2581cb0ef41Sopenharmony_ci } 2591cb0ef41Sopenharmony_ci 2601cb0ef41Sopenharmony_ci Zone* zone() const { return zone_; } 2611cb0ef41Sopenharmony_ci 2621cb0ef41Sopenharmony_ci protected: 2631cb0ef41Sopenharmony_ci enum LimitResult { DONE, CONTINUE }; 2641cb0ef41Sopenharmony_ci RegExpNode* replacement_; 2651cb0ef41Sopenharmony_ci 2661cb0ef41Sopenharmony_ci LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); 2671cb0ef41Sopenharmony_ci 2681cb0ef41Sopenharmony_ci void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { 2691cb0ef41Sopenharmony_ci bm_info_[not_at_start ? 1 : 0] = bm; 2701cb0ef41Sopenharmony_ci } 2711cb0ef41Sopenharmony_ci 2721cb0ef41Sopenharmony_ci private: 2731cb0ef41Sopenharmony_ci static const int kFirstCharBudget = 10; 2741cb0ef41Sopenharmony_ci Label label_; 2751cb0ef41Sopenharmony_ci bool on_work_list_; 2761cb0ef41Sopenharmony_ci NodeInfo info_; 2771cb0ef41Sopenharmony_ci 2781cb0ef41Sopenharmony_ci // Saved values for EatsAtLeast results, to avoid recomputation. Filled in 2791cb0ef41Sopenharmony_ci // during analysis (valid if info_.been_analyzed is true). 2801cb0ef41Sopenharmony_ci EatsAtLeastInfo eats_at_least_; 2811cb0ef41Sopenharmony_ci 2821cb0ef41Sopenharmony_ci // This variable keeps track of how many times code has been generated for 2831cb0ef41Sopenharmony_ci // this node (in different traces). We don't keep track of where the 2841cb0ef41Sopenharmony_ci // generated code is located unless the code is generated at the start of 2851cb0ef41Sopenharmony_ci // a trace, in which case it is generic and can be reused by flushing the 2861cb0ef41Sopenharmony_ci // deferred operations in the current trace and generating a goto. 2871cb0ef41Sopenharmony_ci int trace_count_; 2881cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm_info_[2]; 2891cb0ef41Sopenharmony_ci 2901cb0ef41Sopenharmony_ci Zone* zone_; 2911cb0ef41Sopenharmony_ci}; 2921cb0ef41Sopenharmony_ci 2931cb0ef41Sopenharmony_ciclass SeqRegExpNode : public RegExpNode { 2941cb0ef41Sopenharmony_ci public: 2951cb0ef41Sopenharmony_ci explicit SeqRegExpNode(RegExpNode* on_success) 2961cb0ef41Sopenharmony_ci : RegExpNode(on_success->zone()), on_success_(on_success) {} 2971cb0ef41Sopenharmony_ci RegExpNode* on_success() { return on_success_; } 2981cb0ef41Sopenharmony_ci void set_on_success(RegExpNode* node) { on_success_ = node; } 2991cb0ef41Sopenharmony_ci RegExpNode* FilterOneByte(int depth, RegExpFlags flags) override; 3001cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 3011cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override { 3021cb0ef41Sopenharmony_ci on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 3031cb0ef41Sopenharmony_ci if (offset == 0) set_bm_info(not_at_start, bm); 3041cb0ef41Sopenharmony_ci } 3051cb0ef41Sopenharmony_ci 3061cb0ef41Sopenharmony_ci protected: 3071cb0ef41Sopenharmony_ci RegExpNode* FilterSuccessor(int depth, RegExpFlags flags); 3081cb0ef41Sopenharmony_ci 3091cb0ef41Sopenharmony_ci private: 3101cb0ef41Sopenharmony_ci RegExpNode* on_success_; 3111cb0ef41Sopenharmony_ci}; 3121cb0ef41Sopenharmony_ci 3131cb0ef41Sopenharmony_ciclass ActionNode : public SeqRegExpNode { 3141cb0ef41Sopenharmony_ci public: 3151cb0ef41Sopenharmony_ci enum ActionType { 3161cb0ef41Sopenharmony_ci SET_REGISTER_FOR_LOOP, 3171cb0ef41Sopenharmony_ci INCREMENT_REGISTER, 3181cb0ef41Sopenharmony_ci STORE_POSITION, 3191cb0ef41Sopenharmony_ci BEGIN_POSITIVE_SUBMATCH, 3201cb0ef41Sopenharmony_ci BEGIN_NEGATIVE_SUBMATCH, 3211cb0ef41Sopenharmony_ci POSITIVE_SUBMATCH_SUCCESS, 3221cb0ef41Sopenharmony_ci EMPTY_MATCH_CHECK, 3231cb0ef41Sopenharmony_ci CLEAR_CAPTURES 3241cb0ef41Sopenharmony_ci }; 3251cb0ef41Sopenharmony_ci static ActionNode* SetRegisterForLoop(int reg, int val, 3261cb0ef41Sopenharmony_ci RegExpNode* on_success); 3271cb0ef41Sopenharmony_ci static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); 3281cb0ef41Sopenharmony_ci static ActionNode* StorePosition(int reg, bool is_capture, 3291cb0ef41Sopenharmony_ci RegExpNode* on_success); 3301cb0ef41Sopenharmony_ci static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); 3311cb0ef41Sopenharmony_ci static ActionNode* BeginPositiveSubmatch(int stack_pointer_reg, 3321cb0ef41Sopenharmony_ci int position_reg, 3331cb0ef41Sopenharmony_ci RegExpNode* on_success); 3341cb0ef41Sopenharmony_ci static ActionNode* BeginNegativeSubmatch(int stack_pointer_reg, 3351cb0ef41Sopenharmony_ci int position_reg, 3361cb0ef41Sopenharmony_ci RegExpNode* on_success); 3371cb0ef41Sopenharmony_ci static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg, 3381cb0ef41Sopenharmony_ci int restore_reg, 3391cb0ef41Sopenharmony_ci int clear_capture_count, 3401cb0ef41Sopenharmony_ci int clear_capture_from, 3411cb0ef41Sopenharmony_ci RegExpNode* on_success); 3421cb0ef41Sopenharmony_ci static ActionNode* EmptyMatchCheck(int start_register, 3431cb0ef41Sopenharmony_ci int repetition_register, 3441cb0ef41Sopenharmony_ci int repetition_limit, 3451cb0ef41Sopenharmony_ci RegExpNode* on_success); 3461cb0ef41Sopenharmony_ci void Accept(NodeVisitor* visitor) override; 3471cb0ef41Sopenharmony_ci void Emit(RegExpCompiler* compiler, Trace* trace) override; 3481cb0ef41Sopenharmony_ci void GetQuickCheckDetails(QuickCheckDetails* details, 3491cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int filled_in, 3501cb0ef41Sopenharmony_ci bool not_at_start) override; 3511cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 3521cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override; 3531cb0ef41Sopenharmony_ci ActionType action_type() { return action_type_; } 3541cb0ef41Sopenharmony_ci // TODO(erikcorry): We should allow some action nodes in greedy loops. 3551cb0ef41Sopenharmony_ci int GreedyLoopTextLength() override { 3561cb0ef41Sopenharmony_ci return kNodeIsTooComplexForGreedyLoops; 3571cb0ef41Sopenharmony_ci } 3581cb0ef41Sopenharmony_ci 3591cb0ef41Sopenharmony_ci private: 3601cb0ef41Sopenharmony_ci union { 3611cb0ef41Sopenharmony_ci struct { 3621cb0ef41Sopenharmony_ci int reg; 3631cb0ef41Sopenharmony_ci int value; 3641cb0ef41Sopenharmony_ci } u_store_register; 3651cb0ef41Sopenharmony_ci struct { 3661cb0ef41Sopenharmony_ci int reg; 3671cb0ef41Sopenharmony_ci } u_increment_register; 3681cb0ef41Sopenharmony_ci struct { 3691cb0ef41Sopenharmony_ci int reg; 3701cb0ef41Sopenharmony_ci bool is_capture; 3711cb0ef41Sopenharmony_ci } u_position_register; 3721cb0ef41Sopenharmony_ci struct { 3731cb0ef41Sopenharmony_ci int stack_pointer_register; 3741cb0ef41Sopenharmony_ci int current_position_register; 3751cb0ef41Sopenharmony_ci int clear_register_count; 3761cb0ef41Sopenharmony_ci int clear_register_from; 3771cb0ef41Sopenharmony_ci } u_submatch; 3781cb0ef41Sopenharmony_ci struct { 3791cb0ef41Sopenharmony_ci int start_register; 3801cb0ef41Sopenharmony_ci int repetition_register; 3811cb0ef41Sopenharmony_ci int repetition_limit; 3821cb0ef41Sopenharmony_ci } u_empty_match_check; 3831cb0ef41Sopenharmony_ci struct { 3841cb0ef41Sopenharmony_ci int range_from; 3851cb0ef41Sopenharmony_ci int range_to; 3861cb0ef41Sopenharmony_ci } u_clear_captures; 3871cb0ef41Sopenharmony_ci } data_; 3881cb0ef41Sopenharmony_ci ActionNode(ActionType action_type, RegExpNode* on_success) 3891cb0ef41Sopenharmony_ci : SeqRegExpNode(on_success), action_type_(action_type) {} 3901cb0ef41Sopenharmony_ci ActionType action_type_; 3911cb0ef41Sopenharmony_ci friend class DotPrinterImpl; 3921cb0ef41Sopenharmony_ci friend Zone; 3931cb0ef41Sopenharmony_ci}; 3941cb0ef41Sopenharmony_ci 3951cb0ef41Sopenharmony_ciclass TextNode : public SeqRegExpNode { 3961cb0ef41Sopenharmony_ci public: 3971cb0ef41Sopenharmony_ci TextNode(ZoneList<TextElement>* elms, bool read_backward, 3981cb0ef41Sopenharmony_ci RegExpNode* on_success) 3991cb0ef41Sopenharmony_ci : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} 4001cb0ef41Sopenharmony_ci TextNode(RegExpCharacterClass* that, bool read_backward, 4011cb0ef41Sopenharmony_ci RegExpNode* on_success) 4021cb0ef41Sopenharmony_ci : SeqRegExpNode(on_success), 4031cb0ef41Sopenharmony_ci elms_(zone()->New<ZoneList<TextElement>>(1, zone())), 4041cb0ef41Sopenharmony_ci read_backward_(read_backward) { 4051cb0ef41Sopenharmony_ci elms_->Add(TextElement::CharClass(that), zone()); 4061cb0ef41Sopenharmony_ci } 4071cb0ef41Sopenharmony_ci // Create TextNode for a single character class for the given ranges. 4081cb0ef41Sopenharmony_ci static TextNode* CreateForCharacterRanges(Zone* zone, 4091cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges, 4101cb0ef41Sopenharmony_ci bool read_backward, 4111cb0ef41Sopenharmony_ci RegExpNode* on_success); 4121cb0ef41Sopenharmony_ci // Create TextNode for a surrogate pair (i.e. match a sequence of two uc16 4131cb0ef41Sopenharmony_ci // code unit ranges). 4141cb0ef41Sopenharmony_ci static TextNode* CreateForSurrogatePair( 4151cb0ef41Sopenharmony_ci Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges, 4161cb0ef41Sopenharmony_ci bool read_backward, RegExpNode* on_success); 4171cb0ef41Sopenharmony_ci static TextNode* CreateForSurrogatePair(Zone* zone, 4181cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* lead_ranges, 4191cb0ef41Sopenharmony_ci CharacterRange trail, 4201cb0ef41Sopenharmony_ci bool read_backward, 4211cb0ef41Sopenharmony_ci RegExpNode* on_success); 4221cb0ef41Sopenharmony_ci void Accept(NodeVisitor* visitor) override; 4231cb0ef41Sopenharmony_ci void Emit(RegExpCompiler* compiler, Trace* trace) override; 4241cb0ef41Sopenharmony_ci void GetQuickCheckDetails(QuickCheckDetails* details, 4251cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int characters_filled_in, 4261cb0ef41Sopenharmony_ci bool not_at_start) override; 4271cb0ef41Sopenharmony_ci ZoneList<TextElement>* elements() { return elms_; } 4281cb0ef41Sopenharmony_ci bool read_backward() { return read_backward_; } 4291cb0ef41Sopenharmony_ci void MakeCaseIndependent(Isolate* isolate, bool is_one_byte, 4301cb0ef41Sopenharmony_ci RegExpFlags flags); 4311cb0ef41Sopenharmony_ci int GreedyLoopTextLength() override; 4321cb0ef41Sopenharmony_ci RegExpNode* GetSuccessorOfOmnivorousTextNode( 4331cb0ef41Sopenharmony_ci RegExpCompiler* compiler) override; 4341cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 4351cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override; 4361cb0ef41Sopenharmony_ci void CalculateOffsets(); 4371cb0ef41Sopenharmony_ci RegExpNode* FilterOneByte(int depth, RegExpFlags flags) override; 4381cb0ef41Sopenharmony_ci int Length(); 4391cb0ef41Sopenharmony_ci 4401cb0ef41Sopenharmony_ci private: 4411cb0ef41Sopenharmony_ci enum TextEmitPassType { 4421cb0ef41Sopenharmony_ci NON_LATIN1_MATCH, // Check for characters that can't match. 4431cb0ef41Sopenharmony_ci SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 4441cb0ef41Sopenharmony_ci NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 4451cb0ef41Sopenharmony_ci CASE_CHARACTER_MATCH, // Case-independent single character check. 4461cb0ef41Sopenharmony_ci CHARACTER_CLASS_MATCH // Character class. 4471cb0ef41Sopenharmony_ci }; 4481cb0ef41Sopenharmony_ci static bool SkipPass(TextEmitPassType pass, bool ignore_case); 4491cb0ef41Sopenharmony_ci static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; 4501cb0ef41Sopenharmony_ci static const int kLastPass = CHARACTER_CLASS_MATCH; 4511cb0ef41Sopenharmony_ci void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass, 4521cb0ef41Sopenharmony_ci bool preloaded, Trace* trace, bool first_element_checked, 4531cb0ef41Sopenharmony_ci int* checked_up_to); 4541cb0ef41Sopenharmony_ci ZoneList<TextElement>* elms_; 4551cb0ef41Sopenharmony_ci bool read_backward_; 4561cb0ef41Sopenharmony_ci}; 4571cb0ef41Sopenharmony_ci 4581cb0ef41Sopenharmony_ciclass AssertionNode : public SeqRegExpNode { 4591cb0ef41Sopenharmony_ci public: 4601cb0ef41Sopenharmony_ci enum AssertionType { 4611cb0ef41Sopenharmony_ci AT_END, 4621cb0ef41Sopenharmony_ci AT_START, 4631cb0ef41Sopenharmony_ci AT_BOUNDARY, 4641cb0ef41Sopenharmony_ci AT_NON_BOUNDARY, 4651cb0ef41Sopenharmony_ci AFTER_NEWLINE 4661cb0ef41Sopenharmony_ci }; 4671cb0ef41Sopenharmony_ci static AssertionNode* AtEnd(RegExpNode* on_success) { 4681cb0ef41Sopenharmony_ci return on_success->zone()->New<AssertionNode>(AT_END, on_success); 4691cb0ef41Sopenharmony_ci } 4701cb0ef41Sopenharmony_ci static AssertionNode* AtStart(RegExpNode* on_success) { 4711cb0ef41Sopenharmony_ci return on_success->zone()->New<AssertionNode>(AT_START, on_success); 4721cb0ef41Sopenharmony_ci } 4731cb0ef41Sopenharmony_ci static AssertionNode* AtBoundary(RegExpNode* on_success) { 4741cb0ef41Sopenharmony_ci return on_success->zone()->New<AssertionNode>(AT_BOUNDARY, on_success); 4751cb0ef41Sopenharmony_ci } 4761cb0ef41Sopenharmony_ci static AssertionNode* AtNonBoundary(RegExpNode* on_success) { 4771cb0ef41Sopenharmony_ci return on_success->zone()->New<AssertionNode>(AT_NON_BOUNDARY, on_success); 4781cb0ef41Sopenharmony_ci } 4791cb0ef41Sopenharmony_ci static AssertionNode* AfterNewline(RegExpNode* on_success) { 4801cb0ef41Sopenharmony_ci return on_success->zone()->New<AssertionNode>(AFTER_NEWLINE, on_success); 4811cb0ef41Sopenharmony_ci } 4821cb0ef41Sopenharmony_ci void Accept(NodeVisitor* visitor) override; 4831cb0ef41Sopenharmony_ci void Emit(RegExpCompiler* compiler, Trace* trace) override; 4841cb0ef41Sopenharmony_ci void GetQuickCheckDetails(QuickCheckDetails* details, 4851cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int filled_in, 4861cb0ef41Sopenharmony_ci bool not_at_start) override; 4871cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 4881cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override; 4891cb0ef41Sopenharmony_ci AssertionType assertion_type() { return assertion_type_; } 4901cb0ef41Sopenharmony_ci 4911cb0ef41Sopenharmony_ci private: 4921cb0ef41Sopenharmony_ci friend Zone; 4931cb0ef41Sopenharmony_ci 4941cb0ef41Sopenharmony_ci void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); 4951cb0ef41Sopenharmony_ci enum IfPrevious { kIsNonWord, kIsWord }; 4961cb0ef41Sopenharmony_ci void BacktrackIfPrevious(RegExpCompiler* compiler, Trace* trace, 4971cb0ef41Sopenharmony_ci IfPrevious backtrack_if_previous); 4981cb0ef41Sopenharmony_ci AssertionNode(AssertionType t, RegExpNode* on_success) 4991cb0ef41Sopenharmony_ci : SeqRegExpNode(on_success), assertion_type_(t) {} 5001cb0ef41Sopenharmony_ci AssertionType assertion_type_; 5011cb0ef41Sopenharmony_ci}; 5021cb0ef41Sopenharmony_ci 5031cb0ef41Sopenharmony_ciclass BackReferenceNode : public SeqRegExpNode { 5041cb0ef41Sopenharmony_ci public: 5051cb0ef41Sopenharmony_ci BackReferenceNode(int start_reg, int end_reg, RegExpFlags flags, 5061cb0ef41Sopenharmony_ci bool read_backward, RegExpNode* on_success) 5071cb0ef41Sopenharmony_ci : SeqRegExpNode(on_success), 5081cb0ef41Sopenharmony_ci start_reg_(start_reg), 5091cb0ef41Sopenharmony_ci end_reg_(end_reg), 5101cb0ef41Sopenharmony_ci flags_(flags), 5111cb0ef41Sopenharmony_ci read_backward_(read_backward) {} 5121cb0ef41Sopenharmony_ci void Accept(NodeVisitor* visitor) override; 5131cb0ef41Sopenharmony_ci int start_register() { return start_reg_; } 5141cb0ef41Sopenharmony_ci int end_register() { return end_reg_; } 5151cb0ef41Sopenharmony_ci bool read_backward() { return read_backward_; } 5161cb0ef41Sopenharmony_ci void Emit(RegExpCompiler* compiler, Trace* trace) override; 5171cb0ef41Sopenharmony_ci void GetQuickCheckDetails(QuickCheckDetails* details, 5181cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int characters_filled_in, 5191cb0ef41Sopenharmony_ci bool not_at_start) override { 5201cb0ef41Sopenharmony_ci return; 5211cb0ef41Sopenharmony_ci } 5221cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 5231cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override; 5241cb0ef41Sopenharmony_ci 5251cb0ef41Sopenharmony_ci private: 5261cb0ef41Sopenharmony_ci int start_reg_; 5271cb0ef41Sopenharmony_ci int end_reg_; 5281cb0ef41Sopenharmony_ci RegExpFlags flags_; 5291cb0ef41Sopenharmony_ci bool read_backward_; 5301cb0ef41Sopenharmony_ci}; 5311cb0ef41Sopenharmony_ci 5321cb0ef41Sopenharmony_ciclass EndNode : public RegExpNode { 5331cb0ef41Sopenharmony_ci public: 5341cb0ef41Sopenharmony_ci enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 5351cb0ef41Sopenharmony_ci EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {} 5361cb0ef41Sopenharmony_ci void Accept(NodeVisitor* visitor) override; 5371cb0ef41Sopenharmony_ci void Emit(RegExpCompiler* compiler, Trace* trace) override; 5381cb0ef41Sopenharmony_ci void GetQuickCheckDetails(QuickCheckDetails* details, 5391cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int characters_filled_in, 5401cb0ef41Sopenharmony_ci bool not_at_start) override { 5411cb0ef41Sopenharmony_ci // Returning 0 from EatsAtLeast should ensure we never get here. 5421cb0ef41Sopenharmony_ci UNREACHABLE(); 5431cb0ef41Sopenharmony_ci } 5441cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 5451cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override { 5461cb0ef41Sopenharmony_ci // Returning 0 from EatsAtLeast should ensure we never get here. 5471cb0ef41Sopenharmony_ci UNREACHABLE(); 5481cb0ef41Sopenharmony_ci } 5491cb0ef41Sopenharmony_ci 5501cb0ef41Sopenharmony_ci private: 5511cb0ef41Sopenharmony_ci Action action_; 5521cb0ef41Sopenharmony_ci}; 5531cb0ef41Sopenharmony_ci 5541cb0ef41Sopenharmony_ciclass NegativeSubmatchSuccess : public EndNode { 5551cb0ef41Sopenharmony_ci public: 5561cb0ef41Sopenharmony_ci NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg, 5571cb0ef41Sopenharmony_ci int clear_capture_count, int clear_capture_start, 5581cb0ef41Sopenharmony_ci Zone* zone) 5591cb0ef41Sopenharmony_ci : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), 5601cb0ef41Sopenharmony_ci stack_pointer_register_(stack_pointer_reg), 5611cb0ef41Sopenharmony_ci current_position_register_(position_reg), 5621cb0ef41Sopenharmony_ci clear_capture_count_(clear_capture_count), 5631cb0ef41Sopenharmony_ci clear_capture_start_(clear_capture_start) {} 5641cb0ef41Sopenharmony_ci void Emit(RegExpCompiler* compiler, Trace* trace) override; 5651cb0ef41Sopenharmony_ci 5661cb0ef41Sopenharmony_ci private: 5671cb0ef41Sopenharmony_ci int stack_pointer_register_; 5681cb0ef41Sopenharmony_ci int current_position_register_; 5691cb0ef41Sopenharmony_ci int clear_capture_count_; 5701cb0ef41Sopenharmony_ci int clear_capture_start_; 5711cb0ef41Sopenharmony_ci}; 5721cb0ef41Sopenharmony_ci 5731cb0ef41Sopenharmony_ciclass Guard : public ZoneObject { 5741cb0ef41Sopenharmony_ci public: 5751cb0ef41Sopenharmony_ci enum Relation { LT, GEQ }; 5761cb0ef41Sopenharmony_ci Guard(int reg, Relation op, int value) : reg_(reg), op_(op), value_(value) {} 5771cb0ef41Sopenharmony_ci int reg() { return reg_; } 5781cb0ef41Sopenharmony_ci Relation op() { return op_; } 5791cb0ef41Sopenharmony_ci int value() { return value_; } 5801cb0ef41Sopenharmony_ci 5811cb0ef41Sopenharmony_ci private: 5821cb0ef41Sopenharmony_ci int reg_; 5831cb0ef41Sopenharmony_ci Relation op_; 5841cb0ef41Sopenharmony_ci int value_; 5851cb0ef41Sopenharmony_ci}; 5861cb0ef41Sopenharmony_ci 5871cb0ef41Sopenharmony_ciclass GuardedAlternative { 5881cb0ef41Sopenharmony_ci public: 5891cb0ef41Sopenharmony_ci explicit GuardedAlternative(RegExpNode* node) 5901cb0ef41Sopenharmony_ci : node_(node), guards_(nullptr) {} 5911cb0ef41Sopenharmony_ci void AddGuard(Guard* guard, Zone* zone); 5921cb0ef41Sopenharmony_ci RegExpNode* node() { return node_; } 5931cb0ef41Sopenharmony_ci void set_node(RegExpNode* node) { node_ = node; } 5941cb0ef41Sopenharmony_ci ZoneList<Guard*>* guards() { return guards_; } 5951cb0ef41Sopenharmony_ci 5961cb0ef41Sopenharmony_ci private: 5971cb0ef41Sopenharmony_ci RegExpNode* node_; 5981cb0ef41Sopenharmony_ci ZoneList<Guard*>* guards_; 5991cb0ef41Sopenharmony_ci}; 6001cb0ef41Sopenharmony_ci 6011cb0ef41Sopenharmony_ciclass AlternativeGeneration; 6021cb0ef41Sopenharmony_ci 6031cb0ef41Sopenharmony_ciclass ChoiceNode : public RegExpNode { 6041cb0ef41Sopenharmony_ci public: 6051cb0ef41Sopenharmony_ci explicit ChoiceNode(int expected_size, Zone* zone) 6061cb0ef41Sopenharmony_ci : RegExpNode(zone), 6071cb0ef41Sopenharmony_ci alternatives_( 6081cb0ef41Sopenharmony_ci zone->New<ZoneList<GuardedAlternative>>(expected_size, zone)), 6091cb0ef41Sopenharmony_ci not_at_start_(false), 6101cb0ef41Sopenharmony_ci being_calculated_(false) {} 6111cb0ef41Sopenharmony_ci void Accept(NodeVisitor* visitor) override; 6121cb0ef41Sopenharmony_ci void AddAlternative(GuardedAlternative node) { 6131cb0ef41Sopenharmony_ci alternatives()->Add(node, zone()); 6141cb0ef41Sopenharmony_ci } 6151cb0ef41Sopenharmony_ci ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } 6161cb0ef41Sopenharmony_ci void Emit(RegExpCompiler* compiler, Trace* trace) override; 6171cb0ef41Sopenharmony_ci void GetQuickCheckDetails(QuickCheckDetails* details, 6181cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int characters_filled_in, 6191cb0ef41Sopenharmony_ci bool not_at_start) override; 6201cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 6211cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override; 6221cb0ef41Sopenharmony_ci 6231cb0ef41Sopenharmony_ci bool being_calculated() { return being_calculated_; } 6241cb0ef41Sopenharmony_ci bool not_at_start() { return not_at_start_; } 6251cb0ef41Sopenharmony_ci void set_not_at_start() { not_at_start_ = true; } 6261cb0ef41Sopenharmony_ci void set_being_calculated(bool b) { being_calculated_ = b; } 6271cb0ef41Sopenharmony_ci virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { 6281cb0ef41Sopenharmony_ci return true; 6291cb0ef41Sopenharmony_ci } 6301cb0ef41Sopenharmony_ci RegExpNode* FilterOneByte(int depth, RegExpFlags flags) override; 6311cb0ef41Sopenharmony_ci virtual bool read_backward() { return false; } 6321cb0ef41Sopenharmony_ci 6331cb0ef41Sopenharmony_ci protected: 6341cb0ef41Sopenharmony_ci int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); 6351cb0ef41Sopenharmony_ci ZoneList<GuardedAlternative>* alternatives_; 6361cb0ef41Sopenharmony_ci 6371cb0ef41Sopenharmony_ci private: 6381cb0ef41Sopenharmony_ci template <typename...> 6391cb0ef41Sopenharmony_ci friend class Analysis; 6401cb0ef41Sopenharmony_ci 6411cb0ef41Sopenharmony_ci void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard, 6421cb0ef41Sopenharmony_ci Trace* trace); 6431cb0ef41Sopenharmony_ci int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least); 6441cb0ef41Sopenharmony_ci void EmitOutOfLineContinuation(RegExpCompiler* compiler, Trace* trace, 6451cb0ef41Sopenharmony_ci GuardedAlternative alternative, 6461cb0ef41Sopenharmony_ci AlternativeGeneration* alt_gen, 6471cb0ef41Sopenharmony_ci int preload_characters, 6481cb0ef41Sopenharmony_ci bool next_expects_preload); 6491cb0ef41Sopenharmony_ci void SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace, 6501cb0ef41Sopenharmony_ci PreloadState* preloads); 6511cb0ef41Sopenharmony_ci void AssertGuardsMentionRegisters(Trace* trace); 6521cb0ef41Sopenharmony_ci int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace); 6531cb0ef41Sopenharmony_ci Trace* EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace, 6541cb0ef41Sopenharmony_ci AlternativeGenerationList* alt_gens, 6551cb0ef41Sopenharmony_ci PreloadState* preloads, 6561cb0ef41Sopenharmony_ci GreedyLoopState* greedy_loop_state, int text_length); 6571cb0ef41Sopenharmony_ci void EmitChoices(RegExpCompiler* compiler, 6581cb0ef41Sopenharmony_ci AlternativeGenerationList* alt_gens, int first_choice, 6591cb0ef41Sopenharmony_ci Trace* trace, PreloadState* preloads); 6601cb0ef41Sopenharmony_ci 6611cb0ef41Sopenharmony_ci // If true, this node is never checked at the start of the input. 6621cb0ef41Sopenharmony_ci // Allows a new trace to start with at_start() set to false. 6631cb0ef41Sopenharmony_ci bool not_at_start_; 6641cb0ef41Sopenharmony_ci bool being_calculated_; 6651cb0ef41Sopenharmony_ci}; 6661cb0ef41Sopenharmony_ci 6671cb0ef41Sopenharmony_ciclass NegativeLookaroundChoiceNode : public ChoiceNode { 6681cb0ef41Sopenharmony_ci public: 6691cb0ef41Sopenharmony_ci explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail, 6701cb0ef41Sopenharmony_ci GuardedAlternative then_do_this, 6711cb0ef41Sopenharmony_ci Zone* zone) 6721cb0ef41Sopenharmony_ci : ChoiceNode(2, zone) { 6731cb0ef41Sopenharmony_ci AddAlternative(this_must_fail); 6741cb0ef41Sopenharmony_ci AddAlternative(then_do_this); 6751cb0ef41Sopenharmony_ci } 6761cb0ef41Sopenharmony_ci void GetQuickCheckDetails(QuickCheckDetails* details, 6771cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int characters_filled_in, 6781cb0ef41Sopenharmony_ci bool not_at_start) override; 6791cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 6801cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override { 6811cb0ef41Sopenharmony_ci continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm, 6821cb0ef41Sopenharmony_ci not_at_start); 6831cb0ef41Sopenharmony_ci if (offset == 0) set_bm_info(not_at_start, bm); 6841cb0ef41Sopenharmony_ci } 6851cb0ef41Sopenharmony_ci static constexpr int kLookaroundIndex = 0; 6861cb0ef41Sopenharmony_ci static constexpr int kContinueIndex = 1; 6871cb0ef41Sopenharmony_ci RegExpNode* lookaround_node() { 6881cb0ef41Sopenharmony_ci return alternatives()->at(kLookaroundIndex).node(); 6891cb0ef41Sopenharmony_ci } 6901cb0ef41Sopenharmony_ci RegExpNode* continue_node() { 6911cb0ef41Sopenharmony_ci return alternatives()->at(kContinueIndex).node(); 6921cb0ef41Sopenharmony_ci } 6931cb0ef41Sopenharmony_ci // For a negative lookahead we don't emit the quick check for the 6941cb0ef41Sopenharmony_ci // alternative that is expected to fail. This is because quick check code 6951cb0ef41Sopenharmony_ci // starts by loading enough characters for the alternative that takes fewest 6961cb0ef41Sopenharmony_ci // characters, but on a negative lookahead the negative branch did not take 6971cb0ef41Sopenharmony_ci // part in that calculation (EatsAtLeast) so the assumptions don't hold. 6981cb0ef41Sopenharmony_ci bool try_to_emit_quick_check_for_alternative(bool is_first) override { 6991cb0ef41Sopenharmony_ci return !is_first; 7001cb0ef41Sopenharmony_ci } 7011cb0ef41Sopenharmony_ci void Accept(NodeVisitor* visitor) override; 7021cb0ef41Sopenharmony_ci RegExpNode* FilterOneByte(int depth, RegExpFlags flags) override; 7031cb0ef41Sopenharmony_ci}; 7041cb0ef41Sopenharmony_ci 7051cb0ef41Sopenharmony_ciclass LoopChoiceNode : public ChoiceNode { 7061cb0ef41Sopenharmony_ci public: 7071cb0ef41Sopenharmony_ci LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, 7081cb0ef41Sopenharmony_ci int min_loop_iterations, Zone* zone) 7091cb0ef41Sopenharmony_ci : ChoiceNode(2, zone), 7101cb0ef41Sopenharmony_ci loop_node_(nullptr), 7111cb0ef41Sopenharmony_ci continue_node_(nullptr), 7121cb0ef41Sopenharmony_ci body_can_be_zero_length_(body_can_be_zero_length), 7131cb0ef41Sopenharmony_ci read_backward_(read_backward), 7141cb0ef41Sopenharmony_ci traversed_loop_initialization_node_(false), 7151cb0ef41Sopenharmony_ci min_loop_iterations_(min_loop_iterations) {} 7161cb0ef41Sopenharmony_ci void AddLoopAlternative(GuardedAlternative alt); 7171cb0ef41Sopenharmony_ci void AddContinueAlternative(GuardedAlternative alt); 7181cb0ef41Sopenharmony_ci void Emit(RegExpCompiler* compiler, Trace* trace) override; 7191cb0ef41Sopenharmony_ci void GetQuickCheckDetails(QuickCheckDetails* details, 7201cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int characters_filled_in, 7211cb0ef41Sopenharmony_ci bool not_at_start) override; 7221cb0ef41Sopenharmony_ci void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, 7231cb0ef41Sopenharmony_ci RegExpCompiler* compiler, 7241cb0ef41Sopenharmony_ci int characters_filled_in, 7251cb0ef41Sopenharmony_ci bool not_at_start) override; 7261cb0ef41Sopenharmony_ci void FillInBMInfo(Isolate* isolate, int offset, int budget, 7271cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) override; 7281cb0ef41Sopenharmony_ci EatsAtLeastInfo EatsAtLeastFromLoopEntry() override; 7291cb0ef41Sopenharmony_ci RegExpNode* loop_node() { return loop_node_; } 7301cb0ef41Sopenharmony_ci RegExpNode* continue_node() { return continue_node_; } 7311cb0ef41Sopenharmony_ci bool body_can_be_zero_length() { return body_can_be_zero_length_; } 7321cb0ef41Sopenharmony_ci int min_loop_iterations() const { return min_loop_iterations_; } 7331cb0ef41Sopenharmony_ci bool read_backward() override { return read_backward_; } 7341cb0ef41Sopenharmony_ci void Accept(NodeVisitor* visitor) override; 7351cb0ef41Sopenharmony_ci RegExpNode* FilterOneByte(int depth, RegExpFlags flags) override; 7361cb0ef41Sopenharmony_ci 7371cb0ef41Sopenharmony_ci private: 7381cb0ef41Sopenharmony_ci // AddAlternative is made private for loop nodes because alternatives 7391cb0ef41Sopenharmony_ci // should not be added freely, we need to keep track of which node 7401cb0ef41Sopenharmony_ci // goes back to the node itself. 7411cb0ef41Sopenharmony_ci void AddAlternative(GuardedAlternative node) { 7421cb0ef41Sopenharmony_ci ChoiceNode::AddAlternative(node); 7431cb0ef41Sopenharmony_ci } 7441cb0ef41Sopenharmony_ci 7451cb0ef41Sopenharmony_ci RegExpNode* loop_node_; 7461cb0ef41Sopenharmony_ci RegExpNode* continue_node_; 7471cb0ef41Sopenharmony_ci bool body_can_be_zero_length_; 7481cb0ef41Sopenharmony_ci bool read_backward_; 7491cb0ef41Sopenharmony_ci 7501cb0ef41Sopenharmony_ci // Temporary marker set only while generating quick check details. Represents 7511cb0ef41Sopenharmony_ci // whether GetQuickCheckDetails traversed the initialization node for this 7521cb0ef41Sopenharmony_ci // loop's counter. If so, we may be able to generate stricter quick checks 7531cb0ef41Sopenharmony_ci // because we know the loop node must match at least min_loop_iterations_ 7541cb0ef41Sopenharmony_ci // times before the continuation node can match. 7551cb0ef41Sopenharmony_ci bool traversed_loop_initialization_node_; 7561cb0ef41Sopenharmony_ci 7571cb0ef41Sopenharmony_ci // The minimum number of times the loop_node_ must match before the 7581cb0ef41Sopenharmony_ci // continue_node_ might be considered. This value can be temporarily decreased 7591cb0ef41Sopenharmony_ci // while generating quick check details, to represent the remaining iterations 7601cb0ef41Sopenharmony_ci // after the completed portion of the quick check details. 7611cb0ef41Sopenharmony_ci int min_loop_iterations_; 7621cb0ef41Sopenharmony_ci 7631cb0ef41Sopenharmony_ci friend class IterationDecrementer; 7641cb0ef41Sopenharmony_ci friend class LoopInitializationMarker; 7651cb0ef41Sopenharmony_ci}; 7661cb0ef41Sopenharmony_ci 7671cb0ef41Sopenharmony_ciclass NodeVisitor { 7681cb0ef41Sopenharmony_ci public: 7691cb0ef41Sopenharmony_ci virtual ~NodeVisitor() = default; 7701cb0ef41Sopenharmony_ci#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0; 7711cb0ef41Sopenharmony_ci FOR_EACH_NODE_TYPE(DECLARE_VISIT) 7721cb0ef41Sopenharmony_ci#undef DECLARE_VISIT 7731cb0ef41Sopenharmony_ci}; 7741cb0ef41Sopenharmony_ci 7751cb0ef41Sopenharmony_ci} // namespace internal 7761cb0ef41Sopenharmony_ci} // namespace v8 7771cb0ef41Sopenharmony_ci 7781cb0ef41Sopenharmony_ci#endif // V8_REGEXP_REGEXP_NODES_H_ 779