11cb0ef41Sopenharmony_ci// Copyright 2019 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-compiler.h" 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ci#include "src/base/safe_conversions.h" 81cb0ef41Sopenharmony_ci#include "src/execution/isolate.h" 91cb0ef41Sopenharmony_ci#include "src/objects/fixed-array-inl.h" 101cb0ef41Sopenharmony_ci#include "src/regexp/regexp-macro-assembler-arch.h" 111cb0ef41Sopenharmony_ci#include "src/strings/unicode-inl.h" 121cb0ef41Sopenharmony_ci#include "src/zone/zone-list-inl.h" 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 151cb0ef41Sopenharmony_ci#include "src/regexp/special-case.h" 161cb0ef41Sopenharmony_ci#include "unicode/locid.h" 171cb0ef41Sopenharmony_ci#include "unicode/uniset.h" 181cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 191cb0ef41Sopenharmony_ci#endif // V8_INTL_SUPPORT 201cb0ef41Sopenharmony_ci 211cb0ef41Sopenharmony_cinamespace v8 { 221cb0ef41Sopenharmony_cinamespace internal { 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ciusing namespace regexp_compiler_constants; // NOLINT(build/namespaces) 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_ci// ------------------------------------------------------------------- 271cb0ef41Sopenharmony_ci// Implementation of the Irregexp regular expression engine. 281cb0ef41Sopenharmony_ci// 291cb0ef41Sopenharmony_ci// The Irregexp regular expression engine is intended to be a complete 301cb0ef41Sopenharmony_ci// implementation of ECMAScript regular expressions. It generates either 311cb0ef41Sopenharmony_ci// bytecodes or native code. 321cb0ef41Sopenharmony_ci 331cb0ef41Sopenharmony_ci// The Irregexp regexp engine is structured in three steps. 341cb0ef41Sopenharmony_ci// 1) The parser generates an abstract syntax tree. See ast.cc. 351cb0ef41Sopenharmony_ci// 2) From the AST a node network is created. The nodes are all 361cb0ef41Sopenharmony_ci// subclasses of RegExpNode. The nodes represent states when 371cb0ef41Sopenharmony_ci// executing a regular expression. Several optimizations are 381cb0ef41Sopenharmony_ci// performed on the node network. 391cb0ef41Sopenharmony_ci// 3) From the nodes we generate either byte codes or native code 401cb0ef41Sopenharmony_ci// that can actually execute the regular expression (perform 411cb0ef41Sopenharmony_ci// the search). The code generation step is described in more 421cb0ef41Sopenharmony_ci// detail below. 431cb0ef41Sopenharmony_ci 441cb0ef41Sopenharmony_ci// Code generation. 451cb0ef41Sopenharmony_ci// 461cb0ef41Sopenharmony_ci// The nodes are divided into four main categories. 471cb0ef41Sopenharmony_ci// * Choice nodes 481cb0ef41Sopenharmony_ci// These represent places where the regular expression can 491cb0ef41Sopenharmony_ci// match in more than one way. For example on entry to an 501cb0ef41Sopenharmony_ci// alternation (foo|bar) or a repetition (*, +, ? or {}). 511cb0ef41Sopenharmony_ci// * Action nodes 521cb0ef41Sopenharmony_ci// These represent places where some action should be 531cb0ef41Sopenharmony_ci// performed. Examples include recording the current position 541cb0ef41Sopenharmony_ci// in the input string to a register (in order to implement 551cb0ef41Sopenharmony_ci// captures) or other actions on register for example in order 561cb0ef41Sopenharmony_ci// to implement the counters needed for {} repetitions. 571cb0ef41Sopenharmony_ci// * Matching nodes 581cb0ef41Sopenharmony_ci// These attempt to match some element part of the input string. 591cb0ef41Sopenharmony_ci// Examples of elements include character classes, plain strings 601cb0ef41Sopenharmony_ci// or back references. 611cb0ef41Sopenharmony_ci// * End nodes 621cb0ef41Sopenharmony_ci// These are used to implement the actions required on finding 631cb0ef41Sopenharmony_ci// a successful match or failing to find a match. 641cb0ef41Sopenharmony_ci// 651cb0ef41Sopenharmony_ci// The code generated (whether as byte codes or native code) maintains 661cb0ef41Sopenharmony_ci// some state as it runs. This consists of the following elements: 671cb0ef41Sopenharmony_ci// 681cb0ef41Sopenharmony_ci// * The capture registers. Used for string captures. 691cb0ef41Sopenharmony_ci// * Other registers. Used for counters etc. 701cb0ef41Sopenharmony_ci// * The current position. 711cb0ef41Sopenharmony_ci// * The stack of backtracking information. Used when a matching node 721cb0ef41Sopenharmony_ci// fails to find a match and needs to try an alternative. 731cb0ef41Sopenharmony_ci// 741cb0ef41Sopenharmony_ci// Conceptual regular expression execution model: 751cb0ef41Sopenharmony_ci// 761cb0ef41Sopenharmony_ci// There is a simple conceptual model of regular expression execution 771cb0ef41Sopenharmony_ci// which will be presented first. The actual code generated is a more 781cb0ef41Sopenharmony_ci// efficient simulation of the simple conceptual model: 791cb0ef41Sopenharmony_ci// 801cb0ef41Sopenharmony_ci// * Choice nodes are implemented as follows: 811cb0ef41Sopenharmony_ci// For each choice except the last { 821cb0ef41Sopenharmony_ci// push current position 831cb0ef41Sopenharmony_ci// push backtrack code location 841cb0ef41Sopenharmony_ci// <generate code to test for choice> 851cb0ef41Sopenharmony_ci// backtrack code location: 861cb0ef41Sopenharmony_ci// pop current position 871cb0ef41Sopenharmony_ci// } 881cb0ef41Sopenharmony_ci// <generate code to test for last choice> 891cb0ef41Sopenharmony_ci// 901cb0ef41Sopenharmony_ci// * Actions nodes are generated as follows 911cb0ef41Sopenharmony_ci// <push affected registers on backtrack stack> 921cb0ef41Sopenharmony_ci// <generate code to perform action> 931cb0ef41Sopenharmony_ci// push backtrack code location 941cb0ef41Sopenharmony_ci// <generate code to test for following nodes> 951cb0ef41Sopenharmony_ci// backtrack code location: 961cb0ef41Sopenharmony_ci// <pop affected registers to restore their state> 971cb0ef41Sopenharmony_ci// <pop backtrack location from stack and go to it> 981cb0ef41Sopenharmony_ci// 991cb0ef41Sopenharmony_ci// * Matching nodes are generated as follows: 1001cb0ef41Sopenharmony_ci// if input string matches at current position 1011cb0ef41Sopenharmony_ci// update current position 1021cb0ef41Sopenharmony_ci// <generate code to test for following nodes> 1031cb0ef41Sopenharmony_ci// else 1041cb0ef41Sopenharmony_ci// <pop backtrack location from stack and go to it> 1051cb0ef41Sopenharmony_ci// 1061cb0ef41Sopenharmony_ci// Thus it can be seen that the current position is saved and restored 1071cb0ef41Sopenharmony_ci// by the choice nodes, whereas the registers are saved and restored by 1081cb0ef41Sopenharmony_ci// by the action nodes that manipulate them. 1091cb0ef41Sopenharmony_ci// 1101cb0ef41Sopenharmony_ci// The other interesting aspect of this model is that nodes are generated 1111cb0ef41Sopenharmony_ci// at the point where they are needed by a recursive call to Emit(). If 1121cb0ef41Sopenharmony_ci// the node has already been code generated then the Emit() call will 1131cb0ef41Sopenharmony_ci// generate a jump to the previously generated code instead. In order to 1141cb0ef41Sopenharmony_ci// limit recursion it is possible for the Emit() function to put the node 1151cb0ef41Sopenharmony_ci// on a work list for later generation and instead generate a jump. The 1161cb0ef41Sopenharmony_ci// destination of the jump is resolved later when the code is generated. 1171cb0ef41Sopenharmony_ci// 1181cb0ef41Sopenharmony_ci// Actual regular expression code generation. 1191cb0ef41Sopenharmony_ci// 1201cb0ef41Sopenharmony_ci// Code generation is actually more complicated than the above. In order 1211cb0ef41Sopenharmony_ci// to improve the efficiency of the generated code some optimizations are 1221cb0ef41Sopenharmony_ci// performed 1231cb0ef41Sopenharmony_ci// 1241cb0ef41Sopenharmony_ci// * Choice nodes have 1-character lookahead. 1251cb0ef41Sopenharmony_ci// A choice node looks at the following character and eliminates some of 1261cb0ef41Sopenharmony_ci// the choices immediately based on that character. This is not yet 1271cb0ef41Sopenharmony_ci// implemented. 1281cb0ef41Sopenharmony_ci// * Simple greedy loops store reduced backtracking information. 1291cb0ef41Sopenharmony_ci// A quantifier like /.*foo/m will greedily match the whole input. It will 1301cb0ef41Sopenharmony_ci// then need to backtrack to a point where it can match "foo". The naive 1311cb0ef41Sopenharmony_ci// implementation of this would push each character position onto the 1321cb0ef41Sopenharmony_ci// backtracking stack, then pop them off one by one. This would use space 1331cb0ef41Sopenharmony_ci// proportional to the length of the input string. However since the "." 1341cb0ef41Sopenharmony_ci// can only match in one way and always has a constant length (in this case 1351cb0ef41Sopenharmony_ci// of 1) it suffices to store the current position on the top of the stack 1361cb0ef41Sopenharmony_ci// once. Matching now becomes merely incrementing the current position and 1371cb0ef41Sopenharmony_ci// backtracking becomes decrementing the current position and checking the 1381cb0ef41Sopenharmony_ci// result against the stored current position. This is faster and saves 1391cb0ef41Sopenharmony_ci// space. 1401cb0ef41Sopenharmony_ci// * The current state is virtualized. 1411cb0ef41Sopenharmony_ci// This is used to defer expensive operations until it is clear that they 1421cb0ef41Sopenharmony_ci// are needed and to generate code for a node more than once, allowing 1431cb0ef41Sopenharmony_ci// specialized an efficient versions of the code to be created. This is 1441cb0ef41Sopenharmony_ci// explained in the section below. 1451cb0ef41Sopenharmony_ci// 1461cb0ef41Sopenharmony_ci// Execution state virtualization. 1471cb0ef41Sopenharmony_ci// 1481cb0ef41Sopenharmony_ci// Instead of emitting code, nodes that manipulate the state can record their 1491cb0ef41Sopenharmony_ci// manipulation in an object called the Trace. The Trace object can record a 1501cb0ef41Sopenharmony_ci// current position offset, an optional backtrack code location on the top of 1511cb0ef41Sopenharmony_ci// the virtualized backtrack stack and some register changes. When a node is 1521cb0ef41Sopenharmony_ci// to be emitted it can flush the Trace or update it. Flushing the Trace 1531cb0ef41Sopenharmony_ci// will emit code to bring the actual state into line with the virtual state. 1541cb0ef41Sopenharmony_ci// Avoiding flushing the state can postpone some work (e.g. updates of capture 1551cb0ef41Sopenharmony_ci// registers). Postponing work can save time when executing the regular 1561cb0ef41Sopenharmony_ci// expression since it may be found that the work never has to be done as a 1571cb0ef41Sopenharmony_ci// failure to match can occur. In addition it is much faster to jump to a 1581cb0ef41Sopenharmony_ci// known backtrack code location than it is to pop an unknown backtrack 1591cb0ef41Sopenharmony_ci// location from the stack and jump there. 1601cb0ef41Sopenharmony_ci// 1611cb0ef41Sopenharmony_ci// The virtual state found in the Trace affects code generation. For example 1621cb0ef41Sopenharmony_ci// the virtual state contains the difference between the actual current 1631cb0ef41Sopenharmony_ci// position and the virtual current position, and matching code needs to use 1641cb0ef41Sopenharmony_ci// this offset to attempt a match in the correct location of the input 1651cb0ef41Sopenharmony_ci// string. Therefore code generated for a non-trivial trace is specialized 1661cb0ef41Sopenharmony_ci// to that trace. The code generator therefore has the ability to generate 1671cb0ef41Sopenharmony_ci// code for each node several times. In order to limit the size of the 1681cb0ef41Sopenharmony_ci// generated code there is an arbitrary limit on how many specialized sets of 1691cb0ef41Sopenharmony_ci// code may be generated for a given node. If the limit is reached, the 1701cb0ef41Sopenharmony_ci// trace is flushed and a generic version of the code for a node is emitted. 1711cb0ef41Sopenharmony_ci// This is subsequently used for that node. The code emitted for non-generic 1721cb0ef41Sopenharmony_ci// trace is not recorded in the node and so it cannot currently be reused in 1731cb0ef41Sopenharmony_ci// the event that code generation is requested for an identical trace. 1741cb0ef41Sopenharmony_ci 1751cb0ef41Sopenharmony_cinamespace { 1761cb0ef41Sopenharmony_ci 1771cb0ef41Sopenharmony_ciconstexpr base::uc32 MaxCodeUnit(const bool one_byte) { 1781cb0ef41Sopenharmony_ci STATIC_ASSERT(String::kMaxOneByteCharCodeU <= 1791cb0ef41Sopenharmony_ci std::numeric_limits<uint16_t>::max()); 1801cb0ef41Sopenharmony_ci STATIC_ASSERT(String::kMaxUtf16CodeUnitU <= 1811cb0ef41Sopenharmony_ci std::numeric_limits<uint16_t>::max()); 1821cb0ef41Sopenharmony_ci return one_byte ? String::kMaxOneByteCharCodeU : String::kMaxUtf16CodeUnitU; 1831cb0ef41Sopenharmony_ci} 1841cb0ef41Sopenharmony_ci 1851cb0ef41Sopenharmony_ciconstexpr uint32_t CharMask(const bool one_byte) { 1861cb0ef41Sopenharmony_ci STATIC_ASSERT(base::bits::IsPowerOfTwo(String::kMaxOneByteCharCodeU + 1)); 1871cb0ef41Sopenharmony_ci STATIC_ASSERT(base::bits::IsPowerOfTwo(String::kMaxUtf16CodeUnitU + 1)); 1881cb0ef41Sopenharmony_ci return MaxCodeUnit(one_byte); 1891cb0ef41Sopenharmony_ci} 1901cb0ef41Sopenharmony_ci 1911cb0ef41Sopenharmony_ci} // namespace 1921cb0ef41Sopenharmony_ci 1931cb0ef41Sopenharmony_civoid RegExpTree::AppendToText(RegExpText* text, Zone* zone) { UNREACHABLE(); } 1941cb0ef41Sopenharmony_ci 1951cb0ef41Sopenharmony_civoid RegExpAtom::AppendToText(RegExpText* text, Zone* zone) { 1961cb0ef41Sopenharmony_ci text->AddElement(TextElement::Atom(this), zone); 1971cb0ef41Sopenharmony_ci} 1981cb0ef41Sopenharmony_ci 1991cb0ef41Sopenharmony_civoid RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) { 2001cb0ef41Sopenharmony_ci text->AddElement(TextElement::CharClass(this), zone); 2011cb0ef41Sopenharmony_ci} 2021cb0ef41Sopenharmony_ci 2031cb0ef41Sopenharmony_civoid RegExpText::AppendToText(RegExpText* text, Zone* zone) { 2041cb0ef41Sopenharmony_ci for (int i = 0; i < elements()->length(); i++) 2051cb0ef41Sopenharmony_ci text->AddElement(elements()->at(i), zone); 2061cb0ef41Sopenharmony_ci} 2071cb0ef41Sopenharmony_ci 2081cb0ef41Sopenharmony_ciTextElement TextElement::Atom(RegExpAtom* atom) { 2091cb0ef41Sopenharmony_ci return TextElement(ATOM, atom); 2101cb0ef41Sopenharmony_ci} 2111cb0ef41Sopenharmony_ci 2121cb0ef41Sopenharmony_ciTextElement TextElement::CharClass(RegExpCharacterClass* char_class) { 2131cb0ef41Sopenharmony_ci return TextElement(CHAR_CLASS, char_class); 2141cb0ef41Sopenharmony_ci} 2151cb0ef41Sopenharmony_ci 2161cb0ef41Sopenharmony_ciint TextElement::length() const { 2171cb0ef41Sopenharmony_ci switch (text_type()) { 2181cb0ef41Sopenharmony_ci case ATOM: 2191cb0ef41Sopenharmony_ci return atom()->length(); 2201cb0ef41Sopenharmony_ci 2211cb0ef41Sopenharmony_ci case CHAR_CLASS: 2221cb0ef41Sopenharmony_ci return 1; 2231cb0ef41Sopenharmony_ci } 2241cb0ef41Sopenharmony_ci UNREACHABLE(); 2251cb0ef41Sopenharmony_ci} 2261cb0ef41Sopenharmony_ci 2271cb0ef41Sopenharmony_ciclass RecursionCheck { 2281cb0ef41Sopenharmony_ci public: 2291cb0ef41Sopenharmony_ci explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 2301cb0ef41Sopenharmony_ci compiler->IncrementRecursionDepth(); 2311cb0ef41Sopenharmony_ci } 2321cb0ef41Sopenharmony_ci ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 2331cb0ef41Sopenharmony_ci 2341cb0ef41Sopenharmony_ci private: 2351cb0ef41Sopenharmony_ci RegExpCompiler* compiler_; 2361cb0ef41Sopenharmony_ci}; 2371cb0ef41Sopenharmony_ci 2381cb0ef41Sopenharmony_ci// Attempts to compile the regexp using an Irregexp code generator. Returns 2391cb0ef41Sopenharmony_ci// a fixed array or a null handle depending on whether it succeeded. 2401cb0ef41Sopenharmony_ciRegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, 2411cb0ef41Sopenharmony_ci RegExpFlags flags, bool one_byte) 2421cb0ef41Sopenharmony_ci : next_register_(JSRegExp::RegistersForCaptureCount(capture_count)), 2431cb0ef41Sopenharmony_ci unicode_lookaround_stack_register_(kNoRegister), 2441cb0ef41Sopenharmony_ci unicode_lookaround_position_register_(kNoRegister), 2451cb0ef41Sopenharmony_ci work_list_(nullptr), 2461cb0ef41Sopenharmony_ci recursion_depth_(0), 2471cb0ef41Sopenharmony_ci flags_(flags), 2481cb0ef41Sopenharmony_ci one_byte_(one_byte), 2491cb0ef41Sopenharmony_ci reg_exp_too_big_(false), 2501cb0ef41Sopenharmony_ci limiting_recursion_(false), 2511cb0ef41Sopenharmony_ci optimize_(FLAG_regexp_optimization), 2521cb0ef41Sopenharmony_ci read_backward_(false), 2531cb0ef41Sopenharmony_ci current_expansion_factor_(1), 2541cb0ef41Sopenharmony_ci frequency_collator_(), 2551cb0ef41Sopenharmony_ci isolate_(isolate), 2561cb0ef41Sopenharmony_ci zone_(zone) { 2571cb0ef41Sopenharmony_ci accept_ = zone->New<EndNode>(EndNode::ACCEPT, zone); 2581cb0ef41Sopenharmony_ci DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1); 2591cb0ef41Sopenharmony_ci} 2601cb0ef41Sopenharmony_ci 2611cb0ef41Sopenharmony_ciRegExpCompiler::CompilationResult RegExpCompiler::Assemble( 2621cb0ef41Sopenharmony_ci Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start, 2631cb0ef41Sopenharmony_ci int capture_count, Handle<String> pattern) { 2641cb0ef41Sopenharmony_ci macro_assembler_ = macro_assembler; 2651cb0ef41Sopenharmony_ci 2661cb0ef41Sopenharmony_ci ZoneVector<RegExpNode*> work_list(zone()); 2671cb0ef41Sopenharmony_ci work_list_ = &work_list; 2681cb0ef41Sopenharmony_ci Label fail; 2691cb0ef41Sopenharmony_ci macro_assembler_->PushBacktrack(&fail); 2701cb0ef41Sopenharmony_ci Trace new_trace; 2711cb0ef41Sopenharmony_ci start->Emit(this, &new_trace); 2721cb0ef41Sopenharmony_ci macro_assembler_->BindJumpTarget(&fail); 2731cb0ef41Sopenharmony_ci macro_assembler_->Fail(); 2741cb0ef41Sopenharmony_ci while (!work_list.empty()) { 2751cb0ef41Sopenharmony_ci RegExpNode* node = work_list.back(); 2761cb0ef41Sopenharmony_ci work_list.pop_back(); 2771cb0ef41Sopenharmony_ci node->set_on_work_list(false); 2781cb0ef41Sopenharmony_ci if (!node->label()->is_bound()) node->Emit(this, &new_trace); 2791cb0ef41Sopenharmony_ci } 2801cb0ef41Sopenharmony_ci if (reg_exp_too_big_) { 2811cb0ef41Sopenharmony_ci if (FLAG_correctness_fuzzer_suppressions) { 2821cb0ef41Sopenharmony_ci FATAL("Aborting on excess zone allocation"); 2831cb0ef41Sopenharmony_ci } 2841cb0ef41Sopenharmony_ci macro_assembler_->AbortedCodeGeneration(); 2851cb0ef41Sopenharmony_ci return CompilationResult::RegExpTooBig(); 2861cb0ef41Sopenharmony_ci } 2871cb0ef41Sopenharmony_ci 2881cb0ef41Sopenharmony_ci Handle<HeapObject> code = macro_assembler_->GetCode(pattern); 2891cb0ef41Sopenharmony_ci isolate->IncreaseTotalRegexpCodeGenerated(code); 2901cb0ef41Sopenharmony_ci work_list_ = nullptr; 2911cb0ef41Sopenharmony_ci 2921cb0ef41Sopenharmony_ci return {code, next_register_}; 2931cb0ef41Sopenharmony_ci} 2941cb0ef41Sopenharmony_ci 2951cb0ef41Sopenharmony_cibool Trace::DeferredAction::Mentions(int that) { 2961cb0ef41Sopenharmony_ci if (action_type() == ActionNode::CLEAR_CAPTURES) { 2971cb0ef41Sopenharmony_ci Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 2981cb0ef41Sopenharmony_ci return range.Contains(that); 2991cb0ef41Sopenharmony_ci } else { 3001cb0ef41Sopenharmony_ci return reg() == that; 3011cb0ef41Sopenharmony_ci } 3021cb0ef41Sopenharmony_ci} 3031cb0ef41Sopenharmony_ci 3041cb0ef41Sopenharmony_cibool Trace::mentions_reg(int reg) { 3051cb0ef41Sopenharmony_ci for (DeferredAction* action = actions_; action != nullptr; 3061cb0ef41Sopenharmony_ci action = action->next()) { 3071cb0ef41Sopenharmony_ci if (action->Mentions(reg)) return true; 3081cb0ef41Sopenharmony_ci } 3091cb0ef41Sopenharmony_ci return false; 3101cb0ef41Sopenharmony_ci} 3111cb0ef41Sopenharmony_ci 3121cb0ef41Sopenharmony_cibool Trace::GetStoredPosition(int reg, int* cp_offset) { 3131cb0ef41Sopenharmony_ci DCHECK_EQ(0, *cp_offset); 3141cb0ef41Sopenharmony_ci for (DeferredAction* action = actions_; action != nullptr; 3151cb0ef41Sopenharmony_ci action = action->next()) { 3161cb0ef41Sopenharmony_ci if (action->Mentions(reg)) { 3171cb0ef41Sopenharmony_ci if (action->action_type() == ActionNode::STORE_POSITION) { 3181cb0ef41Sopenharmony_ci *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); 3191cb0ef41Sopenharmony_ci return true; 3201cb0ef41Sopenharmony_ci } else { 3211cb0ef41Sopenharmony_ci return false; 3221cb0ef41Sopenharmony_ci } 3231cb0ef41Sopenharmony_ci } 3241cb0ef41Sopenharmony_ci } 3251cb0ef41Sopenharmony_ci return false; 3261cb0ef41Sopenharmony_ci} 3271cb0ef41Sopenharmony_ci 3281cb0ef41Sopenharmony_ci// A (dynamically-sized) set of unsigned integers that behaves especially well 3291cb0ef41Sopenharmony_ci// on small integers (< kFirstLimit). May do zone-allocation. 3301cb0ef41Sopenharmony_ciclass DynamicBitSet : public ZoneObject { 3311cb0ef41Sopenharmony_ci public: 3321cb0ef41Sopenharmony_ci V8_EXPORT_PRIVATE bool Get(unsigned value) const { 3331cb0ef41Sopenharmony_ci if (value < kFirstLimit) { 3341cb0ef41Sopenharmony_ci return (first_ & (1 << value)) != 0; 3351cb0ef41Sopenharmony_ci } else if (remaining_ == nullptr) { 3361cb0ef41Sopenharmony_ci return false; 3371cb0ef41Sopenharmony_ci } else { 3381cb0ef41Sopenharmony_ci return remaining_->Contains(value); 3391cb0ef41Sopenharmony_ci } 3401cb0ef41Sopenharmony_ci } 3411cb0ef41Sopenharmony_ci 3421cb0ef41Sopenharmony_ci // Destructively set a value in this set. 3431cb0ef41Sopenharmony_ci void Set(unsigned value, Zone* zone) { 3441cb0ef41Sopenharmony_ci if (value < kFirstLimit) { 3451cb0ef41Sopenharmony_ci first_ |= (1 << value); 3461cb0ef41Sopenharmony_ci } else { 3471cb0ef41Sopenharmony_ci if (remaining_ == nullptr) 3481cb0ef41Sopenharmony_ci remaining_ = zone->New<ZoneList<unsigned>>(1, zone); 3491cb0ef41Sopenharmony_ci if (remaining_->is_empty() || !remaining_->Contains(value)) 3501cb0ef41Sopenharmony_ci remaining_->Add(value, zone); 3511cb0ef41Sopenharmony_ci } 3521cb0ef41Sopenharmony_ci } 3531cb0ef41Sopenharmony_ci 3541cb0ef41Sopenharmony_ci private: 3551cb0ef41Sopenharmony_ci static constexpr unsigned kFirstLimit = 32; 3561cb0ef41Sopenharmony_ci 3571cb0ef41Sopenharmony_ci uint32_t first_ = 0; 3581cb0ef41Sopenharmony_ci ZoneList<unsigned>* remaining_ = nullptr; 3591cb0ef41Sopenharmony_ci}; 3601cb0ef41Sopenharmony_ci 3611cb0ef41Sopenharmony_ciint Trace::FindAffectedRegisters(DynamicBitSet* affected_registers, 3621cb0ef41Sopenharmony_ci Zone* zone) { 3631cb0ef41Sopenharmony_ci int max_register = RegExpCompiler::kNoRegister; 3641cb0ef41Sopenharmony_ci for (DeferredAction* action = actions_; action != nullptr; 3651cb0ef41Sopenharmony_ci action = action->next()) { 3661cb0ef41Sopenharmony_ci if (action->action_type() == ActionNode::CLEAR_CAPTURES) { 3671cb0ef41Sopenharmony_ci Interval range = static_cast<DeferredClearCaptures*>(action)->range(); 3681cb0ef41Sopenharmony_ci for (int i = range.from(); i <= range.to(); i++) 3691cb0ef41Sopenharmony_ci affected_registers->Set(i, zone); 3701cb0ef41Sopenharmony_ci if (range.to() > max_register) max_register = range.to(); 3711cb0ef41Sopenharmony_ci } else { 3721cb0ef41Sopenharmony_ci affected_registers->Set(action->reg(), zone); 3731cb0ef41Sopenharmony_ci if (action->reg() > max_register) max_register = action->reg(); 3741cb0ef41Sopenharmony_ci } 3751cb0ef41Sopenharmony_ci } 3761cb0ef41Sopenharmony_ci return max_register; 3771cb0ef41Sopenharmony_ci} 3781cb0ef41Sopenharmony_ci 3791cb0ef41Sopenharmony_civoid Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, 3801cb0ef41Sopenharmony_ci int max_register, 3811cb0ef41Sopenharmony_ci const DynamicBitSet& registers_to_pop, 3821cb0ef41Sopenharmony_ci const DynamicBitSet& registers_to_clear) { 3831cb0ef41Sopenharmony_ci for (int reg = max_register; reg >= 0; reg--) { 3841cb0ef41Sopenharmony_ci if (registers_to_pop.Get(reg)) { 3851cb0ef41Sopenharmony_ci assembler->PopRegister(reg); 3861cb0ef41Sopenharmony_ci } else if (registers_to_clear.Get(reg)) { 3871cb0ef41Sopenharmony_ci int clear_to = reg; 3881cb0ef41Sopenharmony_ci while (reg > 0 && registers_to_clear.Get(reg - 1)) { 3891cb0ef41Sopenharmony_ci reg--; 3901cb0ef41Sopenharmony_ci } 3911cb0ef41Sopenharmony_ci assembler->ClearRegisters(reg, clear_to); 3921cb0ef41Sopenharmony_ci } 3931cb0ef41Sopenharmony_ci } 3941cb0ef41Sopenharmony_ci} 3951cb0ef41Sopenharmony_ci 3961cb0ef41Sopenharmony_civoid Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, 3971cb0ef41Sopenharmony_ci int max_register, 3981cb0ef41Sopenharmony_ci const DynamicBitSet& affected_registers, 3991cb0ef41Sopenharmony_ci DynamicBitSet* registers_to_pop, 4001cb0ef41Sopenharmony_ci DynamicBitSet* registers_to_clear, 4011cb0ef41Sopenharmony_ci Zone* zone) { 4021cb0ef41Sopenharmony_ci // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. 4031cb0ef41Sopenharmony_ci const int push_limit = (assembler->stack_limit_slack() + 1) / 2; 4041cb0ef41Sopenharmony_ci 4051cb0ef41Sopenharmony_ci // Count pushes performed to force a stack limit check occasionally. 4061cb0ef41Sopenharmony_ci int pushes = 0; 4071cb0ef41Sopenharmony_ci 4081cb0ef41Sopenharmony_ci for (int reg = 0; reg <= max_register; reg++) { 4091cb0ef41Sopenharmony_ci if (!affected_registers.Get(reg)) continue; 4101cb0ef41Sopenharmony_ci 4111cb0ef41Sopenharmony_ci // The chronologically first deferred action in the trace 4121cb0ef41Sopenharmony_ci // is used to infer the action needed to restore a register 4131cb0ef41Sopenharmony_ci // to its previous state (or not, if it's safe to ignore it). 4141cb0ef41Sopenharmony_ci enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; 4151cb0ef41Sopenharmony_ci DeferredActionUndoType undo_action = IGNORE; 4161cb0ef41Sopenharmony_ci 4171cb0ef41Sopenharmony_ci int value = 0; 4181cb0ef41Sopenharmony_ci bool absolute = false; 4191cb0ef41Sopenharmony_ci bool clear = false; 4201cb0ef41Sopenharmony_ci static const int kNoStore = kMinInt; 4211cb0ef41Sopenharmony_ci int store_position = kNoStore; 4221cb0ef41Sopenharmony_ci // This is a little tricky because we are scanning the actions in reverse 4231cb0ef41Sopenharmony_ci // historical order (newest first). 4241cb0ef41Sopenharmony_ci for (DeferredAction* action = actions_; action != nullptr; 4251cb0ef41Sopenharmony_ci action = action->next()) { 4261cb0ef41Sopenharmony_ci if (action->Mentions(reg)) { 4271cb0ef41Sopenharmony_ci switch (action->action_type()) { 4281cb0ef41Sopenharmony_ci case ActionNode::SET_REGISTER_FOR_LOOP: { 4291cb0ef41Sopenharmony_ci Trace::DeferredSetRegisterForLoop* psr = 4301cb0ef41Sopenharmony_ci static_cast<Trace::DeferredSetRegisterForLoop*>(action); 4311cb0ef41Sopenharmony_ci if (!absolute) { 4321cb0ef41Sopenharmony_ci value += psr->value(); 4331cb0ef41Sopenharmony_ci absolute = true; 4341cb0ef41Sopenharmony_ci } 4351cb0ef41Sopenharmony_ci // SET_REGISTER_FOR_LOOP is only used for newly introduced loop 4361cb0ef41Sopenharmony_ci // counters. They can have a significant previous value if they 4371cb0ef41Sopenharmony_ci // occur in a loop. TODO(lrn): Propagate this information, so 4381cb0ef41Sopenharmony_ci // we can set undo_action to IGNORE if we know there is no value to 4391cb0ef41Sopenharmony_ci // restore. 4401cb0ef41Sopenharmony_ci undo_action = RESTORE; 4411cb0ef41Sopenharmony_ci DCHECK_EQ(store_position, kNoStore); 4421cb0ef41Sopenharmony_ci DCHECK(!clear); 4431cb0ef41Sopenharmony_ci break; 4441cb0ef41Sopenharmony_ci } 4451cb0ef41Sopenharmony_ci case ActionNode::INCREMENT_REGISTER: 4461cb0ef41Sopenharmony_ci if (!absolute) { 4471cb0ef41Sopenharmony_ci value++; 4481cb0ef41Sopenharmony_ci } 4491cb0ef41Sopenharmony_ci DCHECK_EQ(store_position, kNoStore); 4501cb0ef41Sopenharmony_ci DCHECK(!clear); 4511cb0ef41Sopenharmony_ci undo_action = RESTORE; 4521cb0ef41Sopenharmony_ci break; 4531cb0ef41Sopenharmony_ci case ActionNode::STORE_POSITION: { 4541cb0ef41Sopenharmony_ci Trace::DeferredCapture* pc = 4551cb0ef41Sopenharmony_ci static_cast<Trace::DeferredCapture*>(action); 4561cb0ef41Sopenharmony_ci if (!clear && store_position == kNoStore) { 4571cb0ef41Sopenharmony_ci store_position = pc->cp_offset(); 4581cb0ef41Sopenharmony_ci } 4591cb0ef41Sopenharmony_ci 4601cb0ef41Sopenharmony_ci // For captures we know that stores and clears alternate. 4611cb0ef41Sopenharmony_ci // Other register, are never cleared, and if the occur 4621cb0ef41Sopenharmony_ci // inside a loop, they might be assigned more than once. 4631cb0ef41Sopenharmony_ci if (reg <= 1) { 4641cb0ef41Sopenharmony_ci // Registers zero and one, aka "capture zero", is 4651cb0ef41Sopenharmony_ci // always set correctly if we succeed. There is no 4661cb0ef41Sopenharmony_ci // need to undo a setting on backtrack, because we 4671cb0ef41Sopenharmony_ci // will set it again or fail. 4681cb0ef41Sopenharmony_ci undo_action = IGNORE; 4691cb0ef41Sopenharmony_ci } else { 4701cb0ef41Sopenharmony_ci undo_action = pc->is_capture() ? CLEAR : RESTORE; 4711cb0ef41Sopenharmony_ci } 4721cb0ef41Sopenharmony_ci DCHECK(!absolute); 4731cb0ef41Sopenharmony_ci DCHECK_EQ(value, 0); 4741cb0ef41Sopenharmony_ci break; 4751cb0ef41Sopenharmony_ci } 4761cb0ef41Sopenharmony_ci case ActionNode::CLEAR_CAPTURES: { 4771cb0ef41Sopenharmony_ci // Since we're scanning in reverse order, if we've already 4781cb0ef41Sopenharmony_ci // set the position we have to ignore historically earlier 4791cb0ef41Sopenharmony_ci // clearing operations. 4801cb0ef41Sopenharmony_ci if (store_position == kNoStore) { 4811cb0ef41Sopenharmony_ci clear = true; 4821cb0ef41Sopenharmony_ci } 4831cb0ef41Sopenharmony_ci undo_action = RESTORE; 4841cb0ef41Sopenharmony_ci DCHECK(!absolute); 4851cb0ef41Sopenharmony_ci DCHECK_EQ(value, 0); 4861cb0ef41Sopenharmony_ci break; 4871cb0ef41Sopenharmony_ci } 4881cb0ef41Sopenharmony_ci default: 4891cb0ef41Sopenharmony_ci UNREACHABLE(); 4901cb0ef41Sopenharmony_ci } 4911cb0ef41Sopenharmony_ci } 4921cb0ef41Sopenharmony_ci } 4931cb0ef41Sopenharmony_ci // Prepare for the undo-action (e.g., push if it's going to be popped). 4941cb0ef41Sopenharmony_ci if (undo_action == RESTORE) { 4951cb0ef41Sopenharmony_ci pushes++; 4961cb0ef41Sopenharmony_ci RegExpMacroAssembler::StackCheckFlag stack_check = 4971cb0ef41Sopenharmony_ci RegExpMacroAssembler::kNoStackLimitCheck; 4981cb0ef41Sopenharmony_ci if (pushes == push_limit) { 4991cb0ef41Sopenharmony_ci stack_check = RegExpMacroAssembler::kCheckStackLimit; 5001cb0ef41Sopenharmony_ci pushes = 0; 5011cb0ef41Sopenharmony_ci } 5021cb0ef41Sopenharmony_ci 5031cb0ef41Sopenharmony_ci assembler->PushRegister(reg, stack_check); 5041cb0ef41Sopenharmony_ci registers_to_pop->Set(reg, zone); 5051cb0ef41Sopenharmony_ci } else if (undo_action == CLEAR) { 5061cb0ef41Sopenharmony_ci registers_to_clear->Set(reg, zone); 5071cb0ef41Sopenharmony_ci } 5081cb0ef41Sopenharmony_ci // Perform the chronologically last action (or accumulated increment) 5091cb0ef41Sopenharmony_ci // for the register. 5101cb0ef41Sopenharmony_ci if (store_position != kNoStore) { 5111cb0ef41Sopenharmony_ci assembler->WriteCurrentPositionToRegister(reg, store_position); 5121cb0ef41Sopenharmony_ci } else if (clear) { 5131cb0ef41Sopenharmony_ci assembler->ClearRegisters(reg, reg); 5141cb0ef41Sopenharmony_ci } else if (absolute) { 5151cb0ef41Sopenharmony_ci assembler->SetRegister(reg, value); 5161cb0ef41Sopenharmony_ci } else if (value != 0) { 5171cb0ef41Sopenharmony_ci assembler->AdvanceRegister(reg, value); 5181cb0ef41Sopenharmony_ci } 5191cb0ef41Sopenharmony_ci } 5201cb0ef41Sopenharmony_ci} 5211cb0ef41Sopenharmony_ci 5221cb0ef41Sopenharmony_ci// This is called as we come into a loop choice node and some other tricky 5231cb0ef41Sopenharmony_ci// nodes. It normalizes the state of the code generator to ensure we can 5241cb0ef41Sopenharmony_ci// generate generic code. 5251cb0ef41Sopenharmony_civoid Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { 5261cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 5271cb0ef41Sopenharmony_ci 5281cb0ef41Sopenharmony_ci DCHECK(!is_trivial()); 5291cb0ef41Sopenharmony_ci 5301cb0ef41Sopenharmony_ci if (actions_ == nullptr && backtrack() == nullptr) { 5311cb0ef41Sopenharmony_ci // Here we just have some deferred cp advances to fix and we are back to 5321cb0ef41Sopenharmony_ci // a normal situation. We may also have to forget some information gained 5331cb0ef41Sopenharmony_ci // through a quick check that was already performed. 5341cb0ef41Sopenharmony_ci if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); 5351cb0ef41Sopenharmony_ci // Create a new trivial state and generate the node with that. 5361cb0ef41Sopenharmony_ci Trace new_state; 5371cb0ef41Sopenharmony_ci successor->Emit(compiler, &new_state); 5381cb0ef41Sopenharmony_ci return; 5391cb0ef41Sopenharmony_ci } 5401cb0ef41Sopenharmony_ci 5411cb0ef41Sopenharmony_ci // Generate deferred actions here along with code to undo them again. 5421cb0ef41Sopenharmony_ci DynamicBitSet affected_registers; 5431cb0ef41Sopenharmony_ci 5441cb0ef41Sopenharmony_ci if (backtrack() != nullptr) { 5451cb0ef41Sopenharmony_ci // Here we have a concrete backtrack location. These are set up by choice 5461cb0ef41Sopenharmony_ci // nodes and so they indicate that we have a deferred save of the current 5471cb0ef41Sopenharmony_ci // position which we may need to emit here. 5481cb0ef41Sopenharmony_ci assembler->PushCurrentPosition(); 5491cb0ef41Sopenharmony_ci } 5501cb0ef41Sopenharmony_ci 5511cb0ef41Sopenharmony_ci int max_register = 5521cb0ef41Sopenharmony_ci FindAffectedRegisters(&affected_registers, compiler->zone()); 5531cb0ef41Sopenharmony_ci DynamicBitSet registers_to_pop; 5541cb0ef41Sopenharmony_ci DynamicBitSet registers_to_clear; 5551cb0ef41Sopenharmony_ci PerformDeferredActions(assembler, max_register, affected_registers, 5561cb0ef41Sopenharmony_ci ®isters_to_pop, ®isters_to_clear, 5571cb0ef41Sopenharmony_ci compiler->zone()); 5581cb0ef41Sopenharmony_ci if (cp_offset_ != 0) { 5591cb0ef41Sopenharmony_ci assembler->AdvanceCurrentPosition(cp_offset_); 5601cb0ef41Sopenharmony_ci } 5611cb0ef41Sopenharmony_ci 5621cb0ef41Sopenharmony_ci // Create a new trivial state and generate the node with that. 5631cb0ef41Sopenharmony_ci Label undo; 5641cb0ef41Sopenharmony_ci assembler->PushBacktrack(&undo); 5651cb0ef41Sopenharmony_ci if (successor->KeepRecursing(compiler)) { 5661cb0ef41Sopenharmony_ci Trace new_state; 5671cb0ef41Sopenharmony_ci successor->Emit(compiler, &new_state); 5681cb0ef41Sopenharmony_ci } else { 5691cb0ef41Sopenharmony_ci compiler->AddWork(successor); 5701cb0ef41Sopenharmony_ci assembler->GoTo(successor->label()); 5711cb0ef41Sopenharmony_ci } 5721cb0ef41Sopenharmony_ci 5731cb0ef41Sopenharmony_ci // On backtrack we need to restore state. 5741cb0ef41Sopenharmony_ci assembler->BindJumpTarget(&undo); 5751cb0ef41Sopenharmony_ci RestoreAffectedRegisters(assembler, max_register, registers_to_pop, 5761cb0ef41Sopenharmony_ci registers_to_clear); 5771cb0ef41Sopenharmony_ci if (backtrack() == nullptr) { 5781cb0ef41Sopenharmony_ci assembler->Backtrack(); 5791cb0ef41Sopenharmony_ci } else { 5801cb0ef41Sopenharmony_ci assembler->PopCurrentPosition(); 5811cb0ef41Sopenharmony_ci assembler->GoTo(backtrack()); 5821cb0ef41Sopenharmony_ci } 5831cb0ef41Sopenharmony_ci} 5841cb0ef41Sopenharmony_ci 5851cb0ef41Sopenharmony_civoid NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { 5861cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 5871cb0ef41Sopenharmony_ci 5881cb0ef41Sopenharmony_ci // Omit flushing the trace. We discard the entire stack frame anyway. 5891cb0ef41Sopenharmony_ci 5901cb0ef41Sopenharmony_ci if (!label()->is_bound()) { 5911cb0ef41Sopenharmony_ci // We are completely independent of the trace, since we ignore it, 5921cb0ef41Sopenharmony_ci // so this code can be used as the generic version. 5931cb0ef41Sopenharmony_ci assembler->Bind(label()); 5941cb0ef41Sopenharmony_ci } 5951cb0ef41Sopenharmony_ci 5961cb0ef41Sopenharmony_ci // Throw away everything on the backtrack stack since the start 5971cb0ef41Sopenharmony_ci // of the negative submatch and restore the character position. 5981cb0ef41Sopenharmony_ci assembler->ReadCurrentPositionFromRegister(current_position_register_); 5991cb0ef41Sopenharmony_ci assembler->ReadStackPointerFromRegister(stack_pointer_register_); 6001cb0ef41Sopenharmony_ci if (clear_capture_count_ > 0) { 6011cb0ef41Sopenharmony_ci // Clear any captures that might have been performed during the success 6021cb0ef41Sopenharmony_ci // of the body of the negative look-ahead. 6031cb0ef41Sopenharmony_ci int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; 6041cb0ef41Sopenharmony_ci assembler->ClearRegisters(clear_capture_start_, clear_capture_end); 6051cb0ef41Sopenharmony_ci } 6061cb0ef41Sopenharmony_ci // Now that we have unwound the stack we find at the top of the stack the 6071cb0ef41Sopenharmony_ci // backtrack that the BeginNegativeSubmatch node got. 6081cb0ef41Sopenharmony_ci assembler->Backtrack(); 6091cb0ef41Sopenharmony_ci} 6101cb0ef41Sopenharmony_ci 6111cb0ef41Sopenharmony_civoid EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { 6121cb0ef41Sopenharmony_ci if (!trace->is_trivial()) { 6131cb0ef41Sopenharmony_ci trace->Flush(compiler, this); 6141cb0ef41Sopenharmony_ci return; 6151cb0ef41Sopenharmony_ci } 6161cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 6171cb0ef41Sopenharmony_ci if (!label()->is_bound()) { 6181cb0ef41Sopenharmony_ci assembler->Bind(label()); 6191cb0ef41Sopenharmony_ci } 6201cb0ef41Sopenharmony_ci switch (action_) { 6211cb0ef41Sopenharmony_ci case ACCEPT: 6221cb0ef41Sopenharmony_ci assembler->Succeed(); 6231cb0ef41Sopenharmony_ci return; 6241cb0ef41Sopenharmony_ci case BACKTRACK: 6251cb0ef41Sopenharmony_ci assembler->GoTo(trace->backtrack()); 6261cb0ef41Sopenharmony_ci return; 6271cb0ef41Sopenharmony_ci case NEGATIVE_SUBMATCH_SUCCESS: 6281cb0ef41Sopenharmony_ci // This case is handled in a different virtual method. 6291cb0ef41Sopenharmony_ci UNREACHABLE(); 6301cb0ef41Sopenharmony_ci } 6311cb0ef41Sopenharmony_ci UNIMPLEMENTED(); 6321cb0ef41Sopenharmony_ci} 6331cb0ef41Sopenharmony_ci 6341cb0ef41Sopenharmony_civoid GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { 6351cb0ef41Sopenharmony_ci if (guards_ == nullptr) guards_ = zone->New<ZoneList<Guard*>>(1, zone); 6361cb0ef41Sopenharmony_ci guards_->Add(guard, zone); 6371cb0ef41Sopenharmony_ci} 6381cb0ef41Sopenharmony_ci 6391cb0ef41Sopenharmony_ciActionNode* ActionNode::SetRegisterForLoop(int reg, int val, 6401cb0ef41Sopenharmony_ci RegExpNode* on_success) { 6411cb0ef41Sopenharmony_ci ActionNode* result = 6421cb0ef41Sopenharmony_ci on_success->zone()->New<ActionNode>(SET_REGISTER_FOR_LOOP, on_success); 6431cb0ef41Sopenharmony_ci result->data_.u_store_register.reg = reg; 6441cb0ef41Sopenharmony_ci result->data_.u_store_register.value = val; 6451cb0ef41Sopenharmony_ci return result; 6461cb0ef41Sopenharmony_ci} 6471cb0ef41Sopenharmony_ci 6481cb0ef41Sopenharmony_ciActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { 6491cb0ef41Sopenharmony_ci ActionNode* result = 6501cb0ef41Sopenharmony_ci on_success->zone()->New<ActionNode>(INCREMENT_REGISTER, on_success); 6511cb0ef41Sopenharmony_ci result->data_.u_increment_register.reg = reg; 6521cb0ef41Sopenharmony_ci return result; 6531cb0ef41Sopenharmony_ci} 6541cb0ef41Sopenharmony_ci 6551cb0ef41Sopenharmony_ciActionNode* ActionNode::StorePosition(int reg, bool is_capture, 6561cb0ef41Sopenharmony_ci RegExpNode* on_success) { 6571cb0ef41Sopenharmony_ci ActionNode* result = 6581cb0ef41Sopenharmony_ci on_success->zone()->New<ActionNode>(STORE_POSITION, on_success); 6591cb0ef41Sopenharmony_ci result->data_.u_position_register.reg = reg; 6601cb0ef41Sopenharmony_ci result->data_.u_position_register.is_capture = is_capture; 6611cb0ef41Sopenharmony_ci return result; 6621cb0ef41Sopenharmony_ci} 6631cb0ef41Sopenharmony_ci 6641cb0ef41Sopenharmony_ciActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { 6651cb0ef41Sopenharmony_ci ActionNode* result = 6661cb0ef41Sopenharmony_ci on_success->zone()->New<ActionNode>(CLEAR_CAPTURES, on_success); 6671cb0ef41Sopenharmony_ci result->data_.u_clear_captures.range_from = range.from(); 6681cb0ef41Sopenharmony_ci result->data_.u_clear_captures.range_to = range.to(); 6691cb0ef41Sopenharmony_ci return result; 6701cb0ef41Sopenharmony_ci} 6711cb0ef41Sopenharmony_ci 6721cb0ef41Sopenharmony_ciActionNode* ActionNode::BeginPositiveSubmatch(int stack_reg, int position_reg, 6731cb0ef41Sopenharmony_ci RegExpNode* on_success) { 6741cb0ef41Sopenharmony_ci ActionNode* result = 6751cb0ef41Sopenharmony_ci on_success->zone()->New<ActionNode>(BEGIN_POSITIVE_SUBMATCH, on_success); 6761cb0ef41Sopenharmony_ci result->data_.u_submatch.stack_pointer_register = stack_reg; 6771cb0ef41Sopenharmony_ci result->data_.u_submatch.current_position_register = position_reg; 6781cb0ef41Sopenharmony_ci return result; 6791cb0ef41Sopenharmony_ci} 6801cb0ef41Sopenharmony_ci 6811cb0ef41Sopenharmony_ciActionNode* ActionNode::BeginNegativeSubmatch(int stack_reg, int position_reg, 6821cb0ef41Sopenharmony_ci RegExpNode* on_success) { 6831cb0ef41Sopenharmony_ci ActionNode* result = 6841cb0ef41Sopenharmony_ci on_success->zone()->New<ActionNode>(BEGIN_NEGATIVE_SUBMATCH, on_success); 6851cb0ef41Sopenharmony_ci result->data_.u_submatch.stack_pointer_register = stack_reg; 6861cb0ef41Sopenharmony_ci result->data_.u_submatch.current_position_register = position_reg; 6871cb0ef41Sopenharmony_ci return result; 6881cb0ef41Sopenharmony_ci} 6891cb0ef41Sopenharmony_ci 6901cb0ef41Sopenharmony_ciActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, int position_reg, 6911cb0ef41Sopenharmony_ci int clear_register_count, 6921cb0ef41Sopenharmony_ci int clear_register_from, 6931cb0ef41Sopenharmony_ci RegExpNode* on_success) { 6941cb0ef41Sopenharmony_ci ActionNode* result = on_success->zone()->New<ActionNode>( 6951cb0ef41Sopenharmony_ci POSITIVE_SUBMATCH_SUCCESS, on_success); 6961cb0ef41Sopenharmony_ci result->data_.u_submatch.stack_pointer_register = stack_reg; 6971cb0ef41Sopenharmony_ci result->data_.u_submatch.current_position_register = position_reg; 6981cb0ef41Sopenharmony_ci result->data_.u_submatch.clear_register_count = clear_register_count; 6991cb0ef41Sopenharmony_ci result->data_.u_submatch.clear_register_from = clear_register_from; 7001cb0ef41Sopenharmony_ci return result; 7011cb0ef41Sopenharmony_ci} 7021cb0ef41Sopenharmony_ci 7031cb0ef41Sopenharmony_ciActionNode* ActionNode::EmptyMatchCheck(int start_register, 7041cb0ef41Sopenharmony_ci int repetition_register, 7051cb0ef41Sopenharmony_ci int repetition_limit, 7061cb0ef41Sopenharmony_ci RegExpNode* on_success) { 7071cb0ef41Sopenharmony_ci ActionNode* result = 7081cb0ef41Sopenharmony_ci on_success->zone()->New<ActionNode>(EMPTY_MATCH_CHECK, on_success); 7091cb0ef41Sopenharmony_ci result->data_.u_empty_match_check.start_register = start_register; 7101cb0ef41Sopenharmony_ci result->data_.u_empty_match_check.repetition_register = repetition_register; 7111cb0ef41Sopenharmony_ci result->data_.u_empty_match_check.repetition_limit = repetition_limit; 7121cb0ef41Sopenharmony_ci return result; 7131cb0ef41Sopenharmony_ci} 7141cb0ef41Sopenharmony_ci 7151cb0ef41Sopenharmony_ci#define DEFINE_ACCEPT(Type) \ 7161cb0ef41Sopenharmony_ci void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); } 7171cb0ef41Sopenharmony_ciFOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 7181cb0ef41Sopenharmony_ci#undef DEFINE_ACCEPT 7191cb0ef41Sopenharmony_ci 7201cb0ef41Sopenharmony_ci// ------------------------------------------------------------------- 7211cb0ef41Sopenharmony_ci// Emit code. 7221cb0ef41Sopenharmony_ci 7231cb0ef41Sopenharmony_civoid ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, 7241cb0ef41Sopenharmony_ci Guard* guard, Trace* trace) { 7251cb0ef41Sopenharmony_ci switch (guard->op()) { 7261cb0ef41Sopenharmony_ci case Guard::LT: 7271cb0ef41Sopenharmony_ci DCHECK(!trace->mentions_reg(guard->reg())); 7281cb0ef41Sopenharmony_ci macro_assembler->IfRegisterGE(guard->reg(), guard->value(), 7291cb0ef41Sopenharmony_ci trace->backtrack()); 7301cb0ef41Sopenharmony_ci break; 7311cb0ef41Sopenharmony_ci case Guard::GEQ: 7321cb0ef41Sopenharmony_ci DCHECK(!trace->mentions_reg(guard->reg())); 7331cb0ef41Sopenharmony_ci macro_assembler->IfRegisterLT(guard->reg(), guard->value(), 7341cb0ef41Sopenharmony_ci trace->backtrack()); 7351cb0ef41Sopenharmony_ci break; 7361cb0ef41Sopenharmony_ci } 7371cb0ef41Sopenharmony_ci} 7381cb0ef41Sopenharmony_ci 7391cb0ef41Sopenharmony_cinamespace { 7401cb0ef41Sopenharmony_ci 7411cb0ef41Sopenharmony_ci#ifdef DEBUG 7421cb0ef41Sopenharmony_cibool ContainsOnlyUtf16CodeUnits(unibrow::uchar* chars, int length) { 7431cb0ef41Sopenharmony_ci STATIC_ASSERT(sizeof(unibrow::uchar) == 4); 7441cb0ef41Sopenharmony_ci for (int i = 0; i < length; i++) { 7451cb0ef41Sopenharmony_ci if (chars[i] > String::kMaxUtf16CodeUnit) return false; 7461cb0ef41Sopenharmony_ci } 7471cb0ef41Sopenharmony_ci return true; 7481cb0ef41Sopenharmony_ci} 7491cb0ef41Sopenharmony_ci#endif // DEBUG 7501cb0ef41Sopenharmony_ci 7511cb0ef41Sopenharmony_ci// Returns the number of characters in the equivalence class, omitting those 7521cb0ef41Sopenharmony_ci// that cannot occur in the source string because it is Latin1. 7531cb0ef41Sopenharmony_ciint GetCaseIndependentLetters(Isolate* isolate, base::uc16 character, 7541cb0ef41Sopenharmony_ci bool one_byte_subject, unibrow::uchar* letters, 7551cb0ef41Sopenharmony_ci int letter_length) { 7561cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 7571cb0ef41Sopenharmony_ci if (RegExpCaseFolding::IgnoreSet().contains(character)) { 7581cb0ef41Sopenharmony_ci letters[0] = character; 7591cb0ef41Sopenharmony_ci DCHECK(ContainsOnlyUtf16CodeUnits(letters, 1)); 7601cb0ef41Sopenharmony_ci return 1; 7611cb0ef41Sopenharmony_ci } 7621cb0ef41Sopenharmony_ci bool in_special_add_set = 7631cb0ef41Sopenharmony_ci RegExpCaseFolding::SpecialAddSet().contains(character); 7641cb0ef41Sopenharmony_ci 7651cb0ef41Sopenharmony_ci icu::UnicodeSet set; 7661cb0ef41Sopenharmony_ci set.add(character); 7671cb0ef41Sopenharmony_ci set = set.closeOver(USET_CASE_INSENSITIVE); 7681cb0ef41Sopenharmony_ci 7691cb0ef41Sopenharmony_ci UChar32 canon = 0; 7701cb0ef41Sopenharmony_ci if (in_special_add_set) { 7711cb0ef41Sopenharmony_ci canon = RegExpCaseFolding::Canonicalize(character); 7721cb0ef41Sopenharmony_ci } 7731cb0ef41Sopenharmony_ci 7741cb0ef41Sopenharmony_ci int32_t range_count = set.getRangeCount(); 7751cb0ef41Sopenharmony_ci int items = 0; 7761cb0ef41Sopenharmony_ci for (int32_t i = 0; i < range_count; i++) { 7771cb0ef41Sopenharmony_ci UChar32 start = set.getRangeStart(i); 7781cb0ef41Sopenharmony_ci UChar32 end = set.getRangeEnd(i); 7791cb0ef41Sopenharmony_ci CHECK(end - start + items <= letter_length); 7801cb0ef41Sopenharmony_ci for (UChar32 cu = start; cu <= end; cu++) { 7811cb0ef41Sopenharmony_ci if (one_byte_subject && cu > String::kMaxOneByteCharCode) break; 7821cb0ef41Sopenharmony_ci if (in_special_add_set && RegExpCaseFolding::Canonicalize(cu) != canon) { 7831cb0ef41Sopenharmony_ci continue; 7841cb0ef41Sopenharmony_ci } 7851cb0ef41Sopenharmony_ci letters[items++] = static_cast<unibrow::uchar>(cu); 7861cb0ef41Sopenharmony_ci } 7871cb0ef41Sopenharmony_ci } 7881cb0ef41Sopenharmony_ci DCHECK(ContainsOnlyUtf16CodeUnits(letters, items)); 7891cb0ef41Sopenharmony_ci return items; 7901cb0ef41Sopenharmony_ci#else 7911cb0ef41Sopenharmony_ci int length = 7921cb0ef41Sopenharmony_ci isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); 7931cb0ef41Sopenharmony_ci // Unibrow returns 0 or 1 for characters where case independence is 7941cb0ef41Sopenharmony_ci // trivial. 7951cb0ef41Sopenharmony_ci if (length == 0) { 7961cb0ef41Sopenharmony_ci letters[0] = character; 7971cb0ef41Sopenharmony_ci length = 1; 7981cb0ef41Sopenharmony_ci } 7991cb0ef41Sopenharmony_ci 8001cb0ef41Sopenharmony_ci if (one_byte_subject) { 8011cb0ef41Sopenharmony_ci int new_length = 0; 8021cb0ef41Sopenharmony_ci for (int i = 0; i < length; i++) { 8031cb0ef41Sopenharmony_ci if (letters[i] <= String::kMaxOneByteCharCode) { 8041cb0ef41Sopenharmony_ci letters[new_length++] = letters[i]; 8051cb0ef41Sopenharmony_ci } 8061cb0ef41Sopenharmony_ci } 8071cb0ef41Sopenharmony_ci length = new_length; 8081cb0ef41Sopenharmony_ci } 8091cb0ef41Sopenharmony_ci 8101cb0ef41Sopenharmony_ci DCHECK(ContainsOnlyUtf16CodeUnits(letters, length)); 8111cb0ef41Sopenharmony_ci return length; 8121cb0ef41Sopenharmony_ci#endif // V8_INTL_SUPPORT 8131cb0ef41Sopenharmony_ci} 8141cb0ef41Sopenharmony_ci 8151cb0ef41Sopenharmony_ciinline bool EmitSimpleCharacter(Isolate* isolate, RegExpCompiler* compiler, 8161cb0ef41Sopenharmony_ci base::uc16 c, Label* on_failure, int cp_offset, 8171cb0ef41Sopenharmony_ci bool check, bool preloaded) { 8181cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 8191cb0ef41Sopenharmony_ci bool bound_checked = false; 8201cb0ef41Sopenharmony_ci if (!preloaded) { 8211cb0ef41Sopenharmony_ci assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 8221cb0ef41Sopenharmony_ci bound_checked = true; 8231cb0ef41Sopenharmony_ci } 8241cb0ef41Sopenharmony_ci assembler->CheckNotCharacter(c, on_failure); 8251cb0ef41Sopenharmony_ci return bound_checked; 8261cb0ef41Sopenharmony_ci} 8271cb0ef41Sopenharmony_ci 8281cb0ef41Sopenharmony_ci// Only emits non-letters (things that don't have case). Only used for case 8291cb0ef41Sopenharmony_ci// independent matches. 8301cb0ef41Sopenharmony_ciinline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler, 8311cb0ef41Sopenharmony_ci base::uc16 c, Label* on_failure, int cp_offset, 8321cb0ef41Sopenharmony_ci bool check, bool preloaded) { 8331cb0ef41Sopenharmony_ci RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 8341cb0ef41Sopenharmony_ci bool one_byte = compiler->one_byte(); 8351cb0ef41Sopenharmony_ci unibrow::uchar chars[4]; 8361cb0ef41Sopenharmony_ci int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4); 8371cb0ef41Sopenharmony_ci if (length < 1) { 8381cb0ef41Sopenharmony_ci // This can't match. Must be an one-byte subject and a non-one-byte 8391cb0ef41Sopenharmony_ci // character. We do not need to do anything since the one-byte pass 8401cb0ef41Sopenharmony_ci // already handled this. 8411cb0ef41Sopenharmony_ci return false; // Bounds not checked. 8421cb0ef41Sopenharmony_ci } 8431cb0ef41Sopenharmony_ci bool checked = false; 8441cb0ef41Sopenharmony_ci // We handle the length > 1 case in a later pass. 8451cb0ef41Sopenharmony_ci if (length == 1) { 8461cb0ef41Sopenharmony_ci if (one_byte && c > String::kMaxOneByteCharCodeU) { 8471cb0ef41Sopenharmony_ci // Can't match - see above. 8481cb0ef41Sopenharmony_ci return false; // Bounds not checked. 8491cb0ef41Sopenharmony_ci } 8501cb0ef41Sopenharmony_ci if (!preloaded) { 8511cb0ef41Sopenharmony_ci macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 8521cb0ef41Sopenharmony_ci checked = check; 8531cb0ef41Sopenharmony_ci } 8541cb0ef41Sopenharmony_ci macro_assembler->CheckNotCharacter(c, on_failure); 8551cb0ef41Sopenharmony_ci } 8561cb0ef41Sopenharmony_ci return checked; 8571cb0ef41Sopenharmony_ci} 8581cb0ef41Sopenharmony_ci 8591cb0ef41Sopenharmony_cibool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 8601cb0ef41Sopenharmony_ci bool one_byte, base::uc16 c1, base::uc16 c2, 8611cb0ef41Sopenharmony_ci Label* on_failure) { 8621cb0ef41Sopenharmony_ci const uint32_t char_mask = CharMask(one_byte); 8631cb0ef41Sopenharmony_ci base::uc16 exor = c1 ^ c2; 8641cb0ef41Sopenharmony_ci // Check whether exor has only one bit set. 8651cb0ef41Sopenharmony_ci if (((exor - 1) & exor) == 0) { 8661cb0ef41Sopenharmony_ci // If c1 and c2 differ only by one bit. 8671cb0ef41Sopenharmony_ci // Ecma262UnCanonicalize always gives the highest number last. 8681cb0ef41Sopenharmony_ci DCHECK(c2 > c1); 8691cb0ef41Sopenharmony_ci base::uc16 mask = char_mask ^ exor; 8701cb0ef41Sopenharmony_ci macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); 8711cb0ef41Sopenharmony_ci return true; 8721cb0ef41Sopenharmony_ci } 8731cb0ef41Sopenharmony_ci DCHECK(c2 > c1); 8741cb0ef41Sopenharmony_ci base::uc16 diff = c2 - c1; 8751cb0ef41Sopenharmony_ci if (((diff - 1) & diff) == 0 && c1 >= diff) { 8761cb0ef41Sopenharmony_ci // If the characters differ by 2^n but don't differ by one bit then 8771cb0ef41Sopenharmony_ci // subtract the difference from the found character, then do the or 8781cb0ef41Sopenharmony_ci // trick. We avoid the theoretical case where negative numbers are 8791cb0ef41Sopenharmony_ci // involved in order to simplify code generation. 8801cb0ef41Sopenharmony_ci base::uc16 mask = char_mask ^ diff; 8811cb0ef41Sopenharmony_ci macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask, 8821cb0ef41Sopenharmony_ci on_failure); 8831cb0ef41Sopenharmony_ci return true; 8841cb0ef41Sopenharmony_ci } 8851cb0ef41Sopenharmony_ci return false; 8861cb0ef41Sopenharmony_ci} 8871cb0ef41Sopenharmony_ci 8881cb0ef41Sopenharmony_ci// Only emits letters (things that have case). Only used for case independent 8891cb0ef41Sopenharmony_ci// matches. 8901cb0ef41Sopenharmony_ciinline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler, 8911cb0ef41Sopenharmony_ci base::uc16 c, Label* on_failure, int cp_offset, 8921cb0ef41Sopenharmony_ci bool check, bool preloaded) { 8931cb0ef41Sopenharmony_ci RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 8941cb0ef41Sopenharmony_ci bool one_byte = compiler->one_byte(); 8951cb0ef41Sopenharmony_ci unibrow::uchar chars[4]; 8961cb0ef41Sopenharmony_ci int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4); 8971cb0ef41Sopenharmony_ci if (length <= 1) return false; 8981cb0ef41Sopenharmony_ci // We may not need to check against the end of the input string 8991cb0ef41Sopenharmony_ci // if this character lies before a character that matched. 9001cb0ef41Sopenharmony_ci if (!preloaded) { 9011cb0ef41Sopenharmony_ci macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 9021cb0ef41Sopenharmony_ci } 9031cb0ef41Sopenharmony_ci Label ok; 9041cb0ef41Sopenharmony_ci switch (length) { 9051cb0ef41Sopenharmony_ci case 2: { 9061cb0ef41Sopenharmony_ci if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0], 9071cb0ef41Sopenharmony_ci chars[1], on_failure)) { 9081cb0ef41Sopenharmony_ci } else { 9091cb0ef41Sopenharmony_ci macro_assembler->CheckCharacter(chars[0], &ok); 9101cb0ef41Sopenharmony_ci macro_assembler->CheckNotCharacter(chars[1], on_failure); 9111cb0ef41Sopenharmony_ci macro_assembler->Bind(&ok); 9121cb0ef41Sopenharmony_ci } 9131cb0ef41Sopenharmony_ci break; 9141cb0ef41Sopenharmony_ci } 9151cb0ef41Sopenharmony_ci case 4: 9161cb0ef41Sopenharmony_ci macro_assembler->CheckCharacter(chars[3], &ok); 9171cb0ef41Sopenharmony_ci V8_FALLTHROUGH; 9181cb0ef41Sopenharmony_ci case 3: 9191cb0ef41Sopenharmony_ci macro_assembler->CheckCharacter(chars[0], &ok); 9201cb0ef41Sopenharmony_ci macro_assembler->CheckCharacter(chars[1], &ok); 9211cb0ef41Sopenharmony_ci macro_assembler->CheckNotCharacter(chars[2], on_failure); 9221cb0ef41Sopenharmony_ci macro_assembler->Bind(&ok); 9231cb0ef41Sopenharmony_ci break; 9241cb0ef41Sopenharmony_ci default: 9251cb0ef41Sopenharmony_ci UNREACHABLE(); 9261cb0ef41Sopenharmony_ci } 9271cb0ef41Sopenharmony_ci return true; 9281cb0ef41Sopenharmony_ci} 9291cb0ef41Sopenharmony_ci 9301cb0ef41Sopenharmony_civoid EmitBoundaryTest(RegExpMacroAssembler* masm, int border, 9311cb0ef41Sopenharmony_ci Label* fall_through, Label* above_or_equal, 9321cb0ef41Sopenharmony_ci Label* below) { 9331cb0ef41Sopenharmony_ci if (below != fall_through) { 9341cb0ef41Sopenharmony_ci masm->CheckCharacterLT(border, below); 9351cb0ef41Sopenharmony_ci if (above_or_equal != fall_through) masm->GoTo(above_or_equal); 9361cb0ef41Sopenharmony_ci } else { 9371cb0ef41Sopenharmony_ci masm->CheckCharacterGT(border - 1, above_or_equal); 9381cb0ef41Sopenharmony_ci } 9391cb0ef41Sopenharmony_ci} 9401cb0ef41Sopenharmony_ci 9411cb0ef41Sopenharmony_civoid EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first, int last, 9421cb0ef41Sopenharmony_ci Label* fall_through, Label* in_range, 9431cb0ef41Sopenharmony_ci Label* out_of_range) { 9441cb0ef41Sopenharmony_ci if (in_range == fall_through) { 9451cb0ef41Sopenharmony_ci if (first == last) { 9461cb0ef41Sopenharmony_ci masm->CheckNotCharacter(first, out_of_range); 9471cb0ef41Sopenharmony_ci } else { 9481cb0ef41Sopenharmony_ci masm->CheckCharacterNotInRange(first, last, out_of_range); 9491cb0ef41Sopenharmony_ci } 9501cb0ef41Sopenharmony_ci } else { 9511cb0ef41Sopenharmony_ci if (first == last) { 9521cb0ef41Sopenharmony_ci masm->CheckCharacter(first, in_range); 9531cb0ef41Sopenharmony_ci } else { 9541cb0ef41Sopenharmony_ci masm->CheckCharacterInRange(first, last, in_range); 9551cb0ef41Sopenharmony_ci } 9561cb0ef41Sopenharmony_ci if (out_of_range != fall_through) masm->GoTo(out_of_range); 9571cb0ef41Sopenharmony_ci } 9581cb0ef41Sopenharmony_ci} 9591cb0ef41Sopenharmony_ci 9601cb0ef41Sopenharmony_ci// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. 9611cb0ef41Sopenharmony_ci// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. 9621cb0ef41Sopenharmony_civoid EmitUseLookupTable(RegExpMacroAssembler* masm, 9631cb0ef41Sopenharmony_ci ZoneList<base::uc32>* ranges, uint32_t start_index, 9641cb0ef41Sopenharmony_ci uint32_t end_index, base::uc32 min_char, 9651cb0ef41Sopenharmony_ci Label* fall_through, Label* even_label, 9661cb0ef41Sopenharmony_ci Label* odd_label) { 9671cb0ef41Sopenharmony_ci static const uint32_t kSize = RegExpMacroAssembler::kTableSize; 9681cb0ef41Sopenharmony_ci static const uint32_t kMask = RegExpMacroAssembler::kTableMask; 9691cb0ef41Sopenharmony_ci 9701cb0ef41Sopenharmony_ci base::uc32 base = (min_char & ~kMask); 9711cb0ef41Sopenharmony_ci USE(base); 9721cb0ef41Sopenharmony_ci 9731cb0ef41Sopenharmony_ci // Assert that everything is on one kTableSize page. 9741cb0ef41Sopenharmony_ci for (uint32_t i = start_index; i <= end_index; i++) { 9751cb0ef41Sopenharmony_ci DCHECK_EQ(ranges->at(i) & ~kMask, base); 9761cb0ef41Sopenharmony_ci } 9771cb0ef41Sopenharmony_ci DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base); 9781cb0ef41Sopenharmony_ci 9791cb0ef41Sopenharmony_ci char templ[kSize]; 9801cb0ef41Sopenharmony_ci Label* on_bit_set; 9811cb0ef41Sopenharmony_ci Label* on_bit_clear; 9821cb0ef41Sopenharmony_ci int bit; 9831cb0ef41Sopenharmony_ci if (even_label == fall_through) { 9841cb0ef41Sopenharmony_ci on_bit_set = odd_label; 9851cb0ef41Sopenharmony_ci on_bit_clear = even_label; 9861cb0ef41Sopenharmony_ci bit = 1; 9871cb0ef41Sopenharmony_ci } else { 9881cb0ef41Sopenharmony_ci on_bit_set = even_label; 9891cb0ef41Sopenharmony_ci on_bit_clear = odd_label; 9901cb0ef41Sopenharmony_ci bit = 0; 9911cb0ef41Sopenharmony_ci } 9921cb0ef41Sopenharmony_ci for (uint32_t i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; 9931cb0ef41Sopenharmony_ci i++) { 9941cb0ef41Sopenharmony_ci templ[i] = bit; 9951cb0ef41Sopenharmony_ci } 9961cb0ef41Sopenharmony_ci uint32_t j = 0; 9971cb0ef41Sopenharmony_ci bit ^= 1; 9981cb0ef41Sopenharmony_ci for (uint32_t i = start_index; i < end_index; i++) { 9991cb0ef41Sopenharmony_ci for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) { 10001cb0ef41Sopenharmony_ci templ[j] = bit; 10011cb0ef41Sopenharmony_ci } 10021cb0ef41Sopenharmony_ci bit ^= 1; 10031cb0ef41Sopenharmony_ci } 10041cb0ef41Sopenharmony_ci for (uint32_t i = j; i < kSize; i++) { 10051cb0ef41Sopenharmony_ci templ[i] = bit; 10061cb0ef41Sopenharmony_ci } 10071cb0ef41Sopenharmony_ci Factory* factory = masm->isolate()->factory(); 10081cb0ef41Sopenharmony_ci // TODO(erikcorry): Cache these. 10091cb0ef41Sopenharmony_ci Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld); 10101cb0ef41Sopenharmony_ci for (uint32_t i = 0; i < kSize; i++) { 10111cb0ef41Sopenharmony_ci ba->set(i, templ[i]); 10121cb0ef41Sopenharmony_ci } 10131cb0ef41Sopenharmony_ci masm->CheckBitInTable(ba, on_bit_set); 10141cb0ef41Sopenharmony_ci if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); 10151cb0ef41Sopenharmony_ci} 10161cb0ef41Sopenharmony_ci 10171cb0ef41Sopenharmony_civoid CutOutRange(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges, 10181cb0ef41Sopenharmony_ci uint32_t start_index, uint32_t end_index, uint32_t cut_index, 10191cb0ef41Sopenharmony_ci Label* even_label, Label* odd_label) { 10201cb0ef41Sopenharmony_ci bool odd = (((cut_index - start_index) & 1) == 1); 10211cb0ef41Sopenharmony_ci Label* in_range_label = odd ? odd_label : even_label; 10221cb0ef41Sopenharmony_ci Label dummy; 10231cb0ef41Sopenharmony_ci EmitDoubleBoundaryTest(masm, ranges->at(cut_index), 10241cb0ef41Sopenharmony_ci ranges->at(cut_index + 1) - 1, &dummy, in_range_label, 10251cb0ef41Sopenharmony_ci &dummy); 10261cb0ef41Sopenharmony_ci DCHECK(!dummy.is_linked()); 10271cb0ef41Sopenharmony_ci // Cut out the single range by rewriting the array. This creates a new 10281cb0ef41Sopenharmony_ci // range that is a merger of the two ranges on either side of the one we 10291cb0ef41Sopenharmony_ci // are cutting out. The oddity of the labels is preserved. 10301cb0ef41Sopenharmony_ci for (uint32_t j = cut_index; j > start_index; j--) { 10311cb0ef41Sopenharmony_ci ranges->at(j) = ranges->at(j - 1); 10321cb0ef41Sopenharmony_ci } 10331cb0ef41Sopenharmony_ci for (uint32_t j = cut_index + 1; j < end_index; j++) { 10341cb0ef41Sopenharmony_ci ranges->at(j) = ranges->at(j + 1); 10351cb0ef41Sopenharmony_ci } 10361cb0ef41Sopenharmony_ci} 10371cb0ef41Sopenharmony_ci 10381cb0ef41Sopenharmony_ci// Unicode case. Split the search space into kSize spaces that are handled 10391cb0ef41Sopenharmony_ci// with recursion. 10401cb0ef41Sopenharmony_civoid SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index, 10411cb0ef41Sopenharmony_ci uint32_t end_index, uint32_t* new_start_index, 10421cb0ef41Sopenharmony_ci uint32_t* new_end_index, base::uc32* border) { 10431cb0ef41Sopenharmony_ci static const uint32_t kSize = RegExpMacroAssembler::kTableSize; 10441cb0ef41Sopenharmony_ci static const uint32_t kMask = RegExpMacroAssembler::kTableMask; 10451cb0ef41Sopenharmony_ci 10461cb0ef41Sopenharmony_ci base::uc32 first = ranges->at(start_index); 10471cb0ef41Sopenharmony_ci base::uc32 last = ranges->at(end_index) - 1; 10481cb0ef41Sopenharmony_ci 10491cb0ef41Sopenharmony_ci *new_start_index = start_index; 10501cb0ef41Sopenharmony_ci *border = (ranges->at(start_index) & ~kMask) + kSize; 10511cb0ef41Sopenharmony_ci while (*new_start_index < end_index) { 10521cb0ef41Sopenharmony_ci if (ranges->at(*new_start_index) > *border) break; 10531cb0ef41Sopenharmony_ci (*new_start_index)++; 10541cb0ef41Sopenharmony_ci } 10551cb0ef41Sopenharmony_ci // new_start_index is the index of the first edge that is beyond the 10561cb0ef41Sopenharmony_ci // current kSize space. 10571cb0ef41Sopenharmony_ci 10581cb0ef41Sopenharmony_ci // For very large search spaces we do a binary chop search of the non-Latin1 10591cb0ef41Sopenharmony_ci // space instead of just going to the end of the current kSize space. The 10601cb0ef41Sopenharmony_ci // heuristics are complicated a little by the fact that any 128-character 10611cb0ef41Sopenharmony_ci // encoding space can be quickly tested with a table lookup, so we don't 10621cb0ef41Sopenharmony_ci // wish to do binary chop search at a smaller granularity than that. A 10631cb0ef41Sopenharmony_ci // 128-character space can take up a lot of space in the ranges array if, 10641cb0ef41Sopenharmony_ci // for example, we only want to match every second character (eg. the lower 10651cb0ef41Sopenharmony_ci // case characters on some Unicode pages). 10661cb0ef41Sopenharmony_ci uint32_t binary_chop_index = (end_index + start_index) / 2; 10671cb0ef41Sopenharmony_ci // The first test ensures that we get to the code that handles the Latin1 10681cb0ef41Sopenharmony_ci // range with a single not-taken branch, speeding up this important 10691cb0ef41Sopenharmony_ci // character range (even non-Latin1 charset-based text has spaces and 10701cb0ef41Sopenharmony_ci // punctuation). 10711cb0ef41Sopenharmony_ci if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case. 10721cb0ef41Sopenharmony_ci end_index - start_index > (*new_start_index - start_index) * 2 && 10731cb0ef41Sopenharmony_ci last - first > kSize * 2 && binary_chop_index > *new_start_index && 10741cb0ef41Sopenharmony_ci ranges->at(binary_chop_index) >= first + 2 * kSize) { 10751cb0ef41Sopenharmony_ci uint32_t scan_forward_for_section_border = binary_chop_index; 10761cb0ef41Sopenharmony_ci uint32_t new_border = (ranges->at(binary_chop_index) | kMask) + 1; 10771cb0ef41Sopenharmony_ci 10781cb0ef41Sopenharmony_ci while (scan_forward_for_section_border < end_index) { 10791cb0ef41Sopenharmony_ci if (ranges->at(scan_forward_for_section_border) > new_border) { 10801cb0ef41Sopenharmony_ci *new_start_index = scan_forward_for_section_border; 10811cb0ef41Sopenharmony_ci *border = new_border; 10821cb0ef41Sopenharmony_ci break; 10831cb0ef41Sopenharmony_ci } 10841cb0ef41Sopenharmony_ci scan_forward_for_section_border++; 10851cb0ef41Sopenharmony_ci } 10861cb0ef41Sopenharmony_ci } 10871cb0ef41Sopenharmony_ci 10881cb0ef41Sopenharmony_ci DCHECK(*new_start_index > start_index); 10891cb0ef41Sopenharmony_ci *new_end_index = *new_start_index - 1; 10901cb0ef41Sopenharmony_ci if (ranges->at(*new_end_index) == *border) { 10911cb0ef41Sopenharmony_ci (*new_end_index)--; 10921cb0ef41Sopenharmony_ci } 10931cb0ef41Sopenharmony_ci if (*border >= ranges->at(end_index)) { 10941cb0ef41Sopenharmony_ci *border = ranges->at(end_index); 10951cb0ef41Sopenharmony_ci *new_start_index = end_index; // Won't be used. 10961cb0ef41Sopenharmony_ci *new_end_index = end_index - 1; 10971cb0ef41Sopenharmony_ci } 10981cb0ef41Sopenharmony_ci} 10991cb0ef41Sopenharmony_ci 11001cb0ef41Sopenharmony_ci// Gets a series of segment boundaries representing a character class. If the 11011cb0ef41Sopenharmony_ci// character is in the range between an even and an odd boundary (counting from 11021cb0ef41Sopenharmony_ci// start_index) then go to even_label, otherwise go to odd_label. We already 11031cb0ef41Sopenharmony_ci// know that the character is in the range of min_char to max_char inclusive. 11041cb0ef41Sopenharmony_ci// Either label can be nullptr indicating backtracking. Either label can also 11051cb0ef41Sopenharmony_ci// be equal to the fall_through label. 11061cb0ef41Sopenharmony_civoid GenerateBranches(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges, 11071cb0ef41Sopenharmony_ci uint32_t start_index, uint32_t end_index, 11081cb0ef41Sopenharmony_ci base::uc32 min_char, base::uc32 max_char, 11091cb0ef41Sopenharmony_ci Label* fall_through, Label* even_label, 11101cb0ef41Sopenharmony_ci Label* odd_label) { 11111cb0ef41Sopenharmony_ci DCHECK_LE(min_char, String::kMaxUtf16CodeUnit); 11121cb0ef41Sopenharmony_ci DCHECK_LE(max_char, String::kMaxUtf16CodeUnit); 11131cb0ef41Sopenharmony_ci 11141cb0ef41Sopenharmony_ci base::uc32 first = ranges->at(start_index); 11151cb0ef41Sopenharmony_ci base::uc32 last = ranges->at(end_index) - 1; 11161cb0ef41Sopenharmony_ci 11171cb0ef41Sopenharmony_ci DCHECK_LT(min_char, first); 11181cb0ef41Sopenharmony_ci 11191cb0ef41Sopenharmony_ci // Just need to test if the character is before or on-or-after 11201cb0ef41Sopenharmony_ci // a particular character. 11211cb0ef41Sopenharmony_ci if (start_index == end_index) { 11221cb0ef41Sopenharmony_ci EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); 11231cb0ef41Sopenharmony_ci return; 11241cb0ef41Sopenharmony_ci } 11251cb0ef41Sopenharmony_ci 11261cb0ef41Sopenharmony_ci // Another almost trivial case: There is one interval in the middle that is 11271cb0ef41Sopenharmony_ci // different from the end intervals. 11281cb0ef41Sopenharmony_ci if (start_index + 1 == end_index) { 11291cb0ef41Sopenharmony_ci EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label, 11301cb0ef41Sopenharmony_ci odd_label); 11311cb0ef41Sopenharmony_ci return; 11321cb0ef41Sopenharmony_ci } 11331cb0ef41Sopenharmony_ci 11341cb0ef41Sopenharmony_ci // It's not worth using table lookup if there are very few intervals in the 11351cb0ef41Sopenharmony_ci // character class. 11361cb0ef41Sopenharmony_ci if (end_index - start_index <= 6) { 11371cb0ef41Sopenharmony_ci // It is faster to test for individual characters, so we look for those 11381cb0ef41Sopenharmony_ci // first, then try arbitrary ranges in the second round. 11391cb0ef41Sopenharmony_ci static uint32_t kNoCutIndex = -1; 11401cb0ef41Sopenharmony_ci uint32_t cut = kNoCutIndex; 11411cb0ef41Sopenharmony_ci for (uint32_t i = start_index; i < end_index; i++) { 11421cb0ef41Sopenharmony_ci if (ranges->at(i) == ranges->at(i + 1) - 1) { 11431cb0ef41Sopenharmony_ci cut = i; 11441cb0ef41Sopenharmony_ci break; 11451cb0ef41Sopenharmony_ci } 11461cb0ef41Sopenharmony_ci } 11471cb0ef41Sopenharmony_ci if (cut == kNoCutIndex) cut = start_index; 11481cb0ef41Sopenharmony_ci CutOutRange(masm, ranges, start_index, end_index, cut, even_label, 11491cb0ef41Sopenharmony_ci odd_label); 11501cb0ef41Sopenharmony_ci DCHECK_GE(end_index - start_index, 2); 11511cb0ef41Sopenharmony_ci GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char, 11521cb0ef41Sopenharmony_ci max_char, fall_through, even_label, odd_label); 11531cb0ef41Sopenharmony_ci return; 11541cb0ef41Sopenharmony_ci } 11551cb0ef41Sopenharmony_ci 11561cb0ef41Sopenharmony_ci // If there are a lot of intervals in the regexp, then we will use tables to 11571cb0ef41Sopenharmony_ci // determine whether the character is inside or outside the character class. 11581cb0ef41Sopenharmony_ci static const int kBits = RegExpMacroAssembler::kTableSizeBits; 11591cb0ef41Sopenharmony_ci 11601cb0ef41Sopenharmony_ci if ((max_char >> kBits) == (min_char >> kBits)) { 11611cb0ef41Sopenharmony_ci EmitUseLookupTable(masm, ranges, start_index, end_index, min_char, 11621cb0ef41Sopenharmony_ci fall_through, even_label, odd_label); 11631cb0ef41Sopenharmony_ci return; 11641cb0ef41Sopenharmony_ci } 11651cb0ef41Sopenharmony_ci 11661cb0ef41Sopenharmony_ci if ((min_char >> kBits) != first >> kBits) { 11671cb0ef41Sopenharmony_ci masm->CheckCharacterLT(first, odd_label); 11681cb0ef41Sopenharmony_ci GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char, 11691cb0ef41Sopenharmony_ci fall_through, odd_label, even_label); 11701cb0ef41Sopenharmony_ci return; 11711cb0ef41Sopenharmony_ci } 11721cb0ef41Sopenharmony_ci 11731cb0ef41Sopenharmony_ci uint32_t new_start_index = 0; 11741cb0ef41Sopenharmony_ci uint32_t new_end_index = 0; 11751cb0ef41Sopenharmony_ci base::uc32 border = 0; 11761cb0ef41Sopenharmony_ci 11771cb0ef41Sopenharmony_ci SplitSearchSpace(ranges, start_index, end_index, &new_start_index, 11781cb0ef41Sopenharmony_ci &new_end_index, &border); 11791cb0ef41Sopenharmony_ci 11801cb0ef41Sopenharmony_ci Label handle_rest; 11811cb0ef41Sopenharmony_ci Label* above = &handle_rest; 11821cb0ef41Sopenharmony_ci if (border == last + 1) { 11831cb0ef41Sopenharmony_ci // We didn't find any section that started after the limit, so everything 11841cb0ef41Sopenharmony_ci // above the border is one of the terminal labels. 11851cb0ef41Sopenharmony_ci above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; 11861cb0ef41Sopenharmony_ci DCHECK(new_end_index == end_index - 1); 11871cb0ef41Sopenharmony_ci } 11881cb0ef41Sopenharmony_ci 11891cb0ef41Sopenharmony_ci DCHECK_LE(start_index, new_end_index); 11901cb0ef41Sopenharmony_ci DCHECK_LE(new_start_index, end_index); 11911cb0ef41Sopenharmony_ci DCHECK_LT(start_index, new_start_index); 11921cb0ef41Sopenharmony_ci DCHECK_LT(new_end_index, end_index); 11931cb0ef41Sopenharmony_ci DCHECK(new_end_index + 1 == new_start_index || 11941cb0ef41Sopenharmony_ci (new_end_index + 2 == new_start_index && 11951cb0ef41Sopenharmony_ci border == ranges->at(new_end_index + 1))); 11961cb0ef41Sopenharmony_ci DCHECK_LT(min_char, border - 1); 11971cb0ef41Sopenharmony_ci DCHECK_LT(border, max_char); 11981cb0ef41Sopenharmony_ci DCHECK_LT(ranges->at(new_end_index), border); 11991cb0ef41Sopenharmony_ci DCHECK(border < ranges->at(new_start_index) || 12001cb0ef41Sopenharmony_ci (border == ranges->at(new_start_index) && 12011cb0ef41Sopenharmony_ci new_start_index == end_index && new_end_index == end_index - 1 && 12021cb0ef41Sopenharmony_ci border == last + 1)); 12031cb0ef41Sopenharmony_ci DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1)); 12041cb0ef41Sopenharmony_ci 12051cb0ef41Sopenharmony_ci masm->CheckCharacterGT(border - 1, above); 12061cb0ef41Sopenharmony_ci Label dummy; 12071cb0ef41Sopenharmony_ci GenerateBranches(masm, ranges, start_index, new_end_index, min_char, 12081cb0ef41Sopenharmony_ci border - 1, &dummy, even_label, odd_label); 12091cb0ef41Sopenharmony_ci if (handle_rest.is_linked()) { 12101cb0ef41Sopenharmony_ci masm->Bind(&handle_rest); 12111cb0ef41Sopenharmony_ci bool flip = (new_start_index & 1) != (start_index & 1); 12121cb0ef41Sopenharmony_ci GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char, 12131cb0ef41Sopenharmony_ci &dummy, flip ? odd_label : even_label, 12141cb0ef41Sopenharmony_ci flip ? even_label : odd_label); 12151cb0ef41Sopenharmony_ci } 12161cb0ef41Sopenharmony_ci} 12171cb0ef41Sopenharmony_ci 12181cb0ef41Sopenharmony_civoid EmitCharClass(RegExpMacroAssembler* macro_assembler, 12191cb0ef41Sopenharmony_ci RegExpCharacterClass* cc, bool one_byte, Label* on_failure, 12201cb0ef41Sopenharmony_ci int cp_offset, bool check_offset, bool preloaded, 12211cb0ef41Sopenharmony_ci Zone* zone) { 12221cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = cc->ranges(zone); 12231cb0ef41Sopenharmony_ci CharacterRange::Canonicalize(ranges); 12241cb0ef41Sopenharmony_ci 12251cb0ef41Sopenharmony_ci // Now that all processing (like case-insensitivity) is done, clamp the 12261cb0ef41Sopenharmony_ci // ranges to the set of ranges that may actually occur in the subject string. 12271cb0ef41Sopenharmony_ci if (one_byte) CharacterRange::ClampToOneByte(ranges); 12281cb0ef41Sopenharmony_ci 12291cb0ef41Sopenharmony_ci const int ranges_length = ranges->length(); 12301cb0ef41Sopenharmony_ci if (ranges_length == 0) { 12311cb0ef41Sopenharmony_ci if (!cc->is_negated()) { 12321cb0ef41Sopenharmony_ci macro_assembler->GoTo(on_failure); 12331cb0ef41Sopenharmony_ci } 12341cb0ef41Sopenharmony_ci if (check_offset) { 12351cb0ef41Sopenharmony_ci macro_assembler->CheckPosition(cp_offset, on_failure); 12361cb0ef41Sopenharmony_ci } 12371cb0ef41Sopenharmony_ci return; 12381cb0ef41Sopenharmony_ci } 12391cb0ef41Sopenharmony_ci 12401cb0ef41Sopenharmony_ci const base::uc32 max_char = MaxCodeUnit(one_byte); 12411cb0ef41Sopenharmony_ci if (ranges_length == 1 && ranges->at(0).IsEverything(max_char)) { 12421cb0ef41Sopenharmony_ci if (cc->is_negated()) { 12431cb0ef41Sopenharmony_ci macro_assembler->GoTo(on_failure); 12441cb0ef41Sopenharmony_ci } else { 12451cb0ef41Sopenharmony_ci // This is a common case hit by non-anchored expressions. 12461cb0ef41Sopenharmony_ci if (check_offset) { 12471cb0ef41Sopenharmony_ci macro_assembler->CheckPosition(cp_offset, on_failure); 12481cb0ef41Sopenharmony_ci } 12491cb0ef41Sopenharmony_ci } 12501cb0ef41Sopenharmony_ci return; 12511cb0ef41Sopenharmony_ci } 12521cb0ef41Sopenharmony_ci 12531cb0ef41Sopenharmony_ci if (!preloaded) { 12541cb0ef41Sopenharmony_ci macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); 12551cb0ef41Sopenharmony_ci } 12561cb0ef41Sopenharmony_ci 12571cb0ef41Sopenharmony_ci if (cc->is_standard(zone) && macro_assembler->CheckSpecialCharacterClass( 12581cb0ef41Sopenharmony_ci cc->standard_type(), on_failure)) { 12591cb0ef41Sopenharmony_ci return; 12601cb0ef41Sopenharmony_ci } 12611cb0ef41Sopenharmony_ci 12621cb0ef41Sopenharmony_ci static constexpr int kMaxRangesForInlineBranchGeneration = 16; 12631cb0ef41Sopenharmony_ci if (ranges_length > kMaxRangesForInlineBranchGeneration) { 12641cb0ef41Sopenharmony_ci // For large range sets, emit a more compact instruction sequence to avoid 12651cb0ef41Sopenharmony_ci // a potentially problematic increase in code size. 12661cb0ef41Sopenharmony_ci // Note the flipped logic below (we check InRange if negated, NotInRange if 12671cb0ef41Sopenharmony_ci // not negated); this is necessary since the method falls through on 12681cb0ef41Sopenharmony_ci // failure whereas we want to fall through on success. 12691cb0ef41Sopenharmony_ci if (cc->is_negated()) { 12701cb0ef41Sopenharmony_ci if (macro_assembler->CheckCharacterInRangeArray(ranges, on_failure)) { 12711cb0ef41Sopenharmony_ci return; 12721cb0ef41Sopenharmony_ci } 12731cb0ef41Sopenharmony_ci } else { 12741cb0ef41Sopenharmony_ci if (macro_assembler->CheckCharacterNotInRangeArray(ranges, on_failure)) { 12751cb0ef41Sopenharmony_ci return; 12761cb0ef41Sopenharmony_ci } 12771cb0ef41Sopenharmony_ci } 12781cb0ef41Sopenharmony_ci } 12791cb0ef41Sopenharmony_ci 12801cb0ef41Sopenharmony_ci // Generate a flat list of range boundaries for consumption by 12811cb0ef41Sopenharmony_ci // GenerateBranches. See the comment on that function for how the list should 12821cb0ef41Sopenharmony_ci // be structured 12831cb0ef41Sopenharmony_ci ZoneList<base::uc32>* range_boundaries = 12841cb0ef41Sopenharmony_ci zone->New<ZoneList<base::uc32>>(ranges_length * 2, zone); 12851cb0ef41Sopenharmony_ci 12861cb0ef41Sopenharmony_ci bool zeroth_entry_is_failure = !cc->is_negated(); 12871cb0ef41Sopenharmony_ci 12881cb0ef41Sopenharmony_ci for (int i = 0; i < ranges_length; i++) { 12891cb0ef41Sopenharmony_ci CharacterRange& range = ranges->at(i); 12901cb0ef41Sopenharmony_ci if (range.from() == 0) { 12911cb0ef41Sopenharmony_ci DCHECK_EQ(i, 0); 12921cb0ef41Sopenharmony_ci zeroth_entry_is_failure = !zeroth_entry_is_failure; 12931cb0ef41Sopenharmony_ci } else { 12941cb0ef41Sopenharmony_ci range_boundaries->Add(range.from(), zone); 12951cb0ef41Sopenharmony_ci } 12961cb0ef41Sopenharmony_ci // `+ 1` to convert from inclusive to exclusive `to`. 12971cb0ef41Sopenharmony_ci // [from, to] == [from, to+1[. 12981cb0ef41Sopenharmony_ci range_boundaries->Add(range.to() + 1, zone); 12991cb0ef41Sopenharmony_ci } 13001cb0ef41Sopenharmony_ci int end_index = range_boundaries->length() - 1; 13011cb0ef41Sopenharmony_ci if (range_boundaries->at(end_index) > max_char) { 13021cb0ef41Sopenharmony_ci end_index--; 13031cb0ef41Sopenharmony_ci } 13041cb0ef41Sopenharmony_ci 13051cb0ef41Sopenharmony_ci Label fall_through; 13061cb0ef41Sopenharmony_ci GenerateBranches(macro_assembler, range_boundaries, 13071cb0ef41Sopenharmony_ci 0, // start_index. 13081cb0ef41Sopenharmony_ci end_index, 13091cb0ef41Sopenharmony_ci 0, // min_char. 13101cb0ef41Sopenharmony_ci max_char, &fall_through, 13111cb0ef41Sopenharmony_ci zeroth_entry_is_failure ? &fall_through : on_failure, 13121cb0ef41Sopenharmony_ci zeroth_entry_is_failure ? on_failure : &fall_through); 13131cb0ef41Sopenharmony_ci macro_assembler->Bind(&fall_through); 13141cb0ef41Sopenharmony_ci} 13151cb0ef41Sopenharmony_ci 13161cb0ef41Sopenharmony_ci} // namespace 13171cb0ef41Sopenharmony_ci 13181cb0ef41Sopenharmony_ciRegExpNode::~RegExpNode() = default; 13191cb0ef41Sopenharmony_ci 13201cb0ef41Sopenharmony_ciRegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, 13211cb0ef41Sopenharmony_ci Trace* trace) { 13221cb0ef41Sopenharmony_ci // If we are generating a greedy loop then don't stop and don't reuse code. 13231cb0ef41Sopenharmony_ci if (trace->stop_node() != nullptr) { 13241cb0ef41Sopenharmony_ci return CONTINUE; 13251cb0ef41Sopenharmony_ci } 13261cb0ef41Sopenharmony_ci 13271cb0ef41Sopenharmony_ci RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 13281cb0ef41Sopenharmony_ci if (trace->is_trivial()) { 13291cb0ef41Sopenharmony_ci if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) { 13301cb0ef41Sopenharmony_ci // If a generic version is already scheduled to be generated or we have 13311cb0ef41Sopenharmony_ci // recursed too deeply then just generate a jump to that code. 13321cb0ef41Sopenharmony_ci macro_assembler->GoTo(&label_); 13331cb0ef41Sopenharmony_ci // This will queue it up for generation of a generic version if it hasn't 13341cb0ef41Sopenharmony_ci // already been queued. 13351cb0ef41Sopenharmony_ci compiler->AddWork(this); 13361cb0ef41Sopenharmony_ci return DONE; 13371cb0ef41Sopenharmony_ci } 13381cb0ef41Sopenharmony_ci // Generate generic version of the node and bind the label for later use. 13391cb0ef41Sopenharmony_ci macro_assembler->Bind(&label_); 13401cb0ef41Sopenharmony_ci return CONTINUE; 13411cb0ef41Sopenharmony_ci } 13421cb0ef41Sopenharmony_ci 13431cb0ef41Sopenharmony_ci // We are being asked to make a non-generic version. Keep track of how many 13441cb0ef41Sopenharmony_ci // non-generic versions we generate so as not to overdo it. 13451cb0ef41Sopenharmony_ci trace_count_++; 13461cb0ef41Sopenharmony_ci if (KeepRecursing(compiler) && compiler->optimize() && 13471cb0ef41Sopenharmony_ci trace_count_ < kMaxCopiesCodeGenerated) { 13481cb0ef41Sopenharmony_ci return CONTINUE; 13491cb0ef41Sopenharmony_ci } 13501cb0ef41Sopenharmony_ci 13511cb0ef41Sopenharmony_ci // If we get here code has been generated for this node too many times or 13521cb0ef41Sopenharmony_ci // recursion is too deep. Time to switch to a generic version. The code for 13531cb0ef41Sopenharmony_ci // generic versions above can handle deep recursion properly. 13541cb0ef41Sopenharmony_ci bool was_limiting = compiler->limiting_recursion(); 13551cb0ef41Sopenharmony_ci compiler->set_limiting_recursion(true); 13561cb0ef41Sopenharmony_ci trace->Flush(compiler, this); 13571cb0ef41Sopenharmony_ci compiler->set_limiting_recursion(was_limiting); 13581cb0ef41Sopenharmony_ci return DONE; 13591cb0ef41Sopenharmony_ci} 13601cb0ef41Sopenharmony_ci 13611cb0ef41Sopenharmony_cibool RegExpNode::KeepRecursing(RegExpCompiler* compiler) { 13621cb0ef41Sopenharmony_ci return !compiler->limiting_recursion() && 13631cb0ef41Sopenharmony_ci compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion; 13641cb0ef41Sopenharmony_ci} 13651cb0ef41Sopenharmony_ci 13661cb0ef41Sopenharmony_civoid ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 13671cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) { 13681cb0ef41Sopenharmony_ci if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) { 13691cb0ef41Sopenharmony_ci // Anything may follow a positive submatch success, thus we need to accept 13701cb0ef41Sopenharmony_ci // all characters from this position onwards. 13711cb0ef41Sopenharmony_ci bm->SetRest(offset); 13721cb0ef41Sopenharmony_ci } else { 13731cb0ef41Sopenharmony_ci on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 13741cb0ef41Sopenharmony_ci } 13751cb0ef41Sopenharmony_ci SaveBMInfo(bm, not_at_start, offset); 13761cb0ef41Sopenharmony_ci} 13771cb0ef41Sopenharmony_ci 13781cb0ef41Sopenharmony_civoid ActionNode::GetQuickCheckDetails(QuickCheckDetails* details, 13791cb0ef41Sopenharmony_ci RegExpCompiler* compiler, int filled_in, 13801cb0ef41Sopenharmony_ci bool not_at_start) { 13811cb0ef41Sopenharmony_ci if (action_type_ == SET_REGISTER_FOR_LOOP) { 13821cb0ef41Sopenharmony_ci on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler, 13831cb0ef41Sopenharmony_ci filled_in, not_at_start); 13841cb0ef41Sopenharmony_ci } else { 13851cb0ef41Sopenharmony_ci on_success()->GetQuickCheckDetails(details, compiler, filled_in, 13861cb0ef41Sopenharmony_ci not_at_start); 13871cb0ef41Sopenharmony_ci } 13881cb0ef41Sopenharmony_ci} 13891cb0ef41Sopenharmony_ci 13901cb0ef41Sopenharmony_civoid AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 13911cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) { 13921cb0ef41Sopenharmony_ci // Match the behaviour of EatsAtLeast on this node. 13931cb0ef41Sopenharmony_ci if (assertion_type() == AT_START && not_at_start) return; 13941cb0ef41Sopenharmony_ci on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 13951cb0ef41Sopenharmony_ci SaveBMInfo(bm, not_at_start, offset); 13961cb0ef41Sopenharmony_ci} 13971cb0ef41Sopenharmony_ci 13981cb0ef41Sopenharmony_civoid NegativeLookaroundChoiceNode::GetQuickCheckDetails( 13991cb0ef41Sopenharmony_ci QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in, 14001cb0ef41Sopenharmony_ci bool not_at_start) { 14011cb0ef41Sopenharmony_ci RegExpNode* node = continue_node(); 14021cb0ef41Sopenharmony_ci return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); 14031cb0ef41Sopenharmony_ci} 14041cb0ef41Sopenharmony_ci 14051cb0ef41Sopenharmony_cinamespace { 14061cb0ef41Sopenharmony_ci 14071cb0ef41Sopenharmony_ci// Takes the left-most 1-bit and smears it out, setting all bits to its right. 14081cb0ef41Sopenharmony_ciinline uint32_t SmearBitsRight(uint32_t v) { 14091cb0ef41Sopenharmony_ci v |= v >> 1; 14101cb0ef41Sopenharmony_ci v |= v >> 2; 14111cb0ef41Sopenharmony_ci v |= v >> 4; 14121cb0ef41Sopenharmony_ci v |= v >> 8; 14131cb0ef41Sopenharmony_ci v |= v >> 16; 14141cb0ef41Sopenharmony_ci return v; 14151cb0ef41Sopenharmony_ci} 14161cb0ef41Sopenharmony_ci 14171cb0ef41Sopenharmony_ci} // namespace 14181cb0ef41Sopenharmony_ci 14191cb0ef41Sopenharmony_cibool QuickCheckDetails::Rationalize(bool asc) { 14201cb0ef41Sopenharmony_ci bool found_useful_op = false; 14211cb0ef41Sopenharmony_ci const uint32_t char_mask = CharMask(asc); 14221cb0ef41Sopenharmony_ci mask_ = 0; 14231cb0ef41Sopenharmony_ci value_ = 0; 14241cb0ef41Sopenharmony_ci int char_shift = 0; 14251cb0ef41Sopenharmony_ci for (int i = 0; i < characters_; i++) { 14261cb0ef41Sopenharmony_ci Position* pos = &positions_[i]; 14271cb0ef41Sopenharmony_ci if ((pos->mask & String::kMaxOneByteCharCode) != 0) { 14281cb0ef41Sopenharmony_ci found_useful_op = true; 14291cb0ef41Sopenharmony_ci } 14301cb0ef41Sopenharmony_ci mask_ |= (pos->mask & char_mask) << char_shift; 14311cb0ef41Sopenharmony_ci value_ |= (pos->value & char_mask) << char_shift; 14321cb0ef41Sopenharmony_ci char_shift += asc ? 8 : 16; 14331cb0ef41Sopenharmony_ci } 14341cb0ef41Sopenharmony_ci return found_useful_op; 14351cb0ef41Sopenharmony_ci} 14361cb0ef41Sopenharmony_ci 14371cb0ef41Sopenharmony_ciint RegExpNode::EatsAtLeast(bool not_at_start) { 14381cb0ef41Sopenharmony_ci return not_at_start ? eats_at_least_.eats_at_least_from_not_start 14391cb0ef41Sopenharmony_ci : eats_at_least_.eats_at_least_from_possibly_start; 14401cb0ef41Sopenharmony_ci} 14411cb0ef41Sopenharmony_ci 14421cb0ef41Sopenharmony_ciEatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() { 14431cb0ef41Sopenharmony_ci // SET_REGISTER_FOR_LOOP is only used to initialize loop counters, and it 14441cb0ef41Sopenharmony_ci // implies that the following node must be a LoopChoiceNode. If we need to 14451cb0ef41Sopenharmony_ci // set registers to constant values for other reasons, we could introduce a 14461cb0ef41Sopenharmony_ci // new action type SET_REGISTER that doesn't imply anything about its 14471cb0ef41Sopenharmony_ci // successor. 14481cb0ef41Sopenharmony_ci UNREACHABLE(); 14491cb0ef41Sopenharmony_ci} 14501cb0ef41Sopenharmony_ci 14511cb0ef41Sopenharmony_civoid RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, 14521cb0ef41Sopenharmony_ci RegExpCompiler* compiler, 14531cb0ef41Sopenharmony_ci int characters_filled_in, 14541cb0ef41Sopenharmony_ci bool not_at_start) { 14551cb0ef41Sopenharmony_ci // See comment in RegExpNode::EatsAtLeastFromLoopEntry. 14561cb0ef41Sopenharmony_ci UNREACHABLE(); 14571cb0ef41Sopenharmony_ci} 14581cb0ef41Sopenharmony_ci 14591cb0ef41Sopenharmony_ciEatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() { 14601cb0ef41Sopenharmony_ci DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue. 14611cb0ef41Sopenharmony_ci 14621cb0ef41Sopenharmony_ci if (read_backward()) { 14631cb0ef41Sopenharmony_ci // The eats_at_least value is not used if reading backward. The 14641cb0ef41Sopenharmony_ci // EatsAtLeastPropagator should've zeroed it as well. 14651cb0ef41Sopenharmony_ci DCHECK_EQ(eats_at_least_info()->eats_at_least_from_possibly_start, 0); 14661cb0ef41Sopenharmony_ci DCHECK_EQ(eats_at_least_info()->eats_at_least_from_not_start, 0); 14671cb0ef41Sopenharmony_ci return {}; 14681cb0ef41Sopenharmony_ci } 14691cb0ef41Sopenharmony_ci 14701cb0ef41Sopenharmony_ci // Figure out how much the loop body itself eats, not including anything in 14711cb0ef41Sopenharmony_ci // the continuation case. In general, the nodes in the loop body should report 14721cb0ef41Sopenharmony_ci // that they eat at least the number eaten by the continuation node, since any 14731cb0ef41Sopenharmony_ci // successful match in the loop body must also include the continuation node. 14741cb0ef41Sopenharmony_ci // However, in some cases involving positive lookaround, the loop body under- 14751cb0ef41Sopenharmony_ci // reports its appetite, so use saturated math here to avoid negative numbers. 14761cb0ef41Sopenharmony_ci uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>( 14771cb0ef41Sopenharmony_ci loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true)); 14781cb0ef41Sopenharmony_ci uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>( 14791cb0ef41Sopenharmony_ci loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true)); 14801cb0ef41Sopenharmony_ci 14811cb0ef41Sopenharmony_ci // Limit the number of loop iterations to avoid overflow in subsequent steps. 14821cb0ef41Sopenharmony_ci int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations()); 14831cb0ef41Sopenharmony_ci 14841cb0ef41Sopenharmony_ci EatsAtLeastInfo result; 14851cb0ef41Sopenharmony_ci result.eats_at_least_from_not_start = 14861cb0ef41Sopenharmony_ci base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start + 14871cb0ef41Sopenharmony_ci continue_node_->EatsAtLeast(true)); 14881cb0ef41Sopenharmony_ci if (loop_iterations > 0 && loop_body_from_possibly_start > 0) { 14891cb0ef41Sopenharmony_ci // First loop iteration eats at least one, so all subsequent iterations 14901cb0ef41Sopenharmony_ci // and the after-loop chunk are guaranteed to not be at the start. 14911cb0ef41Sopenharmony_ci result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>( 14921cb0ef41Sopenharmony_ci loop_body_from_possibly_start + 14931cb0ef41Sopenharmony_ci (loop_iterations - 1) * loop_body_from_not_start + 14941cb0ef41Sopenharmony_ci continue_node_->EatsAtLeast(true)); 14951cb0ef41Sopenharmony_ci } else { 14961cb0ef41Sopenharmony_ci // Loop body might eat nothing, so only continue node contributes. 14971cb0ef41Sopenharmony_ci result.eats_at_least_from_possibly_start = 14981cb0ef41Sopenharmony_ci continue_node_->EatsAtLeast(false); 14991cb0ef41Sopenharmony_ci } 15001cb0ef41Sopenharmony_ci return result; 15011cb0ef41Sopenharmony_ci} 15021cb0ef41Sopenharmony_ci 15031cb0ef41Sopenharmony_cibool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, 15041cb0ef41Sopenharmony_ci Trace* bounds_check_trace, Trace* trace, 15051cb0ef41Sopenharmony_ci bool preload_has_checked_bounds, 15061cb0ef41Sopenharmony_ci Label* on_possible_success, 15071cb0ef41Sopenharmony_ci QuickCheckDetails* details, 15081cb0ef41Sopenharmony_ci bool fall_through_on_failure, 15091cb0ef41Sopenharmony_ci ChoiceNode* predecessor) { 15101cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(predecessor); 15111cb0ef41Sopenharmony_ci if (details->characters() == 0) return false; 15121cb0ef41Sopenharmony_ci GetQuickCheckDetails(details, compiler, 0, 15131cb0ef41Sopenharmony_ci trace->at_start() == Trace::FALSE_VALUE); 15141cb0ef41Sopenharmony_ci if (details->cannot_match()) return false; 15151cb0ef41Sopenharmony_ci if (!details->Rationalize(compiler->one_byte())) return false; 15161cb0ef41Sopenharmony_ci DCHECK(details->characters() == 1 || 15171cb0ef41Sopenharmony_ci compiler->macro_assembler()->CanReadUnaligned()); 15181cb0ef41Sopenharmony_ci uint32_t mask = details->mask(); 15191cb0ef41Sopenharmony_ci uint32_t value = details->value(); 15201cb0ef41Sopenharmony_ci 15211cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 15221cb0ef41Sopenharmony_ci 15231cb0ef41Sopenharmony_ci if (trace->characters_preloaded() != details->characters()) { 15241cb0ef41Sopenharmony_ci DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset()); 15251cb0ef41Sopenharmony_ci // The bounds check is performed using the minimum number of characters 15261cb0ef41Sopenharmony_ci // any choice would eat, so if the bounds check fails, then none of the 15271cb0ef41Sopenharmony_ci // choices can succeed, so we can just immediately backtrack, rather 15281cb0ef41Sopenharmony_ci // than go to the next choice. The number of characters preloaded may be 15291cb0ef41Sopenharmony_ci // less than the number used for the bounds check. 15301cb0ef41Sopenharmony_ci int eats_at_least = predecessor->EatsAtLeast( 15311cb0ef41Sopenharmony_ci bounds_check_trace->at_start() == Trace::FALSE_VALUE); 15321cb0ef41Sopenharmony_ci DCHECK_GE(eats_at_least, details->characters()); 15331cb0ef41Sopenharmony_ci assembler->LoadCurrentCharacter( 15341cb0ef41Sopenharmony_ci trace->cp_offset(), bounds_check_trace->backtrack(), 15351cb0ef41Sopenharmony_ci !preload_has_checked_bounds, details->characters(), eats_at_least); 15361cb0ef41Sopenharmony_ci } 15371cb0ef41Sopenharmony_ci 15381cb0ef41Sopenharmony_ci bool need_mask = true; 15391cb0ef41Sopenharmony_ci 15401cb0ef41Sopenharmony_ci if (details->characters() == 1) { 15411cb0ef41Sopenharmony_ci // If number of characters preloaded is 1 then we used a byte or 16 bit 15421cb0ef41Sopenharmony_ci // load so the value is already masked down. 15431cb0ef41Sopenharmony_ci const uint32_t char_mask = CharMask(compiler->one_byte()); 15441cb0ef41Sopenharmony_ci if ((mask & char_mask) == char_mask) need_mask = false; 15451cb0ef41Sopenharmony_ci mask &= char_mask; 15461cb0ef41Sopenharmony_ci } else { 15471cb0ef41Sopenharmony_ci // For 2-character preloads in one-byte mode or 1-character preloads in 15481cb0ef41Sopenharmony_ci // two-byte mode we also use a 16 bit load with zero extend. 15491cb0ef41Sopenharmony_ci static const uint32_t kTwoByteMask = 0xFFFF; 15501cb0ef41Sopenharmony_ci static const uint32_t kFourByteMask = 0xFFFFFFFF; 15511cb0ef41Sopenharmony_ci if (details->characters() == 2 && compiler->one_byte()) { 15521cb0ef41Sopenharmony_ci if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false; 15531cb0ef41Sopenharmony_ci } else if (details->characters() == 1 && !compiler->one_byte()) { 15541cb0ef41Sopenharmony_ci if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false; 15551cb0ef41Sopenharmony_ci } else { 15561cb0ef41Sopenharmony_ci if (mask == kFourByteMask) need_mask = false; 15571cb0ef41Sopenharmony_ci } 15581cb0ef41Sopenharmony_ci } 15591cb0ef41Sopenharmony_ci 15601cb0ef41Sopenharmony_ci if (fall_through_on_failure) { 15611cb0ef41Sopenharmony_ci if (need_mask) { 15621cb0ef41Sopenharmony_ci assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); 15631cb0ef41Sopenharmony_ci } else { 15641cb0ef41Sopenharmony_ci assembler->CheckCharacter(value, on_possible_success); 15651cb0ef41Sopenharmony_ci } 15661cb0ef41Sopenharmony_ci } else { 15671cb0ef41Sopenharmony_ci if (need_mask) { 15681cb0ef41Sopenharmony_ci assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack()); 15691cb0ef41Sopenharmony_ci } else { 15701cb0ef41Sopenharmony_ci assembler->CheckNotCharacter(value, trace->backtrack()); 15711cb0ef41Sopenharmony_ci } 15721cb0ef41Sopenharmony_ci } 15731cb0ef41Sopenharmony_ci return true; 15741cb0ef41Sopenharmony_ci} 15751cb0ef41Sopenharmony_ci 15761cb0ef41Sopenharmony_ci// Here is the meat of GetQuickCheckDetails (see also the comment on the 15771cb0ef41Sopenharmony_ci// super-class in the .h file). 15781cb0ef41Sopenharmony_ci// 15791cb0ef41Sopenharmony_ci// We iterate along the text object, building up for each character a 15801cb0ef41Sopenharmony_ci// mask and value that can be used to test for a quick failure to match. 15811cb0ef41Sopenharmony_ci// The masks and values for the positions will be combined into a single 15821cb0ef41Sopenharmony_ci// machine word for the current character width in order to be used in 15831cb0ef41Sopenharmony_ci// generating a quick check. 15841cb0ef41Sopenharmony_civoid TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 15851cb0ef41Sopenharmony_ci RegExpCompiler* compiler, 15861cb0ef41Sopenharmony_ci int characters_filled_in, 15871cb0ef41Sopenharmony_ci bool not_at_start) { 15881cb0ef41Sopenharmony_ci // Do not collect any quick check details if the text node reads backward, 15891cb0ef41Sopenharmony_ci // since it reads in the opposite direction than we use for quick checks. 15901cb0ef41Sopenharmony_ci if (read_backward()) return; 15911cb0ef41Sopenharmony_ci Isolate* isolate = compiler->macro_assembler()->isolate(); 15921cb0ef41Sopenharmony_ci DCHECK(characters_filled_in < details->characters()); 15931cb0ef41Sopenharmony_ci int characters = details->characters(); 15941cb0ef41Sopenharmony_ci const uint32_t char_mask = CharMask(compiler->one_byte()); 15951cb0ef41Sopenharmony_ci for (int k = 0; k < elements()->length(); k++) { 15961cb0ef41Sopenharmony_ci TextElement elm = elements()->at(k); 15971cb0ef41Sopenharmony_ci if (elm.text_type() == TextElement::ATOM) { 15981cb0ef41Sopenharmony_ci base::Vector<const base::uc16> quarks = elm.atom()->data(); 15991cb0ef41Sopenharmony_ci for (int i = 0; i < characters && i < quarks.length(); i++) { 16001cb0ef41Sopenharmony_ci QuickCheckDetails::Position* pos = 16011cb0ef41Sopenharmony_ci details->positions(characters_filled_in); 16021cb0ef41Sopenharmony_ci base::uc16 c = quarks[i]; 16031cb0ef41Sopenharmony_ci if (IsIgnoreCase(compiler->flags())) { 16041cb0ef41Sopenharmony_ci unibrow::uchar chars[4]; 16051cb0ef41Sopenharmony_ci int length = GetCaseIndependentLetters( 16061cb0ef41Sopenharmony_ci isolate, c, compiler->one_byte(), chars, 4); 16071cb0ef41Sopenharmony_ci if (length == 0) { 16081cb0ef41Sopenharmony_ci // This can happen because all case variants are non-Latin1, but we 16091cb0ef41Sopenharmony_ci // know the input is Latin1. 16101cb0ef41Sopenharmony_ci details->set_cannot_match(); 16111cb0ef41Sopenharmony_ci pos->determines_perfectly = false; 16121cb0ef41Sopenharmony_ci return; 16131cb0ef41Sopenharmony_ci } 16141cb0ef41Sopenharmony_ci if (length == 1) { 16151cb0ef41Sopenharmony_ci // This letter has no case equivalents, so it's nice and simple 16161cb0ef41Sopenharmony_ci // and the mask-compare will determine definitely whether we have 16171cb0ef41Sopenharmony_ci // a match at this character position. 16181cb0ef41Sopenharmony_ci pos->mask = char_mask; 16191cb0ef41Sopenharmony_ci pos->value = chars[0]; 16201cb0ef41Sopenharmony_ci pos->determines_perfectly = true; 16211cb0ef41Sopenharmony_ci } else { 16221cb0ef41Sopenharmony_ci uint32_t common_bits = char_mask; 16231cb0ef41Sopenharmony_ci uint32_t bits = chars[0]; 16241cb0ef41Sopenharmony_ci for (int j = 1; j < length; j++) { 16251cb0ef41Sopenharmony_ci uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); 16261cb0ef41Sopenharmony_ci common_bits ^= differing_bits; 16271cb0ef41Sopenharmony_ci bits &= common_bits; 16281cb0ef41Sopenharmony_ci } 16291cb0ef41Sopenharmony_ci // If length is 2 and common bits has only one zero in it then 16301cb0ef41Sopenharmony_ci // our mask and compare instruction will determine definitely 16311cb0ef41Sopenharmony_ci // whether we have a match at this character position. Otherwise 16321cb0ef41Sopenharmony_ci // it can only be an approximate check. 16331cb0ef41Sopenharmony_ci uint32_t one_zero = (common_bits | ~char_mask); 16341cb0ef41Sopenharmony_ci if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { 16351cb0ef41Sopenharmony_ci pos->determines_perfectly = true; 16361cb0ef41Sopenharmony_ci } 16371cb0ef41Sopenharmony_ci pos->mask = common_bits; 16381cb0ef41Sopenharmony_ci pos->value = bits; 16391cb0ef41Sopenharmony_ci } 16401cb0ef41Sopenharmony_ci } else { 16411cb0ef41Sopenharmony_ci // Don't ignore case. Nice simple case where the mask-compare will 16421cb0ef41Sopenharmony_ci // determine definitely whether we have a match at this character 16431cb0ef41Sopenharmony_ci // position. 16441cb0ef41Sopenharmony_ci if (c > char_mask) { 16451cb0ef41Sopenharmony_ci details->set_cannot_match(); 16461cb0ef41Sopenharmony_ci pos->determines_perfectly = false; 16471cb0ef41Sopenharmony_ci return; 16481cb0ef41Sopenharmony_ci } 16491cb0ef41Sopenharmony_ci pos->mask = char_mask; 16501cb0ef41Sopenharmony_ci pos->value = c; 16511cb0ef41Sopenharmony_ci pos->determines_perfectly = true; 16521cb0ef41Sopenharmony_ci } 16531cb0ef41Sopenharmony_ci characters_filled_in++; 16541cb0ef41Sopenharmony_ci DCHECK(characters_filled_in <= details->characters()); 16551cb0ef41Sopenharmony_ci if (characters_filled_in == details->characters()) { 16561cb0ef41Sopenharmony_ci return; 16571cb0ef41Sopenharmony_ci } 16581cb0ef41Sopenharmony_ci } 16591cb0ef41Sopenharmony_ci } else { 16601cb0ef41Sopenharmony_ci QuickCheckDetails::Position* pos = 16611cb0ef41Sopenharmony_ci details->positions(characters_filled_in); 16621cb0ef41Sopenharmony_ci RegExpCharacterClass* tree = elm.char_class(); 16631cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = tree->ranges(zone()); 16641cb0ef41Sopenharmony_ci if (tree->is_negated() || ranges->is_empty()) { 16651cb0ef41Sopenharmony_ci // A quick check uses multi-character mask and compare. There is no 16661cb0ef41Sopenharmony_ci // useful way to incorporate a negative char class into this scheme 16671cb0ef41Sopenharmony_ci // so we just conservatively create a mask and value that will always 16681cb0ef41Sopenharmony_ci // succeed. 16691cb0ef41Sopenharmony_ci // Likewise for empty ranges (empty ranges can occur e.g. when 16701cb0ef41Sopenharmony_ci // compiling for one-byte subjects and impossible (non-one-byte) ranges 16711cb0ef41Sopenharmony_ci // have been removed). 16721cb0ef41Sopenharmony_ci pos->mask = 0; 16731cb0ef41Sopenharmony_ci pos->value = 0; 16741cb0ef41Sopenharmony_ci } else { 16751cb0ef41Sopenharmony_ci int first_range = 0; 16761cb0ef41Sopenharmony_ci while (ranges->at(first_range).from() > char_mask) { 16771cb0ef41Sopenharmony_ci first_range++; 16781cb0ef41Sopenharmony_ci if (first_range == ranges->length()) { 16791cb0ef41Sopenharmony_ci details->set_cannot_match(); 16801cb0ef41Sopenharmony_ci pos->determines_perfectly = false; 16811cb0ef41Sopenharmony_ci return; 16821cb0ef41Sopenharmony_ci } 16831cb0ef41Sopenharmony_ci } 16841cb0ef41Sopenharmony_ci CharacterRange range = ranges->at(first_range); 16851cb0ef41Sopenharmony_ci const base::uc32 first_from = range.from(); 16861cb0ef41Sopenharmony_ci const base::uc32 first_to = 16871cb0ef41Sopenharmony_ci (range.to() > char_mask) ? char_mask : range.to(); 16881cb0ef41Sopenharmony_ci const uint32_t differing_bits = (first_from ^ first_to); 16891cb0ef41Sopenharmony_ci // A mask and compare is only perfect if the differing bits form a 16901cb0ef41Sopenharmony_ci // number like 00011111 with one single block of trailing 1s. 16911cb0ef41Sopenharmony_ci if ((differing_bits & (differing_bits + 1)) == 0 && 16921cb0ef41Sopenharmony_ci first_from + differing_bits == first_to) { 16931cb0ef41Sopenharmony_ci pos->determines_perfectly = true; 16941cb0ef41Sopenharmony_ci } 16951cb0ef41Sopenharmony_ci uint32_t common_bits = ~SmearBitsRight(differing_bits); 16961cb0ef41Sopenharmony_ci uint32_t bits = (first_from & common_bits); 16971cb0ef41Sopenharmony_ci for (int i = first_range + 1; i < ranges->length(); i++) { 16981cb0ef41Sopenharmony_ci range = ranges->at(i); 16991cb0ef41Sopenharmony_ci const base::uc32 from = range.from(); 17001cb0ef41Sopenharmony_ci if (from > char_mask) continue; 17011cb0ef41Sopenharmony_ci const base::uc32 to = 17021cb0ef41Sopenharmony_ci (range.to() > char_mask) ? char_mask : range.to(); 17031cb0ef41Sopenharmony_ci // Here we are combining more ranges into the mask and compare 17041cb0ef41Sopenharmony_ci // value. With each new range the mask becomes more sparse and 17051cb0ef41Sopenharmony_ci // so the chances of a false positive rise. A character class 17061cb0ef41Sopenharmony_ci // with multiple ranges is assumed never to be equivalent to a 17071cb0ef41Sopenharmony_ci // mask and compare operation. 17081cb0ef41Sopenharmony_ci pos->determines_perfectly = false; 17091cb0ef41Sopenharmony_ci uint32_t new_common_bits = (from ^ to); 17101cb0ef41Sopenharmony_ci new_common_bits = ~SmearBitsRight(new_common_bits); 17111cb0ef41Sopenharmony_ci common_bits &= new_common_bits; 17121cb0ef41Sopenharmony_ci bits &= new_common_bits; 17131cb0ef41Sopenharmony_ci uint32_t new_differing_bits = (from & common_bits) ^ bits; 17141cb0ef41Sopenharmony_ci common_bits ^= new_differing_bits; 17151cb0ef41Sopenharmony_ci bits &= common_bits; 17161cb0ef41Sopenharmony_ci } 17171cb0ef41Sopenharmony_ci pos->mask = common_bits; 17181cb0ef41Sopenharmony_ci pos->value = bits; 17191cb0ef41Sopenharmony_ci } 17201cb0ef41Sopenharmony_ci characters_filled_in++; 17211cb0ef41Sopenharmony_ci DCHECK(characters_filled_in <= details->characters()); 17221cb0ef41Sopenharmony_ci if (characters_filled_in == details->characters()) return; 17231cb0ef41Sopenharmony_ci } 17241cb0ef41Sopenharmony_ci } 17251cb0ef41Sopenharmony_ci DCHECK(characters_filled_in != details->characters()); 17261cb0ef41Sopenharmony_ci if (!details->cannot_match()) { 17271cb0ef41Sopenharmony_ci on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in, 17281cb0ef41Sopenharmony_ci true); 17291cb0ef41Sopenharmony_ci } 17301cb0ef41Sopenharmony_ci} 17311cb0ef41Sopenharmony_ci 17321cb0ef41Sopenharmony_civoid QuickCheckDetails::Clear() { 17331cb0ef41Sopenharmony_ci for (int i = 0; i < characters_; i++) { 17341cb0ef41Sopenharmony_ci positions_[i].mask = 0; 17351cb0ef41Sopenharmony_ci positions_[i].value = 0; 17361cb0ef41Sopenharmony_ci positions_[i].determines_perfectly = false; 17371cb0ef41Sopenharmony_ci } 17381cb0ef41Sopenharmony_ci characters_ = 0; 17391cb0ef41Sopenharmony_ci} 17401cb0ef41Sopenharmony_ci 17411cb0ef41Sopenharmony_civoid QuickCheckDetails::Advance(int by, bool one_byte) { 17421cb0ef41Sopenharmony_ci if (by >= characters_ || by < 0) { 17431cb0ef41Sopenharmony_ci DCHECK_IMPLIES(by < 0, characters_ == 0); 17441cb0ef41Sopenharmony_ci Clear(); 17451cb0ef41Sopenharmony_ci return; 17461cb0ef41Sopenharmony_ci } 17471cb0ef41Sopenharmony_ci DCHECK_LE(characters_ - by, 4); 17481cb0ef41Sopenharmony_ci DCHECK_LE(characters_, 4); 17491cb0ef41Sopenharmony_ci for (int i = 0; i < characters_ - by; i++) { 17501cb0ef41Sopenharmony_ci positions_[i] = positions_[by + i]; 17511cb0ef41Sopenharmony_ci } 17521cb0ef41Sopenharmony_ci for (int i = characters_ - by; i < characters_; i++) { 17531cb0ef41Sopenharmony_ci positions_[i].mask = 0; 17541cb0ef41Sopenharmony_ci positions_[i].value = 0; 17551cb0ef41Sopenharmony_ci positions_[i].determines_perfectly = false; 17561cb0ef41Sopenharmony_ci } 17571cb0ef41Sopenharmony_ci characters_ -= by; 17581cb0ef41Sopenharmony_ci // We could change mask_ and value_ here but we would never advance unless 17591cb0ef41Sopenharmony_ci // they had already been used in a check and they won't be used again because 17601cb0ef41Sopenharmony_ci // it would gain us nothing. So there's no point. 17611cb0ef41Sopenharmony_ci} 17621cb0ef41Sopenharmony_ci 17631cb0ef41Sopenharmony_civoid QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) { 17641cb0ef41Sopenharmony_ci DCHECK(characters_ == other->characters_); 17651cb0ef41Sopenharmony_ci if (other->cannot_match_) { 17661cb0ef41Sopenharmony_ci return; 17671cb0ef41Sopenharmony_ci } 17681cb0ef41Sopenharmony_ci if (cannot_match_) { 17691cb0ef41Sopenharmony_ci *this = *other; 17701cb0ef41Sopenharmony_ci return; 17711cb0ef41Sopenharmony_ci } 17721cb0ef41Sopenharmony_ci for (int i = from_index; i < characters_; i++) { 17731cb0ef41Sopenharmony_ci QuickCheckDetails::Position* pos = positions(i); 17741cb0ef41Sopenharmony_ci QuickCheckDetails::Position* other_pos = other->positions(i); 17751cb0ef41Sopenharmony_ci if (pos->mask != other_pos->mask || pos->value != other_pos->value || 17761cb0ef41Sopenharmony_ci !other_pos->determines_perfectly) { 17771cb0ef41Sopenharmony_ci // Our mask-compare operation will be approximate unless we have the 17781cb0ef41Sopenharmony_ci // exact same operation on both sides of the alternation. 17791cb0ef41Sopenharmony_ci pos->determines_perfectly = false; 17801cb0ef41Sopenharmony_ci } 17811cb0ef41Sopenharmony_ci pos->mask &= other_pos->mask; 17821cb0ef41Sopenharmony_ci pos->value &= pos->mask; 17831cb0ef41Sopenharmony_ci other_pos->value &= pos->mask; 17841cb0ef41Sopenharmony_ci uint32_t differing_bits = (pos->value ^ other_pos->value); 17851cb0ef41Sopenharmony_ci pos->mask &= ~differing_bits; 17861cb0ef41Sopenharmony_ci pos->value &= pos->mask; 17871cb0ef41Sopenharmony_ci } 17881cb0ef41Sopenharmony_ci} 17891cb0ef41Sopenharmony_ci 17901cb0ef41Sopenharmony_ciclass VisitMarker { 17911cb0ef41Sopenharmony_ci public: 17921cb0ef41Sopenharmony_ci explicit VisitMarker(NodeInfo* info) : info_(info) { 17931cb0ef41Sopenharmony_ci DCHECK(!info->visited); 17941cb0ef41Sopenharmony_ci info->visited = true; 17951cb0ef41Sopenharmony_ci } 17961cb0ef41Sopenharmony_ci ~VisitMarker() { info_->visited = false; } 17971cb0ef41Sopenharmony_ci 17981cb0ef41Sopenharmony_ci private: 17991cb0ef41Sopenharmony_ci NodeInfo* info_; 18001cb0ef41Sopenharmony_ci}; 18011cb0ef41Sopenharmony_ci 18021cb0ef41Sopenharmony_ci// Temporarily sets traversed_loop_initialization_node_. 18031cb0ef41Sopenharmony_ciclass LoopInitializationMarker { 18041cb0ef41Sopenharmony_ci public: 18051cb0ef41Sopenharmony_ci explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) { 18061cb0ef41Sopenharmony_ci DCHECK(!node_->traversed_loop_initialization_node_); 18071cb0ef41Sopenharmony_ci node_->traversed_loop_initialization_node_ = true; 18081cb0ef41Sopenharmony_ci } 18091cb0ef41Sopenharmony_ci ~LoopInitializationMarker() { 18101cb0ef41Sopenharmony_ci DCHECK(node_->traversed_loop_initialization_node_); 18111cb0ef41Sopenharmony_ci node_->traversed_loop_initialization_node_ = false; 18121cb0ef41Sopenharmony_ci } 18131cb0ef41Sopenharmony_ci LoopInitializationMarker(const LoopInitializationMarker&) = delete; 18141cb0ef41Sopenharmony_ci LoopInitializationMarker& operator=(const LoopInitializationMarker&) = delete; 18151cb0ef41Sopenharmony_ci 18161cb0ef41Sopenharmony_ci private: 18171cb0ef41Sopenharmony_ci LoopChoiceNode* node_; 18181cb0ef41Sopenharmony_ci}; 18191cb0ef41Sopenharmony_ci 18201cb0ef41Sopenharmony_ci// Temporarily decrements min_loop_iterations_. 18211cb0ef41Sopenharmony_ciclass IterationDecrementer { 18221cb0ef41Sopenharmony_ci public: 18231cb0ef41Sopenharmony_ci explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) { 18241cb0ef41Sopenharmony_ci DCHECK_GT(node_->min_loop_iterations_, 0); 18251cb0ef41Sopenharmony_ci --node_->min_loop_iterations_; 18261cb0ef41Sopenharmony_ci } 18271cb0ef41Sopenharmony_ci ~IterationDecrementer() { ++node_->min_loop_iterations_; } 18281cb0ef41Sopenharmony_ci IterationDecrementer(const IterationDecrementer&) = delete; 18291cb0ef41Sopenharmony_ci IterationDecrementer& operator=(const IterationDecrementer&) = delete; 18301cb0ef41Sopenharmony_ci 18311cb0ef41Sopenharmony_ci private: 18321cb0ef41Sopenharmony_ci LoopChoiceNode* node_; 18331cb0ef41Sopenharmony_ci}; 18341cb0ef41Sopenharmony_ci 18351cb0ef41Sopenharmony_ciRegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpFlags flags) { 18361cb0ef41Sopenharmony_ci if (info()->replacement_calculated) return replacement(); 18371cb0ef41Sopenharmony_ci if (depth < 0) return this; 18381cb0ef41Sopenharmony_ci DCHECK(!info()->visited); 18391cb0ef41Sopenharmony_ci VisitMarker marker(info()); 18401cb0ef41Sopenharmony_ci return FilterSuccessor(depth - 1, flags); 18411cb0ef41Sopenharmony_ci} 18421cb0ef41Sopenharmony_ci 18431cb0ef41Sopenharmony_ciRegExpNode* SeqRegExpNode::FilterSuccessor(int depth, RegExpFlags flags) { 18441cb0ef41Sopenharmony_ci RegExpNode* next = on_success_->FilterOneByte(depth - 1, flags); 18451cb0ef41Sopenharmony_ci if (next == nullptr) return set_replacement(nullptr); 18461cb0ef41Sopenharmony_ci on_success_ = next; 18471cb0ef41Sopenharmony_ci return set_replacement(this); 18481cb0ef41Sopenharmony_ci} 18491cb0ef41Sopenharmony_ci 18501cb0ef41Sopenharmony_ci// We need to check for the following characters: 0x39C 0x3BC 0x178. 18511cb0ef41Sopenharmony_cibool RangeContainsLatin1Equivalents(CharacterRange range) { 18521cb0ef41Sopenharmony_ci // TODO(dcarney): this could be a lot more efficient. 18531cb0ef41Sopenharmony_ci return range.Contains(0x039C) || range.Contains(0x03BC) || 18541cb0ef41Sopenharmony_ci range.Contains(0x0178); 18551cb0ef41Sopenharmony_ci} 18561cb0ef41Sopenharmony_ci 18571cb0ef41Sopenharmony_cinamespace { 18581cb0ef41Sopenharmony_ci 18591cb0ef41Sopenharmony_cibool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { 18601cb0ef41Sopenharmony_ci for (int i = 0; i < ranges->length(); i++) { 18611cb0ef41Sopenharmony_ci // TODO(dcarney): this could be a lot more efficient. 18621cb0ef41Sopenharmony_ci if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; 18631cb0ef41Sopenharmony_ci } 18641cb0ef41Sopenharmony_ci return false; 18651cb0ef41Sopenharmony_ci} 18661cb0ef41Sopenharmony_ci 18671cb0ef41Sopenharmony_ci} // namespace 18681cb0ef41Sopenharmony_ci 18691cb0ef41Sopenharmony_ciRegExpNode* TextNode::FilterOneByte(int depth, RegExpFlags flags) { 18701cb0ef41Sopenharmony_ci if (info()->replacement_calculated) return replacement(); 18711cb0ef41Sopenharmony_ci if (depth < 0) return this; 18721cb0ef41Sopenharmony_ci DCHECK(!info()->visited); 18731cb0ef41Sopenharmony_ci VisitMarker marker(info()); 18741cb0ef41Sopenharmony_ci int element_count = elements()->length(); 18751cb0ef41Sopenharmony_ci for (int i = 0; i < element_count; i++) { 18761cb0ef41Sopenharmony_ci TextElement elm = elements()->at(i); 18771cb0ef41Sopenharmony_ci if (elm.text_type() == TextElement::ATOM) { 18781cb0ef41Sopenharmony_ci base::Vector<const base::uc16> quarks = elm.atom()->data(); 18791cb0ef41Sopenharmony_ci for (int j = 0; j < quarks.length(); j++) { 18801cb0ef41Sopenharmony_ci base::uc16 c = quarks[j]; 18811cb0ef41Sopenharmony_ci if (IsIgnoreCase(flags)) { 18821cb0ef41Sopenharmony_ci c = unibrow::Latin1::TryConvertToLatin1(c); 18831cb0ef41Sopenharmony_ci } 18841cb0ef41Sopenharmony_ci if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr); 18851cb0ef41Sopenharmony_ci // Replace quark in case we converted to Latin-1. 18861cb0ef41Sopenharmony_ci base::uc16* writable_quarks = const_cast<base::uc16*>(quarks.begin()); 18871cb0ef41Sopenharmony_ci writable_quarks[j] = c; 18881cb0ef41Sopenharmony_ci } 18891cb0ef41Sopenharmony_ci } else { 18901cb0ef41Sopenharmony_ci DCHECK(elm.text_type() == TextElement::CHAR_CLASS); 18911cb0ef41Sopenharmony_ci RegExpCharacterClass* cc = elm.char_class(); 18921cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 18931cb0ef41Sopenharmony_ci CharacterRange::Canonicalize(ranges); 18941cb0ef41Sopenharmony_ci // Now they are in order so we only need to look at the first. 18951cb0ef41Sopenharmony_ci int range_count = ranges->length(); 18961cb0ef41Sopenharmony_ci if (cc->is_negated()) { 18971cb0ef41Sopenharmony_ci if (range_count != 0 && ranges->at(0).from() == 0 && 18981cb0ef41Sopenharmony_ci ranges->at(0).to() >= String::kMaxOneByteCharCode) { 18991cb0ef41Sopenharmony_ci // This will be handled in a later filter. 19001cb0ef41Sopenharmony_ci if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) { 19011cb0ef41Sopenharmony_ci continue; 19021cb0ef41Sopenharmony_ci } 19031cb0ef41Sopenharmony_ci return set_replacement(nullptr); 19041cb0ef41Sopenharmony_ci } 19051cb0ef41Sopenharmony_ci } else { 19061cb0ef41Sopenharmony_ci if (range_count == 0 || 19071cb0ef41Sopenharmony_ci ranges->at(0).from() > String::kMaxOneByteCharCode) { 19081cb0ef41Sopenharmony_ci // This will be handled in a later filter. 19091cb0ef41Sopenharmony_ci if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) { 19101cb0ef41Sopenharmony_ci continue; 19111cb0ef41Sopenharmony_ci } 19121cb0ef41Sopenharmony_ci return set_replacement(nullptr); 19131cb0ef41Sopenharmony_ci } 19141cb0ef41Sopenharmony_ci } 19151cb0ef41Sopenharmony_ci } 19161cb0ef41Sopenharmony_ci } 19171cb0ef41Sopenharmony_ci return FilterSuccessor(depth - 1, flags); 19181cb0ef41Sopenharmony_ci} 19191cb0ef41Sopenharmony_ci 19201cb0ef41Sopenharmony_ciRegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpFlags flags) { 19211cb0ef41Sopenharmony_ci if (info()->replacement_calculated) return replacement(); 19221cb0ef41Sopenharmony_ci if (depth < 0) return this; 19231cb0ef41Sopenharmony_ci if (info()->visited) return this; 19241cb0ef41Sopenharmony_ci { 19251cb0ef41Sopenharmony_ci VisitMarker marker(info()); 19261cb0ef41Sopenharmony_ci 19271cb0ef41Sopenharmony_ci RegExpNode* continue_replacement = 19281cb0ef41Sopenharmony_ci continue_node_->FilterOneByte(depth - 1, flags); 19291cb0ef41Sopenharmony_ci // If we can't continue after the loop then there is no sense in doing the 19301cb0ef41Sopenharmony_ci // loop. 19311cb0ef41Sopenharmony_ci if (continue_replacement == nullptr) return set_replacement(nullptr); 19321cb0ef41Sopenharmony_ci } 19331cb0ef41Sopenharmony_ci 19341cb0ef41Sopenharmony_ci return ChoiceNode::FilterOneByte(depth - 1, flags); 19351cb0ef41Sopenharmony_ci} 19361cb0ef41Sopenharmony_ci 19371cb0ef41Sopenharmony_ciRegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpFlags flags) { 19381cb0ef41Sopenharmony_ci if (info()->replacement_calculated) return replacement(); 19391cb0ef41Sopenharmony_ci if (depth < 0) return this; 19401cb0ef41Sopenharmony_ci if (info()->visited) return this; 19411cb0ef41Sopenharmony_ci VisitMarker marker(info()); 19421cb0ef41Sopenharmony_ci int choice_count = alternatives_->length(); 19431cb0ef41Sopenharmony_ci 19441cb0ef41Sopenharmony_ci for (int i = 0; i < choice_count; i++) { 19451cb0ef41Sopenharmony_ci GuardedAlternative alternative = alternatives_->at(i); 19461cb0ef41Sopenharmony_ci if (alternative.guards() != nullptr && 19471cb0ef41Sopenharmony_ci alternative.guards()->length() != 0) { 19481cb0ef41Sopenharmony_ci set_replacement(this); 19491cb0ef41Sopenharmony_ci return this; 19501cb0ef41Sopenharmony_ci } 19511cb0ef41Sopenharmony_ci } 19521cb0ef41Sopenharmony_ci 19531cb0ef41Sopenharmony_ci int surviving = 0; 19541cb0ef41Sopenharmony_ci RegExpNode* survivor = nullptr; 19551cb0ef41Sopenharmony_ci for (int i = 0; i < choice_count; i++) { 19561cb0ef41Sopenharmony_ci GuardedAlternative alternative = alternatives_->at(i); 19571cb0ef41Sopenharmony_ci RegExpNode* replacement = 19581cb0ef41Sopenharmony_ci alternative.node()->FilterOneByte(depth - 1, flags); 19591cb0ef41Sopenharmony_ci DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. 19601cb0ef41Sopenharmony_ci if (replacement != nullptr) { 19611cb0ef41Sopenharmony_ci alternatives_->at(i).set_node(replacement); 19621cb0ef41Sopenharmony_ci surviving++; 19631cb0ef41Sopenharmony_ci survivor = replacement; 19641cb0ef41Sopenharmony_ci } 19651cb0ef41Sopenharmony_ci } 19661cb0ef41Sopenharmony_ci if (surviving < 2) return set_replacement(survivor); 19671cb0ef41Sopenharmony_ci 19681cb0ef41Sopenharmony_ci set_replacement(this); 19691cb0ef41Sopenharmony_ci if (surviving == choice_count) { 19701cb0ef41Sopenharmony_ci return this; 19711cb0ef41Sopenharmony_ci } 19721cb0ef41Sopenharmony_ci // Only some of the nodes survived the filtering. We need to rebuild the 19731cb0ef41Sopenharmony_ci // alternatives list. 19741cb0ef41Sopenharmony_ci ZoneList<GuardedAlternative>* new_alternatives = 19751cb0ef41Sopenharmony_ci zone()->New<ZoneList<GuardedAlternative>>(surviving, zone()); 19761cb0ef41Sopenharmony_ci for (int i = 0; i < choice_count; i++) { 19771cb0ef41Sopenharmony_ci RegExpNode* replacement = 19781cb0ef41Sopenharmony_ci alternatives_->at(i).node()->FilterOneByte(depth - 1, flags); 19791cb0ef41Sopenharmony_ci if (replacement != nullptr) { 19801cb0ef41Sopenharmony_ci alternatives_->at(i).set_node(replacement); 19811cb0ef41Sopenharmony_ci new_alternatives->Add(alternatives_->at(i), zone()); 19821cb0ef41Sopenharmony_ci } 19831cb0ef41Sopenharmony_ci } 19841cb0ef41Sopenharmony_ci alternatives_ = new_alternatives; 19851cb0ef41Sopenharmony_ci return this; 19861cb0ef41Sopenharmony_ci} 19871cb0ef41Sopenharmony_ci 19881cb0ef41Sopenharmony_ciRegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth, 19891cb0ef41Sopenharmony_ci RegExpFlags flags) { 19901cb0ef41Sopenharmony_ci if (info()->replacement_calculated) return replacement(); 19911cb0ef41Sopenharmony_ci if (depth < 0) return this; 19921cb0ef41Sopenharmony_ci if (info()->visited) return this; 19931cb0ef41Sopenharmony_ci VisitMarker marker(info()); 19941cb0ef41Sopenharmony_ci // Alternative 0 is the negative lookahead, alternative 1 is what comes 19951cb0ef41Sopenharmony_ci // afterwards. 19961cb0ef41Sopenharmony_ci RegExpNode* node = continue_node(); 19971cb0ef41Sopenharmony_ci RegExpNode* replacement = node->FilterOneByte(depth - 1, flags); 19981cb0ef41Sopenharmony_ci if (replacement == nullptr) return set_replacement(nullptr); 19991cb0ef41Sopenharmony_ci alternatives_->at(kContinueIndex).set_node(replacement); 20001cb0ef41Sopenharmony_ci 20011cb0ef41Sopenharmony_ci RegExpNode* neg_node = lookaround_node(); 20021cb0ef41Sopenharmony_ci RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, flags); 20031cb0ef41Sopenharmony_ci // If the negative lookahead is always going to fail then 20041cb0ef41Sopenharmony_ci // we don't need to check it. 20051cb0ef41Sopenharmony_ci if (neg_replacement == nullptr) return set_replacement(replacement); 20061cb0ef41Sopenharmony_ci alternatives_->at(kLookaroundIndex).set_node(neg_replacement); 20071cb0ef41Sopenharmony_ci return set_replacement(this); 20081cb0ef41Sopenharmony_ci} 20091cb0ef41Sopenharmony_ci 20101cb0ef41Sopenharmony_civoid LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 20111cb0ef41Sopenharmony_ci RegExpCompiler* compiler, 20121cb0ef41Sopenharmony_ci int characters_filled_in, 20131cb0ef41Sopenharmony_ci bool not_at_start) { 20141cb0ef41Sopenharmony_ci if (body_can_be_zero_length_ || info()->visited) return; 20151cb0ef41Sopenharmony_ci not_at_start = not_at_start || this->not_at_start(); 20161cb0ef41Sopenharmony_ci DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue. 20171cb0ef41Sopenharmony_ci if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 && 20181cb0ef41Sopenharmony_ci loop_node_->EatsAtLeast(not_at_start) > 20191cb0ef41Sopenharmony_ci continue_node_->EatsAtLeast(true)) { 20201cb0ef41Sopenharmony_ci // Loop body is guaranteed to execute at least once, and consume characters 20211cb0ef41Sopenharmony_ci // when it does, meaning the only possible quick checks from this point 20221cb0ef41Sopenharmony_ci // begin with the loop body. We may recursively visit this LoopChoiceNode, 20231cb0ef41Sopenharmony_ci // but we temporarily decrease its minimum iteration counter so we know when 20241cb0ef41Sopenharmony_ci // to check the continue case. 20251cb0ef41Sopenharmony_ci IterationDecrementer next_iteration(this); 20261cb0ef41Sopenharmony_ci loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in, 20271cb0ef41Sopenharmony_ci not_at_start); 20281cb0ef41Sopenharmony_ci } else { 20291cb0ef41Sopenharmony_ci // Might not consume anything in the loop body, so treat it like a normal 20301cb0ef41Sopenharmony_ci // ChoiceNode (and don't recursively visit this node again). 20311cb0ef41Sopenharmony_ci VisitMarker marker(info()); 20321cb0ef41Sopenharmony_ci ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in, 20331cb0ef41Sopenharmony_ci not_at_start); 20341cb0ef41Sopenharmony_ci } 20351cb0ef41Sopenharmony_ci} 20361cb0ef41Sopenharmony_ci 20371cb0ef41Sopenharmony_civoid LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry( 20381cb0ef41Sopenharmony_ci QuickCheckDetails* details, RegExpCompiler* compiler, 20391cb0ef41Sopenharmony_ci int characters_filled_in, bool not_at_start) { 20401cb0ef41Sopenharmony_ci if (traversed_loop_initialization_node_) { 20411cb0ef41Sopenharmony_ci // We already entered this loop once, exited via its continuation node, and 20421cb0ef41Sopenharmony_ci // followed an outer loop's back-edge to before the loop entry point. We 20431cb0ef41Sopenharmony_ci // could try to reset the minimum iteration count to its starting value at 20441cb0ef41Sopenharmony_ci // this point, but that seems like more trouble than it's worth. It's safe 20451cb0ef41Sopenharmony_ci // to keep going with the current (possibly reduced) minimum iteration 20461cb0ef41Sopenharmony_ci // count. 20471cb0ef41Sopenharmony_ci GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start); 20481cb0ef41Sopenharmony_ci } else { 20491cb0ef41Sopenharmony_ci // We are entering a loop via its counter initialization action, meaning we 20501cb0ef41Sopenharmony_ci // are guaranteed to run the loop body at least some minimum number of times 20511cb0ef41Sopenharmony_ci // before running the continuation node. Set a flag so that this node knows 20521cb0ef41Sopenharmony_ci // (now and any times we visit it again recursively) that it was entered 20531cb0ef41Sopenharmony_ci // from the top. 20541cb0ef41Sopenharmony_ci LoopInitializationMarker marker(this); 20551cb0ef41Sopenharmony_ci GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start); 20561cb0ef41Sopenharmony_ci } 20571cb0ef41Sopenharmony_ci} 20581cb0ef41Sopenharmony_ci 20591cb0ef41Sopenharmony_civoid LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 20601cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) { 20611cb0ef41Sopenharmony_ci if (body_can_be_zero_length_ || budget <= 0) { 20621cb0ef41Sopenharmony_ci bm->SetRest(offset); 20631cb0ef41Sopenharmony_ci SaveBMInfo(bm, not_at_start, offset); 20641cb0ef41Sopenharmony_ci return; 20651cb0ef41Sopenharmony_ci } 20661cb0ef41Sopenharmony_ci ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 20671cb0ef41Sopenharmony_ci SaveBMInfo(bm, not_at_start, offset); 20681cb0ef41Sopenharmony_ci} 20691cb0ef41Sopenharmony_ci 20701cb0ef41Sopenharmony_civoid ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 20711cb0ef41Sopenharmony_ci RegExpCompiler* compiler, 20721cb0ef41Sopenharmony_ci int characters_filled_in, 20731cb0ef41Sopenharmony_ci bool not_at_start) { 20741cb0ef41Sopenharmony_ci not_at_start = (not_at_start || not_at_start_); 20751cb0ef41Sopenharmony_ci int choice_count = alternatives_->length(); 20761cb0ef41Sopenharmony_ci DCHECK_LT(0, choice_count); 20771cb0ef41Sopenharmony_ci alternatives_->at(0).node()->GetQuickCheckDetails( 20781cb0ef41Sopenharmony_ci details, compiler, characters_filled_in, not_at_start); 20791cb0ef41Sopenharmony_ci for (int i = 1; i < choice_count; i++) { 20801cb0ef41Sopenharmony_ci QuickCheckDetails new_details(details->characters()); 20811cb0ef41Sopenharmony_ci RegExpNode* node = alternatives_->at(i).node(); 20821cb0ef41Sopenharmony_ci node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in, 20831cb0ef41Sopenharmony_ci not_at_start); 20841cb0ef41Sopenharmony_ci // Here we merge the quick match details of the two branches. 20851cb0ef41Sopenharmony_ci details->Merge(&new_details, characters_filled_in); 20861cb0ef41Sopenharmony_ci } 20871cb0ef41Sopenharmony_ci} 20881cb0ef41Sopenharmony_ci 20891cb0ef41Sopenharmony_cinamespace { 20901cb0ef41Sopenharmony_ci 20911cb0ef41Sopenharmony_ci// Check for [0-9A-Z_a-z]. 20921cb0ef41Sopenharmony_civoid EmitWordCheck(RegExpMacroAssembler* assembler, Label* word, 20931cb0ef41Sopenharmony_ci Label* non_word, bool fall_through_on_word) { 20941cb0ef41Sopenharmony_ci if (assembler->CheckSpecialCharacterClass( 20951cb0ef41Sopenharmony_ci fall_through_on_word ? StandardCharacterSet::kWord 20961cb0ef41Sopenharmony_ci : StandardCharacterSet::kNotWord, 20971cb0ef41Sopenharmony_ci fall_through_on_word ? non_word : word)) { 20981cb0ef41Sopenharmony_ci // Optimized implementation available. 20991cb0ef41Sopenharmony_ci return; 21001cb0ef41Sopenharmony_ci } 21011cb0ef41Sopenharmony_ci assembler->CheckCharacterGT('z', non_word); 21021cb0ef41Sopenharmony_ci assembler->CheckCharacterLT('0', non_word); 21031cb0ef41Sopenharmony_ci assembler->CheckCharacterGT('a' - 1, word); 21041cb0ef41Sopenharmony_ci assembler->CheckCharacterLT('9' + 1, word); 21051cb0ef41Sopenharmony_ci assembler->CheckCharacterLT('A', non_word); 21061cb0ef41Sopenharmony_ci assembler->CheckCharacterLT('Z' + 1, word); 21071cb0ef41Sopenharmony_ci if (fall_through_on_word) { 21081cb0ef41Sopenharmony_ci assembler->CheckNotCharacter('_', non_word); 21091cb0ef41Sopenharmony_ci } else { 21101cb0ef41Sopenharmony_ci assembler->CheckCharacter('_', word); 21111cb0ef41Sopenharmony_ci } 21121cb0ef41Sopenharmony_ci} 21131cb0ef41Sopenharmony_ci 21141cb0ef41Sopenharmony_ci// Emit the code to check for a ^ in multiline mode (1-character lookbehind 21151cb0ef41Sopenharmony_ci// that matches newline or the start of input). 21161cb0ef41Sopenharmony_civoid EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) { 21171cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 21181cb0ef41Sopenharmony_ci 21191cb0ef41Sopenharmony_ci // We will load the previous character into the current character register. 21201cb0ef41Sopenharmony_ci Trace new_trace(*trace); 21211cb0ef41Sopenharmony_ci new_trace.InvalidateCurrentCharacter(); 21221cb0ef41Sopenharmony_ci 21231cb0ef41Sopenharmony_ci // A positive (> 0) cp_offset means we've already successfully matched a 21241cb0ef41Sopenharmony_ci // non-empty-width part of the pattern, and thus cannot be at or before the 21251cb0ef41Sopenharmony_ci // start of the subject string. We can thus skip both at-start and 21261cb0ef41Sopenharmony_ci // bounds-checks when loading the one-character lookbehind. 21271cb0ef41Sopenharmony_ci const bool may_be_at_or_before_subject_string_start = 21281cb0ef41Sopenharmony_ci new_trace.cp_offset() <= 0; 21291cb0ef41Sopenharmony_ci 21301cb0ef41Sopenharmony_ci Label ok; 21311cb0ef41Sopenharmony_ci if (may_be_at_or_before_subject_string_start) { 21321cb0ef41Sopenharmony_ci // The start of input counts as a newline in this context, so skip to ok if 21331cb0ef41Sopenharmony_ci // we are at the start. 21341cb0ef41Sopenharmony_ci assembler->CheckAtStart(new_trace.cp_offset(), &ok); 21351cb0ef41Sopenharmony_ci } 21361cb0ef41Sopenharmony_ci 21371cb0ef41Sopenharmony_ci // If we've already checked that we are not at the start of input, it's okay 21381cb0ef41Sopenharmony_ci // to load the previous character without bounds checks. 21391cb0ef41Sopenharmony_ci const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start; 21401cb0ef41Sopenharmony_ci assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, 21411cb0ef41Sopenharmony_ci new_trace.backtrack(), can_skip_bounds_check); 21421cb0ef41Sopenharmony_ci if (!assembler->CheckSpecialCharacterClass( 21431cb0ef41Sopenharmony_ci StandardCharacterSet::kLineTerminator, new_trace.backtrack())) { 21441cb0ef41Sopenharmony_ci // Newline means \n, \r, 0x2028 or 0x2029. 21451cb0ef41Sopenharmony_ci if (!compiler->one_byte()) { 21461cb0ef41Sopenharmony_ci assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok); 21471cb0ef41Sopenharmony_ci } 21481cb0ef41Sopenharmony_ci assembler->CheckCharacter('\n', &ok); 21491cb0ef41Sopenharmony_ci assembler->CheckNotCharacter('\r', new_trace.backtrack()); 21501cb0ef41Sopenharmony_ci } 21511cb0ef41Sopenharmony_ci assembler->Bind(&ok); 21521cb0ef41Sopenharmony_ci on_success->Emit(compiler, &new_trace); 21531cb0ef41Sopenharmony_ci} 21541cb0ef41Sopenharmony_ci 21551cb0ef41Sopenharmony_ci} // namespace 21561cb0ef41Sopenharmony_ci 21571cb0ef41Sopenharmony_ci// Emit the code to handle \b and \B (word-boundary or non-word-boundary). 21581cb0ef41Sopenharmony_civoid AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { 21591cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 21601cb0ef41Sopenharmony_ci Isolate* isolate = assembler->isolate(); 21611cb0ef41Sopenharmony_ci Trace::TriBool next_is_word_character = Trace::UNKNOWN; 21621cb0ef41Sopenharmony_ci bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); 21631cb0ef41Sopenharmony_ci BoyerMooreLookahead* lookahead = bm_info(not_at_start); 21641cb0ef41Sopenharmony_ci if (lookahead == nullptr) { 21651cb0ef41Sopenharmony_ci int eats_at_least = 21661cb0ef41Sopenharmony_ci std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start)); 21671cb0ef41Sopenharmony_ci if (eats_at_least >= 1) { 21681cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm = 21691cb0ef41Sopenharmony_ci zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone()); 21701cb0ef41Sopenharmony_ci FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start); 21711cb0ef41Sopenharmony_ci if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE; 21721cb0ef41Sopenharmony_ci if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; 21731cb0ef41Sopenharmony_ci } 21741cb0ef41Sopenharmony_ci } else { 21751cb0ef41Sopenharmony_ci if (lookahead->at(0)->is_non_word()) 21761cb0ef41Sopenharmony_ci next_is_word_character = Trace::FALSE_VALUE; 21771cb0ef41Sopenharmony_ci if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; 21781cb0ef41Sopenharmony_ci } 21791cb0ef41Sopenharmony_ci bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); 21801cb0ef41Sopenharmony_ci if (next_is_word_character == Trace::UNKNOWN) { 21811cb0ef41Sopenharmony_ci Label before_non_word; 21821cb0ef41Sopenharmony_ci Label before_word; 21831cb0ef41Sopenharmony_ci if (trace->characters_preloaded() != 1) { 21841cb0ef41Sopenharmony_ci assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); 21851cb0ef41Sopenharmony_ci } 21861cb0ef41Sopenharmony_ci // Fall through on non-word. 21871cb0ef41Sopenharmony_ci EmitWordCheck(assembler, &before_word, &before_non_word, false); 21881cb0ef41Sopenharmony_ci // Next character is not a word character. 21891cb0ef41Sopenharmony_ci assembler->Bind(&before_non_word); 21901cb0ef41Sopenharmony_ci Label ok; 21911cb0ef41Sopenharmony_ci BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); 21921cb0ef41Sopenharmony_ci assembler->GoTo(&ok); 21931cb0ef41Sopenharmony_ci 21941cb0ef41Sopenharmony_ci assembler->Bind(&before_word); 21951cb0ef41Sopenharmony_ci BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); 21961cb0ef41Sopenharmony_ci assembler->Bind(&ok); 21971cb0ef41Sopenharmony_ci } else if (next_is_word_character == Trace::TRUE_VALUE) { 21981cb0ef41Sopenharmony_ci BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); 21991cb0ef41Sopenharmony_ci } else { 22001cb0ef41Sopenharmony_ci DCHECK(next_is_word_character == Trace::FALSE_VALUE); 22011cb0ef41Sopenharmony_ci BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); 22021cb0ef41Sopenharmony_ci } 22031cb0ef41Sopenharmony_ci} 22041cb0ef41Sopenharmony_ci 22051cb0ef41Sopenharmony_civoid AssertionNode::BacktrackIfPrevious( 22061cb0ef41Sopenharmony_ci RegExpCompiler* compiler, Trace* trace, 22071cb0ef41Sopenharmony_ci AssertionNode::IfPrevious backtrack_if_previous) { 22081cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 22091cb0ef41Sopenharmony_ci Trace new_trace(*trace); 22101cb0ef41Sopenharmony_ci new_trace.InvalidateCurrentCharacter(); 22111cb0ef41Sopenharmony_ci 22121cb0ef41Sopenharmony_ci Label fall_through; 22131cb0ef41Sopenharmony_ci Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack() 22141cb0ef41Sopenharmony_ci : &fall_through; 22151cb0ef41Sopenharmony_ci Label* word = backtrack_if_previous == kIsNonWord ? &fall_through 22161cb0ef41Sopenharmony_ci : new_trace.backtrack(); 22171cb0ef41Sopenharmony_ci 22181cb0ef41Sopenharmony_ci // A positive (> 0) cp_offset means we've already successfully matched a 22191cb0ef41Sopenharmony_ci // non-empty-width part of the pattern, and thus cannot be at or before the 22201cb0ef41Sopenharmony_ci // start of the subject string. We can thus skip both at-start and 22211cb0ef41Sopenharmony_ci // bounds-checks when loading the one-character lookbehind. 22221cb0ef41Sopenharmony_ci const bool may_be_at_or_before_subject_string_start = 22231cb0ef41Sopenharmony_ci new_trace.cp_offset() <= 0; 22241cb0ef41Sopenharmony_ci 22251cb0ef41Sopenharmony_ci if (may_be_at_or_before_subject_string_start) { 22261cb0ef41Sopenharmony_ci // The start of input counts as a non-word character, so the question is 22271cb0ef41Sopenharmony_ci // decided if we are at the start. 22281cb0ef41Sopenharmony_ci assembler->CheckAtStart(new_trace.cp_offset(), non_word); 22291cb0ef41Sopenharmony_ci } 22301cb0ef41Sopenharmony_ci 22311cb0ef41Sopenharmony_ci // If we've already checked that we are not at the start of input, it's okay 22321cb0ef41Sopenharmony_ci // to load the previous character without bounds checks. 22331cb0ef41Sopenharmony_ci const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start; 22341cb0ef41Sopenharmony_ci assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word, 22351cb0ef41Sopenharmony_ci can_skip_bounds_check); 22361cb0ef41Sopenharmony_ci EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); 22371cb0ef41Sopenharmony_ci 22381cb0ef41Sopenharmony_ci assembler->Bind(&fall_through); 22391cb0ef41Sopenharmony_ci on_success()->Emit(compiler, &new_trace); 22401cb0ef41Sopenharmony_ci} 22411cb0ef41Sopenharmony_ci 22421cb0ef41Sopenharmony_civoid AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, 22431cb0ef41Sopenharmony_ci RegExpCompiler* compiler, 22441cb0ef41Sopenharmony_ci int filled_in, bool not_at_start) { 22451cb0ef41Sopenharmony_ci if (assertion_type_ == AT_START && not_at_start) { 22461cb0ef41Sopenharmony_ci details->set_cannot_match(); 22471cb0ef41Sopenharmony_ci return; 22481cb0ef41Sopenharmony_ci } 22491cb0ef41Sopenharmony_ci return on_success()->GetQuickCheckDetails(details, compiler, filled_in, 22501cb0ef41Sopenharmony_ci not_at_start); 22511cb0ef41Sopenharmony_ci} 22521cb0ef41Sopenharmony_ci 22531cb0ef41Sopenharmony_civoid AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 22541cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 22551cb0ef41Sopenharmony_ci switch (assertion_type_) { 22561cb0ef41Sopenharmony_ci case AT_END: { 22571cb0ef41Sopenharmony_ci Label ok; 22581cb0ef41Sopenharmony_ci assembler->CheckPosition(trace->cp_offset(), &ok); 22591cb0ef41Sopenharmony_ci assembler->GoTo(trace->backtrack()); 22601cb0ef41Sopenharmony_ci assembler->Bind(&ok); 22611cb0ef41Sopenharmony_ci break; 22621cb0ef41Sopenharmony_ci } 22631cb0ef41Sopenharmony_ci case AT_START: { 22641cb0ef41Sopenharmony_ci if (trace->at_start() == Trace::FALSE_VALUE) { 22651cb0ef41Sopenharmony_ci assembler->GoTo(trace->backtrack()); 22661cb0ef41Sopenharmony_ci return; 22671cb0ef41Sopenharmony_ci } 22681cb0ef41Sopenharmony_ci if (trace->at_start() == Trace::UNKNOWN) { 22691cb0ef41Sopenharmony_ci assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); 22701cb0ef41Sopenharmony_ci Trace at_start_trace = *trace; 22711cb0ef41Sopenharmony_ci at_start_trace.set_at_start(Trace::TRUE_VALUE); 22721cb0ef41Sopenharmony_ci on_success()->Emit(compiler, &at_start_trace); 22731cb0ef41Sopenharmony_ci return; 22741cb0ef41Sopenharmony_ci } 22751cb0ef41Sopenharmony_ci } break; 22761cb0ef41Sopenharmony_ci case AFTER_NEWLINE: 22771cb0ef41Sopenharmony_ci EmitHat(compiler, on_success(), trace); 22781cb0ef41Sopenharmony_ci return; 22791cb0ef41Sopenharmony_ci case AT_BOUNDARY: 22801cb0ef41Sopenharmony_ci case AT_NON_BOUNDARY: { 22811cb0ef41Sopenharmony_ci EmitBoundaryCheck(compiler, trace); 22821cb0ef41Sopenharmony_ci return; 22831cb0ef41Sopenharmony_ci } 22841cb0ef41Sopenharmony_ci } 22851cb0ef41Sopenharmony_ci on_success()->Emit(compiler, trace); 22861cb0ef41Sopenharmony_ci} 22871cb0ef41Sopenharmony_ci 22881cb0ef41Sopenharmony_cinamespace { 22891cb0ef41Sopenharmony_ci 22901cb0ef41Sopenharmony_cibool DeterminedAlready(QuickCheckDetails* quick_check, int offset) { 22911cb0ef41Sopenharmony_ci if (quick_check == nullptr) return false; 22921cb0ef41Sopenharmony_ci if (offset >= quick_check->characters()) return false; 22931cb0ef41Sopenharmony_ci return quick_check->positions(offset)->determines_perfectly; 22941cb0ef41Sopenharmony_ci} 22951cb0ef41Sopenharmony_ci 22961cb0ef41Sopenharmony_civoid UpdateBoundsCheck(int index, int* checked_up_to) { 22971cb0ef41Sopenharmony_ci if (index > *checked_up_to) { 22981cb0ef41Sopenharmony_ci *checked_up_to = index; 22991cb0ef41Sopenharmony_ci } 23001cb0ef41Sopenharmony_ci} 23011cb0ef41Sopenharmony_ci 23021cb0ef41Sopenharmony_ci} // namespace 23031cb0ef41Sopenharmony_ci 23041cb0ef41Sopenharmony_ci// We call this repeatedly to generate code for each pass over the text node. 23051cb0ef41Sopenharmony_ci// The passes are in increasing order of difficulty because we hope one 23061cb0ef41Sopenharmony_ci// of the first passes will fail in which case we are saved the work of the 23071cb0ef41Sopenharmony_ci// later passes. for example for the case independent regexp /%[asdfghjkl]a/ 23081cb0ef41Sopenharmony_ci// we will check the '%' in the first pass, the case independent 'a' in the 23091cb0ef41Sopenharmony_ci// second pass and the character class in the last pass. 23101cb0ef41Sopenharmony_ci// 23111cb0ef41Sopenharmony_ci// The passes are done from right to left, so for example to test for /bar/ 23121cb0ef41Sopenharmony_ci// we will first test for an 'r' with offset 2, then an 'a' with offset 1 23131cb0ef41Sopenharmony_ci// and then a 'b' with offset 0. This means we can avoid the end-of-input 23141cb0ef41Sopenharmony_ci// bounds check most of the time. In the example we only need to check for 23151cb0ef41Sopenharmony_ci// end-of-input when loading the putative 'r'. 23161cb0ef41Sopenharmony_ci// 23171cb0ef41Sopenharmony_ci// A slight complication involves the fact that the first character may already 23181cb0ef41Sopenharmony_ci// be fetched into a register by the previous node. In this case we want to 23191cb0ef41Sopenharmony_ci// do the test for that character first. We do this in separate passes. The 23201cb0ef41Sopenharmony_ci// 'preloaded' argument indicates that we are doing such a 'pass'. If such a 23211cb0ef41Sopenharmony_ci// pass has been performed then subsequent passes will have true in 23221cb0ef41Sopenharmony_ci// first_element_checked to indicate that that character does not need to be 23231cb0ef41Sopenharmony_ci// checked again. 23241cb0ef41Sopenharmony_ci// 23251cb0ef41Sopenharmony_ci// In addition to all this we are passed a Trace, which can 23261cb0ef41Sopenharmony_ci// contain an AlternativeGeneration object. In this AlternativeGeneration 23271cb0ef41Sopenharmony_ci// object we can see details of any quick check that was already passed in 23281cb0ef41Sopenharmony_ci// order to get to the code we are now generating. The quick check can involve 23291cb0ef41Sopenharmony_ci// loading characters, which means we do not need to recheck the bounds 23301cb0ef41Sopenharmony_ci// up to the limit the quick check already checked. In addition the quick 23311cb0ef41Sopenharmony_ci// check can have involved a mask and compare operation which may simplify 23321cb0ef41Sopenharmony_ci// or obviate the need for further checks at some character positions. 23331cb0ef41Sopenharmony_civoid TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass, 23341cb0ef41Sopenharmony_ci bool preloaded, Trace* trace, 23351cb0ef41Sopenharmony_ci bool first_element_checked, int* checked_up_to) { 23361cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 23371cb0ef41Sopenharmony_ci Isolate* isolate = assembler->isolate(); 23381cb0ef41Sopenharmony_ci bool one_byte = compiler->one_byte(); 23391cb0ef41Sopenharmony_ci Label* backtrack = trace->backtrack(); 23401cb0ef41Sopenharmony_ci QuickCheckDetails* quick_check = trace->quick_check_performed(); 23411cb0ef41Sopenharmony_ci int element_count = elements()->length(); 23421cb0ef41Sopenharmony_ci int backward_offset = read_backward() ? -Length() : 0; 23431cb0ef41Sopenharmony_ci for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 23441cb0ef41Sopenharmony_ci TextElement elm = elements()->at(i); 23451cb0ef41Sopenharmony_ci int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; 23461cb0ef41Sopenharmony_ci if (elm.text_type() == TextElement::ATOM) { 23471cb0ef41Sopenharmony_ci if (SkipPass(pass, IsIgnoreCase(compiler->flags()))) continue; 23481cb0ef41Sopenharmony_ci base::Vector<const base::uc16> quarks = elm.atom()->data(); 23491cb0ef41Sopenharmony_ci for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { 23501cb0ef41Sopenharmony_ci if (first_element_checked && i == 0 && j == 0) continue; 23511cb0ef41Sopenharmony_ci if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; 23521cb0ef41Sopenharmony_ci base::uc16 quark = quarks[j]; 23531cb0ef41Sopenharmony_ci if (IsIgnoreCase(compiler->flags())) { 23541cb0ef41Sopenharmony_ci // Everywhere else we assume that a non-Latin-1 character cannot match 23551cb0ef41Sopenharmony_ci // a Latin-1 character. Avoid the cases where this is assumption is 23561cb0ef41Sopenharmony_ci // invalid by using the Latin1 equivalent instead. 23571cb0ef41Sopenharmony_ci quark = unibrow::Latin1::TryConvertToLatin1(quark); 23581cb0ef41Sopenharmony_ci } 23591cb0ef41Sopenharmony_ci bool needs_bounds_check = 23601cb0ef41Sopenharmony_ci *checked_up_to < cp_offset + j || read_backward(); 23611cb0ef41Sopenharmony_ci bool bounds_checked = false; 23621cb0ef41Sopenharmony_ci switch (pass) { 23631cb0ef41Sopenharmony_ci case NON_LATIN1_MATCH: 23641cb0ef41Sopenharmony_ci DCHECK(one_byte); 23651cb0ef41Sopenharmony_ci if (quark > String::kMaxOneByteCharCode) { 23661cb0ef41Sopenharmony_ci assembler->GoTo(backtrack); 23671cb0ef41Sopenharmony_ci return; 23681cb0ef41Sopenharmony_ci } 23691cb0ef41Sopenharmony_ci break; 23701cb0ef41Sopenharmony_ci case NON_LETTER_CHARACTER_MATCH: 23711cb0ef41Sopenharmony_ci bounds_checked = 23721cb0ef41Sopenharmony_ci EmitAtomNonLetter(isolate, compiler, quark, backtrack, 23731cb0ef41Sopenharmony_ci cp_offset + j, needs_bounds_check, preloaded); 23741cb0ef41Sopenharmony_ci break; 23751cb0ef41Sopenharmony_ci case SIMPLE_CHARACTER_MATCH: 23761cb0ef41Sopenharmony_ci bounds_checked = EmitSimpleCharacter(isolate, compiler, quark, 23771cb0ef41Sopenharmony_ci backtrack, cp_offset + j, 23781cb0ef41Sopenharmony_ci needs_bounds_check, preloaded); 23791cb0ef41Sopenharmony_ci break; 23801cb0ef41Sopenharmony_ci case CASE_CHARACTER_MATCH: 23811cb0ef41Sopenharmony_ci bounds_checked = 23821cb0ef41Sopenharmony_ci EmitAtomLetter(isolate, compiler, quark, backtrack, 23831cb0ef41Sopenharmony_ci cp_offset + j, needs_bounds_check, preloaded); 23841cb0ef41Sopenharmony_ci break; 23851cb0ef41Sopenharmony_ci default: 23861cb0ef41Sopenharmony_ci break; 23871cb0ef41Sopenharmony_ci } 23881cb0ef41Sopenharmony_ci if (bounds_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 23891cb0ef41Sopenharmony_ci } 23901cb0ef41Sopenharmony_ci } else { 23911cb0ef41Sopenharmony_ci DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); 23921cb0ef41Sopenharmony_ci if (pass == CHARACTER_CLASS_MATCH) { 23931cb0ef41Sopenharmony_ci if (first_element_checked && i == 0) continue; 23941cb0ef41Sopenharmony_ci if (DeterminedAlready(quick_check, elm.cp_offset())) continue; 23951cb0ef41Sopenharmony_ci RegExpCharacterClass* cc = elm.char_class(); 23961cb0ef41Sopenharmony_ci bool bounds_check = *checked_up_to < cp_offset || read_backward(); 23971cb0ef41Sopenharmony_ci EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, 23981cb0ef41Sopenharmony_ci bounds_check, preloaded, zone()); 23991cb0ef41Sopenharmony_ci UpdateBoundsCheck(cp_offset, checked_up_to); 24001cb0ef41Sopenharmony_ci } 24011cb0ef41Sopenharmony_ci } 24021cb0ef41Sopenharmony_ci } 24031cb0ef41Sopenharmony_ci} 24041cb0ef41Sopenharmony_ci 24051cb0ef41Sopenharmony_ciint TextNode::Length() { 24061cb0ef41Sopenharmony_ci TextElement elm = elements()->last(); 24071cb0ef41Sopenharmony_ci DCHECK_LE(0, elm.cp_offset()); 24081cb0ef41Sopenharmony_ci return elm.cp_offset() + elm.length(); 24091cb0ef41Sopenharmony_ci} 24101cb0ef41Sopenharmony_ci 24111cb0ef41Sopenharmony_cibool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) { 24121cb0ef41Sopenharmony_ci if (ignore_case) { 24131cb0ef41Sopenharmony_ci return pass == SIMPLE_CHARACTER_MATCH; 24141cb0ef41Sopenharmony_ci } else { 24151cb0ef41Sopenharmony_ci return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; 24161cb0ef41Sopenharmony_ci } 24171cb0ef41Sopenharmony_ci} 24181cb0ef41Sopenharmony_ci 24191cb0ef41Sopenharmony_ciTextNode* TextNode::CreateForCharacterRanges(Zone* zone, 24201cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges, 24211cb0ef41Sopenharmony_ci bool read_backward, 24221cb0ef41Sopenharmony_ci RegExpNode* on_success) { 24231cb0ef41Sopenharmony_ci DCHECK_NOT_NULL(ranges); 24241cb0ef41Sopenharmony_ci // TODO(jgruber): There's no fundamental need to create this 24251cb0ef41Sopenharmony_ci // RegExpCharacterClass; we could refactor to avoid the allocation. 24261cb0ef41Sopenharmony_ci return zone->New<TextNode>(zone->New<RegExpCharacterClass>(zone, ranges), 24271cb0ef41Sopenharmony_ci read_backward, on_success); 24281cb0ef41Sopenharmony_ci} 24291cb0ef41Sopenharmony_ci 24301cb0ef41Sopenharmony_ciTextNode* TextNode::CreateForSurrogatePair( 24311cb0ef41Sopenharmony_ci Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges, 24321cb0ef41Sopenharmony_ci bool read_backward, RegExpNode* on_success) { 24331cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead); 24341cb0ef41Sopenharmony_ci ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone); 24351cb0ef41Sopenharmony_ci elms->Add(TextElement::CharClass( 24361cb0ef41Sopenharmony_ci zone->New<RegExpCharacterClass>(zone, lead_ranges)), 24371cb0ef41Sopenharmony_ci zone); 24381cb0ef41Sopenharmony_ci elms->Add(TextElement::CharClass( 24391cb0ef41Sopenharmony_ci zone->New<RegExpCharacterClass>(zone, trail_ranges)), 24401cb0ef41Sopenharmony_ci zone); 24411cb0ef41Sopenharmony_ci return zone->New<TextNode>(elms, read_backward, on_success); 24421cb0ef41Sopenharmony_ci} 24431cb0ef41Sopenharmony_ci 24441cb0ef41Sopenharmony_ciTextNode* TextNode::CreateForSurrogatePair( 24451cb0ef41Sopenharmony_ci Zone* zone, ZoneList<CharacterRange>* lead_ranges, CharacterRange trail, 24461cb0ef41Sopenharmony_ci bool read_backward, RegExpNode* on_success) { 24471cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail); 24481cb0ef41Sopenharmony_ci ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone); 24491cb0ef41Sopenharmony_ci elms->Add(TextElement::CharClass( 24501cb0ef41Sopenharmony_ci zone->New<RegExpCharacterClass>(zone, lead_ranges)), 24511cb0ef41Sopenharmony_ci zone); 24521cb0ef41Sopenharmony_ci elms->Add(TextElement::CharClass( 24531cb0ef41Sopenharmony_ci zone->New<RegExpCharacterClass>(zone, trail_ranges)), 24541cb0ef41Sopenharmony_ci zone); 24551cb0ef41Sopenharmony_ci return zone->New<TextNode>(elms, read_backward, on_success); 24561cb0ef41Sopenharmony_ci} 24571cb0ef41Sopenharmony_ci 24581cb0ef41Sopenharmony_ci// This generates the code to match a text node. A text node can contain 24591cb0ef41Sopenharmony_ci// straight character sequences (possibly to be matched in a case-independent 24601cb0ef41Sopenharmony_ci// way) and character classes. For efficiency we do not do this in a single 24611cb0ef41Sopenharmony_ci// pass from left to right. Instead we pass over the text node several times, 24621cb0ef41Sopenharmony_ci// emitting code for some character positions every time. See the comment on 24631cb0ef41Sopenharmony_ci// TextEmitPass for details. 24641cb0ef41Sopenharmony_civoid TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 24651cb0ef41Sopenharmony_ci LimitResult limit_result = LimitVersions(compiler, trace); 24661cb0ef41Sopenharmony_ci if (limit_result == DONE) return; 24671cb0ef41Sopenharmony_ci DCHECK(limit_result == CONTINUE); 24681cb0ef41Sopenharmony_ci 24691cb0ef41Sopenharmony_ci if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { 24701cb0ef41Sopenharmony_ci compiler->SetRegExpTooBig(); 24711cb0ef41Sopenharmony_ci return; 24721cb0ef41Sopenharmony_ci } 24731cb0ef41Sopenharmony_ci 24741cb0ef41Sopenharmony_ci if (compiler->one_byte()) { 24751cb0ef41Sopenharmony_ci int dummy = 0; 24761cb0ef41Sopenharmony_ci TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); 24771cb0ef41Sopenharmony_ci } 24781cb0ef41Sopenharmony_ci 24791cb0ef41Sopenharmony_ci bool first_elt_done = false; 24801cb0ef41Sopenharmony_ci int bound_checked_to = trace->cp_offset() - 1; 24811cb0ef41Sopenharmony_ci bound_checked_to += trace->bound_checked_up_to(); 24821cb0ef41Sopenharmony_ci 24831cb0ef41Sopenharmony_ci // If a character is preloaded into the current character register then 24841cb0ef41Sopenharmony_ci // check that now. 24851cb0ef41Sopenharmony_ci if (trace->characters_preloaded() == 1) { 24861cb0ef41Sopenharmony_ci for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { 24871cb0ef41Sopenharmony_ci TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace, 24881cb0ef41Sopenharmony_ci false, &bound_checked_to); 24891cb0ef41Sopenharmony_ci } 24901cb0ef41Sopenharmony_ci first_elt_done = true; 24911cb0ef41Sopenharmony_ci } 24921cb0ef41Sopenharmony_ci 24931cb0ef41Sopenharmony_ci for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { 24941cb0ef41Sopenharmony_ci TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace, 24951cb0ef41Sopenharmony_ci first_elt_done, &bound_checked_to); 24961cb0ef41Sopenharmony_ci } 24971cb0ef41Sopenharmony_ci 24981cb0ef41Sopenharmony_ci Trace successor_trace(*trace); 24991cb0ef41Sopenharmony_ci // If we advance backward, we may end up at the start. 25001cb0ef41Sopenharmony_ci successor_trace.AdvanceCurrentPositionInTrace( 25011cb0ef41Sopenharmony_ci read_backward() ? -Length() : Length(), compiler); 25021cb0ef41Sopenharmony_ci successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN 25031cb0ef41Sopenharmony_ci : Trace::FALSE_VALUE); 25041cb0ef41Sopenharmony_ci RecursionCheck rc(compiler); 25051cb0ef41Sopenharmony_ci on_success()->Emit(compiler, &successor_trace); 25061cb0ef41Sopenharmony_ci} 25071cb0ef41Sopenharmony_ci 25081cb0ef41Sopenharmony_civoid Trace::InvalidateCurrentCharacter() { characters_preloaded_ = 0; } 25091cb0ef41Sopenharmony_ci 25101cb0ef41Sopenharmony_civoid Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { 25111cb0ef41Sopenharmony_ci // We don't have an instruction for shifting the current character register 25121cb0ef41Sopenharmony_ci // down or for using a shifted value for anything so lets just forget that 25131cb0ef41Sopenharmony_ci // we preloaded any characters into it. 25141cb0ef41Sopenharmony_ci characters_preloaded_ = 0; 25151cb0ef41Sopenharmony_ci // Adjust the offsets of the quick check performed information. This 25161cb0ef41Sopenharmony_ci // information is used to find out what we already determined about the 25171cb0ef41Sopenharmony_ci // characters by means of mask and compare. 25181cb0ef41Sopenharmony_ci quick_check_performed_.Advance(by, compiler->one_byte()); 25191cb0ef41Sopenharmony_ci cp_offset_ += by; 25201cb0ef41Sopenharmony_ci if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { 25211cb0ef41Sopenharmony_ci compiler->SetRegExpTooBig(); 25221cb0ef41Sopenharmony_ci cp_offset_ = 0; 25231cb0ef41Sopenharmony_ci } 25241cb0ef41Sopenharmony_ci bound_checked_up_to_ = std::max(0, bound_checked_up_to_ - by); 25251cb0ef41Sopenharmony_ci} 25261cb0ef41Sopenharmony_ci 25271cb0ef41Sopenharmony_civoid TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte, 25281cb0ef41Sopenharmony_ci RegExpFlags flags) { 25291cb0ef41Sopenharmony_ci if (!IsIgnoreCase(flags)) return; 25301cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT 25311cb0ef41Sopenharmony_ci if (NeedsUnicodeCaseEquivalents(flags)) return; 25321cb0ef41Sopenharmony_ci#endif 25331cb0ef41Sopenharmony_ci 25341cb0ef41Sopenharmony_ci int element_count = elements()->length(); 25351cb0ef41Sopenharmony_ci for (int i = 0; i < element_count; i++) { 25361cb0ef41Sopenharmony_ci TextElement elm = elements()->at(i); 25371cb0ef41Sopenharmony_ci if (elm.text_type() == TextElement::CHAR_CLASS) { 25381cb0ef41Sopenharmony_ci RegExpCharacterClass* cc = elm.char_class(); 25391cb0ef41Sopenharmony_ci // None of the standard character classes is different in the case 25401cb0ef41Sopenharmony_ci // independent case and it slows us down if we don't know that. 25411cb0ef41Sopenharmony_ci if (cc->is_standard(zone())) continue; 25421cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 25431cb0ef41Sopenharmony_ci CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte); 25441cb0ef41Sopenharmony_ci } 25451cb0ef41Sopenharmony_ci } 25461cb0ef41Sopenharmony_ci} 25471cb0ef41Sopenharmony_ci 25481cb0ef41Sopenharmony_ciint TextNode::GreedyLoopTextLength() { return Length(); } 25491cb0ef41Sopenharmony_ci 25501cb0ef41Sopenharmony_ciRegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 25511cb0ef41Sopenharmony_ci RegExpCompiler* compiler) { 25521cb0ef41Sopenharmony_ci if (read_backward()) return nullptr; 25531cb0ef41Sopenharmony_ci if (elements()->length() != 1) return nullptr; 25541cb0ef41Sopenharmony_ci TextElement elm = elements()->at(0); 25551cb0ef41Sopenharmony_ci if (elm.text_type() != TextElement::CHAR_CLASS) return nullptr; 25561cb0ef41Sopenharmony_ci RegExpCharacterClass* node = elm.char_class(); 25571cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = node->ranges(zone()); 25581cb0ef41Sopenharmony_ci CharacterRange::Canonicalize(ranges); 25591cb0ef41Sopenharmony_ci if (node->is_negated()) { 25601cb0ef41Sopenharmony_ci return ranges->length() == 0 ? on_success() : nullptr; 25611cb0ef41Sopenharmony_ci } 25621cb0ef41Sopenharmony_ci if (ranges->length() != 1) return nullptr; 25631cb0ef41Sopenharmony_ci const base::uc32 max_char = MaxCodeUnit(compiler->one_byte()); 25641cb0ef41Sopenharmony_ci return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr; 25651cb0ef41Sopenharmony_ci} 25661cb0ef41Sopenharmony_ci 25671cb0ef41Sopenharmony_ci// Finds the fixed match length of a sequence of nodes that goes from 25681cb0ef41Sopenharmony_ci// this alternative and back to this choice node. If there are variable 25691cb0ef41Sopenharmony_ci// length nodes or other complications in the way then return a sentinel 25701cb0ef41Sopenharmony_ci// value indicating that a greedy loop cannot be constructed. 25711cb0ef41Sopenharmony_ciint ChoiceNode::GreedyLoopTextLengthForAlternative( 25721cb0ef41Sopenharmony_ci GuardedAlternative* alternative) { 25731cb0ef41Sopenharmony_ci int length = 0; 25741cb0ef41Sopenharmony_ci RegExpNode* node = alternative->node(); 25751cb0ef41Sopenharmony_ci // Later we will generate code for all these text nodes using recursion 25761cb0ef41Sopenharmony_ci // so we have to limit the max number. 25771cb0ef41Sopenharmony_ci int recursion_depth = 0; 25781cb0ef41Sopenharmony_ci while (node != this) { 25791cb0ef41Sopenharmony_ci if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { 25801cb0ef41Sopenharmony_ci return kNodeIsTooComplexForGreedyLoops; 25811cb0ef41Sopenharmony_ci } 25821cb0ef41Sopenharmony_ci int node_length = node->GreedyLoopTextLength(); 25831cb0ef41Sopenharmony_ci if (node_length == kNodeIsTooComplexForGreedyLoops) { 25841cb0ef41Sopenharmony_ci return kNodeIsTooComplexForGreedyLoops; 25851cb0ef41Sopenharmony_ci } 25861cb0ef41Sopenharmony_ci length += node_length; 25871cb0ef41Sopenharmony_ci SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); 25881cb0ef41Sopenharmony_ci node = seq_node->on_success(); 25891cb0ef41Sopenharmony_ci } 25901cb0ef41Sopenharmony_ci if (read_backward()) { 25911cb0ef41Sopenharmony_ci length = -length; 25921cb0ef41Sopenharmony_ci } 25931cb0ef41Sopenharmony_ci // Check that we can jump by the whole text length. If not, return sentinel 25941cb0ef41Sopenharmony_ci // to indicate the we can't construct a greedy loop. 25951cb0ef41Sopenharmony_ci if (length < RegExpMacroAssembler::kMinCPOffset || 25961cb0ef41Sopenharmony_ci length > RegExpMacroAssembler::kMaxCPOffset) { 25971cb0ef41Sopenharmony_ci return kNodeIsTooComplexForGreedyLoops; 25981cb0ef41Sopenharmony_ci } 25991cb0ef41Sopenharmony_ci return length; 26001cb0ef41Sopenharmony_ci} 26011cb0ef41Sopenharmony_ci 26021cb0ef41Sopenharmony_civoid LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { 26031cb0ef41Sopenharmony_ci DCHECK_NULL(loop_node_); 26041cb0ef41Sopenharmony_ci AddAlternative(alt); 26051cb0ef41Sopenharmony_ci loop_node_ = alt.node(); 26061cb0ef41Sopenharmony_ci} 26071cb0ef41Sopenharmony_ci 26081cb0ef41Sopenharmony_civoid LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { 26091cb0ef41Sopenharmony_ci DCHECK_NULL(continue_node_); 26101cb0ef41Sopenharmony_ci AddAlternative(alt); 26111cb0ef41Sopenharmony_ci continue_node_ = alt.node(); 26121cb0ef41Sopenharmony_ci} 26131cb0ef41Sopenharmony_ci 26141cb0ef41Sopenharmony_civoid LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 26151cb0ef41Sopenharmony_ci RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 26161cb0ef41Sopenharmony_ci if (trace->stop_node() == this) { 26171cb0ef41Sopenharmony_ci // Back edge of greedy optimized loop node graph. 26181cb0ef41Sopenharmony_ci int text_length = 26191cb0ef41Sopenharmony_ci GreedyLoopTextLengthForAlternative(&(alternatives_->at(0))); 26201cb0ef41Sopenharmony_ci DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length); 26211cb0ef41Sopenharmony_ci // Update the counter-based backtracking info on the stack. This is an 26221cb0ef41Sopenharmony_ci // optimization for greedy loops (see below). 26231cb0ef41Sopenharmony_ci DCHECK(trace->cp_offset() == text_length); 26241cb0ef41Sopenharmony_ci macro_assembler->AdvanceCurrentPosition(text_length); 26251cb0ef41Sopenharmony_ci macro_assembler->GoTo(trace->loop_label()); 26261cb0ef41Sopenharmony_ci return; 26271cb0ef41Sopenharmony_ci } 26281cb0ef41Sopenharmony_ci DCHECK_NULL(trace->stop_node()); 26291cb0ef41Sopenharmony_ci if (!trace->is_trivial()) { 26301cb0ef41Sopenharmony_ci trace->Flush(compiler, this); 26311cb0ef41Sopenharmony_ci return; 26321cb0ef41Sopenharmony_ci } 26331cb0ef41Sopenharmony_ci ChoiceNode::Emit(compiler, trace); 26341cb0ef41Sopenharmony_ci} 26351cb0ef41Sopenharmony_ci 26361cb0ef41Sopenharmony_ciint ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, 26371cb0ef41Sopenharmony_ci int eats_at_least) { 26381cb0ef41Sopenharmony_ci int preload_characters = std::min(4, eats_at_least); 26391cb0ef41Sopenharmony_ci DCHECK_LE(preload_characters, 4); 26401cb0ef41Sopenharmony_ci if (compiler->macro_assembler()->CanReadUnaligned()) { 26411cb0ef41Sopenharmony_ci bool one_byte = compiler->one_byte(); 26421cb0ef41Sopenharmony_ci if (one_byte) { 26431cb0ef41Sopenharmony_ci // We can't preload 3 characters because there is no machine instruction 26441cb0ef41Sopenharmony_ci // to do that. We can't just load 4 because we could be reading 26451cb0ef41Sopenharmony_ci // beyond the end of the string, which could cause a memory fault. 26461cb0ef41Sopenharmony_ci if (preload_characters == 3) preload_characters = 2; 26471cb0ef41Sopenharmony_ci } else { 26481cb0ef41Sopenharmony_ci if (preload_characters > 2) preload_characters = 2; 26491cb0ef41Sopenharmony_ci } 26501cb0ef41Sopenharmony_ci } else { 26511cb0ef41Sopenharmony_ci if (preload_characters > 1) preload_characters = 1; 26521cb0ef41Sopenharmony_ci } 26531cb0ef41Sopenharmony_ci return preload_characters; 26541cb0ef41Sopenharmony_ci} 26551cb0ef41Sopenharmony_ci 26561cb0ef41Sopenharmony_ci// This class is used when generating the alternatives in a choice node. It 26571cb0ef41Sopenharmony_ci// records the way the alternative is being code generated. 26581cb0ef41Sopenharmony_ciclass AlternativeGeneration : public Malloced { 26591cb0ef41Sopenharmony_ci public: 26601cb0ef41Sopenharmony_ci AlternativeGeneration() 26611cb0ef41Sopenharmony_ci : possible_success(), 26621cb0ef41Sopenharmony_ci expects_preload(false), 26631cb0ef41Sopenharmony_ci after(), 26641cb0ef41Sopenharmony_ci quick_check_details() {} 26651cb0ef41Sopenharmony_ci Label possible_success; 26661cb0ef41Sopenharmony_ci bool expects_preload; 26671cb0ef41Sopenharmony_ci Label after; 26681cb0ef41Sopenharmony_ci QuickCheckDetails quick_check_details; 26691cb0ef41Sopenharmony_ci}; 26701cb0ef41Sopenharmony_ci 26711cb0ef41Sopenharmony_ci// Creates a list of AlternativeGenerations. If the list has a reasonable 26721cb0ef41Sopenharmony_ci// size then it is on the stack, otherwise the excess is on the heap. 26731cb0ef41Sopenharmony_ciclass AlternativeGenerationList { 26741cb0ef41Sopenharmony_ci public: 26751cb0ef41Sopenharmony_ci AlternativeGenerationList(int count, Zone* zone) : alt_gens_(count, zone) { 26761cb0ef41Sopenharmony_ci for (int i = 0; i < count && i < kAFew; i++) { 26771cb0ef41Sopenharmony_ci alt_gens_.Add(a_few_alt_gens_ + i, zone); 26781cb0ef41Sopenharmony_ci } 26791cb0ef41Sopenharmony_ci for (int i = kAFew; i < count; i++) { 26801cb0ef41Sopenharmony_ci alt_gens_.Add(new AlternativeGeneration(), zone); 26811cb0ef41Sopenharmony_ci } 26821cb0ef41Sopenharmony_ci } 26831cb0ef41Sopenharmony_ci ~AlternativeGenerationList() { 26841cb0ef41Sopenharmony_ci for (int i = kAFew; i < alt_gens_.length(); i++) { 26851cb0ef41Sopenharmony_ci delete alt_gens_[i]; 26861cb0ef41Sopenharmony_ci alt_gens_[i] = nullptr; 26871cb0ef41Sopenharmony_ci } 26881cb0ef41Sopenharmony_ci } 26891cb0ef41Sopenharmony_ci 26901cb0ef41Sopenharmony_ci AlternativeGeneration* at(int i) { return alt_gens_[i]; } 26911cb0ef41Sopenharmony_ci 26921cb0ef41Sopenharmony_ci private: 26931cb0ef41Sopenharmony_ci static const int kAFew = 10; 26941cb0ef41Sopenharmony_ci ZoneList<AlternativeGeneration*> alt_gens_; 26951cb0ef41Sopenharmony_ci AlternativeGeneration a_few_alt_gens_[kAFew]; 26961cb0ef41Sopenharmony_ci}; 26971cb0ef41Sopenharmony_ci 26981cb0ef41Sopenharmony_civoid BoyerMoorePositionInfo::Set(int character) { 26991cb0ef41Sopenharmony_ci SetInterval(Interval(character, character)); 27001cb0ef41Sopenharmony_ci} 27011cb0ef41Sopenharmony_ci 27021cb0ef41Sopenharmony_cinamespace { 27031cb0ef41Sopenharmony_ci 27041cb0ef41Sopenharmony_ciContainedInLattice AddRange(ContainedInLattice containment, const int* ranges, 27051cb0ef41Sopenharmony_ci int ranges_length, Interval new_range) { 27061cb0ef41Sopenharmony_ci DCHECK_EQ(1, ranges_length & 1); 27071cb0ef41Sopenharmony_ci DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1]); 27081cb0ef41Sopenharmony_ci if (containment == kLatticeUnknown) return containment; 27091cb0ef41Sopenharmony_ci bool inside = false; 27101cb0ef41Sopenharmony_ci int last = 0; 27111cb0ef41Sopenharmony_ci for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) { 27121cb0ef41Sopenharmony_ci // Consider the range from last to ranges[i]. 27131cb0ef41Sopenharmony_ci // We haven't got to the new range yet. 27141cb0ef41Sopenharmony_ci if (ranges[i] <= new_range.from()) continue; 27151cb0ef41Sopenharmony_ci // New range is wholly inside last-ranges[i]. Note that new_range.to() is 27161cb0ef41Sopenharmony_ci // inclusive, but the values in ranges are not. 27171cb0ef41Sopenharmony_ci if (last <= new_range.from() && new_range.to() < ranges[i]) { 27181cb0ef41Sopenharmony_ci return Combine(containment, inside ? kLatticeIn : kLatticeOut); 27191cb0ef41Sopenharmony_ci } 27201cb0ef41Sopenharmony_ci return kLatticeUnknown; 27211cb0ef41Sopenharmony_ci } 27221cb0ef41Sopenharmony_ci return containment; 27231cb0ef41Sopenharmony_ci} 27241cb0ef41Sopenharmony_ci 27251cb0ef41Sopenharmony_ciint BitsetFirstSetBit(BoyerMoorePositionInfo::Bitset bitset) { 27261cb0ef41Sopenharmony_ci STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == 27271cb0ef41Sopenharmony_ci 2 * kInt64Size * kBitsPerByte); 27281cb0ef41Sopenharmony_ci 27291cb0ef41Sopenharmony_ci // Slight fiddling is needed here, since the bitset is of length 128 while 27301cb0ef41Sopenharmony_ci // CountTrailingZeros requires an integral type and std::bitset can only 27311cb0ef41Sopenharmony_ci // convert to unsigned long long. So we handle the most- and least-significant 27321cb0ef41Sopenharmony_ci // bits separately. 27331cb0ef41Sopenharmony_ci 27341cb0ef41Sopenharmony_ci { 27351cb0ef41Sopenharmony_ci static constexpr BoyerMoorePositionInfo::Bitset mask(~uint64_t{0}); 27361cb0ef41Sopenharmony_ci BoyerMoorePositionInfo::Bitset masked_bitset = bitset & mask; 27371cb0ef41Sopenharmony_ci STATIC_ASSERT(kInt64Size >= sizeof(decltype(masked_bitset.to_ullong()))); 27381cb0ef41Sopenharmony_ci uint64_t lsb = masked_bitset.to_ullong(); 27391cb0ef41Sopenharmony_ci if (lsb != 0) return base::bits::CountTrailingZeros(lsb); 27401cb0ef41Sopenharmony_ci } 27411cb0ef41Sopenharmony_ci 27421cb0ef41Sopenharmony_ci { 27431cb0ef41Sopenharmony_ci BoyerMoorePositionInfo::Bitset masked_bitset = bitset >> 64; 27441cb0ef41Sopenharmony_ci uint64_t msb = masked_bitset.to_ullong(); 27451cb0ef41Sopenharmony_ci if (msb != 0) return 64 + base::bits::CountTrailingZeros(msb); 27461cb0ef41Sopenharmony_ci } 27471cb0ef41Sopenharmony_ci 27481cb0ef41Sopenharmony_ci return -1; 27491cb0ef41Sopenharmony_ci} 27501cb0ef41Sopenharmony_ci 27511cb0ef41Sopenharmony_ci} // namespace 27521cb0ef41Sopenharmony_ci 27531cb0ef41Sopenharmony_civoid BoyerMoorePositionInfo::SetInterval(const Interval& interval) { 27541cb0ef41Sopenharmony_ci w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); 27551cb0ef41Sopenharmony_ci 27561cb0ef41Sopenharmony_ci if (interval.size() >= kMapSize) { 27571cb0ef41Sopenharmony_ci map_count_ = kMapSize; 27581cb0ef41Sopenharmony_ci map_.set(); 27591cb0ef41Sopenharmony_ci return; 27601cb0ef41Sopenharmony_ci } 27611cb0ef41Sopenharmony_ci 27621cb0ef41Sopenharmony_ci for (int i = interval.from(); i <= interval.to(); i++) { 27631cb0ef41Sopenharmony_ci int mod_character = (i & kMask); 27641cb0ef41Sopenharmony_ci if (!map_[mod_character]) { 27651cb0ef41Sopenharmony_ci map_count_++; 27661cb0ef41Sopenharmony_ci map_.set(mod_character); 27671cb0ef41Sopenharmony_ci } 27681cb0ef41Sopenharmony_ci if (map_count_ == kMapSize) return; 27691cb0ef41Sopenharmony_ci } 27701cb0ef41Sopenharmony_ci} 27711cb0ef41Sopenharmony_ci 27721cb0ef41Sopenharmony_civoid BoyerMoorePositionInfo::SetAll() { 27731cb0ef41Sopenharmony_ci w_ = kLatticeUnknown; 27741cb0ef41Sopenharmony_ci if (map_count_ != kMapSize) { 27751cb0ef41Sopenharmony_ci map_count_ = kMapSize; 27761cb0ef41Sopenharmony_ci map_.set(); 27771cb0ef41Sopenharmony_ci } 27781cb0ef41Sopenharmony_ci} 27791cb0ef41Sopenharmony_ci 27801cb0ef41Sopenharmony_ciBoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler, 27811cb0ef41Sopenharmony_ci Zone* zone) 27821cb0ef41Sopenharmony_ci : length_(length), 27831cb0ef41Sopenharmony_ci compiler_(compiler), 27841cb0ef41Sopenharmony_ci max_char_(MaxCodeUnit(compiler->one_byte())) { 27851cb0ef41Sopenharmony_ci bitmaps_ = zone->New<ZoneList<BoyerMoorePositionInfo*>>(length, zone); 27861cb0ef41Sopenharmony_ci for (int i = 0; i < length; i++) { 27871cb0ef41Sopenharmony_ci bitmaps_->Add(zone->New<BoyerMoorePositionInfo>(), zone); 27881cb0ef41Sopenharmony_ci } 27891cb0ef41Sopenharmony_ci} 27901cb0ef41Sopenharmony_ci 27911cb0ef41Sopenharmony_ci// Find the longest range of lookahead that has the fewest number of different 27921cb0ef41Sopenharmony_ci// characters that can occur at a given position. Since we are optimizing two 27931cb0ef41Sopenharmony_ci// different parameters at once this is a tradeoff. 27941cb0ef41Sopenharmony_cibool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) { 27951cb0ef41Sopenharmony_ci int biggest_points = 0; 27961cb0ef41Sopenharmony_ci // If more than 32 characters out of 128 can occur it is unlikely that we can 27971cb0ef41Sopenharmony_ci // be lucky enough to step forwards much of the time. 27981cb0ef41Sopenharmony_ci const int kMaxMax = 32; 27991cb0ef41Sopenharmony_ci for (int max_number_of_chars = 4; max_number_of_chars < kMaxMax; 28001cb0ef41Sopenharmony_ci max_number_of_chars *= 2) { 28011cb0ef41Sopenharmony_ci biggest_points = 28021cb0ef41Sopenharmony_ci FindBestInterval(max_number_of_chars, biggest_points, from, to); 28031cb0ef41Sopenharmony_ci } 28041cb0ef41Sopenharmony_ci if (biggest_points == 0) return false; 28051cb0ef41Sopenharmony_ci return true; 28061cb0ef41Sopenharmony_ci} 28071cb0ef41Sopenharmony_ci 28081cb0ef41Sopenharmony_ci// Find the highest-points range between 0 and length_ where the character 28091cb0ef41Sopenharmony_ci// information is not too vague. 'Too vague' means that there are more than 28101cb0ef41Sopenharmony_ci// max_number_of_chars that can occur at this position. Calculates the number 28111cb0ef41Sopenharmony_ci// of points as the product of width-of-the-range and 28121cb0ef41Sopenharmony_ci// probability-of-finding-one-of-the-characters, where the probability is 28131cb0ef41Sopenharmony_ci// calculated using the frequency distribution of the sample subject string. 28141cb0ef41Sopenharmony_ciint BoyerMooreLookahead::FindBestInterval(int max_number_of_chars, 28151cb0ef41Sopenharmony_ci int old_biggest_points, int* from, 28161cb0ef41Sopenharmony_ci int* to) { 28171cb0ef41Sopenharmony_ci int biggest_points = old_biggest_points; 28181cb0ef41Sopenharmony_ci static const int kSize = RegExpMacroAssembler::kTableSize; 28191cb0ef41Sopenharmony_ci for (int i = 0; i < length_;) { 28201cb0ef41Sopenharmony_ci while (i < length_ && Count(i) > max_number_of_chars) i++; 28211cb0ef41Sopenharmony_ci if (i == length_) break; 28221cb0ef41Sopenharmony_ci int remembered_from = i; 28231cb0ef41Sopenharmony_ci 28241cb0ef41Sopenharmony_ci BoyerMoorePositionInfo::Bitset union_bitset; 28251cb0ef41Sopenharmony_ci for (; i < length_ && Count(i) <= max_number_of_chars; i++) { 28261cb0ef41Sopenharmony_ci union_bitset |= bitmaps_->at(i)->raw_bitset(); 28271cb0ef41Sopenharmony_ci } 28281cb0ef41Sopenharmony_ci 28291cb0ef41Sopenharmony_ci int frequency = 0; 28301cb0ef41Sopenharmony_ci 28311cb0ef41Sopenharmony_ci // Iterate only over set bits. 28321cb0ef41Sopenharmony_ci int j; 28331cb0ef41Sopenharmony_ci while ((j = BitsetFirstSetBit(union_bitset)) != -1) { 28341cb0ef41Sopenharmony_ci DCHECK(union_bitset[j]); // Sanity check. 28351cb0ef41Sopenharmony_ci // Add 1 to the frequency to give a small per-character boost for 28361cb0ef41Sopenharmony_ci // the cases where our sampling is not good enough and many 28371cb0ef41Sopenharmony_ci // characters have a frequency of zero. This means the frequency 28381cb0ef41Sopenharmony_ci // can theoretically be up to 2*kSize though we treat it mostly as 28391cb0ef41Sopenharmony_ci // a fraction of kSize. 28401cb0ef41Sopenharmony_ci frequency += compiler_->frequency_collator()->Frequency(j) + 1; 28411cb0ef41Sopenharmony_ci union_bitset.reset(j); 28421cb0ef41Sopenharmony_ci } 28431cb0ef41Sopenharmony_ci 28441cb0ef41Sopenharmony_ci // We use the probability of skipping times the distance we are skipping to 28451cb0ef41Sopenharmony_ci // judge the effectiveness of this. Actually we have a cut-off: By 28461cb0ef41Sopenharmony_ci // dividing by 2 we switch off the skipping if the probability of skipping 28471cb0ef41Sopenharmony_ci // is less than 50%. This is because the multibyte mask-and-compare 28481cb0ef41Sopenharmony_ci // skipping in quickcheck is more likely to do well on this case. 28491cb0ef41Sopenharmony_ci bool in_quickcheck_range = 28501cb0ef41Sopenharmony_ci ((i - remembered_from < 4) || 28511cb0ef41Sopenharmony_ci (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); 28521cb0ef41Sopenharmony_ci // Called 'probability' but it is only a rough estimate and can actually 28531cb0ef41Sopenharmony_ci // be outside the 0-kSize range. 28541cb0ef41Sopenharmony_ci int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency; 28551cb0ef41Sopenharmony_ci int points = (i - remembered_from) * probability; 28561cb0ef41Sopenharmony_ci if (points > biggest_points) { 28571cb0ef41Sopenharmony_ci *from = remembered_from; 28581cb0ef41Sopenharmony_ci *to = i - 1; 28591cb0ef41Sopenharmony_ci biggest_points = points; 28601cb0ef41Sopenharmony_ci } 28611cb0ef41Sopenharmony_ci } 28621cb0ef41Sopenharmony_ci return biggest_points; 28631cb0ef41Sopenharmony_ci} 28641cb0ef41Sopenharmony_ci 28651cb0ef41Sopenharmony_ci// Take all the characters that will not prevent a successful match if they 28661cb0ef41Sopenharmony_ci// occur in the subject string in the range between min_lookahead and 28671cb0ef41Sopenharmony_ci// max_lookahead (inclusive) measured from the current position. If the 28681cb0ef41Sopenharmony_ci// character at max_lookahead offset is not one of these characters, then we 28691cb0ef41Sopenharmony_ci// can safely skip forwards by the number of characters in the range. 28701cb0ef41Sopenharmony_ciint BoyerMooreLookahead::GetSkipTable(int min_lookahead, int max_lookahead, 28711cb0ef41Sopenharmony_ci Handle<ByteArray> boolean_skip_table) { 28721cb0ef41Sopenharmony_ci const int kSkipArrayEntry = 0; 28731cb0ef41Sopenharmony_ci const int kDontSkipArrayEntry = 1; 28741cb0ef41Sopenharmony_ci 28751cb0ef41Sopenharmony_ci std::memset(boolean_skip_table->GetDataStartAddress(), kSkipArrayEntry, 28761cb0ef41Sopenharmony_ci boolean_skip_table->length()); 28771cb0ef41Sopenharmony_ci 28781cb0ef41Sopenharmony_ci for (int i = max_lookahead; i >= min_lookahead; i--) { 28791cb0ef41Sopenharmony_ci BoyerMoorePositionInfo::Bitset bitset = bitmaps_->at(i)->raw_bitset(); 28801cb0ef41Sopenharmony_ci 28811cb0ef41Sopenharmony_ci // Iterate only over set bits. 28821cb0ef41Sopenharmony_ci int j; 28831cb0ef41Sopenharmony_ci while ((j = BitsetFirstSetBit(bitset)) != -1) { 28841cb0ef41Sopenharmony_ci DCHECK(bitset[j]); // Sanity check. 28851cb0ef41Sopenharmony_ci boolean_skip_table->set(j, kDontSkipArrayEntry); 28861cb0ef41Sopenharmony_ci bitset.reset(j); 28871cb0ef41Sopenharmony_ci } 28881cb0ef41Sopenharmony_ci } 28891cb0ef41Sopenharmony_ci 28901cb0ef41Sopenharmony_ci const int skip = max_lookahead + 1 - min_lookahead; 28911cb0ef41Sopenharmony_ci return skip; 28921cb0ef41Sopenharmony_ci} 28931cb0ef41Sopenharmony_ci 28941cb0ef41Sopenharmony_ci// See comment above on the implementation of GetSkipTable. 28951cb0ef41Sopenharmony_civoid BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { 28961cb0ef41Sopenharmony_ci const int kSize = RegExpMacroAssembler::kTableSize; 28971cb0ef41Sopenharmony_ci 28981cb0ef41Sopenharmony_ci int min_lookahead = 0; 28991cb0ef41Sopenharmony_ci int max_lookahead = 0; 29001cb0ef41Sopenharmony_ci 29011cb0ef41Sopenharmony_ci if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; 29021cb0ef41Sopenharmony_ci 29031cb0ef41Sopenharmony_ci // Check if we only have a single non-empty position info, and that info 29041cb0ef41Sopenharmony_ci // contains precisely one character. 29051cb0ef41Sopenharmony_ci bool found_single_character = false; 29061cb0ef41Sopenharmony_ci int single_character = 0; 29071cb0ef41Sopenharmony_ci for (int i = max_lookahead; i >= min_lookahead; i--) { 29081cb0ef41Sopenharmony_ci BoyerMoorePositionInfo* map = bitmaps_->at(i); 29091cb0ef41Sopenharmony_ci if (map->map_count() == 0) continue; 29101cb0ef41Sopenharmony_ci 29111cb0ef41Sopenharmony_ci if (found_single_character || map->map_count() > 1) { 29121cb0ef41Sopenharmony_ci found_single_character = false; 29131cb0ef41Sopenharmony_ci break; 29141cb0ef41Sopenharmony_ci } 29151cb0ef41Sopenharmony_ci 29161cb0ef41Sopenharmony_ci DCHECK(!found_single_character); 29171cb0ef41Sopenharmony_ci DCHECK_EQ(map->map_count(), 1); 29181cb0ef41Sopenharmony_ci 29191cb0ef41Sopenharmony_ci found_single_character = true; 29201cb0ef41Sopenharmony_ci single_character = BitsetFirstSetBit(map->raw_bitset()); 29211cb0ef41Sopenharmony_ci 29221cb0ef41Sopenharmony_ci DCHECK_NE(single_character, -1); 29231cb0ef41Sopenharmony_ci } 29241cb0ef41Sopenharmony_ci 29251cb0ef41Sopenharmony_ci int lookahead_width = max_lookahead + 1 - min_lookahead; 29261cb0ef41Sopenharmony_ci 29271cb0ef41Sopenharmony_ci if (found_single_character && lookahead_width == 1 && max_lookahead < 3) { 29281cb0ef41Sopenharmony_ci // The mask-compare can probably handle this better. 29291cb0ef41Sopenharmony_ci return; 29301cb0ef41Sopenharmony_ci } 29311cb0ef41Sopenharmony_ci 29321cb0ef41Sopenharmony_ci if (found_single_character) { 29331cb0ef41Sopenharmony_ci Label cont, again; 29341cb0ef41Sopenharmony_ci masm->Bind(&again); 29351cb0ef41Sopenharmony_ci masm->LoadCurrentCharacter(max_lookahead, &cont, true); 29361cb0ef41Sopenharmony_ci if (max_char_ > kSize) { 29371cb0ef41Sopenharmony_ci masm->CheckCharacterAfterAnd(single_character, 29381cb0ef41Sopenharmony_ci RegExpMacroAssembler::kTableMask, &cont); 29391cb0ef41Sopenharmony_ci } else { 29401cb0ef41Sopenharmony_ci masm->CheckCharacter(single_character, &cont); 29411cb0ef41Sopenharmony_ci } 29421cb0ef41Sopenharmony_ci masm->AdvanceCurrentPosition(lookahead_width); 29431cb0ef41Sopenharmony_ci masm->GoTo(&again); 29441cb0ef41Sopenharmony_ci masm->Bind(&cont); 29451cb0ef41Sopenharmony_ci return; 29461cb0ef41Sopenharmony_ci } 29471cb0ef41Sopenharmony_ci 29481cb0ef41Sopenharmony_ci Factory* factory = masm->isolate()->factory(); 29491cb0ef41Sopenharmony_ci Handle<ByteArray> boolean_skip_table = 29501cb0ef41Sopenharmony_ci factory->NewByteArray(kSize, AllocationType::kOld); 29511cb0ef41Sopenharmony_ci int skip_distance = 29521cb0ef41Sopenharmony_ci GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table); 29531cb0ef41Sopenharmony_ci DCHECK_NE(0, skip_distance); 29541cb0ef41Sopenharmony_ci 29551cb0ef41Sopenharmony_ci Label cont, again; 29561cb0ef41Sopenharmony_ci masm->Bind(&again); 29571cb0ef41Sopenharmony_ci masm->LoadCurrentCharacter(max_lookahead, &cont, true); 29581cb0ef41Sopenharmony_ci masm->CheckBitInTable(boolean_skip_table, &cont); 29591cb0ef41Sopenharmony_ci masm->AdvanceCurrentPosition(skip_distance); 29601cb0ef41Sopenharmony_ci masm->GoTo(&again); 29611cb0ef41Sopenharmony_ci masm->Bind(&cont); 29621cb0ef41Sopenharmony_ci} 29631cb0ef41Sopenharmony_ci 29641cb0ef41Sopenharmony_ci/* Code generation for choice nodes. 29651cb0ef41Sopenharmony_ci * 29661cb0ef41Sopenharmony_ci * We generate quick checks that do a mask and compare to eliminate a 29671cb0ef41Sopenharmony_ci * choice. If the quick check succeeds then it jumps to the continuation to 29681cb0ef41Sopenharmony_ci * do slow checks and check subsequent nodes. If it fails (the common case) 29691cb0ef41Sopenharmony_ci * it falls through to the next choice. 29701cb0ef41Sopenharmony_ci * 29711cb0ef41Sopenharmony_ci * Here is the desired flow graph. Nodes directly below each other imply 29721cb0ef41Sopenharmony_ci * fallthrough. Alternatives 1 and 2 have quick checks. Alternative 29731cb0ef41Sopenharmony_ci * 3 doesn't have a quick check so we have to call the slow check. 29741cb0ef41Sopenharmony_ci * Nodes are marked Qn for quick checks and Sn for slow checks. The entire 29751cb0ef41Sopenharmony_ci * regexp continuation is generated directly after the Sn node, up to the 29761cb0ef41Sopenharmony_ci * next GoTo if we decide to reuse some already generated code. Some 29771cb0ef41Sopenharmony_ci * nodes expect preload_characters to be preloaded into the current 29781cb0ef41Sopenharmony_ci * character register. R nodes do this preloading. Vertices are marked 29791cb0ef41Sopenharmony_ci * F for failures and S for success (possible success in the case of quick 29801cb0ef41Sopenharmony_ci * nodes). L, V, < and > are used as arrow heads. 29811cb0ef41Sopenharmony_ci * 29821cb0ef41Sopenharmony_ci * ----------> R 29831cb0ef41Sopenharmony_ci * | 29841cb0ef41Sopenharmony_ci * V 29851cb0ef41Sopenharmony_ci * Q1 -----> S1 29861cb0ef41Sopenharmony_ci * | S / 29871cb0ef41Sopenharmony_ci * F| / 29881cb0ef41Sopenharmony_ci * | F/ 29891cb0ef41Sopenharmony_ci * | / 29901cb0ef41Sopenharmony_ci * | R 29911cb0ef41Sopenharmony_ci * | / 29921cb0ef41Sopenharmony_ci * V L 29931cb0ef41Sopenharmony_ci * Q2 -----> S2 29941cb0ef41Sopenharmony_ci * | S / 29951cb0ef41Sopenharmony_ci * F| / 29961cb0ef41Sopenharmony_ci * | F/ 29971cb0ef41Sopenharmony_ci * | / 29981cb0ef41Sopenharmony_ci * | R 29991cb0ef41Sopenharmony_ci * | / 30001cb0ef41Sopenharmony_ci * V L 30011cb0ef41Sopenharmony_ci * S3 30021cb0ef41Sopenharmony_ci * | 30031cb0ef41Sopenharmony_ci * F| 30041cb0ef41Sopenharmony_ci * | 30051cb0ef41Sopenharmony_ci * R 30061cb0ef41Sopenharmony_ci * | 30071cb0ef41Sopenharmony_ci * backtrack V 30081cb0ef41Sopenharmony_ci * <----------Q4 30091cb0ef41Sopenharmony_ci * \ F | 30101cb0ef41Sopenharmony_ci * \ |S 30111cb0ef41Sopenharmony_ci * \ F V 30121cb0ef41Sopenharmony_ci * \-----S4 30131cb0ef41Sopenharmony_ci * 30141cb0ef41Sopenharmony_ci * For greedy loops we push the current position, then generate the code that 30151cb0ef41Sopenharmony_ci * eats the input specially in EmitGreedyLoop. The other choice (the 30161cb0ef41Sopenharmony_ci * continuation) is generated by the normal code in EmitChoices, and steps back 30171cb0ef41Sopenharmony_ci * in the input to the starting position when it fails to match. The loop code 30181cb0ef41Sopenharmony_ci * looks like this (U is the unwind code that steps back in the greedy loop). 30191cb0ef41Sopenharmony_ci * 30201cb0ef41Sopenharmony_ci * _____ 30211cb0ef41Sopenharmony_ci * / \ 30221cb0ef41Sopenharmony_ci * V | 30231cb0ef41Sopenharmony_ci * ----------> S1 | 30241cb0ef41Sopenharmony_ci * /| | 30251cb0ef41Sopenharmony_ci * / |S | 30261cb0ef41Sopenharmony_ci * F/ \_____/ 30271cb0ef41Sopenharmony_ci * / 30281cb0ef41Sopenharmony_ci * |<----- 30291cb0ef41Sopenharmony_ci * | \ 30301cb0ef41Sopenharmony_ci * V |S 30311cb0ef41Sopenharmony_ci * Q2 ---> U----->backtrack 30321cb0ef41Sopenharmony_ci * | F / 30331cb0ef41Sopenharmony_ci * S| / 30341cb0ef41Sopenharmony_ci * V F / 30351cb0ef41Sopenharmony_ci * S2--/ 30361cb0ef41Sopenharmony_ci */ 30371cb0ef41Sopenharmony_ci 30381cb0ef41Sopenharmony_ciGreedyLoopState::GreedyLoopState(bool not_at_start) { 30391cb0ef41Sopenharmony_ci counter_backtrack_trace_.set_backtrack(&label_); 30401cb0ef41Sopenharmony_ci if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); 30411cb0ef41Sopenharmony_ci} 30421cb0ef41Sopenharmony_ci 30431cb0ef41Sopenharmony_civoid ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { 30441cb0ef41Sopenharmony_ci#ifdef DEBUG 30451cb0ef41Sopenharmony_ci int choice_count = alternatives_->length(); 30461cb0ef41Sopenharmony_ci for (int i = 0; i < choice_count - 1; i++) { 30471cb0ef41Sopenharmony_ci GuardedAlternative alternative = alternatives_->at(i); 30481cb0ef41Sopenharmony_ci ZoneList<Guard*>* guards = alternative.guards(); 30491cb0ef41Sopenharmony_ci int guard_count = (guards == nullptr) ? 0 : guards->length(); 30501cb0ef41Sopenharmony_ci for (int j = 0; j < guard_count; j++) { 30511cb0ef41Sopenharmony_ci DCHECK(!trace->mentions_reg(guards->at(j)->reg())); 30521cb0ef41Sopenharmony_ci } 30531cb0ef41Sopenharmony_ci } 30541cb0ef41Sopenharmony_ci#endif 30551cb0ef41Sopenharmony_ci} 30561cb0ef41Sopenharmony_ci 30571cb0ef41Sopenharmony_civoid ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace, 30581cb0ef41Sopenharmony_ci PreloadState* state) { 30591cb0ef41Sopenharmony_ci if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { 30601cb0ef41Sopenharmony_ci // Save some time by looking at most one machine word ahead. 30611cb0ef41Sopenharmony_ci state->eats_at_least_ = 30621cb0ef41Sopenharmony_ci EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE); 30631cb0ef41Sopenharmony_ci } 30641cb0ef41Sopenharmony_ci state->preload_characters_ = 30651cb0ef41Sopenharmony_ci CalculatePreloadCharacters(compiler, state->eats_at_least_); 30661cb0ef41Sopenharmony_ci 30671cb0ef41Sopenharmony_ci state->preload_is_current_ = 30681cb0ef41Sopenharmony_ci (current_trace->characters_preloaded() == state->preload_characters_); 30691cb0ef41Sopenharmony_ci state->preload_has_checked_bounds_ = state->preload_is_current_; 30701cb0ef41Sopenharmony_ci} 30711cb0ef41Sopenharmony_ci 30721cb0ef41Sopenharmony_civoid ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 30731cb0ef41Sopenharmony_ci int choice_count = alternatives_->length(); 30741cb0ef41Sopenharmony_ci 30751cb0ef41Sopenharmony_ci if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) { 30761cb0ef41Sopenharmony_ci alternatives_->at(0).node()->Emit(compiler, trace); 30771cb0ef41Sopenharmony_ci return; 30781cb0ef41Sopenharmony_ci } 30791cb0ef41Sopenharmony_ci 30801cb0ef41Sopenharmony_ci AssertGuardsMentionRegisters(trace); 30811cb0ef41Sopenharmony_ci 30821cb0ef41Sopenharmony_ci LimitResult limit_result = LimitVersions(compiler, trace); 30831cb0ef41Sopenharmony_ci if (limit_result == DONE) return; 30841cb0ef41Sopenharmony_ci DCHECK(limit_result == CONTINUE); 30851cb0ef41Sopenharmony_ci 30861cb0ef41Sopenharmony_ci // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for 30871cb0ef41Sopenharmony_ci // other choice nodes we only flush if we are out of code size budget. 30881cb0ef41Sopenharmony_ci if (trace->flush_budget() == 0 && trace->actions() != nullptr) { 30891cb0ef41Sopenharmony_ci trace->Flush(compiler, this); 30901cb0ef41Sopenharmony_ci return; 30911cb0ef41Sopenharmony_ci } 30921cb0ef41Sopenharmony_ci 30931cb0ef41Sopenharmony_ci RecursionCheck rc(compiler); 30941cb0ef41Sopenharmony_ci 30951cb0ef41Sopenharmony_ci PreloadState preload; 30961cb0ef41Sopenharmony_ci preload.init(); 30971cb0ef41Sopenharmony_ci GreedyLoopState greedy_loop_state(not_at_start()); 30981cb0ef41Sopenharmony_ci 30991cb0ef41Sopenharmony_ci int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0)); 31001cb0ef41Sopenharmony_ci AlternativeGenerationList alt_gens(choice_count, zone()); 31011cb0ef41Sopenharmony_ci 31021cb0ef41Sopenharmony_ci if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { 31031cb0ef41Sopenharmony_ci trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload, 31041cb0ef41Sopenharmony_ci &greedy_loop_state, text_length); 31051cb0ef41Sopenharmony_ci } else { 31061cb0ef41Sopenharmony_ci // TODO(erikcorry): Delete this. We don't need this label, but it makes us 31071cb0ef41Sopenharmony_ci // match the traces produced pre-cleanup. 31081cb0ef41Sopenharmony_ci Label second_choice; 31091cb0ef41Sopenharmony_ci compiler->macro_assembler()->Bind(&second_choice); 31101cb0ef41Sopenharmony_ci 31111cb0ef41Sopenharmony_ci preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); 31121cb0ef41Sopenharmony_ci 31131cb0ef41Sopenharmony_ci EmitChoices(compiler, &alt_gens, 0, trace, &preload); 31141cb0ef41Sopenharmony_ci } 31151cb0ef41Sopenharmony_ci 31161cb0ef41Sopenharmony_ci // At this point we need to generate slow checks for the alternatives where 31171cb0ef41Sopenharmony_ci // the quick check was inlined. We can recognize these because the associated 31181cb0ef41Sopenharmony_ci // label was bound. 31191cb0ef41Sopenharmony_ci int new_flush_budget = trace->flush_budget() / choice_count; 31201cb0ef41Sopenharmony_ci for (int i = 0; i < choice_count; i++) { 31211cb0ef41Sopenharmony_ci AlternativeGeneration* alt_gen = alt_gens.at(i); 31221cb0ef41Sopenharmony_ci Trace new_trace(*trace); 31231cb0ef41Sopenharmony_ci // If there are actions to be flushed we have to limit how many times 31241cb0ef41Sopenharmony_ci // they are flushed. Take the budget of the parent trace and distribute 31251cb0ef41Sopenharmony_ci // it fairly amongst the children. 31261cb0ef41Sopenharmony_ci if (new_trace.actions() != nullptr) { 31271cb0ef41Sopenharmony_ci new_trace.set_flush_budget(new_flush_budget); 31281cb0ef41Sopenharmony_ci } 31291cb0ef41Sopenharmony_ci bool next_expects_preload = 31301cb0ef41Sopenharmony_ci i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; 31311cb0ef41Sopenharmony_ci EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->at(i), 31321cb0ef41Sopenharmony_ci alt_gen, preload.preload_characters_, 31331cb0ef41Sopenharmony_ci next_expects_preload); 31341cb0ef41Sopenharmony_ci } 31351cb0ef41Sopenharmony_ci} 31361cb0ef41Sopenharmony_ci 31371cb0ef41Sopenharmony_ciTrace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace, 31381cb0ef41Sopenharmony_ci AlternativeGenerationList* alt_gens, 31391cb0ef41Sopenharmony_ci PreloadState* preload, 31401cb0ef41Sopenharmony_ci GreedyLoopState* greedy_loop_state, 31411cb0ef41Sopenharmony_ci int text_length) { 31421cb0ef41Sopenharmony_ci RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 31431cb0ef41Sopenharmony_ci // Here we have special handling for greedy loops containing only text nodes 31441cb0ef41Sopenharmony_ci // and other simple nodes. These are handled by pushing the current 31451cb0ef41Sopenharmony_ci // position on the stack and then incrementing the current position each 31461cb0ef41Sopenharmony_ci // time around the switch. On backtrack we decrement the current position 31471cb0ef41Sopenharmony_ci // and check it against the pushed value. This avoids pushing backtrack 31481cb0ef41Sopenharmony_ci // information for each iteration of the loop, which could take up a lot of 31491cb0ef41Sopenharmony_ci // space. 31501cb0ef41Sopenharmony_ci DCHECK(trace->stop_node() == nullptr); 31511cb0ef41Sopenharmony_ci macro_assembler->PushCurrentPosition(); 31521cb0ef41Sopenharmony_ci Label greedy_match_failed; 31531cb0ef41Sopenharmony_ci Trace greedy_match_trace; 31541cb0ef41Sopenharmony_ci if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); 31551cb0ef41Sopenharmony_ci greedy_match_trace.set_backtrack(&greedy_match_failed); 31561cb0ef41Sopenharmony_ci Label loop_label; 31571cb0ef41Sopenharmony_ci macro_assembler->Bind(&loop_label); 31581cb0ef41Sopenharmony_ci greedy_match_trace.set_stop_node(this); 31591cb0ef41Sopenharmony_ci greedy_match_trace.set_loop_label(&loop_label); 31601cb0ef41Sopenharmony_ci alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); 31611cb0ef41Sopenharmony_ci macro_assembler->Bind(&greedy_match_failed); 31621cb0ef41Sopenharmony_ci 31631cb0ef41Sopenharmony_ci Label second_choice; // For use in greedy matches. 31641cb0ef41Sopenharmony_ci macro_assembler->Bind(&second_choice); 31651cb0ef41Sopenharmony_ci 31661cb0ef41Sopenharmony_ci Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); 31671cb0ef41Sopenharmony_ci 31681cb0ef41Sopenharmony_ci EmitChoices(compiler, alt_gens, 1, new_trace, preload); 31691cb0ef41Sopenharmony_ci 31701cb0ef41Sopenharmony_ci macro_assembler->Bind(greedy_loop_state->label()); 31711cb0ef41Sopenharmony_ci // If we have unwound to the bottom then backtrack. 31721cb0ef41Sopenharmony_ci macro_assembler->CheckGreedyLoop(trace->backtrack()); 31731cb0ef41Sopenharmony_ci // Otherwise try the second priority at an earlier position. 31741cb0ef41Sopenharmony_ci macro_assembler->AdvanceCurrentPosition(-text_length); 31751cb0ef41Sopenharmony_ci macro_assembler->GoTo(&second_choice); 31761cb0ef41Sopenharmony_ci return new_trace; 31771cb0ef41Sopenharmony_ci} 31781cb0ef41Sopenharmony_ci 31791cb0ef41Sopenharmony_ciint ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, 31801cb0ef41Sopenharmony_ci Trace* trace) { 31811cb0ef41Sopenharmony_ci int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; 31821cb0ef41Sopenharmony_ci if (alternatives_->length() != 2) return eats_at_least; 31831cb0ef41Sopenharmony_ci 31841cb0ef41Sopenharmony_ci GuardedAlternative alt1 = alternatives_->at(1); 31851cb0ef41Sopenharmony_ci if (alt1.guards() != nullptr && alt1.guards()->length() != 0) { 31861cb0ef41Sopenharmony_ci return eats_at_least; 31871cb0ef41Sopenharmony_ci } 31881cb0ef41Sopenharmony_ci RegExpNode* eats_anything_node = alt1.node(); 31891cb0ef41Sopenharmony_ci if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) { 31901cb0ef41Sopenharmony_ci return eats_at_least; 31911cb0ef41Sopenharmony_ci } 31921cb0ef41Sopenharmony_ci 31931cb0ef41Sopenharmony_ci // Really we should be creating a new trace when we execute this function, 31941cb0ef41Sopenharmony_ci // but there is no need, because the code it generates cannot backtrack, and 31951cb0ef41Sopenharmony_ci // we always arrive here with a trivial trace (since it's the entry to a 31961cb0ef41Sopenharmony_ci // loop. That also implies that there are no preloaded characters, which is 31971cb0ef41Sopenharmony_ci // good, because it means we won't be violating any assumptions by 31981cb0ef41Sopenharmony_ci // overwriting those characters with new load instructions. 31991cb0ef41Sopenharmony_ci DCHECK(trace->is_trivial()); 32001cb0ef41Sopenharmony_ci 32011cb0ef41Sopenharmony_ci RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 32021cb0ef41Sopenharmony_ci Isolate* isolate = macro_assembler->isolate(); 32031cb0ef41Sopenharmony_ci // At this point we know that we are at a non-greedy loop that will eat 32041cb0ef41Sopenharmony_ci // any character one at a time. Any non-anchored regexp has such a 32051cb0ef41Sopenharmony_ci // loop prepended to it in order to find where it starts. We look for 32061cb0ef41Sopenharmony_ci // a pattern of the form ...abc... where we can look 6 characters ahead 32071cb0ef41Sopenharmony_ci // and step forwards 3 if the character is not one of abc. Abc need 32081cb0ef41Sopenharmony_ci // not be atoms, they can be any reasonably limited character class or 32091cb0ef41Sopenharmony_ci // small alternation. 32101cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm = bm_info(false); 32111cb0ef41Sopenharmony_ci if (bm == nullptr) { 32121cb0ef41Sopenharmony_ci eats_at_least = std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false)); 32131cb0ef41Sopenharmony_ci if (eats_at_least >= 1) { 32141cb0ef41Sopenharmony_ci bm = zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone()); 32151cb0ef41Sopenharmony_ci GuardedAlternative alt0 = alternatives_->at(0); 32161cb0ef41Sopenharmony_ci alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false); 32171cb0ef41Sopenharmony_ci } 32181cb0ef41Sopenharmony_ci } 32191cb0ef41Sopenharmony_ci if (bm != nullptr) { 32201cb0ef41Sopenharmony_ci bm->EmitSkipInstructions(macro_assembler); 32211cb0ef41Sopenharmony_ci } 32221cb0ef41Sopenharmony_ci return eats_at_least; 32231cb0ef41Sopenharmony_ci} 32241cb0ef41Sopenharmony_ci 32251cb0ef41Sopenharmony_civoid ChoiceNode::EmitChoices(RegExpCompiler* compiler, 32261cb0ef41Sopenharmony_ci AlternativeGenerationList* alt_gens, 32271cb0ef41Sopenharmony_ci int first_choice, Trace* trace, 32281cb0ef41Sopenharmony_ci PreloadState* preload) { 32291cb0ef41Sopenharmony_ci RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 32301cb0ef41Sopenharmony_ci SetUpPreLoad(compiler, trace, preload); 32311cb0ef41Sopenharmony_ci 32321cb0ef41Sopenharmony_ci // For now we just call all choices one after the other. The idea ultimately 32331cb0ef41Sopenharmony_ci // is to use the Dispatch table to try only the relevant ones. 32341cb0ef41Sopenharmony_ci int choice_count = alternatives_->length(); 32351cb0ef41Sopenharmony_ci 32361cb0ef41Sopenharmony_ci int new_flush_budget = trace->flush_budget() / choice_count; 32371cb0ef41Sopenharmony_ci 32381cb0ef41Sopenharmony_ci for (int i = first_choice; i < choice_count; i++) { 32391cb0ef41Sopenharmony_ci bool is_last = i == choice_count - 1; 32401cb0ef41Sopenharmony_ci bool fall_through_on_failure = !is_last; 32411cb0ef41Sopenharmony_ci GuardedAlternative alternative = alternatives_->at(i); 32421cb0ef41Sopenharmony_ci AlternativeGeneration* alt_gen = alt_gens->at(i); 32431cb0ef41Sopenharmony_ci alt_gen->quick_check_details.set_characters(preload->preload_characters_); 32441cb0ef41Sopenharmony_ci ZoneList<Guard*>* guards = alternative.guards(); 32451cb0ef41Sopenharmony_ci int guard_count = (guards == nullptr) ? 0 : guards->length(); 32461cb0ef41Sopenharmony_ci Trace new_trace(*trace); 32471cb0ef41Sopenharmony_ci new_trace.set_characters_preloaded( 32481cb0ef41Sopenharmony_ci preload->preload_is_current_ ? preload->preload_characters_ : 0); 32491cb0ef41Sopenharmony_ci if (preload->preload_has_checked_bounds_) { 32501cb0ef41Sopenharmony_ci new_trace.set_bound_checked_up_to(preload->preload_characters_); 32511cb0ef41Sopenharmony_ci } 32521cb0ef41Sopenharmony_ci new_trace.quick_check_performed()->Clear(); 32531cb0ef41Sopenharmony_ci if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); 32541cb0ef41Sopenharmony_ci if (!is_last) { 32551cb0ef41Sopenharmony_ci new_trace.set_backtrack(&alt_gen->after); 32561cb0ef41Sopenharmony_ci } 32571cb0ef41Sopenharmony_ci alt_gen->expects_preload = preload->preload_is_current_; 32581cb0ef41Sopenharmony_ci bool generate_full_check_inline = false; 32591cb0ef41Sopenharmony_ci if (compiler->optimize() && 32601cb0ef41Sopenharmony_ci try_to_emit_quick_check_for_alternative(i == 0) && 32611cb0ef41Sopenharmony_ci alternative.node()->EmitQuickCheck( 32621cb0ef41Sopenharmony_ci compiler, trace, &new_trace, preload->preload_has_checked_bounds_, 32631cb0ef41Sopenharmony_ci &alt_gen->possible_success, &alt_gen->quick_check_details, 32641cb0ef41Sopenharmony_ci fall_through_on_failure, this)) { 32651cb0ef41Sopenharmony_ci // Quick check was generated for this choice. 32661cb0ef41Sopenharmony_ci preload->preload_is_current_ = true; 32671cb0ef41Sopenharmony_ci preload->preload_has_checked_bounds_ = true; 32681cb0ef41Sopenharmony_ci // If we generated the quick check to fall through on possible success, 32691cb0ef41Sopenharmony_ci // we now need to generate the full check inline. 32701cb0ef41Sopenharmony_ci if (!fall_through_on_failure) { 32711cb0ef41Sopenharmony_ci macro_assembler->Bind(&alt_gen->possible_success); 32721cb0ef41Sopenharmony_ci new_trace.set_quick_check_performed(&alt_gen->quick_check_details); 32731cb0ef41Sopenharmony_ci new_trace.set_characters_preloaded(preload->preload_characters_); 32741cb0ef41Sopenharmony_ci new_trace.set_bound_checked_up_to(preload->preload_characters_); 32751cb0ef41Sopenharmony_ci generate_full_check_inline = true; 32761cb0ef41Sopenharmony_ci } 32771cb0ef41Sopenharmony_ci } else if (alt_gen->quick_check_details.cannot_match()) { 32781cb0ef41Sopenharmony_ci if (!fall_through_on_failure) { 32791cb0ef41Sopenharmony_ci macro_assembler->GoTo(trace->backtrack()); 32801cb0ef41Sopenharmony_ci } 32811cb0ef41Sopenharmony_ci continue; 32821cb0ef41Sopenharmony_ci } else { 32831cb0ef41Sopenharmony_ci // No quick check was generated. Put the full code here. 32841cb0ef41Sopenharmony_ci // If this is not the first choice then there could be slow checks from 32851cb0ef41Sopenharmony_ci // previous cases that go here when they fail. There's no reason to 32861cb0ef41Sopenharmony_ci // insist that they preload characters since the slow check we are about 32871cb0ef41Sopenharmony_ci // to generate probably can't use it. 32881cb0ef41Sopenharmony_ci if (i != first_choice) { 32891cb0ef41Sopenharmony_ci alt_gen->expects_preload = false; 32901cb0ef41Sopenharmony_ci new_trace.InvalidateCurrentCharacter(); 32911cb0ef41Sopenharmony_ci } 32921cb0ef41Sopenharmony_ci generate_full_check_inline = true; 32931cb0ef41Sopenharmony_ci } 32941cb0ef41Sopenharmony_ci if (generate_full_check_inline) { 32951cb0ef41Sopenharmony_ci if (new_trace.actions() != nullptr) { 32961cb0ef41Sopenharmony_ci new_trace.set_flush_budget(new_flush_budget); 32971cb0ef41Sopenharmony_ci } 32981cb0ef41Sopenharmony_ci for (int j = 0; j < guard_count; j++) { 32991cb0ef41Sopenharmony_ci GenerateGuard(macro_assembler, guards->at(j), &new_trace); 33001cb0ef41Sopenharmony_ci } 33011cb0ef41Sopenharmony_ci alternative.node()->Emit(compiler, &new_trace); 33021cb0ef41Sopenharmony_ci preload->preload_is_current_ = false; 33031cb0ef41Sopenharmony_ci } 33041cb0ef41Sopenharmony_ci macro_assembler->Bind(&alt_gen->after); 33051cb0ef41Sopenharmony_ci } 33061cb0ef41Sopenharmony_ci} 33071cb0ef41Sopenharmony_ci 33081cb0ef41Sopenharmony_civoid ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, 33091cb0ef41Sopenharmony_ci Trace* trace, 33101cb0ef41Sopenharmony_ci GuardedAlternative alternative, 33111cb0ef41Sopenharmony_ci AlternativeGeneration* alt_gen, 33121cb0ef41Sopenharmony_ci int preload_characters, 33131cb0ef41Sopenharmony_ci bool next_expects_preload) { 33141cb0ef41Sopenharmony_ci if (!alt_gen->possible_success.is_linked()) return; 33151cb0ef41Sopenharmony_ci 33161cb0ef41Sopenharmony_ci RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 33171cb0ef41Sopenharmony_ci macro_assembler->Bind(&alt_gen->possible_success); 33181cb0ef41Sopenharmony_ci Trace out_of_line_trace(*trace); 33191cb0ef41Sopenharmony_ci out_of_line_trace.set_characters_preloaded(preload_characters); 33201cb0ef41Sopenharmony_ci out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details); 33211cb0ef41Sopenharmony_ci if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE); 33221cb0ef41Sopenharmony_ci ZoneList<Guard*>* guards = alternative.guards(); 33231cb0ef41Sopenharmony_ci int guard_count = (guards == nullptr) ? 0 : guards->length(); 33241cb0ef41Sopenharmony_ci if (next_expects_preload) { 33251cb0ef41Sopenharmony_ci Label reload_current_char; 33261cb0ef41Sopenharmony_ci out_of_line_trace.set_backtrack(&reload_current_char); 33271cb0ef41Sopenharmony_ci for (int j = 0; j < guard_count; j++) { 33281cb0ef41Sopenharmony_ci GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); 33291cb0ef41Sopenharmony_ci } 33301cb0ef41Sopenharmony_ci alternative.node()->Emit(compiler, &out_of_line_trace); 33311cb0ef41Sopenharmony_ci macro_assembler->Bind(&reload_current_char); 33321cb0ef41Sopenharmony_ci // Reload the current character, since the next quick check expects that. 33331cb0ef41Sopenharmony_ci // We don't need to check bounds here because we only get into this 33341cb0ef41Sopenharmony_ci // code through a quick check which already did the checked load. 33351cb0ef41Sopenharmony_ci macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false, 33361cb0ef41Sopenharmony_ci preload_characters); 33371cb0ef41Sopenharmony_ci macro_assembler->GoTo(&(alt_gen->after)); 33381cb0ef41Sopenharmony_ci } else { 33391cb0ef41Sopenharmony_ci out_of_line_trace.set_backtrack(&(alt_gen->after)); 33401cb0ef41Sopenharmony_ci for (int j = 0; j < guard_count; j++) { 33411cb0ef41Sopenharmony_ci GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); 33421cb0ef41Sopenharmony_ci } 33431cb0ef41Sopenharmony_ci alternative.node()->Emit(compiler, &out_of_line_trace); 33441cb0ef41Sopenharmony_ci } 33451cb0ef41Sopenharmony_ci} 33461cb0ef41Sopenharmony_ci 33471cb0ef41Sopenharmony_civoid ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 33481cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 33491cb0ef41Sopenharmony_ci LimitResult limit_result = LimitVersions(compiler, trace); 33501cb0ef41Sopenharmony_ci if (limit_result == DONE) return; 33511cb0ef41Sopenharmony_ci DCHECK(limit_result == CONTINUE); 33521cb0ef41Sopenharmony_ci 33531cb0ef41Sopenharmony_ci RecursionCheck rc(compiler); 33541cb0ef41Sopenharmony_ci 33551cb0ef41Sopenharmony_ci switch (action_type_) { 33561cb0ef41Sopenharmony_ci case STORE_POSITION: { 33571cb0ef41Sopenharmony_ci Trace::DeferredCapture new_capture(data_.u_position_register.reg, 33581cb0ef41Sopenharmony_ci data_.u_position_register.is_capture, 33591cb0ef41Sopenharmony_ci trace); 33601cb0ef41Sopenharmony_ci Trace new_trace = *trace; 33611cb0ef41Sopenharmony_ci new_trace.add_action(&new_capture); 33621cb0ef41Sopenharmony_ci on_success()->Emit(compiler, &new_trace); 33631cb0ef41Sopenharmony_ci break; 33641cb0ef41Sopenharmony_ci } 33651cb0ef41Sopenharmony_ci case INCREMENT_REGISTER: { 33661cb0ef41Sopenharmony_ci Trace::DeferredIncrementRegister new_increment( 33671cb0ef41Sopenharmony_ci data_.u_increment_register.reg); 33681cb0ef41Sopenharmony_ci Trace new_trace = *trace; 33691cb0ef41Sopenharmony_ci new_trace.add_action(&new_increment); 33701cb0ef41Sopenharmony_ci on_success()->Emit(compiler, &new_trace); 33711cb0ef41Sopenharmony_ci break; 33721cb0ef41Sopenharmony_ci } 33731cb0ef41Sopenharmony_ci case SET_REGISTER_FOR_LOOP: { 33741cb0ef41Sopenharmony_ci Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg, 33751cb0ef41Sopenharmony_ci data_.u_store_register.value); 33761cb0ef41Sopenharmony_ci Trace new_trace = *trace; 33771cb0ef41Sopenharmony_ci new_trace.add_action(&new_set); 33781cb0ef41Sopenharmony_ci on_success()->Emit(compiler, &new_trace); 33791cb0ef41Sopenharmony_ci break; 33801cb0ef41Sopenharmony_ci } 33811cb0ef41Sopenharmony_ci case CLEAR_CAPTURES: { 33821cb0ef41Sopenharmony_ci Trace::DeferredClearCaptures new_capture(Interval( 33831cb0ef41Sopenharmony_ci data_.u_clear_captures.range_from, data_.u_clear_captures.range_to)); 33841cb0ef41Sopenharmony_ci Trace new_trace = *trace; 33851cb0ef41Sopenharmony_ci new_trace.add_action(&new_capture); 33861cb0ef41Sopenharmony_ci on_success()->Emit(compiler, &new_trace); 33871cb0ef41Sopenharmony_ci break; 33881cb0ef41Sopenharmony_ci } 33891cb0ef41Sopenharmony_ci case BEGIN_POSITIVE_SUBMATCH: 33901cb0ef41Sopenharmony_ci case BEGIN_NEGATIVE_SUBMATCH: 33911cb0ef41Sopenharmony_ci if (!trace->is_trivial()) { 33921cb0ef41Sopenharmony_ci trace->Flush(compiler, this); 33931cb0ef41Sopenharmony_ci } else { 33941cb0ef41Sopenharmony_ci assembler->WriteCurrentPositionToRegister( 33951cb0ef41Sopenharmony_ci data_.u_submatch.current_position_register, 0); 33961cb0ef41Sopenharmony_ci assembler->WriteStackPointerToRegister( 33971cb0ef41Sopenharmony_ci data_.u_submatch.stack_pointer_register); 33981cb0ef41Sopenharmony_ci on_success()->Emit(compiler, trace); 33991cb0ef41Sopenharmony_ci } 34001cb0ef41Sopenharmony_ci break; 34011cb0ef41Sopenharmony_ci case EMPTY_MATCH_CHECK: { 34021cb0ef41Sopenharmony_ci int start_pos_reg = data_.u_empty_match_check.start_register; 34031cb0ef41Sopenharmony_ci int stored_pos = 0; 34041cb0ef41Sopenharmony_ci int rep_reg = data_.u_empty_match_check.repetition_register; 34051cb0ef41Sopenharmony_ci bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister); 34061cb0ef41Sopenharmony_ci bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos); 34071cb0ef41Sopenharmony_ci if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) { 34081cb0ef41Sopenharmony_ci // If we know we haven't advanced and there is no minimum we 34091cb0ef41Sopenharmony_ci // can just backtrack immediately. 34101cb0ef41Sopenharmony_ci assembler->GoTo(trace->backtrack()); 34111cb0ef41Sopenharmony_ci } else if (know_dist && stored_pos < trace->cp_offset()) { 34121cb0ef41Sopenharmony_ci // If we know we've advanced we can generate the continuation 34131cb0ef41Sopenharmony_ci // immediately. 34141cb0ef41Sopenharmony_ci on_success()->Emit(compiler, trace); 34151cb0ef41Sopenharmony_ci } else if (!trace->is_trivial()) { 34161cb0ef41Sopenharmony_ci trace->Flush(compiler, this); 34171cb0ef41Sopenharmony_ci } else { 34181cb0ef41Sopenharmony_ci Label skip_empty_check; 34191cb0ef41Sopenharmony_ci // If we have a minimum number of repetitions we check the current 34201cb0ef41Sopenharmony_ci // number first and skip the empty check if it's not enough. 34211cb0ef41Sopenharmony_ci if (has_minimum) { 34221cb0ef41Sopenharmony_ci int limit = data_.u_empty_match_check.repetition_limit; 34231cb0ef41Sopenharmony_ci assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check); 34241cb0ef41Sopenharmony_ci } 34251cb0ef41Sopenharmony_ci // If the match is empty we bail out, otherwise we fall through 34261cb0ef41Sopenharmony_ci // to the on-success continuation. 34271cb0ef41Sopenharmony_ci assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register, 34281cb0ef41Sopenharmony_ci trace->backtrack()); 34291cb0ef41Sopenharmony_ci assembler->Bind(&skip_empty_check); 34301cb0ef41Sopenharmony_ci on_success()->Emit(compiler, trace); 34311cb0ef41Sopenharmony_ci } 34321cb0ef41Sopenharmony_ci break; 34331cb0ef41Sopenharmony_ci } 34341cb0ef41Sopenharmony_ci case POSITIVE_SUBMATCH_SUCCESS: { 34351cb0ef41Sopenharmony_ci if (!trace->is_trivial()) { 34361cb0ef41Sopenharmony_ci trace->Flush(compiler, this); 34371cb0ef41Sopenharmony_ci return; 34381cb0ef41Sopenharmony_ci } 34391cb0ef41Sopenharmony_ci assembler->ReadCurrentPositionFromRegister( 34401cb0ef41Sopenharmony_ci data_.u_submatch.current_position_register); 34411cb0ef41Sopenharmony_ci assembler->ReadStackPointerFromRegister( 34421cb0ef41Sopenharmony_ci data_.u_submatch.stack_pointer_register); 34431cb0ef41Sopenharmony_ci int clear_register_count = data_.u_submatch.clear_register_count; 34441cb0ef41Sopenharmony_ci if (clear_register_count == 0) { 34451cb0ef41Sopenharmony_ci on_success()->Emit(compiler, trace); 34461cb0ef41Sopenharmony_ci return; 34471cb0ef41Sopenharmony_ci } 34481cb0ef41Sopenharmony_ci int clear_registers_from = data_.u_submatch.clear_register_from; 34491cb0ef41Sopenharmony_ci Label clear_registers_backtrack; 34501cb0ef41Sopenharmony_ci Trace new_trace = *trace; 34511cb0ef41Sopenharmony_ci new_trace.set_backtrack(&clear_registers_backtrack); 34521cb0ef41Sopenharmony_ci on_success()->Emit(compiler, &new_trace); 34531cb0ef41Sopenharmony_ci 34541cb0ef41Sopenharmony_ci assembler->Bind(&clear_registers_backtrack); 34551cb0ef41Sopenharmony_ci int clear_registers_to = clear_registers_from + clear_register_count - 1; 34561cb0ef41Sopenharmony_ci assembler->ClearRegisters(clear_registers_from, clear_registers_to); 34571cb0ef41Sopenharmony_ci 34581cb0ef41Sopenharmony_ci DCHECK(trace->backtrack() == nullptr); 34591cb0ef41Sopenharmony_ci assembler->Backtrack(); 34601cb0ef41Sopenharmony_ci return; 34611cb0ef41Sopenharmony_ci } 34621cb0ef41Sopenharmony_ci default: 34631cb0ef41Sopenharmony_ci UNREACHABLE(); 34641cb0ef41Sopenharmony_ci } 34651cb0ef41Sopenharmony_ci} 34661cb0ef41Sopenharmony_ci 34671cb0ef41Sopenharmony_civoid BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 34681cb0ef41Sopenharmony_ci RegExpMacroAssembler* assembler = compiler->macro_assembler(); 34691cb0ef41Sopenharmony_ci if (!trace->is_trivial()) { 34701cb0ef41Sopenharmony_ci trace->Flush(compiler, this); 34711cb0ef41Sopenharmony_ci return; 34721cb0ef41Sopenharmony_ci } 34731cb0ef41Sopenharmony_ci 34741cb0ef41Sopenharmony_ci LimitResult limit_result = LimitVersions(compiler, trace); 34751cb0ef41Sopenharmony_ci if (limit_result == DONE) return; 34761cb0ef41Sopenharmony_ci DCHECK(limit_result == CONTINUE); 34771cb0ef41Sopenharmony_ci 34781cb0ef41Sopenharmony_ci RecursionCheck rc(compiler); 34791cb0ef41Sopenharmony_ci 34801cb0ef41Sopenharmony_ci DCHECK_EQ(start_reg_ + 1, end_reg_); 34811cb0ef41Sopenharmony_ci if (IsIgnoreCase(flags_)) { 34821cb0ef41Sopenharmony_ci bool unicode = IsUnicode(flags_); 34831cb0ef41Sopenharmony_ci assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(), 34841cb0ef41Sopenharmony_ci unicode, trace->backtrack()); 34851cb0ef41Sopenharmony_ci } else { 34861cb0ef41Sopenharmony_ci assembler->CheckNotBackReference(start_reg_, read_backward(), 34871cb0ef41Sopenharmony_ci trace->backtrack()); 34881cb0ef41Sopenharmony_ci } 34891cb0ef41Sopenharmony_ci // We are going to advance backward, so we may end up at the start. 34901cb0ef41Sopenharmony_ci if (read_backward()) trace->set_at_start(Trace::UNKNOWN); 34911cb0ef41Sopenharmony_ci 34921cb0ef41Sopenharmony_ci // Check that the back reference does not end inside a surrogate pair. 34931cb0ef41Sopenharmony_ci if (IsUnicode(flags_) && !compiler->one_byte()) { 34941cb0ef41Sopenharmony_ci assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack()); 34951cb0ef41Sopenharmony_ci } 34961cb0ef41Sopenharmony_ci on_success()->Emit(compiler, trace); 34971cb0ef41Sopenharmony_ci} 34981cb0ef41Sopenharmony_ci 34991cb0ef41Sopenharmony_civoid TextNode::CalculateOffsets() { 35001cb0ef41Sopenharmony_ci int element_count = elements()->length(); 35011cb0ef41Sopenharmony_ci // Set up the offsets of the elements relative to the start. This is a fixed 35021cb0ef41Sopenharmony_ci // quantity since a TextNode can only contain fixed-width things. 35031cb0ef41Sopenharmony_ci int cp_offset = 0; 35041cb0ef41Sopenharmony_ci for (int i = 0; i < element_count; i++) { 35051cb0ef41Sopenharmony_ci TextElement& elm = elements()->at(i); 35061cb0ef41Sopenharmony_ci elm.set_cp_offset(cp_offset); 35071cb0ef41Sopenharmony_ci cp_offset += elm.length(); 35081cb0ef41Sopenharmony_ci } 35091cb0ef41Sopenharmony_ci} 35101cb0ef41Sopenharmony_ci 35111cb0ef41Sopenharmony_cinamespace { 35121cb0ef41Sopenharmony_ci 35131cb0ef41Sopenharmony_ci// Assertion propagation moves information about assertions such as 35141cb0ef41Sopenharmony_ci// \b to the affected nodes. For instance, in /.\b./ information must 35151cb0ef41Sopenharmony_ci// be propagated to the first '.' that whatever follows needs to know 35161cb0ef41Sopenharmony_ci// if it matched a word or a non-word, and to the second '.' that it 35171cb0ef41Sopenharmony_ci// has to check if it succeeds a word or non-word. In this case the 35181cb0ef41Sopenharmony_ci// result will be something like: 35191cb0ef41Sopenharmony_ci// 35201cb0ef41Sopenharmony_ci// +-------+ +------------+ 35211cb0ef41Sopenharmony_ci// | . | | . | 35221cb0ef41Sopenharmony_ci// +-------+ ---> +------------+ 35231cb0ef41Sopenharmony_ci// | word? | | check word | 35241cb0ef41Sopenharmony_ci// +-------+ +------------+ 35251cb0ef41Sopenharmony_ciclass AssertionPropagator : public AllStatic { 35261cb0ef41Sopenharmony_ci public: 35271cb0ef41Sopenharmony_ci static void VisitText(TextNode* that) {} 35281cb0ef41Sopenharmony_ci 35291cb0ef41Sopenharmony_ci static void VisitAction(ActionNode* that) { 35301cb0ef41Sopenharmony_ci // If the next node is interested in what it follows then this node 35311cb0ef41Sopenharmony_ci // has to be interested too so it can pass the information on. 35321cb0ef41Sopenharmony_ci that->info()->AddFromFollowing(that->on_success()->info()); 35331cb0ef41Sopenharmony_ci } 35341cb0ef41Sopenharmony_ci 35351cb0ef41Sopenharmony_ci static void VisitChoice(ChoiceNode* that, int i) { 35361cb0ef41Sopenharmony_ci // Anything the following nodes need to know has to be known by 35371cb0ef41Sopenharmony_ci // this node also, so it can pass it on. 35381cb0ef41Sopenharmony_ci that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info()); 35391cb0ef41Sopenharmony_ci } 35401cb0ef41Sopenharmony_ci 35411cb0ef41Sopenharmony_ci static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) { 35421cb0ef41Sopenharmony_ci that->info()->AddFromFollowing(that->continue_node()->info()); 35431cb0ef41Sopenharmony_ci } 35441cb0ef41Sopenharmony_ci 35451cb0ef41Sopenharmony_ci static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) { 35461cb0ef41Sopenharmony_ci that->info()->AddFromFollowing(that->loop_node()->info()); 35471cb0ef41Sopenharmony_ci } 35481cb0ef41Sopenharmony_ci 35491cb0ef41Sopenharmony_ci static void VisitNegativeLookaroundChoiceLookaroundNode( 35501cb0ef41Sopenharmony_ci NegativeLookaroundChoiceNode* that) { 35511cb0ef41Sopenharmony_ci VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex); 35521cb0ef41Sopenharmony_ci } 35531cb0ef41Sopenharmony_ci 35541cb0ef41Sopenharmony_ci static void VisitNegativeLookaroundChoiceContinueNode( 35551cb0ef41Sopenharmony_ci NegativeLookaroundChoiceNode* that) { 35561cb0ef41Sopenharmony_ci VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex); 35571cb0ef41Sopenharmony_ci } 35581cb0ef41Sopenharmony_ci 35591cb0ef41Sopenharmony_ci static void VisitBackReference(BackReferenceNode* that) {} 35601cb0ef41Sopenharmony_ci 35611cb0ef41Sopenharmony_ci static void VisitAssertion(AssertionNode* that) {} 35621cb0ef41Sopenharmony_ci}; 35631cb0ef41Sopenharmony_ci 35641cb0ef41Sopenharmony_ci// Propagates information about the minimum size of successful matches from 35651cb0ef41Sopenharmony_ci// successor nodes to their predecessors. Note that all eats_at_least values 35661cb0ef41Sopenharmony_ci// are initialized to zero before analysis. 35671cb0ef41Sopenharmony_ciclass EatsAtLeastPropagator : public AllStatic { 35681cb0ef41Sopenharmony_ci public: 35691cb0ef41Sopenharmony_ci static void VisitText(TextNode* that) { 35701cb0ef41Sopenharmony_ci // The eats_at_least value is not used if reading backward. 35711cb0ef41Sopenharmony_ci if (!that->read_backward()) { 35721cb0ef41Sopenharmony_ci // We are not at the start after this node, and thus we can use the 35731cb0ef41Sopenharmony_ci // successor's eats_at_least_from_not_start value. 35741cb0ef41Sopenharmony_ci uint8_t eats_at_least = base::saturated_cast<uint8_t>( 35751cb0ef41Sopenharmony_ci that->Length() + that->on_success() 35761cb0ef41Sopenharmony_ci ->eats_at_least_info() 35771cb0ef41Sopenharmony_ci ->eats_at_least_from_not_start); 35781cb0ef41Sopenharmony_ci that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least)); 35791cb0ef41Sopenharmony_ci } 35801cb0ef41Sopenharmony_ci } 35811cb0ef41Sopenharmony_ci 35821cb0ef41Sopenharmony_ci static void VisitAction(ActionNode* that) { 35831cb0ef41Sopenharmony_ci switch (that->action_type()) { 35841cb0ef41Sopenharmony_ci case ActionNode::BEGIN_POSITIVE_SUBMATCH: 35851cb0ef41Sopenharmony_ci case ActionNode::POSITIVE_SUBMATCH_SUCCESS: 35861cb0ef41Sopenharmony_ci // We do not propagate eats_at_least data through positive lookarounds, 35871cb0ef41Sopenharmony_ci // because they rewind input. 35881cb0ef41Sopenharmony_ci // TODO(v8:11859) Potential approaches for fixing this include: 35891cb0ef41Sopenharmony_ci // 1. Add a dedicated choice node for positive lookaround, similar to 35901cb0ef41Sopenharmony_ci // NegativeLookaroundChoiceNode. 35911cb0ef41Sopenharmony_ci // 2. Add an eats_at_least_inside_loop field to EatsAtLeastInfo, which 35921cb0ef41Sopenharmony_ci // is <= eats_at_least_from_possibly_start, and use that value in 35931cb0ef41Sopenharmony_ci // EatsAtLeastFromLoopEntry. 35941cb0ef41Sopenharmony_ci DCHECK(that->eats_at_least_info()->IsZero()); 35951cb0ef41Sopenharmony_ci break; 35961cb0ef41Sopenharmony_ci case ActionNode::SET_REGISTER_FOR_LOOP: 35971cb0ef41Sopenharmony_ci // SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the 35981cb0ef41Sopenharmony_ci // loop body will run at least the minimum number of times before the 35991cb0ef41Sopenharmony_ci // continuation case can run. 36001cb0ef41Sopenharmony_ci that->set_eats_at_least_info( 36011cb0ef41Sopenharmony_ci that->on_success()->EatsAtLeastFromLoopEntry()); 36021cb0ef41Sopenharmony_ci break; 36031cb0ef41Sopenharmony_ci case ActionNode::BEGIN_NEGATIVE_SUBMATCH: 36041cb0ef41Sopenharmony_ci default: 36051cb0ef41Sopenharmony_ci // Otherwise, the current node eats at least as much as its successor. 36061cb0ef41Sopenharmony_ci // Note: we can propagate eats_at_least data for BEGIN_NEGATIVE_SUBMATCH 36071cb0ef41Sopenharmony_ci // because NegativeLookaroundChoiceNode ignores its lookaround successor 36081cb0ef41Sopenharmony_ci // when computing eats-at-least and quick check information. 36091cb0ef41Sopenharmony_ci that->set_eats_at_least_info(*that->on_success()->eats_at_least_info()); 36101cb0ef41Sopenharmony_ci break; 36111cb0ef41Sopenharmony_ci } 36121cb0ef41Sopenharmony_ci } 36131cb0ef41Sopenharmony_ci 36141cb0ef41Sopenharmony_ci static void VisitChoice(ChoiceNode* that, int i) { 36151cb0ef41Sopenharmony_ci // The minimum possible match from a choice node is the minimum of its 36161cb0ef41Sopenharmony_ci // successors. 36171cb0ef41Sopenharmony_ci EatsAtLeastInfo eats_at_least = 36181cb0ef41Sopenharmony_ci i == 0 ? EatsAtLeastInfo(UINT8_MAX) : *that->eats_at_least_info(); 36191cb0ef41Sopenharmony_ci eats_at_least.SetMin( 36201cb0ef41Sopenharmony_ci *that->alternatives()->at(i).node()->eats_at_least_info()); 36211cb0ef41Sopenharmony_ci that->set_eats_at_least_info(eats_at_least); 36221cb0ef41Sopenharmony_ci } 36231cb0ef41Sopenharmony_ci 36241cb0ef41Sopenharmony_ci static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) { 36251cb0ef41Sopenharmony_ci if (!that->read_backward()) { 36261cb0ef41Sopenharmony_ci that->set_eats_at_least_info( 36271cb0ef41Sopenharmony_ci *that->continue_node()->eats_at_least_info()); 36281cb0ef41Sopenharmony_ci } 36291cb0ef41Sopenharmony_ci } 36301cb0ef41Sopenharmony_ci 36311cb0ef41Sopenharmony_ci static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {} 36321cb0ef41Sopenharmony_ci 36331cb0ef41Sopenharmony_ci static void VisitNegativeLookaroundChoiceLookaroundNode( 36341cb0ef41Sopenharmony_ci NegativeLookaroundChoiceNode* that) {} 36351cb0ef41Sopenharmony_ci 36361cb0ef41Sopenharmony_ci static void VisitNegativeLookaroundChoiceContinueNode( 36371cb0ef41Sopenharmony_ci NegativeLookaroundChoiceNode* that) { 36381cb0ef41Sopenharmony_ci that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info()); 36391cb0ef41Sopenharmony_ci } 36401cb0ef41Sopenharmony_ci 36411cb0ef41Sopenharmony_ci static void VisitBackReference(BackReferenceNode* that) { 36421cb0ef41Sopenharmony_ci if (!that->read_backward()) { 36431cb0ef41Sopenharmony_ci that->set_eats_at_least_info(*that->on_success()->eats_at_least_info()); 36441cb0ef41Sopenharmony_ci } 36451cb0ef41Sopenharmony_ci } 36461cb0ef41Sopenharmony_ci 36471cb0ef41Sopenharmony_ci static void VisitAssertion(AssertionNode* that) { 36481cb0ef41Sopenharmony_ci EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info(); 36491cb0ef41Sopenharmony_ci if (that->assertion_type() == AssertionNode::AT_START) { 36501cb0ef41Sopenharmony_ci // If we know we are not at the start and we are asked "how many 36511cb0ef41Sopenharmony_ci // characters will you match if you succeed?" then we can answer anything 36521cb0ef41Sopenharmony_ci // since false implies false. So let's just set the max answer 36531cb0ef41Sopenharmony_ci // (UINT8_MAX) since that won't prevent us from preloading a lot of 36541cb0ef41Sopenharmony_ci // characters for the other branches in the node graph. 36551cb0ef41Sopenharmony_ci eats_at_least.eats_at_least_from_not_start = UINT8_MAX; 36561cb0ef41Sopenharmony_ci } 36571cb0ef41Sopenharmony_ci that->set_eats_at_least_info(eats_at_least); 36581cb0ef41Sopenharmony_ci } 36591cb0ef41Sopenharmony_ci}; 36601cb0ef41Sopenharmony_ci 36611cb0ef41Sopenharmony_ci} // namespace 36621cb0ef41Sopenharmony_ci 36631cb0ef41Sopenharmony_ci// ------------------------------------------------------------------- 36641cb0ef41Sopenharmony_ci// Analysis 36651cb0ef41Sopenharmony_ci 36661cb0ef41Sopenharmony_ci// Iterates the node graph and provides the opportunity for propagators to set 36671cb0ef41Sopenharmony_ci// values that depend on successor nodes. 36681cb0ef41Sopenharmony_citemplate <typename... Propagators> 36691cb0ef41Sopenharmony_ciclass Analysis : public NodeVisitor { 36701cb0ef41Sopenharmony_ci public: 36711cb0ef41Sopenharmony_ci Analysis(Isolate* isolate, bool is_one_byte, RegExpFlags flags) 36721cb0ef41Sopenharmony_ci : isolate_(isolate), 36731cb0ef41Sopenharmony_ci is_one_byte_(is_one_byte), 36741cb0ef41Sopenharmony_ci flags_(flags), 36751cb0ef41Sopenharmony_ci error_(RegExpError::kNone) {} 36761cb0ef41Sopenharmony_ci 36771cb0ef41Sopenharmony_ci void EnsureAnalyzed(RegExpNode* that) { 36781cb0ef41Sopenharmony_ci StackLimitCheck check(isolate()); 36791cb0ef41Sopenharmony_ci if (check.HasOverflowed()) { 36801cb0ef41Sopenharmony_ci if (FLAG_correctness_fuzzer_suppressions) { 36811cb0ef41Sopenharmony_ci FATAL("Analysis: Aborting on stack overflow"); 36821cb0ef41Sopenharmony_ci } 36831cb0ef41Sopenharmony_ci fail(RegExpError::kAnalysisStackOverflow); 36841cb0ef41Sopenharmony_ci return; 36851cb0ef41Sopenharmony_ci } 36861cb0ef41Sopenharmony_ci if (that->info()->been_analyzed || that->info()->being_analyzed) return; 36871cb0ef41Sopenharmony_ci that->info()->being_analyzed = true; 36881cb0ef41Sopenharmony_ci that->Accept(this); 36891cb0ef41Sopenharmony_ci that->info()->being_analyzed = false; 36901cb0ef41Sopenharmony_ci that->info()->been_analyzed = true; 36911cb0ef41Sopenharmony_ci } 36921cb0ef41Sopenharmony_ci 36931cb0ef41Sopenharmony_ci bool has_failed() { return error_ != RegExpError::kNone; } 36941cb0ef41Sopenharmony_ci RegExpError error() { 36951cb0ef41Sopenharmony_ci DCHECK(error_ != RegExpError::kNone); 36961cb0ef41Sopenharmony_ci return error_; 36971cb0ef41Sopenharmony_ci } 36981cb0ef41Sopenharmony_ci void fail(RegExpError error) { error_ = error; } 36991cb0ef41Sopenharmony_ci 37001cb0ef41Sopenharmony_ci Isolate* isolate() const { return isolate_; } 37011cb0ef41Sopenharmony_ci 37021cb0ef41Sopenharmony_ci void VisitEnd(EndNode* that) override { 37031cb0ef41Sopenharmony_ci // nothing to do 37041cb0ef41Sopenharmony_ci } 37051cb0ef41Sopenharmony_ci 37061cb0ef41Sopenharmony_ci// Used to call the given static function on each propagator / variadic template 37071cb0ef41Sopenharmony_ci// argument. 37081cb0ef41Sopenharmony_ci#define STATIC_FOR_EACH(expr) \ 37091cb0ef41Sopenharmony_ci do { \ 37101cb0ef41Sopenharmony_ci int dummy[] = {((expr), 0)...}; \ 37111cb0ef41Sopenharmony_ci USE(dummy); \ 37121cb0ef41Sopenharmony_ci } while (false) 37131cb0ef41Sopenharmony_ci 37141cb0ef41Sopenharmony_ci void VisitText(TextNode* that) override { 37151cb0ef41Sopenharmony_ci that->MakeCaseIndependent(isolate(), is_one_byte_, flags_); 37161cb0ef41Sopenharmony_ci EnsureAnalyzed(that->on_success()); 37171cb0ef41Sopenharmony_ci if (has_failed()) return; 37181cb0ef41Sopenharmony_ci that->CalculateOffsets(); 37191cb0ef41Sopenharmony_ci STATIC_FOR_EACH(Propagators::VisitText(that)); 37201cb0ef41Sopenharmony_ci } 37211cb0ef41Sopenharmony_ci 37221cb0ef41Sopenharmony_ci void VisitAction(ActionNode* that) override { 37231cb0ef41Sopenharmony_ci EnsureAnalyzed(that->on_success()); 37241cb0ef41Sopenharmony_ci if (has_failed()) return; 37251cb0ef41Sopenharmony_ci STATIC_FOR_EACH(Propagators::VisitAction(that)); 37261cb0ef41Sopenharmony_ci } 37271cb0ef41Sopenharmony_ci 37281cb0ef41Sopenharmony_ci void VisitChoice(ChoiceNode* that) override { 37291cb0ef41Sopenharmony_ci for (int i = 0; i < that->alternatives()->length(); i++) { 37301cb0ef41Sopenharmony_ci EnsureAnalyzed(that->alternatives()->at(i).node()); 37311cb0ef41Sopenharmony_ci if (has_failed()) return; 37321cb0ef41Sopenharmony_ci STATIC_FOR_EACH(Propagators::VisitChoice(that, i)); 37331cb0ef41Sopenharmony_ci } 37341cb0ef41Sopenharmony_ci } 37351cb0ef41Sopenharmony_ci 37361cb0ef41Sopenharmony_ci void VisitLoopChoice(LoopChoiceNode* that) override { 37371cb0ef41Sopenharmony_ci DCHECK_EQ(that->alternatives()->length(), 2); // Just loop and continue. 37381cb0ef41Sopenharmony_ci 37391cb0ef41Sopenharmony_ci // First propagate all information from the continuation node. 37401cb0ef41Sopenharmony_ci EnsureAnalyzed(that->continue_node()); 37411cb0ef41Sopenharmony_ci if (has_failed()) return; 37421cb0ef41Sopenharmony_ci STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that)); 37431cb0ef41Sopenharmony_ci 37441cb0ef41Sopenharmony_ci // Check the loop last since it may need the value of this node 37451cb0ef41Sopenharmony_ci // to get a correct result. 37461cb0ef41Sopenharmony_ci EnsureAnalyzed(that->loop_node()); 37471cb0ef41Sopenharmony_ci if (has_failed()) return; 37481cb0ef41Sopenharmony_ci STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that)); 37491cb0ef41Sopenharmony_ci } 37501cb0ef41Sopenharmony_ci 37511cb0ef41Sopenharmony_ci void VisitNegativeLookaroundChoice( 37521cb0ef41Sopenharmony_ci NegativeLookaroundChoiceNode* that) override { 37531cb0ef41Sopenharmony_ci DCHECK_EQ(that->alternatives()->length(), 2); // Lookaround and continue. 37541cb0ef41Sopenharmony_ci 37551cb0ef41Sopenharmony_ci EnsureAnalyzed(that->lookaround_node()); 37561cb0ef41Sopenharmony_ci if (has_failed()) return; 37571cb0ef41Sopenharmony_ci STATIC_FOR_EACH( 37581cb0ef41Sopenharmony_ci Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that)); 37591cb0ef41Sopenharmony_ci 37601cb0ef41Sopenharmony_ci EnsureAnalyzed(that->continue_node()); 37611cb0ef41Sopenharmony_ci if (has_failed()) return; 37621cb0ef41Sopenharmony_ci STATIC_FOR_EACH( 37631cb0ef41Sopenharmony_ci Propagators::VisitNegativeLookaroundChoiceContinueNode(that)); 37641cb0ef41Sopenharmony_ci } 37651cb0ef41Sopenharmony_ci 37661cb0ef41Sopenharmony_ci void VisitBackReference(BackReferenceNode* that) override { 37671cb0ef41Sopenharmony_ci EnsureAnalyzed(that->on_success()); 37681cb0ef41Sopenharmony_ci if (has_failed()) return; 37691cb0ef41Sopenharmony_ci STATIC_FOR_EACH(Propagators::VisitBackReference(that)); 37701cb0ef41Sopenharmony_ci } 37711cb0ef41Sopenharmony_ci 37721cb0ef41Sopenharmony_ci void VisitAssertion(AssertionNode* that) override { 37731cb0ef41Sopenharmony_ci EnsureAnalyzed(that->on_success()); 37741cb0ef41Sopenharmony_ci if (has_failed()) return; 37751cb0ef41Sopenharmony_ci STATIC_FOR_EACH(Propagators::VisitAssertion(that)); 37761cb0ef41Sopenharmony_ci } 37771cb0ef41Sopenharmony_ci 37781cb0ef41Sopenharmony_ci#undef STATIC_FOR_EACH 37791cb0ef41Sopenharmony_ci 37801cb0ef41Sopenharmony_ci private: 37811cb0ef41Sopenharmony_ci Isolate* isolate_; 37821cb0ef41Sopenharmony_ci const bool is_one_byte_; 37831cb0ef41Sopenharmony_ci const RegExpFlags flags_; 37841cb0ef41Sopenharmony_ci RegExpError error_; 37851cb0ef41Sopenharmony_ci 37861cb0ef41Sopenharmony_ci DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); 37871cb0ef41Sopenharmony_ci}; 37881cb0ef41Sopenharmony_ci 37891cb0ef41Sopenharmony_ciRegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpFlags flags, 37901cb0ef41Sopenharmony_ci RegExpNode* node) { 37911cb0ef41Sopenharmony_ci Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis( 37921cb0ef41Sopenharmony_ci isolate, is_one_byte, flags); 37931cb0ef41Sopenharmony_ci DCHECK_EQ(node->info()->been_analyzed, false); 37941cb0ef41Sopenharmony_ci analysis.EnsureAnalyzed(node); 37951cb0ef41Sopenharmony_ci DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone); 37961cb0ef41Sopenharmony_ci return analysis.has_failed() ? analysis.error() : RegExpError::kNone; 37971cb0ef41Sopenharmony_ci} 37981cb0ef41Sopenharmony_ci 37991cb0ef41Sopenharmony_civoid BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 38001cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, 38011cb0ef41Sopenharmony_ci bool not_at_start) { 38021cb0ef41Sopenharmony_ci // Working out the set of characters that a backreference can match is too 38031cb0ef41Sopenharmony_ci // hard, so we just say that any character can match. 38041cb0ef41Sopenharmony_ci bm->SetRest(offset); 38051cb0ef41Sopenharmony_ci SaveBMInfo(bm, not_at_start, offset); 38061cb0ef41Sopenharmony_ci} 38071cb0ef41Sopenharmony_ci 38081cb0ef41Sopenharmony_ciSTATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == 38091cb0ef41Sopenharmony_ci RegExpMacroAssembler::kTableSize); 38101cb0ef41Sopenharmony_ci 38111cb0ef41Sopenharmony_civoid ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 38121cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) { 38131cb0ef41Sopenharmony_ci ZoneList<GuardedAlternative>* alts = alternatives(); 38141cb0ef41Sopenharmony_ci budget = (budget - 1) / alts->length(); 38151cb0ef41Sopenharmony_ci for (int i = 0; i < alts->length(); i++) { 38161cb0ef41Sopenharmony_ci GuardedAlternative& alt = alts->at(i); 38171cb0ef41Sopenharmony_ci if (alt.guards() != nullptr && alt.guards()->length() != 0) { 38181cb0ef41Sopenharmony_ci bm->SetRest(offset); // Give up trying to fill in info. 38191cb0ef41Sopenharmony_ci SaveBMInfo(bm, not_at_start, offset); 38201cb0ef41Sopenharmony_ci return; 38211cb0ef41Sopenharmony_ci } 38221cb0ef41Sopenharmony_ci alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start); 38231cb0ef41Sopenharmony_ci } 38241cb0ef41Sopenharmony_ci SaveBMInfo(bm, not_at_start, offset); 38251cb0ef41Sopenharmony_ci} 38261cb0ef41Sopenharmony_ci 38271cb0ef41Sopenharmony_civoid TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, 38281cb0ef41Sopenharmony_ci BoyerMooreLookahead* bm, bool not_at_start) { 38291cb0ef41Sopenharmony_ci if (initial_offset >= bm->length()) return; 38301cb0ef41Sopenharmony_ci int offset = initial_offset; 38311cb0ef41Sopenharmony_ci int max_char = bm->max_char(); 38321cb0ef41Sopenharmony_ci for (int i = 0; i < elements()->length(); i++) { 38331cb0ef41Sopenharmony_ci if (offset >= bm->length()) { 38341cb0ef41Sopenharmony_ci if (initial_offset == 0) set_bm_info(not_at_start, bm); 38351cb0ef41Sopenharmony_ci return; 38361cb0ef41Sopenharmony_ci } 38371cb0ef41Sopenharmony_ci TextElement text = elements()->at(i); 38381cb0ef41Sopenharmony_ci if (text.text_type() == TextElement::ATOM) { 38391cb0ef41Sopenharmony_ci RegExpAtom* atom = text.atom(); 38401cb0ef41Sopenharmony_ci for (int j = 0; j < atom->length(); j++, offset++) { 38411cb0ef41Sopenharmony_ci if (offset >= bm->length()) { 38421cb0ef41Sopenharmony_ci if (initial_offset == 0) set_bm_info(not_at_start, bm); 38431cb0ef41Sopenharmony_ci return; 38441cb0ef41Sopenharmony_ci } 38451cb0ef41Sopenharmony_ci base::uc16 character = atom->data()[j]; 38461cb0ef41Sopenharmony_ci if (IsIgnoreCase(bm->compiler()->flags())) { 38471cb0ef41Sopenharmony_ci unibrow::uchar chars[4]; 38481cb0ef41Sopenharmony_ci int length = GetCaseIndependentLetters( 38491cb0ef41Sopenharmony_ci isolate, character, bm->max_char() == String::kMaxOneByteCharCode, 38501cb0ef41Sopenharmony_ci chars, 4); 38511cb0ef41Sopenharmony_ci for (int k = 0; k < length; k++) { 38521cb0ef41Sopenharmony_ci bm->Set(offset, chars[k]); 38531cb0ef41Sopenharmony_ci } 38541cb0ef41Sopenharmony_ci } else { 38551cb0ef41Sopenharmony_ci if (character <= max_char) bm->Set(offset, character); 38561cb0ef41Sopenharmony_ci } 38571cb0ef41Sopenharmony_ci } 38581cb0ef41Sopenharmony_ci } else { 38591cb0ef41Sopenharmony_ci DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type()); 38601cb0ef41Sopenharmony_ci RegExpCharacterClass* char_class = text.char_class(); 38611cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); 38621cb0ef41Sopenharmony_ci if (char_class->is_negated()) { 38631cb0ef41Sopenharmony_ci bm->SetAll(offset); 38641cb0ef41Sopenharmony_ci } else { 38651cb0ef41Sopenharmony_ci for (int k = 0; k < ranges->length(); k++) { 38661cb0ef41Sopenharmony_ci CharacterRange& range = ranges->at(k); 38671cb0ef41Sopenharmony_ci if (static_cast<int>(range.from()) > max_char) continue; 38681cb0ef41Sopenharmony_ci int to = std::min(max_char, static_cast<int>(range.to())); 38691cb0ef41Sopenharmony_ci bm->SetInterval(offset, Interval(range.from(), to)); 38701cb0ef41Sopenharmony_ci } 38711cb0ef41Sopenharmony_ci } 38721cb0ef41Sopenharmony_ci offset++; 38731cb0ef41Sopenharmony_ci } 38741cb0ef41Sopenharmony_ci } 38751cb0ef41Sopenharmony_ci if (offset >= bm->length()) { 38761cb0ef41Sopenharmony_ci if (initial_offset == 0) set_bm_info(not_at_start, bm); 38771cb0ef41Sopenharmony_ci return; 38781cb0ef41Sopenharmony_ci } 38791cb0ef41Sopenharmony_ci on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, 38801cb0ef41Sopenharmony_ci true); // Not at start after a text node. 38811cb0ef41Sopenharmony_ci if (initial_offset == 0) set_bm_info(not_at_start, bm); 38821cb0ef41Sopenharmony_ci} 38831cb0ef41Sopenharmony_ci 38841cb0ef41Sopenharmony_ciRegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate( 38851cb0ef41Sopenharmony_ci RegExpNode* on_success) { 38861cb0ef41Sopenharmony_ci DCHECK(!read_backward()); 38871cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( 38881cb0ef41Sopenharmony_ci zone(), CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); 38891cb0ef41Sopenharmony_ci ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( 38901cb0ef41Sopenharmony_ci zone(), CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); 38911cb0ef41Sopenharmony_ci 38921cb0ef41Sopenharmony_ci ChoiceNode* optional_step_back = zone()->New<ChoiceNode>(2, zone()); 38931cb0ef41Sopenharmony_ci 38941cb0ef41Sopenharmony_ci int stack_register = UnicodeLookaroundStackRegister(); 38951cb0ef41Sopenharmony_ci int position_register = UnicodeLookaroundPositionRegister(); 38961cb0ef41Sopenharmony_ci RegExpNode* step_back = TextNode::CreateForCharacterRanges( 38971cb0ef41Sopenharmony_ci zone(), lead_surrogates, true, on_success); 38981cb0ef41Sopenharmony_ci RegExpLookaround::Builder builder(true, step_back, stack_register, 38991cb0ef41Sopenharmony_ci position_register); 39001cb0ef41Sopenharmony_ci RegExpNode* match_trail = TextNode::CreateForCharacterRanges( 39011cb0ef41Sopenharmony_ci zone(), trail_surrogates, false, builder.on_match_success()); 39021cb0ef41Sopenharmony_ci 39031cb0ef41Sopenharmony_ci optional_step_back->AddAlternative( 39041cb0ef41Sopenharmony_ci GuardedAlternative(builder.ForMatch(match_trail))); 39051cb0ef41Sopenharmony_ci optional_step_back->AddAlternative(GuardedAlternative(on_success)); 39061cb0ef41Sopenharmony_ci 39071cb0ef41Sopenharmony_ci return optional_step_back; 39081cb0ef41Sopenharmony_ci} 39091cb0ef41Sopenharmony_ci 39101cb0ef41Sopenharmony_ciRegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data, 39111cb0ef41Sopenharmony_ci RegExpFlags flags, 39121cb0ef41Sopenharmony_ci bool is_one_byte) { 39131cb0ef41Sopenharmony_ci // Wrap the body of the regexp in capture #0. 39141cb0ef41Sopenharmony_ci RegExpNode* captured_body = 39151cb0ef41Sopenharmony_ci RegExpCapture::ToNode(data->tree, 0, this, accept()); 39161cb0ef41Sopenharmony_ci RegExpNode* node = captured_body; 39171cb0ef41Sopenharmony_ci if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags)) { 39181cb0ef41Sopenharmony_ci // Add a .*? at the beginning, outside the body capture, unless 39191cb0ef41Sopenharmony_ci // this expression is anchored at the beginning or sticky. 39201cb0ef41Sopenharmony_ci RegExpNode* loop_node = RegExpQuantifier::ToNode( 39211cb0ef41Sopenharmony_ci 0, RegExpTree::kInfinity, false, 39221cb0ef41Sopenharmony_ci zone()->New<RegExpCharacterClass>(StandardCharacterSet::kEverything), 39231cb0ef41Sopenharmony_ci this, captured_body, data->contains_anchor); 39241cb0ef41Sopenharmony_ci 39251cb0ef41Sopenharmony_ci if (data->contains_anchor) { 39261cb0ef41Sopenharmony_ci // Unroll loop once, to take care of the case that might start 39271cb0ef41Sopenharmony_ci // at the start of input. 39281cb0ef41Sopenharmony_ci ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone()); 39291cb0ef41Sopenharmony_ci first_step_node->AddAlternative(GuardedAlternative(captured_body)); 39301cb0ef41Sopenharmony_ci first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>( 39311cb0ef41Sopenharmony_ci zone()->New<RegExpCharacterClass>(StandardCharacterSet::kEverything), 39321cb0ef41Sopenharmony_ci false, loop_node))); 39331cb0ef41Sopenharmony_ci node = first_step_node; 39341cb0ef41Sopenharmony_ci } else { 39351cb0ef41Sopenharmony_ci node = loop_node; 39361cb0ef41Sopenharmony_ci } 39371cb0ef41Sopenharmony_ci } 39381cb0ef41Sopenharmony_ci if (is_one_byte) { 39391cb0ef41Sopenharmony_ci node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags); 39401cb0ef41Sopenharmony_ci // Do it again to propagate the new nodes to places where they were not 39411cb0ef41Sopenharmony_ci // put because they had not been calculated yet. 39421cb0ef41Sopenharmony_ci if (node != nullptr) { 39431cb0ef41Sopenharmony_ci node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags); 39441cb0ef41Sopenharmony_ci } 39451cb0ef41Sopenharmony_ci } else if (IsUnicode(flags) && (IsGlobal(flags) || IsSticky(flags))) { 39461cb0ef41Sopenharmony_ci node = OptionallyStepBackToLeadSurrogate(node); 39471cb0ef41Sopenharmony_ci } 39481cb0ef41Sopenharmony_ci 39491cb0ef41Sopenharmony_ci if (node == nullptr) node = zone()->New<EndNode>(EndNode::BACKTRACK, zone()); 39501cb0ef41Sopenharmony_ci return node; 39511cb0ef41Sopenharmony_ci} 39521cb0ef41Sopenharmony_ci 39531cb0ef41Sopenharmony_civoid RegExpCompiler::ToNodeCheckForStackOverflow() { 39541cb0ef41Sopenharmony_ci if (StackLimitCheck{isolate()}.HasOverflowed()) { 39551cb0ef41Sopenharmony_ci FatalProcessOutOfMemory(isolate(), "RegExpCompiler"); 39561cb0ef41Sopenharmony_ci } 39571cb0ef41Sopenharmony_ci} 39581cb0ef41Sopenharmony_ci 39591cb0ef41Sopenharmony_ci} // namespace internal 39601cb0ef41Sopenharmony_ci} // namespace v8 3961