11cb0ef41Sopenharmony_ci// Copyright 2019 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-compiler.h"
61cb0ef41Sopenharmony_ci
71cb0ef41Sopenharmony_ci#include "src/base/safe_conversions.h"
81cb0ef41Sopenharmony_ci#include "src/execution/isolate.h"
91cb0ef41Sopenharmony_ci#include "src/objects/fixed-array-inl.h"
101cb0ef41Sopenharmony_ci#include "src/regexp/regexp-macro-assembler-arch.h"
111cb0ef41Sopenharmony_ci#include "src/strings/unicode-inl.h"
121cb0ef41Sopenharmony_ci#include "src/zone/zone-list-inl.h"
131cb0ef41Sopenharmony_ci
141cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
151cb0ef41Sopenharmony_ci#include "src/regexp/special-case.h"
161cb0ef41Sopenharmony_ci#include "unicode/locid.h"
171cb0ef41Sopenharmony_ci#include "unicode/uniset.h"
181cb0ef41Sopenharmony_ci#include "unicode/utypes.h"
191cb0ef41Sopenharmony_ci#endif  // V8_INTL_SUPPORT
201cb0ef41Sopenharmony_ci
211cb0ef41Sopenharmony_cinamespace v8 {
221cb0ef41Sopenharmony_cinamespace internal {
231cb0ef41Sopenharmony_ci
241cb0ef41Sopenharmony_ciusing namespace regexp_compiler_constants;  // NOLINT(build/namespaces)
251cb0ef41Sopenharmony_ci
261cb0ef41Sopenharmony_ci// -------------------------------------------------------------------
271cb0ef41Sopenharmony_ci// Implementation of the Irregexp regular expression engine.
281cb0ef41Sopenharmony_ci//
291cb0ef41Sopenharmony_ci// The Irregexp regular expression engine is intended to be a complete
301cb0ef41Sopenharmony_ci// implementation of ECMAScript regular expressions.  It generates either
311cb0ef41Sopenharmony_ci// bytecodes or native code.
321cb0ef41Sopenharmony_ci
331cb0ef41Sopenharmony_ci//   The Irregexp regexp engine is structured in three steps.
341cb0ef41Sopenharmony_ci//   1) The parser generates an abstract syntax tree.  See ast.cc.
351cb0ef41Sopenharmony_ci//   2) From the AST a node network is created.  The nodes are all
361cb0ef41Sopenharmony_ci//      subclasses of RegExpNode.  The nodes represent states when
371cb0ef41Sopenharmony_ci//      executing a regular expression.  Several optimizations are
381cb0ef41Sopenharmony_ci//      performed on the node network.
391cb0ef41Sopenharmony_ci//   3) From the nodes we generate either byte codes or native code
401cb0ef41Sopenharmony_ci//      that can actually execute the regular expression (perform
411cb0ef41Sopenharmony_ci//      the search).  The code generation step is described in more
421cb0ef41Sopenharmony_ci//      detail below.
431cb0ef41Sopenharmony_ci
441cb0ef41Sopenharmony_ci// Code generation.
451cb0ef41Sopenharmony_ci//
461cb0ef41Sopenharmony_ci//   The nodes are divided into four main categories.
471cb0ef41Sopenharmony_ci//   * Choice nodes
481cb0ef41Sopenharmony_ci//        These represent places where the regular expression can
491cb0ef41Sopenharmony_ci//        match in more than one way.  For example on entry to an
501cb0ef41Sopenharmony_ci//        alternation (foo|bar) or a repetition (*, +, ? or {}).
511cb0ef41Sopenharmony_ci//   * Action nodes
521cb0ef41Sopenharmony_ci//        These represent places where some action should be
531cb0ef41Sopenharmony_ci//        performed.  Examples include recording the current position
541cb0ef41Sopenharmony_ci//        in the input string to a register (in order to implement
551cb0ef41Sopenharmony_ci//        captures) or other actions on register for example in order
561cb0ef41Sopenharmony_ci//        to implement the counters needed for {} repetitions.
571cb0ef41Sopenharmony_ci//   * Matching nodes
581cb0ef41Sopenharmony_ci//        These attempt to match some element part of the input string.
591cb0ef41Sopenharmony_ci//        Examples of elements include character classes, plain strings
601cb0ef41Sopenharmony_ci//        or back references.
611cb0ef41Sopenharmony_ci//   * End nodes
621cb0ef41Sopenharmony_ci//        These are used to implement the actions required on finding
631cb0ef41Sopenharmony_ci//        a successful match or failing to find a match.
641cb0ef41Sopenharmony_ci//
651cb0ef41Sopenharmony_ci//   The code generated (whether as byte codes or native code) maintains
661cb0ef41Sopenharmony_ci//   some state as it runs.  This consists of the following elements:
671cb0ef41Sopenharmony_ci//
681cb0ef41Sopenharmony_ci//   * The capture registers.  Used for string captures.
691cb0ef41Sopenharmony_ci//   * Other registers.  Used for counters etc.
701cb0ef41Sopenharmony_ci//   * The current position.
711cb0ef41Sopenharmony_ci//   * The stack of backtracking information.  Used when a matching node
721cb0ef41Sopenharmony_ci//     fails to find a match and needs to try an alternative.
731cb0ef41Sopenharmony_ci//
741cb0ef41Sopenharmony_ci// Conceptual regular expression execution model:
751cb0ef41Sopenharmony_ci//
761cb0ef41Sopenharmony_ci//   There is a simple conceptual model of regular expression execution
771cb0ef41Sopenharmony_ci//   which will be presented first.  The actual code generated is a more
781cb0ef41Sopenharmony_ci//   efficient simulation of the simple conceptual model:
791cb0ef41Sopenharmony_ci//
801cb0ef41Sopenharmony_ci//   * Choice nodes are implemented as follows:
811cb0ef41Sopenharmony_ci//     For each choice except the last {
821cb0ef41Sopenharmony_ci//       push current position
831cb0ef41Sopenharmony_ci//       push backtrack code location
841cb0ef41Sopenharmony_ci//       <generate code to test for choice>
851cb0ef41Sopenharmony_ci//       backtrack code location:
861cb0ef41Sopenharmony_ci//       pop current position
871cb0ef41Sopenharmony_ci//     }
881cb0ef41Sopenharmony_ci//     <generate code to test for last choice>
891cb0ef41Sopenharmony_ci//
901cb0ef41Sopenharmony_ci//   * Actions nodes are generated as follows
911cb0ef41Sopenharmony_ci//     <push affected registers on backtrack stack>
921cb0ef41Sopenharmony_ci//     <generate code to perform action>
931cb0ef41Sopenharmony_ci//     push backtrack code location
941cb0ef41Sopenharmony_ci//     <generate code to test for following nodes>
951cb0ef41Sopenharmony_ci//     backtrack code location:
961cb0ef41Sopenharmony_ci//     <pop affected registers to restore their state>
971cb0ef41Sopenharmony_ci//     <pop backtrack location from stack and go to it>
981cb0ef41Sopenharmony_ci//
991cb0ef41Sopenharmony_ci//   * Matching nodes are generated as follows:
1001cb0ef41Sopenharmony_ci//     if input string matches at current position
1011cb0ef41Sopenharmony_ci//       update current position
1021cb0ef41Sopenharmony_ci//       <generate code to test for following nodes>
1031cb0ef41Sopenharmony_ci//     else
1041cb0ef41Sopenharmony_ci//       <pop backtrack location from stack and go to it>
1051cb0ef41Sopenharmony_ci//
1061cb0ef41Sopenharmony_ci//   Thus it can be seen that the current position is saved and restored
1071cb0ef41Sopenharmony_ci//   by the choice nodes, whereas the registers are saved and restored by
1081cb0ef41Sopenharmony_ci//   by the action nodes that manipulate them.
1091cb0ef41Sopenharmony_ci//
1101cb0ef41Sopenharmony_ci//   The other interesting aspect of this model is that nodes are generated
1111cb0ef41Sopenharmony_ci//   at the point where they are needed by a recursive call to Emit().  If
1121cb0ef41Sopenharmony_ci//   the node has already been code generated then the Emit() call will
1131cb0ef41Sopenharmony_ci//   generate a jump to the previously generated code instead.  In order to
1141cb0ef41Sopenharmony_ci//   limit recursion it is possible for the Emit() function to put the node
1151cb0ef41Sopenharmony_ci//   on a work list for later generation and instead generate a jump.  The
1161cb0ef41Sopenharmony_ci//   destination of the jump is resolved later when the code is generated.
1171cb0ef41Sopenharmony_ci//
1181cb0ef41Sopenharmony_ci// Actual regular expression code generation.
1191cb0ef41Sopenharmony_ci//
1201cb0ef41Sopenharmony_ci//   Code generation is actually more complicated than the above.  In order
1211cb0ef41Sopenharmony_ci//   to improve the efficiency of the generated code some optimizations are
1221cb0ef41Sopenharmony_ci//   performed
1231cb0ef41Sopenharmony_ci//
1241cb0ef41Sopenharmony_ci//   * Choice nodes have 1-character lookahead.
1251cb0ef41Sopenharmony_ci//     A choice node looks at the following character and eliminates some of
1261cb0ef41Sopenharmony_ci//     the choices immediately based on that character.  This is not yet
1271cb0ef41Sopenharmony_ci//     implemented.
1281cb0ef41Sopenharmony_ci//   * Simple greedy loops store reduced backtracking information.
1291cb0ef41Sopenharmony_ci//     A quantifier like /.*foo/m will greedily match the whole input.  It will
1301cb0ef41Sopenharmony_ci//     then need to backtrack to a point where it can match "foo".  The naive
1311cb0ef41Sopenharmony_ci//     implementation of this would push each character position onto the
1321cb0ef41Sopenharmony_ci//     backtracking stack, then pop them off one by one.  This would use space
1331cb0ef41Sopenharmony_ci//     proportional to the length of the input string.  However since the "."
1341cb0ef41Sopenharmony_ci//     can only match in one way and always has a constant length (in this case
1351cb0ef41Sopenharmony_ci//     of 1) it suffices to store the current position on the top of the stack
1361cb0ef41Sopenharmony_ci//     once.  Matching now becomes merely incrementing the current position and
1371cb0ef41Sopenharmony_ci//     backtracking becomes decrementing the current position and checking the
1381cb0ef41Sopenharmony_ci//     result against the stored current position.  This is faster and saves
1391cb0ef41Sopenharmony_ci//     space.
1401cb0ef41Sopenharmony_ci//   * The current state is virtualized.
1411cb0ef41Sopenharmony_ci//     This is used to defer expensive operations until it is clear that they
1421cb0ef41Sopenharmony_ci//     are needed and to generate code for a node more than once, allowing
1431cb0ef41Sopenharmony_ci//     specialized an efficient versions of the code to be created. This is
1441cb0ef41Sopenharmony_ci//     explained in the section below.
1451cb0ef41Sopenharmony_ci//
1461cb0ef41Sopenharmony_ci// Execution state virtualization.
1471cb0ef41Sopenharmony_ci//
1481cb0ef41Sopenharmony_ci//   Instead of emitting code, nodes that manipulate the state can record their
1491cb0ef41Sopenharmony_ci//   manipulation in an object called the Trace.  The Trace object can record a
1501cb0ef41Sopenharmony_ci//   current position offset, an optional backtrack code location on the top of
1511cb0ef41Sopenharmony_ci//   the virtualized backtrack stack and some register changes.  When a node is
1521cb0ef41Sopenharmony_ci//   to be emitted it can flush the Trace or update it.  Flushing the Trace
1531cb0ef41Sopenharmony_ci//   will emit code to bring the actual state into line with the virtual state.
1541cb0ef41Sopenharmony_ci//   Avoiding flushing the state can postpone some work (e.g. updates of capture
1551cb0ef41Sopenharmony_ci//   registers).  Postponing work can save time when executing the regular
1561cb0ef41Sopenharmony_ci//   expression since it may be found that the work never has to be done as a
1571cb0ef41Sopenharmony_ci//   failure to match can occur.  In addition it is much faster to jump to a
1581cb0ef41Sopenharmony_ci//   known backtrack code location than it is to pop an unknown backtrack
1591cb0ef41Sopenharmony_ci//   location from the stack and jump there.
1601cb0ef41Sopenharmony_ci//
1611cb0ef41Sopenharmony_ci//   The virtual state found in the Trace affects code generation.  For example
1621cb0ef41Sopenharmony_ci//   the virtual state contains the difference between the actual current
1631cb0ef41Sopenharmony_ci//   position and the virtual current position, and matching code needs to use
1641cb0ef41Sopenharmony_ci//   this offset to attempt a match in the correct location of the input
1651cb0ef41Sopenharmony_ci//   string.  Therefore code generated for a non-trivial trace is specialized
1661cb0ef41Sopenharmony_ci//   to that trace.  The code generator therefore has the ability to generate
1671cb0ef41Sopenharmony_ci//   code for each node several times.  In order to limit the size of the
1681cb0ef41Sopenharmony_ci//   generated code there is an arbitrary limit on how many specialized sets of
1691cb0ef41Sopenharmony_ci//   code may be generated for a given node.  If the limit is reached, the
1701cb0ef41Sopenharmony_ci//   trace is flushed and a generic version of the code for a node is emitted.
1711cb0ef41Sopenharmony_ci//   This is subsequently used for that node.  The code emitted for non-generic
1721cb0ef41Sopenharmony_ci//   trace is not recorded in the node and so it cannot currently be reused in
1731cb0ef41Sopenharmony_ci//   the event that code generation is requested for an identical trace.
1741cb0ef41Sopenharmony_ci
1751cb0ef41Sopenharmony_cinamespace {
1761cb0ef41Sopenharmony_ci
1771cb0ef41Sopenharmony_ciconstexpr base::uc32 MaxCodeUnit(const bool one_byte) {
1781cb0ef41Sopenharmony_ci  STATIC_ASSERT(String::kMaxOneByteCharCodeU <=
1791cb0ef41Sopenharmony_ci                std::numeric_limits<uint16_t>::max());
1801cb0ef41Sopenharmony_ci  STATIC_ASSERT(String::kMaxUtf16CodeUnitU <=
1811cb0ef41Sopenharmony_ci                std::numeric_limits<uint16_t>::max());
1821cb0ef41Sopenharmony_ci  return one_byte ? String::kMaxOneByteCharCodeU : String::kMaxUtf16CodeUnitU;
1831cb0ef41Sopenharmony_ci}
1841cb0ef41Sopenharmony_ci
1851cb0ef41Sopenharmony_ciconstexpr uint32_t CharMask(const bool one_byte) {
1861cb0ef41Sopenharmony_ci  STATIC_ASSERT(base::bits::IsPowerOfTwo(String::kMaxOneByteCharCodeU + 1));
1871cb0ef41Sopenharmony_ci  STATIC_ASSERT(base::bits::IsPowerOfTwo(String::kMaxUtf16CodeUnitU + 1));
1881cb0ef41Sopenharmony_ci  return MaxCodeUnit(one_byte);
1891cb0ef41Sopenharmony_ci}
1901cb0ef41Sopenharmony_ci
1911cb0ef41Sopenharmony_ci}  // namespace
1921cb0ef41Sopenharmony_ci
1931cb0ef41Sopenharmony_civoid RegExpTree::AppendToText(RegExpText* text, Zone* zone) { UNREACHABLE(); }
1941cb0ef41Sopenharmony_ci
1951cb0ef41Sopenharmony_civoid RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
1961cb0ef41Sopenharmony_ci  text->AddElement(TextElement::Atom(this), zone);
1971cb0ef41Sopenharmony_ci}
1981cb0ef41Sopenharmony_ci
1991cb0ef41Sopenharmony_civoid RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {
2001cb0ef41Sopenharmony_ci  text->AddElement(TextElement::CharClass(this), zone);
2011cb0ef41Sopenharmony_ci}
2021cb0ef41Sopenharmony_ci
2031cb0ef41Sopenharmony_civoid RegExpText::AppendToText(RegExpText* text, Zone* zone) {
2041cb0ef41Sopenharmony_ci  for (int i = 0; i < elements()->length(); i++)
2051cb0ef41Sopenharmony_ci    text->AddElement(elements()->at(i), zone);
2061cb0ef41Sopenharmony_ci}
2071cb0ef41Sopenharmony_ci
2081cb0ef41Sopenharmony_ciTextElement TextElement::Atom(RegExpAtom* atom) {
2091cb0ef41Sopenharmony_ci  return TextElement(ATOM, atom);
2101cb0ef41Sopenharmony_ci}
2111cb0ef41Sopenharmony_ci
2121cb0ef41Sopenharmony_ciTextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
2131cb0ef41Sopenharmony_ci  return TextElement(CHAR_CLASS, char_class);
2141cb0ef41Sopenharmony_ci}
2151cb0ef41Sopenharmony_ci
2161cb0ef41Sopenharmony_ciint TextElement::length() const {
2171cb0ef41Sopenharmony_ci  switch (text_type()) {
2181cb0ef41Sopenharmony_ci    case ATOM:
2191cb0ef41Sopenharmony_ci      return atom()->length();
2201cb0ef41Sopenharmony_ci
2211cb0ef41Sopenharmony_ci    case CHAR_CLASS:
2221cb0ef41Sopenharmony_ci      return 1;
2231cb0ef41Sopenharmony_ci  }
2241cb0ef41Sopenharmony_ci  UNREACHABLE();
2251cb0ef41Sopenharmony_ci}
2261cb0ef41Sopenharmony_ci
2271cb0ef41Sopenharmony_ciclass RecursionCheck {
2281cb0ef41Sopenharmony_ci public:
2291cb0ef41Sopenharmony_ci  explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
2301cb0ef41Sopenharmony_ci    compiler->IncrementRecursionDepth();
2311cb0ef41Sopenharmony_ci  }
2321cb0ef41Sopenharmony_ci  ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
2331cb0ef41Sopenharmony_ci
2341cb0ef41Sopenharmony_ci private:
2351cb0ef41Sopenharmony_ci  RegExpCompiler* compiler_;
2361cb0ef41Sopenharmony_ci};
2371cb0ef41Sopenharmony_ci
2381cb0ef41Sopenharmony_ci// Attempts to compile the regexp using an Irregexp code generator.  Returns
2391cb0ef41Sopenharmony_ci// a fixed array or a null handle depending on whether it succeeded.
2401cb0ef41Sopenharmony_ciRegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
2411cb0ef41Sopenharmony_ci                               RegExpFlags flags, bool one_byte)
2421cb0ef41Sopenharmony_ci    : next_register_(JSRegExp::RegistersForCaptureCount(capture_count)),
2431cb0ef41Sopenharmony_ci      unicode_lookaround_stack_register_(kNoRegister),
2441cb0ef41Sopenharmony_ci      unicode_lookaround_position_register_(kNoRegister),
2451cb0ef41Sopenharmony_ci      work_list_(nullptr),
2461cb0ef41Sopenharmony_ci      recursion_depth_(0),
2471cb0ef41Sopenharmony_ci      flags_(flags),
2481cb0ef41Sopenharmony_ci      one_byte_(one_byte),
2491cb0ef41Sopenharmony_ci      reg_exp_too_big_(false),
2501cb0ef41Sopenharmony_ci      limiting_recursion_(false),
2511cb0ef41Sopenharmony_ci      optimize_(FLAG_regexp_optimization),
2521cb0ef41Sopenharmony_ci      read_backward_(false),
2531cb0ef41Sopenharmony_ci      current_expansion_factor_(1),
2541cb0ef41Sopenharmony_ci      frequency_collator_(),
2551cb0ef41Sopenharmony_ci      isolate_(isolate),
2561cb0ef41Sopenharmony_ci      zone_(zone) {
2571cb0ef41Sopenharmony_ci  accept_ = zone->New<EndNode>(EndNode::ACCEPT, zone);
2581cb0ef41Sopenharmony_ci  DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1);
2591cb0ef41Sopenharmony_ci}
2601cb0ef41Sopenharmony_ci
2611cb0ef41Sopenharmony_ciRegExpCompiler::CompilationResult RegExpCompiler::Assemble(
2621cb0ef41Sopenharmony_ci    Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
2631cb0ef41Sopenharmony_ci    int capture_count, Handle<String> pattern) {
2641cb0ef41Sopenharmony_ci  macro_assembler_ = macro_assembler;
2651cb0ef41Sopenharmony_ci
2661cb0ef41Sopenharmony_ci  ZoneVector<RegExpNode*> work_list(zone());
2671cb0ef41Sopenharmony_ci  work_list_ = &work_list;
2681cb0ef41Sopenharmony_ci  Label fail;
2691cb0ef41Sopenharmony_ci  macro_assembler_->PushBacktrack(&fail);
2701cb0ef41Sopenharmony_ci  Trace new_trace;
2711cb0ef41Sopenharmony_ci  start->Emit(this, &new_trace);
2721cb0ef41Sopenharmony_ci  macro_assembler_->BindJumpTarget(&fail);
2731cb0ef41Sopenharmony_ci  macro_assembler_->Fail();
2741cb0ef41Sopenharmony_ci  while (!work_list.empty()) {
2751cb0ef41Sopenharmony_ci    RegExpNode* node = work_list.back();
2761cb0ef41Sopenharmony_ci    work_list.pop_back();
2771cb0ef41Sopenharmony_ci    node->set_on_work_list(false);
2781cb0ef41Sopenharmony_ci    if (!node->label()->is_bound()) node->Emit(this, &new_trace);
2791cb0ef41Sopenharmony_ci  }
2801cb0ef41Sopenharmony_ci  if (reg_exp_too_big_) {
2811cb0ef41Sopenharmony_ci    if (FLAG_correctness_fuzzer_suppressions) {
2821cb0ef41Sopenharmony_ci      FATAL("Aborting on excess zone allocation");
2831cb0ef41Sopenharmony_ci    }
2841cb0ef41Sopenharmony_ci    macro_assembler_->AbortedCodeGeneration();
2851cb0ef41Sopenharmony_ci    return CompilationResult::RegExpTooBig();
2861cb0ef41Sopenharmony_ci  }
2871cb0ef41Sopenharmony_ci
2881cb0ef41Sopenharmony_ci  Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
2891cb0ef41Sopenharmony_ci  isolate->IncreaseTotalRegexpCodeGenerated(code);
2901cb0ef41Sopenharmony_ci  work_list_ = nullptr;
2911cb0ef41Sopenharmony_ci
2921cb0ef41Sopenharmony_ci  return {code, next_register_};
2931cb0ef41Sopenharmony_ci}
2941cb0ef41Sopenharmony_ci
2951cb0ef41Sopenharmony_cibool Trace::DeferredAction::Mentions(int that) {
2961cb0ef41Sopenharmony_ci  if (action_type() == ActionNode::CLEAR_CAPTURES) {
2971cb0ef41Sopenharmony_ci    Interval range = static_cast<DeferredClearCaptures*>(this)->range();
2981cb0ef41Sopenharmony_ci    return range.Contains(that);
2991cb0ef41Sopenharmony_ci  } else {
3001cb0ef41Sopenharmony_ci    return reg() == that;
3011cb0ef41Sopenharmony_ci  }
3021cb0ef41Sopenharmony_ci}
3031cb0ef41Sopenharmony_ci
3041cb0ef41Sopenharmony_cibool Trace::mentions_reg(int reg) {
3051cb0ef41Sopenharmony_ci  for (DeferredAction* action = actions_; action != nullptr;
3061cb0ef41Sopenharmony_ci       action = action->next()) {
3071cb0ef41Sopenharmony_ci    if (action->Mentions(reg)) return true;
3081cb0ef41Sopenharmony_ci  }
3091cb0ef41Sopenharmony_ci  return false;
3101cb0ef41Sopenharmony_ci}
3111cb0ef41Sopenharmony_ci
3121cb0ef41Sopenharmony_cibool Trace::GetStoredPosition(int reg, int* cp_offset) {
3131cb0ef41Sopenharmony_ci  DCHECK_EQ(0, *cp_offset);
3141cb0ef41Sopenharmony_ci  for (DeferredAction* action = actions_; action != nullptr;
3151cb0ef41Sopenharmony_ci       action = action->next()) {
3161cb0ef41Sopenharmony_ci    if (action->Mentions(reg)) {
3171cb0ef41Sopenharmony_ci      if (action->action_type() == ActionNode::STORE_POSITION) {
3181cb0ef41Sopenharmony_ci        *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
3191cb0ef41Sopenharmony_ci        return true;
3201cb0ef41Sopenharmony_ci      } else {
3211cb0ef41Sopenharmony_ci        return false;
3221cb0ef41Sopenharmony_ci      }
3231cb0ef41Sopenharmony_ci    }
3241cb0ef41Sopenharmony_ci  }
3251cb0ef41Sopenharmony_ci  return false;
3261cb0ef41Sopenharmony_ci}
3271cb0ef41Sopenharmony_ci
3281cb0ef41Sopenharmony_ci// A (dynamically-sized) set of unsigned integers that behaves especially well
3291cb0ef41Sopenharmony_ci// on small integers (< kFirstLimit). May do zone-allocation.
3301cb0ef41Sopenharmony_ciclass DynamicBitSet : public ZoneObject {
3311cb0ef41Sopenharmony_ci public:
3321cb0ef41Sopenharmony_ci  V8_EXPORT_PRIVATE bool Get(unsigned value) const {
3331cb0ef41Sopenharmony_ci    if (value < kFirstLimit) {
3341cb0ef41Sopenharmony_ci      return (first_ & (1 << value)) != 0;
3351cb0ef41Sopenharmony_ci    } else if (remaining_ == nullptr) {
3361cb0ef41Sopenharmony_ci      return false;
3371cb0ef41Sopenharmony_ci    } else {
3381cb0ef41Sopenharmony_ci      return remaining_->Contains(value);
3391cb0ef41Sopenharmony_ci    }
3401cb0ef41Sopenharmony_ci  }
3411cb0ef41Sopenharmony_ci
3421cb0ef41Sopenharmony_ci  // Destructively set a value in this set.
3431cb0ef41Sopenharmony_ci  void Set(unsigned value, Zone* zone) {
3441cb0ef41Sopenharmony_ci    if (value < kFirstLimit) {
3451cb0ef41Sopenharmony_ci      first_ |= (1 << value);
3461cb0ef41Sopenharmony_ci    } else {
3471cb0ef41Sopenharmony_ci      if (remaining_ == nullptr)
3481cb0ef41Sopenharmony_ci        remaining_ = zone->New<ZoneList<unsigned>>(1, zone);
3491cb0ef41Sopenharmony_ci      if (remaining_->is_empty() || !remaining_->Contains(value))
3501cb0ef41Sopenharmony_ci        remaining_->Add(value, zone);
3511cb0ef41Sopenharmony_ci    }
3521cb0ef41Sopenharmony_ci  }
3531cb0ef41Sopenharmony_ci
3541cb0ef41Sopenharmony_ci private:
3551cb0ef41Sopenharmony_ci  static constexpr unsigned kFirstLimit = 32;
3561cb0ef41Sopenharmony_ci
3571cb0ef41Sopenharmony_ci  uint32_t first_ = 0;
3581cb0ef41Sopenharmony_ci  ZoneList<unsigned>* remaining_ = nullptr;
3591cb0ef41Sopenharmony_ci};
3601cb0ef41Sopenharmony_ci
3611cb0ef41Sopenharmony_ciint Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
3621cb0ef41Sopenharmony_ci                                 Zone* zone) {
3631cb0ef41Sopenharmony_ci  int max_register = RegExpCompiler::kNoRegister;
3641cb0ef41Sopenharmony_ci  for (DeferredAction* action = actions_; action != nullptr;
3651cb0ef41Sopenharmony_ci       action = action->next()) {
3661cb0ef41Sopenharmony_ci    if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
3671cb0ef41Sopenharmony_ci      Interval range = static_cast<DeferredClearCaptures*>(action)->range();
3681cb0ef41Sopenharmony_ci      for (int i = range.from(); i <= range.to(); i++)
3691cb0ef41Sopenharmony_ci        affected_registers->Set(i, zone);
3701cb0ef41Sopenharmony_ci      if (range.to() > max_register) max_register = range.to();
3711cb0ef41Sopenharmony_ci    } else {
3721cb0ef41Sopenharmony_ci      affected_registers->Set(action->reg(), zone);
3731cb0ef41Sopenharmony_ci      if (action->reg() > max_register) max_register = action->reg();
3741cb0ef41Sopenharmony_ci    }
3751cb0ef41Sopenharmony_ci  }
3761cb0ef41Sopenharmony_ci  return max_register;
3771cb0ef41Sopenharmony_ci}
3781cb0ef41Sopenharmony_ci
3791cb0ef41Sopenharmony_civoid Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
3801cb0ef41Sopenharmony_ci                                     int max_register,
3811cb0ef41Sopenharmony_ci                                     const DynamicBitSet& registers_to_pop,
3821cb0ef41Sopenharmony_ci                                     const DynamicBitSet& registers_to_clear) {
3831cb0ef41Sopenharmony_ci  for (int reg = max_register; reg >= 0; reg--) {
3841cb0ef41Sopenharmony_ci    if (registers_to_pop.Get(reg)) {
3851cb0ef41Sopenharmony_ci      assembler->PopRegister(reg);
3861cb0ef41Sopenharmony_ci    } else if (registers_to_clear.Get(reg)) {
3871cb0ef41Sopenharmony_ci      int clear_to = reg;
3881cb0ef41Sopenharmony_ci      while (reg > 0 && registers_to_clear.Get(reg - 1)) {
3891cb0ef41Sopenharmony_ci        reg--;
3901cb0ef41Sopenharmony_ci      }
3911cb0ef41Sopenharmony_ci      assembler->ClearRegisters(reg, clear_to);
3921cb0ef41Sopenharmony_ci    }
3931cb0ef41Sopenharmony_ci  }
3941cb0ef41Sopenharmony_ci}
3951cb0ef41Sopenharmony_ci
3961cb0ef41Sopenharmony_civoid Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
3971cb0ef41Sopenharmony_ci                                   int max_register,
3981cb0ef41Sopenharmony_ci                                   const DynamicBitSet& affected_registers,
3991cb0ef41Sopenharmony_ci                                   DynamicBitSet* registers_to_pop,
4001cb0ef41Sopenharmony_ci                                   DynamicBitSet* registers_to_clear,
4011cb0ef41Sopenharmony_ci                                   Zone* zone) {
4021cb0ef41Sopenharmony_ci  // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
4031cb0ef41Sopenharmony_ci  const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
4041cb0ef41Sopenharmony_ci
4051cb0ef41Sopenharmony_ci  // Count pushes performed to force a stack limit check occasionally.
4061cb0ef41Sopenharmony_ci  int pushes = 0;
4071cb0ef41Sopenharmony_ci
4081cb0ef41Sopenharmony_ci  for (int reg = 0; reg <= max_register; reg++) {
4091cb0ef41Sopenharmony_ci    if (!affected_registers.Get(reg)) continue;
4101cb0ef41Sopenharmony_ci
4111cb0ef41Sopenharmony_ci    // The chronologically first deferred action in the trace
4121cb0ef41Sopenharmony_ci    // is used to infer the action needed to restore a register
4131cb0ef41Sopenharmony_ci    // to its previous state (or not, if it's safe to ignore it).
4141cb0ef41Sopenharmony_ci    enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
4151cb0ef41Sopenharmony_ci    DeferredActionUndoType undo_action = IGNORE;
4161cb0ef41Sopenharmony_ci
4171cb0ef41Sopenharmony_ci    int value = 0;
4181cb0ef41Sopenharmony_ci    bool absolute = false;
4191cb0ef41Sopenharmony_ci    bool clear = false;
4201cb0ef41Sopenharmony_ci    static const int kNoStore = kMinInt;
4211cb0ef41Sopenharmony_ci    int store_position = kNoStore;
4221cb0ef41Sopenharmony_ci    // This is a little tricky because we are scanning the actions in reverse
4231cb0ef41Sopenharmony_ci    // historical order (newest first).
4241cb0ef41Sopenharmony_ci    for (DeferredAction* action = actions_; action != nullptr;
4251cb0ef41Sopenharmony_ci         action = action->next()) {
4261cb0ef41Sopenharmony_ci      if (action->Mentions(reg)) {
4271cb0ef41Sopenharmony_ci        switch (action->action_type()) {
4281cb0ef41Sopenharmony_ci          case ActionNode::SET_REGISTER_FOR_LOOP: {
4291cb0ef41Sopenharmony_ci            Trace::DeferredSetRegisterForLoop* psr =
4301cb0ef41Sopenharmony_ci                static_cast<Trace::DeferredSetRegisterForLoop*>(action);
4311cb0ef41Sopenharmony_ci            if (!absolute) {
4321cb0ef41Sopenharmony_ci              value += psr->value();
4331cb0ef41Sopenharmony_ci              absolute = true;
4341cb0ef41Sopenharmony_ci            }
4351cb0ef41Sopenharmony_ci            // SET_REGISTER_FOR_LOOP is only used for newly introduced loop
4361cb0ef41Sopenharmony_ci            // counters. They can have a significant previous value if they
4371cb0ef41Sopenharmony_ci            // occur in a loop. TODO(lrn): Propagate this information, so
4381cb0ef41Sopenharmony_ci            // we can set undo_action to IGNORE if we know there is no value to
4391cb0ef41Sopenharmony_ci            // restore.
4401cb0ef41Sopenharmony_ci            undo_action = RESTORE;
4411cb0ef41Sopenharmony_ci            DCHECK_EQ(store_position, kNoStore);
4421cb0ef41Sopenharmony_ci            DCHECK(!clear);
4431cb0ef41Sopenharmony_ci            break;
4441cb0ef41Sopenharmony_ci          }
4451cb0ef41Sopenharmony_ci          case ActionNode::INCREMENT_REGISTER:
4461cb0ef41Sopenharmony_ci            if (!absolute) {
4471cb0ef41Sopenharmony_ci              value++;
4481cb0ef41Sopenharmony_ci            }
4491cb0ef41Sopenharmony_ci            DCHECK_EQ(store_position, kNoStore);
4501cb0ef41Sopenharmony_ci            DCHECK(!clear);
4511cb0ef41Sopenharmony_ci            undo_action = RESTORE;
4521cb0ef41Sopenharmony_ci            break;
4531cb0ef41Sopenharmony_ci          case ActionNode::STORE_POSITION: {
4541cb0ef41Sopenharmony_ci            Trace::DeferredCapture* pc =
4551cb0ef41Sopenharmony_ci                static_cast<Trace::DeferredCapture*>(action);
4561cb0ef41Sopenharmony_ci            if (!clear && store_position == kNoStore) {
4571cb0ef41Sopenharmony_ci              store_position = pc->cp_offset();
4581cb0ef41Sopenharmony_ci            }
4591cb0ef41Sopenharmony_ci
4601cb0ef41Sopenharmony_ci            // For captures we know that stores and clears alternate.
4611cb0ef41Sopenharmony_ci            // Other register, are never cleared, and if the occur
4621cb0ef41Sopenharmony_ci            // inside a loop, they might be assigned more than once.
4631cb0ef41Sopenharmony_ci            if (reg <= 1) {
4641cb0ef41Sopenharmony_ci              // Registers zero and one, aka "capture zero", is
4651cb0ef41Sopenharmony_ci              // always set correctly if we succeed. There is no
4661cb0ef41Sopenharmony_ci              // need to undo a setting on backtrack, because we
4671cb0ef41Sopenharmony_ci              // will set it again or fail.
4681cb0ef41Sopenharmony_ci              undo_action = IGNORE;
4691cb0ef41Sopenharmony_ci            } else {
4701cb0ef41Sopenharmony_ci              undo_action = pc->is_capture() ? CLEAR : RESTORE;
4711cb0ef41Sopenharmony_ci            }
4721cb0ef41Sopenharmony_ci            DCHECK(!absolute);
4731cb0ef41Sopenharmony_ci            DCHECK_EQ(value, 0);
4741cb0ef41Sopenharmony_ci            break;
4751cb0ef41Sopenharmony_ci          }
4761cb0ef41Sopenharmony_ci          case ActionNode::CLEAR_CAPTURES: {
4771cb0ef41Sopenharmony_ci            // Since we're scanning in reverse order, if we've already
4781cb0ef41Sopenharmony_ci            // set the position we have to ignore historically earlier
4791cb0ef41Sopenharmony_ci            // clearing operations.
4801cb0ef41Sopenharmony_ci            if (store_position == kNoStore) {
4811cb0ef41Sopenharmony_ci              clear = true;
4821cb0ef41Sopenharmony_ci            }
4831cb0ef41Sopenharmony_ci            undo_action = RESTORE;
4841cb0ef41Sopenharmony_ci            DCHECK(!absolute);
4851cb0ef41Sopenharmony_ci            DCHECK_EQ(value, 0);
4861cb0ef41Sopenharmony_ci            break;
4871cb0ef41Sopenharmony_ci          }
4881cb0ef41Sopenharmony_ci          default:
4891cb0ef41Sopenharmony_ci            UNREACHABLE();
4901cb0ef41Sopenharmony_ci        }
4911cb0ef41Sopenharmony_ci      }
4921cb0ef41Sopenharmony_ci    }
4931cb0ef41Sopenharmony_ci    // Prepare for the undo-action (e.g., push if it's going to be popped).
4941cb0ef41Sopenharmony_ci    if (undo_action == RESTORE) {
4951cb0ef41Sopenharmony_ci      pushes++;
4961cb0ef41Sopenharmony_ci      RegExpMacroAssembler::StackCheckFlag stack_check =
4971cb0ef41Sopenharmony_ci          RegExpMacroAssembler::kNoStackLimitCheck;
4981cb0ef41Sopenharmony_ci      if (pushes == push_limit) {
4991cb0ef41Sopenharmony_ci        stack_check = RegExpMacroAssembler::kCheckStackLimit;
5001cb0ef41Sopenharmony_ci        pushes = 0;
5011cb0ef41Sopenharmony_ci      }
5021cb0ef41Sopenharmony_ci
5031cb0ef41Sopenharmony_ci      assembler->PushRegister(reg, stack_check);
5041cb0ef41Sopenharmony_ci      registers_to_pop->Set(reg, zone);
5051cb0ef41Sopenharmony_ci    } else if (undo_action == CLEAR) {
5061cb0ef41Sopenharmony_ci      registers_to_clear->Set(reg, zone);
5071cb0ef41Sopenharmony_ci    }
5081cb0ef41Sopenharmony_ci    // Perform the chronologically last action (or accumulated increment)
5091cb0ef41Sopenharmony_ci    // for the register.
5101cb0ef41Sopenharmony_ci    if (store_position != kNoStore) {
5111cb0ef41Sopenharmony_ci      assembler->WriteCurrentPositionToRegister(reg, store_position);
5121cb0ef41Sopenharmony_ci    } else if (clear) {
5131cb0ef41Sopenharmony_ci      assembler->ClearRegisters(reg, reg);
5141cb0ef41Sopenharmony_ci    } else if (absolute) {
5151cb0ef41Sopenharmony_ci      assembler->SetRegister(reg, value);
5161cb0ef41Sopenharmony_ci    } else if (value != 0) {
5171cb0ef41Sopenharmony_ci      assembler->AdvanceRegister(reg, value);
5181cb0ef41Sopenharmony_ci    }
5191cb0ef41Sopenharmony_ci  }
5201cb0ef41Sopenharmony_ci}
5211cb0ef41Sopenharmony_ci
5221cb0ef41Sopenharmony_ci// This is called as we come into a loop choice node and some other tricky
5231cb0ef41Sopenharmony_ci// nodes.  It normalizes the state of the code generator to ensure we can
5241cb0ef41Sopenharmony_ci// generate generic code.
5251cb0ef41Sopenharmony_civoid Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
5261cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
5271cb0ef41Sopenharmony_ci
5281cb0ef41Sopenharmony_ci  DCHECK(!is_trivial());
5291cb0ef41Sopenharmony_ci
5301cb0ef41Sopenharmony_ci  if (actions_ == nullptr && backtrack() == nullptr) {
5311cb0ef41Sopenharmony_ci    // Here we just have some deferred cp advances to fix and we are back to
5321cb0ef41Sopenharmony_ci    // a normal situation.  We may also have to forget some information gained
5331cb0ef41Sopenharmony_ci    // through a quick check that was already performed.
5341cb0ef41Sopenharmony_ci    if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
5351cb0ef41Sopenharmony_ci    // Create a new trivial state and generate the node with that.
5361cb0ef41Sopenharmony_ci    Trace new_state;
5371cb0ef41Sopenharmony_ci    successor->Emit(compiler, &new_state);
5381cb0ef41Sopenharmony_ci    return;
5391cb0ef41Sopenharmony_ci  }
5401cb0ef41Sopenharmony_ci
5411cb0ef41Sopenharmony_ci  // Generate deferred actions here along with code to undo them again.
5421cb0ef41Sopenharmony_ci  DynamicBitSet affected_registers;
5431cb0ef41Sopenharmony_ci
5441cb0ef41Sopenharmony_ci  if (backtrack() != nullptr) {
5451cb0ef41Sopenharmony_ci    // Here we have a concrete backtrack location.  These are set up by choice
5461cb0ef41Sopenharmony_ci    // nodes and so they indicate that we have a deferred save of the current
5471cb0ef41Sopenharmony_ci    // position which we may need to emit here.
5481cb0ef41Sopenharmony_ci    assembler->PushCurrentPosition();
5491cb0ef41Sopenharmony_ci  }
5501cb0ef41Sopenharmony_ci
5511cb0ef41Sopenharmony_ci  int max_register =
5521cb0ef41Sopenharmony_ci      FindAffectedRegisters(&affected_registers, compiler->zone());
5531cb0ef41Sopenharmony_ci  DynamicBitSet registers_to_pop;
5541cb0ef41Sopenharmony_ci  DynamicBitSet registers_to_clear;
5551cb0ef41Sopenharmony_ci  PerformDeferredActions(assembler, max_register, affected_registers,
5561cb0ef41Sopenharmony_ci                         &registers_to_pop, &registers_to_clear,
5571cb0ef41Sopenharmony_ci                         compiler->zone());
5581cb0ef41Sopenharmony_ci  if (cp_offset_ != 0) {
5591cb0ef41Sopenharmony_ci    assembler->AdvanceCurrentPosition(cp_offset_);
5601cb0ef41Sopenharmony_ci  }
5611cb0ef41Sopenharmony_ci
5621cb0ef41Sopenharmony_ci  // Create a new trivial state and generate the node with that.
5631cb0ef41Sopenharmony_ci  Label undo;
5641cb0ef41Sopenharmony_ci  assembler->PushBacktrack(&undo);
5651cb0ef41Sopenharmony_ci  if (successor->KeepRecursing(compiler)) {
5661cb0ef41Sopenharmony_ci    Trace new_state;
5671cb0ef41Sopenharmony_ci    successor->Emit(compiler, &new_state);
5681cb0ef41Sopenharmony_ci  } else {
5691cb0ef41Sopenharmony_ci    compiler->AddWork(successor);
5701cb0ef41Sopenharmony_ci    assembler->GoTo(successor->label());
5711cb0ef41Sopenharmony_ci  }
5721cb0ef41Sopenharmony_ci
5731cb0ef41Sopenharmony_ci  // On backtrack we need to restore state.
5741cb0ef41Sopenharmony_ci  assembler->BindJumpTarget(&undo);
5751cb0ef41Sopenharmony_ci  RestoreAffectedRegisters(assembler, max_register, registers_to_pop,
5761cb0ef41Sopenharmony_ci                           registers_to_clear);
5771cb0ef41Sopenharmony_ci  if (backtrack() == nullptr) {
5781cb0ef41Sopenharmony_ci    assembler->Backtrack();
5791cb0ef41Sopenharmony_ci  } else {
5801cb0ef41Sopenharmony_ci    assembler->PopCurrentPosition();
5811cb0ef41Sopenharmony_ci    assembler->GoTo(backtrack());
5821cb0ef41Sopenharmony_ci  }
5831cb0ef41Sopenharmony_ci}
5841cb0ef41Sopenharmony_ci
5851cb0ef41Sopenharmony_civoid NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
5861cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
5871cb0ef41Sopenharmony_ci
5881cb0ef41Sopenharmony_ci  // Omit flushing the trace. We discard the entire stack frame anyway.
5891cb0ef41Sopenharmony_ci
5901cb0ef41Sopenharmony_ci  if (!label()->is_bound()) {
5911cb0ef41Sopenharmony_ci    // We are completely independent of the trace, since we ignore it,
5921cb0ef41Sopenharmony_ci    // so this code can be used as the generic version.
5931cb0ef41Sopenharmony_ci    assembler->Bind(label());
5941cb0ef41Sopenharmony_ci  }
5951cb0ef41Sopenharmony_ci
5961cb0ef41Sopenharmony_ci  // Throw away everything on the backtrack stack since the start
5971cb0ef41Sopenharmony_ci  // of the negative submatch and restore the character position.
5981cb0ef41Sopenharmony_ci  assembler->ReadCurrentPositionFromRegister(current_position_register_);
5991cb0ef41Sopenharmony_ci  assembler->ReadStackPointerFromRegister(stack_pointer_register_);
6001cb0ef41Sopenharmony_ci  if (clear_capture_count_ > 0) {
6011cb0ef41Sopenharmony_ci    // Clear any captures that might have been performed during the success
6021cb0ef41Sopenharmony_ci    // of the body of the negative look-ahead.
6031cb0ef41Sopenharmony_ci    int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
6041cb0ef41Sopenharmony_ci    assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
6051cb0ef41Sopenharmony_ci  }
6061cb0ef41Sopenharmony_ci  // Now that we have unwound the stack we find at the top of the stack the
6071cb0ef41Sopenharmony_ci  // backtrack that the BeginNegativeSubmatch node got.
6081cb0ef41Sopenharmony_ci  assembler->Backtrack();
6091cb0ef41Sopenharmony_ci}
6101cb0ef41Sopenharmony_ci
6111cb0ef41Sopenharmony_civoid EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
6121cb0ef41Sopenharmony_ci  if (!trace->is_trivial()) {
6131cb0ef41Sopenharmony_ci    trace->Flush(compiler, this);
6141cb0ef41Sopenharmony_ci    return;
6151cb0ef41Sopenharmony_ci  }
6161cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
6171cb0ef41Sopenharmony_ci  if (!label()->is_bound()) {
6181cb0ef41Sopenharmony_ci    assembler->Bind(label());
6191cb0ef41Sopenharmony_ci  }
6201cb0ef41Sopenharmony_ci  switch (action_) {
6211cb0ef41Sopenharmony_ci    case ACCEPT:
6221cb0ef41Sopenharmony_ci      assembler->Succeed();
6231cb0ef41Sopenharmony_ci      return;
6241cb0ef41Sopenharmony_ci    case BACKTRACK:
6251cb0ef41Sopenharmony_ci      assembler->GoTo(trace->backtrack());
6261cb0ef41Sopenharmony_ci      return;
6271cb0ef41Sopenharmony_ci    case NEGATIVE_SUBMATCH_SUCCESS:
6281cb0ef41Sopenharmony_ci      // This case is handled in a different virtual method.
6291cb0ef41Sopenharmony_ci      UNREACHABLE();
6301cb0ef41Sopenharmony_ci  }
6311cb0ef41Sopenharmony_ci  UNIMPLEMENTED();
6321cb0ef41Sopenharmony_ci}
6331cb0ef41Sopenharmony_ci
6341cb0ef41Sopenharmony_civoid GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
6351cb0ef41Sopenharmony_ci  if (guards_ == nullptr) guards_ = zone->New<ZoneList<Guard*>>(1, zone);
6361cb0ef41Sopenharmony_ci  guards_->Add(guard, zone);
6371cb0ef41Sopenharmony_ci}
6381cb0ef41Sopenharmony_ci
6391cb0ef41Sopenharmony_ciActionNode* ActionNode::SetRegisterForLoop(int reg, int val,
6401cb0ef41Sopenharmony_ci                                           RegExpNode* on_success) {
6411cb0ef41Sopenharmony_ci  ActionNode* result =
6421cb0ef41Sopenharmony_ci      on_success->zone()->New<ActionNode>(SET_REGISTER_FOR_LOOP, on_success);
6431cb0ef41Sopenharmony_ci  result->data_.u_store_register.reg = reg;
6441cb0ef41Sopenharmony_ci  result->data_.u_store_register.value = val;
6451cb0ef41Sopenharmony_ci  return result;
6461cb0ef41Sopenharmony_ci}
6471cb0ef41Sopenharmony_ci
6481cb0ef41Sopenharmony_ciActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
6491cb0ef41Sopenharmony_ci  ActionNode* result =
6501cb0ef41Sopenharmony_ci      on_success->zone()->New<ActionNode>(INCREMENT_REGISTER, on_success);
6511cb0ef41Sopenharmony_ci  result->data_.u_increment_register.reg = reg;
6521cb0ef41Sopenharmony_ci  return result;
6531cb0ef41Sopenharmony_ci}
6541cb0ef41Sopenharmony_ci
6551cb0ef41Sopenharmony_ciActionNode* ActionNode::StorePosition(int reg, bool is_capture,
6561cb0ef41Sopenharmony_ci                                      RegExpNode* on_success) {
6571cb0ef41Sopenharmony_ci  ActionNode* result =
6581cb0ef41Sopenharmony_ci      on_success->zone()->New<ActionNode>(STORE_POSITION, on_success);
6591cb0ef41Sopenharmony_ci  result->data_.u_position_register.reg = reg;
6601cb0ef41Sopenharmony_ci  result->data_.u_position_register.is_capture = is_capture;
6611cb0ef41Sopenharmony_ci  return result;
6621cb0ef41Sopenharmony_ci}
6631cb0ef41Sopenharmony_ci
6641cb0ef41Sopenharmony_ciActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
6651cb0ef41Sopenharmony_ci  ActionNode* result =
6661cb0ef41Sopenharmony_ci      on_success->zone()->New<ActionNode>(CLEAR_CAPTURES, on_success);
6671cb0ef41Sopenharmony_ci  result->data_.u_clear_captures.range_from = range.from();
6681cb0ef41Sopenharmony_ci  result->data_.u_clear_captures.range_to = range.to();
6691cb0ef41Sopenharmony_ci  return result;
6701cb0ef41Sopenharmony_ci}
6711cb0ef41Sopenharmony_ci
6721cb0ef41Sopenharmony_ciActionNode* ActionNode::BeginPositiveSubmatch(int stack_reg, int position_reg,
6731cb0ef41Sopenharmony_ci                                              RegExpNode* on_success) {
6741cb0ef41Sopenharmony_ci  ActionNode* result =
6751cb0ef41Sopenharmony_ci      on_success->zone()->New<ActionNode>(BEGIN_POSITIVE_SUBMATCH, on_success);
6761cb0ef41Sopenharmony_ci  result->data_.u_submatch.stack_pointer_register = stack_reg;
6771cb0ef41Sopenharmony_ci  result->data_.u_submatch.current_position_register = position_reg;
6781cb0ef41Sopenharmony_ci  return result;
6791cb0ef41Sopenharmony_ci}
6801cb0ef41Sopenharmony_ci
6811cb0ef41Sopenharmony_ciActionNode* ActionNode::BeginNegativeSubmatch(int stack_reg, int position_reg,
6821cb0ef41Sopenharmony_ci                                              RegExpNode* on_success) {
6831cb0ef41Sopenharmony_ci  ActionNode* result =
6841cb0ef41Sopenharmony_ci      on_success->zone()->New<ActionNode>(BEGIN_NEGATIVE_SUBMATCH, on_success);
6851cb0ef41Sopenharmony_ci  result->data_.u_submatch.stack_pointer_register = stack_reg;
6861cb0ef41Sopenharmony_ci  result->data_.u_submatch.current_position_register = position_reg;
6871cb0ef41Sopenharmony_ci  return result;
6881cb0ef41Sopenharmony_ci}
6891cb0ef41Sopenharmony_ci
6901cb0ef41Sopenharmony_ciActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, int position_reg,
6911cb0ef41Sopenharmony_ci                                                int clear_register_count,
6921cb0ef41Sopenharmony_ci                                                int clear_register_from,
6931cb0ef41Sopenharmony_ci                                                RegExpNode* on_success) {
6941cb0ef41Sopenharmony_ci  ActionNode* result = on_success->zone()->New<ActionNode>(
6951cb0ef41Sopenharmony_ci      POSITIVE_SUBMATCH_SUCCESS, on_success);
6961cb0ef41Sopenharmony_ci  result->data_.u_submatch.stack_pointer_register = stack_reg;
6971cb0ef41Sopenharmony_ci  result->data_.u_submatch.current_position_register = position_reg;
6981cb0ef41Sopenharmony_ci  result->data_.u_submatch.clear_register_count = clear_register_count;
6991cb0ef41Sopenharmony_ci  result->data_.u_submatch.clear_register_from = clear_register_from;
7001cb0ef41Sopenharmony_ci  return result;
7011cb0ef41Sopenharmony_ci}
7021cb0ef41Sopenharmony_ci
7031cb0ef41Sopenharmony_ciActionNode* ActionNode::EmptyMatchCheck(int start_register,
7041cb0ef41Sopenharmony_ci                                        int repetition_register,
7051cb0ef41Sopenharmony_ci                                        int repetition_limit,
7061cb0ef41Sopenharmony_ci                                        RegExpNode* on_success) {
7071cb0ef41Sopenharmony_ci  ActionNode* result =
7081cb0ef41Sopenharmony_ci      on_success->zone()->New<ActionNode>(EMPTY_MATCH_CHECK, on_success);
7091cb0ef41Sopenharmony_ci  result->data_.u_empty_match_check.start_register = start_register;
7101cb0ef41Sopenharmony_ci  result->data_.u_empty_match_check.repetition_register = repetition_register;
7111cb0ef41Sopenharmony_ci  result->data_.u_empty_match_check.repetition_limit = repetition_limit;
7121cb0ef41Sopenharmony_ci  return result;
7131cb0ef41Sopenharmony_ci}
7141cb0ef41Sopenharmony_ci
7151cb0ef41Sopenharmony_ci#define DEFINE_ACCEPT(Type) \
7161cb0ef41Sopenharmony_ci  void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
7171cb0ef41Sopenharmony_ciFOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
7181cb0ef41Sopenharmony_ci#undef DEFINE_ACCEPT
7191cb0ef41Sopenharmony_ci
7201cb0ef41Sopenharmony_ci// -------------------------------------------------------------------
7211cb0ef41Sopenharmony_ci// Emit code.
7221cb0ef41Sopenharmony_ci
7231cb0ef41Sopenharmony_civoid ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
7241cb0ef41Sopenharmony_ci                               Guard* guard, Trace* trace) {
7251cb0ef41Sopenharmony_ci  switch (guard->op()) {
7261cb0ef41Sopenharmony_ci    case Guard::LT:
7271cb0ef41Sopenharmony_ci      DCHECK(!trace->mentions_reg(guard->reg()));
7281cb0ef41Sopenharmony_ci      macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
7291cb0ef41Sopenharmony_ci                                    trace->backtrack());
7301cb0ef41Sopenharmony_ci      break;
7311cb0ef41Sopenharmony_ci    case Guard::GEQ:
7321cb0ef41Sopenharmony_ci      DCHECK(!trace->mentions_reg(guard->reg()));
7331cb0ef41Sopenharmony_ci      macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
7341cb0ef41Sopenharmony_ci                                    trace->backtrack());
7351cb0ef41Sopenharmony_ci      break;
7361cb0ef41Sopenharmony_ci  }
7371cb0ef41Sopenharmony_ci}
7381cb0ef41Sopenharmony_ci
7391cb0ef41Sopenharmony_cinamespace {
7401cb0ef41Sopenharmony_ci
7411cb0ef41Sopenharmony_ci#ifdef DEBUG
7421cb0ef41Sopenharmony_cibool ContainsOnlyUtf16CodeUnits(unibrow::uchar* chars, int length) {
7431cb0ef41Sopenharmony_ci  STATIC_ASSERT(sizeof(unibrow::uchar) == 4);
7441cb0ef41Sopenharmony_ci  for (int i = 0; i < length; i++) {
7451cb0ef41Sopenharmony_ci    if (chars[i] > String::kMaxUtf16CodeUnit) return false;
7461cb0ef41Sopenharmony_ci  }
7471cb0ef41Sopenharmony_ci  return true;
7481cb0ef41Sopenharmony_ci}
7491cb0ef41Sopenharmony_ci#endif  // DEBUG
7501cb0ef41Sopenharmony_ci
7511cb0ef41Sopenharmony_ci// Returns the number of characters in the equivalence class, omitting those
7521cb0ef41Sopenharmony_ci// that cannot occur in the source string because it is Latin1.
7531cb0ef41Sopenharmony_ciint GetCaseIndependentLetters(Isolate* isolate, base::uc16 character,
7541cb0ef41Sopenharmony_ci                              bool one_byte_subject, unibrow::uchar* letters,
7551cb0ef41Sopenharmony_ci                              int letter_length) {
7561cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
7571cb0ef41Sopenharmony_ci  if (RegExpCaseFolding::IgnoreSet().contains(character)) {
7581cb0ef41Sopenharmony_ci    letters[0] = character;
7591cb0ef41Sopenharmony_ci    DCHECK(ContainsOnlyUtf16CodeUnits(letters, 1));
7601cb0ef41Sopenharmony_ci    return 1;
7611cb0ef41Sopenharmony_ci  }
7621cb0ef41Sopenharmony_ci  bool in_special_add_set =
7631cb0ef41Sopenharmony_ci      RegExpCaseFolding::SpecialAddSet().contains(character);
7641cb0ef41Sopenharmony_ci
7651cb0ef41Sopenharmony_ci  icu::UnicodeSet set;
7661cb0ef41Sopenharmony_ci  set.add(character);
7671cb0ef41Sopenharmony_ci  set = set.closeOver(USET_CASE_INSENSITIVE);
7681cb0ef41Sopenharmony_ci
7691cb0ef41Sopenharmony_ci  UChar32 canon = 0;
7701cb0ef41Sopenharmony_ci  if (in_special_add_set) {
7711cb0ef41Sopenharmony_ci    canon = RegExpCaseFolding::Canonicalize(character);
7721cb0ef41Sopenharmony_ci  }
7731cb0ef41Sopenharmony_ci
7741cb0ef41Sopenharmony_ci  int32_t range_count = set.getRangeCount();
7751cb0ef41Sopenharmony_ci  int items = 0;
7761cb0ef41Sopenharmony_ci  for (int32_t i = 0; i < range_count; i++) {
7771cb0ef41Sopenharmony_ci    UChar32 start = set.getRangeStart(i);
7781cb0ef41Sopenharmony_ci    UChar32 end = set.getRangeEnd(i);
7791cb0ef41Sopenharmony_ci    CHECK(end - start + items <= letter_length);
7801cb0ef41Sopenharmony_ci    for (UChar32 cu = start; cu <= end; cu++) {
7811cb0ef41Sopenharmony_ci      if (one_byte_subject && cu > String::kMaxOneByteCharCode) break;
7821cb0ef41Sopenharmony_ci      if (in_special_add_set && RegExpCaseFolding::Canonicalize(cu) != canon) {
7831cb0ef41Sopenharmony_ci        continue;
7841cb0ef41Sopenharmony_ci      }
7851cb0ef41Sopenharmony_ci      letters[items++] = static_cast<unibrow::uchar>(cu);
7861cb0ef41Sopenharmony_ci    }
7871cb0ef41Sopenharmony_ci  }
7881cb0ef41Sopenharmony_ci  DCHECK(ContainsOnlyUtf16CodeUnits(letters, items));
7891cb0ef41Sopenharmony_ci  return items;
7901cb0ef41Sopenharmony_ci#else
7911cb0ef41Sopenharmony_ci  int length =
7921cb0ef41Sopenharmony_ci      isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
7931cb0ef41Sopenharmony_ci  // Unibrow returns 0 or 1 for characters where case independence is
7941cb0ef41Sopenharmony_ci  // trivial.
7951cb0ef41Sopenharmony_ci  if (length == 0) {
7961cb0ef41Sopenharmony_ci    letters[0] = character;
7971cb0ef41Sopenharmony_ci    length = 1;
7981cb0ef41Sopenharmony_ci  }
7991cb0ef41Sopenharmony_ci
8001cb0ef41Sopenharmony_ci  if (one_byte_subject) {
8011cb0ef41Sopenharmony_ci    int new_length = 0;
8021cb0ef41Sopenharmony_ci    for (int i = 0; i < length; i++) {
8031cb0ef41Sopenharmony_ci      if (letters[i] <= String::kMaxOneByteCharCode) {
8041cb0ef41Sopenharmony_ci        letters[new_length++] = letters[i];
8051cb0ef41Sopenharmony_ci      }
8061cb0ef41Sopenharmony_ci    }
8071cb0ef41Sopenharmony_ci    length = new_length;
8081cb0ef41Sopenharmony_ci  }
8091cb0ef41Sopenharmony_ci
8101cb0ef41Sopenharmony_ci  DCHECK(ContainsOnlyUtf16CodeUnits(letters, length));
8111cb0ef41Sopenharmony_ci  return length;
8121cb0ef41Sopenharmony_ci#endif  // V8_INTL_SUPPORT
8131cb0ef41Sopenharmony_ci}
8141cb0ef41Sopenharmony_ci
8151cb0ef41Sopenharmony_ciinline bool EmitSimpleCharacter(Isolate* isolate, RegExpCompiler* compiler,
8161cb0ef41Sopenharmony_ci                                base::uc16 c, Label* on_failure, int cp_offset,
8171cb0ef41Sopenharmony_ci                                bool check, bool preloaded) {
8181cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
8191cb0ef41Sopenharmony_ci  bool bound_checked = false;
8201cb0ef41Sopenharmony_ci  if (!preloaded) {
8211cb0ef41Sopenharmony_ci    assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
8221cb0ef41Sopenharmony_ci    bound_checked = true;
8231cb0ef41Sopenharmony_ci  }
8241cb0ef41Sopenharmony_ci  assembler->CheckNotCharacter(c, on_failure);
8251cb0ef41Sopenharmony_ci  return bound_checked;
8261cb0ef41Sopenharmony_ci}
8271cb0ef41Sopenharmony_ci
8281cb0ef41Sopenharmony_ci// Only emits non-letters (things that don't have case).  Only used for case
8291cb0ef41Sopenharmony_ci// independent matches.
8301cb0ef41Sopenharmony_ciinline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
8311cb0ef41Sopenharmony_ci                              base::uc16 c, Label* on_failure, int cp_offset,
8321cb0ef41Sopenharmony_ci                              bool check, bool preloaded) {
8331cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
8341cb0ef41Sopenharmony_ci  bool one_byte = compiler->one_byte();
8351cb0ef41Sopenharmony_ci  unibrow::uchar chars[4];
8361cb0ef41Sopenharmony_ci  int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
8371cb0ef41Sopenharmony_ci  if (length < 1) {
8381cb0ef41Sopenharmony_ci    // This can't match.  Must be an one-byte subject and a non-one-byte
8391cb0ef41Sopenharmony_ci    // character.  We do not need to do anything since the one-byte pass
8401cb0ef41Sopenharmony_ci    // already handled this.
8411cb0ef41Sopenharmony_ci    return false;  // Bounds not checked.
8421cb0ef41Sopenharmony_ci  }
8431cb0ef41Sopenharmony_ci  bool checked = false;
8441cb0ef41Sopenharmony_ci  // We handle the length > 1 case in a later pass.
8451cb0ef41Sopenharmony_ci  if (length == 1) {
8461cb0ef41Sopenharmony_ci    if (one_byte && c > String::kMaxOneByteCharCodeU) {
8471cb0ef41Sopenharmony_ci      // Can't match - see above.
8481cb0ef41Sopenharmony_ci      return false;  // Bounds not checked.
8491cb0ef41Sopenharmony_ci    }
8501cb0ef41Sopenharmony_ci    if (!preloaded) {
8511cb0ef41Sopenharmony_ci      macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
8521cb0ef41Sopenharmony_ci      checked = check;
8531cb0ef41Sopenharmony_ci    }
8541cb0ef41Sopenharmony_ci    macro_assembler->CheckNotCharacter(c, on_failure);
8551cb0ef41Sopenharmony_ci  }
8561cb0ef41Sopenharmony_ci  return checked;
8571cb0ef41Sopenharmony_ci}
8581cb0ef41Sopenharmony_ci
8591cb0ef41Sopenharmony_cibool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
8601cb0ef41Sopenharmony_ci                               bool one_byte, base::uc16 c1, base::uc16 c2,
8611cb0ef41Sopenharmony_ci                               Label* on_failure) {
8621cb0ef41Sopenharmony_ci  const uint32_t char_mask = CharMask(one_byte);
8631cb0ef41Sopenharmony_ci  base::uc16 exor = c1 ^ c2;
8641cb0ef41Sopenharmony_ci  // Check whether exor has only one bit set.
8651cb0ef41Sopenharmony_ci  if (((exor - 1) & exor) == 0) {
8661cb0ef41Sopenharmony_ci    // If c1 and c2 differ only by one bit.
8671cb0ef41Sopenharmony_ci    // Ecma262UnCanonicalize always gives the highest number last.
8681cb0ef41Sopenharmony_ci    DCHECK(c2 > c1);
8691cb0ef41Sopenharmony_ci    base::uc16 mask = char_mask ^ exor;
8701cb0ef41Sopenharmony_ci    macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
8711cb0ef41Sopenharmony_ci    return true;
8721cb0ef41Sopenharmony_ci  }
8731cb0ef41Sopenharmony_ci  DCHECK(c2 > c1);
8741cb0ef41Sopenharmony_ci  base::uc16 diff = c2 - c1;
8751cb0ef41Sopenharmony_ci  if (((diff - 1) & diff) == 0 && c1 >= diff) {
8761cb0ef41Sopenharmony_ci    // If the characters differ by 2^n but don't differ by one bit then
8771cb0ef41Sopenharmony_ci    // subtract the difference from the found character, then do the or
8781cb0ef41Sopenharmony_ci    // trick.  We avoid the theoretical case where negative numbers are
8791cb0ef41Sopenharmony_ci    // involved in order to simplify code generation.
8801cb0ef41Sopenharmony_ci    base::uc16 mask = char_mask ^ diff;
8811cb0ef41Sopenharmony_ci    macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask,
8821cb0ef41Sopenharmony_ci                                                    on_failure);
8831cb0ef41Sopenharmony_ci    return true;
8841cb0ef41Sopenharmony_ci  }
8851cb0ef41Sopenharmony_ci  return false;
8861cb0ef41Sopenharmony_ci}
8871cb0ef41Sopenharmony_ci
8881cb0ef41Sopenharmony_ci// Only emits letters (things that have case).  Only used for case independent
8891cb0ef41Sopenharmony_ci// matches.
8901cb0ef41Sopenharmony_ciinline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
8911cb0ef41Sopenharmony_ci                           base::uc16 c, Label* on_failure, int cp_offset,
8921cb0ef41Sopenharmony_ci                           bool check, bool preloaded) {
8931cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
8941cb0ef41Sopenharmony_ci  bool one_byte = compiler->one_byte();
8951cb0ef41Sopenharmony_ci  unibrow::uchar chars[4];
8961cb0ef41Sopenharmony_ci  int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
8971cb0ef41Sopenharmony_ci  if (length <= 1) return false;
8981cb0ef41Sopenharmony_ci  // We may not need to check against the end of the input string
8991cb0ef41Sopenharmony_ci  // if this character lies before a character that matched.
9001cb0ef41Sopenharmony_ci  if (!preloaded) {
9011cb0ef41Sopenharmony_ci    macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
9021cb0ef41Sopenharmony_ci  }
9031cb0ef41Sopenharmony_ci  Label ok;
9041cb0ef41Sopenharmony_ci  switch (length) {
9051cb0ef41Sopenharmony_ci    case 2: {
9061cb0ef41Sopenharmony_ci      if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
9071cb0ef41Sopenharmony_ci                                    chars[1], on_failure)) {
9081cb0ef41Sopenharmony_ci      } else {
9091cb0ef41Sopenharmony_ci        macro_assembler->CheckCharacter(chars[0], &ok);
9101cb0ef41Sopenharmony_ci        macro_assembler->CheckNotCharacter(chars[1], on_failure);
9111cb0ef41Sopenharmony_ci        macro_assembler->Bind(&ok);
9121cb0ef41Sopenharmony_ci      }
9131cb0ef41Sopenharmony_ci      break;
9141cb0ef41Sopenharmony_ci    }
9151cb0ef41Sopenharmony_ci    case 4:
9161cb0ef41Sopenharmony_ci      macro_assembler->CheckCharacter(chars[3], &ok);
9171cb0ef41Sopenharmony_ci      V8_FALLTHROUGH;
9181cb0ef41Sopenharmony_ci    case 3:
9191cb0ef41Sopenharmony_ci      macro_assembler->CheckCharacter(chars[0], &ok);
9201cb0ef41Sopenharmony_ci      macro_assembler->CheckCharacter(chars[1], &ok);
9211cb0ef41Sopenharmony_ci      macro_assembler->CheckNotCharacter(chars[2], on_failure);
9221cb0ef41Sopenharmony_ci      macro_assembler->Bind(&ok);
9231cb0ef41Sopenharmony_ci      break;
9241cb0ef41Sopenharmony_ci    default:
9251cb0ef41Sopenharmony_ci      UNREACHABLE();
9261cb0ef41Sopenharmony_ci  }
9271cb0ef41Sopenharmony_ci  return true;
9281cb0ef41Sopenharmony_ci}
9291cb0ef41Sopenharmony_ci
9301cb0ef41Sopenharmony_civoid EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
9311cb0ef41Sopenharmony_ci                      Label* fall_through, Label* above_or_equal,
9321cb0ef41Sopenharmony_ci                      Label* below) {
9331cb0ef41Sopenharmony_ci  if (below != fall_through) {
9341cb0ef41Sopenharmony_ci    masm->CheckCharacterLT(border, below);
9351cb0ef41Sopenharmony_ci    if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
9361cb0ef41Sopenharmony_ci  } else {
9371cb0ef41Sopenharmony_ci    masm->CheckCharacterGT(border - 1, above_or_equal);
9381cb0ef41Sopenharmony_ci  }
9391cb0ef41Sopenharmony_ci}
9401cb0ef41Sopenharmony_ci
9411cb0ef41Sopenharmony_civoid EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first, int last,
9421cb0ef41Sopenharmony_ci                            Label* fall_through, Label* in_range,
9431cb0ef41Sopenharmony_ci                            Label* out_of_range) {
9441cb0ef41Sopenharmony_ci  if (in_range == fall_through) {
9451cb0ef41Sopenharmony_ci    if (first == last) {
9461cb0ef41Sopenharmony_ci      masm->CheckNotCharacter(first, out_of_range);
9471cb0ef41Sopenharmony_ci    } else {
9481cb0ef41Sopenharmony_ci      masm->CheckCharacterNotInRange(first, last, out_of_range);
9491cb0ef41Sopenharmony_ci    }
9501cb0ef41Sopenharmony_ci  } else {
9511cb0ef41Sopenharmony_ci    if (first == last) {
9521cb0ef41Sopenharmony_ci      masm->CheckCharacter(first, in_range);
9531cb0ef41Sopenharmony_ci    } else {
9541cb0ef41Sopenharmony_ci      masm->CheckCharacterInRange(first, last, in_range);
9551cb0ef41Sopenharmony_ci    }
9561cb0ef41Sopenharmony_ci    if (out_of_range != fall_through) masm->GoTo(out_of_range);
9571cb0ef41Sopenharmony_ci  }
9581cb0ef41Sopenharmony_ci}
9591cb0ef41Sopenharmony_ci
9601cb0ef41Sopenharmony_ci// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
9611cb0ef41Sopenharmony_ci// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
9621cb0ef41Sopenharmony_civoid EmitUseLookupTable(RegExpMacroAssembler* masm,
9631cb0ef41Sopenharmony_ci                        ZoneList<base::uc32>* ranges, uint32_t start_index,
9641cb0ef41Sopenharmony_ci                        uint32_t end_index, base::uc32 min_char,
9651cb0ef41Sopenharmony_ci                        Label* fall_through, Label* even_label,
9661cb0ef41Sopenharmony_ci                        Label* odd_label) {
9671cb0ef41Sopenharmony_ci  static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
9681cb0ef41Sopenharmony_ci  static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
9691cb0ef41Sopenharmony_ci
9701cb0ef41Sopenharmony_ci  base::uc32 base = (min_char & ~kMask);
9711cb0ef41Sopenharmony_ci  USE(base);
9721cb0ef41Sopenharmony_ci
9731cb0ef41Sopenharmony_ci  // Assert that everything is on one kTableSize page.
9741cb0ef41Sopenharmony_ci  for (uint32_t i = start_index; i <= end_index; i++) {
9751cb0ef41Sopenharmony_ci    DCHECK_EQ(ranges->at(i) & ~kMask, base);
9761cb0ef41Sopenharmony_ci  }
9771cb0ef41Sopenharmony_ci  DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
9781cb0ef41Sopenharmony_ci
9791cb0ef41Sopenharmony_ci  char templ[kSize];
9801cb0ef41Sopenharmony_ci  Label* on_bit_set;
9811cb0ef41Sopenharmony_ci  Label* on_bit_clear;
9821cb0ef41Sopenharmony_ci  int bit;
9831cb0ef41Sopenharmony_ci  if (even_label == fall_through) {
9841cb0ef41Sopenharmony_ci    on_bit_set = odd_label;
9851cb0ef41Sopenharmony_ci    on_bit_clear = even_label;
9861cb0ef41Sopenharmony_ci    bit = 1;
9871cb0ef41Sopenharmony_ci  } else {
9881cb0ef41Sopenharmony_ci    on_bit_set = even_label;
9891cb0ef41Sopenharmony_ci    on_bit_clear = odd_label;
9901cb0ef41Sopenharmony_ci    bit = 0;
9911cb0ef41Sopenharmony_ci  }
9921cb0ef41Sopenharmony_ci  for (uint32_t i = 0; i < (ranges->at(start_index) & kMask) && i < kSize;
9931cb0ef41Sopenharmony_ci       i++) {
9941cb0ef41Sopenharmony_ci    templ[i] = bit;
9951cb0ef41Sopenharmony_ci  }
9961cb0ef41Sopenharmony_ci  uint32_t j = 0;
9971cb0ef41Sopenharmony_ci  bit ^= 1;
9981cb0ef41Sopenharmony_ci  for (uint32_t i = start_index; i < end_index; i++) {
9991cb0ef41Sopenharmony_ci    for (j = (ranges->at(i) & kMask); j < (ranges->at(i + 1) & kMask); j++) {
10001cb0ef41Sopenharmony_ci      templ[j] = bit;
10011cb0ef41Sopenharmony_ci    }
10021cb0ef41Sopenharmony_ci    bit ^= 1;
10031cb0ef41Sopenharmony_ci  }
10041cb0ef41Sopenharmony_ci  for (uint32_t i = j; i < kSize; i++) {
10051cb0ef41Sopenharmony_ci    templ[i] = bit;
10061cb0ef41Sopenharmony_ci  }
10071cb0ef41Sopenharmony_ci  Factory* factory = masm->isolate()->factory();
10081cb0ef41Sopenharmony_ci  // TODO(erikcorry): Cache these.
10091cb0ef41Sopenharmony_ci  Handle<ByteArray> ba = factory->NewByteArray(kSize, AllocationType::kOld);
10101cb0ef41Sopenharmony_ci  for (uint32_t i = 0; i < kSize; i++) {
10111cb0ef41Sopenharmony_ci    ba->set(i, templ[i]);
10121cb0ef41Sopenharmony_ci  }
10131cb0ef41Sopenharmony_ci  masm->CheckBitInTable(ba, on_bit_set);
10141cb0ef41Sopenharmony_ci  if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
10151cb0ef41Sopenharmony_ci}
10161cb0ef41Sopenharmony_ci
10171cb0ef41Sopenharmony_civoid CutOutRange(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
10181cb0ef41Sopenharmony_ci                 uint32_t start_index, uint32_t end_index, uint32_t cut_index,
10191cb0ef41Sopenharmony_ci                 Label* even_label, Label* odd_label) {
10201cb0ef41Sopenharmony_ci  bool odd = (((cut_index - start_index) & 1) == 1);
10211cb0ef41Sopenharmony_ci  Label* in_range_label = odd ? odd_label : even_label;
10221cb0ef41Sopenharmony_ci  Label dummy;
10231cb0ef41Sopenharmony_ci  EmitDoubleBoundaryTest(masm, ranges->at(cut_index),
10241cb0ef41Sopenharmony_ci                         ranges->at(cut_index + 1) - 1, &dummy, in_range_label,
10251cb0ef41Sopenharmony_ci                         &dummy);
10261cb0ef41Sopenharmony_ci  DCHECK(!dummy.is_linked());
10271cb0ef41Sopenharmony_ci  // Cut out the single range by rewriting the array.  This creates a new
10281cb0ef41Sopenharmony_ci  // range that is a merger of the two ranges on either side of the one we
10291cb0ef41Sopenharmony_ci  // are cutting out.  The oddity of the labels is preserved.
10301cb0ef41Sopenharmony_ci  for (uint32_t j = cut_index; j > start_index; j--) {
10311cb0ef41Sopenharmony_ci    ranges->at(j) = ranges->at(j - 1);
10321cb0ef41Sopenharmony_ci  }
10331cb0ef41Sopenharmony_ci  for (uint32_t j = cut_index + 1; j < end_index; j++) {
10341cb0ef41Sopenharmony_ci    ranges->at(j) = ranges->at(j + 1);
10351cb0ef41Sopenharmony_ci  }
10361cb0ef41Sopenharmony_ci}
10371cb0ef41Sopenharmony_ci
10381cb0ef41Sopenharmony_ci// Unicode case.  Split the search space into kSize spaces that are handled
10391cb0ef41Sopenharmony_ci// with recursion.
10401cb0ef41Sopenharmony_civoid SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
10411cb0ef41Sopenharmony_ci                      uint32_t end_index, uint32_t* new_start_index,
10421cb0ef41Sopenharmony_ci                      uint32_t* new_end_index, base::uc32* border) {
10431cb0ef41Sopenharmony_ci  static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
10441cb0ef41Sopenharmony_ci  static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
10451cb0ef41Sopenharmony_ci
10461cb0ef41Sopenharmony_ci  base::uc32 first = ranges->at(start_index);
10471cb0ef41Sopenharmony_ci  base::uc32 last = ranges->at(end_index) - 1;
10481cb0ef41Sopenharmony_ci
10491cb0ef41Sopenharmony_ci  *new_start_index = start_index;
10501cb0ef41Sopenharmony_ci  *border = (ranges->at(start_index) & ~kMask) + kSize;
10511cb0ef41Sopenharmony_ci  while (*new_start_index < end_index) {
10521cb0ef41Sopenharmony_ci    if (ranges->at(*new_start_index) > *border) break;
10531cb0ef41Sopenharmony_ci    (*new_start_index)++;
10541cb0ef41Sopenharmony_ci  }
10551cb0ef41Sopenharmony_ci  // new_start_index is the index of the first edge that is beyond the
10561cb0ef41Sopenharmony_ci  // current kSize space.
10571cb0ef41Sopenharmony_ci
10581cb0ef41Sopenharmony_ci  // For very large search spaces we do a binary chop search of the non-Latin1
10591cb0ef41Sopenharmony_ci  // space instead of just going to the end of the current kSize space.  The
10601cb0ef41Sopenharmony_ci  // heuristics are complicated a little by the fact that any 128-character
10611cb0ef41Sopenharmony_ci  // encoding space can be quickly tested with a table lookup, so we don't
10621cb0ef41Sopenharmony_ci  // wish to do binary chop search at a smaller granularity than that.  A
10631cb0ef41Sopenharmony_ci  // 128-character space can take up a lot of space in the ranges array if,
10641cb0ef41Sopenharmony_ci  // for example, we only want to match every second character (eg. the lower
10651cb0ef41Sopenharmony_ci  // case characters on some Unicode pages).
10661cb0ef41Sopenharmony_ci  uint32_t binary_chop_index = (end_index + start_index) / 2;
10671cb0ef41Sopenharmony_ci  // The first test ensures that we get to the code that handles the Latin1
10681cb0ef41Sopenharmony_ci  // range with a single not-taken branch, speeding up this important
10691cb0ef41Sopenharmony_ci  // character range (even non-Latin1 charset-based text has spaces and
10701cb0ef41Sopenharmony_ci  // punctuation).
10711cb0ef41Sopenharmony_ci  if (*border - 1 > String::kMaxOneByteCharCode &&  // Latin1 case.
10721cb0ef41Sopenharmony_ci      end_index - start_index > (*new_start_index - start_index) * 2 &&
10731cb0ef41Sopenharmony_ci      last - first > kSize * 2 && binary_chop_index > *new_start_index &&
10741cb0ef41Sopenharmony_ci      ranges->at(binary_chop_index) >= first + 2 * kSize) {
10751cb0ef41Sopenharmony_ci    uint32_t scan_forward_for_section_border = binary_chop_index;
10761cb0ef41Sopenharmony_ci    uint32_t new_border = (ranges->at(binary_chop_index) | kMask) + 1;
10771cb0ef41Sopenharmony_ci
10781cb0ef41Sopenharmony_ci    while (scan_forward_for_section_border < end_index) {
10791cb0ef41Sopenharmony_ci      if (ranges->at(scan_forward_for_section_border) > new_border) {
10801cb0ef41Sopenharmony_ci        *new_start_index = scan_forward_for_section_border;
10811cb0ef41Sopenharmony_ci        *border = new_border;
10821cb0ef41Sopenharmony_ci        break;
10831cb0ef41Sopenharmony_ci      }
10841cb0ef41Sopenharmony_ci      scan_forward_for_section_border++;
10851cb0ef41Sopenharmony_ci    }
10861cb0ef41Sopenharmony_ci  }
10871cb0ef41Sopenharmony_ci
10881cb0ef41Sopenharmony_ci  DCHECK(*new_start_index > start_index);
10891cb0ef41Sopenharmony_ci  *new_end_index = *new_start_index - 1;
10901cb0ef41Sopenharmony_ci  if (ranges->at(*new_end_index) == *border) {
10911cb0ef41Sopenharmony_ci    (*new_end_index)--;
10921cb0ef41Sopenharmony_ci  }
10931cb0ef41Sopenharmony_ci  if (*border >= ranges->at(end_index)) {
10941cb0ef41Sopenharmony_ci    *border = ranges->at(end_index);
10951cb0ef41Sopenharmony_ci    *new_start_index = end_index;  // Won't be used.
10961cb0ef41Sopenharmony_ci    *new_end_index = end_index - 1;
10971cb0ef41Sopenharmony_ci  }
10981cb0ef41Sopenharmony_ci}
10991cb0ef41Sopenharmony_ci
11001cb0ef41Sopenharmony_ci// Gets a series of segment boundaries representing a character class.  If the
11011cb0ef41Sopenharmony_ci// character is in the range between an even and an odd boundary (counting from
11021cb0ef41Sopenharmony_ci// start_index) then go to even_label, otherwise go to odd_label.  We already
11031cb0ef41Sopenharmony_ci// know that the character is in the range of min_char to max_char inclusive.
11041cb0ef41Sopenharmony_ci// Either label can be nullptr indicating backtracking.  Either label can also
11051cb0ef41Sopenharmony_ci// be equal to the fall_through label.
11061cb0ef41Sopenharmony_civoid GenerateBranches(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
11071cb0ef41Sopenharmony_ci                      uint32_t start_index, uint32_t end_index,
11081cb0ef41Sopenharmony_ci                      base::uc32 min_char, base::uc32 max_char,
11091cb0ef41Sopenharmony_ci                      Label* fall_through, Label* even_label,
11101cb0ef41Sopenharmony_ci                      Label* odd_label) {
11111cb0ef41Sopenharmony_ci  DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);
11121cb0ef41Sopenharmony_ci  DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);
11131cb0ef41Sopenharmony_ci
11141cb0ef41Sopenharmony_ci  base::uc32 first = ranges->at(start_index);
11151cb0ef41Sopenharmony_ci  base::uc32 last = ranges->at(end_index) - 1;
11161cb0ef41Sopenharmony_ci
11171cb0ef41Sopenharmony_ci  DCHECK_LT(min_char, first);
11181cb0ef41Sopenharmony_ci
11191cb0ef41Sopenharmony_ci  // Just need to test if the character is before or on-or-after
11201cb0ef41Sopenharmony_ci  // a particular character.
11211cb0ef41Sopenharmony_ci  if (start_index == end_index) {
11221cb0ef41Sopenharmony_ci    EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
11231cb0ef41Sopenharmony_ci    return;
11241cb0ef41Sopenharmony_ci  }
11251cb0ef41Sopenharmony_ci
11261cb0ef41Sopenharmony_ci  // Another almost trivial case:  There is one interval in the middle that is
11271cb0ef41Sopenharmony_ci  // different from the end intervals.
11281cb0ef41Sopenharmony_ci  if (start_index + 1 == end_index) {
11291cb0ef41Sopenharmony_ci    EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label,
11301cb0ef41Sopenharmony_ci                           odd_label);
11311cb0ef41Sopenharmony_ci    return;
11321cb0ef41Sopenharmony_ci  }
11331cb0ef41Sopenharmony_ci
11341cb0ef41Sopenharmony_ci  // It's not worth using table lookup if there are very few intervals in the
11351cb0ef41Sopenharmony_ci  // character class.
11361cb0ef41Sopenharmony_ci  if (end_index - start_index <= 6) {
11371cb0ef41Sopenharmony_ci    // It is faster to test for individual characters, so we look for those
11381cb0ef41Sopenharmony_ci    // first, then try arbitrary ranges in the second round.
11391cb0ef41Sopenharmony_ci    static uint32_t kNoCutIndex = -1;
11401cb0ef41Sopenharmony_ci    uint32_t cut = kNoCutIndex;
11411cb0ef41Sopenharmony_ci    for (uint32_t i = start_index; i < end_index; i++) {
11421cb0ef41Sopenharmony_ci      if (ranges->at(i) == ranges->at(i + 1) - 1) {
11431cb0ef41Sopenharmony_ci        cut = i;
11441cb0ef41Sopenharmony_ci        break;
11451cb0ef41Sopenharmony_ci      }
11461cb0ef41Sopenharmony_ci    }
11471cb0ef41Sopenharmony_ci    if (cut == kNoCutIndex) cut = start_index;
11481cb0ef41Sopenharmony_ci    CutOutRange(masm, ranges, start_index, end_index, cut, even_label,
11491cb0ef41Sopenharmony_ci                odd_label);
11501cb0ef41Sopenharmony_ci    DCHECK_GE(end_index - start_index, 2);
11511cb0ef41Sopenharmony_ci    GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char,
11521cb0ef41Sopenharmony_ci                     max_char, fall_through, even_label, odd_label);
11531cb0ef41Sopenharmony_ci    return;
11541cb0ef41Sopenharmony_ci  }
11551cb0ef41Sopenharmony_ci
11561cb0ef41Sopenharmony_ci  // If there are a lot of intervals in the regexp, then we will use tables to
11571cb0ef41Sopenharmony_ci  // determine whether the character is inside or outside the character class.
11581cb0ef41Sopenharmony_ci  static const int kBits = RegExpMacroAssembler::kTableSizeBits;
11591cb0ef41Sopenharmony_ci
11601cb0ef41Sopenharmony_ci  if ((max_char >> kBits) == (min_char >> kBits)) {
11611cb0ef41Sopenharmony_ci    EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
11621cb0ef41Sopenharmony_ci                       fall_through, even_label, odd_label);
11631cb0ef41Sopenharmony_ci    return;
11641cb0ef41Sopenharmony_ci  }
11651cb0ef41Sopenharmony_ci
11661cb0ef41Sopenharmony_ci  if ((min_char >> kBits) != first >> kBits) {
11671cb0ef41Sopenharmony_ci    masm->CheckCharacterLT(first, odd_label);
11681cb0ef41Sopenharmony_ci    GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
11691cb0ef41Sopenharmony_ci                     fall_through, odd_label, even_label);
11701cb0ef41Sopenharmony_ci    return;
11711cb0ef41Sopenharmony_ci  }
11721cb0ef41Sopenharmony_ci
11731cb0ef41Sopenharmony_ci  uint32_t new_start_index = 0;
11741cb0ef41Sopenharmony_ci  uint32_t new_end_index = 0;
11751cb0ef41Sopenharmony_ci  base::uc32 border = 0;
11761cb0ef41Sopenharmony_ci
11771cb0ef41Sopenharmony_ci  SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
11781cb0ef41Sopenharmony_ci                   &new_end_index, &border);
11791cb0ef41Sopenharmony_ci
11801cb0ef41Sopenharmony_ci  Label handle_rest;
11811cb0ef41Sopenharmony_ci  Label* above = &handle_rest;
11821cb0ef41Sopenharmony_ci  if (border == last + 1) {
11831cb0ef41Sopenharmony_ci    // We didn't find any section that started after the limit, so everything
11841cb0ef41Sopenharmony_ci    // above the border is one of the terminal labels.
11851cb0ef41Sopenharmony_ci    above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
11861cb0ef41Sopenharmony_ci    DCHECK(new_end_index == end_index - 1);
11871cb0ef41Sopenharmony_ci  }
11881cb0ef41Sopenharmony_ci
11891cb0ef41Sopenharmony_ci  DCHECK_LE(start_index, new_end_index);
11901cb0ef41Sopenharmony_ci  DCHECK_LE(new_start_index, end_index);
11911cb0ef41Sopenharmony_ci  DCHECK_LT(start_index, new_start_index);
11921cb0ef41Sopenharmony_ci  DCHECK_LT(new_end_index, end_index);
11931cb0ef41Sopenharmony_ci  DCHECK(new_end_index + 1 == new_start_index ||
11941cb0ef41Sopenharmony_ci         (new_end_index + 2 == new_start_index &&
11951cb0ef41Sopenharmony_ci          border == ranges->at(new_end_index + 1)));
11961cb0ef41Sopenharmony_ci  DCHECK_LT(min_char, border - 1);
11971cb0ef41Sopenharmony_ci  DCHECK_LT(border, max_char);
11981cb0ef41Sopenharmony_ci  DCHECK_LT(ranges->at(new_end_index), border);
11991cb0ef41Sopenharmony_ci  DCHECK(border < ranges->at(new_start_index) ||
12001cb0ef41Sopenharmony_ci         (border == ranges->at(new_start_index) &&
12011cb0ef41Sopenharmony_ci          new_start_index == end_index && new_end_index == end_index - 1 &&
12021cb0ef41Sopenharmony_ci          border == last + 1));
12031cb0ef41Sopenharmony_ci  DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
12041cb0ef41Sopenharmony_ci
12051cb0ef41Sopenharmony_ci  masm->CheckCharacterGT(border - 1, above);
12061cb0ef41Sopenharmony_ci  Label dummy;
12071cb0ef41Sopenharmony_ci  GenerateBranches(masm, ranges, start_index, new_end_index, min_char,
12081cb0ef41Sopenharmony_ci                   border - 1, &dummy, even_label, odd_label);
12091cb0ef41Sopenharmony_ci  if (handle_rest.is_linked()) {
12101cb0ef41Sopenharmony_ci    masm->Bind(&handle_rest);
12111cb0ef41Sopenharmony_ci    bool flip = (new_start_index & 1) != (start_index & 1);
12121cb0ef41Sopenharmony_ci    GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
12131cb0ef41Sopenharmony_ci                     &dummy, flip ? odd_label : even_label,
12141cb0ef41Sopenharmony_ci                     flip ? even_label : odd_label);
12151cb0ef41Sopenharmony_ci  }
12161cb0ef41Sopenharmony_ci}
12171cb0ef41Sopenharmony_ci
12181cb0ef41Sopenharmony_civoid EmitCharClass(RegExpMacroAssembler* macro_assembler,
12191cb0ef41Sopenharmony_ci                   RegExpCharacterClass* cc, bool one_byte, Label* on_failure,
12201cb0ef41Sopenharmony_ci                   int cp_offset, bool check_offset, bool preloaded,
12211cb0ef41Sopenharmony_ci                   Zone* zone) {
12221cb0ef41Sopenharmony_ci  ZoneList<CharacterRange>* ranges = cc->ranges(zone);
12231cb0ef41Sopenharmony_ci  CharacterRange::Canonicalize(ranges);
12241cb0ef41Sopenharmony_ci
12251cb0ef41Sopenharmony_ci  // Now that all processing (like case-insensitivity) is done, clamp the
12261cb0ef41Sopenharmony_ci  // ranges to the set of ranges that may actually occur in the subject string.
12271cb0ef41Sopenharmony_ci  if (one_byte) CharacterRange::ClampToOneByte(ranges);
12281cb0ef41Sopenharmony_ci
12291cb0ef41Sopenharmony_ci  const int ranges_length = ranges->length();
12301cb0ef41Sopenharmony_ci  if (ranges_length == 0) {
12311cb0ef41Sopenharmony_ci    if (!cc->is_negated()) {
12321cb0ef41Sopenharmony_ci      macro_assembler->GoTo(on_failure);
12331cb0ef41Sopenharmony_ci    }
12341cb0ef41Sopenharmony_ci    if (check_offset) {
12351cb0ef41Sopenharmony_ci      macro_assembler->CheckPosition(cp_offset, on_failure);
12361cb0ef41Sopenharmony_ci    }
12371cb0ef41Sopenharmony_ci    return;
12381cb0ef41Sopenharmony_ci  }
12391cb0ef41Sopenharmony_ci
12401cb0ef41Sopenharmony_ci  const base::uc32 max_char = MaxCodeUnit(one_byte);
12411cb0ef41Sopenharmony_ci  if (ranges_length == 1 && ranges->at(0).IsEverything(max_char)) {
12421cb0ef41Sopenharmony_ci    if (cc->is_negated()) {
12431cb0ef41Sopenharmony_ci      macro_assembler->GoTo(on_failure);
12441cb0ef41Sopenharmony_ci    } else {
12451cb0ef41Sopenharmony_ci      // This is a common case hit by non-anchored expressions.
12461cb0ef41Sopenharmony_ci      if (check_offset) {
12471cb0ef41Sopenharmony_ci        macro_assembler->CheckPosition(cp_offset, on_failure);
12481cb0ef41Sopenharmony_ci      }
12491cb0ef41Sopenharmony_ci    }
12501cb0ef41Sopenharmony_ci    return;
12511cb0ef41Sopenharmony_ci  }
12521cb0ef41Sopenharmony_ci
12531cb0ef41Sopenharmony_ci  if (!preloaded) {
12541cb0ef41Sopenharmony_ci    macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
12551cb0ef41Sopenharmony_ci  }
12561cb0ef41Sopenharmony_ci
12571cb0ef41Sopenharmony_ci  if (cc->is_standard(zone) && macro_assembler->CheckSpecialCharacterClass(
12581cb0ef41Sopenharmony_ci                                   cc->standard_type(), on_failure)) {
12591cb0ef41Sopenharmony_ci    return;
12601cb0ef41Sopenharmony_ci  }
12611cb0ef41Sopenharmony_ci
12621cb0ef41Sopenharmony_ci  static constexpr int kMaxRangesForInlineBranchGeneration = 16;
12631cb0ef41Sopenharmony_ci  if (ranges_length > kMaxRangesForInlineBranchGeneration) {
12641cb0ef41Sopenharmony_ci    // For large range sets, emit a more compact instruction sequence to avoid
12651cb0ef41Sopenharmony_ci    // a potentially problematic increase in code size.
12661cb0ef41Sopenharmony_ci    // Note the flipped logic below (we check InRange if negated, NotInRange if
12671cb0ef41Sopenharmony_ci    // not negated); this is necessary since the method falls through on
12681cb0ef41Sopenharmony_ci    // failure whereas we want to fall through on success.
12691cb0ef41Sopenharmony_ci    if (cc->is_negated()) {
12701cb0ef41Sopenharmony_ci      if (macro_assembler->CheckCharacterInRangeArray(ranges, on_failure)) {
12711cb0ef41Sopenharmony_ci        return;
12721cb0ef41Sopenharmony_ci      }
12731cb0ef41Sopenharmony_ci    } else {
12741cb0ef41Sopenharmony_ci      if (macro_assembler->CheckCharacterNotInRangeArray(ranges, on_failure)) {
12751cb0ef41Sopenharmony_ci        return;
12761cb0ef41Sopenharmony_ci      }
12771cb0ef41Sopenharmony_ci    }
12781cb0ef41Sopenharmony_ci  }
12791cb0ef41Sopenharmony_ci
12801cb0ef41Sopenharmony_ci  // Generate a flat list of range boundaries for consumption by
12811cb0ef41Sopenharmony_ci  // GenerateBranches. See the comment on that function for how the list should
12821cb0ef41Sopenharmony_ci  // be structured
12831cb0ef41Sopenharmony_ci  ZoneList<base::uc32>* range_boundaries =
12841cb0ef41Sopenharmony_ci      zone->New<ZoneList<base::uc32>>(ranges_length * 2, zone);
12851cb0ef41Sopenharmony_ci
12861cb0ef41Sopenharmony_ci  bool zeroth_entry_is_failure = !cc->is_negated();
12871cb0ef41Sopenharmony_ci
12881cb0ef41Sopenharmony_ci  for (int i = 0; i < ranges_length; i++) {
12891cb0ef41Sopenharmony_ci    CharacterRange& range = ranges->at(i);
12901cb0ef41Sopenharmony_ci    if (range.from() == 0) {
12911cb0ef41Sopenharmony_ci      DCHECK_EQ(i, 0);
12921cb0ef41Sopenharmony_ci      zeroth_entry_is_failure = !zeroth_entry_is_failure;
12931cb0ef41Sopenharmony_ci    } else {
12941cb0ef41Sopenharmony_ci      range_boundaries->Add(range.from(), zone);
12951cb0ef41Sopenharmony_ci    }
12961cb0ef41Sopenharmony_ci    // `+ 1` to convert from inclusive to exclusive `to`.
12971cb0ef41Sopenharmony_ci    // [from, to] == [from, to+1[.
12981cb0ef41Sopenharmony_ci    range_boundaries->Add(range.to() + 1, zone);
12991cb0ef41Sopenharmony_ci  }
13001cb0ef41Sopenharmony_ci  int end_index = range_boundaries->length() - 1;
13011cb0ef41Sopenharmony_ci  if (range_boundaries->at(end_index) > max_char) {
13021cb0ef41Sopenharmony_ci    end_index--;
13031cb0ef41Sopenharmony_ci  }
13041cb0ef41Sopenharmony_ci
13051cb0ef41Sopenharmony_ci  Label fall_through;
13061cb0ef41Sopenharmony_ci  GenerateBranches(macro_assembler, range_boundaries,
13071cb0ef41Sopenharmony_ci                   0,  // start_index.
13081cb0ef41Sopenharmony_ci                   end_index,
13091cb0ef41Sopenharmony_ci                   0,  // min_char.
13101cb0ef41Sopenharmony_ci                   max_char, &fall_through,
13111cb0ef41Sopenharmony_ci                   zeroth_entry_is_failure ? &fall_through : on_failure,
13121cb0ef41Sopenharmony_ci                   zeroth_entry_is_failure ? on_failure : &fall_through);
13131cb0ef41Sopenharmony_ci  macro_assembler->Bind(&fall_through);
13141cb0ef41Sopenharmony_ci}
13151cb0ef41Sopenharmony_ci
13161cb0ef41Sopenharmony_ci}  // namespace
13171cb0ef41Sopenharmony_ci
13181cb0ef41Sopenharmony_ciRegExpNode::~RegExpNode() = default;
13191cb0ef41Sopenharmony_ci
13201cb0ef41Sopenharmony_ciRegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
13211cb0ef41Sopenharmony_ci                                                  Trace* trace) {
13221cb0ef41Sopenharmony_ci  // If we are generating a greedy loop then don't stop and don't reuse code.
13231cb0ef41Sopenharmony_ci  if (trace->stop_node() != nullptr) {
13241cb0ef41Sopenharmony_ci    return CONTINUE;
13251cb0ef41Sopenharmony_ci  }
13261cb0ef41Sopenharmony_ci
13271cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
13281cb0ef41Sopenharmony_ci  if (trace->is_trivial()) {
13291cb0ef41Sopenharmony_ci    if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
13301cb0ef41Sopenharmony_ci      // If a generic version is already scheduled to be generated or we have
13311cb0ef41Sopenharmony_ci      // recursed too deeply then just generate a jump to that code.
13321cb0ef41Sopenharmony_ci      macro_assembler->GoTo(&label_);
13331cb0ef41Sopenharmony_ci      // This will queue it up for generation of a generic version if it hasn't
13341cb0ef41Sopenharmony_ci      // already been queued.
13351cb0ef41Sopenharmony_ci      compiler->AddWork(this);
13361cb0ef41Sopenharmony_ci      return DONE;
13371cb0ef41Sopenharmony_ci    }
13381cb0ef41Sopenharmony_ci    // Generate generic version of the node and bind the label for later use.
13391cb0ef41Sopenharmony_ci    macro_assembler->Bind(&label_);
13401cb0ef41Sopenharmony_ci    return CONTINUE;
13411cb0ef41Sopenharmony_ci  }
13421cb0ef41Sopenharmony_ci
13431cb0ef41Sopenharmony_ci  // We are being asked to make a non-generic version.  Keep track of how many
13441cb0ef41Sopenharmony_ci  // non-generic versions we generate so as not to overdo it.
13451cb0ef41Sopenharmony_ci  trace_count_++;
13461cb0ef41Sopenharmony_ci  if (KeepRecursing(compiler) && compiler->optimize() &&
13471cb0ef41Sopenharmony_ci      trace_count_ < kMaxCopiesCodeGenerated) {
13481cb0ef41Sopenharmony_ci    return CONTINUE;
13491cb0ef41Sopenharmony_ci  }
13501cb0ef41Sopenharmony_ci
13511cb0ef41Sopenharmony_ci  // If we get here code has been generated for this node too many times or
13521cb0ef41Sopenharmony_ci  // recursion is too deep.  Time to switch to a generic version.  The code for
13531cb0ef41Sopenharmony_ci  // generic versions above can handle deep recursion properly.
13541cb0ef41Sopenharmony_ci  bool was_limiting = compiler->limiting_recursion();
13551cb0ef41Sopenharmony_ci  compiler->set_limiting_recursion(true);
13561cb0ef41Sopenharmony_ci  trace->Flush(compiler, this);
13571cb0ef41Sopenharmony_ci  compiler->set_limiting_recursion(was_limiting);
13581cb0ef41Sopenharmony_ci  return DONE;
13591cb0ef41Sopenharmony_ci}
13601cb0ef41Sopenharmony_ci
13611cb0ef41Sopenharmony_cibool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
13621cb0ef41Sopenharmony_ci  return !compiler->limiting_recursion() &&
13631cb0ef41Sopenharmony_ci         compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
13641cb0ef41Sopenharmony_ci}
13651cb0ef41Sopenharmony_ci
13661cb0ef41Sopenharmony_civoid ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
13671cb0ef41Sopenharmony_ci                              BoyerMooreLookahead* bm, bool not_at_start) {
13681cb0ef41Sopenharmony_ci  if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) {
13691cb0ef41Sopenharmony_ci    // Anything may follow a positive submatch success, thus we need to accept
13701cb0ef41Sopenharmony_ci    // all characters from this position onwards.
13711cb0ef41Sopenharmony_ci    bm->SetRest(offset);
13721cb0ef41Sopenharmony_ci  } else {
13731cb0ef41Sopenharmony_ci    on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
13741cb0ef41Sopenharmony_ci  }
13751cb0ef41Sopenharmony_ci  SaveBMInfo(bm, not_at_start, offset);
13761cb0ef41Sopenharmony_ci}
13771cb0ef41Sopenharmony_ci
13781cb0ef41Sopenharmony_civoid ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
13791cb0ef41Sopenharmony_ci                                      RegExpCompiler* compiler, int filled_in,
13801cb0ef41Sopenharmony_ci                                      bool not_at_start) {
13811cb0ef41Sopenharmony_ci  if (action_type_ == SET_REGISTER_FOR_LOOP) {
13821cb0ef41Sopenharmony_ci    on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
13831cb0ef41Sopenharmony_ci                                                    filled_in, not_at_start);
13841cb0ef41Sopenharmony_ci  } else {
13851cb0ef41Sopenharmony_ci    on_success()->GetQuickCheckDetails(details, compiler, filled_in,
13861cb0ef41Sopenharmony_ci                                       not_at_start);
13871cb0ef41Sopenharmony_ci  }
13881cb0ef41Sopenharmony_ci}
13891cb0ef41Sopenharmony_ci
13901cb0ef41Sopenharmony_civoid AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
13911cb0ef41Sopenharmony_ci                                 BoyerMooreLookahead* bm, bool not_at_start) {
13921cb0ef41Sopenharmony_ci  // Match the behaviour of EatsAtLeast on this node.
13931cb0ef41Sopenharmony_ci  if (assertion_type() == AT_START && not_at_start) return;
13941cb0ef41Sopenharmony_ci  on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
13951cb0ef41Sopenharmony_ci  SaveBMInfo(bm, not_at_start, offset);
13961cb0ef41Sopenharmony_ci}
13971cb0ef41Sopenharmony_ci
13981cb0ef41Sopenharmony_civoid NegativeLookaroundChoiceNode::GetQuickCheckDetails(
13991cb0ef41Sopenharmony_ci    QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
14001cb0ef41Sopenharmony_ci    bool not_at_start) {
14011cb0ef41Sopenharmony_ci  RegExpNode* node = continue_node();
14021cb0ef41Sopenharmony_ci  return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
14031cb0ef41Sopenharmony_ci}
14041cb0ef41Sopenharmony_ci
14051cb0ef41Sopenharmony_cinamespace {
14061cb0ef41Sopenharmony_ci
14071cb0ef41Sopenharmony_ci// Takes the left-most 1-bit and smears it out, setting all bits to its right.
14081cb0ef41Sopenharmony_ciinline uint32_t SmearBitsRight(uint32_t v) {
14091cb0ef41Sopenharmony_ci  v |= v >> 1;
14101cb0ef41Sopenharmony_ci  v |= v >> 2;
14111cb0ef41Sopenharmony_ci  v |= v >> 4;
14121cb0ef41Sopenharmony_ci  v |= v >> 8;
14131cb0ef41Sopenharmony_ci  v |= v >> 16;
14141cb0ef41Sopenharmony_ci  return v;
14151cb0ef41Sopenharmony_ci}
14161cb0ef41Sopenharmony_ci
14171cb0ef41Sopenharmony_ci}  // namespace
14181cb0ef41Sopenharmony_ci
14191cb0ef41Sopenharmony_cibool QuickCheckDetails::Rationalize(bool asc) {
14201cb0ef41Sopenharmony_ci  bool found_useful_op = false;
14211cb0ef41Sopenharmony_ci  const uint32_t char_mask = CharMask(asc);
14221cb0ef41Sopenharmony_ci  mask_ = 0;
14231cb0ef41Sopenharmony_ci  value_ = 0;
14241cb0ef41Sopenharmony_ci  int char_shift = 0;
14251cb0ef41Sopenharmony_ci  for (int i = 0; i < characters_; i++) {
14261cb0ef41Sopenharmony_ci    Position* pos = &positions_[i];
14271cb0ef41Sopenharmony_ci    if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
14281cb0ef41Sopenharmony_ci      found_useful_op = true;
14291cb0ef41Sopenharmony_ci    }
14301cb0ef41Sopenharmony_ci    mask_ |= (pos->mask & char_mask) << char_shift;
14311cb0ef41Sopenharmony_ci    value_ |= (pos->value & char_mask) << char_shift;
14321cb0ef41Sopenharmony_ci    char_shift += asc ? 8 : 16;
14331cb0ef41Sopenharmony_ci  }
14341cb0ef41Sopenharmony_ci  return found_useful_op;
14351cb0ef41Sopenharmony_ci}
14361cb0ef41Sopenharmony_ci
14371cb0ef41Sopenharmony_ciint RegExpNode::EatsAtLeast(bool not_at_start) {
14381cb0ef41Sopenharmony_ci  return not_at_start ? eats_at_least_.eats_at_least_from_not_start
14391cb0ef41Sopenharmony_ci                      : eats_at_least_.eats_at_least_from_possibly_start;
14401cb0ef41Sopenharmony_ci}
14411cb0ef41Sopenharmony_ci
14421cb0ef41Sopenharmony_ciEatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() {
14431cb0ef41Sopenharmony_ci  // SET_REGISTER_FOR_LOOP is only used to initialize loop counters, and it
14441cb0ef41Sopenharmony_ci  // implies that the following node must be a LoopChoiceNode. If we need to
14451cb0ef41Sopenharmony_ci  // set registers to constant values for other reasons, we could introduce a
14461cb0ef41Sopenharmony_ci  // new action type SET_REGISTER that doesn't imply anything about its
14471cb0ef41Sopenharmony_ci  // successor.
14481cb0ef41Sopenharmony_ci  UNREACHABLE();
14491cb0ef41Sopenharmony_ci}
14501cb0ef41Sopenharmony_ci
14511cb0ef41Sopenharmony_civoid RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
14521cb0ef41Sopenharmony_ci                                                   RegExpCompiler* compiler,
14531cb0ef41Sopenharmony_ci                                                   int characters_filled_in,
14541cb0ef41Sopenharmony_ci                                                   bool not_at_start) {
14551cb0ef41Sopenharmony_ci  // See comment in RegExpNode::EatsAtLeastFromLoopEntry.
14561cb0ef41Sopenharmony_ci  UNREACHABLE();
14571cb0ef41Sopenharmony_ci}
14581cb0ef41Sopenharmony_ci
14591cb0ef41Sopenharmony_ciEatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() {
14601cb0ef41Sopenharmony_ci  DCHECK_EQ(alternatives_->length(), 2);  // There's just loop and continue.
14611cb0ef41Sopenharmony_ci
14621cb0ef41Sopenharmony_ci  if (read_backward()) {
14631cb0ef41Sopenharmony_ci    // The eats_at_least value is not used if reading backward. The
14641cb0ef41Sopenharmony_ci    // EatsAtLeastPropagator should've zeroed it as well.
14651cb0ef41Sopenharmony_ci    DCHECK_EQ(eats_at_least_info()->eats_at_least_from_possibly_start, 0);
14661cb0ef41Sopenharmony_ci    DCHECK_EQ(eats_at_least_info()->eats_at_least_from_not_start, 0);
14671cb0ef41Sopenharmony_ci    return {};
14681cb0ef41Sopenharmony_ci  }
14691cb0ef41Sopenharmony_ci
14701cb0ef41Sopenharmony_ci  // Figure out how much the loop body itself eats, not including anything in
14711cb0ef41Sopenharmony_ci  // the continuation case. In general, the nodes in the loop body should report
14721cb0ef41Sopenharmony_ci  // that they eat at least the number eaten by the continuation node, since any
14731cb0ef41Sopenharmony_ci  // successful match in the loop body must also include the continuation node.
14741cb0ef41Sopenharmony_ci  // However, in some cases involving positive lookaround, the loop body under-
14751cb0ef41Sopenharmony_ci  // reports its appetite, so use saturated math here to avoid negative numbers.
14761cb0ef41Sopenharmony_ci  uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>(
14771cb0ef41Sopenharmony_ci      loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true));
14781cb0ef41Sopenharmony_ci  uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>(
14791cb0ef41Sopenharmony_ci      loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true));
14801cb0ef41Sopenharmony_ci
14811cb0ef41Sopenharmony_ci  // Limit the number of loop iterations to avoid overflow in subsequent steps.
14821cb0ef41Sopenharmony_ci  int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations());
14831cb0ef41Sopenharmony_ci
14841cb0ef41Sopenharmony_ci  EatsAtLeastInfo result;
14851cb0ef41Sopenharmony_ci  result.eats_at_least_from_not_start =
14861cb0ef41Sopenharmony_ci      base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start +
14871cb0ef41Sopenharmony_ci                                    continue_node_->EatsAtLeast(true));
14881cb0ef41Sopenharmony_ci  if (loop_iterations > 0 && loop_body_from_possibly_start > 0) {
14891cb0ef41Sopenharmony_ci    // First loop iteration eats at least one, so all subsequent iterations
14901cb0ef41Sopenharmony_ci    // and the after-loop chunk are guaranteed to not be at the start.
14911cb0ef41Sopenharmony_ci    result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>(
14921cb0ef41Sopenharmony_ci        loop_body_from_possibly_start +
14931cb0ef41Sopenharmony_ci        (loop_iterations - 1) * loop_body_from_not_start +
14941cb0ef41Sopenharmony_ci        continue_node_->EatsAtLeast(true));
14951cb0ef41Sopenharmony_ci  } else {
14961cb0ef41Sopenharmony_ci    // Loop body might eat nothing, so only continue node contributes.
14971cb0ef41Sopenharmony_ci    result.eats_at_least_from_possibly_start =
14981cb0ef41Sopenharmony_ci        continue_node_->EatsAtLeast(false);
14991cb0ef41Sopenharmony_ci  }
15001cb0ef41Sopenharmony_ci  return result;
15011cb0ef41Sopenharmony_ci}
15021cb0ef41Sopenharmony_ci
15031cb0ef41Sopenharmony_cibool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
15041cb0ef41Sopenharmony_ci                                Trace* bounds_check_trace, Trace* trace,
15051cb0ef41Sopenharmony_ci                                bool preload_has_checked_bounds,
15061cb0ef41Sopenharmony_ci                                Label* on_possible_success,
15071cb0ef41Sopenharmony_ci                                QuickCheckDetails* details,
15081cb0ef41Sopenharmony_ci                                bool fall_through_on_failure,
15091cb0ef41Sopenharmony_ci                                ChoiceNode* predecessor) {
15101cb0ef41Sopenharmony_ci  DCHECK_NOT_NULL(predecessor);
15111cb0ef41Sopenharmony_ci  if (details->characters() == 0) return false;
15121cb0ef41Sopenharmony_ci  GetQuickCheckDetails(details, compiler, 0,
15131cb0ef41Sopenharmony_ci                       trace->at_start() == Trace::FALSE_VALUE);
15141cb0ef41Sopenharmony_ci  if (details->cannot_match()) return false;
15151cb0ef41Sopenharmony_ci  if (!details->Rationalize(compiler->one_byte())) return false;
15161cb0ef41Sopenharmony_ci  DCHECK(details->characters() == 1 ||
15171cb0ef41Sopenharmony_ci         compiler->macro_assembler()->CanReadUnaligned());
15181cb0ef41Sopenharmony_ci  uint32_t mask = details->mask();
15191cb0ef41Sopenharmony_ci  uint32_t value = details->value();
15201cb0ef41Sopenharmony_ci
15211cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
15221cb0ef41Sopenharmony_ci
15231cb0ef41Sopenharmony_ci  if (trace->characters_preloaded() != details->characters()) {
15241cb0ef41Sopenharmony_ci    DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
15251cb0ef41Sopenharmony_ci    // The bounds check is performed using the minimum number of characters
15261cb0ef41Sopenharmony_ci    // any choice would eat, so if the bounds check fails, then none of the
15271cb0ef41Sopenharmony_ci    // choices can succeed, so we can just immediately backtrack, rather
15281cb0ef41Sopenharmony_ci    // than go to the next choice. The number of characters preloaded may be
15291cb0ef41Sopenharmony_ci    // less than the number used for the bounds check.
15301cb0ef41Sopenharmony_ci    int eats_at_least = predecessor->EatsAtLeast(
15311cb0ef41Sopenharmony_ci        bounds_check_trace->at_start() == Trace::FALSE_VALUE);
15321cb0ef41Sopenharmony_ci    DCHECK_GE(eats_at_least, details->characters());
15331cb0ef41Sopenharmony_ci    assembler->LoadCurrentCharacter(
15341cb0ef41Sopenharmony_ci        trace->cp_offset(), bounds_check_trace->backtrack(),
15351cb0ef41Sopenharmony_ci        !preload_has_checked_bounds, details->characters(), eats_at_least);
15361cb0ef41Sopenharmony_ci  }
15371cb0ef41Sopenharmony_ci
15381cb0ef41Sopenharmony_ci  bool need_mask = true;
15391cb0ef41Sopenharmony_ci
15401cb0ef41Sopenharmony_ci  if (details->characters() == 1) {
15411cb0ef41Sopenharmony_ci    // If number of characters preloaded is 1 then we used a byte or 16 bit
15421cb0ef41Sopenharmony_ci    // load so the value is already masked down.
15431cb0ef41Sopenharmony_ci    const uint32_t char_mask = CharMask(compiler->one_byte());
15441cb0ef41Sopenharmony_ci    if ((mask & char_mask) == char_mask) need_mask = false;
15451cb0ef41Sopenharmony_ci    mask &= char_mask;
15461cb0ef41Sopenharmony_ci  } else {
15471cb0ef41Sopenharmony_ci    // For 2-character preloads in one-byte mode or 1-character preloads in
15481cb0ef41Sopenharmony_ci    // two-byte mode we also use a 16 bit load with zero extend.
15491cb0ef41Sopenharmony_ci    static const uint32_t kTwoByteMask = 0xFFFF;
15501cb0ef41Sopenharmony_ci    static const uint32_t kFourByteMask = 0xFFFFFFFF;
15511cb0ef41Sopenharmony_ci    if (details->characters() == 2 && compiler->one_byte()) {
15521cb0ef41Sopenharmony_ci      if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
15531cb0ef41Sopenharmony_ci    } else if (details->characters() == 1 && !compiler->one_byte()) {
15541cb0ef41Sopenharmony_ci      if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
15551cb0ef41Sopenharmony_ci    } else {
15561cb0ef41Sopenharmony_ci      if (mask == kFourByteMask) need_mask = false;
15571cb0ef41Sopenharmony_ci    }
15581cb0ef41Sopenharmony_ci  }
15591cb0ef41Sopenharmony_ci
15601cb0ef41Sopenharmony_ci  if (fall_through_on_failure) {
15611cb0ef41Sopenharmony_ci    if (need_mask) {
15621cb0ef41Sopenharmony_ci      assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
15631cb0ef41Sopenharmony_ci    } else {
15641cb0ef41Sopenharmony_ci      assembler->CheckCharacter(value, on_possible_success);
15651cb0ef41Sopenharmony_ci    }
15661cb0ef41Sopenharmony_ci  } else {
15671cb0ef41Sopenharmony_ci    if (need_mask) {
15681cb0ef41Sopenharmony_ci      assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
15691cb0ef41Sopenharmony_ci    } else {
15701cb0ef41Sopenharmony_ci      assembler->CheckNotCharacter(value, trace->backtrack());
15711cb0ef41Sopenharmony_ci    }
15721cb0ef41Sopenharmony_ci  }
15731cb0ef41Sopenharmony_ci  return true;
15741cb0ef41Sopenharmony_ci}
15751cb0ef41Sopenharmony_ci
15761cb0ef41Sopenharmony_ci// Here is the meat of GetQuickCheckDetails (see also the comment on the
15771cb0ef41Sopenharmony_ci// super-class in the .h file).
15781cb0ef41Sopenharmony_ci//
15791cb0ef41Sopenharmony_ci// We iterate along the text object, building up for each character a
15801cb0ef41Sopenharmony_ci// mask and value that can be used to test for a quick failure to match.
15811cb0ef41Sopenharmony_ci// The masks and values for the positions will be combined into a single
15821cb0ef41Sopenharmony_ci// machine word for the current character width in order to be used in
15831cb0ef41Sopenharmony_ci// generating a quick check.
15841cb0ef41Sopenharmony_civoid TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
15851cb0ef41Sopenharmony_ci                                    RegExpCompiler* compiler,
15861cb0ef41Sopenharmony_ci                                    int characters_filled_in,
15871cb0ef41Sopenharmony_ci                                    bool not_at_start) {
15881cb0ef41Sopenharmony_ci  // Do not collect any quick check details if the text node reads backward,
15891cb0ef41Sopenharmony_ci  // since it reads in the opposite direction than we use for quick checks.
15901cb0ef41Sopenharmony_ci  if (read_backward()) return;
15911cb0ef41Sopenharmony_ci  Isolate* isolate = compiler->macro_assembler()->isolate();
15921cb0ef41Sopenharmony_ci  DCHECK(characters_filled_in < details->characters());
15931cb0ef41Sopenharmony_ci  int characters = details->characters();
15941cb0ef41Sopenharmony_ci  const uint32_t char_mask = CharMask(compiler->one_byte());
15951cb0ef41Sopenharmony_ci  for (int k = 0; k < elements()->length(); k++) {
15961cb0ef41Sopenharmony_ci    TextElement elm = elements()->at(k);
15971cb0ef41Sopenharmony_ci    if (elm.text_type() == TextElement::ATOM) {
15981cb0ef41Sopenharmony_ci      base::Vector<const base::uc16> quarks = elm.atom()->data();
15991cb0ef41Sopenharmony_ci      for (int i = 0; i < characters && i < quarks.length(); i++) {
16001cb0ef41Sopenharmony_ci        QuickCheckDetails::Position* pos =
16011cb0ef41Sopenharmony_ci            details->positions(characters_filled_in);
16021cb0ef41Sopenharmony_ci        base::uc16 c = quarks[i];
16031cb0ef41Sopenharmony_ci        if (IsIgnoreCase(compiler->flags())) {
16041cb0ef41Sopenharmony_ci          unibrow::uchar chars[4];
16051cb0ef41Sopenharmony_ci          int length = GetCaseIndependentLetters(
16061cb0ef41Sopenharmony_ci              isolate, c, compiler->one_byte(), chars, 4);
16071cb0ef41Sopenharmony_ci          if (length == 0) {
16081cb0ef41Sopenharmony_ci            // This can happen because all case variants are non-Latin1, but we
16091cb0ef41Sopenharmony_ci            // know the input is Latin1.
16101cb0ef41Sopenharmony_ci            details->set_cannot_match();
16111cb0ef41Sopenharmony_ci            pos->determines_perfectly = false;
16121cb0ef41Sopenharmony_ci            return;
16131cb0ef41Sopenharmony_ci          }
16141cb0ef41Sopenharmony_ci          if (length == 1) {
16151cb0ef41Sopenharmony_ci            // This letter has no case equivalents, so it's nice and simple
16161cb0ef41Sopenharmony_ci            // and the mask-compare will determine definitely whether we have
16171cb0ef41Sopenharmony_ci            // a match at this character position.
16181cb0ef41Sopenharmony_ci            pos->mask = char_mask;
16191cb0ef41Sopenharmony_ci            pos->value = chars[0];
16201cb0ef41Sopenharmony_ci            pos->determines_perfectly = true;
16211cb0ef41Sopenharmony_ci          } else {
16221cb0ef41Sopenharmony_ci            uint32_t common_bits = char_mask;
16231cb0ef41Sopenharmony_ci            uint32_t bits = chars[0];
16241cb0ef41Sopenharmony_ci            for (int j = 1; j < length; j++) {
16251cb0ef41Sopenharmony_ci              uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
16261cb0ef41Sopenharmony_ci              common_bits ^= differing_bits;
16271cb0ef41Sopenharmony_ci              bits &= common_bits;
16281cb0ef41Sopenharmony_ci            }
16291cb0ef41Sopenharmony_ci            // If length is 2 and common bits has only one zero in it then
16301cb0ef41Sopenharmony_ci            // our mask and compare instruction will determine definitely
16311cb0ef41Sopenharmony_ci            // whether we have a match at this character position.  Otherwise
16321cb0ef41Sopenharmony_ci            // it can only be an approximate check.
16331cb0ef41Sopenharmony_ci            uint32_t one_zero = (common_bits | ~char_mask);
16341cb0ef41Sopenharmony_ci            if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
16351cb0ef41Sopenharmony_ci              pos->determines_perfectly = true;
16361cb0ef41Sopenharmony_ci            }
16371cb0ef41Sopenharmony_ci            pos->mask = common_bits;
16381cb0ef41Sopenharmony_ci            pos->value = bits;
16391cb0ef41Sopenharmony_ci          }
16401cb0ef41Sopenharmony_ci        } else {
16411cb0ef41Sopenharmony_ci          // Don't ignore case.  Nice simple case where the mask-compare will
16421cb0ef41Sopenharmony_ci          // determine definitely whether we have a match at this character
16431cb0ef41Sopenharmony_ci          // position.
16441cb0ef41Sopenharmony_ci          if (c > char_mask) {
16451cb0ef41Sopenharmony_ci            details->set_cannot_match();
16461cb0ef41Sopenharmony_ci            pos->determines_perfectly = false;
16471cb0ef41Sopenharmony_ci            return;
16481cb0ef41Sopenharmony_ci          }
16491cb0ef41Sopenharmony_ci          pos->mask = char_mask;
16501cb0ef41Sopenharmony_ci          pos->value = c;
16511cb0ef41Sopenharmony_ci          pos->determines_perfectly = true;
16521cb0ef41Sopenharmony_ci        }
16531cb0ef41Sopenharmony_ci        characters_filled_in++;
16541cb0ef41Sopenharmony_ci        DCHECK(characters_filled_in <= details->characters());
16551cb0ef41Sopenharmony_ci        if (characters_filled_in == details->characters()) {
16561cb0ef41Sopenharmony_ci          return;
16571cb0ef41Sopenharmony_ci        }
16581cb0ef41Sopenharmony_ci      }
16591cb0ef41Sopenharmony_ci    } else {
16601cb0ef41Sopenharmony_ci      QuickCheckDetails::Position* pos =
16611cb0ef41Sopenharmony_ci          details->positions(characters_filled_in);
16621cb0ef41Sopenharmony_ci      RegExpCharacterClass* tree = elm.char_class();
16631cb0ef41Sopenharmony_ci      ZoneList<CharacterRange>* ranges = tree->ranges(zone());
16641cb0ef41Sopenharmony_ci      if (tree->is_negated() || ranges->is_empty()) {
16651cb0ef41Sopenharmony_ci        // A quick check uses multi-character mask and compare.  There is no
16661cb0ef41Sopenharmony_ci        // useful way to incorporate a negative char class into this scheme
16671cb0ef41Sopenharmony_ci        // so we just conservatively create a mask and value that will always
16681cb0ef41Sopenharmony_ci        // succeed.
16691cb0ef41Sopenharmony_ci        // Likewise for empty ranges (empty ranges can occur e.g. when
16701cb0ef41Sopenharmony_ci        // compiling for one-byte subjects and impossible (non-one-byte) ranges
16711cb0ef41Sopenharmony_ci        // have been removed).
16721cb0ef41Sopenharmony_ci        pos->mask = 0;
16731cb0ef41Sopenharmony_ci        pos->value = 0;
16741cb0ef41Sopenharmony_ci      } else {
16751cb0ef41Sopenharmony_ci        int first_range = 0;
16761cb0ef41Sopenharmony_ci        while (ranges->at(first_range).from() > char_mask) {
16771cb0ef41Sopenharmony_ci          first_range++;
16781cb0ef41Sopenharmony_ci          if (first_range == ranges->length()) {
16791cb0ef41Sopenharmony_ci            details->set_cannot_match();
16801cb0ef41Sopenharmony_ci            pos->determines_perfectly = false;
16811cb0ef41Sopenharmony_ci            return;
16821cb0ef41Sopenharmony_ci          }
16831cb0ef41Sopenharmony_ci        }
16841cb0ef41Sopenharmony_ci        CharacterRange range = ranges->at(first_range);
16851cb0ef41Sopenharmony_ci        const base::uc32 first_from = range.from();
16861cb0ef41Sopenharmony_ci        const base::uc32 first_to =
16871cb0ef41Sopenharmony_ci            (range.to() > char_mask) ? char_mask : range.to();
16881cb0ef41Sopenharmony_ci        const uint32_t differing_bits = (first_from ^ first_to);
16891cb0ef41Sopenharmony_ci        // A mask and compare is only perfect if the differing bits form a
16901cb0ef41Sopenharmony_ci        // number like 00011111 with one single block of trailing 1s.
16911cb0ef41Sopenharmony_ci        if ((differing_bits & (differing_bits + 1)) == 0 &&
16921cb0ef41Sopenharmony_ci            first_from + differing_bits == first_to) {
16931cb0ef41Sopenharmony_ci          pos->determines_perfectly = true;
16941cb0ef41Sopenharmony_ci        }
16951cb0ef41Sopenharmony_ci        uint32_t common_bits = ~SmearBitsRight(differing_bits);
16961cb0ef41Sopenharmony_ci        uint32_t bits = (first_from & common_bits);
16971cb0ef41Sopenharmony_ci        for (int i = first_range + 1; i < ranges->length(); i++) {
16981cb0ef41Sopenharmony_ci          range = ranges->at(i);
16991cb0ef41Sopenharmony_ci          const base::uc32 from = range.from();
17001cb0ef41Sopenharmony_ci          if (from > char_mask) continue;
17011cb0ef41Sopenharmony_ci          const base::uc32 to =
17021cb0ef41Sopenharmony_ci              (range.to() > char_mask) ? char_mask : range.to();
17031cb0ef41Sopenharmony_ci          // Here we are combining more ranges into the mask and compare
17041cb0ef41Sopenharmony_ci          // value.  With each new range the mask becomes more sparse and
17051cb0ef41Sopenharmony_ci          // so the chances of a false positive rise.  A character class
17061cb0ef41Sopenharmony_ci          // with multiple ranges is assumed never to be equivalent to a
17071cb0ef41Sopenharmony_ci          // mask and compare operation.
17081cb0ef41Sopenharmony_ci          pos->determines_perfectly = false;
17091cb0ef41Sopenharmony_ci          uint32_t new_common_bits = (from ^ to);
17101cb0ef41Sopenharmony_ci          new_common_bits = ~SmearBitsRight(new_common_bits);
17111cb0ef41Sopenharmony_ci          common_bits &= new_common_bits;
17121cb0ef41Sopenharmony_ci          bits &= new_common_bits;
17131cb0ef41Sopenharmony_ci          uint32_t new_differing_bits = (from & common_bits) ^ bits;
17141cb0ef41Sopenharmony_ci          common_bits ^= new_differing_bits;
17151cb0ef41Sopenharmony_ci          bits &= common_bits;
17161cb0ef41Sopenharmony_ci        }
17171cb0ef41Sopenharmony_ci        pos->mask = common_bits;
17181cb0ef41Sopenharmony_ci        pos->value = bits;
17191cb0ef41Sopenharmony_ci      }
17201cb0ef41Sopenharmony_ci      characters_filled_in++;
17211cb0ef41Sopenharmony_ci      DCHECK(characters_filled_in <= details->characters());
17221cb0ef41Sopenharmony_ci      if (characters_filled_in == details->characters()) return;
17231cb0ef41Sopenharmony_ci    }
17241cb0ef41Sopenharmony_ci  }
17251cb0ef41Sopenharmony_ci  DCHECK(characters_filled_in != details->characters());
17261cb0ef41Sopenharmony_ci  if (!details->cannot_match()) {
17271cb0ef41Sopenharmony_ci    on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
17281cb0ef41Sopenharmony_ci                                       true);
17291cb0ef41Sopenharmony_ci  }
17301cb0ef41Sopenharmony_ci}
17311cb0ef41Sopenharmony_ci
17321cb0ef41Sopenharmony_civoid QuickCheckDetails::Clear() {
17331cb0ef41Sopenharmony_ci  for (int i = 0; i < characters_; i++) {
17341cb0ef41Sopenharmony_ci    positions_[i].mask = 0;
17351cb0ef41Sopenharmony_ci    positions_[i].value = 0;
17361cb0ef41Sopenharmony_ci    positions_[i].determines_perfectly = false;
17371cb0ef41Sopenharmony_ci  }
17381cb0ef41Sopenharmony_ci  characters_ = 0;
17391cb0ef41Sopenharmony_ci}
17401cb0ef41Sopenharmony_ci
17411cb0ef41Sopenharmony_civoid QuickCheckDetails::Advance(int by, bool one_byte) {
17421cb0ef41Sopenharmony_ci  if (by >= characters_ || by < 0) {
17431cb0ef41Sopenharmony_ci    DCHECK_IMPLIES(by < 0, characters_ == 0);
17441cb0ef41Sopenharmony_ci    Clear();
17451cb0ef41Sopenharmony_ci    return;
17461cb0ef41Sopenharmony_ci  }
17471cb0ef41Sopenharmony_ci  DCHECK_LE(characters_ - by, 4);
17481cb0ef41Sopenharmony_ci  DCHECK_LE(characters_, 4);
17491cb0ef41Sopenharmony_ci  for (int i = 0; i < characters_ - by; i++) {
17501cb0ef41Sopenharmony_ci    positions_[i] = positions_[by + i];
17511cb0ef41Sopenharmony_ci  }
17521cb0ef41Sopenharmony_ci  for (int i = characters_ - by; i < characters_; i++) {
17531cb0ef41Sopenharmony_ci    positions_[i].mask = 0;
17541cb0ef41Sopenharmony_ci    positions_[i].value = 0;
17551cb0ef41Sopenharmony_ci    positions_[i].determines_perfectly = false;
17561cb0ef41Sopenharmony_ci  }
17571cb0ef41Sopenharmony_ci  characters_ -= by;
17581cb0ef41Sopenharmony_ci  // We could change mask_ and value_ here but we would never advance unless
17591cb0ef41Sopenharmony_ci  // they had already been used in a check and they won't be used again because
17601cb0ef41Sopenharmony_ci  // it would gain us nothing.  So there's no point.
17611cb0ef41Sopenharmony_ci}
17621cb0ef41Sopenharmony_ci
17631cb0ef41Sopenharmony_civoid QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
17641cb0ef41Sopenharmony_ci  DCHECK(characters_ == other->characters_);
17651cb0ef41Sopenharmony_ci  if (other->cannot_match_) {
17661cb0ef41Sopenharmony_ci    return;
17671cb0ef41Sopenharmony_ci  }
17681cb0ef41Sopenharmony_ci  if (cannot_match_) {
17691cb0ef41Sopenharmony_ci    *this = *other;
17701cb0ef41Sopenharmony_ci    return;
17711cb0ef41Sopenharmony_ci  }
17721cb0ef41Sopenharmony_ci  for (int i = from_index; i < characters_; i++) {
17731cb0ef41Sopenharmony_ci    QuickCheckDetails::Position* pos = positions(i);
17741cb0ef41Sopenharmony_ci    QuickCheckDetails::Position* other_pos = other->positions(i);
17751cb0ef41Sopenharmony_ci    if (pos->mask != other_pos->mask || pos->value != other_pos->value ||
17761cb0ef41Sopenharmony_ci        !other_pos->determines_perfectly) {
17771cb0ef41Sopenharmony_ci      // Our mask-compare operation will be approximate unless we have the
17781cb0ef41Sopenharmony_ci      // exact same operation on both sides of the alternation.
17791cb0ef41Sopenharmony_ci      pos->determines_perfectly = false;
17801cb0ef41Sopenharmony_ci    }
17811cb0ef41Sopenharmony_ci    pos->mask &= other_pos->mask;
17821cb0ef41Sopenharmony_ci    pos->value &= pos->mask;
17831cb0ef41Sopenharmony_ci    other_pos->value &= pos->mask;
17841cb0ef41Sopenharmony_ci    uint32_t differing_bits = (pos->value ^ other_pos->value);
17851cb0ef41Sopenharmony_ci    pos->mask &= ~differing_bits;
17861cb0ef41Sopenharmony_ci    pos->value &= pos->mask;
17871cb0ef41Sopenharmony_ci  }
17881cb0ef41Sopenharmony_ci}
17891cb0ef41Sopenharmony_ci
17901cb0ef41Sopenharmony_ciclass VisitMarker {
17911cb0ef41Sopenharmony_ci public:
17921cb0ef41Sopenharmony_ci  explicit VisitMarker(NodeInfo* info) : info_(info) {
17931cb0ef41Sopenharmony_ci    DCHECK(!info->visited);
17941cb0ef41Sopenharmony_ci    info->visited = true;
17951cb0ef41Sopenharmony_ci  }
17961cb0ef41Sopenharmony_ci  ~VisitMarker() { info_->visited = false; }
17971cb0ef41Sopenharmony_ci
17981cb0ef41Sopenharmony_ci private:
17991cb0ef41Sopenharmony_ci  NodeInfo* info_;
18001cb0ef41Sopenharmony_ci};
18011cb0ef41Sopenharmony_ci
18021cb0ef41Sopenharmony_ci// Temporarily sets traversed_loop_initialization_node_.
18031cb0ef41Sopenharmony_ciclass LoopInitializationMarker {
18041cb0ef41Sopenharmony_ci public:
18051cb0ef41Sopenharmony_ci  explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) {
18061cb0ef41Sopenharmony_ci    DCHECK(!node_->traversed_loop_initialization_node_);
18071cb0ef41Sopenharmony_ci    node_->traversed_loop_initialization_node_ = true;
18081cb0ef41Sopenharmony_ci  }
18091cb0ef41Sopenharmony_ci  ~LoopInitializationMarker() {
18101cb0ef41Sopenharmony_ci    DCHECK(node_->traversed_loop_initialization_node_);
18111cb0ef41Sopenharmony_ci    node_->traversed_loop_initialization_node_ = false;
18121cb0ef41Sopenharmony_ci  }
18131cb0ef41Sopenharmony_ci  LoopInitializationMarker(const LoopInitializationMarker&) = delete;
18141cb0ef41Sopenharmony_ci  LoopInitializationMarker& operator=(const LoopInitializationMarker&) = delete;
18151cb0ef41Sopenharmony_ci
18161cb0ef41Sopenharmony_ci private:
18171cb0ef41Sopenharmony_ci  LoopChoiceNode* node_;
18181cb0ef41Sopenharmony_ci};
18191cb0ef41Sopenharmony_ci
18201cb0ef41Sopenharmony_ci// Temporarily decrements min_loop_iterations_.
18211cb0ef41Sopenharmony_ciclass IterationDecrementer {
18221cb0ef41Sopenharmony_ci public:
18231cb0ef41Sopenharmony_ci  explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) {
18241cb0ef41Sopenharmony_ci    DCHECK_GT(node_->min_loop_iterations_, 0);
18251cb0ef41Sopenharmony_ci    --node_->min_loop_iterations_;
18261cb0ef41Sopenharmony_ci  }
18271cb0ef41Sopenharmony_ci  ~IterationDecrementer() { ++node_->min_loop_iterations_; }
18281cb0ef41Sopenharmony_ci  IterationDecrementer(const IterationDecrementer&) = delete;
18291cb0ef41Sopenharmony_ci  IterationDecrementer& operator=(const IterationDecrementer&) = delete;
18301cb0ef41Sopenharmony_ci
18311cb0ef41Sopenharmony_ci private:
18321cb0ef41Sopenharmony_ci  LoopChoiceNode* node_;
18331cb0ef41Sopenharmony_ci};
18341cb0ef41Sopenharmony_ci
18351cb0ef41Sopenharmony_ciRegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpFlags flags) {
18361cb0ef41Sopenharmony_ci  if (info()->replacement_calculated) return replacement();
18371cb0ef41Sopenharmony_ci  if (depth < 0) return this;
18381cb0ef41Sopenharmony_ci  DCHECK(!info()->visited);
18391cb0ef41Sopenharmony_ci  VisitMarker marker(info());
18401cb0ef41Sopenharmony_ci  return FilterSuccessor(depth - 1, flags);
18411cb0ef41Sopenharmony_ci}
18421cb0ef41Sopenharmony_ci
18431cb0ef41Sopenharmony_ciRegExpNode* SeqRegExpNode::FilterSuccessor(int depth, RegExpFlags flags) {
18441cb0ef41Sopenharmony_ci  RegExpNode* next = on_success_->FilterOneByte(depth - 1, flags);
18451cb0ef41Sopenharmony_ci  if (next == nullptr) return set_replacement(nullptr);
18461cb0ef41Sopenharmony_ci  on_success_ = next;
18471cb0ef41Sopenharmony_ci  return set_replacement(this);
18481cb0ef41Sopenharmony_ci}
18491cb0ef41Sopenharmony_ci
18501cb0ef41Sopenharmony_ci// We need to check for the following characters: 0x39C 0x3BC 0x178.
18511cb0ef41Sopenharmony_cibool RangeContainsLatin1Equivalents(CharacterRange range) {
18521cb0ef41Sopenharmony_ci  // TODO(dcarney): this could be a lot more efficient.
18531cb0ef41Sopenharmony_ci  return range.Contains(0x039C) || range.Contains(0x03BC) ||
18541cb0ef41Sopenharmony_ci         range.Contains(0x0178);
18551cb0ef41Sopenharmony_ci}
18561cb0ef41Sopenharmony_ci
18571cb0ef41Sopenharmony_cinamespace {
18581cb0ef41Sopenharmony_ci
18591cb0ef41Sopenharmony_cibool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
18601cb0ef41Sopenharmony_ci  for (int i = 0; i < ranges->length(); i++) {
18611cb0ef41Sopenharmony_ci    // TODO(dcarney): this could be a lot more efficient.
18621cb0ef41Sopenharmony_ci    if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
18631cb0ef41Sopenharmony_ci  }
18641cb0ef41Sopenharmony_ci  return false;
18651cb0ef41Sopenharmony_ci}
18661cb0ef41Sopenharmony_ci
18671cb0ef41Sopenharmony_ci}  // namespace
18681cb0ef41Sopenharmony_ci
18691cb0ef41Sopenharmony_ciRegExpNode* TextNode::FilterOneByte(int depth, RegExpFlags flags) {
18701cb0ef41Sopenharmony_ci  if (info()->replacement_calculated) return replacement();
18711cb0ef41Sopenharmony_ci  if (depth < 0) return this;
18721cb0ef41Sopenharmony_ci  DCHECK(!info()->visited);
18731cb0ef41Sopenharmony_ci  VisitMarker marker(info());
18741cb0ef41Sopenharmony_ci  int element_count = elements()->length();
18751cb0ef41Sopenharmony_ci  for (int i = 0; i < element_count; i++) {
18761cb0ef41Sopenharmony_ci    TextElement elm = elements()->at(i);
18771cb0ef41Sopenharmony_ci    if (elm.text_type() == TextElement::ATOM) {
18781cb0ef41Sopenharmony_ci      base::Vector<const base::uc16> quarks = elm.atom()->data();
18791cb0ef41Sopenharmony_ci      for (int j = 0; j < quarks.length(); j++) {
18801cb0ef41Sopenharmony_ci        base::uc16 c = quarks[j];
18811cb0ef41Sopenharmony_ci        if (IsIgnoreCase(flags)) {
18821cb0ef41Sopenharmony_ci          c = unibrow::Latin1::TryConvertToLatin1(c);
18831cb0ef41Sopenharmony_ci        }
18841cb0ef41Sopenharmony_ci        if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
18851cb0ef41Sopenharmony_ci        // Replace quark in case we converted to Latin-1.
18861cb0ef41Sopenharmony_ci        base::uc16* writable_quarks = const_cast<base::uc16*>(quarks.begin());
18871cb0ef41Sopenharmony_ci        writable_quarks[j] = c;
18881cb0ef41Sopenharmony_ci      }
18891cb0ef41Sopenharmony_ci    } else {
18901cb0ef41Sopenharmony_ci      DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
18911cb0ef41Sopenharmony_ci      RegExpCharacterClass* cc = elm.char_class();
18921cb0ef41Sopenharmony_ci      ZoneList<CharacterRange>* ranges = cc->ranges(zone());
18931cb0ef41Sopenharmony_ci      CharacterRange::Canonicalize(ranges);
18941cb0ef41Sopenharmony_ci      // Now they are in order so we only need to look at the first.
18951cb0ef41Sopenharmony_ci      int range_count = ranges->length();
18961cb0ef41Sopenharmony_ci      if (cc->is_negated()) {
18971cb0ef41Sopenharmony_ci        if (range_count != 0 && ranges->at(0).from() == 0 &&
18981cb0ef41Sopenharmony_ci            ranges->at(0).to() >= String::kMaxOneByteCharCode) {
18991cb0ef41Sopenharmony_ci          // This will be handled in a later filter.
19001cb0ef41Sopenharmony_ci          if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
19011cb0ef41Sopenharmony_ci            continue;
19021cb0ef41Sopenharmony_ci          }
19031cb0ef41Sopenharmony_ci          return set_replacement(nullptr);
19041cb0ef41Sopenharmony_ci        }
19051cb0ef41Sopenharmony_ci      } else {
19061cb0ef41Sopenharmony_ci        if (range_count == 0 ||
19071cb0ef41Sopenharmony_ci            ranges->at(0).from() > String::kMaxOneByteCharCode) {
19081cb0ef41Sopenharmony_ci          // This will be handled in a later filter.
19091cb0ef41Sopenharmony_ci          if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
19101cb0ef41Sopenharmony_ci            continue;
19111cb0ef41Sopenharmony_ci          }
19121cb0ef41Sopenharmony_ci          return set_replacement(nullptr);
19131cb0ef41Sopenharmony_ci        }
19141cb0ef41Sopenharmony_ci      }
19151cb0ef41Sopenharmony_ci    }
19161cb0ef41Sopenharmony_ci  }
19171cb0ef41Sopenharmony_ci  return FilterSuccessor(depth - 1, flags);
19181cb0ef41Sopenharmony_ci}
19191cb0ef41Sopenharmony_ci
19201cb0ef41Sopenharmony_ciRegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpFlags flags) {
19211cb0ef41Sopenharmony_ci  if (info()->replacement_calculated) return replacement();
19221cb0ef41Sopenharmony_ci  if (depth < 0) return this;
19231cb0ef41Sopenharmony_ci  if (info()->visited) return this;
19241cb0ef41Sopenharmony_ci  {
19251cb0ef41Sopenharmony_ci    VisitMarker marker(info());
19261cb0ef41Sopenharmony_ci
19271cb0ef41Sopenharmony_ci    RegExpNode* continue_replacement =
19281cb0ef41Sopenharmony_ci        continue_node_->FilterOneByte(depth - 1, flags);
19291cb0ef41Sopenharmony_ci    // If we can't continue after the loop then there is no sense in doing the
19301cb0ef41Sopenharmony_ci    // loop.
19311cb0ef41Sopenharmony_ci    if (continue_replacement == nullptr) return set_replacement(nullptr);
19321cb0ef41Sopenharmony_ci  }
19331cb0ef41Sopenharmony_ci
19341cb0ef41Sopenharmony_ci  return ChoiceNode::FilterOneByte(depth - 1, flags);
19351cb0ef41Sopenharmony_ci}
19361cb0ef41Sopenharmony_ci
19371cb0ef41Sopenharmony_ciRegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpFlags flags) {
19381cb0ef41Sopenharmony_ci  if (info()->replacement_calculated) return replacement();
19391cb0ef41Sopenharmony_ci  if (depth < 0) return this;
19401cb0ef41Sopenharmony_ci  if (info()->visited) return this;
19411cb0ef41Sopenharmony_ci  VisitMarker marker(info());
19421cb0ef41Sopenharmony_ci  int choice_count = alternatives_->length();
19431cb0ef41Sopenharmony_ci
19441cb0ef41Sopenharmony_ci  for (int i = 0; i < choice_count; i++) {
19451cb0ef41Sopenharmony_ci    GuardedAlternative alternative = alternatives_->at(i);
19461cb0ef41Sopenharmony_ci    if (alternative.guards() != nullptr &&
19471cb0ef41Sopenharmony_ci        alternative.guards()->length() != 0) {
19481cb0ef41Sopenharmony_ci      set_replacement(this);
19491cb0ef41Sopenharmony_ci      return this;
19501cb0ef41Sopenharmony_ci    }
19511cb0ef41Sopenharmony_ci  }
19521cb0ef41Sopenharmony_ci
19531cb0ef41Sopenharmony_ci  int surviving = 0;
19541cb0ef41Sopenharmony_ci  RegExpNode* survivor = nullptr;
19551cb0ef41Sopenharmony_ci  for (int i = 0; i < choice_count; i++) {
19561cb0ef41Sopenharmony_ci    GuardedAlternative alternative = alternatives_->at(i);
19571cb0ef41Sopenharmony_ci    RegExpNode* replacement =
19581cb0ef41Sopenharmony_ci        alternative.node()->FilterOneByte(depth - 1, flags);
19591cb0ef41Sopenharmony_ci    DCHECK(replacement != this);  // No missing EMPTY_MATCH_CHECK.
19601cb0ef41Sopenharmony_ci    if (replacement != nullptr) {
19611cb0ef41Sopenharmony_ci      alternatives_->at(i).set_node(replacement);
19621cb0ef41Sopenharmony_ci      surviving++;
19631cb0ef41Sopenharmony_ci      survivor = replacement;
19641cb0ef41Sopenharmony_ci    }
19651cb0ef41Sopenharmony_ci  }
19661cb0ef41Sopenharmony_ci  if (surviving < 2) return set_replacement(survivor);
19671cb0ef41Sopenharmony_ci
19681cb0ef41Sopenharmony_ci  set_replacement(this);
19691cb0ef41Sopenharmony_ci  if (surviving == choice_count) {
19701cb0ef41Sopenharmony_ci    return this;
19711cb0ef41Sopenharmony_ci  }
19721cb0ef41Sopenharmony_ci  // Only some of the nodes survived the filtering.  We need to rebuild the
19731cb0ef41Sopenharmony_ci  // alternatives list.
19741cb0ef41Sopenharmony_ci  ZoneList<GuardedAlternative>* new_alternatives =
19751cb0ef41Sopenharmony_ci      zone()->New<ZoneList<GuardedAlternative>>(surviving, zone());
19761cb0ef41Sopenharmony_ci  for (int i = 0; i < choice_count; i++) {
19771cb0ef41Sopenharmony_ci    RegExpNode* replacement =
19781cb0ef41Sopenharmony_ci        alternatives_->at(i).node()->FilterOneByte(depth - 1, flags);
19791cb0ef41Sopenharmony_ci    if (replacement != nullptr) {
19801cb0ef41Sopenharmony_ci      alternatives_->at(i).set_node(replacement);
19811cb0ef41Sopenharmony_ci      new_alternatives->Add(alternatives_->at(i), zone());
19821cb0ef41Sopenharmony_ci    }
19831cb0ef41Sopenharmony_ci  }
19841cb0ef41Sopenharmony_ci  alternatives_ = new_alternatives;
19851cb0ef41Sopenharmony_ci  return this;
19861cb0ef41Sopenharmony_ci}
19871cb0ef41Sopenharmony_ci
19881cb0ef41Sopenharmony_ciRegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
19891cb0ef41Sopenharmony_ci                                                        RegExpFlags flags) {
19901cb0ef41Sopenharmony_ci  if (info()->replacement_calculated) return replacement();
19911cb0ef41Sopenharmony_ci  if (depth < 0) return this;
19921cb0ef41Sopenharmony_ci  if (info()->visited) return this;
19931cb0ef41Sopenharmony_ci  VisitMarker marker(info());
19941cb0ef41Sopenharmony_ci  // Alternative 0 is the negative lookahead, alternative 1 is what comes
19951cb0ef41Sopenharmony_ci  // afterwards.
19961cb0ef41Sopenharmony_ci  RegExpNode* node = continue_node();
19971cb0ef41Sopenharmony_ci  RegExpNode* replacement = node->FilterOneByte(depth - 1, flags);
19981cb0ef41Sopenharmony_ci  if (replacement == nullptr) return set_replacement(nullptr);
19991cb0ef41Sopenharmony_ci  alternatives_->at(kContinueIndex).set_node(replacement);
20001cb0ef41Sopenharmony_ci
20011cb0ef41Sopenharmony_ci  RegExpNode* neg_node = lookaround_node();
20021cb0ef41Sopenharmony_ci  RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, flags);
20031cb0ef41Sopenharmony_ci  // If the negative lookahead is always going to fail then
20041cb0ef41Sopenharmony_ci  // we don't need to check it.
20051cb0ef41Sopenharmony_ci  if (neg_replacement == nullptr) return set_replacement(replacement);
20061cb0ef41Sopenharmony_ci  alternatives_->at(kLookaroundIndex).set_node(neg_replacement);
20071cb0ef41Sopenharmony_ci  return set_replacement(this);
20081cb0ef41Sopenharmony_ci}
20091cb0ef41Sopenharmony_ci
20101cb0ef41Sopenharmony_civoid LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
20111cb0ef41Sopenharmony_ci                                          RegExpCompiler* compiler,
20121cb0ef41Sopenharmony_ci                                          int characters_filled_in,
20131cb0ef41Sopenharmony_ci                                          bool not_at_start) {
20141cb0ef41Sopenharmony_ci  if (body_can_be_zero_length_ || info()->visited) return;
20151cb0ef41Sopenharmony_ci  not_at_start = not_at_start || this->not_at_start();
20161cb0ef41Sopenharmony_ci  DCHECK_EQ(alternatives_->length(), 2);  // There's just loop and continue.
20171cb0ef41Sopenharmony_ci  if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 &&
20181cb0ef41Sopenharmony_ci      loop_node_->EatsAtLeast(not_at_start) >
20191cb0ef41Sopenharmony_ci          continue_node_->EatsAtLeast(true)) {
20201cb0ef41Sopenharmony_ci    // Loop body is guaranteed to execute at least once, and consume characters
20211cb0ef41Sopenharmony_ci    // when it does, meaning the only possible quick checks from this point
20221cb0ef41Sopenharmony_ci    // begin with the loop body. We may recursively visit this LoopChoiceNode,
20231cb0ef41Sopenharmony_ci    // but we temporarily decrease its minimum iteration counter so we know when
20241cb0ef41Sopenharmony_ci    // to check the continue case.
20251cb0ef41Sopenharmony_ci    IterationDecrementer next_iteration(this);
20261cb0ef41Sopenharmony_ci    loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in,
20271cb0ef41Sopenharmony_ci                                     not_at_start);
20281cb0ef41Sopenharmony_ci  } else {
20291cb0ef41Sopenharmony_ci    // Might not consume anything in the loop body, so treat it like a normal
20301cb0ef41Sopenharmony_ci    // ChoiceNode (and don't recursively visit this node again).
20311cb0ef41Sopenharmony_ci    VisitMarker marker(info());
20321cb0ef41Sopenharmony_ci    ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in,
20331cb0ef41Sopenharmony_ci                                     not_at_start);
20341cb0ef41Sopenharmony_ci  }
20351cb0ef41Sopenharmony_ci}
20361cb0ef41Sopenharmony_ci
20371cb0ef41Sopenharmony_civoid LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry(
20381cb0ef41Sopenharmony_ci    QuickCheckDetails* details, RegExpCompiler* compiler,
20391cb0ef41Sopenharmony_ci    int characters_filled_in, bool not_at_start) {
20401cb0ef41Sopenharmony_ci  if (traversed_loop_initialization_node_) {
20411cb0ef41Sopenharmony_ci    // We already entered this loop once, exited via its continuation node, and
20421cb0ef41Sopenharmony_ci    // followed an outer loop's back-edge to before the loop entry point. We
20431cb0ef41Sopenharmony_ci    // could try to reset the minimum iteration count to its starting value at
20441cb0ef41Sopenharmony_ci    // this point, but that seems like more trouble than it's worth. It's safe
20451cb0ef41Sopenharmony_ci    // to keep going with the current (possibly reduced) minimum iteration
20461cb0ef41Sopenharmony_ci    // count.
20471cb0ef41Sopenharmony_ci    GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
20481cb0ef41Sopenharmony_ci  } else {
20491cb0ef41Sopenharmony_ci    // We are entering a loop via its counter initialization action, meaning we
20501cb0ef41Sopenharmony_ci    // are guaranteed to run the loop body at least some minimum number of times
20511cb0ef41Sopenharmony_ci    // before running the continuation node. Set a flag so that this node knows
20521cb0ef41Sopenharmony_ci    // (now and any times we visit it again recursively) that it was entered
20531cb0ef41Sopenharmony_ci    // from the top.
20541cb0ef41Sopenharmony_ci    LoopInitializationMarker marker(this);
20551cb0ef41Sopenharmony_ci    GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
20561cb0ef41Sopenharmony_ci  }
20571cb0ef41Sopenharmony_ci}
20581cb0ef41Sopenharmony_ci
20591cb0ef41Sopenharmony_civoid LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
20601cb0ef41Sopenharmony_ci                                  BoyerMooreLookahead* bm, bool not_at_start) {
20611cb0ef41Sopenharmony_ci  if (body_can_be_zero_length_ || budget <= 0) {
20621cb0ef41Sopenharmony_ci    bm->SetRest(offset);
20631cb0ef41Sopenharmony_ci    SaveBMInfo(bm, not_at_start, offset);
20641cb0ef41Sopenharmony_ci    return;
20651cb0ef41Sopenharmony_ci  }
20661cb0ef41Sopenharmony_ci  ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
20671cb0ef41Sopenharmony_ci  SaveBMInfo(bm, not_at_start, offset);
20681cb0ef41Sopenharmony_ci}
20691cb0ef41Sopenharmony_ci
20701cb0ef41Sopenharmony_civoid ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
20711cb0ef41Sopenharmony_ci                                      RegExpCompiler* compiler,
20721cb0ef41Sopenharmony_ci                                      int characters_filled_in,
20731cb0ef41Sopenharmony_ci                                      bool not_at_start) {
20741cb0ef41Sopenharmony_ci  not_at_start = (not_at_start || not_at_start_);
20751cb0ef41Sopenharmony_ci  int choice_count = alternatives_->length();
20761cb0ef41Sopenharmony_ci  DCHECK_LT(0, choice_count);
20771cb0ef41Sopenharmony_ci  alternatives_->at(0).node()->GetQuickCheckDetails(
20781cb0ef41Sopenharmony_ci      details, compiler, characters_filled_in, not_at_start);
20791cb0ef41Sopenharmony_ci  for (int i = 1; i < choice_count; i++) {
20801cb0ef41Sopenharmony_ci    QuickCheckDetails new_details(details->characters());
20811cb0ef41Sopenharmony_ci    RegExpNode* node = alternatives_->at(i).node();
20821cb0ef41Sopenharmony_ci    node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in,
20831cb0ef41Sopenharmony_ci                               not_at_start);
20841cb0ef41Sopenharmony_ci    // Here we merge the quick match details of the two branches.
20851cb0ef41Sopenharmony_ci    details->Merge(&new_details, characters_filled_in);
20861cb0ef41Sopenharmony_ci  }
20871cb0ef41Sopenharmony_ci}
20881cb0ef41Sopenharmony_ci
20891cb0ef41Sopenharmony_cinamespace {
20901cb0ef41Sopenharmony_ci
20911cb0ef41Sopenharmony_ci// Check for [0-9A-Z_a-z].
20921cb0ef41Sopenharmony_civoid EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
20931cb0ef41Sopenharmony_ci                   Label* non_word, bool fall_through_on_word) {
20941cb0ef41Sopenharmony_ci  if (assembler->CheckSpecialCharacterClass(
20951cb0ef41Sopenharmony_ci          fall_through_on_word ? StandardCharacterSet::kWord
20961cb0ef41Sopenharmony_ci                               : StandardCharacterSet::kNotWord,
20971cb0ef41Sopenharmony_ci          fall_through_on_word ? non_word : word)) {
20981cb0ef41Sopenharmony_ci    // Optimized implementation available.
20991cb0ef41Sopenharmony_ci    return;
21001cb0ef41Sopenharmony_ci  }
21011cb0ef41Sopenharmony_ci  assembler->CheckCharacterGT('z', non_word);
21021cb0ef41Sopenharmony_ci  assembler->CheckCharacterLT('0', non_word);
21031cb0ef41Sopenharmony_ci  assembler->CheckCharacterGT('a' - 1, word);
21041cb0ef41Sopenharmony_ci  assembler->CheckCharacterLT('9' + 1, word);
21051cb0ef41Sopenharmony_ci  assembler->CheckCharacterLT('A', non_word);
21061cb0ef41Sopenharmony_ci  assembler->CheckCharacterLT('Z' + 1, word);
21071cb0ef41Sopenharmony_ci  if (fall_through_on_word) {
21081cb0ef41Sopenharmony_ci    assembler->CheckNotCharacter('_', non_word);
21091cb0ef41Sopenharmony_ci  } else {
21101cb0ef41Sopenharmony_ci    assembler->CheckCharacter('_', word);
21111cb0ef41Sopenharmony_ci  }
21121cb0ef41Sopenharmony_ci}
21131cb0ef41Sopenharmony_ci
21141cb0ef41Sopenharmony_ci// Emit the code to check for a ^ in multiline mode (1-character lookbehind
21151cb0ef41Sopenharmony_ci// that matches newline or the start of input).
21161cb0ef41Sopenharmony_civoid EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
21171cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
21181cb0ef41Sopenharmony_ci
21191cb0ef41Sopenharmony_ci  // We will load the previous character into the current character register.
21201cb0ef41Sopenharmony_ci  Trace new_trace(*trace);
21211cb0ef41Sopenharmony_ci  new_trace.InvalidateCurrentCharacter();
21221cb0ef41Sopenharmony_ci
21231cb0ef41Sopenharmony_ci  // A positive (> 0) cp_offset means we've already successfully matched a
21241cb0ef41Sopenharmony_ci  // non-empty-width part of the pattern, and thus cannot be at or before the
21251cb0ef41Sopenharmony_ci  // start of the subject string. We can thus skip both at-start and
21261cb0ef41Sopenharmony_ci  // bounds-checks when loading the one-character lookbehind.
21271cb0ef41Sopenharmony_ci  const bool may_be_at_or_before_subject_string_start =
21281cb0ef41Sopenharmony_ci      new_trace.cp_offset() <= 0;
21291cb0ef41Sopenharmony_ci
21301cb0ef41Sopenharmony_ci  Label ok;
21311cb0ef41Sopenharmony_ci  if (may_be_at_or_before_subject_string_start) {
21321cb0ef41Sopenharmony_ci    // The start of input counts as a newline in this context, so skip to ok if
21331cb0ef41Sopenharmony_ci    // we are at the start.
21341cb0ef41Sopenharmony_ci    assembler->CheckAtStart(new_trace.cp_offset(), &ok);
21351cb0ef41Sopenharmony_ci  }
21361cb0ef41Sopenharmony_ci
21371cb0ef41Sopenharmony_ci  // If we've already checked that we are not at the start of input, it's okay
21381cb0ef41Sopenharmony_ci  // to load the previous character without bounds checks.
21391cb0ef41Sopenharmony_ci  const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
21401cb0ef41Sopenharmony_ci  assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
21411cb0ef41Sopenharmony_ci                                  new_trace.backtrack(), can_skip_bounds_check);
21421cb0ef41Sopenharmony_ci  if (!assembler->CheckSpecialCharacterClass(
21431cb0ef41Sopenharmony_ci          StandardCharacterSet::kLineTerminator, new_trace.backtrack())) {
21441cb0ef41Sopenharmony_ci    // Newline means \n, \r, 0x2028 or 0x2029.
21451cb0ef41Sopenharmony_ci    if (!compiler->one_byte()) {
21461cb0ef41Sopenharmony_ci      assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
21471cb0ef41Sopenharmony_ci    }
21481cb0ef41Sopenharmony_ci    assembler->CheckCharacter('\n', &ok);
21491cb0ef41Sopenharmony_ci    assembler->CheckNotCharacter('\r', new_trace.backtrack());
21501cb0ef41Sopenharmony_ci  }
21511cb0ef41Sopenharmony_ci  assembler->Bind(&ok);
21521cb0ef41Sopenharmony_ci  on_success->Emit(compiler, &new_trace);
21531cb0ef41Sopenharmony_ci}
21541cb0ef41Sopenharmony_ci
21551cb0ef41Sopenharmony_ci}  // namespace
21561cb0ef41Sopenharmony_ci
21571cb0ef41Sopenharmony_ci// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
21581cb0ef41Sopenharmony_civoid AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
21591cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
21601cb0ef41Sopenharmony_ci  Isolate* isolate = assembler->isolate();
21611cb0ef41Sopenharmony_ci  Trace::TriBool next_is_word_character = Trace::UNKNOWN;
21621cb0ef41Sopenharmony_ci  bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
21631cb0ef41Sopenharmony_ci  BoyerMooreLookahead* lookahead = bm_info(not_at_start);
21641cb0ef41Sopenharmony_ci  if (lookahead == nullptr) {
21651cb0ef41Sopenharmony_ci    int eats_at_least =
21661cb0ef41Sopenharmony_ci        std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start));
21671cb0ef41Sopenharmony_ci    if (eats_at_least >= 1) {
21681cb0ef41Sopenharmony_ci      BoyerMooreLookahead* bm =
21691cb0ef41Sopenharmony_ci          zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
21701cb0ef41Sopenharmony_ci      FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
21711cb0ef41Sopenharmony_ci      if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE;
21721cb0ef41Sopenharmony_ci      if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
21731cb0ef41Sopenharmony_ci    }
21741cb0ef41Sopenharmony_ci  } else {
21751cb0ef41Sopenharmony_ci    if (lookahead->at(0)->is_non_word())
21761cb0ef41Sopenharmony_ci      next_is_word_character = Trace::FALSE_VALUE;
21771cb0ef41Sopenharmony_ci    if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
21781cb0ef41Sopenharmony_ci  }
21791cb0ef41Sopenharmony_ci  bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
21801cb0ef41Sopenharmony_ci  if (next_is_word_character == Trace::UNKNOWN) {
21811cb0ef41Sopenharmony_ci    Label before_non_word;
21821cb0ef41Sopenharmony_ci    Label before_word;
21831cb0ef41Sopenharmony_ci    if (trace->characters_preloaded() != 1) {
21841cb0ef41Sopenharmony_ci      assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
21851cb0ef41Sopenharmony_ci    }
21861cb0ef41Sopenharmony_ci    // Fall through on non-word.
21871cb0ef41Sopenharmony_ci    EmitWordCheck(assembler, &before_word, &before_non_word, false);
21881cb0ef41Sopenharmony_ci    // Next character is not a word character.
21891cb0ef41Sopenharmony_ci    assembler->Bind(&before_non_word);
21901cb0ef41Sopenharmony_ci    Label ok;
21911cb0ef41Sopenharmony_ci    BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
21921cb0ef41Sopenharmony_ci    assembler->GoTo(&ok);
21931cb0ef41Sopenharmony_ci
21941cb0ef41Sopenharmony_ci    assembler->Bind(&before_word);
21951cb0ef41Sopenharmony_ci    BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
21961cb0ef41Sopenharmony_ci    assembler->Bind(&ok);
21971cb0ef41Sopenharmony_ci  } else if (next_is_word_character == Trace::TRUE_VALUE) {
21981cb0ef41Sopenharmony_ci    BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
21991cb0ef41Sopenharmony_ci  } else {
22001cb0ef41Sopenharmony_ci    DCHECK(next_is_word_character == Trace::FALSE_VALUE);
22011cb0ef41Sopenharmony_ci    BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
22021cb0ef41Sopenharmony_ci  }
22031cb0ef41Sopenharmony_ci}
22041cb0ef41Sopenharmony_ci
22051cb0ef41Sopenharmony_civoid AssertionNode::BacktrackIfPrevious(
22061cb0ef41Sopenharmony_ci    RegExpCompiler* compiler, Trace* trace,
22071cb0ef41Sopenharmony_ci    AssertionNode::IfPrevious backtrack_if_previous) {
22081cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
22091cb0ef41Sopenharmony_ci  Trace new_trace(*trace);
22101cb0ef41Sopenharmony_ci  new_trace.InvalidateCurrentCharacter();
22111cb0ef41Sopenharmony_ci
22121cb0ef41Sopenharmony_ci  Label fall_through;
22131cb0ef41Sopenharmony_ci  Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack()
22141cb0ef41Sopenharmony_ci                                                        : &fall_through;
22151cb0ef41Sopenharmony_ci  Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
22161cb0ef41Sopenharmony_ci                                                    : new_trace.backtrack();
22171cb0ef41Sopenharmony_ci
22181cb0ef41Sopenharmony_ci  // A positive (> 0) cp_offset means we've already successfully matched a
22191cb0ef41Sopenharmony_ci  // non-empty-width part of the pattern, and thus cannot be at or before the
22201cb0ef41Sopenharmony_ci  // start of the subject string. We can thus skip both at-start and
22211cb0ef41Sopenharmony_ci  // bounds-checks when loading the one-character lookbehind.
22221cb0ef41Sopenharmony_ci  const bool may_be_at_or_before_subject_string_start =
22231cb0ef41Sopenharmony_ci      new_trace.cp_offset() <= 0;
22241cb0ef41Sopenharmony_ci
22251cb0ef41Sopenharmony_ci  if (may_be_at_or_before_subject_string_start) {
22261cb0ef41Sopenharmony_ci    // The start of input counts as a non-word character, so the question is
22271cb0ef41Sopenharmony_ci    // decided if we are at the start.
22281cb0ef41Sopenharmony_ci    assembler->CheckAtStart(new_trace.cp_offset(), non_word);
22291cb0ef41Sopenharmony_ci  }
22301cb0ef41Sopenharmony_ci
22311cb0ef41Sopenharmony_ci  // If we've already checked that we are not at the start of input, it's okay
22321cb0ef41Sopenharmony_ci  // to load the previous character without bounds checks.
22331cb0ef41Sopenharmony_ci  const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
22341cb0ef41Sopenharmony_ci  assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word,
22351cb0ef41Sopenharmony_ci                                  can_skip_bounds_check);
22361cb0ef41Sopenharmony_ci  EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
22371cb0ef41Sopenharmony_ci
22381cb0ef41Sopenharmony_ci  assembler->Bind(&fall_through);
22391cb0ef41Sopenharmony_ci  on_success()->Emit(compiler, &new_trace);
22401cb0ef41Sopenharmony_ci}
22411cb0ef41Sopenharmony_ci
22421cb0ef41Sopenharmony_civoid AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
22431cb0ef41Sopenharmony_ci                                         RegExpCompiler* compiler,
22441cb0ef41Sopenharmony_ci                                         int filled_in, bool not_at_start) {
22451cb0ef41Sopenharmony_ci  if (assertion_type_ == AT_START && not_at_start) {
22461cb0ef41Sopenharmony_ci    details->set_cannot_match();
22471cb0ef41Sopenharmony_ci    return;
22481cb0ef41Sopenharmony_ci  }
22491cb0ef41Sopenharmony_ci  return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
22501cb0ef41Sopenharmony_ci                                            not_at_start);
22511cb0ef41Sopenharmony_ci}
22521cb0ef41Sopenharmony_ci
22531cb0ef41Sopenharmony_civoid AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
22541cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
22551cb0ef41Sopenharmony_ci  switch (assertion_type_) {
22561cb0ef41Sopenharmony_ci    case AT_END: {
22571cb0ef41Sopenharmony_ci      Label ok;
22581cb0ef41Sopenharmony_ci      assembler->CheckPosition(trace->cp_offset(), &ok);
22591cb0ef41Sopenharmony_ci      assembler->GoTo(trace->backtrack());
22601cb0ef41Sopenharmony_ci      assembler->Bind(&ok);
22611cb0ef41Sopenharmony_ci      break;
22621cb0ef41Sopenharmony_ci    }
22631cb0ef41Sopenharmony_ci    case AT_START: {
22641cb0ef41Sopenharmony_ci      if (trace->at_start() == Trace::FALSE_VALUE) {
22651cb0ef41Sopenharmony_ci        assembler->GoTo(trace->backtrack());
22661cb0ef41Sopenharmony_ci        return;
22671cb0ef41Sopenharmony_ci      }
22681cb0ef41Sopenharmony_ci      if (trace->at_start() == Trace::UNKNOWN) {
22691cb0ef41Sopenharmony_ci        assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
22701cb0ef41Sopenharmony_ci        Trace at_start_trace = *trace;
22711cb0ef41Sopenharmony_ci        at_start_trace.set_at_start(Trace::TRUE_VALUE);
22721cb0ef41Sopenharmony_ci        on_success()->Emit(compiler, &at_start_trace);
22731cb0ef41Sopenharmony_ci        return;
22741cb0ef41Sopenharmony_ci      }
22751cb0ef41Sopenharmony_ci    } break;
22761cb0ef41Sopenharmony_ci    case AFTER_NEWLINE:
22771cb0ef41Sopenharmony_ci      EmitHat(compiler, on_success(), trace);
22781cb0ef41Sopenharmony_ci      return;
22791cb0ef41Sopenharmony_ci    case AT_BOUNDARY:
22801cb0ef41Sopenharmony_ci    case AT_NON_BOUNDARY: {
22811cb0ef41Sopenharmony_ci      EmitBoundaryCheck(compiler, trace);
22821cb0ef41Sopenharmony_ci      return;
22831cb0ef41Sopenharmony_ci    }
22841cb0ef41Sopenharmony_ci  }
22851cb0ef41Sopenharmony_ci  on_success()->Emit(compiler, trace);
22861cb0ef41Sopenharmony_ci}
22871cb0ef41Sopenharmony_ci
22881cb0ef41Sopenharmony_cinamespace {
22891cb0ef41Sopenharmony_ci
22901cb0ef41Sopenharmony_cibool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
22911cb0ef41Sopenharmony_ci  if (quick_check == nullptr) return false;
22921cb0ef41Sopenharmony_ci  if (offset >= quick_check->characters()) return false;
22931cb0ef41Sopenharmony_ci  return quick_check->positions(offset)->determines_perfectly;
22941cb0ef41Sopenharmony_ci}
22951cb0ef41Sopenharmony_ci
22961cb0ef41Sopenharmony_civoid UpdateBoundsCheck(int index, int* checked_up_to) {
22971cb0ef41Sopenharmony_ci  if (index > *checked_up_to) {
22981cb0ef41Sopenharmony_ci    *checked_up_to = index;
22991cb0ef41Sopenharmony_ci  }
23001cb0ef41Sopenharmony_ci}
23011cb0ef41Sopenharmony_ci
23021cb0ef41Sopenharmony_ci}  // namespace
23031cb0ef41Sopenharmony_ci
23041cb0ef41Sopenharmony_ci// We call this repeatedly to generate code for each pass over the text node.
23051cb0ef41Sopenharmony_ci// The passes are in increasing order of difficulty because we hope one
23061cb0ef41Sopenharmony_ci// of the first passes will fail in which case we are saved the work of the
23071cb0ef41Sopenharmony_ci// later passes.  for example for the case independent regexp /%[asdfghjkl]a/
23081cb0ef41Sopenharmony_ci// we will check the '%' in the first pass, the case independent 'a' in the
23091cb0ef41Sopenharmony_ci// second pass and the character class in the last pass.
23101cb0ef41Sopenharmony_ci//
23111cb0ef41Sopenharmony_ci// The passes are done from right to left, so for example to test for /bar/
23121cb0ef41Sopenharmony_ci// we will first test for an 'r' with offset 2, then an 'a' with offset 1
23131cb0ef41Sopenharmony_ci// and then a 'b' with offset 0.  This means we can avoid the end-of-input
23141cb0ef41Sopenharmony_ci// bounds check most of the time.  In the example we only need to check for
23151cb0ef41Sopenharmony_ci// end-of-input when loading the putative 'r'.
23161cb0ef41Sopenharmony_ci//
23171cb0ef41Sopenharmony_ci// A slight complication involves the fact that the first character may already
23181cb0ef41Sopenharmony_ci// be fetched into a register by the previous node.  In this case we want to
23191cb0ef41Sopenharmony_ci// do the test for that character first.  We do this in separate passes.  The
23201cb0ef41Sopenharmony_ci// 'preloaded' argument indicates that we are doing such a 'pass'.  If such a
23211cb0ef41Sopenharmony_ci// pass has been performed then subsequent passes will have true in
23221cb0ef41Sopenharmony_ci// first_element_checked to indicate that that character does not need to be
23231cb0ef41Sopenharmony_ci// checked again.
23241cb0ef41Sopenharmony_ci//
23251cb0ef41Sopenharmony_ci// In addition to all this we are passed a Trace, which can
23261cb0ef41Sopenharmony_ci// contain an AlternativeGeneration object.  In this AlternativeGeneration
23271cb0ef41Sopenharmony_ci// object we can see details of any quick check that was already passed in
23281cb0ef41Sopenharmony_ci// order to get to the code we are now generating.  The quick check can involve
23291cb0ef41Sopenharmony_ci// loading characters, which means we do not need to recheck the bounds
23301cb0ef41Sopenharmony_ci// up to the limit the quick check already checked.  In addition the quick
23311cb0ef41Sopenharmony_ci// check can have involved a mask and compare operation which may simplify
23321cb0ef41Sopenharmony_ci// or obviate the need for further checks at some character positions.
23331cb0ef41Sopenharmony_civoid TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
23341cb0ef41Sopenharmony_ci                            bool preloaded, Trace* trace,
23351cb0ef41Sopenharmony_ci                            bool first_element_checked, int* checked_up_to) {
23361cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
23371cb0ef41Sopenharmony_ci  Isolate* isolate = assembler->isolate();
23381cb0ef41Sopenharmony_ci  bool one_byte = compiler->one_byte();
23391cb0ef41Sopenharmony_ci  Label* backtrack = trace->backtrack();
23401cb0ef41Sopenharmony_ci  QuickCheckDetails* quick_check = trace->quick_check_performed();
23411cb0ef41Sopenharmony_ci  int element_count = elements()->length();
23421cb0ef41Sopenharmony_ci  int backward_offset = read_backward() ? -Length() : 0;
23431cb0ef41Sopenharmony_ci  for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
23441cb0ef41Sopenharmony_ci    TextElement elm = elements()->at(i);
23451cb0ef41Sopenharmony_ci    int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
23461cb0ef41Sopenharmony_ci    if (elm.text_type() == TextElement::ATOM) {
23471cb0ef41Sopenharmony_ci      if (SkipPass(pass, IsIgnoreCase(compiler->flags()))) continue;
23481cb0ef41Sopenharmony_ci      base::Vector<const base::uc16> quarks = elm.atom()->data();
23491cb0ef41Sopenharmony_ci      for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
23501cb0ef41Sopenharmony_ci        if (first_element_checked && i == 0 && j == 0) continue;
23511cb0ef41Sopenharmony_ci        if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
23521cb0ef41Sopenharmony_ci        base::uc16 quark = quarks[j];
23531cb0ef41Sopenharmony_ci        if (IsIgnoreCase(compiler->flags())) {
23541cb0ef41Sopenharmony_ci          // Everywhere else we assume that a non-Latin-1 character cannot match
23551cb0ef41Sopenharmony_ci          // a Latin-1 character. Avoid the cases where this is assumption is
23561cb0ef41Sopenharmony_ci          // invalid by using the Latin1 equivalent instead.
23571cb0ef41Sopenharmony_ci          quark = unibrow::Latin1::TryConvertToLatin1(quark);
23581cb0ef41Sopenharmony_ci        }
23591cb0ef41Sopenharmony_ci        bool needs_bounds_check =
23601cb0ef41Sopenharmony_ci            *checked_up_to < cp_offset + j || read_backward();
23611cb0ef41Sopenharmony_ci        bool bounds_checked = false;
23621cb0ef41Sopenharmony_ci        switch (pass) {
23631cb0ef41Sopenharmony_ci          case NON_LATIN1_MATCH:
23641cb0ef41Sopenharmony_ci            DCHECK(one_byte);
23651cb0ef41Sopenharmony_ci            if (quark > String::kMaxOneByteCharCode) {
23661cb0ef41Sopenharmony_ci              assembler->GoTo(backtrack);
23671cb0ef41Sopenharmony_ci              return;
23681cb0ef41Sopenharmony_ci            }
23691cb0ef41Sopenharmony_ci            break;
23701cb0ef41Sopenharmony_ci          case NON_LETTER_CHARACTER_MATCH:
23711cb0ef41Sopenharmony_ci            bounds_checked =
23721cb0ef41Sopenharmony_ci                EmitAtomNonLetter(isolate, compiler, quark, backtrack,
23731cb0ef41Sopenharmony_ci                                  cp_offset + j, needs_bounds_check, preloaded);
23741cb0ef41Sopenharmony_ci            break;
23751cb0ef41Sopenharmony_ci          case SIMPLE_CHARACTER_MATCH:
23761cb0ef41Sopenharmony_ci            bounds_checked = EmitSimpleCharacter(isolate, compiler, quark,
23771cb0ef41Sopenharmony_ci                                                 backtrack, cp_offset + j,
23781cb0ef41Sopenharmony_ci                                                 needs_bounds_check, preloaded);
23791cb0ef41Sopenharmony_ci            break;
23801cb0ef41Sopenharmony_ci          case CASE_CHARACTER_MATCH:
23811cb0ef41Sopenharmony_ci            bounds_checked =
23821cb0ef41Sopenharmony_ci                EmitAtomLetter(isolate, compiler, quark, backtrack,
23831cb0ef41Sopenharmony_ci                               cp_offset + j, needs_bounds_check, preloaded);
23841cb0ef41Sopenharmony_ci            break;
23851cb0ef41Sopenharmony_ci          default:
23861cb0ef41Sopenharmony_ci            break;
23871cb0ef41Sopenharmony_ci        }
23881cb0ef41Sopenharmony_ci        if (bounds_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
23891cb0ef41Sopenharmony_ci      }
23901cb0ef41Sopenharmony_ci    } else {
23911cb0ef41Sopenharmony_ci      DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
23921cb0ef41Sopenharmony_ci      if (pass == CHARACTER_CLASS_MATCH) {
23931cb0ef41Sopenharmony_ci        if (first_element_checked && i == 0) continue;
23941cb0ef41Sopenharmony_ci        if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
23951cb0ef41Sopenharmony_ci        RegExpCharacterClass* cc = elm.char_class();
23961cb0ef41Sopenharmony_ci        bool bounds_check = *checked_up_to < cp_offset || read_backward();
23971cb0ef41Sopenharmony_ci        EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
23981cb0ef41Sopenharmony_ci                      bounds_check, preloaded, zone());
23991cb0ef41Sopenharmony_ci        UpdateBoundsCheck(cp_offset, checked_up_to);
24001cb0ef41Sopenharmony_ci      }
24011cb0ef41Sopenharmony_ci    }
24021cb0ef41Sopenharmony_ci  }
24031cb0ef41Sopenharmony_ci}
24041cb0ef41Sopenharmony_ci
24051cb0ef41Sopenharmony_ciint TextNode::Length() {
24061cb0ef41Sopenharmony_ci  TextElement elm = elements()->last();
24071cb0ef41Sopenharmony_ci  DCHECK_LE(0, elm.cp_offset());
24081cb0ef41Sopenharmony_ci  return elm.cp_offset() + elm.length();
24091cb0ef41Sopenharmony_ci}
24101cb0ef41Sopenharmony_ci
24111cb0ef41Sopenharmony_cibool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
24121cb0ef41Sopenharmony_ci  if (ignore_case) {
24131cb0ef41Sopenharmony_ci    return pass == SIMPLE_CHARACTER_MATCH;
24141cb0ef41Sopenharmony_ci  } else {
24151cb0ef41Sopenharmony_ci    return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
24161cb0ef41Sopenharmony_ci  }
24171cb0ef41Sopenharmony_ci}
24181cb0ef41Sopenharmony_ci
24191cb0ef41Sopenharmony_ciTextNode* TextNode::CreateForCharacterRanges(Zone* zone,
24201cb0ef41Sopenharmony_ci                                             ZoneList<CharacterRange>* ranges,
24211cb0ef41Sopenharmony_ci                                             bool read_backward,
24221cb0ef41Sopenharmony_ci                                             RegExpNode* on_success) {
24231cb0ef41Sopenharmony_ci  DCHECK_NOT_NULL(ranges);
24241cb0ef41Sopenharmony_ci  // TODO(jgruber): There's no fundamental need to create this
24251cb0ef41Sopenharmony_ci  // RegExpCharacterClass; we could refactor to avoid the allocation.
24261cb0ef41Sopenharmony_ci  return zone->New<TextNode>(zone->New<RegExpCharacterClass>(zone, ranges),
24271cb0ef41Sopenharmony_ci                             read_backward, on_success);
24281cb0ef41Sopenharmony_ci}
24291cb0ef41Sopenharmony_ci
24301cb0ef41Sopenharmony_ciTextNode* TextNode::CreateForSurrogatePair(
24311cb0ef41Sopenharmony_ci    Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges,
24321cb0ef41Sopenharmony_ci    bool read_backward, RegExpNode* on_success) {
24331cb0ef41Sopenharmony_ci  ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
24341cb0ef41Sopenharmony_ci  ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
24351cb0ef41Sopenharmony_ci  elms->Add(TextElement::CharClass(
24361cb0ef41Sopenharmony_ci                zone->New<RegExpCharacterClass>(zone, lead_ranges)),
24371cb0ef41Sopenharmony_ci            zone);
24381cb0ef41Sopenharmony_ci  elms->Add(TextElement::CharClass(
24391cb0ef41Sopenharmony_ci                zone->New<RegExpCharacterClass>(zone, trail_ranges)),
24401cb0ef41Sopenharmony_ci            zone);
24411cb0ef41Sopenharmony_ci  return zone->New<TextNode>(elms, read_backward, on_success);
24421cb0ef41Sopenharmony_ci}
24431cb0ef41Sopenharmony_ci
24441cb0ef41Sopenharmony_ciTextNode* TextNode::CreateForSurrogatePair(
24451cb0ef41Sopenharmony_ci    Zone* zone, ZoneList<CharacterRange>* lead_ranges, CharacterRange trail,
24461cb0ef41Sopenharmony_ci    bool read_backward, RegExpNode* on_success) {
24471cb0ef41Sopenharmony_ci  ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
24481cb0ef41Sopenharmony_ci  ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
24491cb0ef41Sopenharmony_ci  elms->Add(TextElement::CharClass(
24501cb0ef41Sopenharmony_ci                zone->New<RegExpCharacterClass>(zone, lead_ranges)),
24511cb0ef41Sopenharmony_ci            zone);
24521cb0ef41Sopenharmony_ci  elms->Add(TextElement::CharClass(
24531cb0ef41Sopenharmony_ci                zone->New<RegExpCharacterClass>(zone, trail_ranges)),
24541cb0ef41Sopenharmony_ci            zone);
24551cb0ef41Sopenharmony_ci  return zone->New<TextNode>(elms, read_backward, on_success);
24561cb0ef41Sopenharmony_ci}
24571cb0ef41Sopenharmony_ci
24581cb0ef41Sopenharmony_ci// This generates the code to match a text node.  A text node can contain
24591cb0ef41Sopenharmony_ci// straight character sequences (possibly to be matched in a case-independent
24601cb0ef41Sopenharmony_ci// way) and character classes.  For efficiency we do not do this in a single
24611cb0ef41Sopenharmony_ci// pass from left to right.  Instead we pass over the text node several times,
24621cb0ef41Sopenharmony_ci// emitting code for some character positions every time.  See the comment on
24631cb0ef41Sopenharmony_ci// TextEmitPass for details.
24641cb0ef41Sopenharmony_civoid TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
24651cb0ef41Sopenharmony_ci  LimitResult limit_result = LimitVersions(compiler, trace);
24661cb0ef41Sopenharmony_ci  if (limit_result == DONE) return;
24671cb0ef41Sopenharmony_ci  DCHECK(limit_result == CONTINUE);
24681cb0ef41Sopenharmony_ci
24691cb0ef41Sopenharmony_ci  if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
24701cb0ef41Sopenharmony_ci    compiler->SetRegExpTooBig();
24711cb0ef41Sopenharmony_ci    return;
24721cb0ef41Sopenharmony_ci  }
24731cb0ef41Sopenharmony_ci
24741cb0ef41Sopenharmony_ci  if (compiler->one_byte()) {
24751cb0ef41Sopenharmony_ci    int dummy = 0;
24761cb0ef41Sopenharmony_ci    TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
24771cb0ef41Sopenharmony_ci  }
24781cb0ef41Sopenharmony_ci
24791cb0ef41Sopenharmony_ci  bool first_elt_done = false;
24801cb0ef41Sopenharmony_ci  int bound_checked_to = trace->cp_offset() - 1;
24811cb0ef41Sopenharmony_ci  bound_checked_to += trace->bound_checked_up_to();
24821cb0ef41Sopenharmony_ci
24831cb0ef41Sopenharmony_ci  // If a character is preloaded into the current character register then
24841cb0ef41Sopenharmony_ci  // check that now.
24851cb0ef41Sopenharmony_ci  if (trace->characters_preloaded() == 1) {
24861cb0ef41Sopenharmony_ci    for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
24871cb0ef41Sopenharmony_ci      TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
24881cb0ef41Sopenharmony_ci                   false, &bound_checked_to);
24891cb0ef41Sopenharmony_ci    }
24901cb0ef41Sopenharmony_ci    first_elt_done = true;
24911cb0ef41Sopenharmony_ci  }
24921cb0ef41Sopenharmony_ci
24931cb0ef41Sopenharmony_ci  for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
24941cb0ef41Sopenharmony_ci    TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
24951cb0ef41Sopenharmony_ci                 first_elt_done, &bound_checked_to);
24961cb0ef41Sopenharmony_ci  }
24971cb0ef41Sopenharmony_ci
24981cb0ef41Sopenharmony_ci  Trace successor_trace(*trace);
24991cb0ef41Sopenharmony_ci  // If we advance backward, we may end up at the start.
25001cb0ef41Sopenharmony_ci  successor_trace.AdvanceCurrentPositionInTrace(
25011cb0ef41Sopenharmony_ci      read_backward() ? -Length() : Length(), compiler);
25021cb0ef41Sopenharmony_ci  successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
25031cb0ef41Sopenharmony_ci                                               : Trace::FALSE_VALUE);
25041cb0ef41Sopenharmony_ci  RecursionCheck rc(compiler);
25051cb0ef41Sopenharmony_ci  on_success()->Emit(compiler, &successor_trace);
25061cb0ef41Sopenharmony_ci}
25071cb0ef41Sopenharmony_ci
25081cb0ef41Sopenharmony_civoid Trace::InvalidateCurrentCharacter() { characters_preloaded_ = 0; }
25091cb0ef41Sopenharmony_ci
25101cb0ef41Sopenharmony_civoid Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
25111cb0ef41Sopenharmony_ci  // We don't have an instruction for shifting the current character register
25121cb0ef41Sopenharmony_ci  // down or for using a shifted value for anything so lets just forget that
25131cb0ef41Sopenharmony_ci  // we preloaded any characters into it.
25141cb0ef41Sopenharmony_ci  characters_preloaded_ = 0;
25151cb0ef41Sopenharmony_ci  // Adjust the offsets of the quick check performed information.  This
25161cb0ef41Sopenharmony_ci  // information is used to find out what we already determined about the
25171cb0ef41Sopenharmony_ci  // characters by means of mask and compare.
25181cb0ef41Sopenharmony_ci  quick_check_performed_.Advance(by, compiler->one_byte());
25191cb0ef41Sopenharmony_ci  cp_offset_ += by;
25201cb0ef41Sopenharmony_ci  if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
25211cb0ef41Sopenharmony_ci    compiler->SetRegExpTooBig();
25221cb0ef41Sopenharmony_ci    cp_offset_ = 0;
25231cb0ef41Sopenharmony_ci  }
25241cb0ef41Sopenharmony_ci  bound_checked_up_to_ = std::max(0, bound_checked_up_to_ - by);
25251cb0ef41Sopenharmony_ci}
25261cb0ef41Sopenharmony_ci
25271cb0ef41Sopenharmony_civoid TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
25281cb0ef41Sopenharmony_ci                                   RegExpFlags flags) {
25291cb0ef41Sopenharmony_ci  if (!IsIgnoreCase(flags)) return;
25301cb0ef41Sopenharmony_ci#ifdef V8_INTL_SUPPORT
25311cb0ef41Sopenharmony_ci  if (NeedsUnicodeCaseEquivalents(flags)) return;
25321cb0ef41Sopenharmony_ci#endif
25331cb0ef41Sopenharmony_ci
25341cb0ef41Sopenharmony_ci  int element_count = elements()->length();
25351cb0ef41Sopenharmony_ci  for (int i = 0; i < element_count; i++) {
25361cb0ef41Sopenharmony_ci    TextElement elm = elements()->at(i);
25371cb0ef41Sopenharmony_ci    if (elm.text_type() == TextElement::CHAR_CLASS) {
25381cb0ef41Sopenharmony_ci      RegExpCharacterClass* cc = elm.char_class();
25391cb0ef41Sopenharmony_ci      // None of the standard character classes is different in the case
25401cb0ef41Sopenharmony_ci      // independent case and it slows us down if we don't know that.
25411cb0ef41Sopenharmony_ci      if (cc->is_standard(zone())) continue;
25421cb0ef41Sopenharmony_ci      ZoneList<CharacterRange>* ranges = cc->ranges(zone());
25431cb0ef41Sopenharmony_ci      CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
25441cb0ef41Sopenharmony_ci    }
25451cb0ef41Sopenharmony_ci  }
25461cb0ef41Sopenharmony_ci}
25471cb0ef41Sopenharmony_ci
25481cb0ef41Sopenharmony_ciint TextNode::GreedyLoopTextLength() { return Length(); }
25491cb0ef41Sopenharmony_ci
25501cb0ef41Sopenharmony_ciRegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
25511cb0ef41Sopenharmony_ci    RegExpCompiler* compiler) {
25521cb0ef41Sopenharmony_ci  if (read_backward()) return nullptr;
25531cb0ef41Sopenharmony_ci  if (elements()->length() != 1) return nullptr;
25541cb0ef41Sopenharmony_ci  TextElement elm = elements()->at(0);
25551cb0ef41Sopenharmony_ci  if (elm.text_type() != TextElement::CHAR_CLASS) return nullptr;
25561cb0ef41Sopenharmony_ci  RegExpCharacterClass* node = elm.char_class();
25571cb0ef41Sopenharmony_ci  ZoneList<CharacterRange>* ranges = node->ranges(zone());
25581cb0ef41Sopenharmony_ci  CharacterRange::Canonicalize(ranges);
25591cb0ef41Sopenharmony_ci  if (node->is_negated()) {
25601cb0ef41Sopenharmony_ci    return ranges->length() == 0 ? on_success() : nullptr;
25611cb0ef41Sopenharmony_ci  }
25621cb0ef41Sopenharmony_ci  if (ranges->length() != 1) return nullptr;
25631cb0ef41Sopenharmony_ci  const base::uc32 max_char = MaxCodeUnit(compiler->one_byte());
25641cb0ef41Sopenharmony_ci  return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
25651cb0ef41Sopenharmony_ci}
25661cb0ef41Sopenharmony_ci
25671cb0ef41Sopenharmony_ci// Finds the fixed match length of a sequence of nodes that goes from
25681cb0ef41Sopenharmony_ci// this alternative and back to this choice node.  If there are variable
25691cb0ef41Sopenharmony_ci// length nodes or other complications in the way then return a sentinel
25701cb0ef41Sopenharmony_ci// value indicating that a greedy loop cannot be constructed.
25711cb0ef41Sopenharmony_ciint ChoiceNode::GreedyLoopTextLengthForAlternative(
25721cb0ef41Sopenharmony_ci    GuardedAlternative* alternative) {
25731cb0ef41Sopenharmony_ci  int length = 0;
25741cb0ef41Sopenharmony_ci  RegExpNode* node = alternative->node();
25751cb0ef41Sopenharmony_ci  // Later we will generate code for all these text nodes using recursion
25761cb0ef41Sopenharmony_ci  // so we have to limit the max number.
25771cb0ef41Sopenharmony_ci  int recursion_depth = 0;
25781cb0ef41Sopenharmony_ci  while (node != this) {
25791cb0ef41Sopenharmony_ci    if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
25801cb0ef41Sopenharmony_ci      return kNodeIsTooComplexForGreedyLoops;
25811cb0ef41Sopenharmony_ci    }
25821cb0ef41Sopenharmony_ci    int node_length = node->GreedyLoopTextLength();
25831cb0ef41Sopenharmony_ci    if (node_length == kNodeIsTooComplexForGreedyLoops) {
25841cb0ef41Sopenharmony_ci      return kNodeIsTooComplexForGreedyLoops;
25851cb0ef41Sopenharmony_ci    }
25861cb0ef41Sopenharmony_ci    length += node_length;
25871cb0ef41Sopenharmony_ci    SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
25881cb0ef41Sopenharmony_ci    node = seq_node->on_success();
25891cb0ef41Sopenharmony_ci  }
25901cb0ef41Sopenharmony_ci  if (read_backward()) {
25911cb0ef41Sopenharmony_ci    length = -length;
25921cb0ef41Sopenharmony_ci  }
25931cb0ef41Sopenharmony_ci  // Check that we can jump by the whole text length. If not, return sentinel
25941cb0ef41Sopenharmony_ci  // to indicate the we can't construct a greedy loop.
25951cb0ef41Sopenharmony_ci  if (length < RegExpMacroAssembler::kMinCPOffset ||
25961cb0ef41Sopenharmony_ci      length > RegExpMacroAssembler::kMaxCPOffset) {
25971cb0ef41Sopenharmony_ci    return kNodeIsTooComplexForGreedyLoops;
25981cb0ef41Sopenharmony_ci  }
25991cb0ef41Sopenharmony_ci  return length;
26001cb0ef41Sopenharmony_ci}
26011cb0ef41Sopenharmony_ci
26021cb0ef41Sopenharmony_civoid LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
26031cb0ef41Sopenharmony_ci  DCHECK_NULL(loop_node_);
26041cb0ef41Sopenharmony_ci  AddAlternative(alt);
26051cb0ef41Sopenharmony_ci  loop_node_ = alt.node();
26061cb0ef41Sopenharmony_ci}
26071cb0ef41Sopenharmony_ci
26081cb0ef41Sopenharmony_civoid LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
26091cb0ef41Sopenharmony_ci  DCHECK_NULL(continue_node_);
26101cb0ef41Sopenharmony_ci  AddAlternative(alt);
26111cb0ef41Sopenharmony_ci  continue_node_ = alt.node();
26121cb0ef41Sopenharmony_ci}
26131cb0ef41Sopenharmony_ci
26141cb0ef41Sopenharmony_civoid LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
26151cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
26161cb0ef41Sopenharmony_ci  if (trace->stop_node() == this) {
26171cb0ef41Sopenharmony_ci    // Back edge of greedy optimized loop node graph.
26181cb0ef41Sopenharmony_ci    int text_length =
26191cb0ef41Sopenharmony_ci        GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
26201cb0ef41Sopenharmony_ci    DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length);
26211cb0ef41Sopenharmony_ci    // Update the counter-based backtracking info on the stack.  This is an
26221cb0ef41Sopenharmony_ci    // optimization for greedy loops (see below).
26231cb0ef41Sopenharmony_ci    DCHECK(trace->cp_offset() == text_length);
26241cb0ef41Sopenharmony_ci    macro_assembler->AdvanceCurrentPosition(text_length);
26251cb0ef41Sopenharmony_ci    macro_assembler->GoTo(trace->loop_label());
26261cb0ef41Sopenharmony_ci    return;
26271cb0ef41Sopenharmony_ci  }
26281cb0ef41Sopenharmony_ci  DCHECK_NULL(trace->stop_node());
26291cb0ef41Sopenharmony_ci  if (!trace->is_trivial()) {
26301cb0ef41Sopenharmony_ci    trace->Flush(compiler, this);
26311cb0ef41Sopenharmony_ci    return;
26321cb0ef41Sopenharmony_ci  }
26331cb0ef41Sopenharmony_ci  ChoiceNode::Emit(compiler, trace);
26341cb0ef41Sopenharmony_ci}
26351cb0ef41Sopenharmony_ci
26361cb0ef41Sopenharmony_ciint ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
26371cb0ef41Sopenharmony_ci                                           int eats_at_least) {
26381cb0ef41Sopenharmony_ci  int preload_characters = std::min(4, eats_at_least);
26391cb0ef41Sopenharmony_ci  DCHECK_LE(preload_characters, 4);
26401cb0ef41Sopenharmony_ci  if (compiler->macro_assembler()->CanReadUnaligned()) {
26411cb0ef41Sopenharmony_ci    bool one_byte = compiler->one_byte();
26421cb0ef41Sopenharmony_ci    if (one_byte) {
26431cb0ef41Sopenharmony_ci      // We can't preload 3 characters because there is no machine instruction
26441cb0ef41Sopenharmony_ci      // to do that.  We can't just load 4 because we could be reading
26451cb0ef41Sopenharmony_ci      // beyond the end of the string, which could cause a memory fault.
26461cb0ef41Sopenharmony_ci      if (preload_characters == 3) preload_characters = 2;
26471cb0ef41Sopenharmony_ci    } else {
26481cb0ef41Sopenharmony_ci      if (preload_characters > 2) preload_characters = 2;
26491cb0ef41Sopenharmony_ci    }
26501cb0ef41Sopenharmony_ci  } else {
26511cb0ef41Sopenharmony_ci    if (preload_characters > 1) preload_characters = 1;
26521cb0ef41Sopenharmony_ci  }
26531cb0ef41Sopenharmony_ci  return preload_characters;
26541cb0ef41Sopenharmony_ci}
26551cb0ef41Sopenharmony_ci
26561cb0ef41Sopenharmony_ci// This class is used when generating the alternatives in a choice node.  It
26571cb0ef41Sopenharmony_ci// records the way the alternative is being code generated.
26581cb0ef41Sopenharmony_ciclass AlternativeGeneration : public Malloced {
26591cb0ef41Sopenharmony_ci public:
26601cb0ef41Sopenharmony_ci  AlternativeGeneration()
26611cb0ef41Sopenharmony_ci      : possible_success(),
26621cb0ef41Sopenharmony_ci        expects_preload(false),
26631cb0ef41Sopenharmony_ci        after(),
26641cb0ef41Sopenharmony_ci        quick_check_details() {}
26651cb0ef41Sopenharmony_ci  Label possible_success;
26661cb0ef41Sopenharmony_ci  bool expects_preload;
26671cb0ef41Sopenharmony_ci  Label after;
26681cb0ef41Sopenharmony_ci  QuickCheckDetails quick_check_details;
26691cb0ef41Sopenharmony_ci};
26701cb0ef41Sopenharmony_ci
26711cb0ef41Sopenharmony_ci// Creates a list of AlternativeGenerations.  If the list has a reasonable
26721cb0ef41Sopenharmony_ci// size then it is on the stack, otherwise the excess is on the heap.
26731cb0ef41Sopenharmony_ciclass AlternativeGenerationList {
26741cb0ef41Sopenharmony_ci public:
26751cb0ef41Sopenharmony_ci  AlternativeGenerationList(int count, Zone* zone) : alt_gens_(count, zone) {
26761cb0ef41Sopenharmony_ci    for (int i = 0; i < count && i < kAFew; i++) {
26771cb0ef41Sopenharmony_ci      alt_gens_.Add(a_few_alt_gens_ + i, zone);
26781cb0ef41Sopenharmony_ci    }
26791cb0ef41Sopenharmony_ci    for (int i = kAFew; i < count; i++) {
26801cb0ef41Sopenharmony_ci      alt_gens_.Add(new AlternativeGeneration(), zone);
26811cb0ef41Sopenharmony_ci    }
26821cb0ef41Sopenharmony_ci  }
26831cb0ef41Sopenharmony_ci  ~AlternativeGenerationList() {
26841cb0ef41Sopenharmony_ci    for (int i = kAFew; i < alt_gens_.length(); i++) {
26851cb0ef41Sopenharmony_ci      delete alt_gens_[i];
26861cb0ef41Sopenharmony_ci      alt_gens_[i] = nullptr;
26871cb0ef41Sopenharmony_ci    }
26881cb0ef41Sopenharmony_ci  }
26891cb0ef41Sopenharmony_ci
26901cb0ef41Sopenharmony_ci  AlternativeGeneration* at(int i) { return alt_gens_[i]; }
26911cb0ef41Sopenharmony_ci
26921cb0ef41Sopenharmony_ci private:
26931cb0ef41Sopenharmony_ci  static const int kAFew = 10;
26941cb0ef41Sopenharmony_ci  ZoneList<AlternativeGeneration*> alt_gens_;
26951cb0ef41Sopenharmony_ci  AlternativeGeneration a_few_alt_gens_[kAFew];
26961cb0ef41Sopenharmony_ci};
26971cb0ef41Sopenharmony_ci
26981cb0ef41Sopenharmony_civoid BoyerMoorePositionInfo::Set(int character) {
26991cb0ef41Sopenharmony_ci  SetInterval(Interval(character, character));
27001cb0ef41Sopenharmony_ci}
27011cb0ef41Sopenharmony_ci
27021cb0ef41Sopenharmony_cinamespace {
27031cb0ef41Sopenharmony_ci
27041cb0ef41Sopenharmony_ciContainedInLattice AddRange(ContainedInLattice containment, const int* ranges,
27051cb0ef41Sopenharmony_ci                            int ranges_length, Interval new_range) {
27061cb0ef41Sopenharmony_ci  DCHECK_EQ(1, ranges_length & 1);
27071cb0ef41Sopenharmony_ci  DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1]);
27081cb0ef41Sopenharmony_ci  if (containment == kLatticeUnknown) return containment;
27091cb0ef41Sopenharmony_ci  bool inside = false;
27101cb0ef41Sopenharmony_ci  int last = 0;
27111cb0ef41Sopenharmony_ci  for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
27121cb0ef41Sopenharmony_ci    // Consider the range from last to ranges[i].
27131cb0ef41Sopenharmony_ci    // We haven't got to the new range yet.
27141cb0ef41Sopenharmony_ci    if (ranges[i] <= new_range.from()) continue;
27151cb0ef41Sopenharmony_ci    // New range is wholly inside last-ranges[i].  Note that new_range.to() is
27161cb0ef41Sopenharmony_ci    // inclusive, but the values in ranges are not.
27171cb0ef41Sopenharmony_ci    if (last <= new_range.from() && new_range.to() < ranges[i]) {
27181cb0ef41Sopenharmony_ci      return Combine(containment, inside ? kLatticeIn : kLatticeOut);
27191cb0ef41Sopenharmony_ci    }
27201cb0ef41Sopenharmony_ci    return kLatticeUnknown;
27211cb0ef41Sopenharmony_ci  }
27221cb0ef41Sopenharmony_ci  return containment;
27231cb0ef41Sopenharmony_ci}
27241cb0ef41Sopenharmony_ci
27251cb0ef41Sopenharmony_ciint BitsetFirstSetBit(BoyerMoorePositionInfo::Bitset bitset) {
27261cb0ef41Sopenharmony_ci  STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
27271cb0ef41Sopenharmony_ci                2 * kInt64Size * kBitsPerByte);
27281cb0ef41Sopenharmony_ci
27291cb0ef41Sopenharmony_ci  // Slight fiddling is needed here, since the bitset is of length 128 while
27301cb0ef41Sopenharmony_ci  // CountTrailingZeros requires an integral type and std::bitset can only
27311cb0ef41Sopenharmony_ci  // convert to unsigned long long. So we handle the most- and least-significant
27321cb0ef41Sopenharmony_ci  // bits separately.
27331cb0ef41Sopenharmony_ci
27341cb0ef41Sopenharmony_ci  {
27351cb0ef41Sopenharmony_ci    static constexpr BoyerMoorePositionInfo::Bitset mask(~uint64_t{0});
27361cb0ef41Sopenharmony_ci    BoyerMoorePositionInfo::Bitset masked_bitset = bitset & mask;
27371cb0ef41Sopenharmony_ci    STATIC_ASSERT(kInt64Size >= sizeof(decltype(masked_bitset.to_ullong())));
27381cb0ef41Sopenharmony_ci    uint64_t lsb = masked_bitset.to_ullong();
27391cb0ef41Sopenharmony_ci    if (lsb != 0) return base::bits::CountTrailingZeros(lsb);
27401cb0ef41Sopenharmony_ci  }
27411cb0ef41Sopenharmony_ci
27421cb0ef41Sopenharmony_ci  {
27431cb0ef41Sopenharmony_ci    BoyerMoorePositionInfo::Bitset masked_bitset = bitset >> 64;
27441cb0ef41Sopenharmony_ci    uint64_t msb = masked_bitset.to_ullong();
27451cb0ef41Sopenharmony_ci    if (msb != 0) return 64 + base::bits::CountTrailingZeros(msb);
27461cb0ef41Sopenharmony_ci  }
27471cb0ef41Sopenharmony_ci
27481cb0ef41Sopenharmony_ci  return -1;
27491cb0ef41Sopenharmony_ci}
27501cb0ef41Sopenharmony_ci
27511cb0ef41Sopenharmony_ci}  // namespace
27521cb0ef41Sopenharmony_ci
27531cb0ef41Sopenharmony_civoid BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
27541cb0ef41Sopenharmony_ci  w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
27551cb0ef41Sopenharmony_ci
27561cb0ef41Sopenharmony_ci  if (interval.size() >= kMapSize) {
27571cb0ef41Sopenharmony_ci    map_count_ = kMapSize;
27581cb0ef41Sopenharmony_ci    map_.set();
27591cb0ef41Sopenharmony_ci    return;
27601cb0ef41Sopenharmony_ci  }
27611cb0ef41Sopenharmony_ci
27621cb0ef41Sopenharmony_ci  for (int i = interval.from(); i <= interval.to(); i++) {
27631cb0ef41Sopenharmony_ci    int mod_character = (i & kMask);
27641cb0ef41Sopenharmony_ci    if (!map_[mod_character]) {
27651cb0ef41Sopenharmony_ci      map_count_++;
27661cb0ef41Sopenharmony_ci      map_.set(mod_character);
27671cb0ef41Sopenharmony_ci    }
27681cb0ef41Sopenharmony_ci    if (map_count_ == kMapSize) return;
27691cb0ef41Sopenharmony_ci  }
27701cb0ef41Sopenharmony_ci}
27711cb0ef41Sopenharmony_ci
27721cb0ef41Sopenharmony_civoid BoyerMoorePositionInfo::SetAll() {
27731cb0ef41Sopenharmony_ci  w_ = kLatticeUnknown;
27741cb0ef41Sopenharmony_ci  if (map_count_ != kMapSize) {
27751cb0ef41Sopenharmony_ci    map_count_ = kMapSize;
27761cb0ef41Sopenharmony_ci    map_.set();
27771cb0ef41Sopenharmony_ci  }
27781cb0ef41Sopenharmony_ci}
27791cb0ef41Sopenharmony_ci
27801cb0ef41Sopenharmony_ciBoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
27811cb0ef41Sopenharmony_ci                                         Zone* zone)
27821cb0ef41Sopenharmony_ci    : length_(length),
27831cb0ef41Sopenharmony_ci      compiler_(compiler),
27841cb0ef41Sopenharmony_ci      max_char_(MaxCodeUnit(compiler->one_byte())) {
27851cb0ef41Sopenharmony_ci  bitmaps_ = zone->New<ZoneList<BoyerMoorePositionInfo*>>(length, zone);
27861cb0ef41Sopenharmony_ci  for (int i = 0; i < length; i++) {
27871cb0ef41Sopenharmony_ci    bitmaps_->Add(zone->New<BoyerMoorePositionInfo>(), zone);
27881cb0ef41Sopenharmony_ci  }
27891cb0ef41Sopenharmony_ci}
27901cb0ef41Sopenharmony_ci
27911cb0ef41Sopenharmony_ci// Find the longest range of lookahead that has the fewest number of different
27921cb0ef41Sopenharmony_ci// characters that can occur at a given position.  Since we are optimizing two
27931cb0ef41Sopenharmony_ci// different parameters at once this is a tradeoff.
27941cb0ef41Sopenharmony_cibool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
27951cb0ef41Sopenharmony_ci  int biggest_points = 0;
27961cb0ef41Sopenharmony_ci  // If more than 32 characters out of 128 can occur it is unlikely that we can
27971cb0ef41Sopenharmony_ci  // be lucky enough to step forwards much of the time.
27981cb0ef41Sopenharmony_ci  const int kMaxMax = 32;
27991cb0ef41Sopenharmony_ci  for (int max_number_of_chars = 4; max_number_of_chars < kMaxMax;
28001cb0ef41Sopenharmony_ci       max_number_of_chars *= 2) {
28011cb0ef41Sopenharmony_ci    biggest_points =
28021cb0ef41Sopenharmony_ci        FindBestInterval(max_number_of_chars, biggest_points, from, to);
28031cb0ef41Sopenharmony_ci  }
28041cb0ef41Sopenharmony_ci  if (biggest_points == 0) return false;
28051cb0ef41Sopenharmony_ci  return true;
28061cb0ef41Sopenharmony_ci}
28071cb0ef41Sopenharmony_ci
28081cb0ef41Sopenharmony_ci// Find the highest-points range between 0 and length_ where the character
28091cb0ef41Sopenharmony_ci// information is not too vague.  'Too vague' means that there are more than
28101cb0ef41Sopenharmony_ci// max_number_of_chars that can occur at this position.  Calculates the number
28111cb0ef41Sopenharmony_ci// of points as the product of width-of-the-range and
28121cb0ef41Sopenharmony_ci// probability-of-finding-one-of-the-characters, where the probability is
28131cb0ef41Sopenharmony_ci// calculated using the frequency distribution of the sample subject string.
28141cb0ef41Sopenharmony_ciint BoyerMooreLookahead::FindBestInterval(int max_number_of_chars,
28151cb0ef41Sopenharmony_ci                                          int old_biggest_points, int* from,
28161cb0ef41Sopenharmony_ci                                          int* to) {
28171cb0ef41Sopenharmony_ci  int biggest_points = old_biggest_points;
28181cb0ef41Sopenharmony_ci  static const int kSize = RegExpMacroAssembler::kTableSize;
28191cb0ef41Sopenharmony_ci  for (int i = 0; i < length_;) {
28201cb0ef41Sopenharmony_ci    while (i < length_ && Count(i) > max_number_of_chars) i++;
28211cb0ef41Sopenharmony_ci    if (i == length_) break;
28221cb0ef41Sopenharmony_ci    int remembered_from = i;
28231cb0ef41Sopenharmony_ci
28241cb0ef41Sopenharmony_ci    BoyerMoorePositionInfo::Bitset union_bitset;
28251cb0ef41Sopenharmony_ci    for (; i < length_ && Count(i) <= max_number_of_chars; i++) {
28261cb0ef41Sopenharmony_ci      union_bitset |= bitmaps_->at(i)->raw_bitset();
28271cb0ef41Sopenharmony_ci    }
28281cb0ef41Sopenharmony_ci
28291cb0ef41Sopenharmony_ci    int frequency = 0;
28301cb0ef41Sopenharmony_ci
28311cb0ef41Sopenharmony_ci    // Iterate only over set bits.
28321cb0ef41Sopenharmony_ci    int j;
28331cb0ef41Sopenharmony_ci    while ((j = BitsetFirstSetBit(union_bitset)) != -1) {
28341cb0ef41Sopenharmony_ci      DCHECK(union_bitset[j]);  // Sanity check.
28351cb0ef41Sopenharmony_ci      // Add 1 to the frequency to give a small per-character boost for
28361cb0ef41Sopenharmony_ci      // the cases where our sampling is not good enough and many
28371cb0ef41Sopenharmony_ci      // characters have a frequency of zero.  This means the frequency
28381cb0ef41Sopenharmony_ci      // can theoretically be up to 2*kSize though we treat it mostly as
28391cb0ef41Sopenharmony_ci      // a fraction of kSize.
28401cb0ef41Sopenharmony_ci      frequency += compiler_->frequency_collator()->Frequency(j) + 1;
28411cb0ef41Sopenharmony_ci      union_bitset.reset(j);
28421cb0ef41Sopenharmony_ci    }
28431cb0ef41Sopenharmony_ci
28441cb0ef41Sopenharmony_ci    // We use the probability of skipping times the distance we are skipping to
28451cb0ef41Sopenharmony_ci    // judge the effectiveness of this.  Actually we have a cut-off:  By
28461cb0ef41Sopenharmony_ci    // dividing by 2 we switch off the skipping if the probability of skipping
28471cb0ef41Sopenharmony_ci    // is less than 50%.  This is because the multibyte mask-and-compare
28481cb0ef41Sopenharmony_ci    // skipping in quickcheck is more likely to do well on this case.
28491cb0ef41Sopenharmony_ci    bool in_quickcheck_range =
28501cb0ef41Sopenharmony_ci        ((i - remembered_from < 4) ||
28511cb0ef41Sopenharmony_ci         (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
28521cb0ef41Sopenharmony_ci    // Called 'probability' but it is only a rough estimate and can actually
28531cb0ef41Sopenharmony_ci    // be outside the 0-kSize range.
28541cb0ef41Sopenharmony_ci    int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
28551cb0ef41Sopenharmony_ci    int points = (i - remembered_from) * probability;
28561cb0ef41Sopenharmony_ci    if (points > biggest_points) {
28571cb0ef41Sopenharmony_ci      *from = remembered_from;
28581cb0ef41Sopenharmony_ci      *to = i - 1;
28591cb0ef41Sopenharmony_ci      biggest_points = points;
28601cb0ef41Sopenharmony_ci    }
28611cb0ef41Sopenharmony_ci  }
28621cb0ef41Sopenharmony_ci  return biggest_points;
28631cb0ef41Sopenharmony_ci}
28641cb0ef41Sopenharmony_ci
28651cb0ef41Sopenharmony_ci// Take all the characters that will not prevent a successful match if they
28661cb0ef41Sopenharmony_ci// occur in the subject string in the range between min_lookahead and
28671cb0ef41Sopenharmony_ci// max_lookahead (inclusive) measured from the current position.  If the
28681cb0ef41Sopenharmony_ci// character at max_lookahead offset is not one of these characters, then we
28691cb0ef41Sopenharmony_ci// can safely skip forwards by the number of characters in the range.
28701cb0ef41Sopenharmony_ciint BoyerMooreLookahead::GetSkipTable(int min_lookahead, int max_lookahead,
28711cb0ef41Sopenharmony_ci                                      Handle<ByteArray> boolean_skip_table) {
28721cb0ef41Sopenharmony_ci  const int kSkipArrayEntry = 0;
28731cb0ef41Sopenharmony_ci  const int kDontSkipArrayEntry = 1;
28741cb0ef41Sopenharmony_ci
28751cb0ef41Sopenharmony_ci  std::memset(boolean_skip_table->GetDataStartAddress(), kSkipArrayEntry,
28761cb0ef41Sopenharmony_ci              boolean_skip_table->length());
28771cb0ef41Sopenharmony_ci
28781cb0ef41Sopenharmony_ci  for (int i = max_lookahead; i >= min_lookahead; i--) {
28791cb0ef41Sopenharmony_ci    BoyerMoorePositionInfo::Bitset bitset = bitmaps_->at(i)->raw_bitset();
28801cb0ef41Sopenharmony_ci
28811cb0ef41Sopenharmony_ci    // Iterate only over set bits.
28821cb0ef41Sopenharmony_ci    int j;
28831cb0ef41Sopenharmony_ci    while ((j = BitsetFirstSetBit(bitset)) != -1) {
28841cb0ef41Sopenharmony_ci      DCHECK(bitset[j]);  // Sanity check.
28851cb0ef41Sopenharmony_ci      boolean_skip_table->set(j, kDontSkipArrayEntry);
28861cb0ef41Sopenharmony_ci      bitset.reset(j);
28871cb0ef41Sopenharmony_ci    }
28881cb0ef41Sopenharmony_ci  }
28891cb0ef41Sopenharmony_ci
28901cb0ef41Sopenharmony_ci  const int skip = max_lookahead + 1 - min_lookahead;
28911cb0ef41Sopenharmony_ci  return skip;
28921cb0ef41Sopenharmony_ci}
28931cb0ef41Sopenharmony_ci
28941cb0ef41Sopenharmony_ci// See comment above on the implementation of GetSkipTable.
28951cb0ef41Sopenharmony_civoid BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
28961cb0ef41Sopenharmony_ci  const int kSize = RegExpMacroAssembler::kTableSize;
28971cb0ef41Sopenharmony_ci
28981cb0ef41Sopenharmony_ci  int min_lookahead = 0;
28991cb0ef41Sopenharmony_ci  int max_lookahead = 0;
29001cb0ef41Sopenharmony_ci
29011cb0ef41Sopenharmony_ci  if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
29021cb0ef41Sopenharmony_ci
29031cb0ef41Sopenharmony_ci  // Check if we only have a single non-empty position info, and that info
29041cb0ef41Sopenharmony_ci  // contains precisely one character.
29051cb0ef41Sopenharmony_ci  bool found_single_character = false;
29061cb0ef41Sopenharmony_ci  int single_character = 0;
29071cb0ef41Sopenharmony_ci  for (int i = max_lookahead; i >= min_lookahead; i--) {
29081cb0ef41Sopenharmony_ci    BoyerMoorePositionInfo* map = bitmaps_->at(i);
29091cb0ef41Sopenharmony_ci    if (map->map_count() == 0) continue;
29101cb0ef41Sopenharmony_ci
29111cb0ef41Sopenharmony_ci    if (found_single_character || map->map_count() > 1) {
29121cb0ef41Sopenharmony_ci      found_single_character = false;
29131cb0ef41Sopenharmony_ci      break;
29141cb0ef41Sopenharmony_ci    }
29151cb0ef41Sopenharmony_ci
29161cb0ef41Sopenharmony_ci    DCHECK(!found_single_character);
29171cb0ef41Sopenharmony_ci    DCHECK_EQ(map->map_count(), 1);
29181cb0ef41Sopenharmony_ci
29191cb0ef41Sopenharmony_ci    found_single_character = true;
29201cb0ef41Sopenharmony_ci    single_character = BitsetFirstSetBit(map->raw_bitset());
29211cb0ef41Sopenharmony_ci
29221cb0ef41Sopenharmony_ci    DCHECK_NE(single_character, -1);
29231cb0ef41Sopenharmony_ci  }
29241cb0ef41Sopenharmony_ci
29251cb0ef41Sopenharmony_ci  int lookahead_width = max_lookahead + 1 - min_lookahead;
29261cb0ef41Sopenharmony_ci
29271cb0ef41Sopenharmony_ci  if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
29281cb0ef41Sopenharmony_ci    // The mask-compare can probably handle this better.
29291cb0ef41Sopenharmony_ci    return;
29301cb0ef41Sopenharmony_ci  }
29311cb0ef41Sopenharmony_ci
29321cb0ef41Sopenharmony_ci  if (found_single_character) {
29331cb0ef41Sopenharmony_ci    Label cont, again;
29341cb0ef41Sopenharmony_ci    masm->Bind(&again);
29351cb0ef41Sopenharmony_ci    masm->LoadCurrentCharacter(max_lookahead, &cont, true);
29361cb0ef41Sopenharmony_ci    if (max_char_ > kSize) {
29371cb0ef41Sopenharmony_ci      masm->CheckCharacterAfterAnd(single_character,
29381cb0ef41Sopenharmony_ci                                   RegExpMacroAssembler::kTableMask, &cont);
29391cb0ef41Sopenharmony_ci    } else {
29401cb0ef41Sopenharmony_ci      masm->CheckCharacter(single_character, &cont);
29411cb0ef41Sopenharmony_ci    }
29421cb0ef41Sopenharmony_ci    masm->AdvanceCurrentPosition(lookahead_width);
29431cb0ef41Sopenharmony_ci    masm->GoTo(&again);
29441cb0ef41Sopenharmony_ci    masm->Bind(&cont);
29451cb0ef41Sopenharmony_ci    return;
29461cb0ef41Sopenharmony_ci  }
29471cb0ef41Sopenharmony_ci
29481cb0ef41Sopenharmony_ci  Factory* factory = masm->isolate()->factory();
29491cb0ef41Sopenharmony_ci  Handle<ByteArray> boolean_skip_table =
29501cb0ef41Sopenharmony_ci      factory->NewByteArray(kSize, AllocationType::kOld);
29511cb0ef41Sopenharmony_ci  int skip_distance =
29521cb0ef41Sopenharmony_ci      GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table);
29531cb0ef41Sopenharmony_ci  DCHECK_NE(0, skip_distance);
29541cb0ef41Sopenharmony_ci
29551cb0ef41Sopenharmony_ci  Label cont, again;
29561cb0ef41Sopenharmony_ci  masm->Bind(&again);
29571cb0ef41Sopenharmony_ci  masm->LoadCurrentCharacter(max_lookahead, &cont, true);
29581cb0ef41Sopenharmony_ci  masm->CheckBitInTable(boolean_skip_table, &cont);
29591cb0ef41Sopenharmony_ci  masm->AdvanceCurrentPosition(skip_distance);
29601cb0ef41Sopenharmony_ci  masm->GoTo(&again);
29611cb0ef41Sopenharmony_ci  masm->Bind(&cont);
29621cb0ef41Sopenharmony_ci}
29631cb0ef41Sopenharmony_ci
29641cb0ef41Sopenharmony_ci/* Code generation for choice nodes.
29651cb0ef41Sopenharmony_ci *
29661cb0ef41Sopenharmony_ci * We generate quick checks that do a mask and compare to eliminate a
29671cb0ef41Sopenharmony_ci * choice.  If the quick check succeeds then it jumps to the continuation to
29681cb0ef41Sopenharmony_ci * do slow checks and check subsequent nodes.  If it fails (the common case)
29691cb0ef41Sopenharmony_ci * it falls through to the next choice.
29701cb0ef41Sopenharmony_ci *
29711cb0ef41Sopenharmony_ci * Here is the desired flow graph.  Nodes directly below each other imply
29721cb0ef41Sopenharmony_ci * fallthrough.  Alternatives 1 and 2 have quick checks.  Alternative
29731cb0ef41Sopenharmony_ci * 3 doesn't have a quick check so we have to call the slow check.
29741cb0ef41Sopenharmony_ci * Nodes are marked Qn for quick checks and Sn for slow checks.  The entire
29751cb0ef41Sopenharmony_ci * regexp continuation is generated directly after the Sn node, up to the
29761cb0ef41Sopenharmony_ci * next GoTo if we decide to reuse some already generated code.  Some
29771cb0ef41Sopenharmony_ci * nodes expect preload_characters to be preloaded into the current
29781cb0ef41Sopenharmony_ci * character register.  R nodes do this preloading.  Vertices are marked
29791cb0ef41Sopenharmony_ci * F for failures and S for success (possible success in the case of quick
29801cb0ef41Sopenharmony_ci * nodes).  L, V, < and > are used as arrow heads.
29811cb0ef41Sopenharmony_ci *
29821cb0ef41Sopenharmony_ci * ----------> R
29831cb0ef41Sopenharmony_ci *             |
29841cb0ef41Sopenharmony_ci *             V
29851cb0ef41Sopenharmony_ci *            Q1 -----> S1
29861cb0ef41Sopenharmony_ci *             |   S   /
29871cb0ef41Sopenharmony_ci *            F|      /
29881cb0ef41Sopenharmony_ci *             |    F/
29891cb0ef41Sopenharmony_ci *             |    /
29901cb0ef41Sopenharmony_ci *             |   R
29911cb0ef41Sopenharmony_ci *             |  /
29921cb0ef41Sopenharmony_ci *             V L
29931cb0ef41Sopenharmony_ci *            Q2 -----> S2
29941cb0ef41Sopenharmony_ci *             |   S   /
29951cb0ef41Sopenharmony_ci *            F|      /
29961cb0ef41Sopenharmony_ci *             |    F/
29971cb0ef41Sopenharmony_ci *             |    /
29981cb0ef41Sopenharmony_ci *             |   R
29991cb0ef41Sopenharmony_ci *             |  /
30001cb0ef41Sopenharmony_ci *             V L
30011cb0ef41Sopenharmony_ci *            S3
30021cb0ef41Sopenharmony_ci *             |
30031cb0ef41Sopenharmony_ci *            F|
30041cb0ef41Sopenharmony_ci *             |
30051cb0ef41Sopenharmony_ci *             R
30061cb0ef41Sopenharmony_ci *             |
30071cb0ef41Sopenharmony_ci * backtrack   V
30081cb0ef41Sopenharmony_ci * <----------Q4
30091cb0ef41Sopenharmony_ci *   \    F    |
30101cb0ef41Sopenharmony_ci *    \        |S
30111cb0ef41Sopenharmony_ci *     \   F   V
30121cb0ef41Sopenharmony_ci *      \-----S4
30131cb0ef41Sopenharmony_ci *
30141cb0ef41Sopenharmony_ci * For greedy loops we push the current position, then generate the code that
30151cb0ef41Sopenharmony_ci * eats the input specially in EmitGreedyLoop.  The other choice (the
30161cb0ef41Sopenharmony_ci * continuation) is generated by the normal code in EmitChoices, and steps back
30171cb0ef41Sopenharmony_ci * in the input to the starting position when it fails to match.  The loop code
30181cb0ef41Sopenharmony_ci * looks like this (U is the unwind code that steps back in the greedy loop).
30191cb0ef41Sopenharmony_ci *
30201cb0ef41Sopenharmony_ci *              _____
30211cb0ef41Sopenharmony_ci *             /     \
30221cb0ef41Sopenharmony_ci *             V     |
30231cb0ef41Sopenharmony_ci * ----------> S1    |
30241cb0ef41Sopenharmony_ci *            /|     |
30251cb0ef41Sopenharmony_ci *           / |S    |
30261cb0ef41Sopenharmony_ci *         F/  \_____/
30271cb0ef41Sopenharmony_ci *         /
30281cb0ef41Sopenharmony_ci *        |<-----
30291cb0ef41Sopenharmony_ci *        |      \
30301cb0ef41Sopenharmony_ci *        V       |S
30311cb0ef41Sopenharmony_ci *        Q2 ---> U----->backtrack
30321cb0ef41Sopenharmony_ci *        |  F   /
30331cb0ef41Sopenharmony_ci *       S|     /
30341cb0ef41Sopenharmony_ci *        V  F /
30351cb0ef41Sopenharmony_ci *        S2--/
30361cb0ef41Sopenharmony_ci */
30371cb0ef41Sopenharmony_ci
30381cb0ef41Sopenharmony_ciGreedyLoopState::GreedyLoopState(bool not_at_start) {
30391cb0ef41Sopenharmony_ci  counter_backtrack_trace_.set_backtrack(&label_);
30401cb0ef41Sopenharmony_ci  if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
30411cb0ef41Sopenharmony_ci}
30421cb0ef41Sopenharmony_ci
30431cb0ef41Sopenharmony_civoid ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
30441cb0ef41Sopenharmony_ci#ifdef DEBUG
30451cb0ef41Sopenharmony_ci  int choice_count = alternatives_->length();
30461cb0ef41Sopenharmony_ci  for (int i = 0; i < choice_count - 1; i++) {
30471cb0ef41Sopenharmony_ci    GuardedAlternative alternative = alternatives_->at(i);
30481cb0ef41Sopenharmony_ci    ZoneList<Guard*>* guards = alternative.guards();
30491cb0ef41Sopenharmony_ci    int guard_count = (guards == nullptr) ? 0 : guards->length();
30501cb0ef41Sopenharmony_ci    for (int j = 0; j < guard_count; j++) {
30511cb0ef41Sopenharmony_ci      DCHECK(!trace->mentions_reg(guards->at(j)->reg()));
30521cb0ef41Sopenharmony_ci    }
30531cb0ef41Sopenharmony_ci  }
30541cb0ef41Sopenharmony_ci#endif
30551cb0ef41Sopenharmony_ci}
30561cb0ef41Sopenharmony_ci
30571cb0ef41Sopenharmony_civoid ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
30581cb0ef41Sopenharmony_ci                              PreloadState* state) {
30591cb0ef41Sopenharmony_ci  if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
30601cb0ef41Sopenharmony_ci    // Save some time by looking at most one machine word ahead.
30611cb0ef41Sopenharmony_ci    state->eats_at_least_ =
30621cb0ef41Sopenharmony_ci        EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE);
30631cb0ef41Sopenharmony_ci  }
30641cb0ef41Sopenharmony_ci  state->preload_characters_ =
30651cb0ef41Sopenharmony_ci      CalculatePreloadCharacters(compiler, state->eats_at_least_);
30661cb0ef41Sopenharmony_ci
30671cb0ef41Sopenharmony_ci  state->preload_is_current_ =
30681cb0ef41Sopenharmony_ci      (current_trace->characters_preloaded() == state->preload_characters_);
30691cb0ef41Sopenharmony_ci  state->preload_has_checked_bounds_ = state->preload_is_current_;
30701cb0ef41Sopenharmony_ci}
30711cb0ef41Sopenharmony_ci
30721cb0ef41Sopenharmony_civoid ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
30731cb0ef41Sopenharmony_ci  int choice_count = alternatives_->length();
30741cb0ef41Sopenharmony_ci
30751cb0ef41Sopenharmony_ci  if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) {
30761cb0ef41Sopenharmony_ci    alternatives_->at(0).node()->Emit(compiler, trace);
30771cb0ef41Sopenharmony_ci    return;
30781cb0ef41Sopenharmony_ci  }
30791cb0ef41Sopenharmony_ci
30801cb0ef41Sopenharmony_ci  AssertGuardsMentionRegisters(trace);
30811cb0ef41Sopenharmony_ci
30821cb0ef41Sopenharmony_ci  LimitResult limit_result = LimitVersions(compiler, trace);
30831cb0ef41Sopenharmony_ci  if (limit_result == DONE) return;
30841cb0ef41Sopenharmony_ci  DCHECK(limit_result == CONTINUE);
30851cb0ef41Sopenharmony_ci
30861cb0ef41Sopenharmony_ci  // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
30871cb0ef41Sopenharmony_ci  // other choice nodes we only flush if we are out of code size budget.
30881cb0ef41Sopenharmony_ci  if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
30891cb0ef41Sopenharmony_ci    trace->Flush(compiler, this);
30901cb0ef41Sopenharmony_ci    return;
30911cb0ef41Sopenharmony_ci  }
30921cb0ef41Sopenharmony_ci
30931cb0ef41Sopenharmony_ci  RecursionCheck rc(compiler);
30941cb0ef41Sopenharmony_ci
30951cb0ef41Sopenharmony_ci  PreloadState preload;
30961cb0ef41Sopenharmony_ci  preload.init();
30971cb0ef41Sopenharmony_ci  GreedyLoopState greedy_loop_state(not_at_start());
30981cb0ef41Sopenharmony_ci
30991cb0ef41Sopenharmony_ci  int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
31001cb0ef41Sopenharmony_ci  AlternativeGenerationList alt_gens(choice_count, zone());
31011cb0ef41Sopenharmony_ci
31021cb0ef41Sopenharmony_ci  if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
31031cb0ef41Sopenharmony_ci    trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload,
31041cb0ef41Sopenharmony_ci                           &greedy_loop_state, text_length);
31051cb0ef41Sopenharmony_ci  } else {
31061cb0ef41Sopenharmony_ci    // TODO(erikcorry): Delete this.  We don't need this label, but it makes us
31071cb0ef41Sopenharmony_ci    // match the traces produced pre-cleanup.
31081cb0ef41Sopenharmony_ci    Label second_choice;
31091cb0ef41Sopenharmony_ci    compiler->macro_assembler()->Bind(&second_choice);
31101cb0ef41Sopenharmony_ci
31111cb0ef41Sopenharmony_ci    preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
31121cb0ef41Sopenharmony_ci
31131cb0ef41Sopenharmony_ci    EmitChoices(compiler, &alt_gens, 0, trace, &preload);
31141cb0ef41Sopenharmony_ci  }
31151cb0ef41Sopenharmony_ci
31161cb0ef41Sopenharmony_ci  // At this point we need to generate slow checks for the alternatives where
31171cb0ef41Sopenharmony_ci  // the quick check was inlined.  We can recognize these because the associated
31181cb0ef41Sopenharmony_ci  // label was bound.
31191cb0ef41Sopenharmony_ci  int new_flush_budget = trace->flush_budget() / choice_count;
31201cb0ef41Sopenharmony_ci  for (int i = 0; i < choice_count; i++) {
31211cb0ef41Sopenharmony_ci    AlternativeGeneration* alt_gen = alt_gens.at(i);
31221cb0ef41Sopenharmony_ci    Trace new_trace(*trace);
31231cb0ef41Sopenharmony_ci    // If there are actions to be flushed we have to limit how many times
31241cb0ef41Sopenharmony_ci    // they are flushed.  Take the budget of the parent trace and distribute
31251cb0ef41Sopenharmony_ci    // it fairly amongst the children.
31261cb0ef41Sopenharmony_ci    if (new_trace.actions() != nullptr) {
31271cb0ef41Sopenharmony_ci      new_trace.set_flush_budget(new_flush_budget);
31281cb0ef41Sopenharmony_ci    }
31291cb0ef41Sopenharmony_ci    bool next_expects_preload =
31301cb0ef41Sopenharmony_ci        i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
31311cb0ef41Sopenharmony_ci    EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->at(i),
31321cb0ef41Sopenharmony_ci                              alt_gen, preload.preload_characters_,
31331cb0ef41Sopenharmony_ci                              next_expects_preload);
31341cb0ef41Sopenharmony_ci  }
31351cb0ef41Sopenharmony_ci}
31361cb0ef41Sopenharmony_ci
31371cb0ef41Sopenharmony_ciTrace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
31381cb0ef41Sopenharmony_ci                                  AlternativeGenerationList* alt_gens,
31391cb0ef41Sopenharmony_ci                                  PreloadState* preload,
31401cb0ef41Sopenharmony_ci                                  GreedyLoopState* greedy_loop_state,
31411cb0ef41Sopenharmony_ci                                  int text_length) {
31421cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
31431cb0ef41Sopenharmony_ci  // Here we have special handling for greedy loops containing only text nodes
31441cb0ef41Sopenharmony_ci  // and other simple nodes.  These are handled by pushing the current
31451cb0ef41Sopenharmony_ci  // position on the stack and then incrementing the current position each
31461cb0ef41Sopenharmony_ci  // time around the switch.  On backtrack we decrement the current position
31471cb0ef41Sopenharmony_ci  // and check it against the pushed value.  This avoids pushing backtrack
31481cb0ef41Sopenharmony_ci  // information for each iteration of the loop, which could take up a lot of
31491cb0ef41Sopenharmony_ci  // space.
31501cb0ef41Sopenharmony_ci  DCHECK(trace->stop_node() == nullptr);
31511cb0ef41Sopenharmony_ci  macro_assembler->PushCurrentPosition();
31521cb0ef41Sopenharmony_ci  Label greedy_match_failed;
31531cb0ef41Sopenharmony_ci  Trace greedy_match_trace;
31541cb0ef41Sopenharmony_ci  if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
31551cb0ef41Sopenharmony_ci  greedy_match_trace.set_backtrack(&greedy_match_failed);
31561cb0ef41Sopenharmony_ci  Label loop_label;
31571cb0ef41Sopenharmony_ci  macro_assembler->Bind(&loop_label);
31581cb0ef41Sopenharmony_ci  greedy_match_trace.set_stop_node(this);
31591cb0ef41Sopenharmony_ci  greedy_match_trace.set_loop_label(&loop_label);
31601cb0ef41Sopenharmony_ci  alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
31611cb0ef41Sopenharmony_ci  macro_assembler->Bind(&greedy_match_failed);
31621cb0ef41Sopenharmony_ci
31631cb0ef41Sopenharmony_ci  Label second_choice;  // For use in greedy matches.
31641cb0ef41Sopenharmony_ci  macro_assembler->Bind(&second_choice);
31651cb0ef41Sopenharmony_ci
31661cb0ef41Sopenharmony_ci  Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
31671cb0ef41Sopenharmony_ci
31681cb0ef41Sopenharmony_ci  EmitChoices(compiler, alt_gens, 1, new_trace, preload);
31691cb0ef41Sopenharmony_ci
31701cb0ef41Sopenharmony_ci  macro_assembler->Bind(greedy_loop_state->label());
31711cb0ef41Sopenharmony_ci  // If we have unwound to the bottom then backtrack.
31721cb0ef41Sopenharmony_ci  macro_assembler->CheckGreedyLoop(trace->backtrack());
31731cb0ef41Sopenharmony_ci  // Otherwise try the second priority at an earlier position.
31741cb0ef41Sopenharmony_ci  macro_assembler->AdvanceCurrentPosition(-text_length);
31751cb0ef41Sopenharmony_ci  macro_assembler->GoTo(&second_choice);
31761cb0ef41Sopenharmony_ci  return new_trace;
31771cb0ef41Sopenharmony_ci}
31781cb0ef41Sopenharmony_ci
31791cb0ef41Sopenharmony_ciint ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
31801cb0ef41Sopenharmony_ci                                              Trace* trace) {
31811cb0ef41Sopenharmony_ci  int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
31821cb0ef41Sopenharmony_ci  if (alternatives_->length() != 2) return eats_at_least;
31831cb0ef41Sopenharmony_ci
31841cb0ef41Sopenharmony_ci  GuardedAlternative alt1 = alternatives_->at(1);
31851cb0ef41Sopenharmony_ci  if (alt1.guards() != nullptr && alt1.guards()->length() != 0) {
31861cb0ef41Sopenharmony_ci    return eats_at_least;
31871cb0ef41Sopenharmony_ci  }
31881cb0ef41Sopenharmony_ci  RegExpNode* eats_anything_node = alt1.node();
31891cb0ef41Sopenharmony_ci  if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
31901cb0ef41Sopenharmony_ci    return eats_at_least;
31911cb0ef41Sopenharmony_ci  }
31921cb0ef41Sopenharmony_ci
31931cb0ef41Sopenharmony_ci  // Really we should be creating a new trace when we execute this function,
31941cb0ef41Sopenharmony_ci  // but there is no need, because the code it generates cannot backtrack, and
31951cb0ef41Sopenharmony_ci  // we always arrive here with a trivial trace (since it's the entry to a
31961cb0ef41Sopenharmony_ci  // loop.  That also implies that there are no preloaded characters, which is
31971cb0ef41Sopenharmony_ci  // good, because it means we won't be violating any assumptions by
31981cb0ef41Sopenharmony_ci  // overwriting those characters with new load instructions.
31991cb0ef41Sopenharmony_ci  DCHECK(trace->is_trivial());
32001cb0ef41Sopenharmony_ci
32011cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
32021cb0ef41Sopenharmony_ci  Isolate* isolate = macro_assembler->isolate();
32031cb0ef41Sopenharmony_ci  // At this point we know that we are at a non-greedy loop that will eat
32041cb0ef41Sopenharmony_ci  // any character one at a time.  Any non-anchored regexp has such a
32051cb0ef41Sopenharmony_ci  // loop prepended to it in order to find where it starts.  We look for
32061cb0ef41Sopenharmony_ci  // a pattern of the form ...abc... where we can look 6 characters ahead
32071cb0ef41Sopenharmony_ci  // and step forwards 3 if the character is not one of abc.  Abc need
32081cb0ef41Sopenharmony_ci  // not be atoms, they can be any reasonably limited character class or
32091cb0ef41Sopenharmony_ci  // small alternation.
32101cb0ef41Sopenharmony_ci  BoyerMooreLookahead* bm = bm_info(false);
32111cb0ef41Sopenharmony_ci  if (bm == nullptr) {
32121cb0ef41Sopenharmony_ci    eats_at_least = std::min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false));
32131cb0ef41Sopenharmony_ci    if (eats_at_least >= 1) {
32141cb0ef41Sopenharmony_ci      bm = zone()->New<BoyerMooreLookahead>(eats_at_least, compiler, zone());
32151cb0ef41Sopenharmony_ci      GuardedAlternative alt0 = alternatives_->at(0);
32161cb0ef41Sopenharmony_ci      alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
32171cb0ef41Sopenharmony_ci    }
32181cb0ef41Sopenharmony_ci  }
32191cb0ef41Sopenharmony_ci  if (bm != nullptr) {
32201cb0ef41Sopenharmony_ci    bm->EmitSkipInstructions(macro_assembler);
32211cb0ef41Sopenharmony_ci  }
32221cb0ef41Sopenharmony_ci  return eats_at_least;
32231cb0ef41Sopenharmony_ci}
32241cb0ef41Sopenharmony_ci
32251cb0ef41Sopenharmony_civoid ChoiceNode::EmitChoices(RegExpCompiler* compiler,
32261cb0ef41Sopenharmony_ci                             AlternativeGenerationList* alt_gens,
32271cb0ef41Sopenharmony_ci                             int first_choice, Trace* trace,
32281cb0ef41Sopenharmony_ci                             PreloadState* preload) {
32291cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
32301cb0ef41Sopenharmony_ci  SetUpPreLoad(compiler, trace, preload);
32311cb0ef41Sopenharmony_ci
32321cb0ef41Sopenharmony_ci  // For now we just call all choices one after the other.  The idea ultimately
32331cb0ef41Sopenharmony_ci  // is to use the Dispatch table to try only the relevant ones.
32341cb0ef41Sopenharmony_ci  int choice_count = alternatives_->length();
32351cb0ef41Sopenharmony_ci
32361cb0ef41Sopenharmony_ci  int new_flush_budget = trace->flush_budget() / choice_count;
32371cb0ef41Sopenharmony_ci
32381cb0ef41Sopenharmony_ci  for (int i = first_choice; i < choice_count; i++) {
32391cb0ef41Sopenharmony_ci    bool is_last = i == choice_count - 1;
32401cb0ef41Sopenharmony_ci    bool fall_through_on_failure = !is_last;
32411cb0ef41Sopenharmony_ci    GuardedAlternative alternative = alternatives_->at(i);
32421cb0ef41Sopenharmony_ci    AlternativeGeneration* alt_gen = alt_gens->at(i);
32431cb0ef41Sopenharmony_ci    alt_gen->quick_check_details.set_characters(preload->preload_characters_);
32441cb0ef41Sopenharmony_ci    ZoneList<Guard*>* guards = alternative.guards();
32451cb0ef41Sopenharmony_ci    int guard_count = (guards == nullptr) ? 0 : guards->length();
32461cb0ef41Sopenharmony_ci    Trace new_trace(*trace);
32471cb0ef41Sopenharmony_ci    new_trace.set_characters_preloaded(
32481cb0ef41Sopenharmony_ci        preload->preload_is_current_ ? preload->preload_characters_ : 0);
32491cb0ef41Sopenharmony_ci    if (preload->preload_has_checked_bounds_) {
32501cb0ef41Sopenharmony_ci      new_trace.set_bound_checked_up_to(preload->preload_characters_);
32511cb0ef41Sopenharmony_ci    }
32521cb0ef41Sopenharmony_ci    new_trace.quick_check_performed()->Clear();
32531cb0ef41Sopenharmony_ci    if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
32541cb0ef41Sopenharmony_ci    if (!is_last) {
32551cb0ef41Sopenharmony_ci      new_trace.set_backtrack(&alt_gen->after);
32561cb0ef41Sopenharmony_ci    }
32571cb0ef41Sopenharmony_ci    alt_gen->expects_preload = preload->preload_is_current_;
32581cb0ef41Sopenharmony_ci    bool generate_full_check_inline = false;
32591cb0ef41Sopenharmony_ci    if (compiler->optimize() &&
32601cb0ef41Sopenharmony_ci        try_to_emit_quick_check_for_alternative(i == 0) &&
32611cb0ef41Sopenharmony_ci        alternative.node()->EmitQuickCheck(
32621cb0ef41Sopenharmony_ci            compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
32631cb0ef41Sopenharmony_ci            &alt_gen->possible_success, &alt_gen->quick_check_details,
32641cb0ef41Sopenharmony_ci            fall_through_on_failure, this)) {
32651cb0ef41Sopenharmony_ci      // Quick check was generated for this choice.
32661cb0ef41Sopenharmony_ci      preload->preload_is_current_ = true;
32671cb0ef41Sopenharmony_ci      preload->preload_has_checked_bounds_ = true;
32681cb0ef41Sopenharmony_ci      // If we generated the quick check to fall through on possible success,
32691cb0ef41Sopenharmony_ci      // we now need to generate the full check inline.
32701cb0ef41Sopenharmony_ci      if (!fall_through_on_failure) {
32711cb0ef41Sopenharmony_ci        macro_assembler->Bind(&alt_gen->possible_success);
32721cb0ef41Sopenharmony_ci        new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
32731cb0ef41Sopenharmony_ci        new_trace.set_characters_preloaded(preload->preload_characters_);
32741cb0ef41Sopenharmony_ci        new_trace.set_bound_checked_up_to(preload->preload_characters_);
32751cb0ef41Sopenharmony_ci        generate_full_check_inline = true;
32761cb0ef41Sopenharmony_ci      }
32771cb0ef41Sopenharmony_ci    } else if (alt_gen->quick_check_details.cannot_match()) {
32781cb0ef41Sopenharmony_ci      if (!fall_through_on_failure) {
32791cb0ef41Sopenharmony_ci        macro_assembler->GoTo(trace->backtrack());
32801cb0ef41Sopenharmony_ci      }
32811cb0ef41Sopenharmony_ci      continue;
32821cb0ef41Sopenharmony_ci    } else {
32831cb0ef41Sopenharmony_ci      // No quick check was generated.  Put the full code here.
32841cb0ef41Sopenharmony_ci      // If this is not the first choice then there could be slow checks from
32851cb0ef41Sopenharmony_ci      // previous cases that go here when they fail.  There's no reason to
32861cb0ef41Sopenharmony_ci      // insist that they preload characters since the slow check we are about
32871cb0ef41Sopenharmony_ci      // to generate probably can't use it.
32881cb0ef41Sopenharmony_ci      if (i != first_choice) {
32891cb0ef41Sopenharmony_ci        alt_gen->expects_preload = false;
32901cb0ef41Sopenharmony_ci        new_trace.InvalidateCurrentCharacter();
32911cb0ef41Sopenharmony_ci      }
32921cb0ef41Sopenharmony_ci      generate_full_check_inline = true;
32931cb0ef41Sopenharmony_ci    }
32941cb0ef41Sopenharmony_ci    if (generate_full_check_inline) {
32951cb0ef41Sopenharmony_ci      if (new_trace.actions() != nullptr) {
32961cb0ef41Sopenharmony_ci        new_trace.set_flush_budget(new_flush_budget);
32971cb0ef41Sopenharmony_ci      }
32981cb0ef41Sopenharmony_ci      for (int j = 0; j < guard_count; j++) {
32991cb0ef41Sopenharmony_ci        GenerateGuard(macro_assembler, guards->at(j), &new_trace);
33001cb0ef41Sopenharmony_ci      }
33011cb0ef41Sopenharmony_ci      alternative.node()->Emit(compiler, &new_trace);
33021cb0ef41Sopenharmony_ci      preload->preload_is_current_ = false;
33031cb0ef41Sopenharmony_ci    }
33041cb0ef41Sopenharmony_ci    macro_assembler->Bind(&alt_gen->after);
33051cb0ef41Sopenharmony_ci  }
33061cb0ef41Sopenharmony_ci}
33071cb0ef41Sopenharmony_ci
33081cb0ef41Sopenharmony_civoid ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
33091cb0ef41Sopenharmony_ci                                           Trace* trace,
33101cb0ef41Sopenharmony_ci                                           GuardedAlternative alternative,
33111cb0ef41Sopenharmony_ci                                           AlternativeGeneration* alt_gen,
33121cb0ef41Sopenharmony_ci                                           int preload_characters,
33131cb0ef41Sopenharmony_ci                                           bool next_expects_preload) {
33141cb0ef41Sopenharmony_ci  if (!alt_gen->possible_success.is_linked()) return;
33151cb0ef41Sopenharmony_ci
33161cb0ef41Sopenharmony_ci  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
33171cb0ef41Sopenharmony_ci  macro_assembler->Bind(&alt_gen->possible_success);
33181cb0ef41Sopenharmony_ci  Trace out_of_line_trace(*trace);
33191cb0ef41Sopenharmony_ci  out_of_line_trace.set_characters_preloaded(preload_characters);
33201cb0ef41Sopenharmony_ci  out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
33211cb0ef41Sopenharmony_ci  if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
33221cb0ef41Sopenharmony_ci  ZoneList<Guard*>* guards = alternative.guards();
33231cb0ef41Sopenharmony_ci  int guard_count = (guards == nullptr) ? 0 : guards->length();
33241cb0ef41Sopenharmony_ci  if (next_expects_preload) {
33251cb0ef41Sopenharmony_ci    Label reload_current_char;
33261cb0ef41Sopenharmony_ci    out_of_line_trace.set_backtrack(&reload_current_char);
33271cb0ef41Sopenharmony_ci    for (int j = 0; j < guard_count; j++) {
33281cb0ef41Sopenharmony_ci      GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
33291cb0ef41Sopenharmony_ci    }
33301cb0ef41Sopenharmony_ci    alternative.node()->Emit(compiler, &out_of_line_trace);
33311cb0ef41Sopenharmony_ci    macro_assembler->Bind(&reload_current_char);
33321cb0ef41Sopenharmony_ci    // Reload the current character, since the next quick check expects that.
33331cb0ef41Sopenharmony_ci    // We don't need to check bounds here because we only get into this
33341cb0ef41Sopenharmony_ci    // code through a quick check which already did the checked load.
33351cb0ef41Sopenharmony_ci    macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false,
33361cb0ef41Sopenharmony_ci                                          preload_characters);
33371cb0ef41Sopenharmony_ci    macro_assembler->GoTo(&(alt_gen->after));
33381cb0ef41Sopenharmony_ci  } else {
33391cb0ef41Sopenharmony_ci    out_of_line_trace.set_backtrack(&(alt_gen->after));
33401cb0ef41Sopenharmony_ci    for (int j = 0; j < guard_count; j++) {
33411cb0ef41Sopenharmony_ci      GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
33421cb0ef41Sopenharmony_ci    }
33431cb0ef41Sopenharmony_ci    alternative.node()->Emit(compiler, &out_of_line_trace);
33441cb0ef41Sopenharmony_ci  }
33451cb0ef41Sopenharmony_ci}
33461cb0ef41Sopenharmony_ci
33471cb0ef41Sopenharmony_civoid ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
33481cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
33491cb0ef41Sopenharmony_ci  LimitResult limit_result = LimitVersions(compiler, trace);
33501cb0ef41Sopenharmony_ci  if (limit_result == DONE) return;
33511cb0ef41Sopenharmony_ci  DCHECK(limit_result == CONTINUE);
33521cb0ef41Sopenharmony_ci
33531cb0ef41Sopenharmony_ci  RecursionCheck rc(compiler);
33541cb0ef41Sopenharmony_ci
33551cb0ef41Sopenharmony_ci  switch (action_type_) {
33561cb0ef41Sopenharmony_ci    case STORE_POSITION: {
33571cb0ef41Sopenharmony_ci      Trace::DeferredCapture new_capture(data_.u_position_register.reg,
33581cb0ef41Sopenharmony_ci                                         data_.u_position_register.is_capture,
33591cb0ef41Sopenharmony_ci                                         trace);
33601cb0ef41Sopenharmony_ci      Trace new_trace = *trace;
33611cb0ef41Sopenharmony_ci      new_trace.add_action(&new_capture);
33621cb0ef41Sopenharmony_ci      on_success()->Emit(compiler, &new_trace);
33631cb0ef41Sopenharmony_ci      break;
33641cb0ef41Sopenharmony_ci    }
33651cb0ef41Sopenharmony_ci    case INCREMENT_REGISTER: {
33661cb0ef41Sopenharmony_ci      Trace::DeferredIncrementRegister new_increment(
33671cb0ef41Sopenharmony_ci          data_.u_increment_register.reg);
33681cb0ef41Sopenharmony_ci      Trace new_trace = *trace;
33691cb0ef41Sopenharmony_ci      new_trace.add_action(&new_increment);
33701cb0ef41Sopenharmony_ci      on_success()->Emit(compiler, &new_trace);
33711cb0ef41Sopenharmony_ci      break;
33721cb0ef41Sopenharmony_ci    }
33731cb0ef41Sopenharmony_ci    case SET_REGISTER_FOR_LOOP: {
33741cb0ef41Sopenharmony_ci      Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg,
33751cb0ef41Sopenharmony_ci                                                data_.u_store_register.value);
33761cb0ef41Sopenharmony_ci      Trace new_trace = *trace;
33771cb0ef41Sopenharmony_ci      new_trace.add_action(&new_set);
33781cb0ef41Sopenharmony_ci      on_success()->Emit(compiler, &new_trace);
33791cb0ef41Sopenharmony_ci      break;
33801cb0ef41Sopenharmony_ci    }
33811cb0ef41Sopenharmony_ci    case CLEAR_CAPTURES: {
33821cb0ef41Sopenharmony_ci      Trace::DeferredClearCaptures new_capture(Interval(
33831cb0ef41Sopenharmony_ci          data_.u_clear_captures.range_from, data_.u_clear_captures.range_to));
33841cb0ef41Sopenharmony_ci      Trace new_trace = *trace;
33851cb0ef41Sopenharmony_ci      new_trace.add_action(&new_capture);
33861cb0ef41Sopenharmony_ci      on_success()->Emit(compiler, &new_trace);
33871cb0ef41Sopenharmony_ci      break;
33881cb0ef41Sopenharmony_ci    }
33891cb0ef41Sopenharmony_ci    case BEGIN_POSITIVE_SUBMATCH:
33901cb0ef41Sopenharmony_ci    case BEGIN_NEGATIVE_SUBMATCH:
33911cb0ef41Sopenharmony_ci      if (!trace->is_trivial()) {
33921cb0ef41Sopenharmony_ci        trace->Flush(compiler, this);
33931cb0ef41Sopenharmony_ci      } else {
33941cb0ef41Sopenharmony_ci        assembler->WriteCurrentPositionToRegister(
33951cb0ef41Sopenharmony_ci            data_.u_submatch.current_position_register, 0);
33961cb0ef41Sopenharmony_ci        assembler->WriteStackPointerToRegister(
33971cb0ef41Sopenharmony_ci            data_.u_submatch.stack_pointer_register);
33981cb0ef41Sopenharmony_ci        on_success()->Emit(compiler, trace);
33991cb0ef41Sopenharmony_ci      }
34001cb0ef41Sopenharmony_ci      break;
34011cb0ef41Sopenharmony_ci    case EMPTY_MATCH_CHECK: {
34021cb0ef41Sopenharmony_ci      int start_pos_reg = data_.u_empty_match_check.start_register;
34031cb0ef41Sopenharmony_ci      int stored_pos = 0;
34041cb0ef41Sopenharmony_ci      int rep_reg = data_.u_empty_match_check.repetition_register;
34051cb0ef41Sopenharmony_ci      bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
34061cb0ef41Sopenharmony_ci      bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
34071cb0ef41Sopenharmony_ci      if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
34081cb0ef41Sopenharmony_ci        // If we know we haven't advanced and there is no minimum we
34091cb0ef41Sopenharmony_ci        // can just backtrack immediately.
34101cb0ef41Sopenharmony_ci        assembler->GoTo(trace->backtrack());
34111cb0ef41Sopenharmony_ci      } else if (know_dist && stored_pos < trace->cp_offset()) {
34121cb0ef41Sopenharmony_ci        // If we know we've advanced we can generate the continuation
34131cb0ef41Sopenharmony_ci        // immediately.
34141cb0ef41Sopenharmony_ci        on_success()->Emit(compiler, trace);
34151cb0ef41Sopenharmony_ci      } else if (!trace->is_trivial()) {
34161cb0ef41Sopenharmony_ci        trace->Flush(compiler, this);
34171cb0ef41Sopenharmony_ci      } else {
34181cb0ef41Sopenharmony_ci        Label skip_empty_check;
34191cb0ef41Sopenharmony_ci        // If we have a minimum number of repetitions we check the current
34201cb0ef41Sopenharmony_ci        // number first and skip the empty check if it's not enough.
34211cb0ef41Sopenharmony_ci        if (has_minimum) {
34221cb0ef41Sopenharmony_ci          int limit = data_.u_empty_match_check.repetition_limit;
34231cb0ef41Sopenharmony_ci          assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
34241cb0ef41Sopenharmony_ci        }
34251cb0ef41Sopenharmony_ci        // If the match is empty we bail out, otherwise we fall through
34261cb0ef41Sopenharmony_ci        // to the on-success continuation.
34271cb0ef41Sopenharmony_ci        assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
34281cb0ef41Sopenharmony_ci                                   trace->backtrack());
34291cb0ef41Sopenharmony_ci        assembler->Bind(&skip_empty_check);
34301cb0ef41Sopenharmony_ci        on_success()->Emit(compiler, trace);
34311cb0ef41Sopenharmony_ci      }
34321cb0ef41Sopenharmony_ci      break;
34331cb0ef41Sopenharmony_ci    }
34341cb0ef41Sopenharmony_ci    case POSITIVE_SUBMATCH_SUCCESS: {
34351cb0ef41Sopenharmony_ci      if (!trace->is_trivial()) {
34361cb0ef41Sopenharmony_ci        trace->Flush(compiler, this);
34371cb0ef41Sopenharmony_ci        return;
34381cb0ef41Sopenharmony_ci      }
34391cb0ef41Sopenharmony_ci      assembler->ReadCurrentPositionFromRegister(
34401cb0ef41Sopenharmony_ci          data_.u_submatch.current_position_register);
34411cb0ef41Sopenharmony_ci      assembler->ReadStackPointerFromRegister(
34421cb0ef41Sopenharmony_ci          data_.u_submatch.stack_pointer_register);
34431cb0ef41Sopenharmony_ci      int clear_register_count = data_.u_submatch.clear_register_count;
34441cb0ef41Sopenharmony_ci      if (clear_register_count == 0) {
34451cb0ef41Sopenharmony_ci        on_success()->Emit(compiler, trace);
34461cb0ef41Sopenharmony_ci        return;
34471cb0ef41Sopenharmony_ci      }
34481cb0ef41Sopenharmony_ci      int clear_registers_from = data_.u_submatch.clear_register_from;
34491cb0ef41Sopenharmony_ci      Label clear_registers_backtrack;
34501cb0ef41Sopenharmony_ci      Trace new_trace = *trace;
34511cb0ef41Sopenharmony_ci      new_trace.set_backtrack(&clear_registers_backtrack);
34521cb0ef41Sopenharmony_ci      on_success()->Emit(compiler, &new_trace);
34531cb0ef41Sopenharmony_ci
34541cb0ef41Sopenharmony_ci      assembler->Bind(&clear_registers_backtrack);
34551cb0ef41Sopenharmony_ci      int clear_registers_to = clear_registers_from + clear_register_count - 1;
34561cb0ef41Sopenharmony_ci      assembler->ClearRegisters(clear_registers_from, clear_registers_to);
34571cb0ef41Sopenharmony_ci
34581cb0ef41Sopenharmony_ci      DCHECK(trace->backtrack() == nullptr);
34591cb0ef41Sopenharmony_ci      assembler->Backtrack();
34601cb0ef41Sopenharmony_ci      return;
34611cb0ef41Sopenharmony_ci    }
34621cb0ef41Sopenharmony_ci    default:
34631cb0ef41Sopenharmony_ci      UNREACHABLE();
34641cb0ef41Sopenharmony_ci  }
34651cb0ef41Sopenharmony_ci}
34661cb0ef41Sopenharmony_ci
34671cb0ef41Sopenharmony_civoid BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
34681cb0ef41Sopenharmony_ci  RegExpMacroAssembler* assembler = compiler->macro_assembler();
34691cb0ef41Sopenharmony_ci  if (!trace->is_trivial()) {
34701cb0ef41Sopenharmony_ci    trace->Flush(compiler, this);
34711cb0ef41Sopenharmony_ci    return;
34721cb0ef41Sopenharmony_ci  }
34731cb0ef41Sopenharmony_ci
34741cb0ef41Sopenharmony_ci  LimitResult limit_result = LimitVersions(compiler, trace);
34751cb0ef41Sopenharmony_ci  if (limit_result == DONE) return;
34761cb0ef41Sopenharmony_ci  DCHECK(limit_result == CONTINUE);
34771cb0ef41Sopenharmony_ci
34781cb0ef41Sopenharmony_ci  RecursionCheck rc(compiler);
34791cb0ef41Sopenharmony_ci
34801cb0ef41Sopenharmony_ci  DCHECK_EQ(start_reg_ + 1, end_reg_);
34811cb0ef41Sopenharmony_ci  if (IsIgnoreCase(flags_)) {
34821cb0ef41Sopenharmony_ci    bool unicode = IsUnicode(flags_);
34831cb0ef41Sopenharmony_ci    assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
34841cb0ef41Sopenharmony_ci                                               unicode, trace->backtrack());
34851cb0ef41Sopenharmony_ci  } else {
34861cb0ef41Sopenharmony_ci    assembler->CheckNotBackReference(start_reg_, read_backward(),
34871cb0ef41Sopenharmony_ci                                     trace->backtrack());
34881cb0ef41Sopenharmony_ci  }
34891cb0ef41Sopenharmony_ci  // We are going to advance backward, so we may end up at the start.
34901cb0ef41Sopenharmony_ci  if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
34911cb0ef41Sopenharmony_ci
34921cb0ef41Sopenharmony_ci  // Check that the back reference does not end inside a surrogate pair.
34931cb0ef41Sopenharmony_ci  if (IsUnicode(flags_) && !compiler->one_byte()) {
34941cb0ef41Sopenharmony_ci    assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
34951cb0ef41Sopenharmony_ci  }
34961cb0ef41Sopenharmony_ci  on_success()->Emit(compiler, trace);
34971cb0ef41Sopenharmony_ci}
34981cb0ef41Sopenharmony_ci
34991cb0ef41Sopenharmony_civoid TextNode::CalculateOffsets() {
35001cb0ef41Sopenharmony_ci  int element_count = elements()->length();
35011cb0ef41Sopenharmony_ci  // Set up the offsets of the elements relative to the start.  This is a fixed
35021cb0ef41Sopenharmony_ci  // quantity since a TextNode can only contain fixed-width things.
35031cb0ef41Sopenharmony_ci  int cp_offset = 0;
35041cb0ef41Sopenharmony_ci  for (int i = 0; i < element_count; i++) {
35051cb0ef41Sopenharmony_ci    TextElement& elm = elements()->at(i);
35061cb0ef41Sopenharmony_ci    elm.set_cp_offset(cp_offset);
35071cb0ef41Sopenharmony_ci    cp_offset += elm.length();
35081cb0ef41Sopenharmony_ci  }
35091cb0ef41Sopenharmony_ci}
35101cb0ef41Sopenharmony_ci
35111cb0ef41Sopenharmony_cinamespace {
35121cb0ef41Sopenharmony_ci
35131cb0ef41Sopenharmony_ci// Assertion propagation moves information about assertions such as
35141cb0ef41Sopenharmony_ci// \b to the affected nodes.  For instance, in /.\b./ information must
35151cb0ef41Sopenharmony_ci// be propagated to the first '.' that whatever follows needs to know
35161cb0ef41Sopenharmony_ci// if it matched a word or a non-word, and to the second '.' that it
35171cb0ef41Sopenharmony_ci// has to check if it succeeds a word or non-word.  In this case the
35181cb0ef41Sopenharmony_ci// result will be something like:
35191cb0ef41Sopenharmony_ci//
35201cb0ef41Sopenharmony_ci//   +-------+        +------------+
35211cb0ef41Sopenharmony_ci//   |   .   |        |      .     |
35221cb0ef41Sopenharmony_ci//   +-------+  --->  +------------+
35231cb0ef41Sopenharmony_ci//   | word? |        | check word |
35241cb0ef41Sopenharmony_ci//   +-------+        +------------+
35251cb0ef41Sopenharmony_ciclass AssertionPropagator : public AllStatic {
35261cb0ef41Sopenharmony_ci public:
35271cb0ef41Sopenharmony_ci  static void VisitText(TextNode* that) {}
35281cb0ef41Sopenharmony_ci
35291cb0ef41Sopenharmony_ci  static void VisitAction(ActionNode* that) {
35301cb0ef41Sopenharmony_ci    // If the next node is interested in what it follows then this node
35311cb0ef41Sopenharmony_ci    // has to be interested too so it can pass the information on.
35321cb0ef41Sopenharmony_ci    that->info()->AddFromFollowing(that->on_success()->info());
35331cb0ef41Sopenharmony_ci  }
35341cb0ef41Sopenharmony_ci
35351cb0ef41Sopenharmony_ci  static void VisitChoice(ChoiceNode* that, int i) {
35361cb0ef41Sopenharmony_ci    // Anything the following nodes need to know has to be known by
35371cb0ef41Sopenharmony_ci    // this node also, so it can pass it on.
35381cb0ef41Sopenharmony_ci    that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info());
35391cb0ef41Sopenharmony_ci  }
35401cb0ef41Sopenharmony_ci
35411cb0ef41Sopenharmony_ci  static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
35421cb0ef41Sopenharmony_ci    that->info()->AddFromFollowing(that->continue_node()->info());
35431cb0ef41Sopenharmony_ci  }
35441cb0ef41Sopenharmony_ci
35451cb0ef41Sopenharmony_ci  static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {
35461cb0ef41Sopenharmony_ci    that->info()->AddFromFollowing(that->loop_node()->info());
35471cb0ef41Sopenharmony_ci  }
35481cb0ef41Sopenharmony_ci
35491cb0ef41Sopenharmony_ci  static void VisitNegativeLookaroundChoiceLookaroundNode(
35501cb0ef41Sopenharmony_ci      NegativeLookaroundChoiceNode* that) {
35511cb0ef41Sopenharmony_ci    VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex);
35521cb0ef41Sopenharmony_ci  }
35531cb0ef41Sopenharmony_ci
35541cb0ef41Sopenharmony_ci  static void VisitNegativeLookaroundChoiceContinueNode(
35551cb0ef41Sopenharmony_ci      NegativeLookaroundChoiceNode* that) {
35561cb0ef41Sopenharmony_ci    VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex);
35571cb0ef41Sopenharmony_ci  }
35581cb0ef41Sopenharmony_ci
35591cb0ef41Sopenharmony_ci  static void VisitBackReference(BackReferenceNode* that) {}
35601cb0ef41Sopenharmony_ci
35611cb0ef41Sopenharmony_ci  static void VisitAssertion(AssertionNode* that) {}
35621cb0ef41Sopenharmony_ci};
35631cb0ef41Sopenharmony_ci
35641cb0ef41Sopenharmony_ci// Propagates information about the minimum size of successful matches from
35651cb0ef41Sopenharmony_ci// successor nodes to their predecessors. Note that all eats_at_least values
35661cb0ef41Sopenharmony_ci// are initialized to zero before analysis.
35671cb0ef41Sopenharmony_ciclass EatsAtLeastPropagator : public AllStatic {
35681cb0ef41Sopenharmony_ci public:
35691cb0ef41Sopenharmony_ci  static void VisitText(TextNode* that) {
35701cb0ef41Sopenharmony_ci    // The eats_at_least value is not used if reading backward.
35711cb0ef41Sopenharmony_ci    if (!that->read_backward()) {
35721cb0ef41Sopenharmony_ci      // We are not at the start after this node, and thus we can use the
35731cb0ef41Sopenharmony_ci      // successor's eats_at_least_from_not_start value.
35741cb0ef41Sopenharmony_ci      uint8_t eats_at_least = base::saturated_cast<uint8_t>(
35751cb0ef41Sopenharmony_ci          that->Length() + that->on_success()
35761cb0ef41Sopenharmony_ci                               ->eats_at_least_info()
35771cb0ef41Sopenharmony_ci                               ->eats_at_least_from_not_start);
35781cb0ef41Sopenharmony_ci      that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least));
35791cb0ef41Sopenharmony_ci    }
35801cb0ef41Sopenharmony_ci  }
35811cb0ef41Sopenharmony_ci
35821cb0ef41Sopenharmony_ci  static void VisitAction(ActionNode* that) {
35831cb0ef41Sopenharmony_ci    switch (that->action_type()) {
35841cb0ef41Sopenharmony_ci      case ActionNode::BEGIN_POSITIVE_SUBMATCH:
35851cb0ef41Sopenharmony_ci      case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
35861cb0ef41Sopenharmony_ci        // We do not propagate eats_at_least data through positive lookarounds,
35871cb0ef41Sopenharmony_ci        // because they rewind input.
35881cb0ef41Sopenharmony_ci        // TODO(v8:11859) Potential approaches for fixing this include:
35891cb0ef41Sopenharmony_ci        // 1. Add a dedicated choice node for positive lookaround, similar to
35901cb0ef41Sopenharmony_ci        //    NegativeLookaroundChoiceNode.
35911cb0ef41Sopenharmony_ci        // 2. Add an eats_at_least_inside_loop field to EatsAtLeastInfo, which
35921cb0ef41Sopenharmony_ci        //    is <= eats_at_least_from_possibly_start, and use that value in
35931cb0ef41Sopenharmony_ci        //    EatsAtLeastFromLoopEntry.
35941cb0ef41Sopenharmony_ci        DCHECK(that->eats_at_least_info()->IsZero());
35951cb0ef41Sopenharmony_ci        break;
35961cb0ef41Sopenharmony_ci      case ActionNode::SET_REGISTER_FOR_LOOP:
35971cb0ef41Sopenharmony_ci        // SET_REGISTER_FOR_LOOP indicates a loop entry point, which means the
35981cb0ef41Sopenharmony_ci        // loop body will run at least the minimum number of times before the
35991cb0ef41Sopenharmony_ci        // continuation case can run.
36001cb0ef41Sopenharmony_ci        that->set_eats_at_least_info(
36011cb0ef41Sopenharmony_ci            that->on_success()->EatsAtLeastFromLoopEntry());
36021cb0ef41Sopenharmony_ci        break;
36031cb0ef41Sopenharmony_ci      case ActionNode::BEGIN_NEGATIVE_SUBMATCH:
36041cb0ef41Sopenharmony_ci      default:
36051cb0ef41Sopenharmony_ci        // Otherwise, the current node eats at least as much as its successor.
36061cb0ef41Sopenharmony_ci        // Note: we can propagate eats_at_least data for BEGIN_NEGATIVE_SUBMATCH
36071cb0ef41Sopenharmony_ci        // because NegativeLookaroundChoiceNode ignores its lookaround successor
36081cb0ef41Sopenharmony_ci        // when computing eats-at-least and quick check information.
36091cb0ef41Sopenharmony_ci        that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
36101cb0ef41Sopenharmony_ci        break;
36111cb0ef41Sopenharmony_ci    }
36121cb0ef41Sopenharmony_ci  }
36131cb0ef41Sopenharmony_ci
36141cb0ef41Sopenharmony_ci  static void VisitChoice(ChoiceNode* that, int i) {
36151cb0ef41Sopenharmony_ci    // The minimum possible match from a choice node is the minimum of its
36161cb0ef41Sopenharmony_ci    // successors.
36171cb0ef41Sopenharmony_ci    EatsAtLeastInfo eats_at_least =
36181cb0ef41Sopenharmony_ci        i == 0 ? EatsAtLeastInfo(UINT8_MAX) : *that->eats_at_least_info();
36191cb0ef41Sopenharmony_ci    eats_at_least.SetMin(
36201cb0ef41Sopenharmony_ci        *that->alternatives()->at(i).node()->eats_at_least_info());
36211cb0ef41Sopenharmony_ci    that->set_eats_at_least_info(eats_at_least);
36221cb0ef41Sopenharmony_ci  }
36231cb0ef41Sopenharmony_ci
36241cb0ef41Sopenharmony_ci  static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
36251cb0ef41Sopenharmony_ci    if (!that->read_backward()) {
36261cb0ef41Sopenharmony_ci      that->set_eats_at_least_info(
36271cb0ef41Sopenharmony_ci          *that->continue_node()->eats_at_least_info());
36281cb0ef41Sopenharmony_ci    }
36291cb0ef41Sopenharmony_ci  }
36301cb0ef41Sopenharmony_ci
36311cb0ef41Sopenharmony_ci  static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}
36321cb0ef41Sopenharmony_ci
36331cb0ef41Sopenharmony_ci  static void VisitNegativeLookaroundChoiceLookaroundNode(
36341cb0ef41Sopenharmony_ci      NegativeLookaroundChoiceNode* that) {}
36351cb0ef41Sopenharmony_ci
36361cb0ef41Sopenharmony_ci  static void VisitNegativeLookaroundChoiceContinueNode(
36371cb0ef41Sopenharmony_ci      NegativeLookaroundChoiceNode* that) {
36381cb0ef41Sopenharmony_ci    that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
36391cb0ef41Sopenharmony_ci  }
36401cb0ef41Sopenharmony_ci
36411cb0ef41Sopenharmony_ci  static void VisitBackReference(BackReferenceNode* that) {
36421cb0ef41Sopenharmony_ci    if (!that->read_backward()) {
36431cb0ef41Sopenharmony_ci      that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
36441cb0ef41Sopenharmony_ci    }
36451cb0ef41Sopenharmony_ci  }
36461cb0ef41Sopenharmony_ci
36471cb0ef41Sopenharmony_ci  static void VisitAssertion(AssertionNode* that) {
36481cb0ef41Sopenharmony_ci    EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info();
36491cb0ef41Sopenharmony_ci    if (that->assertion_type() == AssertionNode::AT_START) {
36501cb0ef41Sopenharmony_ci      // If we know we are not at the start and we are asked "how many
36511cb0ef41Sopenharmony_ci      // characters will you match if you succeed?" then we can answer anything
36521cb0ef41Sopenharmony_ci      // since false implies false.  So let's just set the max answer
36531cb0ef41Sopenharmony_ci      // (UINT8_MAX) since that won't prevent us from preloading a lot of
36541cb0ef41Sopenharmony_ci      // characters for the other branches in the node graph.
36551cb0ef41Sopenharmony_ci      eats_at_least.eats_at_least_from_not_start = UINT8_MAX;
36561cb0ef41Sopenharmony_ci    }
36571cb0ef41Sopenharmony_ci    that->set_eats_at_least_info(eats_at_least);
36581cb0ef41Sopenharmony_ci  }
36591cb0ef41Sopenharmony_ci};
36601cb0ef41Sopenharmony_ci
36611cb0ef41Sopenharmony_ci}  // namespace
36621cb0ef41Sopenharmony_ci
36631cb0ef41Sopenharmony_ci// -------------------------------------------------------------------
36641cb0ef41Sopenharmony_ci// Analysis
36651cb0ef41Sopenharmony_ci
36661cb0ef41Sopenharmony_ci// Iterates the node graph and provides the opportunity for propagators to set
36671cb0ef41Sopenharmony_ci// values that depend on successor nodes.
36681cb0ef41Sopenharmony_citemplate <typename... Propagators>
36691cb0ef41Sopenharmony_ciclass Analysis : public NodeVisitor {
36701cb0ef41Sopenharmony_ci public:
36711cb0ef41Sopenharmony_ci  Analysis(Isolate* isolate, bool is_one_byte, RegExpFlags flags)
36721cb0ef41Sopenharmony_ci      : isolate_(isolate),
36731cb0ef41Sopenharmony_ci        is_one_byte_(is_one_byte),
36741cb0ef41Sopenharmony_ci        flags_(flags),
36751cb0ef41Sopenharmony_ci        error_(RegExpError::kNone) {}
36761cb0ef41Sopenharmony_ci
36771cb0ef41Sopenharmony_ci  void EnsureAnalyzed(RegExpNode* that) {
36781cb0ef41Sopenharmony_ci    StackLimitCheck check(isolate());
36791cb0ef41Sopenharmony_ci    if (check.HasOverflowed()) {
36801cb0ef41Sopenharmony_ci      if (FLAG_correctness_fuzzer_suppressions) {
36811cb0ef41Sopenharmony_ci        FATAL("Analysis: Aborting on stack overflow");
36821cb0ef41Sopenharmony_ci      }
36831cb0ef41Sopenharmony_ci      fail(RegExpError::kAnalysisStackOverflow);
36841cb0ef41Sopenharmony_ci      return;
36851cb0ef41Sopenharmony_ci    }
36861cb0ef41Sopenharmony_ci    if (that->info()->been_analyzed || that->info()->being_analyzed) return;
36871cb0ef41Sopenharmony_ci    that->info()->being_analyzed = true;
36881cb0ef41Sopenharmony_ci    that->Accept(this);
36891cb0ef41Sopenharmony_ci    that->info()->being_analyzed = false;
36901cb0ef41Sopenharmony_ci    that->info()->been_analyzed = true;
36911cb0ef41Sopenharmony_ci  }
36921cb0ef41Sopenharmony_ci
36931cb0ef41Sopenharmony_ci  bool has_failed() { return error_ != RegExpError::kNone; }
36941cb0ef41Sopenharmony_ci  RegExpError error() {
36951cb0ef41Sopenharmony_ci    DCHECK(error_ != RegExpError::kNone);
36961cb0ef41Sopenharmony_ci    return error_;
36971cb0ef41Sopenharmony_ci  }
36981cb0ef41Sopenharmony_ci  void fail(RegExpError error) { error_ = error; }
36991cb0ef41Sopenharmony_ci
37001cb0ef41Sopenharmony_ci  Isolate* isolate() const { return isolate_; }
37011cb0ef41Sopenharmony_ci
37021cb0ef41Sopenharmony_ci  void VisitEnd(EndNode* that) override {
37031cb0ef41Sopenharmony_ci    // nothing to do
37041cb0ef41Sopenharmony_ci  }
37051cb0ef41Sopenharmony_ci
37061cb0ef41Sopenharmony_ci// Used to call the given static function on each propagator / variadic template
37071cb0ef41Sopenharmony_ci// argument.
37081cb0ef41Sopenharmony_ci#define STATIC_FOR_EACH(expr)       \
37091cb0ef41Sopenharmony_ci  do {                              \
37101cb0ef41Sopenharmony_ci    int dummy[] = {((expr), 0)...}; \
37111cb0ef41Sopenharmony_ci    USE(dummy);                     \
37121cb0ef41Sopenharmony_ci  } while (false)
37131cb0ef41Sopenharmony_ci
37141cb0ef41Sopenharmony_ci  void VisitText(TextNode* that) override {
37151cb0ef41Sopenharmony_ci    that->MakeCaseIndependent(isolate(), is_one_byte_, flags_);
37161cb0ef41Sopenharmony_ci    EnsureAnalyzed(that->on_success());
37171cb0ef41Sopenharmony_ci    if (has_failed()) return;
37181cb0ef41Sopenharmony_ci    that->CalculateOffsets();
37191cb0ef41Sopenharmony_ci    STATIC_FOR_EACH(Propagators::VisitText(that));
37201cb0ef41Sopenharmony_ci  }
37211cb0ef41Sopenharmony_ci
37221cb0ef41Sopenharmony_ci  void VisitAction(ActionNode* that) override {
37231cb0ef41Sopenharmony_ci    EnsureAnalyzed(that->on_success());
37241cb0ef41Sopenharmony_ci    if (has_failed()) return;
37251cb0ef41Sopenharmony_ci    STATIC_FOR_EACH(Propagators::VisitAction(that));
37261cb0ef41Sopenharmony_ci  }
37271cb0ef41Sopenharmony_ci
37281cb0ef41Sopenharmony_ci  void VisitChoice(ChoiceNode* that) override {
37291cb0ef41Sopenharmony_ci    for (int i = 0; i < that->alternatives()->length(); i++) {
37301cb0ef41Sopenharmony_ci      EnsureAnalyzed(that->alternatives()->at(i).node());
37311cb0ef41Sopenharmony_ci      if (has_failed()) return;
37321cb0ef41Sopenharmony_ci      STATIC_FOR_EACH(Propagators::VisitChoice(that, i));
37331cb0ef41Sopenharmony_ci    }
37341cb0ef41Sopenharmony_ci  }
37351cb0ef41Sopenharmony_ci
37361cb0ef41Sopenharmony_ci  void VisitLoopChoice(LoopChoiceNode* that) override {
37371cb0ef41Sopenharmony_ci    DCHECK_EQ(that->alternatives()->length(), 2);  // Just loop and continue.
37381cb0ef41Sopenharmony_ci
37391cb0ef41Sopenharmony_ci    // First propagate all information from the continuation node.
37401cb0ef41Sopenharmony_ci    EnsureAnalyzed(that->continue_node());
37411cb0ef41Sopenharmony_ci    if (has_failed()) return;
37421cb0ef41Sopenharmony_ci    STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that));
37431cb0ef41Sopenharmony_ci
37441cb0ef41Sopenharmony_ci    // Check the loop last since it may need the value of this node
37451cb0ef41Sopenharmony_ci    // to get a correct result.
37461cb0ef41Sopenharmony_ci    EnsureAnalyzed(that->loop_node());
37471cb0ef41Sopenharmony_ci    if (has_failed()) return;
37481cb0ef41Sopenharmony_ci    STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that));
37491cb0ef41Sopenharmony_ci  }
37501cb0ef41Sopenharmony_ci
37511cb0ef41Sopenharmony_ci  void VisitNegativeLookaroundChoice(
37521cb0ef41Sopenharmony_ci      NegativeLookaroundChoiceNode* that) override {
37531cb0ef41Sopenharmony_ci    DCHECK_EQ(that->alternatives()->length(), 2);  // Lookaround and continue.
37541cb0ef41Sopenharmony_ci
37551cb0ef41Sopenharmony_ci    EnsureAnalyzed(that->lookaround_node());
37561cb0ef41Sopenharmony_ci    if (has_failed()) return;
37571cb0ef41Sopenharmony_ci    STATIC_FOR_EACH(
37581cb0ef41Sopenharmony_ci        Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that));
37591cb0ef41Sopenharmony_ci
37601cb0ef41Sopenharmony_ci    EnsureAnalyzed(that->continue_node());
37611cb0ef41Sopenharmony_ci    if (has_failed()) return;
37621cb0ef41Sopenharmony_ci    STATIC_FOR_EACH(
37631cb0ef41Sopenharmony_ci        Propagators::VisitNegativeLookaroundChoiceContinueNode(that));
37641cb0ef41Sopenharmony_ci  }
37651cb0ef41Sopenharmony_ci
37661cb0ef41Sopenharmony_ci  void VisitBackReference(BackReferenceNode* that) override {
37671cb0ef41Sopenharmony_ci    EnsureAnalyzed(that->on_success());
37681cb0ef41Sopenharmony_ci    if (has_failed()) return;
37691cb0ef41Sopenharmony_ci    STATIC_FOR_EACH(Propagators::VisitBackReference(that));
37701cb0ef41Sopenharmony_ci  }
37711cb0ef41Sopenharmony_ci
37721cb0ef41Sopenharmony_ci  void VisitAssertion(AssertionNode* that) override {
37731cb0ef41Sopenharmony_ci    EnsureAnalyzed(that->on_success());
37741cb0ef41Sopenharmony_ci    if (has_failed()) return;
37751cb0ef41Sopenharmony_ci    STATIC_FOR_EACH(Propagators::VisitAssertion(that));
37761cb0ef41Sopenharmony_ci  }
37771cb0ef41Sopenharmony_ci
37781cb0ef41Sopenharmony_ci#undef STATIC_FOR_EACH
37791cb0ef41Sopenharmony_ci
37801cb0ef41Sopenharmony_ci private:
37811cb0ef41Sopenharmony_ci  Isolate* isolate_;
37821cb0ef41Sopenharmony_ci  const bool is_one_byte_;
37831cb0ef41Sopenharmony_ci  const RegExpFlags flags_;
37841cb0ef41Sopenharmony_ci  RegExpError error_;
37851cb0ef41Sopenharmony_ci
37861cb0ef41Sopenharmony_ci  DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
37871cb0ef41Sopenharmony_ci};
37881cb0ef41Sopenharmony_ci
37891cb0ef41Sopenharmony_ciRegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpFlags flags,
37901cb0ef41Sopenharmony_ci                          RegExpNode* node) {
37911cb0ef41Sopenharmony_ci  Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(
37921cb0ef41Sopenharmony_ci      isolate, is_one_byte, flags);
37931cb0ef41Sopenharmony_ci  DCHECK_EQ(node->info()->been_analyzed, false);
37941cb0ef41Sopenharmony_ci  analysis.EnsureAnalyzed(node);
37951cb0ef41Sopenharmony_ci  DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone);
37961cb0ef41Sopenharmony_ci  return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
37971cb0ef41Sopenharmony_ci}
37981cb0ef41Sopenharmony_ci
37991cb0ef41Sopenharmony_civoid BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
38001cb0ef41Sopenharmony_ci                                     BoyerMooreLookahead* bm,
38011cb0ef41Sopenharmony_ci                                     bool not_at_start) {
38021cb0ef41Sopenharmony_ci  // Working out the set of characters that a backreference can match is too
38031cb0ef41Sopenharmony_ci  // hard, so we just say that any character can match.
38041cb0ef41Sopenharmony_ci  bm->SetRest(offset);
38051cb0ef41Sopenharmony_ci  SaveBMInfo(bm, not_at_start, offset);
38061cb0ef41Sopenharmony_ci}
38071cb0ef41Sopenharmony_ci
38081cb0ef41Sopenharmony_ciSTATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
38091cb0ef41Sopenharmony_ci              RegExpMacroAssembler::kTableSize);
38101cb0ef41Sopenharmony_ci
38111cb0ef41Sopenharmony_civoid ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
38121cb0ef41Sopenharmony_ci                              BoyerMooreLookahead* bm, bool not_at_start) {
38131cb0ef41Sopenharmony_ci  ZoneList<GuardedAlternative>* alts = alternatives();
38141cb0ef41Sopenharmony_ci  budget = (budget - 1) / alts->length();
38151cb0ef41Sopenharmony_ci  for (int i = 0; i < alts->length(); i++) {
38161cb0ef41Sopenharmony_ci    GuardedAlternative& alt = alts->at(i);
38171cb0ef41Sopenharmony_ci    if (alt.guards() != nullptr && alt.guards()->length() != 0) {
38181cb0ef41Sopenharmony_ci      bm->SetRest(offset);  // Give up trying to fill in info.
38191cb0ef41Sopenharmony_ci      SaveBMInfo(bm, not_at_start, offset);
38201cb0ef41Sopenharmony_ci      return;
38211cb0ef41Sopenharmony_ci    }
38221cb0ef41Sopenharmony_ci    alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
38231cb0ef41Sopenharmony_ci  }
38241cb0ef41Sopenharmony_ci  SaveBMInfo(bm, not_at_start, offset);
38251cb0ef41Sopenharmony_ci}
38261cb0ef41Sopenharmony_ci
38271cb0ef41Sopenharmony_civoid TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
38281cb0ef41Sopenharmony_ci                            BoyerMooreLookahead* bm, bool not_at_start) {
38291cb0ef41Sopenharmony_ci  if (initial_offset >= bm->length()) return;
38301cb0ef41Sopenharmony_ci  int offset = initial_offset;
38311cb0ef41Sopenharmony_ci  int max_char = bm->max_char();
38321cb0ef41Sopenharmony_ci  for (int i = 0; i < elements()->length(); i++) {
38331cb0ef41Sopenharmony_ci    if (offset >= bm->length()) {
38341cb0ef41Sopenharmony_ci      if (initial_offset == 0) set_bm_info(not_at_start, bm);
38351cb0ef41Sopenharmony_ci      return;
38361cb0ef41Sopenharmony_ci    }
38371cb0ef41Sopenharmony_ci    TextElement text = elements()->at(i);
38381cb0ef41Sopenharmony_ci    if (text.text_type() == TextElement::ATOM) {
38391cb0ef41Sopenharmony_ci      RegExpAtom* atom = text.atom();
38401cb0ef41Sopenharmony_ci      for (int j = 0; j < atom->length(); j++, offset++) {
38411cb0ef41Sopenharmony_ci        if (offset >= bm->length()) {
38421cb0ef41Sopenharmony_ci          if (initial_offset == 0) set_bm_info(not_at_start, bm);
38431cb0ef41Sopenharmony_ci          return;
38441cb0ef41Sopenharmony_ci        }
38451cb0ef41Sopenharmony_ci        base::uc16 character = atom->data()[j];
38461cb0ef41Sopenharmony_ci        if (IsIgnoreCase(bm->compiler()->flags())) {
38471cb0ef41Sopenharmony_ci          unibrow::uchar chars[4];
38481cb0ef41Sopenharmony_ci          int length = GetCaseIndependentLetters(
38491cb0ef41Sopenharmony_ci              isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
38501cb0ef41Sopenharmony_ci              chars, 4);
38511cb0ef41Sopenharmony_ci          for (int k = 0; k < length; k++) {
38521cb0ef41Sopenharmony_ci            bm->Set(offset, chars[k]);
38531cb0ef41Sopenharmony_ci          }
38541cb0ef41Sopenharmony_ci        } else {
38551cb0ef41Sopenharmony_ci          if (character <= max_char) bm->Set(offset, character);
38561cb0ef41Sopenharmony_ci        }
38571cb0ef41Sopenharmony_ci      }
38581cb0ef41Sopenharmony_ci    } else {
38591cb0ef41Sopenharmony_ci      DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type());
38601cb0ef41Sopenharmony_ci      RegExpCharacterClass* char_class = text.char_class();
38611cb0ef41Sopenharmony_ci      ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
38621cb0ef41Sopenharmony_ci      if (char_class->is_negated()) {
38631cb0ef41Sopenharmony_ci        bm->SetAll(offset);
38641cb0ef41Sopenharmony_ci      } else {
38651cb0ef41Sopenharmony_ci        for (int k = 0; k < ranges->length(); k++) {
38661cb0ef41Sopenharmony_ci          CharacterRange& range = ranges->at(k);
38671cb0ef41Sopenharmony_ci          if (static_cast<int>(range.from()) > max_char) continue;
38681cb0ef41Sopenharmony_ci          int to = std::min(max_char, static_cast<int>(range.to()));
38691cb0ef41Sopenharmony_ci          bm->SetInterval(offset, Interval(range.from(), to));
38701cb0ef41Sopenharmony_ci        }
38711cb0ef41Sopenharmony_ci      }
38721cb0ef41Sopenharmony_ci      offset++;
38731cb0ef41Sopenharmony_ci    }
38741cb0ef41Sopenharmony_ci  }
38751cb0ef41Sopenharmony_ci  if (offset >= bm->length()) {
38761cb0ef41Sopenharmony_ci    if (initial_offset == 0) set_bm_info(not_at_start, bm);
38771cb0ef41Sopenharmony_ci    return;
38781cb0ef41Sopenharmony_ci  }
38791cb0ef41Sopenharmony_ci  on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
38801cb0ef41Sopenharmony_ci                             true);  // Not at start after a text node.
38811cb0ef41Sopenharmony_ci  if (initial_offset == 0) set_bm_info(not_at_start, bm);
38821cb0ef41Sopenharmony_ci}
38831cb0ef41Sopenharmony_ci
38841cb0ef41Sopenharmony_ciRegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
38851cb0ef41Sopenharmony_ci    RegExpNode* on_success) {
38861cb0ef41Sopenharmony_ci  DCHECK(!read_backward());
38871cb0ef41Sopenharmony_ci  ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
38881cb0ef41Sopenharmony_ci      zone(), CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
38891cb0ef41Sopenharmony_ci  ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
38901cb0ef41Sopenharmony_ci      zone(), CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
38911cb0ef41Sopenharmony_ci
38921cb0ef41Sopenharmony_ci  ChoiceNode* optional_step_back = zone()->New<ChoiceNode>(2, zone());
38931cb0ef41Sopenharmony_ci
38941cb0ef41Sopenharmony_ci  int stack_register = UnicodeLookaroundStackRegister();
38951cb0ef41Sopenharmony_ci  int position_register = UnicodeLookaroundPositionRegister();
38961cb0ef41Sopenharmony_ci  RegExpNode* step_back = TextNode::CreateForCharacterRanges(
38971cb0ef41Sopenharmony_ci      zone(), lead_surrogates, true, on_success);
38981cb0ef41Sopenharmony_ci  RegExpLookaround::Builder builder(true, step_back, stack_register,
38991cb0ef41Sopenharmony_ci                                    position_register);
39001cb0ef41Sopenharmony_ci  RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
39011cb0ef41Sopenharmony_ci      zone(), trail_surrogates, false, builder.on_match_success());
39021cb0ef41Sopenharmony_ci
39031cb0ef41Sopenharmony_ci  optional_step_back->AddAlternative(
39041cb0ef41Sopenharmony_ci      GuardedAlternative(builder.ForMatch(match_trail)));
39051cb0ef41Sopenharmony_ci  optional_step_back->AddAlternative(GuardedAlternative(on_success));
39061cb0ef41Sopenharmony_ci
39071cb0ef41Sopenharmony_ci  return optional_step_back;
39081cb0ef41Sopenharmony_ci}
39091cb0ef41Sopenharmony_ci
39101cb0ef41Sopenharmony_ciRegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
39111cb0ef41Sopenharmony_ci                                             RegExpFlags flags,
39121cb0ef41Sopenharmony_ci                                             bool is_one_byte) {
39131cb0ef41Sopenharmony_ci  // Wrap the body of the regexp in capture #0.
39141cb0ef41Sopenharmony_ci  RegExpNode* captured_body =
39151cb0ef41Sopenharmony_ci      RegExpCapture::ToNode(data->tree, 0, this, accept());
39161cb0ef41Sopenharmony_ci  RegExpNode* node = captured_body;
39171cb0ef41Sopenharmony_ci  if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags)) {
39181cb0ef41Sopenharmony_ci    // Add a .*? at the beginning, outside the body capture, unless
39191cb0ef41Sopenharmony_ci    // this expression is anchored at the beginning or sticky.
39201cb0ef41Sopenharmony_ci    RegExpNode* loop_node = RegExpQuantifier::ToNode(
39211cb0ef41Sopenharmony_ci        0, RegExpTree::kInfinity, false,
39221cb0ef41Sopenharmony_ci        zone()->New<RegExpCharacterClass>(StandardCharacterSet::kEverything),
39231cb0ef41Sopenharmony_ci        this, captured_body, data->contains_anchor);
39241cb0ef41Sopenharmony_ci
39251cb0ef41Sopenharmony_ci    if (data->contains_anchor) {
39261cb0ef41Sopenharmony_ci      // Unroll loop once, to take care of the case that might start
39271cb0ef41Sopenharmony_ci      // at the start of input.
39281cb0ef41Sopenharmony_ci      ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone());
39291cb0ef41Sopenharmony_ci      first_step_node->AddAlternative(GuardedAlternative(captured_body));
39301cb0ef41Sopenharmony_ci      first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>(
39311cb0ef41Sopenharmony_ci          zone()->New<RegExpCharacterClass>(StandardCharacterSet::kEverything),
39321cb0ef41Sopenharmony_ci          false, loop_node)));
39331cb0ef41Sopenharmony_ci      node = first_step_node;
39341cb0ef41Sopenharmony_ci    } else {
39351cb0ef41Sopenharmony_ci      node = loop_node;
39361cb0ef41Sopenharmony_ci    }
39371cb0ef41Sopenharmony_ci  }
39381cb0ef41Sopenharmony_ci  if (is_one_byte) {
39391cb0ef41Sopenharmony_ci    node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
39401cb0ef41Sopenharmony_ci    // Do it again to propagate the new nodes to places where they were not
39411cb0ef41Sopenharmony_ci    // put because they had not been calculated yet.
39421cb0ef41Sopenharmony_ci    if (node != nullptr) {
39431cb0ef41Sopenharmony_ci      node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
39441cb0ef41Sopenharmony_ci    }
39451cb0ef41Sopenharmony_ci  } else if (IsUnicode(flags) && (IsGlobal(flags) || IsSticky(flags))) {
39461cb0ef41Sopenharmony_ci    node = OptionallyStepBackToLeadSurrogate(node);
39471cb0ef41Sopenharmony_ci  }
39481cb0ef41Sopenharmony_ci
39491cb0ef41Sopenharmony_ci  if (node == nullptr) node = zone()->New<EndNode>(EndNode::BACKTRACK, zone());
39501cb0ef41Sopenharmony_ci  return node;
39511cb0ef41Sopenharmony_ci}
39521cb0ef41Sopenharmony_ci
39531cb0ef41Sopenharmony_civoid RegExpCompiler::ToNodeCheckForStackOverflow() {
39541cb0ef41Sopenharmony_ci  if (StackLimitCheck{isolate()}.HasOverflowed()) {
39551cb0ef41Sopenharmony_ci    FatalProcessOutOfMemory(isolate(), "RegExpCompiler");
39561cb0ef41Sopenharmony_ci  }
39571cb0ef41Sopenharmony_ci}
39581cb0ef41Sopenharmony_ci
39591cb0ef41Sopenharmony_ci}  // namespace internal
39601cb0ef41Sopenharmony_ci}  // namespace v8
3961