11cb0ef41Sopenharmony_ci// Copyright 2012 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#ifndef V8_REGEXP_REGEXP_H_
61cb0ef41Sopenharmony_ci#define V8_REGEXP_REGEXP_H_
71cb0ef41Sopenharmony_ci
81cb0ef41Sopenharmony_ci#include "src/common/assert-scope.h"
91cb0ef41Sopenharmony_ci#include "src/handles/handles.h"
101cb0ef41Sopenharmony_ci#include "src/regexp/regexp-error.h"
111cb0ef41Sopenharmony_ci#include "src/regexp/regexp-flags.h"
121cb0ef41Sopenharmony_ci#include "src/zone/zone-containers.h"
131cb0ef41Sopenharmony_ci
141cb0ef41Sopenharmony_cinamespace v8 {
151cb0ef41Sopenharmony_cinamespace internal {
161cb0ef41Sopenharmony_ci
171cb0ef41Sopenharmony_ciclass JSRegExp;
181cb0ef41Sopenharmony_ciclass RegExpCapture;
191cb0ef41Sopenharmony_ciclass RegExpMatchInfo;
201cb0ef41Sopenharmony_ciclass RegExpNode;
211cb0ef41Sopenharmony_ciclass RegExpTree;
221cb0ef41Sopenharmony_ci
// Selects what the regexp compiler produces for a pattern: bytecode or native
// code. The underlying type is fixed to int.
enum class RegExpCompilationTarget : int {
  kBytecode,  // == 0
  kNative,    // == 1
};
241cb0ef41Sopenharmony_ci
251cb0ef41Sopenharmony_ci// TODO(jgruber): Do not expose in regexp.h.
261cb0ef41Sopenharmony_ci// TODO(jgruber): Consider splitting between ParseData and CompileData.
271cb0ef41Sopenharmony_cistruct RegExpCompileData {
281cb0ef41Sopenharmony_ci  // The parsed AST as produced by the RegExpParser.
291cb0ef41Sopenharmony_ci  RegExpTree* tree = nullptr;
301cb0ef41Sopenharmony_ci
311cb0ef41Sopenharmony_ci  // The compiled Node graph as produced by RegExpTree::ToNode methods.
321cb0ef41Sopenharmony_ci  RegExpNode* node = nullptr;
331cb0ef41Sopenharmony_ci
341cb0ef41Sopenharmony_ci  // Either the generated code as produced by the compiler or a trampoline
351cb0ef41Sopenharmony_ci  // to the interpreter.
361cb0ef41Sopenharmony_ci  Handle<Object> code;
371cb0ef41Sopenharmony_ci
381cb0ef41Sopenharmony_ci  // True, iff the pattern is a 'simple' atom with zero captures. In other
391cb0ef41Sopenharmony_ci  // words, the pattern consists of a string with no metacharacters and special
401cb0ef41Sopenharmony_ci  // regexp features, and can be implemented as a standard string search.
411cb0ef41Sopenharmony_ci  bool simple = true;
421cb0ef41Sopenharmony_ci
431cb0ef41Sopenharmony_ci  // True, iff the pattern is anchored at the start of the string with '^'.
441cb0ef41Sopenharmony_ci  bool contains_anchor = false;
451cb0ef41Sopenharmony_ci
461cb0ef41Sopenharmony_ci  // Only set if the pattern contains named captures.
471cb0ef41Sopenharmony_ci  // Note: the lifetime equals that of the parse/compile zone.
481cb0ef41Sopenharmony_ci  ZoneVector<RegExpCapture*>* named_captures = nullptr;
491cb0ef41Sopenharmony_ci
501cb0ef41Sopenharmony_ci  // The error message. Only used if an error occurred during parsing or
511cb0ef41Sopenharmony_ci  // compilation.
521cb0ef41Sopenharmony_ci  RegExpError error = RegExpError::kNone;
531cb0ef41Sopenharmony_ci
541cb0ef41Sopenharmony_ci  // The position at which the error was detected. Only used if an
551cb0ef41Sopenharmony_ci  // error occurred.
561cb0ef41Sopenharmony_ci  int error_pos = 0;
571cb0ef41Sopenharmony_ci
581cb0ef41Sopenharmony_ci  // The number of capture groups, without the global capture \0.
591cb0ef41Sopenharmony_ci  int capture_count = 0;
601cb0ef41Sopenharmony_ci
611cb0ef41Sopenharmony_ci  // The number of registers used by the generated code.
621cb0ef41Sopenharmony_ci  int register_count = 0;
631cb0ef41Sopenharmony_ci
641cb0ef41Sopenharmony_ci  // The compilation target (bytecode or native code).
651cb0ef41Sopenharmony_ci  RegExpCompilationTarget compilation_target;
661cb0ef41Sopenharmony_ci};
671cb0ef41Sopenharmony_ci
681cb0ef41Sopenharmony_ciclass RegExp final : public AllStatic {
691cb0ef41Sopenharmony_ci public:
701cb0ef41Sopenharmony_ci  // Whether the irregexp engine generates interpreter bytecode.
711cb0ef41Sopenharmony_ci  static bool CanGenerateBytecode();
721cb0ef41Sopenharmony_ci
731cb0ef41Sopenharmony_ci  // Verify the given pattern, i.e. check that parsing succeeds. If
741cb0ef41Sopenharmony_ci  // verification fails, `regexp_error_out` is set.
751cb0ef41Sopenharmony_ci  template <class CharT>
761cb0ef41Sopenharmony_ci  static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
771cb0ef41Sopenharmony_ci                           const CharT* input, int input_length,
781cb0ef41Sopenharmony_ci                           RegExpFlags flags, RegExpError* regexp_error_out,
791cb0ef41Sopenharmony_ci                           const DisallowGarbageCollection& no_gc);
801cb0ef41Sopenharmony_ci
811cb0ef41Sopenharmony_ci  // Parses the RegExp pattern and prepares the JSRegExp object with
821cb0ef41Sopenharmony_ci  // generic data and choice of implementation - as well as what
831cb0ef41Sopenharmony_ci  // the implementation wants to store in the data field.
841cb0ef41Sopenharmony_ci  // Returns false if compilation fails.
851cb0ef41Sopenharmony_ci  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile(
861cb0ef41Sopenharmony_ci      Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
871cb0ef41Sopenharmony_ci      RegExpFlags flags, uint32_t backtrack_limit);
881cb0ef41Sopenharmony_ci
891cb0ef41Sopenharmony_ci  // Ensures that a regexp is fully compiled and ready to be executed on a
901cb0ef41Sopenharmony_ci  // subject string.  Returns true on success. Return false on failure, and
911cb0ef41Sopenharmony_ci  // then an exception will be pending.
921cb0ef41Sopenharmony_ci  V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
931cb0ef41Sopenharmony_ci                                                        Handle<JSRegExp> re,
941cb0ef41Sopenharmony_ci                                                        Handle<String> subject);
951cb0ef41Sopenharmony_ci
961cb0ef41Sopenharmony_ci  enum CallOrigin : int {
971cb0ef41Sopenharmony_ci    kFromRuntime = 0,
981cb0ef41Sopenharmony_ci    kFromJs = 1,
991cb0ef41Sopenharmony_ci  };
1001cb0ef41Sopenharmony_ci
1011cb0ef41Sopenharmony_ci  enum class ExecQuirks {
1021cb0ef41Sopenharmony_ci    kNone,
1031cb0ef41Sopenharmony_ci    // Used to work around an issue in the RegExpPrototypeSplit fast path,
1041cb0ef41Sopenharmony_ci    // which diverges from the spec by not creating a sticky copy of the RegExp
1051cb0ef41Sopenharmony_ci    // instance and calling `exec` in a loop. If called in this context, we
1061cb0ef41Sopenharmony_ci    // must not update the last_match_info on a successful match at the subject
1071cb0ef41Sopenharmony_ci    // string end. See crbug.com/1075514 for more information.
1081cb0ef41Sopenharmony_ci    kTreatMatchAtEndAsFailure,
1091cb0ef41Sopenharmony_ci  };
1101cb0ef41Sopenharmony_ci
1111cb0ef41Sopenharmony_ci  // See ECMA-262 section 15.10.6.2.
1121cb0ef41Sopenharmony_ci  // This function calls the garbage collector if necessary.
1131cb0ef41Sopenharmony_ci  V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
1141cb0ef41Sopenharmony_ci      Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
1151cb0ef41Sopenharmony_ci      int index, Handle<RegExpMatchInfo> last_match_info,
1161cb0ef41Sopenharmony_ci      ExecQuirks exec_quirks = ExecQuirks::kNone);
1171cb0ef41Sopenharmony_ci
1181cb0ef41Sopenharmony_ci  V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object>
1191cb0ef41Sopenharmony_ci  ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp,
1201cb0ef41Sopenharmony_ci                          Handle<String> subject, int index,
1211cb0ef41Sopenharmony_ci                          Handle<RegExpMatchInfo> last_match_info,
1221cb0ef41Sopenharmony_ci                          ExecQuirks exec_quirks = ExecQuirks::kNone);
1231cb0ef41Sopenharmony_ci
1241cb0ef41Sopenharmony_ci  // Integral return values used throughout regexp code layers.
1251cb0ef41Sopenharmony_ci  static constexpr int kInternalRegExpFailure = 0;
1261cb0ef41Sopenharmony_ci  static constexpr int kInternalRegExpSuccess = 1;
1271cb0ef41Sopenharmony_ci  static constexpr int kInternalRegExpException = -1;
1281cb0ef41Sopenharmony_ci  static constexpr int kInternalRegExpRetry = -2;
1291cb0ef41Sopenharmony_ci  static constexpr int kInternalRegExpFallbackToExperimental = -3;
1301cb0ef41Sopenharmony_ci  static constexpr int kInternalRegExpSmallestResult = -3;
1311cb0ef41Sopenharmony_ci
1321cb0ef41Sopenharmony_ci  enum IrregexpResult : int32_t {
1331cb0ef41Sopenharmony_ci    RE_FAILURE = kInternalRegExpFailure,
1341cb0ef41Sopenharmony_ci    RE_SUCCESS = kInternalRegExpSuccess,
1351cb0ef41Sopenharmony_ci    RE_EXCEPTION = kInternalRegExpException,
1361cb0ef41Sopenharmony_ci    RE_RETRY = kInternalRegExpRetry,
1371cb0ef41Sopenharmony_ci    RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental,
1381cb0ef41Sopenharmony_ci  };
1391cb0ef41Sopenharmony_ci
1401cb0ef41Sopenharmony_ci  // Set last match info.  If match is nullptr, then setting captures is
1411cb0ef41Sopenharmony_ci  // omitted.
1421cb0ef41Sopenharmony_ci  static Handle<RegExpMatchInfo> SetLastMatchInfo(
1431cb0ef41Sopenharmony_ci      Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
1441cb0ef41Sopenharmony_ci      Handle<String> subject, int capture_count, int32_t* match);
1451cb0ef41Sopenharmony_ci
1461cb0ef41Sopenharmony_ci  V8_EXPORT_PRIVATE static bool CompileForTesting(
1471cb0ef41Sopenharmony_ci      Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags,
1481cb0ef41Sopenharmony_ci      Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte);
1491cb0ef41Sopenharmony_ci
1501cb0ef41Sopenharmony_ci  V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
1511cb0ef41Sopenharmony_ci                                                   RegExpNode* node);
1521cb0ef41Sopenharmony_ci
1531cb0ef41Sopenharmony_ci  static const int kRegExpTooLargeToOptimize = 20 * KB;
1541cb0ef41Sopenharmony_ci
1551cb0ef41Sopenharmony_ci  V8_WARN_UNUSED_RESULT
1561cb0ef41Sopenharmony_ci  static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
1571cb0ef41Sopenharmony_ci                                                  Handle<JSRegExp> re,
1581cb0ef41Sopenharmony_ci                                                  Handle<String> pattern,
1591cb0ef41Sopenharmony_ci                                                  RegExpError error);
1601cb0ef41Sopenharmony_ci  static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
1611cb0ef41Sopenharmony_ci                                   RegExpError error_text);
1621cb0ef41Sopenharmony_ci
1631cb0ef41Sopenharmony_ci  static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp);
1641cb0ef41Sopenharmony_ci
1651cb0ef41Sopenharmony_ci  static Handle<FixedArray> CreateCaptureNameMap(
1661cb0ef41Sopenharmony_ci      Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures);
1671cb0ef41Sopenharmony_ci};
1681cb0ef41Sopenharmony_ci
1691cb0ef41Sopenharmony_ci// Uses a special global mode of irregexp-generated code to perform a global
1701cb0ef41Sopenharmony_ci// search and return multiple results at once. As such, this is essentially an
1711cb0ef41Sopenharmony_ci// iterator over multiple results (retrieved batch-wise in advance).
1721cb0ef41Sopenharmony_ciclass RegExpGlobalCache final {
1731cb0ef41Sopenharmony_ci public:
1741cb0ef41Sopenharmony_ci  RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject,
1751cb0ef41Sopenharmony_ci                    Isolate* isolate);
1761cb0ef41Sopenharmony_ci
1771cb0ef41Sopenharmony_ci  ~RegExpGlobalCache();
1781cb0ef41Sopenharmony_ci
1791cb0ef41Sopenharmony_ci  // Fetch the next entry in the cache for global regexp match results.
1801cb0ef41Sopenharmony_ci  // This does not set the last match info.  Upon failure, nullptr is
1811cb0ef41Sopenharmony_ci  // returned. The cause can be checked with Result().  The previous result is
1821cb0ef41Sopenharmony_ci  // still in available in memory when a failure happens.
1831cb0ef41Sopenharmony_ci  int32_t* FetchNext();
1841cb0ef41Sopenharmony_ci
1851cb0ef41Sopenharmony_ci  int32_t* LastSuccessfulMatch();
1861cb0ef41Sopenharmony_ci
1871cb0ef41Sopenharmony_ci  bool HasException() { return num_matches_ < 0; }
1881cb0ef41Sopenharmony_ci
1891cb0ef41Sopenharmony_ci private:
1901cb0ef41Sopenharmony_ci  int AdvanceZeroLength(int last_index);
1911cb0ef41Sopenharmony_ci
1921cb0ef41Sopenharmony_ci  int num_matches_;
1931cb0ef41Sopenharmony_ci  int max_matches_;
1941cb0ef41Sopenharmony_ci  int current_match_index_;
1951cb0ef41Sopenharmony_ci  int registers_per_match_;
1961cb0ef41Sopenharmony_ci  // Pointer to the last set of captures.
1971cb0ef41Sopenharmony_ci  int32_t* register_array_;
1981cb0ef41Sopenharmony_ci  int register_array_size_;
1991cb0ef41Sopenharmony_ci  Handle<JSRegExp> regexp_;
2001cb0ef41Sopenharmony_ci  Handle<String> subject_;
2011cb0ef41Sopenharmony_ci  Isolate* isolate_;
2021cb0ef41Sopenharmony_ci};
2031cb0ef41Sopenharmony_ci
2041cb0ef41Sopenharmony_ci// Caches results for specific regexp queries on the isolate. At the time of
2051cb0ef41Sopenharmony_ci// writing, this is used during global calls to RegExp.prototype.exec and
2061cb0ef41Sopenharmony_ci// @@split.
2071cb0ef41Sopenharmony_ciclass RegExpResultsCache final : public AllStatic {
2081cb0ef41Sopenharmony_ci public:
2091cb0ef41Sopenharmony_ci  enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
2101cb0ef41Sopenharmony_ci
2111cb0ef41Sopenharmony_ci  // Attempt to retrieve a cached result.  On failure, 0 is returned as a Smi.
2121cb0ef41Sopenharmony_ci  // On success, the returned result is guaranteed to be a COW-array.
2131cb0ef41Sopenharmony_ci  static Object Lookup(Heap* heap, String key_string, Object key_pattern,
2141cb0ef41Sopenharmony_ci                       FixedArray* last_match_out, ResultsCacheType type);
2151cb0ef41Sopenharmony_ci  // Attempt to add value_array to the cache specified by type.  On success,
2161cb0ef41Sopenharmony_ci  // value_array is turned into a COW-array.
2171cb0ef41Sopenharmony_ci  static void Enter(Isolate* isolate, Handle<String> key_string,
2181cb0ef41Sopenharmony_ci                    Handle<Object> key_pattern, Handle<FixedArray> value_array,
2191cb0ef41Sopenharmony_ci                    Handle<FixedArray> last_match_cache, ResultsCacheType type);
2201cb0ef41Sopenharmony_ci  static void Clear(FixedArray cache);
2211cb0ef41Sopenharmony_ci
2221cb0ef41Sopenharmony_ci  static constexpr int kRegExpResultsCacheSize = 0x100;
2231cb0ef41Sopenharmony_ci
2241cb0ef41Sopenharmony_ci private:
2251cb0ef41Sopenharmony_ci  static constexpr int kStringOffset = 0;
2261cb0ef41Sopenharmony_ci  static constexpr int kPatternOffset = 1;
2271cb0ef41Sopenharmony_ci  static constexpr int kArrayOffset = 2;
2281cb0ef41Sopenharmony_ci  static constexpr int kLastMatchOffset = 3;
2291cb0ef41Sopenharmony_ci  static constexpr int kArrayEntriesPerCacheEntry = 4;
2301cb0ef41Sopenharmony_ci};
2311cb0ef41Sopenharmony_ci
2321cb0ef41Sopenharmony_ci}  // namespace internal
2331cb0ef41Sopenharmony_ci}  // namespace v8
2341cb0ef41Sopenharmony_ci
2351cb0ef41Sopenharmony_ci#endif  // V8_REGEXP_REGEXP_H_
236