11cb0ef41Sopenharmony_ci// Copyright 2012 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci#ifndef V8_REGEXP_REGEXP_H_ 61cb0ef41Sopenharmony_ci#define V8_REGEXP_REGEXP_H_ 71cb0ef41Sopenharmony_ci 81cb0ef41Sopenharmony_ci#include "src/common/assert-scope.h" 91cb0ef41Sopenharmony_ci#include "src/handles/handles.h" 101cb0ef41Sopenharmony_ci#include "src/regexp/regexp-error.h" 111cb0ef41Sopenharmony_ci#include "src/regexp/regexp-flags.h" 121cb0ef41Sopenharmony_ci#include "src/zone/zone-containers.h" 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_cinamespace v8 { 151cb0ef41Sopenharmony_cinamespace internal { 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ciclass JSRegExp; 181cb0ef41Sopenharmony_ciclass RegExpCapture; 191cb0ef41Sopenharmony_ciclass RegExpMatchInfo; 201cb0ef41Sopenharmony_ciclass RegExpNode; 211cb0ef41Sopenharmony_ciclass RegExpTree; 221cb0ef41Sopenharmony_ci 231cb0ef41Sopenharmony_cienum class RegExpCompilationTarget : int { kBytecode, kNative }; 241cb0ef41Sopenharmony_ci 251cb0ef41Sopenharmony_ci// TODO(jgruber): Do not expose in regexp.h. 261cb0ef41Sopenharmony_ci// TODO(jgruber): Consider splitting between ParseData and CompileData. 271cb0ef41Sopenharmony_cistruct RegExpCompileData { 281cb0ef41Sopenharmony_ci // The parsed AST as produced by the RegExpParser. 291cb0ef41Sopenharmony_ci RegExpTree* tree = nullptr; 301cb0ef41Sopenharmony_ci 311cb0ef41Sopenharmony_ci // The compiled Node graph as produced by RegExpTree::ToNode methods. 321cb0ef41Sopenharmony_ci RegExpNode* node = nullptr; 331cb0ef41Sopenharmony_ci 341cb0ef41Sopenharmony_ci // Either the generated code as produced by the compiler or a trampoline 351cb0ef41Sopenharmony_ci // to the interpreter. 361cb0ef41Sopenharmony_ci Handle<Object> code; 371cb0ef41Sopenharmony_ci 381cb0ef41Sopenharmony_ci // True, iff the pattern is a 'simple' atom with zero captures. In other 391cb0ef41Sopenharmony_ci // words, the pattern consists of a string with no metacharacters and special 401cb0ef41Sopenharmony_ci // regexp features, and can be implemented as a standard string search. 411cb0ef41Sopenharmony_ci bool simple = true; 421cb0ef41Sopenharmony_ci 431cb0ef41Sopenharmony_ci // True, iff the pattern is anchored at the start of the string with '^'. 441cb0ef41Sopenharmony_ci bool contains_anchor = false; 451cb0ef41Sopenharmony_ci 461cb0ef41Sopenharmony_ci // Only set if the pattern contains named captures. 471cb0ef41Sopenharmony_ci // Note: the lifetime equals that of the parse/compile zone. 481cb0ef41Sopenharmony_ci ZoneVector<RegExpCapture*>* named_captures = nullptr; 491cb0ef41Sopenharmony_ci 501cb0ef41Sopenharmony_ci // The error message. Only used if an error occurred during parsing or 511cb0ef41Sopenharmony_ci // compilation. 521cb0ef41Sopenharmony_ci RegExpError error = RegExpError::kNone; 531cb0ef41Sopenharmony_ci 541cb0ef41Sopenharmony_ci // The position at which the error was detected. Only used if an 551cb0ef41Sopenharmony_ci // error occurred. 561cb0ef41Sopenharmony_ci int error_pos = 0; 571cb0ef41Sopenharmony_ci 581cb0ef41Sopenharmony_ci // The number of capture groups, without the global capture \0. 591cb0ef41Sopenharmony_ci int capture_count = 0; 601cb0ef41Sopenharmony_ci 611cb0ef41Sopenharmony_ci // The number of registers used by the generated code. 621cb0ef41Sopenharmony_ci int register_count = 0; 631cb0ef41Sopenharmony_ci 641cb0ef41Sopenharmony_ci // The compilation target (bytecode or native code). 651cb0ef41Sopenharmony_ci RegExpCompilationTarget compilation_target; 661cb0ef41Sopenharmony_ci}; 671cb0ef41Sopenharmony_ci 681cb0ef41Sopenharmony_ciclass RegExp final : public AllStatic { 691cb0ef41Sopenharmony_ci public: 701cb0ef41Sopenharmony_ci // Whether the irregexp engine generates interpreter bytecode. 711cb0ef41Sopenharmony_ci static bool CanGenerateBytecode(); 721cb0ef41Sopenharmony_ci 731cb0ef41Sopenharmony_ci // Verify the given pattern, i.e. check that parsing succeeds. If 741cb0ef41Sopenharmony_ci // verification fails, `regexp_error_out` is set. 751cb0ef41Sopenharmony_ci template <class CharT> 761cb0ef41Sopenharmony_ci static bool VerifySyntax(Zone* zone, uintptr_t stack_limit, 771cb0ef41Sopenharmony_ci const CharT* input, int input_length, 781cb0ef41Sopenharmony_ci RegExpFlags flags, RegExpError* regexp_error_out, 791cb0ef41Sopenharmony_ci const DisallowGarbageCollection& no_gc); 801cb0ef41Sopenharmony_ci 811cb0ef41Sopenharmony_ci // Parses the RegExp pattern and prepares the JSRegExp object with 821cb0ef41Sopenharmony_ci // generic data and choice of implementation - as well as what 831cb0ef41Sopenharmony_ci // the implementation wants to store in the data field. 841cb0ef41Sopenharmony_ci // Returns false if compilation fails. 851cb0ef41Sopenharmony_ci V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile( 861cb0ef41Sopenharmony_ci Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern, 871cb0ef41Sopenharmony_ci RegExpFlags flags, uint32_t backtrack_limit); 881cb0ef41Sopenharmony_ci 891cb0ef41Sopenharmony_ci // Ensures that a regexp is fully compiled and ready to be executed on a 901cb0ef41Sopenharmony_ci // subject string. Returns true on success. Return false on failure, and 911cb0ef41Sopenharmony_ci // then an exception will be pending. 921cb0ef41Sopenharmony_ci V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate, 931cb0ef41Sopenharmony_ci Handle<JSRegExp> re, 941cb0ef41Sopenharmony_ci Handle<String> subject); 951cb0ef41Sopenharmony_ci 961cb0ef41Sopenharmony_ci enum CallOrigin : int { 971cb0ef41Sopenharmony_ci kFromRuntime = 0, 981cb0ef41Sopenharmony_ci kFromJs = 1, 991cb0ef41Sopenharmony_ci }; 1001cb0ef41Sopenharmony_ci 1011cb0ef41Sopenharmony_ci enum class ExecQuirks { 1021cb0ef41Sopenharmony_ci kNone, 1031cb0ef41Sopenharmony_ci // Used to work around an issue in the RegExpPrototypeSplit fast path, 1041cb0ef41Sopenharmony_ci // which diverges from the spec by not creating a sticky copy of the RegExp 1051cb0ef41Sopenharmony_ci // instance and calling `exec` in a loop. If called in this context, we 1061cb0ef41Sopenharmony_ci // must not update the last_match_info on a successful match at the subject 1071cb0ef41Sopenharmony_ci // string end. See crbug.com/1075514 for more information. 1081cb0ef41Sopenharmony_ci kTreatMatchAtEndAsFailure, 1091cb0ef41Sopenharmony_ci }; 1101cb0ef41Sopenharmony_ci 1111cb0ef41Sopenharmony_ci // See ECMA-262 section 15.10.6.2. 1121cb0ef41Sopenharmony_ci // This function calls the garbage collector if necessary. 1131cb0ef41Sopenharmony_ci V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec( 1141cb0ef41Sopenharmony_ci Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, 1151cb0ef41Sopenharmony_ci int index, Handle<RegExpMatchInfo> last_match_info, 1161cb0ef41Sopenharmony_ci ExecQuirks exec_quirks = ExecQuirks::kNone); 1171cb0ef41Sopenharmony_ci 1181cb0ef41Sopenharmony_ci V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> 1191cb0ef41Sopenharmony_ci ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp, 1201cb0ef41Sopenharmony_ci Handle<String> subject, int index, 1211cb0ef41Sopenharmony_ci Handle<RegExpMatchInfo> last_match_info, 1221cb0ef41Sopenharmony_ci ExecQuirks exec_quirks = ExecQuirks::kNone); 1231cb0ef41Sopenharmony_ci 1241cb0ef41Sopenharmony_ci // Integral return values used throughout regexp code layers. 1251cb0ef41Sopenharmony_ci static constexpr int kInternalRegExpFailure = 0; 1261cb0ef41Sopenharmony_ci static constexpr int kInternalRegExpSuccess = 1; 1271cb0ef41Sopenharmony_ci static constexpr int kInternalRegExpException = -1; 1281cb0ef41Sopenharmony_ci static constexpr int kInternalRegExpRetry = -2; 1291cb0ef41Sopenharmony_ci static constexpr int kInternalRegExpFallbackToExperimental = -3; 1301cb0ef41Sopenharmony_ci static constexpr int kInternalRegExpSmallestResult = -3; 1311cb0ef41Sopenharmony_ci 1321cb0ef41Sopenharmony_ci enum IrregexpResult : int32_t { 1331cb0ef41Sopenharmony_ci RE_FAILURE = kInternalRegExpFailure, 1341cb0ef41Sopenharmony_ci RE_SUCCESS = kInternalRegExpSuccess, 1351cb0ef41Sopenharmony_ci RE_EXCEPTION = kInternalRegExpException, 1361cb0ef41Sopenharmony_ci RE_RETRY = kInternalRegExpRetry, 1371cb0ef41Sopenharmony_ci RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental, 1381cb0ef41Sopenharmony_ci }; 1391cb0ef41Sopenharmony_ci 1401cb0ef41Sopenharmony_ci // Set last match info. If match is nullptr, then setting captures is 1411cb0ef41Sopenharmony_ci // omitted. 1421cb0ef41Sopenharmony_ci static Handle<RegExpMatchInfo> SetLastMatchInfo( 1431cb0ef41Sopenharmony_ci Isolate* isolate, Handle<RegExpMatchInfo> last_match_info, 1441cb0ef41Sopenharmony_ci Handle<String> subject, int capture_count, int32_t* match); 1451cb0ef41Sopenharmony_ci 1461cb0ef41Sopenharmony_ci V8_EXPORT_PRIVATE static bool CompileForTesting( 1471cb0ef41Sopenharmony_ci Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags, 1481cb0ef41Sopenharmony_ci Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte); 1491cb0ef41Sopenharmony_ci 1501cb0ef41Sopenharmony_ci V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label, 1511cb0ef41Sopenharmony_ci RegExpNode* node); 1521cb0ef41Sopenharmony_ci 1531cb0ef41Sopenharmony_ci static const int kRegExpTooLargeToOptimize = 20 * KB; 1541cb0ef41Sopenharmony_ci 1551cb0ef41Sopenharmony_ci V8_WARN_UNUSED_RESULT 1561cb0ef41Sopenharmony_ci static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate, 1571cb0ef41Sopenharmony_ci Handle<JSRegExp> re, 1581cb0ef41Sopenharmony_ci Handle<String> pattern, 1591cb0ef41Sopenharmony_ci RegExpError error); 1601cb0ef41Sopenharmony_ci static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, 1611cb0ef41Sopenharmony_ci RegExpError error_text); 1621cb0ef41Sopenharmony_ci 1631cb0ef41Sopenharmony_ci static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp); 1641cb0ef41Sopenharmony_ci 1651cb0ef41Sopenharmony_ci static Handle<FixedArray> CreateCaptureNameMap( 1661cb0ef41Sopenharmony_ci Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures); 1671cb0ef41Sopenharmony_ci}; 1681cb0ef41Sopenharmony_ci 1691cb0ef41Sopenharmony_ci// Uses a special global mode of irregexp-generated code to perform a global 1701cb0ef41Sopenharmony_ci// search and return multiple results at once. As such, this is essentially an 1711cb0ef41Sopenharmony_ci// iterator over multiple results (retrieved batch-wise in advance). 1721cb0ef41Sopenharmony_ciclass RegExpGlobalCache final { 1731cb0ef41Sopenharmony_ci public: 1741cb0ef41Sopenharmony_ci RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject, 1751cb0ef41Sopenharmony_ci Isolate* isolate); 1761cb0ef41Sopenharmony_ci 1771cb0ef41Sopenharmony_ci ~RegExpGlobalCache(); 1781cb0ef41Sopenharmony_ci 1791cb0ef41Sopenharmony_ci // Fetch the next entry in the cache for global regexp match results. 1801cb0ef41Sopenharmony_ci // This does not set the last match info. Upon failure, nullptr is 1811cb0ef41Sopenharmony_ci // returned. The cause can be checked with Result(). The previous result is 1821cb0ef41Sopenharmony_ci // still in available in memory when a failure happens. 1831cb0ef41Sopenharmony_ci int32_t* FetchNext(); 1841cb0ef41Sopenharmony_ci 1851cb0ef41Sopenharmony_ci int32_t* LastSuccessfulMatch(); 1861cb0ef41Sopenharmony_ci 1871cb0ef41Sopenharmony_ci bool HasException() { return num_matches_ < 0; } 1881cb0ef41Sopenharmony_ci 1891cb0ef41Sopenharmony_ci private: 1901cb0ef41Sopenharmony_ci int AdvanceZeroLength(int last_index); 1911cb0ef41Sopenharmony_ci 1921cb0ef41Sopenharmony_ci int num_matches_; 1931cb0ef41Sopenharmony_ci int max_matches_; 1941cb0ef41Sopenharmony_ci int current_match_index_; 1951cb0ef41Sopenharmony_ci int registers_per_match_; 1961cb0ef41Sopenharmony_ci // Pointer to the last set of captures. 1971cb0ef41Sopenharmony_ci int32_t* register_array_; 1981cb0ef41Sopenharmony_ci int register_array_size_; 1991cb0ef41Sopenharmony_ci Handle<JSRegExp> regexp_; 2001cb0ef41Sopenharmony_ci Handle<String> subject_; 2011cb0ef41Sopenharmony_ci Isolate* isolate_; 2021cb0ef41Sopenharmony_ci}; 2031cb0ef41Sopenharmony_ci 2041cb0ef41Sopenharmony_ci// Caches results for specific regexp queries on the isolate. At the time of 2051cb0ef41Sopenharmony_ci// writing, this is used during global calls to RegExp.prototype.exec and 2061cb0ef41Sopenharmony_ci// @@split. 2071cb0ef41Sopenharmony_ciclass RegExpResultsCache final : public AllStatic { 2081cb0ef41Sopenharmony_ci public: 2091cb0ef41Sopenharmony_ci enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS }; 2101cb0ef41Sopenharmony_ci 2111cb0ef41Sopenharmony_ci // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi. 2121cb0ef41Sopenharmony_ci // On success, the returned result is guaranteed to be a COW-array. 2131cb0ef41Sopenharmony_ci static Object Lookup(Heap* heap, String key_string, Object key_pattern, 2141cb0ef41Sopenharmony_ci FixedArray* last_match_out, ResultsCacheType type); 2151cb0ef41Sopenharmony_ci // Attempt to add value_array to the cache specified by type. On success, 2161cb0ef41Sopenharmony_ci // value_array is turned into a COW-array. 2171cb0ef41Sopenharmony_ci static void Enter(Isolate* isolate, Handle<String> key_string, 2181cb0ef41Sopenharmony_ci Handle<Object> key_pattern, Handle<FixedArray> value_array, 2191cb0ef41Sopenharmony_ci Handle<FixedArray> last_match_cache, ResultsCacheType type); 2201cb0ef41Sopenharmony_ci static void Clear(FixedArray cache); 2211cb0ef41Sopenharmony_ci 2221cb0ef41Sopenharmony_ci static constexpr int kRegExpResultsCacheSize = 0x100; 2231cb0ef41Sopenharmony_ci 2241cb0ef41Sopenharmony_ci private: 2251cb0ef41Sopenharmony_ci static constexpr int kStringOffset = 0; 2261cb0ef41Sopenharmony_ci static constexpr int kPatternOffset = 1; 2271cb0ef41Sopenharmony_ci static constexpr int kArrayOffset = 2; 2281cb0ef41Sopenharmony_ci static constexpr int kLastMatchOffset = 3; 2291cb0ef41Sopenharmony_ci static constexpr int kArrayEntriesPerCacheEntry = 4; 2301cb0ef41Sopenharmony_ci}; 2311cb0ef41Sopenharmony_ci 2321cb0ef41Sopenharmony_ci} // namespace internal 2331cb0ef41Sopenharmony_ci} // namespace v8 2341cb0ef41Sopenharmony_ci 2351cb0ef41Sopenharmony_ci#endif // V8_REGEXP_REGEXP_H_ 236