1/* 2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#ifndef ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H 17#define ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H 18 19#include "ecmascript/base/builtins_base.h" 20#include "ecmascript/builtins/builtins_string.h" 21#include "ecmascript/ecma_runtime_call_info.h" 22#include "ecmascript/js_tagged_value.h" 23#include "ecmascript/regexp/regexp_parser.h" 24#include "ecmascript/tagged_array-inl.h" 25 26namespace panda::ecmascript::builtins { 27class BuiltinsRegExp : public base::BuiltinsBase { 28public: 29 enum RegExpSymbol { 30 SPLIT, 31 SEARCH, 32 MATCH, 33 MATCHALL, 34 REPLACE, 35 UNKNOWN 36 }; 37 // 21.2.3.1 RegExp ( pattern, flags ) 38 static JSTaggedValue RegExpConstructor(EcmaRuntimeCallInfo *argv); 39 40 // prototype 41 // 21.2.5.2 RegExp.prototype.exec ( string ) 42 static JSTaggedValue Exec(EcmaRuntimeCallInfo *argv); 43 // 21.2.5.13 RegExp.prototype.test( S ) 44 static JSTaggedValue Test(EcmaRuntimeCallInfo *argv); 45 // 21.2.5.14 RegExp.prototype.toString ( ) 46 static JSTaggedValue ToString(EcmaRuntimeCallInfo *argv); 47 // 21.2.5.3 get RegExp.prototype.flags 48 static JSTaggedValue GetFlags(EcmaRuntimeCallInfo *argv); 49 // 21.2.5.4 get RegExp.prototype.global 50 static JSTaggedValue GetGlobal(EcmaRuntimeCallInfo *argv); 51 // 21.2.5.5 get RegExp.prototype.ignoreCase 52 static JSTaggedValue GetIgnoreCase(EcmaRuntimeCallInfo *argv); 53 // 21.2.5.7 get RegExp.prototype.multiline 54 static JSTaggedValue GetMultiline(EcmaRuntimeCallInfo *argv); 55 static JSTaggedValue GetDotAll(EcmaRuntimeCallInfo *argv); 56 // 21.2.5.10 get RegExp.prototype.source 57 static JSTaggedValue GetSource(EcmaRuntimeCallInfo *argv); 58 // 21.2.5.12 get RegExp.prototype.sticky 59 static JSTaggedValue GetSticky(EcmaRuntimeCallInfo *argv); 60 // 21.2.5.15 get RegExp.prototype.unicode 61 static JSTaggedValue GetUnicode(EcmaRuntimeCallInfo *argv); 62 // 21.2.4.2 get RegExp [ @@species ] 63 static JSTaggedValue GetSpecies(EcmaRuntimeCallInfo *argv); 64 // 21.2.5.6 RegExp.prototype [ @@match ] ( string ) 65 static JSTaggedValue Match(EcmaRuntimeCallInfo *argv); 66 // 22.2.5.8 RegExp.prototype [ @@matchAll ] ( string ) 67 static JSTaggedValue MatchAll(EcmaRuntimeCallInfo *argv); 68 // 21.2.5.8 RegExp.prototype [ @@replace ] ( string, replaceValue ) 69 static JSTaggedValue Replace(EcmaRuntimeCallInfo *argv); 70 // 21.2.5.9 RegExp.prototype [ @@search ] ( string ) 71 static JSTaggedValue Search(EcmaRuntimeCallInfo *argv); 72 // 21.2.5.11 RegExp.prototype [ @@split ] ( string, limit ) 73 static JSTaggedValue Split(EcmaRuntimeCallInfo *argv); 74 // 21.2.3.2.3 Runtime Semantics: RegExpCreate ( P, F ) 75 static JSTaggedValue RegExpCreate(JSThread *thread, const JSHandle<JSTaggedValue> &pattern, 76 const JSHandle<JSTaggedValue> &flags); 77 static JSTaggedValue FlagsBitsToString(JSThread *thread, uint8_t flags); 78 // 21.2.5.2.1 Runtime Semantics: RegExpExec ( R, S ) 79 static JSTaggedValue RegExpExec(JSThread *thread, const JSHandle<JSTaggedValue> ®exp, 80 const JSHandle<JSTaggedValue> &inputString, bool useCache, 81 bool isIntermediateResult = false); 82 // 21.2.5.2.3 AdvanceStringIndex ( S, index, unicode ) 83 static int64_t AdvanceStringIndex(const JSHandle<JSTaggedValue> &inputStr, int64_t index, 84 bool unicode); 85 // 22.2.6.6 get RegExp.prototype.hasIndices 86 static JSTaggedValue GetHasIndices(EcmaRuntimeCallInfo *argv); 87 88 static JSTaggedValue ReplaceInternal(JSThread *thread, 89 JSHandle<JSTaggedValue> thisObj, 90 JSHandle<JSTaggedValue> string, 91 JSHandle<JSTaggedValue> inputReplaceValue); 92 static JSTaggedValue GetAllFlagsInternal(JSThread *thread, JSHandle<JSTaggedValue> &thisObj); 93 static bool IsFastRegExp(JSThread *thread, JSHandle<JSTaggedValue> regexp, 94 RegExpSymbol symbolTag = RegExpSymbol::UNKNOWN); 95 static bool GetFlag(JSThread *thread, const JSHandle<JSTaggedValue> regexp, uint32_t flag, bool isFastPath); 96 static bool GetOriginalFlag(JSThread *thread, const JSHandle<JSTaggedValue> regexp, uint32_t flag); 97 static void SetLastIndex(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 98 JSTaggedValue lastIndex, bool isFastPath); 99 static int64_t GetLastIndex(JSThread *thread, const JSHandle<JSTaggedValue> regexp, bool isFastPath); 100 static JSTaggedValue RegExpBuiltinExecWithoutResult(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 101 const JSHandle<JSTaggedValue> inputStr, 102 bool isFastPath, uint32_t lastIndex, bool useCache); 103 // 21.2.5.2.2 Runtime Semantics: RegExpBuiltinExec ( R, S ) 104 static JSTaggedValue RegExpBuiltinExec(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 105 const JSHandle<JSTaggedValue> inputStr, 106 bool isFastPath, bool useCache, bool isIntermediateResult = false); 107 static JSTaggedValue RegExpSearch(JSThread *thread, 108 const JSHandle<JSTaggedValue> regexp, 109 const JSHandle<JSTaggedValue> string); 110 static JSTaggedValue RegExpSearchFast(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 111 const JSHandle<JSTaggedValue> string); 112 static JSTaggedValue RegExpSplit(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 113 JSHandle<JSTaggedValue> jsString, JSHandle<JSTaggedValue> limit, 114 bool isFastPath); 115 static JSTaggedValue GetExecResultIndex(JSThread *thread, const JSHandle<JSTaggedValue> &execResults, 116 bool isFastPath); 117 static JSTaggedValue GetExecResultGroups(JSThread *thread, const JSHandle<JSTaggedValue> &execResults, 118 bool isFastPath); 119 static JSTaggedValue RegExpMatch(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 120 const JSHandle<JSTaggedValue> string, bool isFastPath); 121 static JSTaggedValue RegExpMatchAll(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 122 const JSHandle<EcmaString> string, bool isFastPath); 123// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 124#define SET_GET_CAPTURE(index) \ 125 static JSTaggedValue GetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj); \ 126 static bool SetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj, \ 127 const JSHandle<JSTaggedValue> &value, bool mayThrow); 128 129 SET_GET_CAPTURE(1) 130 SET_GET_CAPTURE(2) 131 SET_GET_CAPTURE(3) 132 SET_GET_CAPTURE(4) 133 SET_GET_CAPTURE(5) 134 SET_GET_CAPTURE(6) 135 SET_GET_CAPTURE(7) 136 SET_GET_CAPTURE(8) 137 SET_GET_CAPTURE(9) 138#undef SET_GET_CAPTURE 139 140#define REGEXP_SYMBOL_FUNCTION_LIST(V) \ 141 V(SPLIT, Split) \ 142 V(SEARCH, Search) \ 143 V(MATCH, Match) \ 144 V(MATCHALL, MatchAll) \ 145 V(REPLACE, Replace) 146 147private: 148 static constexpr uint32_t MIN_REPLACE_STRING_LENGTH = 1000; 149 static constexpr uint32_t MAX_SPLIT_LIMIT = 0xFFFFFFFFu; 150 static constexpr uint32_t REGEXP_GLOBAL_ARRAY_SIZE = 9; 151 static constexpr uint32_t LAST_INDEX_OFFSET = 0; 152 static constexpr uint32_t EXEC_RESULT_INDEX_OFFSET = 1; 153 static constexpr uint32_t EXEC_RESULT_INPUT_OFFSET = 2; 154 static constexpr uint32_t EXEC_RESULT_GROUPS_OFFSET = 3; 155 156 static constexpr uint32_t REPLACE_RESULT_VAL = 2; 157 static constexpr unsigned REPLACE_LENGTH_BITS = 30; 158 static constexpr unsigned REPLACE_POSITION_BITS = 30; 159 using ReplaceLengthField = BitField<uint32_t, 0, REPLACE_LENGTH_BITS>; // 30 160 using ReplacePositionField = ReplaceLengthField::NextField<uint32_t, REPLACE_POSITION_BITS>; // 60 161 162 static bool Matcher(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 163 const uint8_t *buffer, size_t length, int32_t lastindex, bool isUtf16); 164 165 static JSTaggedValue GetFlagsInternal(JSThread *thread, const JSHandle<JSTaggedValue> &obj, 166 const JSHandle<JSTaggedValue> &constructor, const uint8_t mask); 167 // 21.2.3.2.1 Runtime Semantics: RegExpAlloc ( newTarget ) 168 static JSTaggedValue RegExpAlloc(JSThread *thread, const JSHandle<JSTaggedValue> &newTarget); 169 170 static uint32_t UpdateExpressionFlags(JSThread *thread, const CString &checkStr); 171 172 // 21.2.3.2.2 Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) 173 static JSTaggedValue RegExpInitialize(JSThread *thread, const JSHandle<JSTaggedValue> &obj, 174 const JSHandle<JSTaggedValue> &pattern, const JSHandle<JSTaggedValue> &flags); 175 // 21.2.3.2.4 Runtime Semantics: EscapeRegExpPattern ( P, F ) 176 static EcmaString *EscapeRegExpPattern(JSThread *thread, const JSHandle<JSTaggedValue> &src, 177 const JSHandle<JSTaggedValue> &flags); 178 static JSTaggedValue RegExpReplaceFast(JSThread *thread, JSHandle<JSTaggedValue> regexp, 179 JSHandle<EcmaString> inputString, uint32_t inputLength); 180 static JSTaggedValue GetLastIndex(JSThread *thread, JSHandle<JSTaggedValue> regexp, 181 uint32_t &lastIndex); 182 static bool ShouldUseCache(JSThread *thread, JSHandle<EcmaString> inputString); 183 static JSTaggedValue MatchAndReplace(JSThread *thread, JSHandle<JSTaggedValue> regexp, 184 JSHandle<EcmaString> inputString, uint32_t &flags, 185 uint32_t lastIndex, uint32_t inputLength, 186 std::string &resultString); 187 static JSTaggedValue RegExpTestFast(JSThread *thread, JSHandle<JSTaggedValue> regexp, 188 const JSHandle<JSTaggedValue> inputString, bool useCache); 189 static JSTaggedValue RegExpExecForTestFast(JSThread *thread, JSHandle<JSTaggedValue> regexp, 190 const JSHandle<JSTaggedValue> inputStr, bool useCache); 191 // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups ) 192 static JSHandle<JSTaggedValue> MakeMatchIndicesIndexPairArray(JSThread* thread, 193 const std::vector<std::pair<JSTaggedValue, JSTaggedValue>>& indices, 194 const std::vector<JSHandle<JSTaggedValue>>& groupNames, bool hasGroups); 195 static bool RegExpExecInternal(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 196 JSHandle<EcmaString> inputString, int32_t lastIndex); 197 static JSTaggedValue RegExpSplitFast(JSThread *thread, const JSHandle<JSTaggedValue> regexp, 198 JSHandle<JSTaggedValue> string, uint32_t limit, bool useCache); 199 static JSHandle<EcmaString> CreateStringFromResultArray(JSThread *thread, const JSHandle<TaggedArray> resultArray, 200 const std::vector<uint64_t> &resultLengthArray, JSHandle<EcmaString> srcString, 201 uint32_t resultStrLength, bool isUtf8); 202}; 203 204class RegExpExecResultCache : public TaggedArray { 205public: 206 enum CacheType { 207 REPLACE_TYPE, 208 SPLIT_TYPE, 209 MATCH_TYPE, 210 EXEC_TYPE, 211 INTERMEDIATE_REPLACE_TYPE, 212 TEST_TYPE, 213 SEARCH_TYPE, 214 }; 215 static RegExpExecResultCache *Cast(TaggedObject *object) 216 { 217 return reinterpret_cast<RegExpExecResultCache *>(object); 218 } 219 static JSTaggedValue CreateCacheTable(JSThread *thread); 220 // extend as an additional parameter to judge cached 221 JSTaggedValue FindCachedResult(JSThread *thread, const JSHandle<JSTaggedValue> input, 222 CacheType type, const JSHandle<JSTaggedValue> regexp, 223 JSTaggedValue lastIndexInput, JSHandle<JSTaggedValue> extend, 224 bool isIntermediateResult = false); 225 // extend as an additional parameter to judge cached 226 static void AddResultInCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache, 227 const JSHandle<JSTaggedValue> regexp, 228 const JSHandle<JSTaggedValue> input, const JSHandle<JSTaggedValue> resultArray, 229 CacheType type, uint32_t lastIndexInput, uint32_t lastIndex, 230 const JSHandle<JSTaggedValue> extend, 231 bool isIntermediateResult = false); 232 233 static void GrowRegexpCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache); 234 235 void ClearEntry(JSThread *thread, int entry); 236 void SetEntry(JSThread *thread, int entry, JSTaggedValue &patten, JSTaggedValue &flags, JSTaggedValue &input, 237 JSTaggedValue &lastIndexInputValue, JSTaggedValue &lastIndexValue, JSTaggedValue &extendValue, 238 JSTaggedValue &resTableArray); 239 void UpdateResultArray(JSThread *thread, int entry, JSTaggedValue resultArray, CacheType type); 240 bool Match(int entry, JSTaggedValue &pattenStr, JSTaggedValue &flagsStr, JSTaggedValue &inputStr, 241 JSTaggedValue &lastIndexInputValue, JSTaggedValue &extend, CacheType type); 242 inline void SetHitCount(JSThread *thread, int hitCount) 243 { 244 Set(thread, CACHE_HIT_COUNT_INDEX, JSTaggedValue(hitCount)); 245 } 246 247 inline int GetHitCount() 248 { 249 return Get(CACHE_HIT_COUNT_INDEX).GetInt(); 250 } 251 252 inline void SetCacheCount(JSThread *thread, int hitCount) 253 { 254 Set(thread, CACHE_COUNT_INDEX, JSTaggedValue(hitCount)); 255 } 256 257 inline int GetCacheCount() 258 { 259 return Get(CACHE_COUNT_INDEX).GetInt(); 260 } 261 262 void Print() 263 { 264 std::cout << "cache count: " << GetCacheCount() << std::endl; 265 std::cout << "cache hit count: " << GetHitCount() << std::endl; 266 } 267 268 inline void SetLargeStrCount(JSThread *thread, uint32_t newCount) 269 { 270 Set(thread, LARGE_STRING_COUNT_INDEX, JSTaggedValue(newCount)); 271 } 272 273 inline void SetConflictCount(JSThread *thread, uint32_t newCount) 274 { 275 Set(thread, CONFLICT_COUNT_INDEX, JSTaggedValue(newCount)); 276 } 277 278 inline void SetStrLenThreshold(JSThread *thread, uint32_t newThreshold) 279 { 280 Set(thread, STRING_LENGTH_THRESHOLD_INDEX, JSTaggedValue(newThreshold)); 281 } 282 283 inline uint32_t GetLargeStrCount() 284 { 285 return Get(LARGE_STRING_COUNT_INDEX).GetInt(); 286 } 287 288 inline uint32_t GetConflictCount() 289 { 290 return Get(CONFLICT_COUNT_INDEX).GetInt(); 291 } 292 293 inline uint32_t GetStrLenThreshold() 294 { 295 return Get(STRING_LENGTH_THRESHOLD_INDEX).GetInt(); 296 } 297 298 inline void SetCacheLength(JSThread *thread, int length) 299 { 300 Set(thread, CACHE_LENGTH_INDEX, JSTaggedValue(length)); 301 } 302 303 inline int GetCacheLength() 304 { 305 return Get(CACHE_LENGTH_INDEX).GetInt(); 306 } 307 308private: 309 static constexpr int DEFAULT_LARGE_STRING_COUNT = 10; 310 static constexpr int DEFAULT_CONFLICT_COUNT = 100; 311 static constexpr int INITIAL_CACHE_NUMBER = 0x10; 312 static constexpr int DEFAULT_CACHE_NUMBER = 0x1000; 313 static constexpr int CACHE_COUNT_INDEX = 0; 314 static constexpr int CACHE_HIT_COUNT_INDEX = 1; 315 static constexpr int LARGE_STRING_COUNT_INDEX = 2; 316 static constexpr int CONFLICT_COUNT_INDEX = 3; 317 static constexpr int STRING_LENGTH_THRESHOLD_INDEX = 4; 318 static constexpr int CACHE_LENGTH_INDEX = 5; 319 static constexpr int CACHE_TABLE_HEADER_SIZE = 6; 320 static constexpr int PATTERN_INDEX = 0; 321 static constexpr int FLAG_INDEX = 1; 322 static constexpr int INPUT_STRING_INDEX = 2; 323 static constexpr int LAST_INDEX_INPUT_INDEX = 3; 324 static constexpr int LAST_INDEX_INDEX = 4; 325 static constexpr int RESULT_REPLACE_INDEX = 5; 326 static constexpr int RESULT_SPLIT_INDEX = 6; 327 static constexpr int RESULT_MATCH_INDEX = 7; 328 static constexpr int RESULT_EXEC_INDEX = 8; 329 static constexpr int RESULT_INTERMEDIATE_REPLACE_INDEX = 9; 330 static constexpr int RESULT_TEST_INDEX = 10; 331 static constexpr int RESULT_SEARCH_INDEX = 11; 332 // Extend index used for saving an additional parameter to judge cached 333 static constexpr int EXTEND_INDEX = 12; 334 static constexpr int CAPTURE_SIZE = 13; 335 static constexpr int ENTRY_SIZE = 14; 336}; 337 338class RegExpGlobalResult : public TaggedArray { 339public: 340 static RegExpGlobalResult *Cast(TaggedObject *object) 341 { 342 return reinterpret_cast<RegExpGlobalResult *>(object); 343 } 344 static JSTaggedValue CreateGlobalResultTable(JSThread *thread); 345 346 void SetCapture(JSThread *thread, int index, JSTaggedValue value) 347 { 348 ASSERT(CAPTURE_START_INDEX + index - 1 < GLOBAL_TABLE_SIZE); 349 Set(thread, CAPTURE_START_INDEX + index - 1, value); 350 } 351 352 void ResetDollar(JSThread *thread) 353 { 354 for (uint32_t i = 0; i < DOLLAR_NUMBER; i++) { 355 Set(thread, CAPTURE_START_INDEX + i, JSTaggedValue::Hole()); 356 } 357 } 358 359 template <int N> 360 static JSTaggedValue GetCapture(JSThread *thread); 361 362 void SetTotalCaptureCounts(JSThread *thread, JSTaggedValue counts) 363 { 364 Set(thread, TOTAL_CAPTURE_COUNTS_INDEX, counts); 365 } 366 367 JSTaggedValue GetTotalCaptureCounts() 368 { 369 return Get(TOTAL_CAPTURE_COUNTS_INDEX); 370 } 371 372 void SetEndIndex(JSThread *thread, JSTaggedValue endIndex) 373 { 374 Set(thread, END_INDEX, endIndex); 375 } 376 377 JSTaggedValue GetEndIndex() 378 { 379 return Get(END_INDEX); 380 } 381 382 void SetInputString(JSThread *thread, JSTaggedValue string) 383 { 384 Set(thread, INPUT_STRING_INDEX, string); 385 } 386 387 JSTaggedValue GetInputString() 388 { 389 return Get(INPUT_STRING_INDEX); 390 } 391 392 void SetStartOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value) 393 { 394 Set(thread, FIRST_CAPTURE_INDEX + index * 2, value); // 2 : double 395 } 396 397 void SetEndOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value) 398 { 399 Set(thread, FIRST_CAPTURE_INDEX + index * 2 + 1, value); // 2 : double 400 } 401 402 JSTaggedValue GetStartOfCaptureIndex(uint32_t index) 403 { 404 return Get(FIRST_CAPTURE_INDEX + index * 2); // 2 : double 405 } 406 407 JSTaggedValue GetEndOfCaptureIndex(uint32_t index) 408 { 409 return Get(FIRST_CAPTURE_INDEX + index * 2 + 1); // 2 : double 410 } 411 412 static JSHandle<RegExpGlobalResult> GrowCapturesCapacity(JSThread *thread, 413 JSHandle<RegExpGlobalResult>result, uint32_t length); 414 415 static constexpr int FIRST_CAPTURE_INDEX = 12; // capture index starts here 416 417private: 418 static constexpr int GLOBAL_TABLE_SIZE = 12; // initial length 419 static constexpr int DOLLAR_NUMBER = 9; 420 static constexpr int CAPTURE_START_INDEX = 0; 421 422 static constexpr int TOTAL_CAPTURE_COUNTS_INDEX = 9; // save total capture size 423 static constexpr int INPUT_STRING_INDEX = 10; // save input string 424 static constexpr int END_INDEX = 11; // save last index 425 static constexpr int INITIAL_CAPTURE_INDICES = 18; // length: pairs of capture start index and end index 426}; 427} // namespace panda::ecmascript::builtins 428#endif // ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H 429