1/*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H
17#define ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H
18
19#include "ecmascript/base/builtins_base.h"
20#include "ecmascript/builtins/builtins_string.h"
21#include "ecmascript/ecma_runtime_call_info.h"
22#include "ecmascript/js_tagged_value.h"
23#include "ecmascript/regexp/regexp_parser.h"
24#include "ecmascript/tagged_array-inl.h"
25
26namespace panda::ecmascript::builtins {
27class BuiltinsRegExp : public base::BuiltinsBase {
28public:
29    enum RegExpSymbol {
30        SPLIT,
31        SEARCH,
32        MATCH,
33        MATCHALL,
34        REPLACE,
35        UNKNOWN
36    };
37    // 21.2.3.1 RegExp ( pattern, flags )
38    static JSTaggedValue RegExpConstructor(EcmaRuntimeCallInfo *argv);
39
40    // prototype
41    // 21.2.5.2 RegExp.prototype.exec ( string )
42    static JSTaggedValue Exec(EcmaRuntimeCallInfo *argv);
43    // 21.2.5.13 RegExp.prototype.test( S )
44    static JSTaggedValue Test(EcmaRuntimeCallInfo *argv);
45    // 21.2.5.14 RegExp.prototype.toString ( )
46    static JSTaggedValue ToString(EcmaRuntimeCallInfo *argv);
47    // 21.2.5.3 get RegExp.prototype.flags
48    static JSTaggedValue GetFlags(EcmaRuntimeCallInfo *argv);
49    // 21.2.5.4 get RegExp.prototype.global
50    static JSTaggedValue GetGlobal(EcmaRuntimeCallInfo *argv);
51    // 21.2.5.5 get RegExp.prototype.ignoreCase
52    static JSTaggedValue GetIgnoreCase(EcmaRuntimeCallInfo *argv);
53    // 21.2.5.7 get RegExp.prototype.multiline
54    static JSTaggedValue GetMultiline(EcmaRuntimeCallInfo *argv);
55    static JSTaggedValue GetDotAll(EcmaRuntimeCallInfo *argv);
56    // 21.2.5.10 get RegExp.prototype.source
57    static JSTaggedValue GetSource(EcmaRuntimeCallInfo *argv);
58    // 21.2.5.12 get RegExp.prototype.sticky
59    static JSTaggedValue GetSticky(EcmaRuntimeCallInfo *argv);
60    // 21.2.5.15 get RegExp.prototype.unicode
61    static JSTaggedValue GetUnicode(EcmaRuntimeCallInfo *argv);
62    // 21.2.4.2 get RegExp [ @@species ]
63    static JSTaggedValue GetSpecies(EcmaRuntimeCallInfo *argv);
64    // 21.2.5.6 RegExp.prototype [ @@match ] ( string )
65    static JSTaggedValue Match(EcmaRuntimeCallInfo *argv);
66    // 22.2.5.8 RegExp.prototype [ @@matchAll ] ( string )
67    static JSTaggedValue MatchAll(EcmaRuntimeCallInfo *argv);
68    // 21.2.5.8 RegExp.prototype [ @@replace ] ( string, replaceValue )
69    static JSTaggedValue Replace(EcmaRuntimeCallInfo *argv);
70    // 21.2.5.9 RegExp.prototype [ @@search ] ( string )
71    static JSTaggedValue Search(EcmaRuntimeCallInfo *argv);
72    // 21.2.5.11 RegExp.prototype [ @@split ] ( string, limit )
73    static JSTaggedValue Split(EcmaRuntimeCallInfo *argv);
74    // 21.2.3.2.3 Runtime Semantics: RegExpCreate ( P, F )
75    static JSTaggedValue RegExpCreate(JSThread *thread, const JSHandle<JSTaggedValue> &pattern,
76                                      const JSHandle<JSTaggedValue> &flags);
77    static JSTaggedValue FlagsBitsToString(JSThread *thread, uint8_t flags);
78    // 21.2.5.2.1 Runtime Semantics: RegExpExec ( R, S )
79    static JSTaggedValue RegExpExec(JSThread *thread, const JSHandle<JSTaggedValue> &regexp,
80                                    const JSHandle<JSTaggedValue> &inputString, bool useCache,
81                                    bool isIntermediateResult = false);
82    // 21.2.5.2.3 AdvanceStringIndex ( S, index, unicode )
83    static int64_t AdvanceStringIndex(const JSHandle<JSTaggedValue> &inputStr, int64_t index,
84                                      bool unicode);
85    // 22.2.6.6 get RegExp.prototype.hasIndices
86    static JSTaggedValue GetHasIndices(EcmaRuntimeCallInfo *argv);
87
88    static JSTaggedValue ReplaceInternal(JSThread *thread,
89                                         JSHandle<JSTaggedValue> thisObj,
90                                         JSHandle<JSTaggedValue> string,
91                                         JSHandle<JSTaggedValue> inputReplaceValue);
92    static JSTaggedValue GetAllFlagsInternal(JSThread *thread, JSHandle<JSTaggedValue> &thisObj);
93    static bool IsFastRegExp(JSThread *thread, JSHandle<JSTaggedValue> regexp,
94                             RegExpSymbol symbolTag = RegExpSymbol::UNKNOWN);
95    static bool GetFlag(JSThread *thread, const JSHandle<JSTaggedValue> regexp, uint32_t flag, bool isFastPath);
96    static bool GetOriginalFlag(JSThread *thread, const JSHandle<JSTaggedValue> regexp, uint32_t flag);
97    static void SetLastIndex(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
98        JSTaggedValue lastIndex, bool isFastPath);
99    static int64_t GetLastIndex(JSThread *thread, const JSHandle<JSTaggedValue> regexp, bool isFastPath);
100    static JSTaggedValue RegExpBuiltinExecWithoutResult(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
101                                                        const JSHandle<JSTaggedValue> inputStr,
102                                                        bool isFastPath, uint32_t lastIndex, bool useCache);
103    // 21.2.5.2.2 Runtime Semantics: RegExpBuiltinExec ( R, S )
104    static JSTaggedValue RegExpBuiltinExec(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
105                                           const JSHandle<JSTaggedValue> inputStr,
106                                           bool isFastPath, bool useCache, bool isIntermediateResult = false);
107    static JSTaggedValue RegExpSearch(JSThread *thread,
108                                      const JSHandle<JSTaggedValue> regexp,
109                                      const JSHandle<JSTaggedValue> string);
110    static JSTaggedValue RegExpSearchFast(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
111                                          const JSHandle<JSTaggedValue> string);
112    static JSTaggedValue RegExpSplit(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
113                                     JSHandle<JSTaggedValue> jsString, JSHandle<JSTaggedValue> limit,
114                                     bool isFastPath);
115    static JSTaggedValue GetExecResultIndex(JSThread *thread, const JSHandle<JSTaggedValue> &execResults,
116                                            bool isFastPath);
117    static JSTaggedValue GetExecResultGroups(JSThread *thread, const JSHandle<JSTaggedValue> &execResults,
118                                             bool isFastPath);
119    static JSTaggedValue RegExpMatch(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
120                                     const JSHandle<JSTaggedValue> string, bool isFastPath);
121    static JSTaggedValue RegExpMatchAll(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
122                                        const JSHandle<EcmaString> string, bool isFastPath);
123// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
124#define SET_GET_CAPTURE(index)                                                                                \
125    static JSTaggedValue GetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj);                  \
126    static bool SetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj,                            \
127                                 const JSHandle<JSTaggedValue> &value, bool mayThrow);
128
129    SET_GET_CAPTURE(1)
130    SET_GET_CAPTURE(2)
131    SET_GET_CAPTURE(3)
132    SET_GET_CAPTURE(4)
133    SET_GET_CAPTURE(5)
134    SET_GET_CAPTURE(6)
135    SET_GET_CAPTURE(7)
136    SET_GET_CAPTURE(8)
137    SET_GET_CAPTURE(9)
138#undef SET_GET_CAPTURE
139
140#define REGEXP_SYMBOL_FUNCTION_LIST(V)    \
141    V(SPLIT, Split)                       \
142    V(SEARCH, Search)                     \
143    V(MATCH, Match)                       \
144    V(MATCHALL, MatchAll)                 \
145    V(REPLACE, Replace)
146
147private:
148    static constexpr uint32_t MIN_REPLACE_STRING_LENGTH = 1000;
149    static constexpr uint32_t MAX_SPLIT_LIMIT = 0xFFFFFFFFu;
150    static constexpr uint32_t REGEXP_GLOBAL_ARRAY_SIZE = 9;
151    static constexpr uint32_t LAST_INDEX_OFFSET = 0;
152    static constexpr uint32_t EXEC_RESULT_INDEX_OFFSET = 1;
153    static constexpr uint32_t EXEC_RESULT_INPUT_OFFSET = 2;
154    static constexpr uint32_t EXEC_RESULT_GROUPS_OFFSET = 3;
155
156    static constexpr uint32_t REPLACE_RESULT_VAL = 2;
157    static constexpr unsigned REPLACE_LENGTH_BITS = 30;
158    static constexpr unsigned REPLACE_POSITION_BITS = 30;
159    using ReplaceLengthField = BitField<uint32_t, 0, REPLACE_LENGTH_BITS>; // 30
160    using ReplacePositionField = ReplaceLengthField::NextField<uint32_t, REPLACE_POSITION_BITS>; // 60
161
162    static bool Matcher(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
163                        const uint8_t *buffer, size_t length, int32_t lastindex, bool isUtf16);
164
165    static JSTaggedValue GetFlagsInternal(JSThread *thread, const JSHandle<JSTaggedValue> &obj,
166                                          const JSHandle<JSTaggedValue> &constructor, const uint8_t mask);
167    // 21.2.3.2.1 Runtime Semantics: RegExpAlloc ( newTarget )
168    static JSTaggedValue RegExpAlloc(JSThread *thread, const JSHandle<JSTaggedValue> &newTarget);
169
170    static uint32_t UpdateExpressionFlags(JSThread *thread, const CString &checkStr);
171
172    // 21.2.3.2.2 Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
173    static JSTaggedValue RegExpInitialize(JSThread *thread, const JSHandle<JSTaggedValue> &obj,
174                                          const JSHandle<JSTaggedValue> &pattern, const JSHandle<JSTaggedValue> &flags);
175    // 21.2.3.2.4 Runtime Semantics: EscapeRegExpPattern ( P, F )
176    static EcmaString *EscapeRegExpPattern(JSThread *thread, const JSHandle<JSTaggedValue> &src,
177                                           const JSHandle<JSTaggedValue> &flags);
178    static JSTaggedValue RegExpReplaceFast(JSThread *thread, JSHandle<JSTaggedValue> regexp,
179                                           JSHandle<EcmaString> inputString, uint32_t inputLength);
180    static JSTaggedValue GetLastIndex(JSThread *thread, JSHandle<JSTaggedValue> regexp,
181                                      uint32_t &lastIndex);
182    static bool ShouldUseCache(JSThread *thread, JSHandle<EcmaString> inputString);
183    static JSTaggedValue MatchAndReplace(JSThread *thread, JSHandle<JSTaggedValue> regexp,
184                                         JSHandle<EcmaString> inputString, uint32_t &flags,
185                                         uint32_t lastIndex, uint32_t inputLength,
186                                         std::string &resultString);
187    static JSTaggedValue RegExpTestFast(JSThread *thread, JSHandle<JSTaggedValue> regexp,
188                                        const JSHandle<JSTaggedValue> inputString, bool useCache);
189    static JSTaggedValue RegExpExecForTestFast(JSThread *thread, JSHandle<JSTaggedValue> regexp,
190                                               const JSHandle<JSTaggedValue> inputStr, bool useCache);
191    // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
192    static JSHandle<JSTaggedValue> MakeMatchIndicesIndexPairArray(JSThread* thread,
193        const std::vector<std::pair<JSTaggedValue, JSTaggedValue>>& indices,
194        const std::vector<JSHandle<JSTaggedValue>>& groupNames, bool hasGroups);
195    static bool RegExpExecInternal(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
196                                   JSHandle<EcmaString> inputString, int32_t lastIndex);
197    static JSTaggedValue RegExpSplitFast(JSThread *thread, const JSHandle<JSTaggedValue> regexp,
198                                         JSHandle<JSTaggedValue> string, uint32_t limit, bool useCache);
199    static JSHandle<EcmaString> CreateStringFromResultArray(JSThread *thread, const JSHandle<TaggedArray> resultArray,
200        const std::vector<uint64_t> &resultLengthArray, JSHandle<EcmaString> srcString,
201        uint32_t resultStrLength, bool isUtf8);
202};
203
204class RegExpExecResultCache : public TaggedArray {
205public:
206    enum CacheType {
207        REPLACE_TYPE,
208        SPLIT_TYPE,
209        MATCH_TYPE,
210        EXEC_TYPE,
211        INTERMEDIATE_REPLACE_TYPE,
212        TEST_TYPE,
213        SEARCH_TYPE,
214    };
215    static RegExpExecResultCache *Cast(TaggedObject *object)
216    {
217        return reinterpret_cast<RegExpExecResultCache *>(object);
218    }
219    static JSTaggedValue CreateCacheTable(JSThread *thread);
220    // extend as an additional parameter to judge cached
221    JSTaggedValue FindCachedResult(JSThread *thread, const JSHandle<JSTaggedValue> input,
222                                   CacheType type, const JSHandle<JSTaggedValue> regexp,
223                                   JSTaggedValue lastIndexInput, JSHandle<JSTaggedValue> extend,
224                                   bool isIntermediateResult = false);
225    // extend as an additional parameter to judge cached
226    static void AddResultInCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache,
227                                 const JSHandle<JSTaggedValue> regexp,
228                                 const JSHandle<JSTaggedValue> input, const JSHandle<JSTaggedValue> resultArray,
229                                 CacheType type, uint32_t lastIndexInput, uint32_t lastIndex,
230                                 const JSHandle<JSTaggedValue> extend,
231                                 bool isIntermediateResult = false);
232
233    static void GrowRegexpCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache);
234
235    void ClearEntry(JSThread *thread, int entry);
236    void SetEntry(JSThread *thread, int entry, JSTaggedValue &patten, JSTaggedValue &flags, JSTaggedValue &input,
237                  JSTaggedValue &lastIndexInputValue, JSTaggedValue &lastIndexValue, JSTaggedValue &extendValue,
238                  JSTaggedValue &resTableArray);
239    void UpdateResultArray(JSThread *thread, int entry, JSTaggedValue resultArray, CacheType type);
240    bool Match(int entry, JSTaggedValue &pattenStr, JSTaggedValue &flagsStr, JSTaggedValue &inputStr,
241               JSTaggedValue &lastIndexInputValue, JSTaggedValue &extend, CacheType type);
242    inline void SetHitCount(JSThread *thread, int hitCount)
243    {
244        Set(thread, CACHE_HIT_COUNT_INDEX, JSTaggedValue(hitCount));
245    }
246
247    inline int GetHitCount()
248    {
249        return Get(CACHE_HIT_COUNT_INDEX).GetInt();
250    }
251
252    inline void SetCacheCount(JSThread *thread, int hitCount)
253    {
254        Set(thread, CACHE_COUNT_INDEX, JSTaggedValue(hitCount));
255    }
256
257    inline int GetCacheCount()
258    {
259        return Get(CACHE_COUNT_INDEX).GetInt();
260    }
261
262    void Print()
263    {
264        std::cout << "cache count: " << GetCacheCount() << std::endl;
265        std::cout << "cache hit count: " << GetHitCount() << std::endl;
266    }
267
268    inline void SetLargeStrCount(JSThread *thread, uint32_t newCount)
269    {
270        Set(thread, LARGE_STRING_COUNT_INDEX, JSTaggedValue(newCount));
271    }
272
273    inline void SetConflictCount(JSThread *thread, uint32_t newCount)
274    {
275        Set(thread, CONFLICT_COUNT_INDEX, JSTaggedValue(newCount));
276    }
277
278    inline void SetStrLenThreshold(JSThread *thread, uint32_t newThreshold)
279    {
280        Set(thread, STRING_LENGTH_THRESHOLD_INDEX, JSTaggedValue(newThreshold));
281    }
282
283    inline uint32_t GetLargeStrCount()
284    {
285        return Get(LARGE_STRING_COUNT_INDEX).GetInt();
286    }
287
288    inline uint32_t GetConflictCount()
289    {
290        return Get(CONFLICT_COUNT_INDEX).GetInt();
291    }
292
293    inline uint32_t GetStrLenThreshold()
294    {
295        return Get(STRING_LENGTH_THRESHOLD_INDEX).GetInt();
296    }
297
298    inline void SetCacheLength(JSThread *thread, int length)
299    {
300        Set(thread, CACHE_LENGTH_INDEX, JSTaggedValue(length));
301    }
302
303    inline int GetCacheLength()
304    {
305        return Get(CACHE_LENGTH_INDEX).GetInt();
306    }
307
308private:
309    static constexpr int DEFAULT_LARGE_STRING_COUNT = 10;
310    static constexpr int DEFAULT_CONFLICT_COUNT = 100;
311    static constexpr int INITIAL_CACHE_NUMBER = 0x10;
312    static constexpr int DEFAULT_CACHE_NUMBER = 0x1000;
313    static constexpr int CACHE_COUNT_INDEX = 0;
314    static constexpr int CACHE_HIT_COUNT_INDEX = 1;
315    static constexpr int LARGE_STRING_COUNT_INDEX = 2;
316    static constexpr int CONFLICT_COUNT_INDEX = 3;
317    static constexpr int STRING_LENGTH_THRESHOLD_INDEX = 4;
318    static constexpr int CACHE_LENGTH_INDEX = 5;
319    static constexpr int CACHE_TABLE_HEADER_SIZE = 6;
320    static constexpr int PATTERN_INDEX = 0;
321    static constexpr int FLAG_INDEX = 1;
322    static constexpr int INPUT_STRING_INDEX = 2;
323    static constexpr int LAST_INDEX_INPUT_INDEX = 3;
324    static constexpr int LAST_INDEX_INDEX = 4;
325    static constexpr int RESULT_REPLACE_INDEX = 5;
326    static constexpr int RESULT_SPLIT_INDEX = 6;
327    static constexpr int RESULT_MATCH_INDEX = 7;
328    static constexpr int RESULT_EXEC_INDEX = 8;
329    static constexpr int RESULT_INTERMEDIATE_REPLACE_INDEX = 9;
330    static constexpr int RESULT_TEST_INDEX = 10;
331    static constexpr int RESULT_SEARCH_INDEX = 11;
332    // Extend index used for saving an additional parameter to judge cached
333    static constexpr int EXTEND_INDEX = 12;
334    static constexpr int CAPTURE_SIZE = 13;
335    static constexpr int ENTRY_SIZE = 14;
336};
337
338class RegExpGlobalResult : public TaggedArray {
339public:
340    static RegExpGlobalResult *Cast(TaggedObject *object)
341    {
342        return reinterpret_cast<RegExpGlobalResult *>(object);
343    }
344    static JSTaggedValue CreateGlobalResultTable(JSThread *thread);
345
346    void SetCapture(JSThread *thread, int index, JSTaggedValue value)
347    {
348        ASSERT(CAPTURE_START_INDEX + index - 1 < GLOBAL_TABLE_SIZE);
349        Set(thread, CAPTURE_START_INDEX + index - 1, value);
350    }
351
352    void ResetDollar(JSThread *thread)
353    {
354        for (uint32_t i = 0; i < DOLLAR_NUMBER; i++) {
355            Set(thread, CAPTURE_START_INDEX + i, JSTaggedValue::Hole());
356        }
357    }
358
359    template <int N>
360    static JSTaggedValue GetCapture(JSThread *thread);
361
362    void SetTotalCaptureCounts(JSThread *thread, JSTaggedValue counts)
363    {
364        Set(thread, TOTAL_CAPTURE_COUNTS_INDEX, counts);
365    }
366
367    JSTaggedValue GetTotalCaptureCounts()
368    {
369        return Get(TOTAL_CAPTURE_COUNTS_INDEX);
370    }
371
372    void SetEndIndex(JSThread *thread, JSTaggedValue endIndex)
373    {
374        Set(thread, END_INDEX, endIndex);
375    }
376
377    JSTaggedValue GetEndIndex()
378    {
379        return Get(END_INDEX);
380    }
381
382    void SetInputString(JSThread *thread, JSTaggedValue string)
383    {
384        Set(thread, INPUT_STRING_INDEX, string);
385    }
386
387    JSTaggedValue GetInputString()
388    {
389        return Get(INPUT_STRING_INDEX);
390    }
391
392    void SetStartOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value)
393    {
394        Set(thread, FIRST_CAPTURE_INDEX + index * 2, value); // 2 : double
395    }
396
397    void SetEndOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value)
398    {
399        Set(thread, FIRST_CAPTURE_INDEX + index * 2 + 1, value); // 2 : double
400    }
401
402    JSTaggedValue GetStartOfCaptureIndex(uint32_t index)
403    {
404        return Get(FIRST_CAPTURE_INDEX + index * 2); // 2 : double
405    }
406
407    JSTaggedValue GetEndOfCaptureIndex(uint32_t index)
408    {
409        return Get(FIRST_CAPTURE_INDEX + index * 2 + 1); // 2 : double
410    }
411
412    static JSHandle<RegExpGlobalResult> GrowCapturesCapacity(JSThread *thread,
413        JSHandle<RegExpGlobalResult>result, uint32_t length);
414
415    static constexpr int FIRST_CAPTURE_INDEX = 12;  // capture index starts here
416
417private:
418    static constexpr int GLOBAL_TABLE_SIZE = 12; // initial length
419    static constexpr int DOLLAR_NUMBER = 9;
420    static constexpr int CAPTURE_START_INDEX = 0;
421
422    static constexpr int TOTAL_CAPTURE_COUNTS_INDEX = 9;  // save total capture size
423    static constexpr int INPUT_STRING_INDEX = 10; // save input string
424    static constexpr int END_INDEX = 11; // save last index
425    static constexpr int INITIAL_CAPTURE_INDICES = 18;  // length: pairs of capture start index and end index
426};
427}  // namespace panda::ecmascript::builtins
428#endif  // ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H
429