1/*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
17#define ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
18
19#include "ecmascript/builtins/builtins_regexp.h"
20#include "ecmascript/regexp/regexp_parser.h"
21#include "ecmascript/mem/regexp_cached_chunk.h"
22#include "ecmascript/js_handle.h"
23
24namespace panda::ecmascript {
25class RegExpExecutor {
26public:
    // Pair of input pointers describing one capture group.
    // HandleOpSaveStart / HandleOpSaveEnd store the current input pointer into these fields and
    // HandleOpSaveReset sets both of them back to nullptr.
    struct CaptureState {
        const uint8_t *captureStart;  // written by HandleOpSaveStart; nullptr after a save-reset
        const uint8_t *captureEnd;    // written by HandleOpSaveEnd; nullptr after a save-reset
    };
31
32    enum StateType : uint8_t {
33        STATE_SPLIT = 0, /* Do not re-order. */
34        STATE_NEGATIVE_MATCH_AHEAD, /* OP_NEGATIVE_MATCH_AHEAD  - OP_SPLIT_NEXT */
35        STATE_MATCH_AHEAD, /* OP_MATCH_AHEAD  - OP_SPLIT_NEXT */
36        STATE_SAVE,
37        STATE_PUSH,
38        STATE_POP,
39        STATE_SET,
40        STATE_INVALID,
41    };
42
43    struct RegExpState {
44        StateType type_ = STATE_SPLIT;
45        uint32_t currentPc_ = 0;
46        const uint8_t *currentPtr_ = nullptr;
47    };
48
49    explicit RegExpExecutor(RegExpCachedChunk *chunk) : chunk_(chunk)
50    {
51        ASSERT(chunk_ != nullptr);
52    };
53
54    ~RegExpExecutor() = default;
55
56    NO_COPY_SEMANTIC(RegExpExecutor);
57    NO_MOVE_SEMANTIC(RegExpExecutor);
58
59    bool Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar = false);
60
61    bool ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd);
62    inline bool HandleFirstSplit()
63    {
64        if (GetCurrentPC() == RegExpParser::OP_START_OFFSET && stateStackLen_ == 0 &&
65            (flags_ & RegExpParser::FLAG_STICKY) == 0) {
66            if (IsEOF()) {
67                if (MatchFailed()) {
68                    return false;
69                }
70            } else if (prefilter_ && !isWideChar_) {
71                ++currentPtr_;
72                currentPtr_ = (const uint8_t *)memchr(currentPtr_, prefilter_, inputEnd_ - currentPtr_);
73                if (currentPtr_ == nullptr) {
74                    currentPtr_ = inputEnd_;
75                }
76                PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
77            } else {
78                AdvanceCurrentPtr();
79                PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET);
80            }
81        }
82        return true;
83    }
84
85    inline bool HandleOpAll(uint8_t opCode)
86    {
87        if (IsEOF()) {
88            return !MatchFailed();
89        }
90        uint32_t currentChar = GetCurrentChar();
91        if ((opCode == RegExpOpCode::OP_DOTS) && IsTerminator(currentChar)) {
92            return !MatchFailed();
93        }
94        Advance(opCode);
95        return true;
96    }
97
98    inline bool HandleOpChar(const DynChunk &byteCode, uint8_t opCode)
99    {
100        uint32_t expectedChar = 0;
101        if (opCode == RegExpOpCode::OP_CHAR32) {
102            expectedChar = byteCode.GetU32(GetCurrentPC() + 1);
103        } else {
104            expectedChar = byteCode.GetU16(GetCurrentPC() + 1);
105        }
106        if (IsEOF()) {
107            return !MatchFailed();
108        }
109        uint32_t currentChar = GetCurrentChar();
110        if (IsIgnoreCase()) {
111            currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
112        }
113        if (currentChar == expectedChar) {
114            Advance(opCode);
115        } else {
116            if (MatchFailed()) {
117                return false;
118            }
119        }
120        return true;
121    }
122
123    inline bool HandleOpWordBoundary(uint8_t opCode)
124    {
125        bool preIsWord = false;
126        if (GetCurrentPtr() != input_) {
127            // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
128            preIsWord = IsWordChar(PeekPrevChar(currentPtr_, input_));
129        }
130        bool currentIsWord = !IsEOF() && IsWordChar(PeekChar(currentPtr_, inputEnd_));
131        if (((opCode == RegExpOpCode::OP_WORD_BOUNDARY) &&
132            ((!preIsWord && currentIsWord) || (preIsWord && !currentIsWord))) ||
133            ((opCode == RegExpOpCode::OP_NOT_WORD_BOUNDARY) &&
134            ((preIsWord && currentIsWord) || (!preIsWord && !currentIsWord)))) {
135            Advance(opCode);
136        } else {
137            if (MatchFailed()) {
138                return false;
139            }
140        }
141        return true;
142    }
143
144    inline bool HandleOpLineStart(uint8_t opCode)
145    {
146        if ((GetCurrentPtr() == input_) ||
147            // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
148            ((flags_ & RegExpParser::FLAG_MULTILINE) != 0 && PeekPrevChar(currentPtr_, input_) == '\n')) {
149            Advance(opCode);
150        } else {
151            if (MatchFailed()) {
152                return false;
153            }
154        }
155        return true;
156    }
157
158    inline bool HandleOpLineEnd(uint8_t opCode)
159    {
160        if (IsEOF() ||
161            // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
162            ((flags_ & RegExpParser::FLAG_MULTILINE) != 0
163             && (PeekChar(currentPtr_, inputEnd_) == '\n' || PeekChar(currentPtr_, inputEnd_) == '\r'))) {
164            Advance(opCode);
165        } else {
166            if (MatchFailed()) {
167                return false;
168            }
169        }
170        return true;
171    }
172
173    inline void HandleOpSaveStart(const DynChunk &byteCode, uint8_t opCode)
174    {
175        uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
176        ASSERT(captureIndex < nCapture_);
177        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
178        CaptureState *captureState = &captureResultList_[captureIndex];
179        // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
180        PushRegExpState(STATE_SAVE, captureIndex * 2, reinterpret_cast<uintptr_t>(captureState->captureStart));
181        captureState->captureStart = GetCurrentPtr();
182        Advance(opCode);
183    }
184
185    inline void HandleOpSaveEnd(const DynChunk &byteCode, uint8_t opCode)
186    {
187        uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
188        ASSERT(captureIndex < nCapture_);
189        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
190        CaptureState *captureState = &captureResultList_[captureIndex];
191        // 2: Even indexes store captureStart. Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
192        PushRegExpState(STATE_SAVE, captureIndex * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd));
193        captureState->captureEnd = GetCurrentPtr();
194        Advance(opCode);
195    }
196
197    inline void HandleOpSaveReset(const DynChunk &byteCode, uint8_t opCode)
198    {
199        uint32_t catpureStartIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_START);
200        uint32_t catpureEndIndex = byteCode.GetU8(GetCurrentPC() + SAVE_RESET_END);
201        for (uint32_t i = catpureStartIndex; i <= catpureEndIndex; i++) {
202            CaptureState *captureState =
203                &captureResultList_[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
204            // 2: Even indexes store captureStart.
205            // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
206            PushRegExpState(STATE_SAVE, i * 2, reinterpret_cast<uintptr_t>(captureState->captureStart));
207            // 2: Even indexes store captureStart.
208            // Odd indexes store captureEnd. 0: start0, 1: end0, 2: start1, 3: end1, ...
209            PushRegExpState(STATE_SAVE, i * 2 + 1, reinterpret_cast<uintptr_t>(captureState->captureEnd));
210            captureState->captureStart = nullptr;
211            captureState->captureEnd = nullptr;
212        }
213        Advance(opCode);
214    }
215
216    inline void HandleOpMatch(const DynChunk &byteCode, uint8_t opCode)
217    {
218        auto type = static_cast<StateType>(opCode - RegExpOpCode::OP_SPLIT_NEXT);
219        ASSERT(type == STATE_SPLIT || type == STATE_MATCH_AHEAD || type == STATE_NEGATIVE_MATCH_AHEAD);
220        uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
221        Advance(opCode);
222        uint32_t splitPc = GetCurrentPC() + offset;
223        PushRegExpState(type, splitPc);
224    }
225
226    inline void HandleOpSplitFirst(const DynChunk &byteCode, uint8_t opCode)
227    {
228        uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1);
229        Advance(opCode);
230        PushRegExpState(STATE_SPLIT, GetCurrentPC());
231        AdvanceOffset(offset);
232    }
233
234    inline bool HandleOpPrev(uint8_t opCode)
235    {
236        if (GetCurrentPtr() == input_) {
237            if (MatchFailed()) {
238                return false;
239            }
240        } else {
241            PrevPtr(&currentPtr_, input_);
242            Advance(opCode);
243        }
244        return true;
245    }
246
247    inline void HandleOpLoop(const DynChunk &byteCode, uint8_t opCode)
248    {
249        uint32_t quantifyMin = byteCode.GetU32(GetCurrentPC() + LOOP_MIN_OFFSET);
250        uint32_t quantifyMax = byteCode.GetU32(GetCurrentPC() + LOOP_MAX_OFFSET);
251        uint32_t pcOffset = byteCode.GetU32(GetCurrentPC() + LOOP_PC_OFFSET);
252        Advance(opCode);
253        uint32_t loopPcEnd = GetCurrentPC();
254        uint32_t loopPcStart = GetCurrentPC() + pcOffset;
255        bool isGreedy = opCode == RegExpOpCode::OP_LOOP_GREEDY;
256        uint32_t loopMax = isGreedy ? quantifyMax : quantifyMin;
257
258        uint32_t loopCount = PeekStack();
259        PushRegExpState(StateType::STATE_SET, 0, loopCount);
260        SetStackValue(++loopCount);
261        if (loopCount < loopMax) {
262            // greedy failed, goto next
263            if (loopCount >= quantifyMin) {
264                PushRegExpState(STATE_SPLIT, loopPcEnd);
265            }
266            // Goto loop start
267            SetCurrentPC(loopPcStart);
268        } else {
269            if (!isGreedy && (loopCount < quantifyMax)) {
270                PushRegExpState(STATE_SPLIT, loopPcStart);
271            }
272        }
273    }
274
275    inline bool HandleOpRange32(const DynChunk &byteCode)
276    {
277        if (IsEOF()) {
278            return !MatchFailed();
279        }
280        uint32_t currentChar = GetCurrentChar();
281        if (IsIgnoreCase()) {
282            currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
283        }
284        uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
285        bool isFound = false;
286        int32_t idxMin = 0;
287        int32_t idxMax = static_cast<int32_t>(rangeCount) - 1;
288        int32_t idx = 0;
289        uint32_t low = 0;
290        uint32_t high =
291            byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET + idxMax * RANGE32_MAX_OFFSET +
292                            RANGE32_MAX_HALF_OFFSET);
293        if (currentChar <= high) {
294            while (idxMin <= idxMax) {
295                idx = (idxMin + idxMax) / RANGE32_OFFSET;
296                low = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
297                    RANGE32_MAX_OFFSET);
298                high = byteCode.GetU32(GetCurrentPC() + RANGE32_HEAD_OFFSET +  static_cast<uint32_t>(idx) *
299                    RANGE32_MAX_OFFSET +
300                    RANGE32_MAX_HALF_OFFSET);
301                if (currentChar < low) {
302                    idxMax = idx - 1;
303                } else if (currentChar > high) {
304                    idxMin = idx + 1;
305                } else {
306                    isFound = true;
307                    break;
308                }
309            }
310        }
311        if (isFound) {
312            AdvanceOffset(rangeCount * RANGE32_MAX_OFFSET + RANGE32_HEAD_OFFSET);
313        } else {
314            if (MatchFailed()) {
315                return false;
316            }
317        }
318        return true;
319    }
320
321    inline bool HandleOpRange(const DynChunk &byteCode)
322    {
323        if (IsEOF()) {
324            return !MatchFailed();
325        }
326        uint32_t currentChar = GetCurrentChar();
327        uint16_t rangeCount = byteCode.GetU16(GetCurrentPC() + 1);
328        bool flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount);
329        if (IsIgnoreCase() && !flag) {
330            currentChar = static_cast<uint32_t>(RegExpParser::GetcurrentCharNext(currentChar));
331            flag = IsFoundOpRange(GetCurrentPC(), currentChar, byteCode, rangeCount);
332        }
333        if (flag) {
334            AdvanceOffset(rangeCount * RANGE32_MAX_HALF_OFFSET + RANGE32_HEAD_OFFSET);
335        } else {
336            if (MatchFailed()) {
337                return false;
338            }
339        }
340        return true;
341    }
342
343    inline bool HandleOpSparse(const DynChunk &byteCode)
344    {
345        if (IsEOF()) {
346            return !MatchFailed();
347        }
348        uint32_t currentChar = GetCurrentChar();
349        if (IsIgnoreCase()) {
350            currentChar = static_cast<uint32_t>(RegExpParser::Canonicalize(currentChar, IsUtf16()));
351        }
352        uint16_t sparseCount = byteCode.GetU16(GetCurrentPC() + 1);
353        for (uint32_t i = 0; i < sparseCount; i++) {
354            uint32_t sparseChar = byteCode.GetU16(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET);
355            if (currentChar == sparseChar) {
356                uint32_t offset = byteCode.GetU32(GetCurrentPC() + SPARSE_HEAD_OFFSET + i * SPARSE_MAX_OFFSET +
357                    SPARSE_OFF_OFFSET);
358                AdvanceOffset(offset + sparseCount * SPARSE_MAX_OFFSET + SPARSE_HEAD_OFFSET);
359                return true;
360            }
361        }
362        return !MatchFailed();
363    }
364
365    inline bool HandleOpBackReference(const DynChunk &byteCode, uint8_t opCode)
366    {
367        uint32_t captureIndex = byteCode.GetU8(GetCurrentPC() + 1);
368        if (captureIndex >= nCapture_) {
369            return !MatchFailed();
370        }
371        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
372        const uint8_t *captureStart = captureResultList_[captureIndex].captureStart;
373        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
374        const uint8_t *captureEnd = captureResultList_[captureIndex].captureEnd;
375        if (captureStart == nullptr || captureEnd == nullptr) {
376            Advance(opCode);
377            return true;
378        }
379        bool isMatched = true;
380        if (opCode == RegExpOpCode::OP_BACKREFERENCE) {
381            const uint8_t *refCptr = captureStart;
382            while (refCptr < captureEnd) {
383                if (IsEOF()) {
384                    isMatched = false;
385                    break;
386                }
387                // NOLINTNEXTLINE(readability-identifier-naming)
388                uint32_t c1 = GetChar(&refCptr, captureEnd);
389                // NOLINTNEXTLINE(readability-identifier-naming)
390                uint32_t c2 = GetChar(&currentPtr_, inputEnd_);
391                if (IsIgnoreCase()) {
392                    c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
393                    c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
394                }
395                if (c1 != c2) {
396                    isMatched = false;
397                    break;
398                }
399            }
400            if (!isMatched) {
401                if (MatchFailed()) {
402                    return false;
403                }
404            } else {
405                Advance(opCode);
406            }
407        } else {
408            const uint8_t *refCptr = captureEnd;
409            while (refCptr > captureStart) {
410                if (GetCurrentPtr() == input_) {
411                    isMatched = false;
412                    break;
413                }
414                // NOLINTNEXTLINE(readability-identifier-naming)
415                uint32_t c1 = GetPrevChar(&refCptr, captureStart);
416                // NOLINTNEXTLINE(readability-identifier-naming)
417                uint32_t c2 = GetPrevChar(&currentPtr_, input_);
418                if (IsIgnoreCase()) {
419                    c1 = static_cast<uint32_t>(RegExpParser::Canonicalize(c1, IsUtf16()));
420                    c2 = static_cast<uint32_t>(RegExpParser::Canonicalize(c2, IsUtf16()));
421                }
422                if (c1 != c2) {
423                    isMatched = false;
424                    break;
425                }
426            }
427            if (!isMatched) {
428                if (MatchFailed()) {
429                    return false;
430                }
431            } else {
432                Advance(opCode);
433            }
434        }
435        return true;
436    }
437
438    inline void Advance(uint8_t opCode, uint32_t offset = 0)
439    {
440        currentPc_ += offset + static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(opCode)->GetSize());
441    }
442
443    inline void AdvanceOffset(uint32_t offset)
444    {
445        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
446        currentPc_ += offset;
447    }
448
449    inline uint32_t GetCurrentChar()
450    {
451        return GetChar(&currentPtr_, inputEnd_);
452    }
453
454    inline void AdvanceCurrentPtr()
455    {
456        AdvancePtr(&currentPtr_, inputEnd_);
457    }
458
459    uint32_t GetChar(const uint8_t **pp, const uint8_t *end) const
460    {
461        uint32_t c = 0;
462        const uint8_t *cptr = *pp;
463        if (!isWideChar_) {
464            c = *cptr;
465            *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
466        } else {
467            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
468            uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
469            c = c1;
470            cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
471            if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
472                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
473                c1 = *(reinterpret_cast<const uint16_t *>(cptr));
474                if (U16_IS_TRAIL(c1)) {
475                    c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
476                    cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
477                }
478            }
479            *pp = cptr;
480        }
481        return c;
482    }
483
484    uint32_t PeekChar(const uint8_t *p, const uint8_t *end) const
485    {
486        uint32_t c = 0;
487        const uint8_t *cptr = p;
488        if (!isWideChar_) {
489            c = *cptr;
490        } else {
491            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
492            uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
493            c = c1;
494            cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
495            if (U16_IS_LEAD(c) && IsUtf16() && cptr < end) {
496                c1 = *(uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
497                if (U16_IS_TRAIL(c1)) {
498                    c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c, c1));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
499                }
500            }
501        }
502        return c;
503    }
504
505    void AdvancePtr(const uint8_t **pp, const uint8_t *end) const
506    {
507        const uint8_t *cptr = *pp;
508        if (!isWideChar_) {
509            *pp += 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
510        } else {
511            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
512            uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
513            cptr += WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
514            if (U16_IS_LEAD(c1) && IsUtf16() && cptr < end) {
515                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
516                c1 = *(reinterpret_cast<const uint16_t *>(cptr));
517                if (U16_IS_TRAIL(c1)) {
518                    cptr += WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
519                }
520            }
521            *pp = cptr;
522        }
523    }
524
525    uint32_t PeekPrevChar(const uint8_t *p, const uint8_t *start) const
526    {
527        uint32_t c = 0;
528        const uint8_t *cptr = p;
529        if (!isWideChar_) {
530            c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
531        } else {
532            cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
533            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
534            uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
535            c = c1;
536            if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
537                 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
538                c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
539                if (U16_IS_LEAD(c1)) {
540                    c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
541                }
542            }
543        }
544        return c;
545    }
546
547    uint32_t GetPrevChar(const uint8_t **pp, const uint8_t *start) const
548    {
549        uint32_t c = 0;
550        const uint8_t *cptr = *pp;
551        if (!isWideChar_) {
552            c = *(cptr - 1);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
553            cptr -= 1;        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
554            *pp = cptr;
555        } else {
556            cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
557            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
558            uint16_t c1 = *(reinterpret_cast<const uint16_t *>(cptr));
559            c = c1;
560            if (U16_IS_TRAIL(c) && IsUtf16() && cptr > start) {
561                // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
562                c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
563                if (U16_IS_LEAD(c1)) {
564                    c = static_cast<uint32_t>(U16_GET_SUPPLEMENTARY(c1, c));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
565                    cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
566                }
567            }
568            *pp = cptr;
569        }
570        return c;
571    }
572
573    void PrevPtr(const uint8_t **pp, const uint8_t *start) const
574    {
575        const uint8_t *cptr = *pp;
576        if (!isWideChar_) {
577            cptr -= 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
578            *pp = cptr;
579        } else {
580            cptr -= WIDE_CHAR_SIZE;           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
581            uint16_t c1 = *(const uint16_t *)cptr;  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
582            if (U16_IS_TRAIL(c1) && IsUtf16() && cptr > start) {
583                // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
584                c1 = (reinterpret_cast<const uint16_t *>(cptr))[-1];
585                if (U16_IS_LEAD(c1)) {
586                    cptr -= WIDE_CHAR_SIZE;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
587                }
588            }
589            *pp = cptr;
590        }
591    }
592
593    bool MatchFailed(bool isMatched = false);
594
595    void SetCurrentPC(uint32_t pc)
596    {
597        currentPc_ = pc;
598    }
599
600    void SetCurrentPtr(const uint8_t *ptr)
601    {
602        currentPtr_ = ptr;
603    }
604
605    bool IsEOF() const
606    {
607        return currentPtr_ >= inputEnd_;
608    }
609
610    bool IsFoundOpRange(const uint32_t currentPc, const uint32_t nowChar,
611                        const DynChunk &byteCode, const uint16_t rangeCount)
612    {
613        bool isFound = false;
614        int32_t idxMin = 0;
615        int32_t idxMax = static_cast<int32_t>(rangeCount - 1);
616        int32_t idx = 0;
617        uint32_t low = 0;
618        uint32_t high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET +
619            static_cast<size_t>(idxMax) * RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
620        if (nowChar <= high) {
621            while (idxMin <= idxMax) {
622                idx = (idxMin + idxMax) / RANGE32_OFFSET;
623                low = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
624                    RANGE32_MAX_HALF_OFFSET);
625                high = byteCode.GetU16(currentPc + RANGE32_HEAD_OFFSET + static_cast<uint32_t>(idx) *
626                    RANGE32_MAX_HALF_OFFSET + RANGE32_OFFSET);
627                if (nowChar < low) {
628                    idxMax = idx - 1;
629                } else if (nowChar > high) {
630                    idxMin = idx + 1;
631                } else {
632                    isFound = true;
633                    break;
634                }
635            }
636        }
637        return isFound;
638    }
639
640    uint32_t GetCurrentPC() const
641    {
642        return currentPc_;
643    }
644
645    void PushStack(uintptr_t val)
646    {
647        ASSERT(currentStack_ < nStack_);
648        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
649        stack_[currentStack_++] = val;
650    }
651
652    void SetStackValue(uintptr_t val) const
653    {
654        ASSERT(currentStack_ >= 1);
655        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
656        stack_[currentStack_ - 1] = val;
657    }
658
659    uintptr_t PopStack()
660    {
661        ASSERT(currentStack_ >= 1);
662        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
663        return stack_[--currentStack_];
664    }
665
666    uintptr_t PeekStack() const
667    {
668        ASSERT(currentStack_ >= 1);
669        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
670        return stack_[currentStack_ - 1];
671    }
672
673    const uint8_t *GetCurrentPtr() const
674    {
675        return currentPtr_;
676    }
677
678    CaptureState *GetCaptureResultList() const
679    {
680        return captureResultList_;
681    }
682
683    void DumpResult(std::ostream &out) const;
684
685    void GetResult(JSThread *thread);
686
687    void PushRegExpState(StateType type, uint32_t pc);
688    void PushRegExpState(StateType type, uint32_t pc, uintptr_t ptr);
689
690    StateType PopRegExpState(bool copyCapture = true);
691
692    void DropRegExpState()
693    {
694        stateStackLen_--;
695    }
696
697    RegExpState *PeekRegExpState() const
698    {
699        ASSERT(stateStackLen_ >= 1);
700        return reinterpret_cast<RegExpState *>(
701            stateStack_ +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
702            (stateStackLen_ - 1) * sizeof(RegExpState));
703    }
704
705    void ReAllocStack(uint32_t stackLen);
706
707    inline bool IsWordChar(uint8_t value) const
708    {
709        return ((value >= '0' && value <= '9') || (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z') ||
710                (value == '_'));
711    }
712
713    inline bool IsTerminator(uint32_t value) const
714    {
715        // NOLINTNEXTLINE(readability-magic-numbers)
716        return (value == '\n' || value == '\r' || value == 0x2028 || value == 0x2029);
717    }
718
719    inline bool IsIgnoreCase() const
720    {
721        return (flags_ & RegExpParser::FLAG_IGNORECASE) != 0;
722    }
723
724    inline bool IsUtf16() const
725    {
726        return (flags_ & RegExpParser::FLAG_UTF16) != 0;
727    }
728
729private:
    // Not referenced by the inline code in this header. NOTE(review): presumably the byte width of a
    // narrow character - confirm against the .cpp.
    static constexpr size_t CHAR_SIZE = 1;
    // Byte step used when walking 16-bit code units (GetChar, PeekChar, AdvancePtr, ...).
    static constexpr size_t WIDE_CHAR_SIZE = 2;
    // Offsets from the opcode of the two one-byte capture indexes read by HandleOpSaveReset.
    static constexpr size_t SAVE_RESET_START = 1;
    static constexpr size_t SAVE_RESET_END = 2;
    // Offsets from the opcode of the 32-bit operands read by HandleOpLoop.
    static constexpr size_t LOOP_MIN_OFFSET = 5;
    static constexpr size_t LOOP_MAX_OFFSET = 9;
    static constexpr size_t LOOP_PC_OFFSET = 1;
    // Offset from the opcode at which a range table starts (the 16-bit count is read at offset 1).
    static constexpr size_t RANGE32_HEAD_OFFSET = 3;
    // Offset of the high bound inside a 32-bit range entry; also used as the entry stride of the
    // 16-bit range table in IsFoundOpRange / HandleOpRange.
    static constexpr size_t RANGE32_MAX_HALF_OFFSET = 4;
    // Entry stride of the 32-bit range table in HandleOpRange32.
    static constexpr size_t RANGE32_MAX_OFFSET = 8;
    // Divisor of the binary-search midpoint; also the offset of the high bound inside a 16-bit range entry.
    static constexpr size_t RANGE32_OFFSET = 2;
    // Sparse table layout used by HandleOpSparse: table start, offset of the 32-bit jump inside an
    // entry, and entry stride.
    static constexpr size_t SPARSE_HEAD_OFFSET = 3;
    static constexpr size_t SPARSE_OFF_OFFSET = 2;
    static constexpr size_t SPARSE_MAX_OFFSET = 6;
    // The next three constants are not referenced by the inline code in this header.
    // NOTE(review): presumably used for stack growth / scratch buffers in the .cpp - confirm.
    static constexpr uint32_t STACK_MULTIPLIER = 2;
    static constexpr uint32_t MIN_STACK_SIZE = 8;
    static constexpr int TMP_BUF_SIZE = 128;
    // Bounds of the input buffer; currentPtr_ is compared against both.
    uint8_t *input_ = nullptr;
    uint8_t *inputEnd_ = nullptr;
    // When true the input is read as 16-bit units, otherwise as single bytes.
    bool isWideChar_ = false;
    // Value handed to memchr() by HandleFirstSplit to skip ahead in narrow input; 0 disables that path.
    uint16_t prefilter_ = 0;

    // Bytecode program counter and current input position.
    uint32_t currentPc_ = 0;
    const uint8_t *currentPtr_ = nullptr;
    // Capture array indexed by capture number; nCapture_ is the bound asserted by the save handlers.
    CaptureState *captureResultList_ = nullptr;
    // Value stack used by PushStack/PopStack/PeekStack; currentStack_ is the number of values in use.
    uintptr_t *stack_ = nullptr;
    uint32_t currentStack_ = 0;

    uint32_t nCapture_ = 0;
    // Upper bound on currentStack_ asserted by PushStack.
    uint32_t nStack_ = 0;

    // Bit set tested against the RegExpParser::FLAG_* constants.
    uint32_t flags_ = 0;
    // State stack: stateStackLen_ entries of sizeof(RegExpState) bytes each, stored in stateStack_.
    uint32_t stateStackLen_ = 0;
    // NOTE(review): presumably the allocated size of stateStack_ - not used in this header, confirm in the .cpp.
    uint32_t stateStackSize_ = 0;
    uint8_t *stateStack_ = nullptr;
    // Chunk passed to the constructor; must be non-null.
    RegExpCachedChunk *chunk_ = nullptr;
766};
767}  // namespace panda::ecmascript
768#endif  // ECMASCRIPT_REGEXP_REGEXP_EXECUTOR_H
769