1/*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "ecmascript/regexp/regexp_opcode.h"
17
18#include "ecmascript/regexp/regexp_executor.h"
19
20namespace panda::ecmascript {
// Short alias for the executor's capture-state type.
using CaptureState = RegExpExecutor::CaptureState;

// One file-local instance of every opcode class. Their addresses are collected in
// g_intrinsicSet (defined right after these declarations) so that an instruction
// handler can be looked up from its opcode value.
// NOTE(review): the NOLINTNEXTLINE markers below trail the declarations instead of
// preceding them, so each one applies to the line after the one it sits on — confirm
// that this placement is intended.
static SaveStartOpCode g_saveStartOpcode = SaveStartOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SaveEndOpCode g_saveEndOpcode = SaveEndOpCode();        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static CharOpCode g_charOpcode = CharOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static GotoOpCode g_gotoOpcode = GotoOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SplitNextOpCode g_splitNextOpcode = SplitNextOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SplitFirstOpCode g_splitFirstOpcode =
    SplitFirstOpCode();                            // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static MatchOpCode g_matchOpcode = MatchOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static LoopOpCode g_loopOpcode = LoopOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static LoopGreedyOpCode g_loopGreedyOpcode =
    LoopGreedyOpCode();                                        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static PushCharOpCode g_pushCharOpcode = PushCharOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static CheckCharOpCode g_checkCharOpcode = CheckCharOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static PushOpCode g_pushOpcode = PushOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static PopOpCode g_popOpcode = PopOpCode();                    // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SaveResetOpCode g_saveResetOpcode = SaveResetOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static LineStartOpCode g_lineStartOpcode = LineStartOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static LineEndOpCode g_lineEndOpcode = LineEndOpCode();        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static WordBoundaryOpCode g_wordBoundaryOpcode =
    WordBoundaryOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static NotWordBoundaryOpCode g_notWordBoundaryOpcode =
    NotWordBoundaryOpCode();                    // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static AllOpCode g_allOpcode = AllOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static DotsOpCode g_dotsOpcode = DotsOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static MatchAheadOpCode g_matchAheadOpcode =
    MatchAheadOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static NegativeMatchAheadOpCode g_negativeMatchAheadOpcode =
    NegativeMatchAheadOpCode();                             // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static MatchEndOpCode g_matchEndOpcode = MatchEndOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static PrevOpCode g_prevOpcode = PrevOpCode();              // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static RangeOpCode g_rangeOpcode = RangeOpCode();           // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static BackReferenceOpCode g_backreferenceOpcode =
    BackReferenceOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static BackwardBackReferenceOpCode g_backwardBackreferenceOpcode =
    BackwardBackReferenceOpCode();                       // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static Char32OpCode g_char32Opcode = Char32OpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static Range32OpCode g_range32Opcode = Range32OpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SparseOpCode g_sparseOpcode = SparseOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
// Opcode lookup table: RegExpOpCode::GetRegExpOpCode() indexes this vector directly with
// the opcode value (g_intrinsicSet.at(opCode)), so the position of every entry has to be
// equal to the numeric value of the opcode it handles.
// NOTE(review): the opcode values are presumably declared in regexp_opcode.h — keep this
// list in the same order when adding or reordering opcodes.
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static std::vector<RegExpOpCode *> g_intrinsicSet = {
    &g_saveStartOpcode,
    &g_saveEndOpcode,
    &g_charOpcode,
    &g_gotoOpcode,
    &g_splitFirstOpcode,
    &g_splitNextOpcode,
    &g_negativeMatchAheadOpcode,
    &g_matchAheadOpcode,
    &g_matchOpcode,
    &g_loopOpcode,
    &g_loopGreedyOpcode,
    &g_pushCharOpcode,
    &g_checkCharOpcode,
    &g_pushOpcode,
    &g_popOpcode,
    &g_saveResetOpcode,
    &g_lineStartOpcode,
    &g_lineEndOpcode,
    &g_wordBoundaryOpcode,
    &g_notWordBoundaryOpcode,
    &g_allOpcode,
    &g_dotsOpcode,
    &g_matchEndOpcode,
    &g_prevOpcode,
    &g_rangeOpcode,
    &g_backreferenceOpcode,
    &g_backwardBackreferenceOpcode,
    &g_char32Opcode,
    &g_range32Opcode,
    &g_sparseOpcode,
};
94
95RegExpOpCode::RegExpOpCode(uint8_t opCode, int size) : opCode_(opCode), size_(size) {}
96
97/* static */
98RegExpOpCode *RegExpOpCode::GetRegExpOpCode(const DynChunk &buf, int pc)
99{
100    uint8_t opCode = buf.GetU8(pc);
101    ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
102    return g_intrinsicSet.at(opCode);
103}
104
105/* static */
106RegExpOpCode *RegExpOpCode::GetRegExpOpCode(uint8_t opCode)
107{
108    ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
109    return g_intrinsicSet.at(opCode);
110}
111
112/* static */
113void RegExpOpCode::DumpRegExpOpCode(std::ostream &out, const DynChunk &buf)
114{
115    out << "OpCode:\t" << std::endl;
116    uint32_t pc = RegExpParser::OP_START_OFFSET;
117    do {
118        RegExpOpCode *byteCode = GetRegExpOpCode(buf, pc);
119        pc = byteCode->DumpOpCode(out, buf, pc);
120    } while (pc < buf.size_);
121}
122
123uint32_t SaveStartOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
124{
125    auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
126    buf->EmitChar(GetOpCode());
127    buf->EmitChar(capture);
128    return GetDynChunkfSize(*buf);
129}
130
131uint32_t SaveStartOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
132{
133    out << offset << ":\t"
134        << "save_start\t" << buf.GetU8(offset + 1) << std::endl;
135    return offset + GetSize();
136}
137
138uint32_t SaveEndOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
139{
140    auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
141    buf->EmitChar(GetOpCode());
142    buf->EmitChar(capture);
143    return GetDynChunkfSize(*buf);
144}
145
146uint32_t SaveEndOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
147{
148    out << offset << ":\t"
149        << "save_end\t" << buf.GetU8(offset + 1) << std::endl;
150    return offset + GetSize();
151}
152
153uint32_t CharOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
154{
155    auto paraChar = static_cast<uint16_t>(para & 0xffffU);  // NOLINTNEXTLINE(readability-magic-numbers)
156    buf->EmitChar(GetOpCode());
157    buf->EmitU16(paraChar);
158    return GetDynChunkfSize(*buf);
159}
160
161uint32_t CharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
162{
163    out << offset << ":\t"
164        << "char\t" << static_cast<char>(buf.GetU16(offset + 1)) << std::endl;
165    return offset + GetSize();
166}
167
168uint32_t Char32OpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
169{
170    buf->EmitChar(GetOpCode());
171    buf->EmitU32(para);
172    return GetDynChunkfSize(*buf);
173}
174
175uint32_t Char32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
176{
177    out << offset << ":\t"
178        << "char32\t" << static_cast<char>(buf.GetU32(offset + 1)) << std::endl;
179    return offset + GetSize();
180}
181
182uint32_t GotoOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
183{
184    buf->EmitChar(GetOpCode());
185    buf->EmitU32(para);
186    return GetDynChunkfSize(*buf);
187}
188
189void GotoOpCode::UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const
190{
191    buf->PutU32(offset + 1, para);
192}
193
194uint32_t GotoOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
195{
196    out << offset << ":\t"
197        << "goto\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
198    return offset + GetSize();
199}
200
201uint32_t SplitNextOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
202{
203    buf->Insert(offset, GetSize());
204    buf->PutU8(offset, GetOpCode());
205    buf->PutU32(offset + 1, para);
206    return GetDynChunkfSize(*buf);
207}
208
209uint32_t SplitNextOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
210{
211    buf->EmitChar(GetOpCode());
212    buf->EmitU32(para);
213    return GetDynChunkfSize(*buf);
214}
215
216uint32_t SplitNextOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
217{
218    out << offset << ":\t"
219        << "split_next\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
220    return offset + GetSize();
221}
222
223uint32_t SplitFirstOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
224{
225    buf->Insert(offset, GetSize());
226    buf->PutU8(offset, GetOpCode());
227    buf->PutU32(offset + 1, para);
228    return GetDynChunkfSize(*buf);
229}
230
231uint32_t SplitFirstOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
232{
233    buf->EmitChar(GetOpCode());
234    buf->EmitU32(para);
235    return GetDynChunkfSize(*buf);
236}
237
238uint32_t SplitFirstOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
239{
240    out << offset << ":\t"
241        << "split_first\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
242    return offset + GetSize();
243}
244
245uint32_t LoopOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
246{
247    buf->EmitChar(GetOpCode());
248    buf->EmitU32(start);
249    buf->EmitU32(min);
250    buf->EmitU32(max);
251    return GetDynChunkfSize(*buf);
252}
253
254uint32_t LoopOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
255{
256    out << offset << ":\t"
257        << "loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
258        << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
259        << std::endl;
260    return offset + GetSize();
261}
262
263uint32_t LoopGreedyOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
264{
265    buf->EmitChar(GetOpCode());
266    buf->EmitU32(start);
267    buf->EmitU32(min);
268    buf->EmitU32(max);
269    return GetDynChunkfSize(*buf);
270}
271
272uint32_t LoopGreedyOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
273{
274    out << offset << ":\t"
275        << "greedy_loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
276        << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
277        << std::endl;
278    return offset + GetSize();
279}
280
281uint32_t PushCharOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
282{
283    buf->Insert(offset, GetSize());
284    buf->PutU8(offset, GetOpCode());
285    return GetDynChunkfSize(*buf);
286}
287
288uint32_t PushCharOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
289{
290    out << offset << ":\t"
291        << "push_char" << std::endl;
292    return offset + GetSize();
293}
294
295uint32_t PushOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
296{
297    buf->Insert(offset, GetSize());
298    buf->PutU8(offset, GetOpCode());
299    return GetDynChunkfSize(*buf);
300}
301
302uint32_t PushOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
303{
304    out << offset << ":\t"
305        << "push" << std::endl;
306    return offset + GetSize();
307}
308
309uint32_t PopOpCode::EmitOpCode(DynChunk *buf) const
310{
311    buf->EmitChar(GetOpCode());
312    return GetDynChunkfSize(*buf);
313}
314
315uint32_t PopOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
316{
317    out << offset << ":\t"
318        << "pop" << std::endl;
319    return offset + GetSize();
320}
321
322uint32_t CheckCharOpCode::EmitOpCode(DynChunk *buf, uint32_t offset) const
323{
324    buf->EmitChar(GetOpCode());
325    buf->EmitU32(offset);
326    return GetDynChunkfSize(*buf);
327}
328
329uint32_t CheckCharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
330{
331    out << offset << ":\t"
332        << "check_char\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
333    return offset + GetSize();
334}
335
336uint32_t SaveResetOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const
337{
338    auto captureStart = static_cast<uint8_t>(start & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
339    auto captureEnd = static_cast<uint8_t>(end & 0xffU);      // NOLINTNEXTLINE(readability-magic-numbers)
340    buf->Insert(offset, GetSize());
341    buf->PutU8(offset, GetOpCode());
342    buf->PutU8(offset + RegExpOpCode::OP_SIZE_ONE, captureStart);
343    buf->PutU8(offset + RegExpOpCode::OP_SIZE_TWO, captureEnd);
344    return GetDynChunkfSize(*buf);
345}
346
347uint32_t SaveResetOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
348{
349    out << offset << ":\t"
350        << "save_reset\t" << buf.GetU8(offset + RegExpOpCode::OP_SIZE_ONE) << "\t"
351        << buf.GetU8(offset + RegExpOpCode::OP_SIZE_TWO) << std::endl;
352    return offset + GetSize();
353}
354
355uint32_t MatchOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
356{
357    buf->EmitChar(GetOpCode());
358    return GetDynChunkfSize(*buf);
359}
360
361uint32_t MatchOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
362{
363    out << offset << ":\t"
364        << "match" << std::endl;
365    return offset + GetSize();
366}
367
368uint32_t MatchEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
369{
370    buf->EmitChar(GetOpCode());
371    return GetDynChunkfSize(*buf);
372}
373
374uint32_t MatchEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
375{
376    out << offset << ":\t"
377        << "match_end" << std::endl;
378    return offset + GetSize();
379}
380
381uint32_t LineStartOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
382{
383    buf->EmitChar(GetOpCode());
384    return GetDynChunkfSize(*buf);
385}
386
387uint32_t LineStartOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
388{
389    out << offset << ":\t"
390        << "line_start" << std::endl;
391    return offset + GetSize();
392}
393
394uint32_t LineEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
395{
396    buf->EmitChar(GetOpCode());
397    return GetDynChunkfSize(*buf);
398}
399
400uint32_t LineEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
401{
402    out << offset << ":\t"
403        << "line_end" << std::endl;
404    return offset + GetSize();
405}
406
407uint32_t WordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
408{
409    buf->EmitChar(GetOpCode());
410    return GetDynChunkfSize(*buf);
411}
412
413uint32_t WordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
414{
415    out << offset << ":\t"
416        << "word_boundary" << std::endl;
417    return offset + GetSize();
418}
419
420uint32_t NotWordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
421{
422    buf->EmitChar(GetOpCode());
423    return GetDynChunkfSize(*buf);
424}
425
426uint32_t NotWordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf,
427                                           uint32_t offset) const
428{
429    out << offset << ":\t"
430        << "not_word_boundary" << std::endl;
431    return offset + GetSize();
432}
433
434uint32_t AllOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
435{
436    buf->EmitChar(GetOpCode());
437    return GetDynChunkfSize(*buf);
438}
439
440uint32_t AllOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
441{
442    out << offset << ":\t"
443        << "all" << std::endl;
444    return offset + GetSize();
445}
446
447uint32_t DotsOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
448{
449    buf->EmitChar(GetOpCode());
450    return GetDynChunkfSize(*buf);
451}
452
453uint32_t DotsOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
454{
455    out << offset << ":\t"
456        << "dots" << std::endl;
457    return offset + GetSize();
458}
459
460uint32_t MatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
461{
462    out << offset << ":\t"
463        << "match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
464    return offset + GetSize();
465}
466
467uint32_t RangeOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
468{
469    out << offset << ":\t"
470        << "range\t";
471    size_t size = buf.GetU16(offset + 1);
472    for (size_t i = 0; i < size; i++) {
473        out << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR)) << "\t"
474            << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE +
475                          (i * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_TWO))
476            << "\t";
477    }
478    out << std::endl;
479    return offset + size * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_THREE;
480}
481
482uint32_t RangeOpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
483{
484    buf->EmitChar(GetOpCode());
485    size_t size = rangeSet.rangeSet_.size();
486    buf->EmitU16(size);
487    for (auto range : rangeSet.rangeSet_) {
488        buf->EmitU16(range.first);
489        buf->EmitU16(range.second);
490    }
491    return GetDynChunkfSize(*buf);
492}
493
494uint32_t Range32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
495{
496    out << offset << ":\t"
497        << "range32\t";
498    size_t size = buf.GetU16(offset + 1);
499    for (size_t i = 0; i < size; i++) {
500        out << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT)) << "\t"
501            << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE +
502                          (i * RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_FOUR))
503            << "\t";
504    }
505    out << std::endl;
506    return offset + size * +RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_THREE;
507}
508
509uint32_t Range32OpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
510{
511    buf->EmitChar(GetOpCode());
512    size_t size = rangeSet.rangeSet_.size();
513    buf->EmitU16(size);
514    for (auto range : rangeSet.rangeSet_) {
515        buf->EmitU32(range.first);
516        buf->EmitU32(range.second);
517    }
518    return GetDynChunkfSize(*buf);
519}
520
521uint32_t SparseOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
522{
523    out << offset << ":\t"
524        << "sparse\t";
525    size_t size = buf.GetU16(offset + 1);
526    for (size_t i = 0; i < size; i++) {
527        out << static_cast<char>(buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_SIX)))
528            << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE +
529                          (i * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_TWO)) +
530                          offset + size * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_THREE
531            << "\t";
532    }
533    out << std::endl;
534    return offset + size * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_THREE;
535}
536
537uint32_t MatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
538{
539    buf->Insert(offset, GetSize());
540    buf->PutU8(offset, GetOpCode());
541    buf->PutU32(offset + 1, para);
542    return GetDynChunkfSize(*buf);
543}
544
545uint32_t NegativeMatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
546{
547    out << offset << ":\t"
548        << "negative_match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
549    return offset + GetSize();
550}
551
552uint32_t NegativeMatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
553{
554    buf->Insert(offset, GetSize());
555    buf->PutU8(offset, GetOpCode());
556    buf->PutU32(offset + 1, para);
557    return GetDynChunkfSize(*buf);
558}
559
560uint32_t PrevOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
561{
562    buf->EmitChar(GetOpCode());
563    return GetDynChunkfSize(*buf);
564}
565
566uint32_t PrevOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
567{
568    out << offset << ":\t"
569        << "prev" << std::endl;
570    return offset + GetSize();
571}
572
573uint32_t BackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
574{
575    auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
576    buf->EmitChar(GetOpCode());
577    buf->EmitChar(capture);
578    return GetDynChunkfSize(*buf);
579}
580
581uint32_t BackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
582{
583    out << offset << ":\t"
584        << "backreference\t" << buf.GetU8(offset + 1) << std::endl;
585    return offset + GetSize();
586}
587
588uint32_t BackwardBackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
589{
590    auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
591    buf->EmitChar(GetOpCode());
592    buf->EmitChar(capture);
593    return GetDynChunkfSize(*buf);
594}
595
596uint32_t BackwardBackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
597{
598    out << offset << ":\t"
599        << "backward_backreference\t" << buf.GetU8(offset + 1) << std::endl;
600    return offset + GetSize();
601}
602
603void RangeSet::Insert(uint32_t start, uint32_t end)
604{
605    if (start > end) {
606        return;
607    }
608    std::pair<uint32_t, uint32_t> pairElement = std::make_pair(start, end);
609    if (rangeSet_.empty()) {
610        rangeSet_.emplace_back(pairElement);
611    } else {
612        for (auto iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
613            if (IsIntersect(start, end, iter->first, iter->second) ||
614                IsAdjacent(start, end, iter->first, iter->second)) {
615                iter->first = std::min(iter->first, start);
616                iter->second = std::max(iter->second, end);
617                Compress();
618                return;
619            }
620            if (iter->first > end) {
621                rangeSet_.insert(iter, pairElement);
622                return;
623            }
624        }
625        rangeSet_.emplace_back(pairElement);
626    }
627}
628// if RangeResult cross-intersects with [a, z] and [A, Z],
629// we capitalize the intersection part and insert into RangeResult.
630void RangeSet::Inter(RangeSet &cr, const RangeSet &s1)
631{
632    if (s1.rangeSet_.empty()) {
633        rangeSet_.clear();
634        return;
635    }
636    if (rangeSet_.empty()) {
637        return;
638    }
639    for (const auto &interItem : s1.rangeSet_) {
640        uint32_t firstMax = 0;
641        uint32_t secondMin = 0;
642        for (const auto &range : rangeSet_) {
643            if (range.first >= interItem.first) {
644                firstMax = range.first;
645            } else {
646                firstMax = interItem.first;
647            }
648            if (range.second >= interItem.second) {
649                secondMin = interItem.second;
650            } else {
651                secondMin = range.second;
652            }
653            if (secondMin < firstMax) {
654                continue;
655            }
656            if (firstMax >= 'a' && firstMax <= 'z') {
657                cr.Insert(firstMax + 'A' - 'a', secondMin + 'A' - 'a');
658            }
659            if (firstMax >= 'A' && firstMax <= 'Z') {
660                cr.Insert(firstMax - 'A' + 'a', secondMin - 'A' + 'a');
661            }
662        }
663    }
664}
665void RangeSet::Insert(const RangeSet &s1)
666{
667    if (s1.rangeSet_.empty()) {
668        return;
669    }
670    if (rangeSet_.empty()) {
671        rangeSet_ = s1.rangeSet_;
672    } else {
673        for (auto range : s1.rangeSet_) {
674            Insert(range.first, range.second);
675        }
676        Compress();
677    }
678}
679
680void RangeSet::Invert(bool isUtf16)
681{
682    uint32_t maxValue = isUtf16 ? UINT32_MAX : UINT16_MAX;
683    if (rangeSet_.empty()) {
684        rangeSet_.emplace_back(std::make_pair(0, maxValue));
685        return;
686    }
687
688    auto iter = rangeSet_.begin();
689    auto iter2 = rangeSet_.begin();
690    if (iter->first == 0 && iter->second == maxValue) {
691        rangeSet_.clear();
692        return;
693    }
694    iter2++;
695
696    uint32_t first = iter->first;
697
698    for (iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
699        if (iter->second == maxValue) {
700            rangeSet_.erase(iter);
701            break;
702        }
703        iter->first = iter->second + 1;
704        if (iter2 != rangeSet_.end()) {
705            iter->second = iter2->first - 1;
706            iter2++;
707        } else {
708            iter->second = maxValue;
709        }
710    }
711    if (first > 0) {
712        std::pair<uint32_t, uint32_t> pair1 = std::make_pair(0, first - 1);
713        rangeSet_.push_front(pair1);
714    }
715    Compress();
716}
717
718void RangeSet::Compress()
719{
720    auto iter = rangeSet_.begin();
721    auto iter2 = rangeSet_.begin();
722    iter2++;
723    while (iter2 != rangeSet_.end()) {
724        if (IsIntersect(iter->first, iter->second, iter2->first, iter2->second) ||
725            IsAdjacent(iter->first, iter->second, iter2->first, iter2->second)) {
726            iter->first = std::min(iter->first, iter2->first);
727            iter->second = std::max(iter->second, iter2->second);
728            iter2 = rangeSet_.erase(iter2);
729        } else {
730            iter++;
731            iter2++;
732        }
733    }
734}
735}  // namespace panda::ecmascript
736