1/* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "ecmascript/regexp/regexp_opcode.h" 17 18#include "ecmascript/regexp/regexp_executor.h" 19 20namespace panda::ecmascript { 21using CaptureState = RegExpExecutor::CaptureState; 22 23static SaveStartOpCode g_saveStartOpcode = SaveStartOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 24static SaveEndOpCode g_saveEndOpcode = SaveEndOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 25static CharOpCode g_charOpcode = CharOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 26static GotoOpCode g_gotoOpcode = GotoOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 27static SplitNextOpCode g_splitNextOpcode = SplitNextOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 28static SplitFirstOpCode g_splitFirstOpcode = 29 SplitFirstOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 30static MatchOpCode g_matchOpcode = MatchOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 31static LoopOpCode g_loopOpcode = LoopOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 32static LoopGreedyOpCode g_loopGreedyOpcode = 33 LoopGreedyOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 34static PushCharOpCode g_pushCharOpcode = PushCharOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 35static CheckCharOpCode g_checkCharOpcode = CheckCharOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 36static PushOpCode g_pushOpcode = PushOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 37static PopOpCode g_popOpcode = PopOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 38static SaveResetOpCode g_saveResetOpcode = SaveResetOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 39static LineStartOpCode g_lineStartOpcode = LineStartOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 40static LineEndOpCode g_lineEndOpcode = LineEndOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 41static WordBoundaryOpCode g_wordBoundaryOpcode = 42 WordBoundaryOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 43static NotWordBoundaryOpCode g_notWordBoundaryOpcode = 44 NotWordBoundaryOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 45static AllOpCode g_allOpcode = AllOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 46static DotsOpCode g_dotsOpcode = DotsOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 47static MatchAheadOpCode g_matchAheadOpcode = 48 MatchAheadOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 49static NegativeMatchAheadOpCode g_negativeMatchAheadOpcode = 50 NegativeMatchAheadOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 51static MatchEndOpCode g_matchEndOpcode = MatchEndOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 52static PrevOpCode g_prevOpcode = PrevOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 53static RangeOpCode g_rangeOpcode = RangeOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 54static BackReferenceOpCode g_backreferenceOpcode = 55 BackReferenceOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 56static BackwardBackReferenceOpCode g_backwardBackreferenceOpcode = 57 BackwardBackReferenceOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 58static Char32OpCode g_char32Opcode = Char32OpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 59static Range32OpCode g_range32Opcode = Range32OpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 60static SparseOpCode g_sparseOpcode = SparseOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 61// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) 62static std::vector<RegExpOpCode *> g_intrinsicSet = { 63 &g_saveStartOpcode, 64 &g_saveEndOpcode, 65 &g_charOpcode, 66 &g_gotoOpcode, 67 &g_splitFirstOpcode, 68 &g_splitNextOpcode, 69 &g_negativeMatchAheadOpcode, 70 &g_matchAheadOpcode, 71 &g_matchOpcode, 72 &g_loopOpcode, 73 &g_loopGreedyOpcode, 74 &g_pushCharOpcode, 75 &g_checkCharOpcode, 76 &g_pushOpcode, 77 &g_popOpcode, 78 &g_saveResetOpcode, 79 &g_lineStartOpcode, 80 &g_lineEndOpcode, 81 &g_wordBoundaryOpcode, 82 &g_notWordBoundaryOpcode, 83 &g_allOpcode, 84 &g_dotsOpcode, 85 &g_matchEndOpcode, 86 &g_prevOpcode, 87 &g_rangeOpcode, 88 &g_backreferenceOpcode, 89 &g_backwardBackreferenceOpcode, 90 &g_char32Opcode, 91 &g_range32Opcode, 92 &g_sparseOpcode, 93}; 94 95RegExpOpCode::RegExpOpCode(uint8_t opCode, int size) : opCode_(opCode), size_(size) {} 96 97/* static */ 98RegExpOpCode *RegExpOpCode::GetRegExpOpCode(const DynChunk &buf, int pc) 99{ 100 uint8_t opCode = buf.GetU8(pc); 101 ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code"); 102 return g_intrinsicSet.at(opCode); 103} 104 105/* static */ 106RegExpOpCode *RegExpOpCode::GetRegExpOpCode(uint8_t opCode) 107{ 108 ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code"); 109 return g_intrinsicSet.at(opCode); 110} 111 112/* static */ 113void RegExpOpCode::DumpRegExpOpCode(std::ostream &out, const DynChunk &buf) 114{ 115 out << "OpCode:\t" << std::endl; 116 uint32_t pc = RegExpParser::OP_START_OFFSET; 117 do { 118 RegExpOpCode *byteCode = GetRegExpOpCode(buf, pc); 119 pc = byteCode->DumpOpCode(out, buf, pc); 120 } while (pc < buf.size_); 121} 122 123uint32_t SaveStartOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 124{ 125 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers) 126 buf->EmitChar(GetOpCode()); 127 buf->EmitChar(capture); 128 return GetDynChunkfSize(*buf); 129} 130 131uint32_t SaveStartOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 132{ 133 out << offset << ":\t" 134 << "save_start\t" << buf.GetU8(offset + 1) << std::endl; 135 return offset + GetSize(); 136} 137 138uint32_t SaveEndOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 139{ 140 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers) 141 buf->EmitChar(GetOpCode()); 142 buf->EmitChar(capture); 143 return GetDynChunkfSize(*buf); 144} 145 146uint32_t SaveEndOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 147{ 148 out << offset << ":\t" 149 << "save_end\t" << buf.GetU8(offset + 1) << std::endl; 150 return offset + GetSize(); 151} 152 153uint32_t CharOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 154{ 155 auto paraChar = static_cast<uint16_t>(para & 0xffffU); // NOLINTNEXTLINE(readability-magic-numbers) 156 buf->EmitChar(GetOpCode()); 157 buf->EmitU16(paraChar); 158 return GetDynChunkfSize(*buf); 159} 160 161uint32_t CharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 162{ 163 out << offset << ":\t" 164 << "char\t" << static_cast<char>(buf.GetU16(offset + 1)) << std::endl; 165 return offset + GetSize(); 166} 167 168uint32_t Char32OpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 169{ 170 buf->EmitChar(GetOpCode()); 171 buf->EmitU32(para); 172 return GetDynChunkfSize(*buf); 173} 174 175uint32_t Char32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 176{ 177 out << offset << ":\t" 178 << "char32\t" << static_cast<char>(buf.GetU32(offset + 1)) << std::endl; 179 return offset + GetSize(); 180} 181 182uint32_t GotoOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 183{ 184 buf->EmitChar(GetOpCode()); 185 buf->EmitU32(para); 186 return GetDynChunkfSize(*buf); 187} 188 189void GotoOpCode::UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const 190{ 191 buf->PutU32(offset + 1, para); 192} 193 194uint32_t GotoOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 195{ 196 out << offset << ":\t" 197 << "goto\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; 198 return offset + GetSize(); 199} 200 201uint32_t SplitNextOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const 202{ 203 buf->Insert(offset, GetSize()); 204 buf->PutU8(offset, GetOpCode()); 205 buf->PutU32(offset + 1, para); 206 return GetDynChunkfSize(*buf); 207} 208 209uint32_t SplitNextOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 210{ 211 buf->EmitChar(GetOpCode()); 212 buf->EmitU32(para); 213 return GetDynChunkfSize(*buf); 214} 215 216uint32_t SplitNextOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 217{ 218 out << offset << ":\t" 219 << "split_next\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; 220 return offset + GetSize(); 221} 222 223uint32_t SplitFirstOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const 224{ 225 buf->Insert(offset, GetSize()); 226 buf->PutU8(offset, GetOpCode()); 227 buf->PutU32(offset + 1, para); 228 return GetDynChunkfSize(*buf); 229} 230 231uint32_t SplitFirstOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 232{ 233 buf->EmitChar(GetOpCode()); 234 buf->EmitU32(para); 235 return GetDynChunkfSize(*buf); 236} 237 238uint32_t SplitFirstOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 239{ 240 out << offset << ":\t" 241 << "split_first\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; 242 return offset + GetSize(); 243} 244 245uint32_t LoopOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const 246{ 247 buf->EmitChar(GetOpCode()); 248 buf->EmitU32(start); 249 buf->EmitU32(min); 250 buf->EmitU32(max); 251 return GetDynChunkfSize(*buf); 252} 253 254uint32_t LoopOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 255{ 256 out << offset << ":\t" 257 << "loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t" 258 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE) 259 << std::endl; 260 return offset + GetSize(); 261} 262 263uint32_t LoopGreedyOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const 264{ 265 buf->EmitChar(GetOpCode()); 266 buf->EmitU32(start); 267 buf->EmitU32(min); 268 buf->EmitU32(max); 269 return GetDynChunkfSize(*buf); 270} 271 272uint32_t LoopGreedyOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 273{ 274 out << offset << ":\t" 275 << "greedy_loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t" 276 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE) 277 << std::endl; 278 return offset + GetSize(); 279} 280 281uint32_t PushCharOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const 282{ 283 buf->Insert(offset, GetSize()); 284 buf->PutU8(offset, GetOpCode()); 285 return GetDynChunkfSize(*buf); 286} 287 288uint32_t PushCharOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 289{ 290 out << offset << ":\t" 291 << "push_char" << std::endl; 292 return offset + GetSize(); 293} 294 295uint32_t PushOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const 296{ 297 buf->Insert(offset, GetSize()); 298 buf->PutU8(offset, GetOpCode()); 299 return GetDynChunkfSize(*buf); 300} 301 302uint32_t PushOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 303{ 304 out << offset << ":\t" 305 << "push" << std::endl; 306 return offset + GetSize(); 307} 308 309uint32_t PopOpCode::EmitOpCode(DynChunk *buf) const 310{ 311 buf->EmitChar(GetOpCode()); 312 return GetDynChunkfSize(*buf); 313} 314 315uint32_t PopOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 316{ 317 out << offset << ":\t" 318 << "pop" << std::endl; 319 return offset + GetSize(); 320} 321 322uint32_t CheckCharOpCode::EmitOpCode(DynChunk *buf, uint32_t offset) const 323{ 324 buf->EmitChar(GetOpCode()); 325 buf->EmitU32(offset); 326 return GetDynChunkfSize(*buf); 327} 328 329uint32_t CheckCharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 330{ 331 out << offset << ":\t" 332 << "check_char\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; 333 return offset + GetSize(); 334} 335 336uint32_t SaveResetOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const 337{ 338 auto captureStart = static_cast<uint8_t>(start & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers) 339 auto captureEnd = static_cast<uint8_t>(end & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers) 340 buf->Insert(offset, GetSize()); 341 buf->PutU8(offset, GetOpCode()); 342 buf->PutU8(offset + RegExpOpCode::OP_SIZE_ONE, captureStart); 343 buf->PutU8(offset + RegExpOpCode::OP_SIZE_TWO, captureEnd); 344 return GetDynChunkfSize(*buf); 345} 346 347uint32_t SaveResetOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 348{ 349 out << offset << ":\t" 350 << "save_reset\t" << buf.GetU8(offset + RegExpOpCode::OP_SIZE_ONE) << "\t" 351 << buf.GetU8(offset + RegExpOpCode::OP_SIZE_TWO) << std::endl; 352 return offset + GetSize(); 353} 354 355uint32_t MatchOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 356{ 357 buf->EmitChar(GetOpCode()); 358 return GetDynChunkfSize(*buf); 359} 360 361uint32_t MatchOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 362{ 363 out << offset << ":\t" 364 << "match" << std::endl; 365 return offset + GetSize(); 366} 367 368uint32_t MatchEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 369{ 370 buf->EmitChar(GetOpCode()); 371 return GetDynChunkfSize(*buf); 372} 373 374uint32_t MatchEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 375{ 376 out << offset << ":\t" 377 << "match_end" << std::endl; 378 return offset + GetSize(); 379} 380 381uint32_t LineStartOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 382{ 383 buf->EmitChar(GetOpCode()); 384 return GetDynChunkfSize(*buf); 385} 386 387uint32_t LineStartOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 388{ 389 out << offset << ":\t" 390 << "line_start" << std::endl; 391 return offset + GetSize(); 392} 393 394uint32_t LineEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 395{ 396 buf->EmitChar(GetOpCode()); 397 return GetDynChunkfSize(*buf); 398} 399 400uint32_t LineEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 401{ 402 out << offset << ":\t" 403 << "line_end" << std::endl; 404 return offset + GetSize(); 405} 406 407uint32_t WordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 408{ 409 buf->EmitChar(GetOpCode()); 410 return GetDynChunkfSize(*buf); 411} 412 413uint32_t WordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 414{ 415 out << offset << ":\t" 416 << "word_boundary" << std::endl; 417 return offset + GetSize(); 418} 419 420uint32_t NotWordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 421{ 422 buf->EmitChar(GetOpCode()); 423 return GetDynChunkfSize(*buf); 424} 425 426uint32_t NotWordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, 427 uint32_t offset) const 428{ 429 out << offset << ":\t" 430 << "not_word_boundary" << std::endl; 431 return offset + GetSize(); 432} 433 434uint32_t AllOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 435{ 436 buf->EmitChar(GetOpCode()); 437 return GetDynChunkfSize(*buf); 438} 439 440uint32_t AllOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 441{ 442 out << offset << ":\t" 443 << "all" << std::endl; 444 return offset + GetSize(); 445} 446 447uint32_t DotsOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 448{ 449 buf->EmitChar(GetOpCode()); 450 return GetDynChunkfSize(*buf); 451} 452 453uint32_t DotsOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 454{ 455 out << offset << ":\t" 456 << "dots" << std::endl; 457 return offset + GetSize(); 458} 459 460uint32_t MatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 461{ 462 out << offset << ":\t" 463 << "match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; 464 return offset + GetSize(); 465} 466 467uint32_t RangeOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 468{ 469 out << offset << ":\t" 470 << "range\t"; 471 size_t size = buf.GetU16(offset + 1); 472 for (size_t i = 0; i < size; i++) { 473 out << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR)) << "\t" 474 << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + 475 (i * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_TWO)) 476 << "\t"; 477 } 478 out << std::endl; 479 return offset + size * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_THREE; 480} 481 482uint32_t RangeOpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const 483{ 484 buf->EmitChar(GetOpCode()); 485 size_t size = rangeSet.rangeSet_.size(); 486 buf->EmitU16(size); 487 for (auto range : rangeSet.rangeSet_) { 488 buf->EmitU16(range.first); 489 buf->EmitU16(range.second); 490 } 491 return GetDynChunkfSize(*buf); 492} 493 494uint32_t Range32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 495{ 496 out << offset << ":\t" 497 << "range32\t"; 498 size_t size = buf.GetU16(offset + 1); 499 for (size_t i = 0; i < size; i++) { 500 out << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT)) << "\t" 501 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + 502 (i * RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_FOUR)) 503 << "\t"; 504 } 505 out << std::endl; 506 return offset + size * +RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_THREE; 507} 508 509uint32_t Range32OpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const 510{ 511 buf->EmitChar(GetOpCode()); 512 size_t size = rangeSet.rangeSet_.size(); 513 buf->EmitU16(size); 514 for (auto range : rangeSet.rangeSet_) { 515 buf->EmitU32(range.first); 516 buf->EmitU32(range.second); 517 } 518 return GetDynChunkfSize(*buf); 519} 520 521uint32_t SparseOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 522{ 523 out << offset << ":\t" 524 << "sparse\t"; 525 size_t size = buf.GetU16(offset + 1); 526 for (size_t i = 0; i < size; i++) { 527 out << static_cast<char>(buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_SIX))) 528 << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + 529 (i * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_TWO)) + 530 offset + size * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_THREE 531 << "\t"; 532 } 533 out << std::endl; 534 return offset + size * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_THREE; 535} 536 537uint32_t MatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const 538{ 539 buf->Insert(offset, GetSize()); 540 buf->PutU8(offset, GetOpCode()); 541 buf->PutU32(offset + 1, para); 542 return GetDynChunkfSize(*buf); 543} 544 545uint32_t NegativeMatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 546{ 547 out << offset << ":\t" 548 << "negative_match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl; 549 return offset + GetSize(); 550} 551 552uint32_t NegativeMatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const 553{ 554 buf->Insert(offset, GetSize()); 555 buf->PutU8(offset, GetOpCode()); 556 buf->PutU32(offset + 1, para); 557 return GetDynChunkfSize(*buf); 558} 559 560uint32_t PrevOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const 561{ 562 buf->EmitChar(GetOpCode()); 563 return GetDynChunkfSize(*buf); 564} 565 566uint32_t PrevOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const 567{ 568 out << offset << ":\t" 569 << "prev" << std::endl; 570 return offset + GetSize(); 571} 572 573uint32_t BackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 574{ 575 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers) 576 buf->EmitChar(GetOpCode()); 577 buf->EmitChar(capture); 578 return GetDynChunkfSize(*buf); 579} 580 581uint32_t BackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 582{ 583 out << offset << ":\t" 584 << "backreference\t" << buf.GetU8(offset + 1) << std::endl; 585 return offset + GetSize(); 586} 587 588uint32_t BackwardBackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const 589{ 590 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers) 591 buf->EmitChar(GetOpCode()); 592 buf->EmitChar(capture); 593 return GetDynChunkfSize(*buf); 594} 595 596uint32_t BackwardBackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const 597{ 598 out << offset << ":\t" 599 << "backward_backreference\t" << buf.GetU8(offset + 1) << std::endl; 600 return offset + GetSize(); 601} 602 603void RangeSet::Insert(uint32_t start, uint32_t end) 604{ 605 if (start > end) { 606 return; 607 } 608 std::pair<uint32_t, uint32_t> pairElement = std::make_pair(start, end); 609 if (rangeSet_.empty()) { 610 rangeSet_.emplace_back(pairElement); 611 } else { 612 for (auto iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) { 613 if (IsIntersect(start, end, iter->first, iter->second) || 614 IsAdjacent(start, end, iter->first, iter->second)) { 615 iter->first = std::min(iter->first, start); 616 iter->second = std::max(iter->second, end); 617 Compress(); 618 return; 619 } 620 if (iter->first > end) { 621 rangeSet_.insert(iter, pairElement); 622 return; 623 } 624 } 625 rangeSet_.emplace_back(pairElement); 626 } 627} 628// if RangeResult cross-intersects with [a, z] and [A, Z], 629// we capitalize the intersection part and insert into RangeResult. 630void RangeSet::Inter(RangeSet &cr, const RangeSet &s1) 631{ 632 if (s1.rangeSet_.empty()) { 633 rangeSet_.clear(); 634 return; 635 } 636 if (rangeSet_.empty()) { 637 return; 638 } 639 for (const auto &interItem : s1.rangeSet_) { 640 uint32_t firstMax = 0; 641 uint32_t secondMin = 0; 642 for (const auto &range : rangeSet_) { 643 if (range.first >= interItem.first) { 644 firstMax = range.first; 645 } else { 646 firstMax = interItem.first; 647 } 648 if (range.second >= interItem.second) { 649 secondMin = interItem.second; 650 } else { 651 secondMin = range.second; 652 } 653 if (secondMin < firstMax) { 654 continue; 655 } 656 if (firstMax >= 'a' && firstMax <= 'z') { 657 cr.Insert(firstMax + 'A' - 'a', secondMin + 'A' - 'a'); 658 } 659 if (firstMax >= 'A' && firstMax <= 'Z') { 660 cr.Insert(firstMax - 'A' + 'a', secondMin - 'A' + 'a'); 661 } 662 } 663 } 664} 665void RangeSet::Insert(const RangeSet &s1) 666{ 667 if (s1.rangeSet_.empty()) { 668 return; 669 } 670 if (rangeSet_.empty()) { 671 rangeSet_ = s1.rangeSet_; 672 } else { 673 for (auto range : s1.rangeSet_) { 674 Insert(range.first, range.second); 675 } 676 Compress(); 677 } 678} 679 680void RangeSet::Invert(bool isUtf16) 681{ 682 uint32_t maxValue = isUtf16 ? UINT32_MAX : UINT16_MAX; 683 if (rangeSet_.empty()) { 684 rangeSet_.emplace_back(std::make_pair(0, maxValue)); 685 return; 686 } 687 688 auto iter = rangeSet_.begin(); 689 auto iter2 = rangeSet_.begin(); 690 if (iter->first == 0 && iter->second == maxValue) { 691 rangeSet_.clear(); 692 return; 693 } 694 iter2++; 695 696 uint32_t first = iter->first; 697 698 for (iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) { 699 if (iter->second == maxValue) { 700 rangeSet_.erase(iter); 701 break; 702 } 703 iter->first = iter->second + 1; 704 if (iter2 != rangeSet_.end()) { 705 iter->second = iter2->first - 1; 706 iter2++; 707 } else { 708 iter->second = maxValue; 709 } 710 } 711 if (first > 0) { 712 std::pair<uint32_t, uint32_t> pair1 = std::make_pair(0, first - 1); 713 rangeSet_.push_front(pair1); 714 } 715 Compress(); 716} 717 718void RangeSet::Compress() 719{ 720 auto iter = rangeSet_.begin(); 721 auto iter2 = rangeSet_.begin(); 722 iter2++; 723 while (iter2 != rangeSet_.end()) { 724 if (IsIntersect(iter->first, iter->second, iter2->first, iter2->second) || 725 IsAdjacent(iter->first, iter->second, iter2->first, iter2->second)) { 726 iter->first = std::min(iter->first, iter2->first); 727 iter->second = std::max(iter->second, iter2->second); 728 iter2 = rangeSet_.erase(iter2); 729 } else { 730 iter++; 731 iter2++; 732 } 733 } 734} 735} // namespace panda::ecmascript 736