1/* 2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "ecmascript/regexp/regexp_executor.h" 17 18namespace panda::ecmascript { 19using RegExpState = RegExpExecutor::RegExpState; 20using RegExpGlobalResult = builtins::RegExpGlobalResult; 21bool RegExpExecutor::Execute(const uint8_t *input, uint32_t lastIndex, uint32_t length, uint8_t *buf, bool isWideChar) 22{ 23 DynChunk buffer(buf, chunk_); 24 input_ = const_cast<uint8_t *>(input); 25 inputEnd_ = const_cast<uint8_t *>(input + length * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE)); 26 uint32_t size = buffer.GetU32(0); 27 nCapture_ = buffer.GetU32(RegExpParser::NUM_CAPTURE__OFFSET); 28 nStack_ = buffer.GetU32(RegExpParser::NUM_STACK_OFFSET); 29 flags_ = buffer.GetU32(RegExpParser::FLAGS_OFFSET); 30 prefilter_ = buffer.GetU32(RegExpParser::PREFILTER_OFFSET); 31 isWideChar_ = isWideChar; 32 33 uint32_t captureResultSize = sizeof(CaptureState) * nCapture_; 34 uint32_t stackSize = sizeof(uintptr_t) * nStack_; 35 stateStackLen_ = 0; 36 currentStack_ = 0; 37 38 if (captureResultSize != 0) { 39 if (captureResultList_ == nullptr) { 40 captureResultList_ = chunk_->NewArray<CaptureState>(nCapture_); 41 } 42 if (memset_s(captureResultList_, captureResultSize, 0, captureResultSize) != EOK) { 43 LOG_FULL(FATAL) << "memset_s failed"; 44 UNREACHABLE(); 45 } 46 } 47 if (stackSize != 0 && stack_ == nullptr) { 48 stack_ = chunk_->NewArray<uintptr_t>(nStack_); 49 if (memset_s(stack_, stackSize, 0, stackSize) != EOK) { 50 LOG_FULL(FATAL) << "memset_s failed"; 51 UNREACHABLE(); 52 } 53 } 54 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 55 SetCurrentPtr(input + lastIndex * (isWideChar ? WIDE_CHAR_SIZE : CHAR_SIZE)); 56 SetCurrentPC(RegExpParser::OP_START_OFFSET); 57 58 // first split 59 if ((flags_ & RegExpParser::FLAG_STICKY) == 0) { 60 PushRegExpState(STATE_SPLIT, RegExpParser::OP_START_OFFSET); 61 } 62 return ExecuteInternal(buffer, size); 63} 64 65bool RegExpExecutor::MatchFailed(bool isMatched) 66{ 67 if (isMatched) { 68 stateStackLen_ = 0; 69 return true; 70 } 71 while (stateStackLen_ > 0) { 72 // StateType::STATE_SPLIT or STATE_NEGATIVE_MATCH_AHEAD 73 if (PopRegExpState() <= StateType::STATE_NEGATIVE_MATCH_AHEAD) { 74 return false; 75 } 76 } 77 return true; 78} 79 80// NOLINTNEXTLINE(readability-function-size) 81bool RegExpExecutor::ExecuteInternal(const DynChunk &byteCode, uint32_t pcEnd) 82{ 83 while (GetCurrentPC() < pcEnd) { 84 // first split 85 if (!HandleFirstSplit()) { 86 return false; 87 } 88 uint8_t opCode = byteCode.GetU8(GetCurrentPC()); 89 switch (opCode) { 90 case RegExpOpCode::OP_DOTS: 91 case RegExpOpCode::OP_ALL: { 92 if (!HandleOpAll(opCode)) { 93 return false; 94 } 95 break; 96 } 97 case RegExpOpCode::OP_CHAR32: 98 case RegExpOpCode::OP_CHAR: { 99 if (!HandleOpChar(byteCode, opCode)) { 100 return false; 101 } 102 break; 103 } 104 case RegExpOpCode::OP_NOT_WORD_BOUNDARY: 105 case RegExpOpCode::OP_WORD_BOUNDARY: { 106 if (!HandleOpWordBoundary(opCode)) { 107 return false; 108 } 109 break; 110 } 111 case RegExpOpCode::OP_LINE_START: { 112 if (!HandleOpLineStart(opCode)) { 113 return false; 114 } 115 break; 116 } 117 case RegExpOpCode::OP_LINE_END: { 118 if (!HandleOpLineEnd(opCode)) { 119 return false; 120 } 121 break; 122 } 123 case RegExpOpCode::OP_SAVE_START: 124 HandleOpSaveStart(byteCode, opCode); 125 break; 126 case RegExpOpCode::OP_SAVE_END: 127 HandleOpSaveEnd(byteCode, opCode); 128 break; 129 case RegExpOpCode::OP_GOTO: { 130 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 131 Advance(opCode, offset); 132 break; 133 } 134 case RegExpOpCode::OP_MATCH: { 135 ASSERT(stateStackLen_ > 0); 136 // jump to match ahead 137 uint32_t ahead = stateStackLen_ - 1; 138 auto stateStack = reinterpret_cast<RegExpState *>(stateStack_); 139 while (ahead != 0 && stateStack[ahead].type_ != StateType::STATE_MATCH_AHEAD && 140 stateStack[ahead].type_ != StateType::STATE_NEGATIVE_MATCH_AHEAD) { 141 --ahead; 142 } 143 bool isNegative = stateStack[ahead].type_ == StateType::STATE_NEGATIVE_MATCH_AHEAD; 144 while (stateStackLen_ > ahead) { 145 PopRegExpState(isNegative); 146 } 147 if (isNegative && MatchFailed(false)) { 148 return false; 149 } 150 break; 151 } 152 case RegExpOpCode::OP_MATCH_END: 153 return true; 154 case RegExpOpCode::OP_SAVE_RESET: 155 HandleOpSaveReset(byteCode, opCode); 156 break; 157 case RegExpOpCode::OP_SPLIT_NEXT: 158 case RegExpOpCode::OP_MATCH_AHEAD: 159 case RegExpOpCode::OP_NEGATIVE_MATCH_AHEAD: 160 HandleOpMatch(byteCode, opCode); 161 break; 162 case RegExpOpCode::OP_SPLIT_FIRST: 163 HandleOpSplitFirst(byteCode, opCode); 164 break; 165 case RegExpOpCode::OP_PREV: { 166 if (!HandleOpPrev(opCode)) { 167 return false; 168 } 169 break; 170 } 171 case RegExpOpCode::OP_LOOP_GREEDY: 172 case RegExpOpCode::OP_LOOP: 173 HandleOpLoop(byteCode, opCode); 174 break; 175 case RegExpOpCode::OP_PUSH_CHAR: { 176 PushRegExpState(StateType::STATE_PUSH, 0, 0); 177 PushStack(reinterpret_cast<uintptr_t>(GetCurrentPtr())); 178 Advance(opCode); 179 break; 180 } 181 case RegExpOpCode::OP_CHECK_CHAR: { 182 if (stateStackLen_ > 0 && PeekRegExpState()->type_ == StateType::STATE_PUSH) { 183 DropRegExpState(); 184 } else { 185 ASSERT(currentStack_ > 0); 186 PushRegExpState(StateType::STATE_POP, 0, stack_[currentStack_ - 1]); 187 } 188 if (PopStack() != reinterpret_cast<uintptr_t>(GetCurrentPtr())) { 189 Advance(opCode); 190 } else { 191 uint32_t offset = byteCode.GetU32(GetCurrentPC() + 1); 192 Advance(opCode, offset); 193 } 194 break; 195 } 196 case RegExpOpCode::OP_PUSH: { 197 PushRegExpState(StateType::STATE_PUSH, 0, 0); 198 PushStack(0); 199 Advance(opCode); 200 break; 201 } 202 case RegExpOpCode::OP_POP: { 203 ASSERT(currentStack_ > 0); 204 PushRegExpState(StateType::STATE_POP, 0, stack_[currentStack_ - 1]); 205 PopStack(); 206 Advance(opCode); 207 break; 208 } 209 case RegExpOpCode::OP_RANGE32: { 210 if (!HandleOpRange32(byteCode)) { 211 return false; 212 } 213 break; 214 } 215 case RegExpOpCode::OP_RANGE: { 216 if (!HandleOpRange(byteCode)) { 217 return false; 218 } 219 break; 220 } 221 case RegExpOpCode::OP_SPARSE: { 222 if (!HandleOpSparse(byteCode)) { 223 return false; 224 } 225 break; 226 } 227 case RegExpOpCode::OP_BACKREFERENCE: 228 case RegExpOpCode::OP_BACKWARD_BACKREFERENCE: { 229 if (!HandleOpBackReference(byteCode, opCode)) { 230 return false; 231 } 232 break; 233 } 234 default: 235 UNREACHABLE(); 236 } 237 } 238 // for loop match 239 return true; 240} 241 242void RegExpExecutor::DumpResult(std::ostream &out) const 243{ 244 out << "captures:" << std::endl; 245 for (uint32_t i = 0; i < nCapture_; i++) { 246 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 247 CaptureState *captureState = &captureResultList_[i]; 248 int32_t len = captureState->captureEnd - captureState->captureStart; 249 if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) { 250 out << i << ":\t" << CString(reinterpret_cast<const char *>(captureState->captureStart), len) << std::endl; 251 } else { 252 out << i << ":\t" 253 << "undefined" << std::endl; 254 } 255 } 256} 257 258void RegExpExecutor::GetResult(JSThread *thread) 259{ 260 JSHandle<RegExpGlobalResult> matchResult(thread->GetCurrentEcmaContext()->GetRegExpGlobalResult()); 261 matchResult->SetTotalCaptureCounts(thread, JSTaggedValue(nCapture_)); 262 uint32_t firstIndex = RegExpGlobalResult::FIRST_CAPTURE_INDEX; 263 uint32_t availableCaptureSlot = matchResult->GetLength() - firstIndex; 264 uint32_t requiredLength = nCapture_ * 2; 265 if (requiredLength > availableCaptureSlot) { 266 matchResult = RegExpGlobalResult::GrowCapturesCapacity(thread, matchResult, requiredLength + firstIndex); 267 } 268 for (uint32_t i = 0; i < nCapture_; i++) { 269 CaptureState *captureState = &captureResultList_[i]; 270 int32_t len = captureState->captureEnd - captureState->captureStart; 271 if ((captureState->captureStart != nullptr && captureState->captureEnd != nullptr) && (len >= 0)) { 272 if (isWideChar_) { 273 matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue( 274 static_cast<int32_t>((captureState->captureStart - input_) / WIDE_CHAR_SIZE))); 275 matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue( 276 static_cast<int32_t>((captureState->captureEnd - input_) / WIDE_CHAR_SIZE))); 277 } else { 278 matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue( 279 static_cast<int32_t>(captureState->captureStart - input_))); 280 matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue( 281 static_cast<int32_t>(captureState->captureEnd - input_))); 282 } 283 } else { 284 // undefined 285 matchResult->SetStartOfCaptureIndex(thread, i, JSTaggedValue(0)); 286 matchResult->SetEndOfCaptureIndex(thread, i, JSTaggedValue(-1)); 287 } 288 } 289 uint32_t endIndex = currentPtr_ - input_; 290 if (isWideChar_) { 291 endIndex /= WIDE_CHAR_SIZE; 292 } 293 matchResult->SetEndIndex(thread, JSTaggedValue(endIndex)); 294} 295 296void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc) 297{ 298 ReAllocStack(stateStackLen_ + 1); 299 auto state = reinterpret_cast<RegExpState *>( 300 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 301 stateStackLen_ * sizeof(RegExpState)); 302 state->type_ = type; 303 state->currentPc_ = pc; 304 state->currentPtr_ = GetCurrentPtr(); 305 stateStackLen_++; 306} 307 308void RegExpExecutor::PushRegExpState(StateType type, uint32_t pc, uintptr_t ptr) 309{ 310 ReAllocStack(stateStackLen_ + 1); 311 auto state = reinterpret_cast<RegExpState *>( 312 stateStack_ + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 313 stateStackLen_ * sizeof(RegExpState)); 314 state->type_ = type; 315 state->currentPc_ = pc; 316 state->currentPtr_ = reinterpret_cast<const uint8_t *>(ptr); 317 stateStackLen_++; 318} 319 320RegExpExecutor::StateType RegExpExecutor::PopRegExpState(bool copyCapture) 321{ 322 if (stateStackLen_ != 0) { 323 auto state = PeekRegExpState(); 324 stateStackLen_--; 325 switch (state->type_) { 326 case StateType::STATE_SPLIT: 327 case StateType::STATE_NEGATIVE_MATCH_AHEAD: 328 case StateType::STATE_MATCH_AHEAD: 329 SetCurrentPC(state->currentPc_); 330 SetCurrentPtr(state->currentPtr_); 331 break; 332 case StateType::STATE_SAVE: 333 if (copyCapture) { 334 *(reinterpret_cast<const uint8_t **>(GetCaptureResultList()) + state->currentPc_) = 335 state->currentPtr_; 336 } 337 break; 338 case StateType::STATE_PUSH: 339 PopStack(); 340 break; 341 case StateType::STATE_POP: 342 PushStack((uintptr_t)state->currentPtr_); 343 break; 344 case StateType::STATE_SET: 345 SetStackValue((uintptr_t)state->currentPtr_); 346 break; 347 default: 348 UNREACHABLE(); 349 break; 350 } 351 return state->type_; 352 } 353 return StateType::STATE_INVALID; 354} 355 356void RegExpExecutor::ReAllocStack(uint32_t stackLen) 357{ 358 if (stackLen > stateStackSize_) { 359 ASSERT((static_cast<size_t>(stateStackSize_) * 2) <= static_cast<size_t>(UINT32_MAX)); // 2: double the size 360 uint32_t newStackSize = std::max(stateStackSize_ * 2, MIN_STACK_SIZE); // 2: double the size 361 ASSERT((static_cast<size_t>(newStackSize) * static_cast<size_t>(sizeof(RegExpState))) <= 362 static_cast<size_t>(UINT32_MAX)); 363 uint32_t stackByteSize = newStackSize * sizeof(RegExpState); 364 auto newStack = chunk_->NewArray<uint8_t>(stackByteSize); 365 if (memset_s(newStack, stackByteSize, 0, stackByteSize) != EOK) { 366 LOG_FULL(FATAL) << "memset_s failed"; 367 UNREACHABLE(); 368 } 369 if (stateStack_ != nullptr) { 370 auto stackSize = stateStackSize_ * sizeof(RegExpState); 371 if (memcpy_s(newStack, stackSize, stateStack_, stackSize) != EOK) { 372 return; 373 } 374 } 375 stateStack_ = newStack; 376 stateStackSize_ = newStackSize; 377 } 378} 379} // namespace panda::ecmascript 380