1/** 2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#ifndef ES2PANDA_PARSER_CORE_REGEXP_H 17#define ES2PANDA_PARSER_CORE_REGEXP_H 18 19#include "util/enumbitops.h" 20#include "util/ustring.h" 21 22#include "../../parser/parserImpl.h" 23 24#include <unordered_set> 25 26namespace ark::es2panda::lexer { 27 28using ENUMBITOPS_OPERATORS; 29 30enum class RegExpFlags : uint32_t { 31 EMPTY = 0U, 32 GLOBAL = 1U << 0U, 33 IGNORE_CASE = 1U << 1U, 34 MULTILINE = 1U << 2U, 35 DOTALL = 1U << 3U, 36 UNICODE = 1U << 4U, 37 STICKY = 1U << 5U, 38}; 39 40struct RegExp { 41 RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags); 42 43 // NOLINTBEGIN(misc-non-private-member-variables-in-classes) 44 util::StringView patternStr; 45 util::StringView flagsStr; 46 RegExpFlags flags; 47 // NOLINTEND(misc-non-private-member-variables-in-classes) 48}; 49 50class RegExpParser { 51public: 52 explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator, const parser::ParserImpl &parser); 53 void ParsePattern(); 54 55private: 56 void ParseDisjunction(); 57 void ParseAlternatives(); 58 void ParseAlternative(); 59 bool ParseAlternativeCharLeftParen(); 60 61 void ParseNonCapturingGroup(); 62 void ParseNamedCapturingGroup(); 63 void ParseCapturingGroup(); 64 65 void ParseAssertion(); 66 char32_t ParseClassAtom(); 67 void ParseCharacterClass(); 68 void ParseAtomEscape(); 69 void ParseAtomEscapeSwitch(char32_t cp); 70 71 uint32_t ParseControlEscape(); 72 uint32_t ParseDecimalEscape(); 73 uint32_t ParseLegacyOctalEscape(); 74 uint32_t ParseHexEscape(); 75 uint32_t ParseUnicodeDigits(); 76 uint32_t ParseUnicodeEscape(); 77 78 void ParseUnicodePropertyEscape(); 79 void ValidateNamedBackreference(bool isUnicode); 80 void ValidateGroupNameElement(char32_t cp); 81 void ParseNamedBackreference(); 82 83 void ParseQuantifier(); 84 bool ParseBracedQuantifier(); 85 86 bool IsSyntaxCharacter(char32_t cp) const; 87 bool ParsePatternCharacter(); 88 89 util::StringView ParseIdent(); 90 91 bool Unicode() const; 92 93 char32_t Peek() const; 94 char32_t Next(); 95 void Advance(); 96 bool Eos() const; 97 98 RegExp re_; 99 ArenaAllocator *allocator_ {}; 100 util::StringView::Iterator iter_; 101 uint32_t capturingGroupCount_ {}; 102 std::unordered_set<util::StringView> groupNames_; 103 std::unordered_set<util::StringView> backReferences_; 104 const es2panda::parser::ParserImpl &parser_; 105}; 106} // namespace ark::es2panda::lexer 107 108template <> 109struct enumbitops::IsAllowedType<ark::es2panda::lexer::RegExpFlags> : std::true_type { 110}; 111 112#endif 113