1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H 17 #define ES2PANDA_PARSER_CORE_REGEXP_H 18 19 #include <util/enumbitops.h> 20 #include <util/ustring.h> 21 22 #include <unordered_set> 23 24 namespace panda::es2panda::lexer { 25 26 enum class RegExpFlags : uint8_t { 27 EMPTY = 0, 28 GLOBAL = 1 << 0, 29 IGNORE_CASE = 1 << 1, 30 MULTILINE = 1 << 2, 31 STICKY = 1 << 3, 32 UNICODE = 1 << 4, 33 DOTALL = 1 << 5, 34 HAS_INDICES = 1 << 6, 35 }; 36 37 DEFINE_BITOPS(RegExpFlags) 38 39 class RegExpError : std::exception { 40 public: 41 explicit RegExpError(const std::string_view &m); 42 std::string message; 43 }; 44 45 struct RegExp { 46 RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags); 47 48 util::StringView patternStr; 49 util::StringView flagsStr; 50 RegExpFlags flags; 51 }; 52 53 class RegExpParser { 54 public: 55 explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator); 56 void ParsePattern(); 57 58 private: 59 void ParseDisjunction(); 60 void ParseAlternatives(); 61 void ParseAlternative(); 62 63 void ParseNonCapturingGroup(); 64 void ParseNamedCapturingGroup(); 65 void ParseCapturingGroup(); 66 67 void ParseAssertion(); 68 char32_t ParseClassAtom(); 69 void ParseCharacterClass(); 70 void ParseAtomEscape(); 71 72 uint32_t ParseControlEscape(); 73 uint32_t ParseDecimalEscape(); 74 uint32_t ParseLegacyOctalEscape(); 75 uint32_t ParseHexEscape(); 76 uint32_t ParseUnicodeDigits(); 77 uint32_t ParseUnicodeEscape(); 78 79 void ParseUnicodePropertyEscape(); 80 void ParseNamedBackreference(); 81 82 void ParseQuantifier(); 83 bool ParseBracedQuantifier(); 84 85 bool IsSyntaxCharacter(char32_t cp) const; 86 bool ParsePatternCharacter(); 87 88 util::StringView ParseIdent(); 89 90 bool Unicode() const; 91 92 char32_t Peek() const; 93 char32_t Next(); 94 void Advance(); 95 bool Eos() const; 96 void ValidateNamedGroupReferences(); 97 98 RegExp re_; 99 ArenaAllocator *allocator_ {}; 100 util::StringView::Iterator iter_; 101 uint32_t capturingGroupCount_; 102 std::unordered_set<util::StringView> groupNames_; 103 std::unordered_set<util::StringView> namedGroupReferences_; 104 }; 105 106 } // namespace panda::es2panda::lexer 107 108 #endif 109