1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H
17 #define ES2PANDA_PARSER_CORE_REGEXP_H
18 
19 #include <util/enumbitops.h>
20 #include <util/ustring.h>
21 
22 #include <unordered_set>
23 
24 namespace panda::es2panda::lexer {
25 
/**
 * Bit flags attached to a regular expression.
 *
 * Each enumerator other than EMPTY occupies its own bit of the uint8_t underlying type,
 * so several enumerators can be combined into one value. Bit 7 (0x80) is unused.
 * NOTE(review): the names suggest the ECMAScript flag letters g, i, m, y, u, s and d,
 * but that mapping is not established in this header — confirm against the lexer.
 */
enum class RegExpFlags : uint8_t {
    EMPTY = 0x00,
    GLOBAL = 0x01,
    IGNORE_CASE = 0x02,
    MULTILINE = 0x04,
    STICKY = 0x08,
    UNICODE = 0x10,
    DOTALL = 0x20,
    HAS_INDICES = 0x40,
};
36 
// Applies the DEFINE_BITOPS macro to RegExpFlags.
// NOTE(review): the macro is defined outside this file (presumably in util/enumbitops.h)
// and presumably provides the bitwise operators for the enum — confirm against its definition.
DEFINE_BITOPS(RegExpFlags)
38 
/**
 * Exception type that carries a textual description in `message`.
 *
 * The base class is inherited publicly so that handlers written against
 * `const std::exception &` can catch this type, and what() exposes the same text
 * that is stored in `message`.
 *
 * NOTE(review): the constructor is defined outside this header; it presumably
 * copies `m` into `message` — confirm against the implementation file.
 */
class RegExpError : public std::exception {
public:
    explicit RegExpError(const std::string_view &m);

    // Returns the characters of `message`. The pointer stays valid only while this
    // object is alive and `message` is not modified.
    const char *what() const noexcept override
    {
        return message.c_str();
    }

    std::string message;
};
44 
/**
 * Value type bundling one regular expression: its pattern text, its flags text,
 * and the flags as a RegExpFlags value.
 *
 * NOTE(review): the constructor is defined outside this header; it presumably stores
 * p, f and reFlags into patternStr, flagsStr and flags respectively — confirm.
 * NOTE(review): util::StringView looks like a non-owning view, in which case the
 * referenced text must outlive this object — confirm against util/ustring.h.
 */
struct RegExp {
    RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);

    // Pattern text of the regular expression.
    util::StringView patternStr;
    // Flags text of the regular expression.
    util::StringView flagsStr;
    // Flags expressed as RegExpFlags bits.
    RegExpFlags flags;
};
52 
53 class RegExpParser {
54 public:
55     explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator);
56     void ParsePattern();
57 
58 private:
59     void ParseDisjunction();
60     void ParseAlternatives();
61     void ParseAlternative();
62 
63     void ParseNonCapturingGroup();
64     void ParseNamedCapturingGroup();
65     void ParseCapturingGroup();
66 
67     void ParseAssertion();
68     char32_t ParseClassAtom();
69     void ParseCharacterClass();
70     void ParseAtomEscape();
71 
72     uint32_t ParseControlEscape();
73     uint32_t ParseDecimalEscape();
74     uint32_t ParseLegacyOctalEscape();
75     uint32_t ParseHexEscape();
76     uint32_t ParseUnicodeDigits();
77     uint32_t ParseUnicodeEscape();
78 
79     void ParseUnicodePropertyEscape();
80     void ParseNamedBackreference();
81 
82     void ParseQuantifier();
83     bool ParseBracedQuantifier();
84 
85     bool IsSyntaxCharacter(char32_t cp) const;
86     bool ParsePatternCharacter();
87 
88     util::StringView ParseIdent();
89 
90     bool Unicode() const;
91 
92     char32_t Peek() const;
93     char32_t Next();
94     void Advance();
95     bool Eos() const;
96     void ValidateNamedGroupReferences();
97 
98     RegExp re_;
99     ArenaAllocator *allocator_ {};
100     util::StringView::Iterator iter_;
101     uint32_t capturingGroupCount_;
102     std::unordered_set<util::StringView> groupNames_;
103     std::unordered_set<util::StringView> namedGroupReferences_;
104 };
105 
106 }  // namespace panda::es2panda::lexer
107 
108 #endif
109