1 /**
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H
17 #define ES2PANDA_PARSER_CORE_REGEXP_H
18 
19 #include "util/enumbitops.h"
20 #include "util/ustring.h"
21 
22 #include "../../parser/parserImpl.h"
23 
24 #include <unordered_set>
25 
26 namespace ark::es2panda::lexer {
27 
// Using-declaration that pulls the names listed by ENUMBITOPS_OPERATORS into this namespace.
// NOTE(review): ENUMBITOPS_OPERATORS is presumably a macro from util/enumbitops.h that expands to the
// enumbitops bitwise operator names, so that they can be applied to RegExpFlags (which is opted in through the
// enumbitops::IsAllowedType specialization at the end of this header) - confirm against util/enumbitops.h.
using ENUMBITOPS_OPERATORS;
29 
/**
 * Bit set describing the flags attached to a regular expression.
 *
 * The underlying type is uint32_t and every non-EMPTY enumerator owns exactly one distinct bit,
 * so individual flags can be combined into (and tested against) a single value.
 */
enum class RegExpFlags : uint32_t {
    EMPTY = 0x00U,        // no flag set
    GLOBAL = 0x01U,       // bit 0
    IGNORE_CASE = 0x02U,  // bit 1
    MULTILINE = 0x04U,    // bit 2
    DOTALL = 0x08U,       // bit 3
    UNICODE = 0x10U,      // bit 4
    STICKY = 0x20U,       // bit 5
};
39 
/**
 * Value type bundling a regular expression's pattern text, its flags text and the flags as a RegExpFlags value.
 *
 * NOTE(review): the util::StringView members are presumably non-owning views, in which case the viewed
 * character storage has to outlive this object - confirm against util/ustring.h.
 */
struct RegExp {
    // Constructor is defined out of line. Presumably p, f and reFlags initialize patternStr, flagsStr and flags
    // respectively - confirm in the implementation file.
    RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);

    // Members are intentionally public (hence the lint suppression below).
    // NOLINTBEGIN(misc-non-private-member-variables-in-classes)
    util::StringView patternStr;  // pattern text
    util::StringView flagsStr;    // flags text
    RegExpFlags flags;            // flags as a RegExpFlags bit set
    // NOLINTEND(misc-non-private-member-variables-in-classes)
};
49 
/**
 * Parser for the pattern of a RegExp.
 *
 * The public surface is just the constructor and ParsePattern(); every other member function is a private helper.
 * Only declarations are visible in this header, so the notes below that describe intent are inferred from names.
 *
 * NOTE(review): the helper names (Disjunction, Alternative, Assertion, Quantifier, AtomEscape, ClassAtom, ...)
 * look like the productions of the ECMAScript Pattern grammar, which suggests a recursive-descent validator -
 * confirm against the implementation file.
 */
class RegExpParser {
public:
    // re: the regular expression to parse; allocator: arena allocator pointer stored in allocator_;
    // parser: ParserImpl bound to the parser_ reference member, so it must outlive this object.
    explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator, const parser::ParserImpl &parser);
    // Entry point. Returns nothing; NOTE(review): errors are presumably reported through parser_ - confirm.
    void ParsePattern();

private:
    // Top-level pattern structure.
    void ParseDisjunction();
    void ParseAlternatives();
    void ParseAlternative();
    bool ParseAlternativeCharLeftParen();

    // Group forms.
    void ParseNonCapturingGroup();
    void ParseNamedCapturingGroup();
    void ParseCapturingGroup();

    // Assertions, character classes and atom escapes.
    void ParseAssertion();
    char32_t ParseClassAtom();
    void ParseCharacterClass();
    void ParseAtomEscape();
    void ParseAtomEscapeSwitch(char32_t cp);

    // Escape sequences that yield a numeric value (uint32_t).
    uint32_t ParseControlEscape();
    uint32_t ParseDecimalEscape();
    uint32_t ParseLegacyOctalEscape();
    uint32_t ParseHexEscape();
    uint32_t ParseUnicodeDigits();
    uint32_t ParseUnicodeEscape();

    // Unicode property escapes, group names and named back references.
    void ParseUnicodePropertyEscape();
    void ValidateNamedBackreference(bool isUnicode);
    void ValidateGroupNameElement(char32_t cp);
    void ParseNamedBackreference();

    // Quantifiers.
    void ParseQuantifier();
    bool ParseBracedQuantifier();

    // Plain pattern characters.
    bool IsSyntaxCharacter(char32_t cp) const;
    bool ParsePatternCharacter();

    util::StringView ParseIdent();

    // NOTE(review): presumably reports whether RegExpFlags::UNICODE is set in re_.flags - confirm.
    bool Unicode() const;

    // Low-level cursor primitives. Peek() and Eos() are const and therefore do not move the cursor.
    // NOTE(review): presumably all four operate on iter_ - confirm in the implementation file.
    char32_t Peek() const;
    char32_t Next();
    void Advance();
    bool Eos() const;

    RegExp re_;                        // held by value
    ArenaAllocator *allocator_ {};     // value-initialized to nullptr until the constructor sets it
    util::StringView::Iterator iter_;  // NOTE(review): presumably iterates over re_.patternStr - confirm
    uint32_t capturingGroupCount_ {};  // value-initialized to 0
    // NOTE(review): presumably the names declared by named groups and the names used by named back references,
    // kept so the two can be cross-checked - confirm in the implementation file.
    std::unordered_set<util::StringView> groupNames_;
    std::unordered_set<util::StringView> backReferences_;
    // Reference member: bound once at construction; the referenced ParserImpl must outlive this object.
    const es2panda::parser::ParserImpl &parser_;
};
106 }  // namespace ark::es2panda::lexer
107 
// Explicit specialization making enumbitops::IsAllowedType<RegExpFlags> derive from std::true_type.
// NOTE(review): presumably this is the opt-in trait that lets the enumbitops bitwise operators accept
// RegExpFlags operands - confirm against util/enumbitops.h.
template <>
struct enumbitops::IsAllowedType<ark::es2panda::lexer::RegExpFlags> : std::true_type {
};
111 
112 #endif
113