/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15
16#ifndef ES2PANDA_PARSER_CORE_REGEXP_H
17#define ES2PANDA_PARSER_CORE_REGEXP_H
18
#include <util/enumbitops.h>
#include <util/ustring.h>

#include <cstdint>
#include <exception>
#include <string>
#include <string_view>
#include <unordered_set>
23
24namespace panda::es2panda::lexer {
25
/**
 * Bit set of regular-expression flags.
 *
 * Every enumerator other than EMPTY occupies its own bit of the uint8_t
 * underlying type, so several flags can be stored in one value.
 *
 * NOTE(review): the enumerator names appear to mirror the ECMAScript flag
 * letters (g, i, m, y, u, s, d) — confirm against the code that scans flags.
 */
enum class RegExpFlags : uint8_t {
    EMPTY = 0,             // no flag set
    GLOBAL = 1 << 0,       // bit 0
    IGNORE_CASE = 1 << 1,  // bit 1
    MULTILINE = 1 << 2,    // bit 2
    STICKY = 1 << 3,       // bit 3
    UNICODE = 1 << 4,      // bit 4
    DOTALL = 1 << 5,       // bit 5
    HAS_INDICES = 1 << 6,  // bit 6
};
36
37DEFINE_BITOPS(RegExpFlags)
38
/**
 * Exception type that carries a textual description in `message`.
 *
 * Derives *publicly* from std::exception: with the default (private)
 * inheritance of a `class`, a `catch (const std::exception &)` handler could
 * not bind to a RegExpError, and what() would never report `message`.
 */
class RegExpError : public std::exception {
public:
    // Defined out of line; presumably stores `m` in `message` — confirm in the .cpp.
    explicit RegExpError(const std::string_view &m);

    // Exposes `message` through the standard exception interface.
    const char *what() const noexcept override
    {
        return message.c_str();
    }

    std::string message;
};
44
45struct RegExp {
46    RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);
47
48    util::StringView patternStr;
49    util::StringView flagsStr;
50    RegExpFlags flags;
51};
52
53class RegExpParser {
54public:
55    explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator);
56    void ParsePattern();
57
58private:
59    void ParseDisjunction();
60    void ParseAlternatives();
61    void ParseAlternative();
62
63    void ParseNonCapturingGroup();
64    void ParseNamedCapturingGroup();
65    void ParseCapturingGroup();
66
67    void ParseAssertion();
68    char32_t ParseClassAtom();
69    void ParseCharacterClass();
70    void ParseAtomEscape();
71
72    uint32_t ParseControlEscape();
73    uint32_t ParseDecimalEscape();
74    uint32_t ParseLegacyOctalEscape();
75    uint32_t ParseHexEscape();
76    uint32_t ParseUnicodeDigits();
77    uint32_t ParseUnicodeEscape();
78
79    void ParseUnicodePropertyEscape();
80    void ParseNamedBackreference();
81
82    void ParseQuantifier();
83    bool ParseBracedQuantifier();
84
85    bool IsSyntaxCharacter(char32_t cp) const;
86    bool ParsePatternCharacter();
87
88    util::StringView ParseIdent();
89
90    bool Unicode() const;
91
92    char32_t Peek() const;
93    char32_t Next();
94    void Advance();
95    bool Eos() const;
96    void ValidateNamedGroupReferences();
97
98    RegExp re_;
99    ArenaAllocator *allocator_ {};
100    util::StringView::Iterator iter_;
101    uint32_t capturingGroupCount_;
102    std::unordered_set<util::StringView> groupNames_;
103    std::unordered_set<util::StringView> namedGroupReferences_;
104};
105
106}  // namespace panda::es2panda::lexer
107
108#endif
109