/**
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15
16#ifndef ES2PANDA_PARSER_CORE_REGEXP_H
17#define ES2PANDA_PARSER_CORE_REGEXP_H
18
19#include "util/enumbitops.h"
20#include "util/ustring.h"
21
22#include "../../parser/parserImpl.h"
23
24#include <unordered_set>
25
26namespace ark::es2panda::lexer {
27
28using ENUMBITOPS_OPERATORS;
29
/**
 * Bit set of regular-expression flags.
 *
 * Every enumerator other than EMPTY occupies its own bit of the uint32_t
 * underlying type, so several flags can be stored in a single value.
 * NOTE(review): the names appear to mirror the ECMAScript RegExp flags
 * (g, i, m, s, u, y); the mapping from flag characters is not in this header.
 */
enum class RegExpFlags : uint32_t {
    EMPTY = 0U,               // no flag set
    GLOBAL = 1U << 0U,        // bit 0
    IGNORE_CASE = 1U << 1U,   // bit 1
    MULTILINE = 1U << 2U,     // bit 2
    DOTALL = 1U << 3U,        // bit 3
    UNICODE = 1U << 4U,       // bit 4
    STICKY = 1U << 5U,        // bit 5
};
39
40struct RegExp {
41    RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);
42
43    // NOLINTBEGIN(misc-non-private-member-variables-in-classes)
44    util::StringView patternStr;
45    util::StringView flagsStr;
46    RegExpFlags flags;
47    // NOLINTEND(misc-non-private-member-variables-in-classes)
48};
49
50class RegExpParser {
51public:
52    explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator, const parser::ParserImpl &parser);
53    void ParsePattern();
54
55private:
56    void ParseDisjunction();
57    void ParseAlternatives();
58    void ParseAlternative();
59    bool ParseAlternativeCharLeftParen();
60
61    void ParseNonCapturingGroup();
62    void ParseNamedCapturingGroup();
63    void ParseCapturingGroup();
64
65    void ParseAssertion();
66    char32_t ParseClassAtom();
67    void ParseCharacterClass();
68    void ParseAtomEscape();
69    void ParseAtomEscapeSwitch(char32_t cp);
70
71    uint32_t ParseControlEscape();
72    uint32_t ParseDecimalEscape();
73    uint32_t ParseLegacyOctalEscape();
74    uint32_t ParseHexEscape();
75    uint32_t ParseUnicodeDigits();
76    uint32_t ParseUnicodeEscape();
77
78    void ParseUnicodePropertyEscape();
79    void ValidateNamedBackreference(bool isUnicode);
80    void ValidateGroupNameElement(char32_t cp);
81    void ParseNamedBackreference();
82
83    void ParseQuantifier();
84    bool ParseBracedQuantifier();
85
86    bool IsSyntaxCharacter(char32_t cp) const;
87    bool ParsePatternCharacter();
88
89    util::StringView ParseIdent();
90
91    bool Unicode() const;
92
93    char32_t Peek() const;
94    char32_t Next();
95    void Advance();
96    bool Eos() const;
97
98    RegExp re_;
99    ArenaAllocator *allocator_ {};
100    util::StringView::Iterator iter_;
101    uint32_t capturingGroupCount_ {};
102    std::unordered_set<util::StringView> groupNames_;
103    std::unordered_set<util::StringView> backReferences_;
104    const es2panda::parser::ParserImpl &parser_;
105};
106}  // namespace ark::es2panda::lexer
107
108template <>
109struct enumbitops::IsAllowedType<ark::es2panda::lexer::RegExpFlags> : std::true_type {
110};
111
112#endif
113