/**
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
153af6ab5fSopenharmony_ci
163af6ab5fSopenharmony_ci#ifndef ES2PANDA_PARSER_CORE_REGEXP_H
173af6ab5fSopenharmony_ci#define ES2PANDA_PARSER_CORE_REGEXP_H
183af6ab5fSopenharmony_ci
193af6ab5fSopenharmony_ci#include "util/enumbitops.h"
203af6ab5fSopenharmony_ci#include "util/ustring.h"
213af6ab5fSopenharmony_ci
223af6ab5fSopenharmony_ci#include "../../parser/parserImpl.h"
233af6ab5fSopenharmony_ci
243af6ab5fSopenharmony_ci#include <unordered_set>
253af6ab5fSopenharmony_ci
263af6ab5fSopenharmony_cinamespace ark::es2panda::lexer {
273af6ab5fSopenharmony_ci
283af6ab5fSopenharmony_ciusing ENUMBITOPS_OPERATORS;
293af6ab5fSopenharmony_ci
/**
 * Bit set of modifiers attached to a regular expression.
 *
 * Every enumerator other than EMPTY occupies exactly one distinct bit of the
 * underlying uint32_t, so the values can be OR-ed together without overlap.
 * NOTE(review): the names appear to mirror the ECMAScript flags g, i, m, s, u, y —
 * confirm against the code that decodes the flag string.
 */
enum class RegExpFlags : uint32_t {
    EMPTY = 0U,              // no bit set
    GLOBAL = 1U << 0U,       // bit 0
    IGNORE_CASE = 1U << 1U,  // bit 1
    MULTILINE = 1U << 2U,    // bit 2
    DOTALL = 1U << 3U,       // bit 3
    UNICODE = 1U << 4U,      // bit 4
    STICKY = 1U << 5U,       // bit 5
};
393af6ab5fSopenharmony_ci
403af6ab5fSopenharmony_cistruct RegExp {
413af6ab5fSopenharmony_ci    RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);
423af6ab5fSopenharmony_ci
433af6ab5fSopenharmony_ci    // NOLINTBEGIN(misc-non-private-member-variables-in-classes)
443af6ab5fSopenharmony_ci    util::StringView patternStr;
453af6ab5fSopenharmony_ci    util::StringView flagsStr;
463af6ab5fSopenharmony_ci    RegExpFlags flags;
473af6ab5fSopenharmony_ci    // NOLINTEND(misc-non-private-member-variables-in-classes)
483af6ab5fSopenharmony_ci};
493af6ab5fSopenharmony_ci
503af6ab5fSopenharmony_ciclass RegExpParser {
513af6ab5fSopenharmony_cipublic:
523af6ab5fSopenharmony_ci    explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator, const parser::ParserImpl &parser);
533af6ab5fSopenharmony_ci    void ParsePattern();
543af6ab5fSopenharmony_ci
553af6ab5fSopenharmony_ciprivate:
563af6ab5fSopenharmony_ci    void ParseDisjunction();
573af6ab5fSopenharmony_ci    void ParseAlternatives();
583af6ab5fSopenharmony_ci    void ParseAlternative();
593af6ab5fSopenharmony_ci    bool ParseAlternativeCharLeftParen();
603af6ab5fSopenharmony_ci
613af6ab5fSopenharmony_ci    void ParseNonCapturingGroup();
623af6ab5fSopenharmony_ci    void ParseNamedCapturingGroup();
633af6ab5fSopenharmony_ci    void ParseCapturingGroup();
643af6ab5fSopenharmony_ci
653af6ab5fSopenharmony_ci    void ParseAssertion();
663af6ab5fSopenharmony_ci    char32_t ParseClassAtom();
673af6ab5fSopenharmony_ci    void ParseCharacterClass();
683af6ab5fSopenharmony_ci    void ParseAtomEscape();
693af6ab5fSopenharmony_ci    void ParseAtomEscapeSwitch(char32_t cp);
703af6ab5fSopenharmony_ci
713af6ab5fSopenharmony_ci    uint32_t ParseControlEscape();
723af6ab5fSopenharmony_ci    uint32_t ParseDecimalEscape();
733af6ab5fSopenharmony_ci    uint32_t ParseLegacyOctalEscape();
743af6ab5fSopenharmony_ci    uint32_t ParseHexEscape();
753af6ab5fSopenharmony_ci    uint32_t ParseUnicodeDigits();
763af6ab5fSopenharmony_ci    uint32_t ParseUnicodeEscape();
773af6ab5fSopenharmony_ci
783af6ab5fSopenharmony_ci    void ParseUnicodePropertyEscape();
793af6ab5fSopenharmony_ci    void ValidateNamedBackreference(bool isUnicode);
803af6ab5fSopenharmony_ci    void ValidateGroupNameElement(char32_t cp);
813af6ab5fSopenharmony_ci    void ParseNamedBackreference();
823af6ab5fSopenharmony_ci
833af6ab5fSopenharmony_ci    void ParseQuantifier();
843af6ab5fSopenharmony_ci    bool ParseBracedQuantifier();
853af6ab5fSopenharmony_ci
863af6ab5fSopenharmony_ci    bool IsSyntaxCharacter(char32_t cp) const;
873af6ab5fSopenharmony_ci    bool ParsePatternCharacter();
883af6ab5fSopenharmony_ci
893af6ab5fSopenharmony_ci    util::StringView ParseIdent();
903af6ab5fSopenharmony_ci
913af6ab5fSopenharmony_ci    bool Unicode() const;
923af6ab5fSopenharmony_ci
933af6ab5fSopenharmony_ci    char32_t Peek() const;
943af6ab5fSopenharmony_ci    char32_t Next();
953af6ab5fSopenharmony_ci    void Advance();
963af6ab5fSopenharmony_ci    bool Eos() const;
973af6ab5fSopenharmony_ci
983af6ab5fSopenharmony_ci    RegExp re_;
993af6ab5fSopenharmony_ci    ArenaAllocator *allocator_ {};
1003af6ab5fSopenharmony_ci    util::StringView::Iterator iter_;
1013af6ab5fSopenharmony_ci    uint32_t capturingGroupCount_ {};
1023af6ab5fSopenharmony_ci    std::unordered_set<util::StringView> groupNames_;
1033af6ab5fSopenharmony_ci    std::unordered_set<util::StringView> backReferences_;
1043af6ab5fSopenharmony_ci    const es2panda::parser::ParserImpl &parser_;
1053af6ab5fSopenharmony_ci};
1063af6ab5fSopenharmony_ci}  // namespace ark::es2panda::lexer
1073af6ab5fSopenharmony_ci
1083af6ab5fSopenharmony_citemplate <>
1093af6ab5fSopenharmony_cistruct enumbitops::IsAllowedType<ark::es2panda::lexer::RegExpFlags> : std::true_type {
1103af6ab5fSopenharmony_ci};
1113af6ab5fSopenharmony_ci
1123af6ab5fSopenharmony_ci#endif
113