13af6ab5fSopenharmony_ci/** 23af6ab5fSopenharmony_ci * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 33af6ab5fSopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 43af6ab5fSopenharmony_ci * you may not use this file except in compliance with the License. 53af6ab5fSopenharmony_ci * You may obtain a copy of the License at 63af6ab5fSopenharmony_ci * 73af6ab5fSopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 83af6ab5fSopenharmony_ci * 93af6ab5fSopenharmony_ci * Unless required by applicable law or agreed to in writing, software 103af6ab5fSopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 113af6ab5fSopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 123af6ab5fSopenharmony_ci * See the License for the specific language governing permissions and 133af6ab5fSopenharmony_ci * limitations under the License. 143af6ab5fSopenharmony_ci */ 153af6ab5fSopenharmony_ci 163af6ab5fSopenharmony_ci#ifndef ES2PANDA_PARSER_CORE_LEXER_H 173af6ab5fSopenharmony_ci#define ES2PANDA_PARSER_CORE_LEXER_H 183af6ab5fSopenharmony_ci 193af6ab5fSopenharmony_ci#include <ios> 203af6ab5fSopenharmony_ci#include "lexer/regexp/regexp.h" 213af6ab5fSopenharmony_ci#include "lexer/token/letters.h" 223af6ab5fSopenharmony_ci#include "lexer/token/token.h" 233af6ab5fSopenharmony_ci#include "util/enumbitops.h" 243af6ab5fSopenharmony_ci 253af6ab5fSopenharmony_cinamespace ark::es2panda::parser { 263af6ab5fSopenharmony_ciclass ParserContext; 273af6ab5fSopenharmony_ciclass ETSNolintParser; 283af6ab5fSopenharmony_ci} // namespace ark::es2panda::parser 293af6ab5fSopenharmony_ci 303af6ab5fSopenharmony_cinamespace ark::es2panda::lexer { 313af6ab5fSopenharmony_ciclass Keywords; 323af6ab5fSopenharmony_ci 333af6ab5fSopenharmony_ciusing ENUMBITOPS_OPERATORS; 343af6ab5fSopenharmony_ci 353af6ab5fSopenharmony_cienum class NextTokenFlags : uint32_t { 363af6ab5fSopenharmony_ci NONE = 0U, 373af6ab5fSopenharmony_ci KEYWORD_TO_IDENT = 1U << 0U, 383af6ab5fSopenharmony_ci NUMERIC_SEPARATOR_ALLOWED = 1U << 1U, 393af6ab5fSopenharmony_ci BIGINT_ALLOWED = 1U << 2U, 403af6ab5fSopenharmony_ci}; 413af6ab5fSopenharmony_ci 423af6ab5fSopenharmony_ciclass LexerPosition { 433af6ab5fSopenharmony_cipublic: 443af6ab5fSopenharmony_ci explicit LexerPosition(const util::StringView &source); 453af6ab5fSopenharmony_ci DEFAULT_COPY_SEMANTIC(LexerPosition); 463af6ab5fSopenharmony_ci DEFAULT_MOVE_SEMANTIC(LexerPosition); 473af6ab5fSopenharmony_ci ~LexerPosition() = default; 483af6ab5fSopenharmony_ci 493af6ab5fSopenharmony_ci util::StringView::Iterator &Iterator() 503af6ab5fSopenharmony_ci { 513af6ab5fSopenharmony_ci return iterator_; 523af6ab5fSopenharmony_ci } 533af6ab5fSopenharmony_ci 543af6ab5fSopenharmony_ci const util::StringView::Iterator &Iterator() const 553af6ab5fSopenharmony_ci { 563af6ab5fSopenharmony_ci return iterator_; 573af6ab5fSopenharmony_ci } 583af6ab5fSopenharmony_ci 593af6ab5fSopenharmony_ci size_t Line() const 603af6ab5fSopenharmony_ci { 613af6ab5fSopenharmony_ci return line_; 623af6ab5fSopenharmony_ci } 633af6ab5fSopenharmony_ci 643af6ab5fSopenharmony_ci Token &GetToken() 653af6ab5fSopenharmony_ci { 663af6ab5fSopenharmony_ci return token_; 673af6ab5fSopenharmony_ci } 683af6ab5fSopenharmony_ci 693af6ab5fSopenharmony_ci const Token &GetToken() const 703af6ab5fSopenharmony_ci { 713af6ab5fSopenharmony_ci return token_; 723af6ab5fSopenharmony_ci } 733af6ab5fSopenharmony_ci 743af6ab5fSopenharmony_ci size_t &NextTokenLine() 753af6ab5fSopenharmony_ci { 763af6ab5fSopenharmony_ci return nextTokenLine_; 773af6ab5fSopenharmony_ci } 783af6ab5fSopenharmony_ci 793af6ab5fSopenharmony_ciprivate: 803af6ab5fSopenharmony_ci friend class Lexer; 813af6ab5fSopenharmony_ci 823af6ab5fSopenharmony_ci Token token_ {}; 833af6ab5fSopenharmony_ci util::StringView::Iterator iterator_; 843af6ab5fSopenharmony_ci size_t line_ {}; 853af6ab5fSopenharmony_ci size_t nextTokenLine_ {}; 863af6ab5fSopenharmony_ci}; 873af6ab5fSopenharmony_ci 883af6ab5fSopenharmony_ciclass LexerTemplateString { 893af6ab5fSopenharmony_cipublic: 903af6ab5fSopenharmony_ci explicit LexerTemplateString(ArenaAllocator *allocator) : str(allocator) {} 913af6ab5fSopenharmony_ci DEFAULT_COPY_SEMANTIC(LexerTemplateString); 923af6ab5fSopenharmony_ci DEFAULT_MOVE_SEMANTIC(LexerTemplateString); 933af6ab5fSopenharmony_ci ~LexerTemplateString() = default; 943af6ab5fSopenharmony_ci 953af6ab5fSopenharmony_ci // NOLINTBEGIN(misc-non-private-member-variables-in-classes) 963af6ab5fSopenharmony_ci util::UString str; 973af6ab5fSopenharmony_ci size_t end {}; 983af6ab5fSopenharmony_ci bool scanExpression {}; 993af6ab5fSopenharmony_ci // NOLINTEND(misc-non-private-member-variables-in-classes) 1003af6ab5fSopenharmony_ci}; 1013af6ab5fSopenharmony_ci 1023af6ab5fSopenharmony_ciclass TemplateLiteralParserContext; 1033af6ab5fSopenharmony_ci 1043af6ab5fSopenharmony_ciclass Lexer { 1053af6ab5fSopenharmony_cipublic: 1063af6ab5fSopenharmony_ci explicit Lexer(const parser::ParserContext *parserContext, bool startLexer = true); 1073af6ab5fSopenharmony_ci NO_COPY_SEMANTIC(Lexer); 1083af6ab5fSopenharmony_ci NO_MOVE_SEMANTIC(Lexer); 1093af6ab5fSopenharmony_ci virtual ~Lexer() = default; 1103af6ab5fSopenharmony_ci 1113af6ab5fSopenharmony_ci // NOLINTNEXTLINE(google-default-arguments) 1123af6ab5fSopenharmony_ci virtual void NextToken(NextTokenFlags flags = NextTokenFlags::NONE); 1133af6ab5fSopenharmony_ci virtual void ScanAsteriskPunctuator(); 1143af6ab5fSopenharmony_ci 1153af6ab5fSopenharmony_ci Token &GetToken(); 1163af6ab5fSopenharmony_ci const Token &GetToken() const; 1173af6ab5fSopenharmony_ci size_t Line() const; 1183af6ab5fSopenharmony_ci 1193af6ab5fSopenharmony_ci bool TryEatTokenType(lexer::TokenType type) 1203af6ab5fSopenharmony_ci { 1213af6ab5fSopenharmony_ci auto token = GetToken(); 1223af6ab5fSopenharmony_ci if (token.Type() == type) { 1233af6ab5fSopenharmony_ci NextToken(); 1243af6ab5fSopenharmony_ci return true; 1253af6ab5fSopenharmony_ci } 1263af6ab5fSopenharmony_ci return false; 1273af6ab5fSopenharmony_ci } 1283af6ab5fSopenharmony_ci 1293af6ab5fSopenharmony_ci std::optional<Token> TryEatTokenKeyword(lexer::TokenType type) 1303af6ab5fSopenharmony_ci { 1313af6ab5fSopenharmony_ci auto token = GetToken(); 1323af6ab5fSopenharmony_ci if (token.KeywordType() == type) { 1333af6ab5fSopenharmony_ci NextToken(); 1343af6ab5fSopenharmony_ci return token; 1353af6ab5fSopenharmony_ci } 1363af6ab5fSopenharmony_ci return std::nullopt; 1373af6ab5fSopenharmony_ci } 1383af6ab5fSopenharmony_ci 1393af6ab5fSopenharmony_ci LexerPosition Save() const; 1403af6ab5fSopenharmony_ci void Rewind(const LexerPosition &pos); 1413af6ab5fSopenharmony_ci void BackwardToken(TokenType type, size_t offset); 1423af6ab5fSopenharmony_ci void ForwardToken(TokenType type, size_t offset); 1433af6ab5fSopenharmony_ci 1443af6ab5fSopenharmony_ci char32_t Lookahead(); 1453af6ab5fSopenharmony_ci bool CheckArrow(); 1463af6ab5fSopenharmony_ci 1473af6ab5fSopenharmony_ci RegExp ScanRegExp(); 1483af6ab5fSopenharmony_ci template <char32_t END> 1493af6ab5fSopenharmony_ci void ScanString(); 1503af6ab5fSopenharmony_ci void ResetTokenEnd(); 1513af6ab5fSopenharmony_ci LexerTemplateString ScanTemplateString(); 1523af6ab5fSopenharmony_ci void ScanTemplateStringEnd(); 1533af6ab5fSopenharmony_ci void PushTemplateContext(TemplateLiteralParserContext *ctx); 1543af6ab5fSopenharmony_ci [[noreturn]] void ThrowUnexpectedStrictModeReservedKeyword() const 1553af6ab5fSopenharmony_ci { 1563af6ab5fSopenharmony_ci ThrowError("Unexpected strict mode reserved keyword"); 1573af6ab5fSopenharmony_ci } 1583af6ab5fSopenharmony_ci 1593af6ab5fSopenharmony_ci enum class ConversionResult : uint8_t { 1603af6ab5fSopenharmony_ci SUCCESS, 1613af6ab5fSopenharmony_ci INVALID_ARGUMENT, 1623af6ab5fSopenharmony_ci OUT_OF_RANGE, 1633af6ab5fSopenharmony_ci }; 1643af6ab5fSopenharmony_ci 1653af6ab5fSopenharmony_ci template <typename Tret, typename Ret = Tret, typename... Base> 1663af6ab5fSopenharmony_ci static Ret StrToNumeric(Tret (*converter)(const char *, char **, Base...), const char *str, 1673af6ab5fSopenharmony_ci ConversionResult &result, Base... base) noexcept 1683af6ab5fSopenharmony_ci { 1693af6ab5fSopenharmony_ci Ret ret {}; 1703af6ab5fSopenharmony_ci char *endPtr; 1713af6ab5fSopenharmony_ci // NOLINTBEGIN(cppcoreguidelines-special-member-functions) 1723af6ab5fSopenharmony_ci struct SaveErrno { 1733af6ab5fSopenharmony_ci explicit SaveErrno() : errno_(errno) 1743af6ab5fSopenharmony_ci { 1753af6ab5fSopenharmony_ci errno = 0; 1763af6ab5fSopenharmony_ci } 1773af6ab5fSopenharmony_ci ~SaveErrno() 1783af6ab5fSopenharmony_ci { 1793af6ab5fSopenharmony_ci if (errno == 0) { 1803af6ab5fSopenharmony_ci errno = errno_; 1813af6ab5fSopenharmony_ci } 1823af6ab5fSopenharmony_ci } 1833af6ab5fSopenharmony_ci 1843af6ab5fSopenharmony_ci private: 1853af6ab5fSopenharmony_ci decltype(errno) errno_; 1863af6ab5fSopenharmony_ci } const savedErrno; 1873af6ab5fSopenharmony_ci // NOLINTEND(cppcoreguidelines-special-member-functions) 1883af6ab5fSopenharmony_ci 1893af6ab5fSopenharmony_ci const Tret tmp = converter(str, &endPtr, base...); 1903af6ab5fSopenharmony_ci 1913af6ab5fSopenharmony_ci bool outOfRange = false; 1923af6ab5fSopenharmony_ci if constexpr (std::is_same_v<Ret, int>) { 1933af6ab5fSopenharmony_ci outOfRange = tmp < static_cast<Tret>(std::numeric_limits<int>::min()) || 1943af6ab5fSopenharmony_ci tmp > static_cast<Tret>(std::numeric_limits<int>::max()); 1953af6ab5fSopenharmony_ci } 1963af6ab5fSopenharmony_ci 1973af6ab5fSopenharmony_ci if (endPtr == str) { 1983af6ab5fSopenharmony_ci result = ConversionResult::INVALID_ARGUMENT; 1993af6ab5fSopenharmony_ci } else if (errno == ERANGE || outOfRange) { 2003af6ab5fSopenharmony_ci result = ConversionResult::OUT_OF_RANGE; 2013af6ab5fSopenharmony_ci } else { 2023af6ab5fSopenharmony_ci result = ConversionResult::SUCCESS; 2033af6ab5fSopenharmony_ci ret = tmp; 2043af6ab5fSopenharmony_ci } 2053af6ab5fSopenharmony_ci 2063af6ab5fSopenharmony_ci return ret; 2073af6ab5fSopenharmony_ci } 2083af6ab5fSopenharmony_ci 2093af6ab5fSopenharmony_ci util::StringView SourceView(size_t begin, size_t end) const; 2103af6ab5fSopenharmony_ci 2113af6ab5fSopenharmony_ciprotected: 2123af6ab5fSopenharmony_ci void NextToken(Keywords *kws); 2133af6ab5fSopenharmony_ci ArenaAllocator *Allocator(); 2143af6ab5fSopenharmony_ci bool IsLineTerminatorOrEos() const; 2153af6ab5fSopenharmony_ci void ScanRegExpPattern(); 2163af6ab5fSopenharmony_ci RegExpFlags ScanRegExpFlags(); 2173af6ab5fSopenharmony_ci 2183af6ab5fSopenharmony_ci [[noreturn]] void ThrowError(std::string_view message) const; 2193af6ab5fSopenharmony_ci [[noreturn]] void ThrowUnexpectedToken(lexer::TokenType tokenType) const; 2203af6ab5fSopenharmony_ci 2213af6ab5fSopenharmony_ci void SetTokenStart(); 2223af6ab5fSopenharmony_ci void SetTokenEnd(); 2233af6ab5fSopenharmony_ci 2243af6ab5fSopenharmony_ci inline util::StringView::Iterator &Iterator() 2253af6ab5fSopenharmony_ci { 2263af6ab5fSopenharmony_ci return pos_.iterator_; 2273af6ab5fSopenharmony_ci } 2283af6ab5fSopenharmony_ci 2293af6ab5fSopenharmony_ci inline const util::StringView::Iterator &Iterator() const 2303af6ab5fSopenharmony_ci { 2313af6ab5fSopenharmony_ci return pos_.iterator_; 2323af6ab5fSopenharmony_ci } 2333af6ab5fSopenharmony_ci 2343af6ab5fSopenharmony_ci util::StringView SourceView(const util::StringView::Iterator &begin, const util::StringView::Iterator &end) const; 2353af6ab5fSopenharmony_ci 2363af6ab5fSopenharmony_ci void SkipWhiteSpaces(); 2373af6ab5fSopenharmony_ci void SkipSingleLineComment(); 2383af6ab5fSopenharmony_ci 2393af6ab5fSopenharmony_ci bool ScanPunctuator(); 2403af6ab5fSopenharmony_ci void ScanQuestionPunctuator(); 2413af6ab5fSopenharmony_ci void ScanLessThanPunctuator(); 2423af6ab5fSopenharmony_ci void ScanGreaterThanPunctuator(); 2433af6ab5fSopenharmony_ci virtual void ScanEqualsPunctuator(); 2443af6ab5fSopenharmony_ci virtual void ScanExclamationPunctuator(); 2453af6ab5fSopenharmony_ci void ScanAmpersandPunctuator(); 2463af6ab5fSopenharmony_ci void ScanVLinePunctuator(); 2473af6ab5fSopenharmony_ci void ScanCircumflexPunctuator(); 2483af6ab5fSopenharmony_ci void ScanPlusPunctuator(); 2493af6ab5fSopenharmony_ci void ScanMinusPunctuator(); 2503af6ab5fSopenharmony_ci void ScanSlashPunctuator(); 2513af6ab5fSopenharmony_ci void ScanPercentPunctuator(); 2523af6ab5fSopenharmony_ci void ScanDotPunctuator(); 2533af6ab5fSopenharmony_ci void ScanColonPunctuator(); 2543af6ab5fSopenharmony_ci virtual bool ScanDollarPunctuator(); 2553af6ab5fSopenharmony_ci void ScanAtPunctuator(); 2563af6ab5fSopenharmony_ci 2573af6ab5fSopenharmony_ci virtual void SkipMultiLineComment(); 2583af6ab5fSopenharmony_ci virtual void ScanHashMark(); 2593af6ab5fSopenharmony_ci virtual void ScanBackTick(); 2603af6ab5fSopenharmony_ci 2613af6ab5fSopenharmony_ci virtual bool ScanCharLiteral() 2623af6ab5fSopenharmony_ci { 2633af6ab5fSopenharmony_ci return false; 2643af6ab5fSopenharmony_ci } 2653af6ab5fSopenharmony_ci 2663af6ab5fSopenharmony_ci char32_t ScanUnicodeEscapeSequence(); 2673af6ab5fSopenharmony_ci template <int N, bool IN_AS = false> 2683af6ab5fSopenharmony_ci char32_t ScanHexEscape(); 2693af6ab5fSopenharmony_ci char32_t ScanUnicodeCodePointEscape(); 2703af6ab5fSopenharmony_ci 2713af6ab5fSopenharmony_ci void ScanStringUnicodePart(util::UString *str); 2723af6ab5fSopenharmony_ci char32_t ScanUnicodeCharacter(); 2733af6ab5fSopenharmony_ci 2743af6ab5fSopenharmony_ci void ScanDecimalNumbers(); 2753af6ab5fSopenharmony_ci 2763af6ab5fSopenharmony_ci virtual void ScanNumberLeadingZero() 2773af6ab5fSopenharmony_ci { 2783af6ab5fSopenharmony_ci ScanNumberLeadingZeroImpl<double>(); 2793af6ab5fSopenharmony_ci } 2803af6ab5fSopenharmony_ci 2813af6ab5fSopenharmony_ci template <typename RadixType, typename RadixLimit = void *> 2823af6ab5fSopenharmony_ci bool ScanNumberLeadingZeroImpl(); 2833af6ab5fSopenharmony_ci void ScanNumberLeadingZeroImplNonAllowedCases(); 2843af6ab5fSopenharmony_ci template <bool RANGE_CHECK(char32_t), int RADIX, typename RadixType, typename RadixLimit> 2853af6ab5fSopenharmony_ci bool ScanNumberRadix(bool allowNumericSeparator = true); 2863af6ab5fSopenharmony_ci void ScanNumber(bool allowBigInt = true); 2873af6ab5fSopenharmony_ci std::tuple<size_t, bool, NumberFlags> ScanCharLex(bool allowBigInt, bool parseExponent, NumberFlags flags); 2883af6ab5fSopenharmony_ci size_t ScanSignOfNumber(); 2893af6ab5fSopenharmony_ci template <bool RANGE_CHECK(char32_t), int RADIX, typename RadixType, typename RadixLimit> 2903af6ab5fSopenharmony_ci bool ScanTooLargeNumber(RadixType number); 2913af6ab5fSopenharmony_ci virtual void ConvertNumber(const std::string &utf8, NumberFlags flags); 2923af6ab5fSopenharmony_ci void ScanDecimalLiteral(); 2933af6ab5fSopenharmony_ci void ScanDecimalDigits(bool allowNumericSeparator); 2943af6ab5fSopenharmony_ci virtual void CheckNumberLiteralEnd(); 2953af6ab5fSopenharmony_ci void CheckOctal(); 2963af6ab5fSopenharmony_ci 2973af6ab5fSopenharmony_ci inline static uint32_t HexValue(char32_t ch); 2983af6ab5fSopenharmony_ci inline static bool IsDecimalDigit(uint32_t cp); 2993af6ab5fSopenharmony_ci inline static bool IsHexDigit(char32_t ch); 3003af6ab5fSopenharmony_ci inline static bool IsBinaryDigit(char32_t ch); 3013af6ab5fSopenharmony_ci inline static bool IsOctalDigit(char32_t ch); 3023af6ab5fSopenharmony_ci 3033af6ab5fSopenharmony_ci friend class KeywordsUtil; 3043af6ab5fSopenharmony_ci friend class TemplateLiteralParserContext; 3053af6ab5fSopenharmony_ci friend class parser::ETSNolintParser; 3063af6ab5fSopenharmony_ci 3073af6ab5fSopenharmony_ci LexerPosition &Pos(); 3083af6ab5fSopenharmony_ci const LexerPosition &Pos() const; 3093af6ab5fSopenharmony_ci 3103af6ab5fSopenharmony_ciprivate: 3113af6ab5fSopenharmony_ci TemplateLiteralParserContext *tlCtx_ {}; 3123af6ab5fSopenharmony_ci ArenaAllocator *allocator_; 3133af6ab5fSopenharmony_ci Keywords *kws_ {}; 3143af6ab5fSopenharmony_ci const parser::ParserContext *parserContext_; 3153af6ab5fSopenharmony_ci util::StringView source_; 3163af6ab5fSopenharmony_ci LexerPosition pos_; 3173af6ab5fSopenharmony_ci}; 3183af6ab5fSopenharmony_ci 3193af6ab5fSopenharmony_ciclass TemplateLiteralParserContext { 3203af6ab5fSopenharmony_cipublic: 3213af6ab5fSopenharmony_ci explicit TemplateLiteralParserContext(Lexer *lexer) : lexer_(lexer), prev_(lexer_->tlCtx_) {} 3223af6ab5fSopenharmony_ci NO_MOVE_SEMANTIC(TemplateLiteralParserContext); 3233af6ab5fSopenharmony_ci NO_COPY_SEMANTIC(TemplateLiteralParserContext); 3243af6ab5fSopenharmony_ci 3253af6ab5fSopenharmony_ci ~TemplateLiteralParserContext() 3263af6ab5fSopenharmony_ci { 3273af6ab5fSopenharmony_ci lexer_->tlCtx_ = prev_; 3283af6ab5fSopenharmony_ci } 3293af6ab5fSopenharmony_ci 3303af6ab5fSopenharmony_ci void ConsumeLeftBrace() 3313af6ab5fSopenharmony_ci { 3323af6ab5fSopenharmony_ci braceDepth_++; 3333af6ab5fSopenharmony_ci } 3343af6ab5fSopenharmony_ci 3353af6ab5fSopenharmony_ci bool ConsumeRightBrace() 3363af6ab5fSopenharmony_ci { 3373af6ab5fSopenharmony_ci braceDepth_--; 3383af6ab5fSopenharmony_ci 3393af6ab5fSopenharmony_ci return braceDepth_ == 0; 3403af6ab5fSopenharmony_ci } 3413af6ab5fSopenharmony_ci 3423af6ab5fSopenharmony_ciprivate: 3433af6ab5fSopenharmony_ci Lexer *lexer_; 3443af6ab5fSopenharmony_ci TemplateLiteralParserContext *prev_ {}; 3453af6ab5fSopenharmony_ci size_t braceDepth_ {1}; 3463af6ab5fSopenharmony_ci}; 3473af6ab5fSopenharmony_ci 3483af6ab5fSopenharmony_citemplate <char32_t END> 3493af6ab5fSopenharmony_civoid Lexer::ScanString() 3503af6ab5fSopenharmony_ci{ 3513af6ab5fSopenharmony_ci util::UString str(Allocator()); 3523af6ab5fSopenharmony_ci GetToken().type_ = TokenType::LITERAL_STRING; 3533af6ab5fSopenharmony_ci GetToken().keywordType_ = TokenType::LITERAL_STRING; 3543af6ab5fSopenharmony_ci 3553af6ab5fSopenharmony_ci const auto startPos = Iterator().Index(); 3563af6ab5fSopenharmony_ci auto escapeEnd = startPos; 3573af6ab5fSopenharmony_ci 3583af6ab5fSopenharmony_ci do { 3593af6ab5fSopenharmony_ci char32_t cp = Iterator().Peek(); 3603af6ab5fSopenharmony_ci 3613af6ab5fSopenharmony_ci switch (cp) { 3623af6ab5fSopenharmony_ci case util::StringView::Iterator::INVALID_CP: { 3633af6ab5fSopenharmony_ci ThrowError("Unterminated string"); 3643af6ab5fSopenharmony_ci break; 3653af6ab5fSopenharmony_ci } 3663af6ab5fSopenharmony_ci case LEX_CHAR_CR: 3673af6ab5fSopenharmony_ci case LEX_CHAR_LF: { 3683af6ab5fSopenharmony_ci // NOLINTNEXTLINE(readability-braces-around-statements,bugprone-suspicious-semicolon) 3693af6ab5fSopenharmony_ci if constexpr (END != LEX_CHAR_BACK_TICK) { 3703af6ab5fSopenharmony_ci ThrowError("Newline is not allowed in strings"); 3713af6ab5fSopenharmony_ci } 3723af6ab5fSopenharmony_ci 3733af6ab5fSopenharmony_ci GetToken().flags_ |= TokenFlags::HAS_ESCAPE; 3743af6ab5fSopenharmony_ci str.Append(SourceView(escapeEnd, Iterator().Index())); 3753af6ab5fSopenharmony_ci 3763af6ab5fSopenharmony_ci if (cp == LEX_CHAR_CR) { 3773af6ab5fSopenharmony_ci Iterator().Forward(1); 3783af6ab5fSopenharmony_ci 3793af6ab5fSopenharmony_ci if (Iterator().Peek() != LEX_CHAR_LF) { 3803af6ab5fSopenharmony_ci Iterator().Backward(1); 3813af6ab5fSopenharmony_ci } 3823af6ab5fSopenharmony_ci } 3833af6ab5fSopenharmony_ci 3843af6ab5fSopenharmony_ci pos_.line_++; 3853af6ab5fSopenharmony_ci str.Append(LEX_CHAR_LF); 3863af6ab5fSopenharmony_ci Iterator().Forward(1); 3873af6ab5fSopenharmony_ci escapeEnd = Iterator().Index(); 3883af6ab5fSopenharmony_ci continue; 3893af6ab5fSopenharmony_ci } 3903af6ab5fSopenharmony_ci case LEX_CHAR_BACKSLASH: { 3913af6ab5fSopenharmony_ci GetToken().flags_ |= TokenFlags::HAS_ESCAPE; 3923af6ab5fSopenharmony_ci str.Append(SourceView(escapeEnd, Iterator().Index())); 3933af6ab5fSopenharmony_ci 3943af6ab5fSopenharmony_ci Iterator().Forward(1); 3953af6ab5fSopenharmony_ci ScanStringUnicodePart(&str); 3963af6ab5fSopenharmony_ci escapeEnd = Iterator().Index(); 3973af6ab5fSopenharmony_ci continue; 3983af6ab5fSopenharmony_ci } 3993af6ab5fSopenharmony_ci case LEX_CHAR_BACK_TICK: 4003af6ab5fSopenharmony_ci case LEX_CHAR_SINGLE_QUOTE: 4013af6ab5fSopenharmony_ci case LEX_CHAR_DOUBLE_QUOTE: { 4023af6ab5fSopenharmony_ci if (END == cp) { 4033af6ab5fSopenharmony_ci break; 4043af6ab5fSopenharmony_ci } 4053af6ab5fSopenharmony_ci 4063af6ab5fSopenharmony_ci Iterator().Forward(1); 4073af6ab5fSopenharmony_ci continue; 4083af6ab5fSopenharmony_ci } 4093af6ab5fSopenharmony_ci case LEX_CHAR_DOLLAR_SIGN: { 4103af6ab5fSopenharmony_ci Iterator().Forward(1); 4113af6ab5fSopenharmony_ci 4123af6ab5fSopenharmony_ci // NOLINTNEXTLINE(readability-braces-around-statements,bugprone-suspicious-semicolon) 4133af6ab5fSopenharmony_ci if constexpr (END == LEX_CHAR_BACK_TICK) { 4143af6ab5fSopenharmony_ci if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) { 4153af6ab5fSopenharmony_ci Iterator().Backward(1); 4163af6ab5fSopenharmony_ci break; 4173af6ab5fSopenharmony_ci } 4183af6ab5fSopenharmony_ci } 4193af6ab5fSopenharmony_ci 4203af6ab5fSopenharmony_ci continue; 4213af6ab5fSopenharmony_ci } 4223af6ab5fSopenharmony_ci default: { 4233af6ab5fSopenharmony_ci Iterator().SkipCp(); 4243af6ab5fSopenharmony_ci continue; 4253af6ab5fSopenharmony_ci } 4263af6ab5fSopenharmony_ci } 4273af6ab5fSopenharmony_ci 4283af6ab5fSopenharmony_ci if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) { 4293af6ab5fSopenharmony_ci str.Append(SourceView(escapeEnd, Iterator().Index())); 4303af6ab5fSopenharmony_ci GetToken().src_ = str.View(); 4313af6ab5fSopenharmony_ci } else { 4323af6ab5fSopenharmony_ci GetToken().src_ = SourceView(startPos, Iterator().Index()); 4333af6ab5fSopenharmony_ci } 4343af6ab5fSopenharmony_ci 4353af6ab5fSopenharmony_ci break; 4363af6ab5fSopenharmony_ci } while (true); 4373af6ab5fSopenharmony_ci 4383af6ab5fSopenharmony_ci // NOLINTNEXTLINE(readability-braces-around-statements,bugprone-suspicious-semicolon) 4393af6ab5fSopenharmony_ci if constexpr (END != LEX_CHAR_BACK_TICK) { 4403af6ab5fSopenharmony_ci Iterator().Forward(1); 4413af6ab5fSopenharmony_ci } 4423af6ab5fSopenharmony_ci} 4433af6ab5fSopenharmony_ci 4443af6ab5fSopenharmony_citemplate <int N, bool IN_AS> 4453af6ab5fSopenharmony_cichar32_t Lexer::ScanHexEscape() 4463af6ab5fSopenharmony_ci{ 4473af6ab5fSopenharmony_ci char32_t code = 0; 4483af6ab5fSopenharmony_ci 4493af6ab5fSopenharmony_ci for (size_t i = 0; i < N; ++i) { 4503af6ab5fSopenharmony_ci const auto cp = Iterator().Peek(); 4513af6ab5fSopenharmony_ci if (IN_AS && cp == LEX_CHAR_BACK_TICK) { 4523af6ab5fSopenharmony_ci break; 4533af6ab5fSopenharmony_ci } 4543af6ab5fSopenharmony_ci 4553af6ab5fSopenharmony_ci Iterator().Forward(1); 4563af6ab5fSopenharmony_ci 4573af6ab5fSopenharmony_ci if (!IsHexDigit(cp)) { 4583af6ab5fSopenharmony_ci ThrowError("Invalid unicode escape sequence"); 4593af6ab5fSopenharmony_ci } 4603af6ab5fSopenharmony_ci 4613af6ab5fSopenharmony_ci constexpr auto MULTIPLIER = 16; 4623af6ab5fSopenharmony_ci code = code * MULTIPLIER + HexValue(cp); 4633af6ab5fSopenharmony_ci } 4643af6ab5fSopenharmony_ci 4653af6ab5fSopenharmony_ci return code; 4663af6ab5fSopenharmony_ci} 4673af6ab5fSopenharmony_ci 4683af6ab5fSopenharmony_citemplate <typename RadixType, typename RadixLimit> 4693af6ab5fSopenharmony_cibool Lexer::ScanNumberLeadingZeroImpl() 4703af6ab5fSopenharmony_ci{ 4713af6ab5fSopenharmony_ci GetToken().type_ = TokenType::LITERAL_NUMBER; 4723af6ab5fSopenharmony_ci GetToken().keywordType_ = TokenType::LITERAL_NUMBER; 4733af6ab5fSopenharmony_ci 4743af6ab5fSopenharmony_ci switch (Iterator().Peek()) { 4753af6ab5fSopenharmony_ci case LEX_CHAR_LOWERCASE_X: 4763af6ab5fSopenharmony_ci case LEX_CHAR_UPPERCASE_X: { 4773af6ab5fSopenharmony_ci Iterator().Forward(1); 4783af6ab5fSopenharmony_ci constexpr auto RADIX = 16; 4793af6ab5fSopenharmony_ci if (!ScanNumberRadix<IsHexDigit, RADIX, RadixType, RadixLimit>()) { 4803af6ab5fSopenharmony_ci return false; 4813af6ab5fSopenharmony_ci } 4823af6ab5fSopenharmony_ci CheckNumberLiteralEnd(); 4833af6ab5fSopenharmony_ci return true; 4843af6ab5fSopenharmony_ci } 4853af6ab5fSopenharmony_ci case LEX_CHAR_LOWERCASE_B: 4863af6ab5fSopenharmony_ci case LEX_CHAR_UPPERCASE_B: { 4873af6ab5fSopenharmony_ci Iterator().Forward(1); 4883af6ab5fSopenharmony_ci constexpr auto RADIX = 2; 4893af6ab5fSopenharmony_ci if (!ScanNumberRadix<IsBinaryDigit, RADIX, RadixType, RadixLimit>()) { 4903af6ab5fSopenharmony_ci return false; 4913af6ab5fSopenharmony_ci } 4923af6ab5fSopenharmony_ci CheckNumberLiteralEnd(); 4933af6ab5fSopenharmony_ci return true; 4943af6ab5fSopenharmony_ci } 4953af6ab5fSopenharmony_ci case LEX_CHAR_LOWERCASE_O: 4963af6ab5fSopenharmony_ci case LEX_CHAR_UPPERCASE_O: { 4973af6ab5fSopenharmony_ci Iterator().Forward(1); 4983af6ab5fSopenharmony_ci constexpr auto RADIX = 8; 4993af6ab5fSopenharmony_ci if (!ScanNumberRadix<IsOctalDigit, RADIX, RadixType, RadixLimit>()) { 5003af6ab5fSopenharmony_ci return false; 5013af6ab5fSopenharmony_ci } 5023af6ab5fSopenharmony_ci CheckOctal(); 5033af6ab5fSopenharmony_ci CheckNumberLiteralEnd(); 5043af6ab5fSopenharmony_ci return true; 5053af6ab5fSopenharmony_ci } 5063af6ab5fSopenharmony_ci default: { 5073af6ab5fSopenharmony_ci ScanNumberLeadingZeroImplNonAllowedCases(); 5083af6ab5fSopenharmony_ci break; 5093af6ab5fSopenharmony_ci } 5103af6ab5fSopenharmony_ci } 5113af6ab5fSopenharmony_ci 5123af6ab5fSopenharmony_ci ScanNumber(); 5133af6ab5fSopenharmony_ci return true; 5143af6ab5fSopenharmony_ci} 5153af6ab5fSopenharmony_ci 5163af6ab5fSopenharmony_citemplate <bool RANGE_CHECK(char32_t), int RADIX, typename RadixType, typename RadixLimit> 5173af6ab5fSopenharmony_cibool Lexer::ScanTooLargeNumber([[maybe_unused]] RadixType number) 5183af6ab5fSopenharmony_ci{ 5193af6ab5fSopenharmony_ci if constexpr (std::is_arithmetic_v<RadixLimit>) { 5203af6ab5fSopenharmony_ci if (number > std::numeric_limits<RadixLimit>::max() / RADIX) { 5213af6ab5fSopenharmony_ci return false; 5223af6ab5fSopenharmony_ci } 5233af6ab5fSopenharmony_ci } 5243af6ab5fSopenharmony_ci return true; 5253af6ab5fSopenharmony_ci} 5263af6ab5fSopenharmony_ci 5273af6ab5fSopenharmony_citemplate <bool RANGE_CHECK(char32_t), int RADIX, typename RadixType, typename RadixLimit> 5283af6ab5fSopenharmony_cibool Lexer::ScanNumberRadix(bool allowNumericSeparator) 5293af6ab5fSopenharmony_ci{ 5303af6ab5fSopenharmony_ci RadixType number {}; 5313af6ab5fSopenharmony_ci 5323af6ab5fSopenharmony_ci auto cp = Iterator().Peek(); 5333af6ab5fSopenharmony_ci if (!RANGE_CHECK(cp)) { 5343af6ab5fSopenharmony_ci ThrowError("Invalid digit"); 5353af6ab5fSopenharmony_ci } 5363af6ab5fSopenharmony_ci 5373af6ab5fSopenharmony_ci bool allowNumericOnNext = true; 5383af6ab5fSopenharmony_ci 5393af6ab5fSopenharmony_ci do { 5403af6ab5fSopenharmony_ci cp = Iterator().Peek(); 5413af6ab5fSopenharmony_ci if (RANGE_CHECK(cp)) { 5423af6ab5fSopenharmony_ci auto digit = HexValue(cp); 5433af6ab5fSopenharmony_ci 5443af6ab5fSopenharmony_ci if (!ScanTooLargeNumber<RANGE_CHECK, RADIX, RadixType, RadixLimit>(number)) { 5453af6ab5fSopenharmony_ci return false; 5463af6ab5fSopenharmony_ci } 5473af6ab5fSopenharmony_ci 5483af6ab5fSopenharmony_ci number = number * RADIX + digit; 5493af6ab5fSopenharmony_ci Iterator().Forward(1); 5503af6ab5fSopenharmony_ci allowNumericOnNext = true; 5513af6ab5fSopenharmony_ci continue; 5523af6ab5fSopenharmony_ci } 5533af6ab5fSopenharmony_ci 5543af6ab5fSopenharmony_ci if (cp == LEX_CHAR_UNDERSCORE) { 5553af6ab5fSopenharmony_ci if (!allowNumericSeparator || !allowNumericOnNext) { 5563af6ab5fSopenharmony_ci ThrowError("Invalid numeric separator"); 5573af6ab5fSopenharmony_ci } 5583af6ab5fSopenharmony_ci 5593af6ab5fSopenharmony_ci GetToken().flags_ |= TokenFlags::NUMBER_HAS_UNDERSCORE; 5603af6ab5fSopenharmony_ci Iterator().Forward(1); 5613af6ab5fSopenharmony_ci allowNumericOnNext = false; 5623af6ab5fSopenharmony_ci continue; 5633af6ab5fSopenharmony_ci } 5643af6ab5fSopenharmony_ci 5653af6ab5fSopenharmony_ci if (!allowNumericOnNext) { 5663af6ab5fSopenharmony_ci Iterator().Backward(1); 5673af6ab5fSopenharmony_ci ThrowError("Numeric separators are not allowed at the end of numeric literals"); 5683af6ab5fSopenharmony_ci } 5693af6ab5fSopenharmony_ci 5703af6ab5fSopenharmony_ci break; 5713af6ab5fSopenharmony_ci } while (true); 5723af6ab5fSopenharmony_ci 5733af6ab5fSopenharmony_ci GetToken().number_ = lexer::Number(number); 5743af6ab5fSopenharmony_ci return true; 5753af6ab5fSopenharmony_ci} 5763af6ab5fSopenharmony_ci 5773af6ab5fSopenharmony_ciinline uint32_t Lexer::HexValue(char32_t ch) 5783af6ab5fSopenharmony_ci{ 5793af6ab5fSopenharmony_ci constexpr uint32_t HEX_MASK = 0xF; 5803af6ab5fSopenharmony_ci constexpr uint32_t DEC_OFFSET = 10; 5813af6ab5fSopenharmony_ci return ch < LEX_CHAR_UPPERCASE_A ? ch - LEX_CHAR_0 : ((ch - LEX_CHAR_UPPERCASE_A + DEC_OFFSET) & HEX_MASK); 5823af6ab5fSopenharmony_ci} 5833af6ab5fSopenharmony_ci 5843af6ab5fSopenharmony_ciinline bool Lexer::IsDecimalDigit(uint32_t cp) 5853af6ab5fSopenharmony_ci{ 5863af6ab5fSopenharmony_ci return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_9); 5873af6ab5fSopenharmony_ci} 5883af6ab5fSopenharmony_ci 5893af6ab5fSopenharmony_ciinline bool Lexer::IsHexDigit(char32_t ch) 5903af6ab5fSopenharmony_ci{ 5913af6ab5fSopenharmony_ci return ch < LEX_ASCII_MAX_BITS && (std::isxdigit(static_cast<unsigned char>(ch)) != 0); 5923af6ab5fSopenharmony_ci} 5933af6ab5fSopenharmony_ci 5943af6ab5fSopenharmony_ciinline bool Lexer::IsBinaryDigit(char32_t ch) 5953af6ab5fSopenharmony_ci{ 5963af6ab5fSopenharmony_ci return ch == LEX_CHAR_0 || ch == LEX_CHAR_1; 5973af6ab5fSopenharmony_ci} 5983af6ab5fSopenharmony_ci 5993af6ab5fSopenharmony_ciinline bool Lexer::IsOctalDigit(char32_t ch) 6003af6ab5fSopenharmony_ci{ 6013af6ab5fSopenharmony_ci return (ch >= LEX_CHAR_0 && ch <= LEX_CHAR_7); 6023af6ab5fSopenharmony_ci} 6033af6ab5fSopenharmony_ci} // namespace ark::es2panda::lexer 6043af6ab5fSopenharmony_ci 6053af6ab5fSopenharmony_citemplate <> 6063af6ab5fSopenharmony_cistruct enumbitops::IsAllowedType<ark::es2panda::lexer::NextTokenFlags> : std::true_type { 6073af6ab5fSopenharmony_ci}; 6083af6ab5fSopenharmony_ci 6093af6ab5fSopenharmony_ci#endif 610