/* * Copyright (c) 2021-2024 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PANDA_ASSEMBLER_LEXER_H #define PANDA_ASSEMBLER_LEXER_H #include #include #include #include #include #include #include "define.h" #include "error.h" #include "isa.h" #include "utils/logger.h" namespace ark::pandasm { // NOLINTBEGIN(misc-non-private-member-variables-in-classes) struct Token { enum class Type { ID_BAD = 0, /* delimiters */ DEL_COMMA, /* , */ DEL_COLON, /* : */ DEL_BRACE_L, /* { */ DEL_BRACE_R, /* } */ DEL_BRACKET_L, /* ( */ DEL_BRACKET_R, /* ) */ DEL_SQUARE_BRACKET_L, /* [ */ DEL_SQUARE_BRACKET_R, /* ] */ DEL_GT, /* > */ DEL_LT, /* < */ DEL_EQ, /* = */ DEL_DOT, /* . */ ID, /* other */ ID_STRING, /* string literal */ OPERATION, /* special */ // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define OPLIST(inst_code, name, optype, width, flags, dst_idx, src_idxs, prof_size) \ ID_OP_##inst_code, /* command type list */ PANDA_INSTRUCTION_LIST(OPLIST) #undef OPLIST KEYWORD, /* special */ // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define KEYWORDS(name, inst_code) ID_##inst_code, /* keyword type List */ KEYWORDS_LIST(KEYWORDS) #undef KEYWORDS }; std::string wholeLine; size_t boundLeft; /* right and left bounds of tokens */ size_t boundRight; Type type; Token() : Token(0, 0, Type::ID_BAD, "") {} Token(size_t bL, size_t bR, Type t, std::string begOfLine) : wholeLine(std::move(begOfLine)), boundLeft(bL), boundRight(bR), type(t) { } }; using Tokens = std::pair, Error>; using TokenSet = const std::vector>; struct Line { std::vector tokens; std::string buffer; /* Raw line, as read from the file */ size_t pos {0}; /* current line position */ size_t end; explicit Line(std::string str) : buffer(std::move(str)), end(buffer.size()) {} }; // NOLINTEND(misc-non-private-member-variables-in-classes) class Lexer { public: PANDA_PUBLIC_API Lexer(); PANDA_PUBLIC_API ~Lexer(); NO_MOVE_SEMANTIC(Lexer); NO_COPY_SEMANTIC(Lexer); /* * The main function of Tokenizing, which takes a string. * Returns a vector of tokens. */ PANDA_PUBLIC_API Tokens TokenizeString(const std::string &sourceStr); private: std::vector lines_; Line *currLine_ {nullptr}; Error err_; bool Eol() const; /* End of line */ bool LexString(); void LexTokens(); void LexBadTokens(); void LexPreprocess(); void SkipSpace(); void AnalyzeLine(); Token::Type LexGetType(size_t beg, size_t end) const; }; /* * Returns a string representation of a token type. */ std::string_view TokenTypeWhat(Token::Type t); } // namespace ark::pandasm #endif // PANDA_ASSEMBLER_LEXER_H