1b1994897Sopenharmony_ci/**
2b1994897Sopenharmony_ci * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3b1994897Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License");
4b1994897Sopenharmony_ci * you may not use this file except in compliance with the License.
5b1994897Sopenharmony_ci * You may obtain a copy of the License at
6b1994897Sopenharmony_ci *
7b1994897Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0
8b1994897Sopenharmony_ci *
9b1994897Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software
10b1994897Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS,
11b1994897Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12b1994897Sopenharmony_ci * See the License for the specific language governing permissions and
13b1994897Sopenharmony_ci * limitations under the License.
14b1994897Sopenharmony_ci */
15b1994897Sopenharmony_ci
16b1994897Sopenharmony_ci#ifndef ASSEMBLER_LEXER_H
17b1994897Sopenharmony_ci#define ASSEMBLER_LEXER_H
18b1994897Sopenharmony_ci
19b1994897Sopenharmony_ci#include <array>
20b1994897Sopenharmony_ci#include <iostream>
21b1994897Sopenharmony_ci#include <string>
22b1994897Sopenharmony_ci#include <string_view>
23b1994897Sopenharmony_ci#include <unordered_map>
24b1994897Sopenharmony_ci#include <vector>
25b1994897Sopenharmony_ci
26b1994897Sopenharmony_ci#include "define.h"
27b1994897Sopenharmony_ci#include "error.h"
28b1994897Sopenharmony_ci#include "isa.h"
29b1994897Sopenharmony_ci#include "utils/logger.h"
30b1994897Sopenharmony_ci
31b1994897Sopenharmony_cinamespace panda::pandasm {
32b1994897Sopenharmony_ci
33b1994897Sopenharmony_cistruct Token {
34b1994897Sopenharmony_ci    enum class Type {
35b1994897Sopenharmony_ci        ID_BAD = 0,
36b1994897Sopenharmony_ci        /* delimiters */
37b1994897Sopenharmony_ci        DEL_COMMA,                                                                          /* , */
38b1994897Sopenharmony_ci        DEL_COLON,                                                                          /* : */
39b1994897Sopenharmony_ci        DEL_BRACE_L,                                                                        /* { */
40b1994897Sopenharmony_ci        DEL_BRACE_R,                                                                        /* } */
41b1994897Sopenharmony_ci        DEL_BRACKET_L,                                                                      /* ( */
42b1994897Sopenharmony_ci        DEL_BRACKET_R,                                                                      /* ) */
43b1994897Sopenharmony_ci        DEL_SQUARE_BRACKET_L,                                                               /* [ */
44b1994897Sopenharmony_ci        DEL_SQUARE_BRACKET_R,                                                               /* ] */
45b1994897Sopenharmony_ci        DEL_GT,                                                                             /* > */
46b1994897Sopenharmony_ci        DEL_LT,                                                                             /* < */
47b1994897Sopenharmony_ci        DEL_EQ,                                                                             /* = */
48b1994897Sopenharmony_ci        DEL_DOT,                                                                            /* . */
49b1994897Sopenharmony_ci        ID,                                                                                 /* other */
50b1994897Sopenharmony_ci        ID_STRING,                                                                          /* string literal */
51b1994897Sopenharmony_ci        OPERATION,                                                                          /* special */
52b1994897Sopenharmony_ci#define OPLIST(inst_code, name, optype, width, flags, dst_idx, src_idxs) ID_OP_##inst_code, /* command type list */
53b1994897Sopenharmony_ci        PANDA_INSTRUCTION_LIST(OPLIST)
54b1994897Sopenharmony_ci#undef OPLIST
55b1994897Sopenharmony_ci            KEYWORD,                              /* special */
56b1994897Sopenharmony_ci#define KEYWORDS(name, inst_code) ID_##inst_code, /* keyword type List */
57b1994897Sopenharmony_ci        KEYWORDS_LIST(KEYWORDS)
58b1994897Sopenharmony_ci#undef KEYWORDS
59b1994897Sopenharmony_ci    };
60b1994897Sopenharmony_ci
61b1994897Sopenharmony_ci    std::string whole_line;
62b1994897Sopenharmony_ci    size_t bound_left; /* right and left bounds of tokens */
63b1994897Sopenharmony_ci    size_t bound_right;
64b1994897Sopenharmony_ci    Type type;
65b1994897Sopenharmony_ci
66b1994897Sopenharmony_ci    Token() : Token(0, 0, Type::ID_BAD, "") {}
67b1994897Sopenharmony_ci
68b1994897Sopenharmony_ci    Token(size_t b_l, size_t b_r, Type t, std::string beg_of_line)
69b1994897Sopenharmony_ci        : whole_line(std::move(beg_of_line)), bound_left(b_l), bound_right(b_r), type(t)
70b1994897Sopenharmony_ci    {
71b1994897Sopenharmony_ci    }
72b1994897Sopenharmony_ci};
73b1994897Sopenharmony_ci
/*
 * Return type of Lexer::TokenizeString: a vector of tokens (first)
 * paired with an Error object (second).
 */
using Tokens = std::pair<std::vector<Token>, Error>;
75b1994897Sopenharmony_ci
/*
 * Immutable (const-qualified) collection of token vectors.
 * NOTE(review): presumably one inner vector per tokenized line - confirm against the users of this alias.
 */
using TokenSet = const std::vector<std::vector<Token>>;
77b1994897Sopenharmony_ci
78b1994897Sopenharmony_cistruct Line {
79b1994897Sopenharmony_ci    std::vector<Token> tokens;
80b1994897Sopenharmony_ci    std::string buffer; /* Raw line, as read from the file */
81b1994897Sopenharmony_ci    size_t pos;         /* current line position */
82b1994897Sopenharmony_ci    size_t end;
83b1994897Sopenharmony_ci
84b1994897Sopenharmony_ci    explicit Line(std::string str) : buffer(std::move(str)), pos(0), end(buffer.size()) {}
85b1994897Sopenharmony_ci};
86b1994897Sopenharmony_ci
87b1994897Sopenharmony_ciclass Lexer {
88b1994897Sopenharmony_cipublic:
89b1994897Sopenharmony_ci    Lexer();
90b1994897Sopenharmony_ci    ~Lexer();
91b1994897Sopenharmony_ci    NO_MOVE_SEMANTIC(Lexer);
92b1994897Sopenharmony_ci    NO_COPY_SEMANTIC(Lexer);
93b1994897Sopenharmony_ci
94b1994897Sopenharmony_ci    /*
95b1994897Sopenharmony_ci     * The main function of Tokenizing, which takes a string.
96b1994897Sopenharmony_ci     * Returns a vector of tokens.
97b1994897Sopenharmony_ci     */
98b1994897Sopenharmony_ci    Tokens TokenizeString(const std::string &);
99b1994897Sopenharmony_ci
100b1994897Sopenharmony_ciprivate:
101b1994897Sopenharmony_ci    std::vector<Line> lines_;
102b1994897Sopenharmony_ci    Line *curr_line_;
103b1994897Sopenharmony_ci    Error err_;
104b1994897Sopenharmony_ci
105b1994897Sopenharmony_ci    bool Eol() const; /* End of line */
106b1994897Sopenharmony_ci    bool LexString();
107b1994897Sopenharmony_ci    void LexTokens();
108b1994897Sopenharmony_ci    void LexPreprocess();
109b1994897Sopenharmony_ci    void SkipSpace();
110b1994897Sopenharmony_ci    void AnalyzeLine();
111b1994897Sopenharmony_ci    Token::Type LexGetType(size_t beg, size_t end) const;
112b1994897Sopenharmony_ci};
113b1994897Sopenharmony_ci
/*
 * Returns a string representation of the token type t.
 * The result is a non-owning std::string_view.
 * NOTE(review): the lifetime of the viewed characters is not visible from this
 * declaration - presumably static storage; confirm in the implementation.
 */
std::string_view TokenTypeWhat(Token::Type t);
118b1994897Sopenharmony_ci
119b1994897Sopenharmony_ci}  // namespace panda::pandasm
120b1994897Sopenharmony_ci
121b1994897Sopenharmony_ci#endif  // ASSEMBLER_LEXER_H
122