1/* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef JS_LEXER_H
17#define JS_LEXER_H
18
19/** \addtogroup parser Parser
20 * @{
21 *
22 * \addtogroup jsparser JavaScript
23 * @{
24 *
25 * \addtogroup jsparser_lexer Lexer
26 * @{
27 */
28
/**
 * Lexer token types.
 *
 * The relative order of the enumerators is significant: the LEXER_IS_*
 * range-check macros defined inside this enum compare a token against the
 * first and last member of a group, and the LEXER_*_TOKEN_TO_OPCODE macros
 * defined after the enum turn the distance between two enumerators into an
 * opcode offset.
 *
 * Enumerators guarded by ENABLED (JERRY_ES2015) exist only when that feature
 * is enabled, so the numeric values of the enumerators which follow them
 * differ between the two configurations.
 */
typedef enum
{
  LEXER_EOS,                     /**< end of source */

  /* Primary expressions */
  LEXER_LITERAL,                 /**< literal token */
  LEXER_KEYW_THIS,               /**< this */
  LEXER_LIT_TRUE,                /**< true (not a keyword!) */
  LEXER_LIT_FALSE,               /**< false (not a keyword!) */
  LEXER_LIT_NULL,                /**< null (not a keyword!) */
#if ENABLED (JERRY_ES2015)
  LEXER_TEMPLATE_LITERAL,        /**< multi segment template literal */
  LEXER_THREE_DOTS,              /**< ... (rest or spread operator) */
#endif /* ENABLED (JERRY_ES2015) */

  /* Unary operators
   * IMPORTANT: update CBC_UNARY_OP_TOKEN_TO_OPCODE and
   *            CBC_UNARY_LVALUE_OP_TOKEN_TO_OPCODE after changes.
   * NOTE(review): the conversion macros in this header are spelled
   *               LEXER_UNARY_OP_TOKEN_TO_OPCODE and
   *               LEXER_UNARY_LVALUE_OP_TOKEN_TO_OPCODE; the CBC_ names
   *               above may be stale - confirm. */

/** True if token_type is within [LEXER_PLUS, LEXER_DECREASE]; the argument may be evaluated twice. */
#define LEXER_IS_UNARY_OP_TOKEN(token_type) \
  ((token_type) >= LEXER_PLUS && (token_type) <= LEXER_DECREASE)
/** True if token_type is within [LEXER_KEYW_DELETE, LEXER_DECREASE]; the argument may be evaluated twice. */
#define LEXER_IS_UNARY_LVALUE_OP_TOKEN(token_type) \
  ((token_type) >= LEXER_KEYW_DELETE && (token_type) <= LEXER_DECREASE)

  LEXER_PLUS,                    /**< "+" */
  LEXER_NEGATE,                  /**< "-" */
  LEXER_LOGICAL_NOT,             /**< "!" */
  LEXER_BIT_NOT,                 /**< "~" */
  LEXER_KEYW_VOID,               /**< void */
  LEXER_KEYW_TYPEOF,             /**< typeof */
#if ENABLED (JERRY_ES2015)
  LEXER_KEYW_AWAIT,              /**< await */
#endif /* ENABLED (JERRY_ES2015) */
  LEXER_KEYW_DELETE,             /**< delete */
  LEXER_INCREASE,                /**< "++" */
  LEXER_DECREASE,                /**< "--" */

  /* Binary operators
   * IMPORTANT: update CBC_BINARY_OP_TOKEN_TO_OPCODE,
   *            CBC_BINARY_LVALUE_OP_TOKEN_TO_OPCODE and
   *            parser_binary_precedence_table after changes.
   * NOTE(review): the conversion macros in this header are spelled
   *               LEXER_BINARY_OP_TOKEN_TO_OPCODE and
   *               LEXER_BINARY_LVALUE_OP_TOKEN_TO_OPCODE; the CBC_ names
   *               above may be stale - confirm. */

/* The last binary operator is LEXER_EXPONENTIATION when ES2015 support
 * is enabled and LEXER_MODULO otherwise, hence the two definitions. */
#if ENABLED (JERRY_ES2015)
/** True if token_type is within [LEXER_ASSIGN, LEXER_EXPONENTIATION]; the argument may be evaluated twice. */
#define LEXER_IS_BINARY_OP_TOKEN(token_type) \
  ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_EXPONENTIATION)
#else /* !ENABLED (JERRY_ES2015) */
/** True if token_type is within [LEXER_ASSIGN, LEXER_MODULO]; the argument may be evaluated twice. */
#define LEXER_IS_BINARY_OP_TOKEN(token_type) \
  ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_MODULO)
#endif /* ENABLED (JERRY_ES2015) */

/** True if token_type is within [LEXER_ASSIGN, LEXER_ASSIGN_BIT_XOR]; the argument may be evaluated twice. */
#define LEXER_IS_BINARY_LVALUE_TOKEN(token_type) \
  ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_ASSIGN_BIT_XOR)

/** First enumerator of the binary operator group. */
#define LEXER_FIRST_BINARY_OP LEXER_ASSIGN

  LEXER_ASSIGN,                  /**< "=" (prec: 3) */
  LEXER_ASSIGN_ADD,              /**< "+=" (prec: 3) */
  LEXER_ASSIGN_SUBTRACT,         /**< "-=" (prec: 3) */
  LEXER_ASSIGN_MULTIPLY,         /**< "*=" (prec: 3) */
  LEXER_ASSIGN_DIVIDE,           /**< "/=" (prec: 3) */
  LEXER_ASSIGN_MODULO,           /**< "%=" (prec: 3) */
#if ENABLED (JERRY_ES2015)
  LEXER_ASSIGN_EXPONENTIATION,   /**< "**=" (prec: 3) */
#endif /* ENABLED (JERRY_ES2015) */
  LEXER_ASSIGN_LEFT_SHIFT,       /**< "<<=" (prec: 3) */
  LEXER_ASSIGN_RIGHT_SHIFT,      /**< ">>=" (prec: 3) */
  LEXER_ASSIGN_UNS_RIGHT_SHIFT,  /**< ">>>=" (prec: 3) */
  LEXER_ASSIGN_BIT_AND,          /**< "&=" (prec: 3) */
  LEXER_ASSIGN_BIT_OR,           /**< "|=" (prec: 3) */
  LEXER_ASSIGN_BIT_XOR,          /**< "^=" (prec: 3) */
  LEXER_QUESTION_MARK,           /**< "?" (prec: 4) */
  LEXER_LOGICAL_OR,              /**< "||" (prec: 5) */
  LEXER_LOGICAL_AND,             /**< "&&" (prec: 6) */
  LEXER_BIT_OR,                  /**< "|" (prec: 7) */
  LEXER_BIT_XOR,                 /**< "^" (prec: 8) */
  LEXER_BIT_AND,                 /**< "&" (prec: 9) */
  LEXER_EQUAL,                   /**< "==" (prec: 10) */
  LEXER_NOT_EQUAL,               /**< "!=" (prec: 10) */
  LEXER_STRICT_EQUAL,            /**< "===" (prec: 10) */
  LEXER_STRICT_NOT_EQUAL,        /**< "!==" (prec: 10) */
  LEXER_LESS,                    /**< "<" (prec: 11) */
  LEXER_GREATER,                 /**< ">" (prec: 11) */
  LEXER_LESS_EQUAL,              /**< "<=" (prec: 11) */
  LEXER_GREATER_EQUAL,           /**< ">=" (prec: 11) */
  LEXER_KEYW_IN,                 /**< in (prec: 11) */
  LEXER_KEYW_INSTANCEOF,         /**< instanceof (prec: 11) */
  LEXER_LEFT_SHIFT,              /**< "<<" (prec: 12) */
  LEXER_RIGHT_SHIFT,             /**< ">>" (prec: 12) */
  LEXER_UNS_RIGHT_SHIFT,         /**< ">>>" (prec: 12) */
  LEXER_ADD,                     /**< "+" (prec: 13) */
  LEXER_SUBTRACT,                /**< "-" (prec: 13) */
  LEXER_MULTIPLY,                /**< "*" (prec: 14) */
  LEXER_DIVIDE,                  /**< "/" (prec: 14) */
  LEXER_MODULO,                  /**< "%" (prec: 14) */
#if ENABLED (JERRY_ES2015)
  LEXER_EXPONENTIATION,          /**< "**" (prec: 15) */
#endif /* ENABLED (JERRY_ES2015) */

  /* Punctuators */
  LEXER_LEFT_BRACE,              /**< "{" */
  LEXER_LEFT_PAREN,              /**< "(" */
  LEXER_LEFT_SQUARE,             /**< "[" */
  LEXER_RIGHT_BRACE,             /**< "}" */
  LEXER_RIGHT_PAREN,             /**< ")" */
  LEXER_RIGHT_SQUARE,            /**< "]" */
  LEXER_DOT,                     /**< "." */
  LEXER_SEMICOLON,               /**< ";" */
  LEXER_COLON,                   /**< ":" */
  LEXER_COMMA,                   /**< "," */
#if ENABLED (JERRY_ES2015)
  LEXER_ARROW,                   /**< "=>" */
#endif /* ENABLED (JERRY_ES2015) */

  /* Keyword tokens */
  LEXER_KEYW_BREAK,              /**< break */
  LEXER_KEYW_DO,                 /**< do */
  LEXER_KEYW_CASE,               /**< case */
  LEXER_KEYW_ELSE,               /**< else */
  LEXER_KEYW_NEW,                /**< new */
  LEXER_KEYW_VAR,                /**< var */
  LEXER_KEYW_CATCH,              /**< catch */
  LEXER_KEYW_FINALLY,            /**< finally */
  LEXER_KEYW_RETURN,             /**< return */
  LEXER_KEYW_CONTINUE,           /**< continue */
  LEXER_KEYW_FOR,                /**< for */
  LEXER_KEYW_SWITCH,             /**< switch */
  LEXER_KEYW_WHILE,              /**< while */
  LEXER_KEYW_DEBUGGER,           /**< debugger */
  LEXER_KEYW_FUNCTION,           /**< function */
  LEXER_KEYW_WITH,               /**< with */
  LEXER_KEYW_DEFAULT,            /**< default */
  LEXER_KEYW_IF,                 /**< if */
  LEXER_KEYW_THROW,              /**< throw */
  LEXER_KEYW_TRY,                /**< try */

  LEXER_KEYW_CLASS,              /**< class */
  LEXER_KEYW_EXTENDS,            /**< extends */
  LEXER_KEYW_SUPER,              /**< super */
  LEXER_KEYW_CONST,              /**< const */
  LEXER_KEYW_EXPORT,             /**< export */
  LEXER_KEYW_IMPORT,             /**< import */
  LEXER_KEYW_ENUM,               /**< enum */

  /* These are virtual tokens. */
  LEXER_EXPRESSION_START,        /**< expression start */
  LEXER_PROPERTY_GETTER,         /**< property getter function */
  LEXER_PROPERTY_SETTER,         /**< property setter function */
  LEXER_COMMA_SEP_LIST,          /**< comma separated bracketed expression list */
#if ENABLED (JERRY_ES2015)
  LEXER_ASSIGN_GROUP_EXPR,       /**< identifier for the assignment is located in a group expression */
  LEXER_ASSIGN_CONST,            /**< a const binding is reassigned */
  LEXER_CLASS_CONSTRUCTOR,       /**< special value for class constructor method */
  LEXER_INVALID_PATTERN,         /**< special value for invalid destructuring pattern */
#endif /* ENABLED (JERRY_ES2015) */

  /* LEXER_FIRST_NON_RESERVED_KEYWORD names the first enumerator of the
   * group below: LEXER_KEYW_ASYNC when ES2015 support is enabled,
   * LEXER_KEYW_EVAL otherwise (LEXER_KEYW_ASYNC does not exist then). */
#if ENABLED (JERRY_ES2015)
  /* Keywords which are not keyword tokens. */
#define LEXER_FIRST_NON_RESERVED_KEYWORD LEXER_KEYW_ASYNC
  LEXER_KEYW_ASYNC,              /**< async */
#else /* !ENABLED (JERRY_ES2015) */
  /* Keywords which are not keyword tokens. */
#define LEXER_FIRST_NON_RESERVED_KEYWORD LEXER_KEYW_EVAL
#endif /* ENABLED (JERRY_ES2015) */

  /* Keywords which cannot be assigned in strict mode. */
#define LEXER_FIRST_NON_STRICT_ARGUMENTS LEXER_KEYW_EVAL
  LEXER_KEYW_EVAL,               /**< eval */
  LEXER_KEYW_ARGUMENTS,          /**< arguments */

  /* Future strict reserved words: these keywords
   * must form a group after non-reserved keywords. */
#define LEXER_FIRST_FUTURE_STRICT_RESERVED_WORD LEXER_KEYW_IMPLEMENTS
  LEXER_KEYW_IMPLEMENTS,         /**< implements */
  LEXER_KEYW_PRIVATE,            /**< private */
  LEXER_KEYW_PUBLIC,             /**< public */
  LEXER_KEYW_INTERFACE,          /**< interface */
  LEXER_KEYW_PACKAGE,            /**< package */
  LEXER_KEYW_PROTECTED,          /**< protected */

  /* Context dependent future strict reserved words:
   * See also: ECMA-262 v6, 11.6.2.1 */
  LEXER_KEYW_LET,                /**< let */
  LEXER_KEYW_YIELD,              /**< yield */
  LEXER_KEYW_STATIC,             /**< static */
} lexer_token_type_t;
213
/**
 * First byte of the LS / PS sequence these macros are named after.
 * 0xe2 is the lead byte of the UTF-8 encodings of U+2028 (E2 80 A8)
 * and U+2029 (E2 80 A9).
 */
#define LEXER_NEWLINE_LS_PS_BYTE_1 0xe2
/**
 * Checks the second and third byte of a sequence: (source)[1] must be equal
 * to LIT_UTF8_2_BYTE_CODE_POINT_MIN and (source)[2] must be 0xa8 or 0xa9
 * (OR-ing with 0x1 folds both values into 0xa9). (source)[0] is not tested
 * by this macro, and the argument is expanded twice.
 *
 * NOTE(review): this matches U+2028 / U+2029 only if
 * LIT_UTF8_2_BYTE_CODE_POINT_MIN (defined outside this file) is 0x80 - confirm.
 */
#define LEXER_NEWLINE_LS_PS_BYTE_23(source) \
  ((source)[1] == LIT_UTF8_2_BYTE_CODE_POINT_MIN && ((source)[2] | 0x1) == 0xa9)
217
/**
 * True if type is LEXER_LEFT_BRACE, LEXER_LEFT_PAREN or LEXER_LEFT_SQUARE.
 * The argument may be evaluated up to three times.
 */
#define LEXER_IS_LEFT_BRACKET(type) \
  ((type) == LEXER_LEFT_BRACE || (type) == LEXER_LEFT_PAREN || (type) == LEXER_LEFT_SQUARE)

/**
 * True if type is LEXER_RIGHT_BRACE, LEXER_RIGHT_PAREN or LEXER_RIGHT_SQUARE.
 * The argument may be evaluated up to three times.
 */
#define LEXER_IS_RIGHT_BRACKET(type) \
  ((type) == LEXER_RIGHT_BRACE || (type) == LEXER_RIGHT_PAREN || (type) == LEXER_RIGHT_SQUARE)
223
/**
 * Converts a unary operator token to an opcode value:
 * CBC_PLUS + 2 * (distance of token_type from LEXER_PLUS).
 * The result is not cast to cbc_opcode_t.
 *
 * NOTE(review): presumably relies on the CBC opcode list (defined outside
 * this file) following the token order with a stride of 2 - confirm.
 */
#define LEXER_UNARY_OP_TOKEN_TO_OPCODE(token_type) \
   ((((token_type) - LEXER_PLUS) * 2) + CBC_PLUS)

/**
 * Converts a unary lvalue operator token to an opcode value:
 * CBC_PRE_INCR + 6 * (distance of token_type from LEXER_INCREASE).
 * The result is not cast to cbc_opcode_t.
 *
 * The base is LEXER_INCREASE, so LEXER_KEYW_DELETE (which immediately
 * precedes it in the enum) produces an offset of -6 from CBC_PRE_INCR.
 *
 * NOTE(review): presumably relies on the CBC opcode list (defined outside
 * this file) using a stride of 6 for these opcodes - confirm.
 */
#define LEXER_UNARY_LVALUE_OP_TOKEN_TO_OPCODE(token_type) \
   ((((token_type) - LEXER_INCREASE) * 6) + CBC_PRE_INCR)

/**
 * Converts a binary operator token to a cbc_opcode_t:
 * CBC_BIT_OR + 3 * (distance of token_type from LEXER_BIT_OR).
 *
 * NOTE(review): the base is LEXER_BIT_OR, not LEXER_FIRST_BINARY_OP, so
 * tokens placed before LEXER_BIT_OR map below CBC_BIT_OR; presumably those
 * tokens are handled separately by the callers - confirm.
 */
#define LEXER_BINARY_OP_TOKEN_TO_OPCODE(token_type) \
   ((cbc_opcode_t) ((((token_type) - LEXER_BIT_OR) * 3) + CBC_BIT_OR))

/**
 * Converts a binary lvalue (compound assignment) token to a cbc_opcode_t:
 * CBC_ASSIGN_ADD + 2 * (distance of token_type from LEXER_ASSIGN_ADD).
 *
 * NOTE(review): presumably relies on the CBC opcode list (defined outside
 * this file) following the token order with a stride of 2 - confirm.
 */
#define LEXER_BINARY_LVALUE_OP_TOKEN_TO_OPCODE(token_type) \
   ((cbc_opcode_t) ((((token_type) - LEXER_ASSIGN_ADD) * 2) + CBC_ASSIGN_ADD))
235
/**
 * Maximum local buffer size for identifiers which contain escape sequences.
 */
#define LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE 48
240
/**
 * Newline related flag bits of the lexer.
 *
 * Each enumerator is a distinct single-bit mask.
 */
typedef enum
{
  LEXER_WAS_NEWLINE    = 0x01u,              /**< a newline was seen */
  LEXER_NO_SKIP_SPACES = 0x02u               /**< ignore skip spaces */
} lexer_newline_flags_t;
249
/**
 * Option bits for parsing an object identifier in the lexer.
 *
 * Each enumerator, LEXER_OBJ_IDENT_NO_OPTS included, is a distinct
 * single-bit mask.
 */
typedef enum
{
  LEXER_OBJ_IDENT_NO_OPTS          = 0x01u,     /**< no options */
  LEXER_OBJ_IDENT_ONLY_IDENTIFIERS = 0x02u,     /**< accept identifiers only */
  LEXER_OBJ_IDENT_CLASS_METHOD     = 0x04u,     /**< an identifier inside a class body is expected */
  LEXER_OBJ_IDENT_OBJECT_PATTERN   = 0x08u,     /**< "get"/"set" is parsed as string literal in object pattern */
} lexer_obj_ident_opts_t;
260
/**
 * Option bits for string handling in the lexer.
 *
 * Each enumerator, LEXER_STRING_NO_OPTS included, is a distinct
 * single-bit mask.
 */
typedef enum
{
  LEXER_STRING_NO_OPTS = 0x01u,           /**< no options */
  LEXER_STRING_RAW     = 0x02u,           /**< raw string, see ECMAScript v6, 11.8.6.1: TVR */
} lexer_string_options_t;
269
/**
 * Kinds of number literals distinguished by the lexer.
 *
 * The values are consecutive and start at zero.
 */
typedef enum
{
  LEXER_NUMBER_DECIMAL     = 0,             /**< decimal number */
  LEXER_NUMBER_HEXADECIMAL = 1,             /**< hexadecimal number */
  LEXER_NUMBER_OCTAL       = 2,             /**< octal number */
  LEXER_NUMBER_BINARY      = 3,             /**< binary number */
} lexer_number_type_t;
280
/**
 * Lexer character (string / identifier) literal data.
 *
 * char_p points to const bytes, so the characters of the literal cannot be
 * modified through this structure. The length field is documented as holding
 * either a length or a literal index; which one applies is decided by code
 * outside this file.
 */
typedef struct
{
  const uint8_t *char_p;                     /**< start of identifier or string token */
  prop_length_t length;                      /**< length or index of a literal */
  uint8_t type;                              /**< type of the current literal */
  uint8_t has_escape;                        /**< has escape sequences */
} lexer_lit_location_t;
291
/**
 * Lexer token.
 *
 * Holds the kind of a token, its start position (line and column) and,
 * for character literals, the location data of the literal.
 *
 * NOTE(review): type and keyword_type presumably store lexer_token_type_t
 * values narrowed to uint8_t - confirm against the lexer implementation.
 */
typedef struct
{
  uint8_t type;                              /**< token type */
  uint8_t keyword_type;                      /**< keyword type for identifiers */
  uint8_t extra_value;                       /**< helper value for different purposes */
  uint8_t flags;                             /**< flag bits for the current token */
  parser_line_counter_t line;                /**< token start line */
  parser_line_counter_t column;              /**< token start column */
  lexer_lit_location_t lit_location;         /**< extra data for character literals */
} lexer_token_t;
305
/**
 * Literal data set by lexer_construct_literal_object.
 *
 * Pairs a pointer to a literal object with the 16 bit index of that literal.
 */
typedef struct
{
  lexer_literal_t *literal_p;                /**< pointer to the literal object */
  uint16_t index;                            /**< literal index */
} lexer_lit_object_t;
314
315/**
316 * @}
317 * @}
318 * @}
319 */
320
321#endif /* !JS_LEXER_H */
322