/* Copyright JS Foundation and other contributors, http://js.foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

16#ifndef LIT_CHAR_HELPERS_H
17#define LIT_CHAR_HELPERS_H
18
19#include "lit-globals.h"
20
/*
 * Format control characters (ECMA-262 v5, Table 1)
 *
 * Each constant is the character's Unicode code point value cast to ecma_char_t.
 */
#define LIT_CHAR_ZWNJ ((ecma_char_t) 0x200C) /* zero width non-joiner */
#define LIT_CHAR_ZWJ  ((ecma_char_t) 0x200D) /* zero width joiner */
#define LIT_CHAR_BOM  ((ecma_char_t) 0xFEFF) /* byte order mark */

/*
 * Whitespace characters (ECMA-262 v5, Table 2)
 *
 * Each constant is the character's Unicode code point value cast to ecma_char_t.
 */
#define LIT_CHAR_TAB  ((ecma_char_t) 0x0009) /* tab */
#define LIT_CHAR_VTAB ((ecma_char_t) 0x000B) /* vertical tab */
#define LIT_CHAR_FF   ((ecma_char_t) 0x000C) /* form feed */
#define LIT_CHAR_SP   ((ecma_char_t) 0x0020) /* space */
#define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */
/* LIT_CHAR_BOM is defined above */

38bool lit_char_is_white_space (lit_code_point_t c);
39
/*
 * Line terminator characters (ECMA-262 v5, Table 3)
 *
 * Each constant is the character's Unicode code point value cast to ecma_char_t.
 */
#define LIT_CHAR_LF ((ecma_char_t) 0x000A) /* line feed */
#define LIT_CHAR_CR ((ecma_char_t) 0x000D) /* carriage return */
#define LIT_CHAR_LS ((ecma_char_t) 0x2028) /* line separator */
#define LIT_CHAR_PS ((ecma_char_t) 0x2029) /* paragraph separator */

48bool lit_char_is_line_terminator (ecma_char_t c);
49
/*
 * String Single Character Escape Sequences (ECMA-262 v5, Table 4)
 *
 * Only the characters not already named in an earlier section are defined here.
 */
#define LIT_CHAR_BS           ((ecma_char_t) 0x0008) /* backspace */
/* LIT_CHAR_TAB is defined above */
/* LIT_CHAR_LF is defined above */
/* LIT_CHAR_VTAB is defined above */
/* LIT_CHAR_FF is defined above */
/* LIT_CHAR_CR is defined above */
#define LIT_CHAR_DOUBLE_QUOTE ((ecma_char_t) '"') /* double quote */
#define LIT_CHAR_SINGLE_QUOTE ((ecma_char_t) '\'') /* single quote */
#define LIT_CHAR_BACKSLASH    ((ecma_char_t) '\\') /* reverse solidus (backslash) */

/*
 * Comment characters (ECMA-262 v5, 7.4)
 */
#define LIT_CHAR_SLASH    ((ecma_char_t) '/') /* solidus */
#define LIT_CHAR_ASTERISK ((ecma_char_t) '*') /* asterisk */

/*
 * Identifier name characters (ECMA-262 v5, 7.6)
 */
#define LIT_CHAR_DOLLAR_SIGN ((ecma_char_t) '$')  /* dollar sign */
#define LIT_CHAR_UNDERSCORE  ((ecma_char_t) '_')  /* low line (underscore) */
/* LIT_CHAR_BACKSLASH is defined above */

76bool lit_code_point_is_identifier_start (lit_code_point_t code_point);
77bool lit_code_point_is_identifier_part (lit_code_point_t code_point);
78
/*
 * Punctuator characters (ECMA-262 v5, 7.7)
 *
 * Each constant is the value of the corresponding character literal cast to
 * ecma_char_t.
 */
#define LIT_CHAR_LEFT_BRACE   ((ecma_char_t) '{') /* left curly bracket */
#define LIT_CHAR_RIGHT_BRACE  ((ecma_char_t) '}') /* right curly bracket */
#define LIT_CHAR_LEFT_PAREN   ((ecma_char_t) '(') /* left parenthesis */
#define LIT_CHAR_RIGHT_PAREN  ((ecma_char_t) ')') /* right parenthesis */
#define LIT_CHAR_LEFT_SQUARE  ((ecma_char_t) '[') /* left square bracket */
#define LIT_CHAR_RIGHT_SQUARE ((ecma_char_t) ']') /* right square bracket */
#define LIT_CHAR_DOT          ((ecma_char_t) '.') /* dot */
#define LIT_CHAR_SEMICOLON    ((ecma_char_t) ';') /* semicolon */
#define LIT_CHAR_COMMA        ((ecma_char_t) ',') /* comma */
#define LIT_CHAR_LESS_THAN    ((ecma_char_t) '<') /* less-than sign */
#define LIT_CHAR_GREATER_THAN ((ecma_char_t) '>') /* greater-than sign */
#define LIT_CHAR_EQUALS       ((ecma_char_t) '=') /* equals sign */
#define LIT_CHAR_PLUS         ((ecma_char_t) '+') /* plus sign */
#define LIT_CHAR_MINUS        ((ecma_char_t) '-') /* hyphen-minus */
/* LIT_CHAR_ASTERISK is defined above */
#define LIT_CHAR_PERCENT      ((ecma_char_t) '%') /* percent sign */
#define LIT_CHAR_AMPERSAND    ((ecma_char_t) '&') /* ampersand */
#define LIT_CHAR_VLINE        ((ecma_char_t) '|') /* vertical line */
#define LIT_CHAR_CIRCUMFLEX   ((ecma_char_t) '^') /* circumflex accent */
#define LIT_CHAR_EXCLAMATION  ((ecma_char_t) '!') /* exclamation mark */
#define LIT_CHAR_TILDE        ((ecma_char_t) '~') /* tilde */
#define LIT_CHAR_QUESTION     ((ecma_char_t) '?') /* question mark */
#define LIT_CHAR_COLON        ((ecma_char_t) ':') /* colon */

/*
 * Special characters for String.prototype.replace.
 */
#define LIT_CHAR_GRAVE_ACCENT ((ecma_char_t) '`') /* grave accent */

/**
 * Uppercase ASCII letters
 *
 * One constant per letter 'A'..'Z', each the value of the character literal
 * cast to ecma_char_t.
 */
#define LIT_CHAR_UPPERCASE_A ((ecma_char_t) 'A')
#define LIT_CHAR_UPPERCASE_B ((ecma_char_t) 'B')
#define LIT_CHAR_UPPERCASE_C ((ecma_char_t) 'C')
#define LIT_CHAR_UPPERCASE_D ((ecma_char_t) 'D')
#define LIT_CHAR_UPPERCASE_E ((ecma_char_t) 'E')
#define LIT_CHAR_UPPERCASE_F ((ecma_char_t) 'F')
#define LIT_CHAR_UPPERCASE_G ((ecma_char_t) 'G')
#define LIT_CHAR_UPPERCASE_H ((ecma_char_t) 'H')
#define LIT_CHAR_UPPERCASE_I ((ecma_char_t) 'I')
#define LIT_CHAR_UPPERCASE_J ((ecma_char_t) 'J')
#define LIT_CHAR_UPPERCASE_K ((ecma_char_t) 'K')
#define LIT_CHAR_UPPERCASE_L ((ecma_char_t) 'L')
#define LIT_CHAR_UPPERCASE_M ((ecma_char_t) 'M')
#define LIT_CHAR_UPPERCASE_N ((ecma_char_t) 'N')
#define LIT_CHAR_UPPERCASE_O ((ecma_char_t) 'O')
#define LIT_CHAR_UPPERCASE_P ((ecma_char_t) 'P')
#define LIT_CHAR_UPPERCASE_Q ((ecma_char_t) 'Q')
#define LIT_CHAR_UPPERCASE_R ((ecma_char_t) 'R')
#define LIT_CHAR_UPPERCASE_S ((ecma_char_t) 'S')
#define LIT_CHAR_UPPERCASE_T ((ecma_char_t) 'T')
#define LIT_CHAR_UPPERCASE_U ((ecma_char_t) 'U')
#define LIT_CHAR_UPPERCASE_V ((ecma_char_t) 'V')
#define LIT_CHAR_UPPERCASE_W ((ecma_char_t) 'W')
#define LIT_CHAR_UPPERCASE_X ((ecma_char_t) 'X')
#define LIT_CHAR_UPPERCASE_Y ((ecma_char_t) 'Y')
#define LIT_CHAR_UPPERCASE_Z ((ecma_char_t) 'Z')

/**
 * Lowercase ASCII letters
 *
 * One constant per letter 'a'..'z', each the value of the character literal
 * cast to ecma_char_t.
 */
#define LIT_CHAR_LOWERCASE_A ((ecma_char_t) 'a')
#define LIT_CHAR_LOWERCASE_B ((ecma_char_t) 'b')
#define LIT_CHAR_LOWERCASE_C ((ecma_char_t) 'c')
#define LIT_CHAR_LOWERCASE_D ((ecma_char_t) 'd')
#define LIT_CHAR_LOWERCASE_E ((ecma_char_t) 'e')
#define LIT_CHAR_LOWERCASE_F ((ecma_char_t) 'f')
#define LIT_CHAR_LOWERCASE_G ((ecma_char_t) 'g')
#define LIT_CHAR_LOWERCASE_H ((ecma_char_t) 'h')
#define LIT_CHAR_LOWERCASE_I ((ecma_char_t) 'i')
#define LIT_CHAR_LOWERCASE_J ((ecma_char_t) 'j')
#define LIT_CHAR_LOWERCASE_K ((ecma_char_t) 'k')
#define LIT_CHAR_LOWERCASE_L ((ecma_char_t) 'l')
#define LIT_CHAR_LOWERCASE_M ((ecma_char_t) 'm')
#define LIT_CHAR_LOWERCASE_N ((ecma_char_t) 'n')
#define LIT_CHAR_LOWERCASE_O ((ecma_char_t) 'o')
#define LIT_CHAR_LOWERCASE_P ((ecma_char_t) 'p')
#define LIT_CHAR_LOWERCASE_Q ((ecma_char_t) 'q')
#define LIT_CHAR_LOWERCASE_R ((ecma_char_t) 'r')
#define LIT_CHAR_LOWERCASE_S ((ecma_char_t) 's')
#define LIT_CHAR_LOWERCASE_T ((ecma_char_t) 't')
#define LIT_CHAR_LOWERCASE_U ((ecma_char_t) 'u')
#define LIT_CHAR_LOWERCASE_V ((ecma_char_t) 'v')
#define LIT_CHAR_LOWERCASE_W ((ecma_char_t) 'w')
#define LIT_CHAR_LOWERCASE_X ((ecma_char_t) 'x')
#define LIT_CHAR_LOWERCASE_Y ((ecma_char_t) 'y')
#define LIT_CHAR_LOWERCASE_Z ((ecma_char_t) 'z')

/**
 * ASCII decimal digits
 *
 * One constant per digit '0'..'9', each the value of the character literal
 * (not the numeric value of the digit) cast to ecma_char_t.
 */
#define LIT_CHAR_0    ((ecma_char_t) '0')
#define LIT_CHAR_1    ((ecma_char_t) '1')
#define LIT_CHAR_2    ((ecma_char_t) '2')
#define LIT_CHAR_3    ((ecma_char_t) '3')
#define LIT_CHAR_4    ((ecma_char_t) '4')
#define LIT_CHAR_5    ((ecma_char_t) '5')
#define LIT_CHAR_6    ((ecma_char_t) '6')
#define LIT_CHAR_7    ((ecma_char_t) '7')
#define LIT_CHAR_8    ((ecma_char_t) '8')
#define LIT_CHAR_9    ((ecma_char_t) '9')

/**
 * ASCII character ranges
 *
 * Every range is given as a BEGIN / END pair of aliases for the letter and
 * digit constants. END names the last character that belongs to the range
 * (not one past it), so a membership test has to compare with <= on END.
 */

/* uppercase letters range */
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN      LIT_CHAR_UPPERCASE_A
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_END        LIT_CHAR_UPPERCASE_Z

/* lowercase letters range */
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN      LIT_CHAR_LOWERCASE_A
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_END        LIT_CHAR_LOWERCASE_Z

/* uppercase letters for hexadecimal digits range */
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN  LIT_CHAR_UPPERCASE_A
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END    LIT_CHAR_UPPERCASE_F

/* lowercase letters for hexadecimal digits range */
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN  LIT_CHAR_LOWERCASE_A
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END    LIT_CHAR_LOWERCASE_F

/* octal digits range */
#define LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN           LIT_CHAR_0
#define LIT_CHAR_ASCII_OCTAL_DIGITS_END             LIT_CHAR_7

/* decimal digits range */
#define LIT_CHAR_ASCII_DIGITS_BEGIN                 LIT_CHAR_0
#define LIT_CHAR_ASCII_DIGITS_END                   LIT_CHAR_9

/**
 * Set the bit with value 0x20 (LIT_CHAR_SP) in a character.
 *
 * For an ASCII uppercase letter ('A'..'Z') the result is the matching
 * lowercase letter, and a lowercase letter is returned unchanged. No range
 * check is done: any other input gets the bit set as well (e.g. '@' becomes
 * '`'), so the result is only meaningful where that is acceptable.
 *
 * The argument is evaluated exactly once.
 */
#define LEXER_TO_ASCII_LOWERCASE(character) ((character) | LIT_CHAR_SP)

210bool lit_char_is_octal_digit (ecma_char_t c);
211bool lit_char_is_decimal_digit (ecma_char_t c);
212bool lit_char_is_hex_digit (ecma_char_t c);
213#if ENABLED (JERRY_ES2015)
214bool lit_char_is_binary_digit (ecma_char_t c);
215#endif /* ENABLED (JERRY_ES2015) */
216uint32_t lit_char_hex_to_int (ecma_char_t c);
217size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
218size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
219void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p);
220uint32_t lit_char_hex_lookup (const lit_utf8_byte_t *buf_p, const lit_utf8_byte_t *const buf_end_p, uint32_t lookup);
221uint32_t lit_parse_decimal (const lit_utf8_byte_t **buffer_p, const lit_utf8_byte_t *const buffer_end_p);
222
/**
 * Null character (value 0)
 */
#define LIT_CHAR_NULL  ((ecma_char_t) '\0')

228/*
229 * Part of IsWordChar abstract operation (ECMA-262 v5, 15.10.2.6, step 3)
230 */
231bool lit_char_is_word_char (lit_code_point_t c);
232
/*
 * Utility functions for uppercasing / lowercasing
 */

/**
 * Minimum buffer size for lit_char_to_lower_case / lit_char_to_upper_case functions.
 *
 * NOTE(review): presumably counted in ecma_char_t elements, since the output
 * buffer of those functions is an ecma_char_t array - confirm.
 */
#define LIT_MAXIMUM_OTHER_CASE_LENGTH (3)

242ecma_length_t lit_char_to_lower_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
243ecma_length_t lit_char_to_upper_case (ecma_char_t character, ecma_char_t *output_buffer_p, ecma_length_t buffer_size);
244
245#endif /* !LIT_CHAR_HELPERS_H */
246