1425bb815Sopenharmony_ci/* Copyright JS Foundation and other contributors, http://js.foundation
2425bb815Sopenharmony_ci *
3425bb815Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License");
4425bb815Sopenharmony_ci * you may not use this file except in compliance with the License.
5425bb815Sopenharmony_ci * You may obtain a copy of the License at
6425bb815Sopenharmony_ci *
7425bb815Sopenharmony_ci *     http://www.apache.org/licenses/LICENSE-2.0
8425bb815Sopenharmony_ci *
9425bb815Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software
10425bb815Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS
11425bb815Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12425bb815Sopenharmony_ci * See the License for the specific language governing permissions and
13425bb815Sopenharmony_ci * limitations under the License.
14425bb815Sopenharmony_ci */
15425bb815Sopenharmony_ci
16425bb815Sopenharmony_ci#include "config.h"
17425bb815Sopenharmony_ci#include "lit-char-helpers.h"
18425bb815Sopenharmony_ci#include "lit-unicode-ranges.inc.h"
19425bb815Sopenharmony_ci#include "lit-strings.h"
20425bb815Sopenharmony_ci
21425bb815Sopenharmony_ci#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
22425bb815Sopenharmony_ci#include "lit-unicode-conversions.inc.h"
23425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
24425bb815Sopenharmony_ci
/**
 * Number of elements of a statically sized array.
 *
 * Note:
 *      the argument must be a real array, passing a pointer yields a meaningless value.
 */
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof (*(array)))
26425bb815Sopenharmony_ci
27425bb815Sopenharmony_ci/**
28425bb815Sopenharmony_ci * Binary search algorithm that searches the a
29425bb815Sopenharmony_ci * character in the given char array.
30425bb815Sopenharmony_ci *
31425bb815Sopenharmony_ci * @return true - if the character is in the given array
32425bb815Sopenharmony_ci *         false - otherwise
33425bb815Sopenharmony_ci */
34425bb815Sopenharmony_cistatic bool
35425bb815Sopenharmony_cisearch_char_in_char_array (ecma_char_t c,               /**< code unit */
36425bb815Sopenharmony_ci                           const ecma_char_t *array,    /**< array */
37425bb815Sopenharmony_ci                           int size_of_array)           /**< length of the array */
38425bb815Sopenharmony_ci{
39425bb815Sopenharmony_ci  int bottom = 0;
40425bb815Sopenharmony_ci  int top = size_of_array - 1;
41425bb815Sopenharmony_ci
42425bb815Sopenharmony_ci  while (bottom <= top)
43425bb815Sopenharmony_ci  {
44425bb815Sopenharmony_ci    int middle = (bottom + top) / 2;
45425bb815Sopenharmony_ci    ecma_char_t current = array[middle];
46425bb815Sopenharmony_ci
47425bb815Sopenharmony_ci    if (current == c)
48425bb815Sopenharmony_ci    {
49425bb815Sopenharmony_ci      return true;
50425bb815Sopenharmony_ci    }
51425bb815Sopenharmony_ci
52425bb815Sopenharmony_ci    if (c < current)
53425bb815Sopenharmony_ci    {
54425bb815Sopenharmony_ci      top = middle - 1;
55425bb815Sopenharmony_ci    }
56425bb815Sopenharmony_ci    else
57425bb815Sopenharmony_ci    {
58425bb815Sopenharmony_ci      bottom = middle + 1;
59425bb815Sopenharmony_ci    }
60425bb815Sopenharmony_ci  }
61425bb815Sopenharmony_ci
62425bb815Sopenharmony_ci  return false;
63425bb815Sopenharmony_ci} /* search_char_in_char_array */
64425bb815Sopenharmony_ci
65425bb815Sopenharmony_ci/**
66425bb815Sopenharmony_ci * Binary search algorithm that searches a character in the given intervals.
67425bb815Sopenharmony_ci * Intervals specifed by two arrays. The first one contains the starting points
68425bb815Sopenharmony_ci * of the intervals, the second one contains the length of them.
69425bb815Sopenharmony_ci *
70425bb815Sopenharmony_ci * @return true - if the the character is included (inclusively) in one of the intervals in the given array
71425bb815Sopenharmony_ci *         false - otherwise
72425bb815Sopenharmony_ci */
73425bb815Sopenharmony_cistatic bool
74425bb815Sopenharmony_cisearch_char_in_interval_array (ecma_char_t c,               /**< code unit */
75425bb815Sopenharmony_ci                               const ecma_char_t *array_sp, /**< array of interval starting points */
76425bb815Sopenharmony_ci                               const uint8_t *lengths,      /**< array of interval lengths */
77425bb815Sopenharmony_ci                               int size_of_array)           /**< length of the array */
78425bb815Sopenharmony_ci{
79425bb815Sopenharmony_ci  int bottom = 0;
80425bb815Sopenharmony_ci  int top = size_of_array - 1;
81425bb815Sopenharmony_ci
82425bb815Sopenharmony_ci  while (bottom <= top)
83425bb815Sopenharmony_ci  {
84425bb815Sopenharmony_ci    int middle = (bottom + top) / 2;
85425bb815Sopenharmony_ci    ecma_char_t current_sp = array_sp[middle];
86425bb815Sopenharmony_ci
87425bb815Sopenharmony_ci    if (current_sp <= c && c <= current_sp + lengths[middle])
88425bb815Sopenharmony_ci    {
89425bb815Sopenharmony_ci      return true;
90425bb815Sopenharmony_ci    }
91425bb815Sopenharmony_ci
92425bb815Sopenharmony_ci    if (c > current_sp)
93425bb815Sopenharmony_ci    {
94425bb815Sopenharmony_ci      bottom = middle + 1;
95425bb815Sopenharmony_ci    }
96425bb815Sopenharmony_ci    else
97425bb815Sopenharmony_ci    {
98425bb815Sopenharmony_ci      top = middle - 1;
99425bb815Sopenharmony_ci    }
100425bb815Sopenharmony_ci  }
101425bb815Sopenharmony_ci
102425bb815Sopenharmony_ci  return false;
103425bb815Sopenharmony_ci} /* search_char_in_interval_array */
104425bb815Sopenharmony_ci
105425bb815Sopenharmony_ci/**
106425bb815Sopenharmony_ci * Check if specified character is one of the Whitespace characters including those that fall into
107425bb815Sopenharmony_ci * "Space, Separator" ("Zs") Unicode character category or one of the Line Terminator characters.
108425bb815Sopenharmony_ci *
109425bb815Sopenharmony_ci * @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2,
110425bb815Sopenharmony_ci *         false - otherwise
111425bb815Sopenharmony_ci */
112425bb815Sopenharmony_cibool
113425bb815Sopenharmony_cilit_char_is_white_space (lit_code_point_t c) /**< code point */
114425bb815Sopenharmony_ci{
115425bb815Sopenharmony_ci  if (c <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
116425bb815Sopenharmony_ci  {
117425bb815Sopenharmony_ci    return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR));
118425bb815Sopenharmony_ci  }
119425bb815Sopenharmony_ci  else
120425bb815Sopenharmony_ci  {
121425bb815Sopenharmony_ci    if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS)
122425bb815Sopenharmony_ci    {
123425bb815Sopenharmony_ci      return true;
124425bb815Sopenharmony_ci    }
125425bb815Sopenharmony_ci
126425bb815Sopenharmony_ci    return (c <= LIT_UTF16_CODE_UNIT_MAX
127425bb815Sopenharmony_ci            && ((c >= lit_unicode_separator_char_interval_sps[0]
128425bb815Sopenharmony_ci                 && c < lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0])
129425bb815Sopenharmony_ci                || search_char_in_char_array ((ecma_char_t) c,
130425bb815Sopenharmony_ci                                              lit_unicode_separator_chars,
131425bb815Sopenharmony_ci                                              NUM_OF_ELEMENTS (lit_unicode_separator_chars))));
132425bb815Sopenharmony_ci  }
133425bb815Sopenharmony_ci} /* lit_char_is_white_space */
134425bb815Sopenharmony_ci
135425bb815Sopenharmony_ci/**
136425bb815Sopenharmony_ci * Check if specified character is one of LineTerminator characters
137425bb815Sopenharmony_ci *
138425bb815Sopenharmony_ci * @return true - if the character is one of characters, listed in ECMA-262 v5, Table 3,
139425bb815Sopenharmony_ci *         false - otherwise
140425bb815Sopenharmony_ci */
141425bb815Sopenharmony_cibool
142425bb815Sopenharmony_cilit_char_is_line_terminator (ecma_char_t c) /**< code unit */
143425bb815Sopenharmony_ci{
144425bb815Sopenharmony_ci  return (c == LIT_CHAR_LF
145425bb815Sopenharmony_ci          || c == LIT_CHAR_CR
146425bb815Sopenharmony_ci          || c == LIT_CHAR_LS
147425bb815Sopenharmony_ci          || c == LIT_CHAR_PS);
148425bb815Sopenharmony_ci} /* lit_char_is_line_terminator */
149425bb815Sopenharmony_ci
150425bb815Sopenharmony_ci/**
151425bb815Sopenharmony_ci * Check if specified character is a unicode letter
152425bb815Sopenharmony_ci *
153425bb815Sopenharmony_ci * Note:
154425bb815Sopenharmony_ci *      Unicode letter is a character, included into one of the following categories:
155425bb815Sopenharmony_ci *       - Uppercase letter (Lu);
156425bb815Sopenharmony_ci *       - Lowercase letter (Ll);
157425bb815Sopenharmony_ci *       - Titlecase letter (Lt);
158425bb815Sopenharmony_ci *       - Modifier letter (Lm);
159425bb815Sopenharmony_ci *       - Other letter (Lo);
160425bb815Sopenharmony_ci *       - Letter number (Nl).
161425bb815Sopenharmony_ci *
162425bb815Sopenharmony_ci * See also:
163425bb815Sopenharmony_ci *          ECMA-262 v5, 7.6
164425bb815Sopenharmony_ci *
165425bb815Sopenharmony_ci * @return true - if specified character falls into one of the listed categories,
166425bb815Sopenharmony_ci *         false - otherwise
167425bb815Sopenharmony_ci */
168425bb815Sopenharmony_cistatic bool
169425bb815Sopenharmony_cilit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
170425bb815Sopenharmony_ci{
171425bb815Sopenharmony_ci  return (search_char_in_interval_array (c,
172425bb815Sopenharmony_ci                                         lit_unicode_letter_interval_sps,
173425bb815Sopenharmony_ci                                         lit_unicode_letter_interval_lengths,
174425bb815Sopenharmony_ci                                         NUM_OF_ELEMENTS (lit_unicode_letter_interval_sps))
175425bb815Sopenharmony_ci          || search_char_in_char_array (c, lit_unicode_letter_chars, NUM_OF_ELEMENTS (lit_unicode_letter_chars)));
176425bb815Sopenharmony_ci} /* lit_char_is_unicode_letter */
177425bb815Sopenharmony_ci
178425bb815Sopenharmony_ci/**
179425bb815Sopenharmony_ci * Check if specified character is a non-letter character and can be used as a
180425bb815Sopenharmony_ci * non-first character of an identifier.
181425bb815Sopenharmony_ci * These characters coverd by the following unicode categories:
182425bb815Sopenharmony_ci *  - digit (Nd)
183425bb815Sopenharmony_ci *  - punctuation mark (Mn, Mc)
184425bb815Sopenharmony_ci *  - connector punctuation (Pc)
185425bb815Sopenharmony_ci *
186425bb815Sopenharmony_ci * See also:
187425bb815Sopenharmony_ci *          ECMA-262 v5, 7.6
188425bb815Sopenharmony_ci *
189425bb815Sopenharmony_ci * @return true - if specified character falls into one of the listed categories,
190425bb815Sopenharmony_ci *         false - otherwise
191425bb815Sopenharmony_ci */
192425bb815Sopenharmony_cistatic bool
193425bb815Sopenharmony_cilit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
194425bb815Sopenharmony_ci{
195425bb815Sopenharmony_ci  return (search_char_in_interval_array (c,
196425bb815Sopenharmony_ci                                         lit_unicode_non_letter_ident_part_interval_sps,
197425bb815Sopenharmony_ci                                         lit_unicode_non_letter_ident_part_interval_lengths,
198425bb815Sopenharmony_ci                                         NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_interval_sps))
199425bb815Sopenharmony_ci          || search_char_in_char_array (c,
200425bb815Sopenharmony_ci                                        lit_unicode_non_letter_ident_part_chars,
201425bb815Sopenharmony_ci                                        NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars)));
202425bb815Sopenharmony_ci} /* lit_char_is_unicode_non_letter_ident_part */
203425bb815Sopenharmony_ci
204425bb815Sopenharmony_ci/**
205425bb815Sopenharmony_ci * Checks whether the character is a valid identifier start.
206425bb815Sopenharmony_ci *
207425bb815Sopenharmony_ci * @return true if it is.
208425bb815Sopenharmony_ci */
209425bb815Sopenharmony_cibool
210425bb815Sopenharmony_cilit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point */
211425bb815Sopenharmony_ci{
212425bb815Sopenharmony_ci  /* Fast path for ASCII-defined letters. */
213425bb815Sopenharmony_ci  if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
214425bb815Sopenharmony_ci  {
215425bb815Sopenharmony_ci    return ((LEXER_TO_ASCII_LOWERCASE (code_point) >= LIT_CHAR_LOWERCASE_A
216425bb815Sopenharmony_ci             && LEXER_TO_ASCII_LOWERCASE (code_point) <= LIT_CHAR_LOWERCASE_Z)
217425bb815Sopenharmony_ci            || code_point == LIT_CHAR_DOLLAR_SIGN
218425bb815Sopenharmony_ci            || code_point == LIT_CHAR_UNDERSCORE);
219425bb815Sopenharmony_ci  }
220425bb815Sopenharmony_ci
221425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015)
222425bb815Sopenharmony_ci  if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
223425bb815Sopenharmony_ci  {
224425bb815Sopenharmony_ci    /* TODO: detect these ranges correctly. */
225425bb815Sopenharmony_ci    return (code_point >= 0x10C80 && code_point <= 0x10CF2);
226425bb815Sopenharmony_ci  }
227425bb815Sopenharmony_ci#else /* !ENABLED (JERRY_ES2015) */
228425bb815Sopenharmony_ci  JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
229425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */
230425bb815Sopenharmony_ci
231425bb815Sopenharmony_ci  return lit_char_is_unicode_letter ((ecma_char_t) code_point);
232425bb815Sopenharmony_ci} /* lit_code_point_is_identifier_start */
233425bb815Sopenharmony_ci
234425bb815Sopenharmony_ci/**
235425bb815Sopenharmony_ci * Checks whether the character is a valid identifier part.
236425bb815Sopenharmony_ci *
237425bb815Sopenharmony_ci * @return true if it is.
238425bb815Sopenharmony_ci */
239425bb815Sopenharmony_cibool
240425bb815Sopenharmony_cilit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point */
241425bb815Sopenharmony_ci{
242425bb815Sopenharmony_ci  /* Fast path for ASCII-defined letters. */
243425bb815Sopenharmony_ci  if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
244425bb815Sopenharmony_ci  {
245425bb815Sopenharmony_ci    return ((LEXER_TO_ASCII_LOWERCASE (code_point) >= LIT_CHAR_LOWERCASE_A
246425bb815Sopenharmony_ci             && LEXER_TO_ASCII_LOWERCASE (code_point) <= LIT_CHAR_LOWERCASE_Z)
247425bb815Sopenharmony_ci            || (code_point >= LIT_CHAR_0 && code_point <= LIT_CHAR_9)
248425bb815Sopenharmony_ci            || code_point == LIT_CHAR_DOLLAR_SIGN
249425bb815Sopenharmony_ci            || code_point == LIT_CHAR_UNDERSCORE);
250425bb815Sopenharmony_ci  }
251425bb815Sopenharmony_ci
252425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015)
253425bb815Sopenharmony_ci  if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
254425bb815Sopenharmony_ci  {
255425bb815Sopenharmony_ci    /* TODO: detect these ranges correctly. */
256425bb815Sopenharmony_ci    return (code_point >= 0x10C80 && code_point <= 0x10CF2);
257425bb815Sopenharmony_ci  }
258425bb815Sopenharmony_ci#else /* !ENABLED (JERRY_ES2015) */
259425bb815Sopenharmony_ci  JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN);
260425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */
261425bb815Sopenharmony_ci
262425bb815Sopenharmony_ci  return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
263425bb815Sopenharmony_ci          || lit_char_is_unicode_non_letter_ident_part ((ecma_char_t) code_point));
264425bb815Sopenharmony_ci} /* lit_code_point_is_identifier_part */
265425bb815Sopenharmony_ci
266425bb815Sopenharmony_ci/**
267425bb815Sopenharmony_ci * Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2)
268425bb815Sopenharmony_ci *
269425bb815Sopenharmony_ci * @return true / false
270425bb815Sopenharmony_ci */
271425bb815Sopenharmony_cibool
272425bb815Sopenharmony_cilit_char_is_octal_digit (ecma_char_t c) /**< code unit */
273425bb815Sopenharmony_ci{
274425bb815Sopenharmony_ci  return (c >= LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_OCTAL_DIGITS_END);
275425bb815Sopenharmony_ci} /* lit_char_is_octal_digit */
276425bb815Sopenharmony_ci
277425bb815Sopenharmony_ci/**
278425bb815Sopenharmony_ci * Check if specified character is one of DecimalDigit characters (ECMA-262 v5, 7.8.3)
279425bb815Sopenharmony_ci *
280425bb815Sopenharmony_ci * @return true / false
281425bb815Sopenharmony_ci */
282425bb815Sopenharmony_cibool
283425bb815Sopenharmony_cilit_char_is_decimal_digit (ecma_char_t c) /**< code unit */
284425bb815Sopenharmony_ci{
285425bb815Sopenharmony_ci  return (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END);
286425bb815Sopenharmony_ci} /* lit_char_is_decimal_digit */
287425bb815Sopenharmony_ci
288425bb815Sopenharmony_ci/**
289425bb815Sopenharmony_ci * Check if specified character is one of HexDigit characters (ECMA-262 v5, 7.8.3)
290425bb815Sopenharmony_ci *
291425bb815Sopenharmony_ci * @return true / false
292425bb815Sopenharmony_ci */
293425bb815Sopenharmony_cibool
294425bb815Sopenharmony_cilit_char_is_hex_digit (ecma_char_t c) /**< code unit */
295425bb815Sopenharmony_ci{
296425bb815Sopenharmony_ci  return ((c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
297425bb815Sopenharmony_ci          || (LEXER_TO_ASCII_LOWERCASE (c) >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
298425bb815Sopenharmony_ci              && LEXER_TO_ASCII_LOWERCASE (c) <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END));
299425bb815Sopenharmony_ci} /* lit_char_is_hex_digit */
300425bb815Sopenharmony_ci
301425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015)
302425bb815Sopenharmony_ci/**
303425bb815Sopenharmony_ci * Check if specified character is one of BinaryDigits characters (ECMA-262 v6, 11.8.3)
304425bb815Sopenharmony_ci *
305425bb815Sopenharmony_ci * @return true / false
306425bb815Sopenharmony_ci */
307425bb815Sopenharmony_cibool
308425bb815Sopenharmony_cilit_char_is_binary_digit (ecma_char_t c) /** code unit */
309425bb815Sopenharmony_ci{
310425bb815Sopenharmony_ci  return (c == LIT_CHAR_0 || c == LIT_CHAR_1);
311425bb815Sopenharmony_ci} /* lit_char_is_binary_digit */
312425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */
313425bb815Sopenharmony_ci
314425bb815Sopenharmony_ci/**
315425bb815Sopenharmony_ci * Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3
316425bb815Sopenharmony_ci *
317425bb815Sopenharmony_ci * @return digit value, corresponding to the hex char
318425bb815Sopenharmony_ci */
319425bb815Sopenharmony_ciuint32_t
320425bb815Sopenharmony_cilit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
321425bb815Sopenharmony_ci                                     *    one of HexDigit characters */
322425bb815Sopenharmony_ci{
323425bb815Sopenharmony_ci  JERRY_ASSERT (lit_char_is_hex_digit (c));
324425bb815Sopenharmony_ci
325425bb815Sopenharmony_ci  if (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
326425bb815Sopenharmony_ci  {
327425bb815Sopenharmony_ci    return (uint32_t) (c - LIT_CHAR_ASCII_DIGITS_BEGIN);
328425bb815Sopenharmony_ci  }
329425bb815Sopenharmony_ci  else if (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
330425bb815Sopenharmony_ci  {
331425bb815Sopenharmony_ci    return (uint32_t) (c - LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN + 10);
332425bb815Sopenharmony_ci  }
333425bb815Sopenharmony_ci  else
334425bb815Sopenharmony_ci  {
335425bb815Sopenharmony_ci    return (uint32_t) (c - LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN + 10);
336425bb815Sopenharmony_ci  }
337425bb815Sopenharmony_ci} /* lit_char_hex_to_int */
338425bb815Sopenharmony_ci
339425bb815Sopenharmony_ci/**
340425bb815Sopenharmony_ci * Converts a character to UTF8 bytes.
341425bb815Sopenharmony_ci *
342425bb815Sopenharmony_ci * @return length of the UTF8 representation.
343425bb815Sopenharmony_ci */
344425bb815Sopenharmony_cisize_t
345425bb815Sopenharmony_cilit_code_point_to_cesu8_bytes (uint8_t *dst_p, /**< destination buffer */
346425bb815Sopenharmony_ci                               lit_code_point_t code_point) /**< code point */
347425bb815Sopenharmony_ci{
348425bb815Sopenharmony_ci  if (code_point < LIT_UTF8_2_BYTE_CODE_POINT_MIN)
349425bb815Sopenharmony_ci  {
350425bb815Sopenharmony_ci    /* 00000000 0xxxxxxx -> 0xxxxxxx */
351425bb815Sopenharmony_ci    dst_p[0] = (uint8_t) code_point;
352425bb815Sopenharmony_ci    return 1;
353425bb815Sopenharmony_ci  }
354425bb815Sopenharmony_ci
355425bb815Sopenharmony_ci  if (code_point < LIT_UTF8_3_BYTE_CODE_POINT_MIN)
356425bb815Sopenharmony_ci  {
357425bb815Sopenharmony_ci    /* 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx */
358425bb815Sopenharmony_ci    dst_p[0] = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((code_point >> 6) & LIT_UTF8_LAST_5_BITS_MASK));
359425bb815Sopenharmony_ci    dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
360425bb815Sopenharmony_ci    return 2;
361425bb815Sopenharmony_ci  }
362425bb815Sopenharmony_ci
363425bb815Sopenharmony_ci  if (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN)
364425bb815Sopenharmony_ci  {
365425bb815Sopenharmony_ci    /* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */
366425bb815Sopenharmony_ci    dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((code_point >> 12) & LIT_UTF8_LAST_4_BITS_MASK));
367425bb815Sopenharmony_ci    dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((code_point >> 6) & LIT_UTF8_LAST_6_BITS_MASK));
368425bb815Sopenharmony_ci    dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
369425bb815Sopenharmony_ci    return 3;
370425bb815Sopenharmony_ci  }
371425bb815Sopenharmony_ci
372425bb815Sopenharmony_ci  JERRY_ASSERT (code_point <= LIT_UNICODE_CODE_POINT_MAX);
373425bb815Sopenharmony_ci
374425bb815Sopenharmony_ci  code_point -= LIT_UTF8_4_BYTE_CODE_POINT_MIN;
375425bb815Sopenharmony_ci
376425bb815Sopenharmony_ci  dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | 0xd);
377425bb815Sopenharmony_ci  dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | 0x20 | ((code_point >> 16) & LIT_UTF8_LAST_4_BITS_MASK));
378425bb815Sopenharmony_ci  dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((code_point >> 10) & LIT_UTF8_LAST_6_BITS_MASK));
379425bb815Sopenharmony_ci
380425bb815Sopenharmony_ci  dst_p[3] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | 0xd);
381425bb815Sopenharmony_ci  dst_p[4] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | 0x30 | ((code_point >> 6) & LIT_UTF8_LAST_4_BITS_MASK));
382425bb815Sopenharmony_ci  dst_p[5] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
383425bb815Sopenharmony_ci
384425bb815Sopenharmony_ci  return 3 * 2;
385425bb815Sopenharmony_ci} /* lit_code_point_to_cesu8_bytes */
386425bb815Sopenharmony_ci
387425bb815Sopenharmony_ci/**
388425bb815Sopenharmony_ci * Returns the length of the UTF8 representation of a character.
389425bb815Sopenharmony_ci *
390425bb815Sopenharmony_ci * @return length of the UTF8 representation.
391425bb815Sopenharmony_ci */
392425bb815Sopenharmony_cisize_t
393425bb815Sopenharmony_cilit_code_point_get_cesu8_length (lit_code_point_t code_point) /**< code point */
394425bb815Sopenharmony_ci{
395425bb815Sopenharmony_ci  if (code_point < LIT_UTF8_2_BYTE_CODE_POINT_MIN)
396425bb815Sopenharmony_ci  {
397425bb815Sopenharmony_ci    /* 00000000 0xxxxxxx */
398425bb815Sopenharmony_ci    return 1;
399425bb815Sopenharmony_ci  }
400425bb815Sopenharmony_ci
401425bb815Sopenharmony_ci  if (code_point < LIT_UTF8_3_BYTE_CODE_POINT_MIN)
402425bb815Sopenharmony_ci  {
403425bb815Sopenharmony_ci    /* 00000yyy yyxxxxxx */
404425bb815Sopenharmony_ci    return 2;
405425bb815Sopenharmony_ci  }
406425bb815Sopenharmony_ci
407425bb815Sopenharmony_ci  if (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN)
408425bb815Sopenharmony_ci  {
409425bb815Sopenharmony_ci    /* zzzzyyyy yyxxxxxx */
410425bb815Sopenharmony_ci    return 3;
411425bb815Sopenharmony_ci  }
412425bb815Sopenharmony_ci
413425bb815Sopenharmony_ci  /* high + low surrogate */
414425bb815Sopenharmony_ci  return 2 * 3;
415425bb815Sopenharmony_ci} /* lit_code_point_get_cesu8_length */
416425bb815Sopenharmony_ci
417425bb815Sopenharmony_ci/**
418425bb815Sopenharmony_ci * Convert a four byte long utf8 character to two three byte long cesu8 characters
419425bb815Sopenharmony_ci */
420425bb815Sopenharmony_civoid
421425bb815Sopenharmony_cilit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, /**< destination buffer */
422425bb815Sopenharmony_ci                                  const uint8_t *source_p) /**< source buffer */
423425bb815Sopenharmony_ci{
424425bb815Sopenharmony_ci  lit_code_point_t code_point = ((((uint32_t) source_p[0]) & LIT_UTF8_LAST_3_BITS_MASK) << 18);
425425bb815Sopenharmony_ci  code_point |= ((((uint32_t) source_p[1]) & LIT_UTF8_LAST_6_BITS_MASK) << 12);
426425bb815Sopenharmony_ci  code_point |= ((((uint32_t) source_p[2]) & LIT_UTF8_LAST_6_BITS_MASK) << 6);
427425bb815Sopenharmony_ci  code_point |= (((uint32_t) source_p[3]) & LIT_UTF8_LAST_6_BITS_MASK);
428425bb815Sopenharmony_ci
429425bb815Sopenharmony_ci  lit_code_point_to_cesu8_bytes (dst_p, code_point);
430425bb815Sopenharmony_ci} /* lit_four_byte_utf8_char_to_cesu8 */
431425bb815Sopenharmony_ci
432425bb815Sopenharmony_ci/**
433425bb815Sopenharmony_ci * Lookup hex digits in a buffer
434425bb815Sopenharmony_ci *
435425bb815Sopenharmony_ci * @return UINT32_MAX - if next 'lookup' number of characters do not form a valid hex number
436425bb815Sopenharmony_ci *         value of hex number, otherwise
437425bb815Sopenharmony_ci */
438425bb815Sopenharmony_ciuint32_t
439425bb815Sopenharmony_cilit_char_hex_lookup (const lit_utf8_byte_t *buf_p, /**< buffer */
440425bb815Sopenharmony_ci                     const lit_utf8_byte_t *const buf_end_p, /**< buffer end */
441425bb815Sopenharmony_ci                     uint32_t lookup) /**< size of lookup */
442425bb815Sopenharmony_ci{
443425bb815Sopenharmony_ci  JERRY_ASSERT (lookup <= 4);
444425bb815Sopenharmony_ci
445425bb815Sopenharmony_ci  if (JERRY_UNLIKELY (buf_p + lookup > buf_end_p))
446425bb815Sopenharmony_ci  {
447425bb815Sopenharmony_ci    return UINT32_MAX;
448425bb815Sopenharmony_ci  }
449425bb815Sopenharmony_ci
450425bb815Sopenharmony_ci  uint32_t value = 0;
451425bb815Sopenharmony_ci
452425bb815Sopenharmony_ci  while (lookup--)
453425bb815Sopenharmony_ci  {
454425bb815Sopenharmony_ci    lit_utf8_byte_t ch = *buf_p++;
455425bb815Sopenharmony_ci    if (!lit_char_is_hex_digit (ch))
456425bb815Sopenharmony_ci    {
457425bb815Sopenharmony_ci      return UINT32_MAX;
458425bb815Sopenharmony_ci    }
459425bb815Sopenharmony_ci
460425bb815Sopenharmony_ci    value <<= 4;
461425bb815Sopenharmony_ci    value += lit_char_hex_to_int (ch);
462425bb815Sopenharmony_ci  }
463425bb815Sopenharmony_ci
464425bb815Sopenharmony_ci  JERRY_ASSERT (value <= LIT_UTF16_CODE_UNIT_MAX);
465425bb815Sopenharmony_ci  return value;
466425bb815Sopenharmony_ci} /* lit_char_hex_lookup */
467425bb815Sopenharmony_ci
468425bb815Sopenharmony_ci/**
469425bb815Sopenharmony_ci * Parse a decimal number with the value clamped to UINT32_MAX.
470425bb815Sopenharmony_ci *
471425bb815Sopenharmony_ci * @returns uint32_t number
472425bb815Sopenharmony_ci */
473425bb815Sopenharmony_ciuint32_t
474425bb815Sopenharmony_cilit_parse_decimal (const lit_utf8_byte_t **buffer_p, /**< [in/out] character buffer */
475425bb815Sopenharmony_ci                   const lit_utf8_byte_t *buffer_end_p) /**< buffer end */
476425bb815Sopenharmony_ci{
477425bb815Sopenharmony_ci  const lit_utf8_byte_t *current_p = *buffer_p;
478425bb815Sopenharmony_ci  JERRY_ASSERT (lit_char_is_decimal_digit (*current_p));
479425bb815Sopenharmony_ci
480425bb815Sopenharmony_ci  uint32_t value = (uint32_t) (*current_p++ - LIT_CHAR_0);
481425bb815Sopenharmony_ci
482425bb815Sopenharmony_ci  while (current_p < buffer_end_p && lit_char_is_decimal_digit (*current_p))
483425bb815Sopenharmony_ci  {
484425bb815Sopenharmony_ci    const uint32_t digit = (uint32_t) (*current_p++ - LIT_CHAR_0);
485425bb815Sopenharmony_ci    uint32_t new_value = value * 10 + digit;
486425bb815Sopenharmony_ci
487425bb815Sopenharmony_ci    if (JERRY_UNLIKELY (value > UINT32_MAX / 10) || JERRY_UNLIKELY (new_value < value))
488425bb815Sopenharmony_ci    {
489425bb815Sopenharmony_ci      value = UINT32_MAX;
490425bb815Sopenharmony_ci      continue;
491425bb815Sopenharmony_ci    }
492425bb815Sopenharmony_ci
493425bb815Sopenharmony_ci    value = new_value;
494425bb815Sopenharmony_ci  }
495425bb815Sopenharmony_ci
496425bb815Sopenharmony_ci  *buffer_p = current_p;
497425bb815Sopenharmony_ci  return value;
498425bb815Sopenharmony_ci} /* lit_parse_decimal */
499425bb815Sopenharmony_ci
500425bb815Sopenharmony_ci/**
501425bb815Sopenharmony_ci * Check if specified character is a word character (part of IsWordChar abstract operation)
502425bb815Sopenharmony_ci *
503425bb815Sopenharmony_ci * See also: ECMA-262 v5, 15.10.2.6 (IsWordChar)
504425bb815Sopenharmony_ci *
505425bb815Sopenharmony_ci * @return true - if the character is a word character
506425bb815Sopenharmony_ci *         false - otherwise
507425bb815Sopenharmony_ci */
508425bb815Sopenharmony_cibool
509425bb815Sopenharmony_cilit_char_is_word_char (lit_code_point_t c) /**< code point */
510425bb815Sopenharmony_ci{
511425bb815Sopenharmony_ci  return ((c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
512425bb815Sopenharmony_ci          || (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
513425bb815Sopenharmony_ci          || (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
514425bb815Sopenharmony_ci          || c == LIT_CHAR_UNDERSCORE);
515425bb815Sopenharmony_ci} /* lit_char_is_word_char */
516425bb815Sopenharmony_ci
517425bb815Sopenharmony_ci#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
518425bb815Sopenharmony_ci
519425bb815Sopenharmony_ci/**
520425bb815Sopenharmony_ci * Check if the specified character is in one of those tables which contain bidirectional conversions.
521425bb815Sopenharmony_ci *
522425bb815Sopenharmony_ci * @return the mapped character sequence of an ecma character, if it's in the table.
523425bb815Sopenharmony_ci *         0 - otherwise.
524425bb815Sopenharmony_ci */
525425bb815Sopenharmony_cistatic ecma_length_t
526425bb815Sopenharmony_cisearch_in_bidirectional_conversion_tables (ecma_char_t character,        /**< code unit */
527425bb815Sopenharmony_ci                                           ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
528425bb815Sopenharmony_ci                                           bool is_lowercase)            /**< is lowercase conversion */
529425bb815Sopenharmony_ci{
530425bb815Sopenharmony_ci  /* 1, Check if the specified character is part of the lit_character_case_ranges table. */
531425bb815Sopenharmony_ci  int number_of_case_ranges = NUM_OF_ELEMENTS (lit_character_case_ranges);
532425bb815Sopenharmony_ci  int conv_counter = 0;
533425bb815Sopenharmony_ci
534425bb815Sopenharmony_ci  for (int i = 0; i < number_of_case_ranges; i++)
535425bb815Sopenharmony_ci  {
536425bb815Sopenharmony_ci    if (i % 2 == 0 && i > 0)
537425bb815Sopenharmony_ci    {
538425bb815Sopenharmony_ci      conv_counter++;
539425bb815Sopenharmony_ci    }
540425bb815Sopenharmony_ci
541425bb815Sopenharmony_ci    int range_length = lit_character_case_range_lengths[conv_counter];
542425bb815Sopenharmony_ci    ecma_char_t start_point = lit_character_case_ranges[i];
543425bb815Sopenharmony_ci
544425bb815Sopenharmony_ci    if (start_point > character || character >= start_point + range_length)
545425bb815Sopenharmony_ci    {
546425bb815Sopenharmony_ci      continue;
547425bb815Sopenharmony_ci    }
548425bb815Sopenharmony_ci
549425bb815Sopenharmony_ci    int char_dist = character - start_point;
550425bb815Sopenharmony_ci
551425bb815Sopenharmony_ci    if (i % 2 == 0)
552425bb815Sopenharmony_ci    {
553425bb815Sopenharmony_ci      output_buffer_p[0] = is_lowercase ? (ecma_char_t) (lit_character_case_ranges[i + 1] + char_dist) : character;
554425bb815Sopenharmony_ci    }
555425bb815Sopenharmony_ci    else
556425bb815Sopenharmony_ci    {
557425bb815Sopenharmony_ci      output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (lit_character_case_ranges[i - 1] + char_dist);
558425bb815Sopenharmony_ci    }
559425bb815Sopenharmony_ci
560425bb815Sopenharmony_ci    return 1;
561425bb815Sopenharmony_ci  }
562425bb815Sopenharmony_ci
563425bb815Sopenharmony_ci  /* 2, Check if the specified character is part of the character_pair_ranges table. */
564425bb815Sopenharmony_ci  int bottom = 0;
565425bb815Sopenharmony_ci  int top = NUM_OF_ELEMENTS (lit_character_pair_ranges) - 1;
566425bb815Sopenharmony_ci
567425bb815Sopenharmony_ci  while (bottom <= top)
568425bb815Sopenharmony_ci  {
569425bb815Sopenharmony_ci    int middle = (bottom + top) / 2;
570425bb815Sopenharmony_ci    ecma_char_t current_sp = lit_character_pair_ranges[middle];
571425bb815Sopenharmony_ci
572425bb815Sopenharmony_ci    if (current_sp <= character && character < current_sp + lit_character_pair_range_lengths[middle])
573425bb815Sopenharmony_ci    {
574425bb815Sopenharmony_ci      int char_dist = character - current_sp;
575425bb815Sopenharmony_ci
576425bb815Sopenharmony_ci      if ((character - current_sp) % 2 == 0)
577425bb815Sopenharmony_ci      {
578425bb815Sopenharmony_ci        output_buffer_p[0] = is_lowercase ? (ecma_char_t) (current_sp + char_dist + 1) : character;
579425bb815Sopenharmony_ci      }
580425bb815Sopenharmony_ci      else
581425bb815Sopenharmony_ci      {
582425bb815Sopenharmony_ci        output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (current_sp + char_dist - 1);
583425bb815Sopenharmony_ci      }
584425bb815Sopenharmony_ci
585425bb815Sopenharmony_ci      return 1;
586425bb815Sopenharmony_ci    }
587425bb815Sopenharmony_ci
588425bb815Sopenharmony_ci    if (character > current_sp)
589425bb815Sopenharmony_ci    {
590425bb815Sopenharmony_ci      bottom = middle + 1;
591425bb815Sopenharmony_ci    }
592425bb815Sopenharmony_ci    else
593425bb815Sopenharmony_ci    {
594425bb815Sopenharmony_ci      top = middle - 1;
595425bb815Sopenharmony_ci    }
596425bb815Sopenharmony_ci  }
597425bb815Sopenharmony_ci
598425bb815Sopenharmony_ci  /* 3, Check if the specified character is part of the character_pairs table. */
599425bb815Sopenharmony_ci  int number_of_character_pairs = NUM_OF_ELEMENTS (lit_character_pairs);
600425bb815Sopenharmony_ci
601425bb815Sopenharmony_ci  for (int i = 0; i < number_of_character_pairs; i++)
602425bb815Sopenharmony_ci  {
603425bb815Sopenharmony_ci    if (character != lit_character_pairs[i])
604425bb815Sopenharmony_ci    {
605425bb815Sopenharmony_ci      continue;
606425bb815Sopenharmony_ci    }
607425bb815Sopenharmony_ci
608425bb815Sopenharmony_ci    if (i % 2 == 0)
609425bb815Sopenharmony_ci    {
610425bb815Sopenharmony_ci      output_buffer_p[0] = is_lowercase ? lit_character_pairs[i + 1] : character;
611425bb815Sopenharmony_ci    }
612425bb815Sopenharmony_ci    else
613425bb815Sopenharmony_ci    {
614425bb815Sopenharmony_ci      output_buffer_p[0] = is_lowercase ? character : lit_character_pairs[i - 1];
615425bb815Sopenharmony_ci    }
616425bb815Sopenharmony_ci
617425bb815Sopenharmony_ci    return 1;
618425bb815Sopenharmony_ci  }
619425bb815Sopenharmony_ci
620425bb815Sopenharmony_ci  return 0;
621425bb815Sopenharmony_ci} /* search_in_bidirectional_conversion_tables */
622425bb815Sopenharmony_ci
623425bb815Sopenharmony_ci/**
624425bb815Sopenharmony_ci * Check if the specified character is in the given conversion table.
625425bb815Sopenharmony_ci *
626425bb815Sopenharmony_ci * @return the mapped character sequence of an ecma character, if it's in the table.
627425bb815Sopenharmony_ci *         0 - otherwise.
628425bb815Sopenharmony_ci */
629425bb815Sopenharmony_cistatic ecma_length_t
630425bb815Sopenharmony_cisearch_in_conversion_table (ecma_char_t character,        /**< code unit */
631425bb815Sopenharmony_ci                            ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
632425bb815Sopenharmony_ci                            const ecma_char_t *array,     /**< array */
633425bb815Sopenharmony_ci                            const uint8_t *counters)      /**< case_values counter */
634425bb815Sopenharmony_ci{
635425bb815Sopenharmony_ci  int end_point = 0;
636425bb815Sopenharmony_ci
637425bb815Sopenharmony_ci  for (int i = 0; i < 3; i++)
638425bb815Sopenharmony_ci  {
639425bb815Sopenharmony_ci    int start_point = end_point;
640425bb815Sopenharmony_ci    int size_of_case_value = i + 1;
641425bb815Sopenharmony_ci    end_point += counters[i] * (size_of_case_value + 1);
642425bb815Sopenharmony_ci
643425bb815Sopenharmony_ci    int bottom = start_point;
644425bb815Sopenharmony_ci    int top = end_point - size_of_case_value;
645425bb815Sopenharmony_ci
646425bb815Sopenharmony_ci    while (bottom <= top)
647425bb815Sopenharmony_ci    {
648425bb815Sopenharmony_ci      int middle = (bottom + top) / 2;
649425bb815Sopenharmony_ci
650425bb815Sopenharmony_ci      middle -= ((middle - bottom) % (size_of_case_value + 1));
651425bb815Sopenharmony_ci
652425bb815Sopenharmony_ci      ecma_char_t current = array[middle];
653425bb815Sopenharmony_ci
654425bb815Sopenharmony_ci      if (current == character)
655425bb815Sopenharmony_ci      {
656425bb815Sopenharmony_ci        ecma_length_t char_sequence = 1;
657425bb815Sopenharmony_ci
658425bb815Sopenharmony_ci        switch (size_of_case_value)
659425bb815Sopenharmony_ci        {
660425bb815Sopenharmony_ci          case 3:
661425bb815Sopenharmony_ci          {
662425bb815Sopenharmony_ci            output_buffer_p[2] = array[middle + 3];
663425bb815Sopenharmony_ci            char_sequence++;
664425bb815Sopenharmony_ci            /* FALLTHRU */
665425bb815Sopenharmony_ci          }
666425bb815Sopenharmony_ci          case 2:
667425bb815Sopenharmony_ci          {
668425bb815Sopenharmony_ci            output_buffer_p[1] = array[middle + 2];
669425bb815Sopenharmony_ci            char_sequence++;
670425bb815Sopenharmony_ci            /* FALLTHRU */
671425bb815Sopenharmony_ci          }
672425bb815Sopenharmony_ci          default:
673425bb815Sopenharmony_ci          {
674425bb815Sopenharmony_ci            output_buffer_p[0] = array[middle + 1];
675425bb815Sopenharmony_ci            return char_sequence;
676425bb815Sopenharmony_ci          }
677425bb815Sopenharmony_ci        }
678425bb815Sopenharmony_ci      }
679425bb815Sopenharmony_ci
680425bb815Sopenharmony_ci      if (character < current)
681425bb815Sopenharmony_ci      {
682425bb815Sopenharmony_ci        top = middle - (size_of_case_value + 1);
683425bb815Sopenharmony_ci      }
684425bb815Sopenharmony_ci      else
685425bb815Sopenharmony_ci      {
686425bb815Sopenharmony_ci        bottom = middle + (size_of_case_value + 1);
687425bb815Sopenharmony_ci      }
688425bb815Sopenharmony_ci    }
689425bb815Sopenharmony_ci  }
690425bb815Sopenharmony_ci
691425bb815Sopenharmony_ci  return 0;
692425bb815Sopenharmony_ci} /* search_in_conversion_table */
693425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
694425bb815Sopenharmony_ci
695425bb815Sopenharmony_ci/**
696425bb815Sopenharmony_ci * Returns the lowercase character sequence of an ecma character.
697425bb815Sopenharmony_ci *
698425bb815Sopenharmony_ci * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
699425bb815Sopenharmony_ci *
700425bb815Sopenharmony_ci * @return the length of the lowercase character sequence
701425bb815Sopenharmony_ci *         which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
702425bb815Sopenharmony_ci */
703425bb815Sopenharmony_ciecma_length_t
704425bb815Sopenharmony_cilit_char_to_lower_case (ecma_char_t character, /**< input character value */
705425bb815Sopenharmony_ci                        ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
706425bb815Sopenharmony_ci                        ecma_length_t buffer_size) /**< buffer size */
707425bb815Sopenharmony_ci{
708425bb815Sopenharmony_ci  JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
709425bb815Sopenharmony_ci
710425bb815Sopenharmony_ci  if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
711425bb815Sopenharmony_ci  {
712425bb815Sopenharmony_ci    output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
713425bb815Sopenharmony_ci    return 1;
714425bb815Sopenharmony_ci  }
715425bb815Sopenharmony_ci
716425bb815Sopenharmony_ci#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
717425bb815Sopenharmony_ci
718425bb815Sopenharmony_ci  ecma_length_t lowercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, true);
719425bb815Sopenharmony_ci
720425bb815Sopenharmony_ci  if (lowercase_sequence != 0)
721425bb815Sopenharmony_ci  {
722425bb815Sopenharmony_ci    return lowercase_sequence;
723425bb815Sopenharmony_ci  }
724425bb815Sopenharmony_ci
725425bb815Sopenharmony_ci  int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_lower_case_ranges);
726425bb815Sopenharmony_ci
727425bb815Sopenharmony_ci  for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++)
728425bb815Sopenharmony_ci  {
729425bb815Sopenharmony_ci    int range_length = lit_lower_case_range_lengths[j] - 1;
730425bb815Sopenharmony_ci    ecma_char_t start_point = lit_lower_case_ranges[i];
731425bb815Sopenharmony_ci
732425bb815Sopenharmony_ci    if (start_point <= character && character <= start_point + range_length)
733425bb815Sopenharmony_ci    {
734425bb815Sopenharmony_ci      output_buffer_p[0] = (ecma_char_t) (lit_lower_case_ranges[i + 1] + (character - start_point));
735425bb815Sopenharmony_ci      return 1;
736425bb815Sopenharmony_ci    }
737425bb815Sopenharmony_ci  }
738425bb815Sopenharmony_ci
739425bb815Sopenharmony_ci  lowercase_sequence = search_in_conversion_table (character,
740425bb815Sopenharmony_ci                                                   output_buffer_p,
741425bb815Sopenharmony_ci                                                   lit_lower_case_conversions,
742425bb815Sopenharmony_ci                                                   lit_lower_case_conversion_counters);
743425bb815Sopenharmony_ci
744425bb815Sopenharmony_ci  if (lowercase_sequence != 0)
745425bb815Sopenharmony_ci  {
746425bb815Sopenharmony_ci    return lowercase_sequence;
747425bb815Sopenharmony_ci  }
748425bb815Sopenharmony_ci
749425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
750425bb815Sopenharmony_ci
751425bb815Sopenharmony_ci  output_buffer_p[0] = character;
752425bb815Sopenharmony_ci  return 1;
753425bb815Sopenharmony_ci} /* lit_char_to_lower_case */
754425bb815Sopenharmony_ci
755425bb815Sopenharmony_ci/**
756425bb815Sopenharmony_ci * Returns the uppercase character sequence of an ecma character.
757425bb815Sopenharmony_ci *
758425bb815Sopenharmony_ci * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
759425bb815Sopenharmony_ci *
760425bb815Sopenharmony_ci * @return the length of the uppercase character sequence
761425bb815Sopenharmony_ci *         which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
762425bb815Sopenharmony_ci */
763425bb815Sopenharmony_ciecma_length_t
764425bb815Sopenharmony_cilit_char_to_upper_case (ecma_char_t character, /**< input character value */
765425bb815Sopenharmony_ci                        ecma_char_t *output_buffer_p, /**< buffer for the result characters */
766425bb815Sopenharmony_ci                        ecma_length_t buffer_size) /**< buffer size */
767425bb815Sopenharmony_ci{
768425bb815Sopenharmony_ci  JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
769425bb815Sopenharmony_ci
770425bb815Sopenharmony_ci  if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
771425bb815Sopenharmony_ci  {
772425bb815Sopenharmony_ci    output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
773425bb815Sopenharmony_ci    return 1;
774425bb815Sopenharmony_ci  }
775425bb815Sopenharmony_ci
776425bb815Sopenharmony_ci#if ENABLED (JERRY_UNICODE_CASE_CONVERSION)
777425bb815Sopenharmony_ci
778425bb815Sopenharmony_ci  ecma_length_t uppercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, false);
779425bb815Sopenharmony_ci
780425bb815Sopenharmony_ci  if (uppercase_sequence != 0)
781425bb815Sopenharmony_ci  {
782425bb815Sopenharmony_ci    return uppercase_sequence;
783425bb815Sopenharmony_ci  }
784425bb815Sopenharmony_ci
785425bb815Sopenharmony_ci  int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_upper_case_special_ranges);
786425bb815Sopenharmony_ci
787425bb815Sopenharmony_ci  for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++)
788425bb815Sopenharmony_ci  {
789425bb815Sopenharmony_ci    int range_length = lit_upper_case_special_range_lengths[j];
790425bb815Sopenharmony_ci    ecma_char_t start_point = lit_upper_case_special_ranges[i];
791425bb815Sopenharmony_ci
792425bb815Sopenharmony_ci    if (start_point <= character && character <= start_point + range_length)
793425bb815Sopenharmony_ci    {
794425bb815Sopenharmony_ci      output_buffer_p[0] = (ecma_char_t) (lit_upper_case_special_ranges[i + 1] + (character - start_point));
795425bb815Sopenharmony_ci      output_buffer_p[1] = (ecma_char_t) (lit_upper_case_special_ranges[i + 2]);
796425bb815Sopenharmony_ci      return 2;
797425bb815Sopenharmony_ci    }
798425bb815Sopenharmony_ci  }
799425bb815Sopenharmony_ci
800425bb815Sopenharmony_ci  uppercase_sequence = search_in_conversion_table (character,
801425bb815Sopenharmony_ci                                                   output_buffer_p,
802425bb815Sopenharmony_ci                                                   lit_upper_case_conversions,
803425bb815Sopenharmony_ci                                                   lit_upper_case_conversion_counters);
804425bb815Sopenharmony_ci
805425bb815Sopenharmony_ci  if (uppercase_sequence != 0)
806425bb815Sopenharmony_ci  {
807425bb815Sopenharmony_ci    return uppercase_sequence;
808425bb815Sopenharmony_ci  }
809425bb815Sopenharmony_ci
810425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */
811425bb815Sopenharmony_ci
812425bb815Sopenharmony_ci  output_buffer_p[0] = character;
813425bb815Sopenharmony_ci  return 1;
814425bb815Sopenharmony_ci} /* lit_char_to_upper_case */
815