1425bb815Sopenharmony_ci/* Copyright JS Foundation and other contributors, http://js.foundation 2425bb815Sopenharmony_ci * 3425bb815Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 4425bb815Sopenharmony_ci * you may not use this file except in compliance with the License. 5425bb815Sopenharmony_ci * You may obtain a copy of the License at 6425bb815Sopenharmony_ci * 7425bb815Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 8425bb815Sopenharmony_ci * 9425bb815Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 10425bb815Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS 11425bb815Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12425bb815Sopenharmony_ci * See the License for the specific language governing permissions and 13425bb815Sopenharmony_ci * limitations under the License. 14425bb815Sopenharmony_ci */ 15425bb815Sopenharmony_ci 16425bb815Sopenharmony_ci#include "config.h" 17425bb815Sopenharmony_ci#include "lit-char-helpers.h" 18425bb815Sopenharmony_ci#include "lit-unicode-ranges.inc.h" 19425bb815Sopenharmony_ci#include "lit-strings.h" 20425bb815Sopenharmony_ci 21425bb815Sopenharmony_ci#if ENABLED (JERRY_UNICODE_CASE_CONVERSION) 22425bb815Sopenharmony_ci#include "lit-unicode-conversions.inc.h" 23425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */ 24425bb815Sopenharmony_ci 25425bb815Sopenharmony_ci#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0])) 26425bb815Sopenharmony_ci 27425bb815Sopenharmony_ci/** 28425bb815Sopenharmony_ci * Binary search algorithm that searches the a 29425bb815Sopenharmony_ci * character in the given char array. 30425bb815Sopenharmony_ci * 31425bb815Sopenharmony_ci * @return true - if the character is in the given array 32425bb815Sopenharmony_ci * false - otherwise 33425bb815Sopenharmony_ci */ 34425bb815Sopenharmony_cistatic bool 35425bb815Sopenharmony_cisearch_char_in_char_array (ecma_char_t c, /**< code unit */ 36425bb815Sopenharmony_ci const ecma_char_t *array, /**< array */ 37425bb815Sopenharmony_ci int size_of_array) /**< length of the array */ 38425bb815Sopenharmony_ci{ 39425bb815Sopenharmony_ci int bottom = 0; 40425bb815Sopenharmony_ci int top = size_of_array - 1; 41425bb815Sopenharmony_ci 42425bb815Sopenharmony_ci while (bottom <= top) 43425bb815Sopenharmony_ci { 44425bb815Sopenharmony_ci int middle = (bottom + top) / 2; 45425bb815Sopenharmony_ci ecma_char_t current = array[middle]; 46425bb815Sopenharmony_ci 47425bb815Sopenharmony_ci if (current == c) 48425bb815Sopenharmony_ci { 49425bb815Sopenharmony_ci return true; 50425bb815Sopenharmony_ci } 51425bb815Sopenharmony_ci 52425bb815Sopenharmony_ci if (c < current) 53425bb815Sopenharmony_ci { 54425bb815Sopenharmony_ci top = middle - 1; 55425bb815Sopenharmony_ci } 56425bb815Sopenharmony_ci else 57425bb815Sopenharmony_ci { 58425bb815Sopenharmony_ci bottom = middle + 1; 59425bb815Sopenharmony_ci } 60425bb815Sopenharmony_ci } 61425bb815Sopenharmony_ci 62425bb815Sopenharmony_ci return false; 63425bb815Sopenharmony_ci} /* search_char_in_char_array */ 64425bb815Sopenharmony_ci 65425bb815Sopenharmony_ci/** 66425bb815Sopenharmony_ci * Binary search algorithm that searches a character in the given intervals. 67425bb815Sopenharmony_ci * Intervals specifed by two arrays. The first one contains the starting points 68425bb815Sopenharmony_ci * of the intervals, the second one contains the length of them. 69425bb815Sopenharmony_ci * 70425bb815Sopenharmony_ci * @return true - if the the character is included (inclusively) in one of the intervals in the given array 71425bb815Sopenharmony_ci * false - otherwise 72425bb815Sopenharmony_ci */ 73425bb815Sopenharmony_cistatic bool 74425bb815Sopenharmony_cisearch_char_in_interval_array (ecma_char_t c, /**< code unit */ 75425bb815Sopenharmony_ci const ecma_char_t *array_sp, /**< array of interval starting points */ 76425bb815Sopenharmony_ci const uint8_t *lengths, /**< array of interval lengths */ 77425bb815Sopenharmony_ci int size_of_array) /**< length of the array */ 78425bb815Sopenharmony_ci{ 79425bb815Sopenharmony_ci int bottom = 0; 80425bb815Sopenharmony_ci int top = size_of_array - 1; 81425bb815Sopenharmony_ci 82425bb815Sopenharmony_ci while (bottom <= top) 83425bb815Sopenharmony_ci { 84425bb815Sopenharmony_ci int middle = (bottom + top) / 2; 85425bb815Sopenharmony_ci ecma_char_t current_sp = array_sp[middle]; 86425bb815Sopenharmony_ci 87425bb815Sopenharmony_ci if (current_sp <= c && c <= current_sp + lengths[middle]) 88425bb815Sopenharmony_ci { 89425bb815Sopenharmony_ci return true; 90425bb815Sopenharmony_ci } 91425bb815Sopenharmony_ci 92425bb815Sopenharmony_ci if (c > current_sp) 93425bb815Sopenharmony_ci { 94425bb815Sopenharmony_ci bottom = middle + 1; 95425bb815Sopenharmony_ci } 96425bb815Sopenharmony_ci else 97425bb815Sopenharmony_ci { 98425bb815Sopenharmony_ci top = middle - 1; 99425bb815Sopenharmony_ci } 100425bb815Sopenharmony_ci } 101425bb815Sopenharmony_ci 102425bb815Sopenharmony_ci return false; 103425bb815Sopenharmony_ci} /* search_char_in_interval_array */ 104425bb815Sopenharmony_ci 105425bb815Sopenharmony_ci/** 106425bb815Sopenharmony_ci * Check if specified character is one of the Whitespace characters including those that fall into 107425bb815Sopenharmony_ci * "Space, Separator" ("Zs") Unicode character category or one of the Line Terminator characters. 108425bb815Sopenharmony_ci * 109425bb815Sopenharmony_ci * @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2, 110425bb815Sopenharmony_ci * false - otherwise 111425bb815Sopenharmony_ci */ 112425bb815Sopenharmony_cibool 113425bb815Sopenharmony_cilit_char_is_white_space (lit_code_point_t c) /**< code point */ 114425bb815Sopenharmony_ci{ 115425bb815Sopenharmony_ci if (c <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) 116425bb815Sopenharmony_ci { 117425bb815Sopenharmony_ci return (c == LIT_CHAR_SP || (c >= LIT_CHAR_TAB && c <= LIT_CHAR_CR)); 118425bb815Sopenharmony_ci } 119425bb815Sopenharmony_ci else 120425bb815Sopenharmony_ci { 121425bb815Sopenharmony_ci if (c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM || c == LIT_CHAR_LS || c == LIT_CHAR_PS) 122425bb815Sopenharmony_ci { 123425bb815Sopenharmony_ci return true; 124425bb815Sopenharmony_ci } 125425bb815Sopenharmony_ci 126425bb815Sopenharmony_ci return (c <= LIT_UTF16_CODE_UNIT_MAX 127425bb815Sopenharmony_ci && ((c >= lit_unicode_separator_char_interval_sps[0] 128425bb815Sopenharmony_ci && c < lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0]) 129425bb815Sopenharmony_ci || search_char_in_char_array ((ecma_char_t) c, 130425bb815Sopenharmony_ci lit_unicode_separator_chars, 131425bb815Sopenharmony_ci NUM_OF_ELEMENTS (lit_unicode_separator_chars)))); 132425bb815Sopenharmony_ci } 133425bb815Sopenharmony_ci} /* lit_char_is_white_space */ 134425bb815Sopenharmony_ci 135425bb815Sopenharmony_ci/** 136425bb815Sopenharmony_ci * Check if specified character is one of LineTerminator characters 137425bb815Sopenharmony_ci * 138425bb815Sopenharmony_ci * @return true - if the character is one of characters, listed in ECMA-262 v5, Table 3, 139425bb815Sopenharmony_ci * false - otherwise 140425bb815Sopenharmony_ci */ 141425bb815Sopenharmony_cibool 142425bb815Sopenharmony_cilit_char_is_line_terminator (ecma_char_t c) /**< code unit */ 143425bb815Sopenharmony_ci{ 144425bb815Sopenharmony_ci return (c == LIT_CHAR_LF 145425bb815Sopenharmony_ci || c == LIT_CHAR_CR 146425bb815Sopenharmony_ci || c == LIT_CHAR_LS 147425bb815Sopenharmony_ci || c == LIT_CHAR_PS); 148425bb815Sopenharmony_ci} /* lit_char_is_line_terminator */ 149425bb815Sopenharmony_ci 150425bb815Sopenharmony_ci/** 151425bb815Sopenharmony_ci * Check if specified character is a unicode letter 152425bb815Sopenharmony_ci * 153425bb815Sopenharmony_ci * Note: 154425bb815Sopenharmony_ci * Unicode letter is a character, included into one of the following categories: 155425bb815Sopenharmony_ci * - Uppercase letter (Lu); 156425bb815Sopenharmony_ci * - Lowercase letter (Ll); 157425bb815Sopenharmony_ci * - Titlecase letter (Lt); 158425bb815Sopenharmony_ci * - Modifier letter (Lm); 159425bb815Sopenharmony_ci * - Other letter (Lo); 160425bb815Sopenharmony_ci * - Letter number (Nl). 161425bb815Sopenharmony_ci * 162425bb815Sopenharmony_ci * See also: 163425bb815Sopenharmony_ci * ECMA-262 v5, 7.6 164425bb815Sopenharmony_ci * 165425bb815Sopenharmony_ci * @return true - if specified character falls into one of the listed categories, 166425bb815Sopenharmony_ci * false - otherwise 167425bb815Sopenharmony_ci */ 168425bb815Sopenharmony_cistatic bool 169425bb815Sopenharmony_cilit_char_is_unicode_letter (ecma_char_t c) /**< code unit */ 170425bb815Sopenharmony_ci{ 171425bb815Sopenharmony_ci return (search_char_in_interval_array (c, 172425bb815Sopenharmony_ci lit_unicode_letter_interval_sps, 173425bb815Sopenharmony_ci lit_unicode_letter_interval_lengths, 174425bb815Sopenharmony_ci NUM_OF_ELEMENTS (lit_unicode_letter_interval_sps)) 175425bb815Sopenharmony_ci || search_char_in_char_array (c, lit_unicode_letter_chars, NUM_OF_ELEMENTS (lit_unicode_letter_chars))); 176425bb815Sopenharmony_ci} /* lit_char_is_unicode_letter */ 177425bb815Sopenharmony_ci 178425bb815Sopenharmony_ci/** 179425bb815Sopenharmony_ci * Check if specified character is a non-letter character and can be used as a 180425bb815Sopenharmony_ci * non-first character of an identifier. 181425bb815Sopenharmony_ci * These characters coverd by the following unicode categories: 182425bb815Sopenharmony_ci * - digit (Nd) 183425bb815Sopenharmony_ci * - punctuation mark (Mn, Mc) 184425bb815Sopenharmony_ci * - connector punctuation (Pc) 185425bb815Sopenharmony_ci * 186425bb815Sopenharmony_ci * See also: 187425bb815Sopenharmony_ci * ECMA-262 v5, 7.6 188425bb815Sopenharmony_ci * 189425bb815Sopenharmony_ci * @return true - if specified character falls into one of the listed categories, 190425bb815Sopenharmony_ci * false - otherwise 191425bb815Sopenharmony_ci */ 192425bb815Sopenharmony_cistatic bool 193425bb815Sopenharmony_cilit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */ 194425bb815Sopenharmony_ci{ 195425bb815Sopenharmony_ci return (search_char_in_interval_array (c, 196425bb815Sopenharmony_ci lit_unicode_non_letter_ident_part_interval_sps, 197425bb815Sopenharmony_ci lit_unicode_non_letter_ident_part_interval_lengths, 198425bb815Sopenharmony_ci NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_interval_sps)) 199425bb815Sopenharmony_ci || search_char_in_char_array (c, 200425bb815Sopenharmony_ci lit_unicode_non_letter_ident_part_chars, 201425bb815Sopenharmony_ci NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars))); 202425bb815Sopenharmony_ci} /* lit_char_is_unicode_non_letter_ident_part */ 203425bb815Sopenharmony_ci 204425bb815Sopenharmony_ci/** 205425bb815Sopenharmony_ci * Checks whether the character is a valid identifier start. 206425bb815Sopenharmony_ci * 207425bb815Sopenharmony_ci * @return true if it is. 208425bb815Sopenharmony_ci */ 209425bb815Sopenharmony_cibool 210425bb815Sopenharmony_cilit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point */ 211425bb815Sopenharmony_ci{ 212425bb815Sopenharmony_ci /* Fast path for ASCII-defined letters. */ 213425bb815Sopenharmony_ci if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) 214425bb815Sopenharmony_ci { 215425bb815Sopenharmony_ci return ((LEXER_TO_ASCII_LOWERCASE (code_point) >= LIT_CHAR_LOWERCASE_A 216425bb815Sopenharmony_ci && LEXER_TO_ASCII_LOWERCASE (code_point) <= LIT_CHAR_LOWERCASE_Z) 217425bb815Sopenharmony_ci || code_point == LIT_CHAR_DOLLAR_SIGN 218425bb815Sopenharmony_ci || code_point == LIT_CHAR_UNDERSCORE); 219425bb815Sopenharmony_ci } 220425bb815Sopenharmony_ci 221425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 222425bb815Sopenharmony_ci if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN) 223425bb815Sopenharmony_ci { 224425bb815Sopenharmony_ci /* TODO: detect these ranges correctly. */ 225425bb815Sopenharmony_ci return (code_point >= 0x10C80 && code_point <= 0x10CF2); 226425bb815Sopenharmony_ci } 227425bb815Sopenharmony_ci#else /* !ENABLED (JERRY_ES2015) */ 228425bb815Sopenharmony_ci JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN); 229425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 230425bb815Sopenharmony_ci 231425bb815Sopenharmony_ci return lit_char_is_unicode_letter ((ecma_char_t) code_point); 232425bb815Sopenharmony_ci} /* lit_code_point_is_identifier_start */ 233425bb815Sopenharmony_ci 234425bb815Sopenharmony_ci/** 235425bb815Sopenharmony_ci * Checks whether the character is a valid identifier part. 236425bb815Sopenharmony_ci * 237425bb815Sopenharmony_ci * @return true if it is. 238425bb815Sopenharmony_ci */ 239425bb815Sopenharmony_cibool 240425bb815Sopenharmony_cilit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point */ 241425bb815Sopenharmony_ci{ 242425bb815Sopenharmony_ci /* Fast path for ASCII-defined letters. */ 243425bb815Sopenharmony_ci if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) 244425bb815Sopenharmony_ci { 245425bb815Sopenharmony_ci return ((LEXER_TO_ASCII_LOWERCASE (code_point) >= LIT_CHAR_LOWERCASE_A 246425bb815Sopenharmony_ci && LEXER_TO_ASCII_LOWERCASE (code_point) <= LIT_CHAR_LOWERCASE_Z) 247425bb815Sopenharmony_ci || (code_point >= LIT_CHAR_0 && code_point <= LIT_CHAR_9) 248425bb815Sopenharmony_ci || code_point == LIT_CHAR_DOLLAR_SIGN 249425bb815Sopenharmony_ci || code_point == LIT_CHAR_UNDERSCORE); 250425bb815Sopenharmony_ci } 251425bb815Sopenharmony_ci 252425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 253425bb815Sopenharmony_ci if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN) 254425bb815Sopenharmony_ci { 255425bb815Sopenharmony_ci /* TODO: detect these ranges correctly. */ 256425bb815Sopenharmony_ci return (code_point >= 0x10C80 && code_point <= 0x10CF2); 257425bb815Sopenharmony_ci } 258425bb815Sopenharmony_ci#else /* !ENABLED (JERRY_ES2015) */ 259425bb815Sopenharmony_ci JERRY_ASSERT (code_point <= LIT_UTF8_4_BYTE_CODE_POINT_MIN); 260425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 261425bb815Sopenharmony_ci 262425bb815Sopenharmony_ci return (lit_char_is_unicode_letter ((ecma_char_t) code_point) 263425bb815Sopenharmony_ci || lit_char_is_unicode_non_letter_ident_part ((ecma_char_t) code_point)); 264425bb815Sopenharmony_ci} /* lit_code_point_is_identifier_part */ 265425bb815Sopenharmony_ci 266425bb815Sopenharmony_ci/** 267425bb815Sopenharmony_ci * Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2) 268425bb815Sopenharmony_ci * 269425bb815Sopenharmony_ci * @return true / false 270425bb815Sopenharmony_ci */ 271425bb815Sopenharmony_cibool 272425bb815Sopenharmony_cilit_char_is_octal_digit (ecma_char_t c) /**< code unit */ 273425bb815Sopenharmony_ci{ 274425bb815Sopenharmony_ci return (c >= LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_OCTAL_DIGITS_END); 275425bb815Sopenharmony_ci} /* lit_char_is_octal_digit */ 276425bb815Sopenharmony_ci 277425bb815Sopenharmony_ci/** 278425bb815Sopenharmony_ci * Check if specified character is one of DecimalDigit characters (ECMA-262 v5, 7.8.3) 279425bb815Sopenharmony_ci * 280425bb815Sopenharmony_ci * @return true / false 281425bb815Sopenharmony_ci */ 282425bb815Sopenharmony_cibool 283425bb815Sopenharmony_cilit_char_is_decimal_digit (ecma_char_t c) /**< code unit */ 284425bb815Sopenharmony_ci{ 285425bb815Sopenharmony_ci return (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END); 286425bb815Sopenharmony_ci} /* lit_char_is_decimal_digit */ 287425bb815Sopenharmony_ci 288425bb815Sopenharmony_ci/** 289425bb815Sopenharmony_ci * Check if specified character is one of HexDigit characters (ECMA-262 v5, 7.8.3) 290425bb815Sopenharmony_ci * 291425bb815Sopenharmony_ci * @return true / false 292425bb815Sopenharmony_ci */ 293425bb815Sopenharmony_cibool 294425bb815Sopenharmony_cilit_char_is_hex_digit (ecma_char_t c) /**< code unit */ 295425bb815Sopenharmony_ci{ 296425bb815Sopenharmony_ci return ((c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END) 297425bb815Sopenharmony_ci || (LEXER_TO_ASCII_LOWERCASE (c) >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN 298425bb815Sopenharmony_ci && LEXER_TO_ASCII_LOWERCASE (c) <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)); 299425bb815Sopenharmony_ci} /* lit_char_is_hex_digit */ 300425bb815Sopenharmony_ci 301425bb815Sopenharmony_ci#if ENABLED (JERRY_ES2015) 302425bb815Sopenharmony_ci/** 303425bb815Sopenharmony_ci * Check if specified character is one of BinaryDigits characters (ECMA-262 v6, 11.8.3) 304425bb815Sopenharmony_ci * 305425bb815Sopenharmony_ci * @return true / false 306425bb815Sopenharmony_ci */ 307425bb815Sopenharmony_cibool 308425bb815Sopenharmony_cilit_char_is_binary_digit (ecma_char_t c) /** code unit */ 309425bb815Sopenharmony_ci{ 310425bb815Sopenharmony_ci return (c == LIT_CHAR_0 || c == LIT_CHAR_1); 311425bb815Sopenharmony_ci} /* lit_char_is_binary_digit */ 312425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_ES2015) */ 313425bb815Sopenharmony_ci 314425bb815Sopenharmony_ci/** 315425bb815Sopenharmony_ci * Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3 316425bb815Sopenharmony_ci * 317425bb815Sopenharmony_ci * @return digit value, corresponding to the hex char 318425bb815Sopenharmony_ci */ 319425bb815Sopenharmony_ciuint32_t 320425bb815Sopenharmony_cilit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to 321425bb815Sopenharmony_ci * one of HexDigit characters */ 322425bb815Sopenharmony_ci{ 323425bb815Sopenharmony_ci JERRY_ASSERT (lit_char_is_hex_digit (c)); 324425bb815Sopenharmony_ci 325425bb815Sopenharmony_ci if (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END) 326425bb815Sopenharmony_ci { 327425bb815Sopenharmony_ci return (uint32_t) (c - LIT_CHAR_ASCII_DIGITS_BEGIN); 328425bb815Sopenharmony_ci } 329425bb815Sopenharmony_ci else if (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END) 330425bb815Sopenharmony_ci { 331425bb815Sopenharmony_ci return (uint32_t) (c - LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN + 10); 332425bb815Sopenharmony_ci } 333425bb815Sopenharmony_ci else 334425bb815Sopenharmony_ci { 335425bb815Sopenharmony_ci return (uint32_t) (c - LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN + 10); 336425bb815Sopenharmony_ci } 337425bb815Sopenharmony_ci} /* lit_char_hex_to_int */ 338425bb815Sopenharmony_ci 339425bb815Sopenharmony_ci/** 340425bb815Sopenharmony_ci * Converts a character to UTF8 bytes. 341425bb815Sopenharmony_ci * 342425bb815Sopenharmony_ci * @return length of the UTF8 representation. 343425bb815Sopenharmony_ci */ 344425bb815Sopenharmony_cisize_t 345425bb815Sopenharmony_cilit_code_point_to_cesu8_bytes (uint8_t *dst_p, /**< destination buffer */ 346425bb815Sopenharmony_ci lit_code_point_t code_point) /**< code point */ 347425bb815Sopenharmony_ci{ 348425bb815Sopenharmony_ci if (code_point < LIT_UTF8_2_BYTE_CODE_POINT_MIN) 349425bb815Sopenharmony_ci { 350425bb815Sopenharmony_ci /* 00000000 0xxxxxxx -> 0xxxxxxx */ 351425bb815Sopenharmony_ci dst_p[0] = (uint8_t) code_point; 352425bb815Sopenharmony_ci return 1; 353425bb815Sopenharmony_ci } 354425bb815Sopenharmony_ci 355425bb815Sopenharmony_ci if (code_point < LIT_UTF8_3_BYTE_CODE_POINT_MIN) 356425bb815Sopenharmony_ci { 357425bb815Sopenharmony_ci /* 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx */ 358425bb815Sopenharmony_ci dst_p[0] = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((code_point >> 6) & LIT_UTF8_LAST_5_BITS_MASK)); 359425bb815Sopenharmony_ci dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK)); 360425bb815Sopenharmony_ci return 2; 361425bb815Sopenharmony_ci } 362425bb815Sopenharmony_ci 363425bb815Sopenharmony_ci if (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN) 364425bb815Sopenharmony_ci { 365425bb815Sopenharmony_ci /* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */ 366425bb815Sopenharmony_ci dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((code_point >> 12) & LIT_UTF8_LAST_4_BITS_MASK)); 367425bb815Sopenharmony_ci dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((code_point >> 6) & LIT_UTF8_LAST_6_BITS_MASK)); 368425bb815Sopenharmony_ci dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK)); 369425bb815Sopenharmony_ci return 3; 370425bb815Sopenharmony_ci } 371425bb815Sopenharmony_ci 372425bb815Sopenharmony_ci JERRY_ASSERT (code_point <= LIT_UNICODE_CODE_POINT_MAX); 373425bb815Sopenharmony_ci 374425bb815Sopenharmony_ci code_point -= LIT_UTF8_4_BYTE_CODE_POINT_MIN; 375425bb815Sopenharmony_ci 376425bb815Sopenharmony_ci dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | 0xd); 377425bb815Sopenharmony_ci dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | 0x20 | ((code_point >> 16) & LIT_UTF8_LAST_4_BITS_MASK)); 378425bb815Sopenharmony_ci dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((code_point >> 10) & LIT_UTF8_LAST_6_BITS_MASK)); 379425bb815Sopenharmony_ci 380425bb815Sopenharmony_ci dst_p[3] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | 0xd); 381425bb815Sopenharmony_ci dst_p[4] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | 0x30 | ((code_point >> 6) & LIT_UTF8_LAST_4_BITS_MASK)); 382425bb815Sopenharmony_ci dst_p[5] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK)); 383425bb815Sopenharmony_ci 384425bb815Sopenharmony_ci return 3 * 2; 385425bb815Sopenharmony_ci} /* lit_code_point_to_cesu8_bytes */ 386425bb815Sopenharmony_ci 387425bb815Sopenharmony_ci/** 388425bb815Sopenharmony_ci * Returns the length of the UTF8 representation of a character. 389425bb815Sopenharmony_ci * 390425bb815Sopenharmony_ci * @return length of the UTF8 representation. 391425bb815Sopenharmony_ci */ 392425bb815Sopenharmony_cisize_t 393425bb815Sopenharmony_cilit_code_point_get_cesu8_length (lit_code_point_t code_point) /**< code point */ 394425bb815Sopenharmony_ci{ 395425bb815Sopenharmony_ci if (code_point < LIT_UTF8_2_BYTE_CODE_POINT_MIN) 396425bb815Sopenharmony_ci { 397425bb815Sopenharmony_ci /* 00000000 0xxxxxxx */ 398425bb815Sopenharmony_ci return 1; 399425bb815Sopenharmony_ci } 400425bb815Sopenharmony_ci 401425bb815Sopenharmony_ci if (code_point < LIT_UTF8_3_BYTE_CODE_POINT_MIN) 402425bb815Sopenharmony_ci { 403425bb815Sopenharmony_ci /* 00000yyy yyxxxxxx */ 404425bb815Sopenharmony_ci return 2; 405425bb815Sopenharmony_ci } 406425bb815Sopenharmony_ci 407425bb815Sopenharmony_ci if (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN) 408425bb815Sopenharmony_ci { 409425bb815Sopenharmony_ci /* zzzzyyyy yyxxxxxx */ 410425bb815Sopenharmony_ci return 3; 411425bb815Sopenharmony_ci } 412425bb815Sopenharmony_ci 413425bb815Sopenharmony_ci /* high + low surrogate */ 414425bb815Sopenharmony_ci return 2 * 3; 415425bb815Sopenharmony_ci} /* lit_code_point_get_cesu8_length */ 416425bb815Sopenharmony_ci 417425bb815Sopenharmony_ci/** 418425bb815Sopenharmony_ci * Convert a four byte long utf8 character to two three byte long cesu8 characters 419425bb815Sopenharmony_ci */ 420425bb815Sopenharmony_civoid 421425bb815Sopenharmony_cilit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, /**< destination buffer */ 422425bb815Sopenharmony_ci const uint8_t *source_p) /**< source buffer */ 423425bb815Sopenharmony_ci{ 424425bb815Sopenharmony_ci lit_code_point_t code_point = ((((uint32_t) source_p[0]) & LIT_UTF8_LAST_3_BITS_MASK) << 18); 425425bb815Sopenharmony_ci code_point |= ((((uint32_t) source_p[1]) & LIT_UTF8_LAST_6_BITS_MASK) << 12); 426425bb815Sopenharmony_ci code_point |= ((((uint32_t) source_p[2]) & LIT_UTF8_LAST_6_BITS_MASK) << 6); 427425bb815Sopenharmony_ci code_point |= (((uint32_t) source_p[3]) & LIT_UTF8_LAST_6_BITS_MASK); 428425bb815Sopenharmony_ci 429425bb815Sopenharmony_ci lit_code_point_to_cesu8_bytes (dst_p, code_point); 430425bb815Sopenharmony_ci} /* lit_four_byte_utf8_char_to_cesu8 */ 431425bb815Sopenharmony_ci 432425bb815Sopenharmony_ci/** 433425bb815Sopenharmony_ci * Lookup hex digits in a buffer 434425bb815Sopenharmony_ci * 435425bb815Sopenharmony_ci * @return UINT32_MAX - if next 'lookup' number of characters do not form a valid hex number 436425bb815Sopenharmony_ci * value of hex number, otherwise 437425bb815Sopenharmony_ci */ 438425bb815Sopenharmony_ciuint32_t 439425bb815Sopenharmony_cilit_char_hex_lookup (const lit_utf8_byte_t *buf_p, /**< buffer */ 440425bb815Sopenharmony_ci const lit_utf8_byte_t *const buf_end_p, /**< buffer end */ 441425bb815Sopenharmony_ci uint32_t lookup) /**< size of lookup */ 442425bb815Sopenharmony_ci{ 443425bb815Sopenharmony_ci JERRY_ASSERT (lookup <= 4); 444425bb815Sopenharmony_ci 445425bb815Sopenharmony_ci if (JERRY_UNLIKELY (buf_p + lookup > buf_end_p)) 446425bb815Sopenharmony_ci { 447425bb815Sopenharmony_ci return UINT32_MAX; 448425bb815Sopenharmony_ci } 449425bb815Sopenharmony_ci 450425bb815Sopenharmony_ci uint32_t value = 0; 451425bb815Sopenharmony_ci 452425bb815Sopenharmony_ci while (lookup--) 453425bb815Sopenharmony_ci { 454425bb815Sopenharmony_ci lit_utf8_byte_t ch = *buf_p++; 455425bb815Sopenharmony_ci if (!lit_char_is_hex_digit (ch)) 456425bb815Sopenharmony_ci { 457425bb815Sopenharmony_ci return UINT32_MAX; 458425bb815Sopenharmony_ci } 459425bb815Sopenharmony_ci 460425bb815Sopenharmony_ci value <<= 4; 461425bb815Sopenharmony_ci value += lit_char_hex_to_int (ch); 462425bb815Sopenharmony_ci } 463425bb815Sopenharmony_ci 464425bb815Sopenharmony_ci JERRY_ASSERT (value <= LIT_UTF16_CODE_UNIT_MAX); 465425bb815Sopenharmony_ci return value; 466425bb815Sopenharmony_ci} /* lit_char_hex_lookup */ 467425bb815Sopenharmony_ci 468425bb815Sopenharmony_ci/** 469425bb815Sopenharmony_ci * Parse a decimal number with the value clamped to UINT32_MAX. 470425bb815Sopenharmony_ci * 471425bb815Sopenharmony_ci * @returns uint32_t number 472425bb815Sopenharmony_ci */ 473425bb815Sopenharmony_ciuint32_t 474425bb815Sopenharmony_cilit_parse_decimal (const lit_utf8_byte_t **buffer_p, /**< [in/out] character buffer */ 475425bb815Sopenharmony_ci const lit_utf8_byte_t *buffer_end_p) /**< buffer end */ 476425bb815Sopenharmony_ci{ 477425bb815Sopenharmony_ci const lit_utf8_byte_t *current_p = *buffer_p; 478425bb815Sopenharmony_ci JERRY_ASSERT (lit_char_is_decimal_digit (*current_p)); 479425bb815Sopenharmony_ci 480425bb815Sopenharmony_ci uint32_t value = (uint32_t) (*current_p++ - LIT_CHAR_0); 481425bb815Sopenharmony_ci 482425bb815Sopenharmony_ci while (current_p < buffer_end_p && lit_char_is_decimal_digit (*current_p)) 483425bb815Sopenharmony_ci { 484425bb815Sopenharmony_ci const uint32_t digit = (uint32_t) (*current_p++ - LIT_CHAR_0); 485425bb815Sopenharmony_ci uint32_t new_value = value * 10 + digit; 486425bb815Sopenharmony_ci 487425bb815Sopenharmony_ci if (JERRY_UNLIKELY (value > UINT32_MAX / 10) || JERRY_UNLIKELY (new_value < value)) 488425bb815Sopenharmony_ci { 489425bb815Sopenharmony_ci value = UINT32_MAX; 490425bb815Sopenharmony_ci continue; 491425bb815Sopenharmony_ci } 492425bb815Sopenharmony_ci 493425bb815Sopenharmony_ci value = new_value; 494425bb815Sopenharmony_ci } 495425bb815Sopenharmony_ci 496425bb815Sopenharmony_ci *buffer_p = current_p; 497425bb815Sopenharmony_ci return value; 498425bb815Sopenharmony_ci} /* lit_parse_decimal */ 499425bb815Sopenharmony_ci 500425bb815Sopenharmony_ci/** 501425bb815Sopenharmony_ci * Check if specified character is a word character (part of IsWordChar abstract operation) 502425bb815Sopenharmony_ci * 503425bb815Sopenharmony_ci * See also: ECMA-262 v5, 15.10.2.6 (IsWordChar) 504425bb815Sopenharmony_ci * 505425bb815Sopenharmony_ci * @return true - if the character is a word character 506425bb815Sopenharmony_ci * false - otherwise 507425bb815Sopenharmony_ci */ 508425bb815Sopenharmony_cibool 509425bb815Sopenharmony_cilit_char_is_word_char (lit_code_point_t c) /**< code point */ 510425bb815Sopenharmony_ci{ 511425bb815Sopenharmony_ci return ((c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END) 512425bb815Sopenharmony_ci || (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END) 513425bb815Sopenharmony_ci || (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END) 514425bb815Sopenharmony_ci || c == LIT_CHAR_UNDERSCORE); 515425bb815Sopenharmony_ci} /* lit_char_is_word_char */ 516425bb815Sopenharmony_ci 517425bb815Sopenharmony_ci#if ENABLED (JERRY_UNICODE_CASE_CONVERSION) 518425bb815Sopenharmony_ci 519425bb815Sopenharmony_ci/** 520425bb815Sopenharmony_ci * Check if the specified character is in one of those tables which contain bidirectional conversions. 521425bb815Sopenharmony_ci * 522425bb815Sopenharmony_ci * @return the mapped character sequence of an ecma character, if it's in the table. 523425bb815Sopenharmony_ci * 0 - otherwise. 524425bb815Sopenharmony_ci */ 525425bb815Sopenharmony_cistatic ecma_length_t 526425bb815Sopenharmony_cisearch_in_bidirectional_conversion_tables (ecma_char_t character, /**< code unit */ 527425bb815Sopenharmony_ci ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */ 528425bb815Sopenharmony_ci bool is_lowercase) /**< is lowercase conversion */ 529425bb815Sopenharmony_ci{ 530425bb815Sopenharmony_ci /* 1, Check if the specified character is part of the lit_character_case_ranges table. */ 531425bb815Sopenharmony_ci int number_of_case_ranges = NUM_OF_ELEMENTS (lit_character_case_ranges); 532425bb815Sopenharmony_ci int conv_counter = 0; 533425bb815Sopenharmony_ci 534425bb815Sopenharmony_ci for (int i = 0; i < number_of_case_ranges; i++) 535425bb815Sopenharmony_ci { 536425bb815Sopenharmony_ci if (i % 2 == 0 && i > 0) 537425bb815Sopenharmony_ci { 538425bb815Sopenharmony_ci conv_counter++; 539425bb815Sopenharmony_ci } 540425bb815Sopenharmony_ci 541425bb815Sopenharmony_ci int range_length = lit_character_case_range_lengths[conv_counter]; 542425bb815Sopenharmony_ci ecma_char_t start_point = lit_character_case_ranges[i]; 543425bb815Sopenharmony_ci 544425bb815Sopenharmony_ci if (start_point > character || character >= start_point + range_length) 545425bb815Sopenharmony_ci { 546425bb815Sopenharmony_ci continue; 547425bb815Sopenharmony_ci } 548425bb815Sopenharmony_ci 549425bb815Sopenharmony_ci int char_dist = character - start_point; 550425bb815Sopenharmony_ci 551425bb815Sopenharmony_ci if (i % 2 == 0) 552425bb815Sopenharmony_ci { 553425bb815Sopenharmony_ci output_buffer_p[0] = is_lowercase ? (ecma_char_t) (lit_character_case_ranges[i + 1] + char_dist) : character; 554425bb815Sopenharmony_ci } 555425bb815Sopenharmony_ci else 556425bb815Sopenharmony_ci { 557425bb815Sopenharmony_ci output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (lit_character_case_ranges[i - 1] + char_dist); 558425bb815Sopenharmony_ci } 559425bb815Sopenharmony_ci 560425bb815Sopenharmony_ci return 1; 561425bb815Sopenharmony_ci } 562425bb815Sopenharmony_ci 563425bb815Sopenharmony_ci /* 2, Check if the specified character is part of the character_pair_ranges table. */ 564425bb815Sopenharmony_ci int bottom = 0; 565425bb815Sopenharmony_ci int top = NUM_OF_ELEMENTS (lit_character_pair_ranges) - 1; 566425bb815Sopenharmony_ci 567425bb815Sopenharmony_ci while (bottom <= top) 568425bb815Sopenharmony_ci { 569425bb815Sopenharmony_ci int middle = (bottom + top) / 2; 570425bb815Sopenharmony_ci ecma_char_t current_sp = lit_character_pair_ranges[middle]; 571425bb815Sopenharmony_ci 572425bb815Sopenharmony_ci if (current_sp <= character && character < current_sp + lit_character_pair_range_lengths[middle]) 573425bb815Sopenharmony_ci { 574425bb815Sopenharmony_ci int char_dist = character - current_sp; 575425bb815Sopenharmony_ci 576425bb815Sopenharmony_ci if ((character - current_sp) % 2 == 0) 577425bb815Sopenharmony_ci { 578425bb815Sopenharmony_ci output_buffer_p[0] = is_lowercase ? (ecma_char_t) (current_sp + char_dist + 1) : character; 579425bb815Sopenharmony_ci } 580425bb815Sopenharmony_ci else 581425bb815Sopenharmony_ci { 582425bb815Sopenharmony_ci output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (current_sp + char_dist - 1); 583425bb815Sopenharmony_ci } 584425bb815Sopenharmony_ci 585425bb815Sopenharmony_ci return 1; 586425bb815Sopenharmony_ci } 587425bb815Sopenharmony_ci 588425bb815Sopenharmony_ci if (character > current_sp) 589425bb815Sopenharmony_ci { 590425bb815Sopenharmony_ci bottom = middle + 1; 591425bb815Sopenharmony_ci } 592425bb815Sopenharmony_ci else 593425bb815Sopenharmony_ci { 594425bb815Sopenharmony_ci top = middle - 1; 595425bb815Sopenharmony_ci } 596425bb815Sopenharmony_ci } 597425bb815Sopenharmony_ci 598425bb815Sopenharmony_ci /* 3, Check if the specified character is part of the character_pairs table. */ 599425bb815Sopenharmony_ci int number_of_character_pairs = NUM_OF_ELEMENTS (lit_character_pairs); 600425bb815Sopenharmony_ci 601425bb815Sopenharmony_ci for (int i = 0; i < number_of_character_pairs; i++) 602425bb815Sopenharmony_ci { 603425bb815Sopenharmony_ci if (character != lit_character_pairs[i]) 604425bb815Sopenharmony_ci { 605425bb815Sopenharmony_ci continue; 606425bb815Sopenharmony_ci } 607425bb815Sopenharmony_ci 608425bb815Sopenharmony_ci if (i % 2 == 0) 609425bb815Sopenharmony_ci { 610425bb815Sopenharmony_ci output_buffer_p[0] = is_lowercase ? lit_character_pairs[i + 1] : character; 611425bb815Sopenharmony_ci } 612425bb815Sopenharmony_ci else 613425bb815Sopenharmony_ci { 614425bb815Sopenharmony_ci output_buffer_p[0] = is_lowercase ? character : lit_character_pairs[i - 1]; 615425bb815Sopenharmony_ci } 616425bb815Sopenharmony_ci 617425bb815Sopenharmony_ci return 1; 618425bb815Sopenharmony_ci } 619425bb815Sopenharmony_ci 620425bb815Sopenharmony_ci return 0; 621425bb815Sopenharmony_ci} /* search_in_bidirectional_conversion_tables */ 622425bb815Sopenharmony_ci 623425bb815Sopenharmony_ci/** 624425bb815Sopenharmony_ci * Check if the specified character is in the given conversion table. 625425bb815Sopenharmony_ci * 626425bb815Sopenharmony_ci * @return the mapped character sequence of an ecma character, if it's in the table. 627425bb815Sopenharmony_ci * 0 - otherwise. 628425bb815Sopenharmony_ci */ 629425bb815Sopenharmony_cistatic ecma_length_t 630425bb815Sopenharmony_cisearch_in_conversion_table (ecma_char_t character, /**< code unit */ 631425bb815Sopenharmony_ci ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */ 632425bb815Sopenharmony_ci const ecma_char_t *array, /**< array */ 633425bb815Sopenharmony_ci const uint8_t *counters) /**< case_values counter */ 634425bb815Sopenharmony_ci{ 635425bb815Sopenharmony_ci int end_point = 0; 636425bb815Sopenharmony_ci 637425bb815Sopenharmony_ci for (int i = 0; i < 3; i++) 638425bb815Sopenharmony_ci { 639425bb815Sopenharmony_ci int start_point = end_point; 640425bb815Sopenharmony_ci int size_of_case_value = i + 1; 641425bb815Sopenharmony_ci end_point += counters[i] * (size_of_case_value + 1); 642425bb815Sopenharmony_ci 643425bb815Sopenharmony_ci int bottom = start_point; 644425bb815Sopenharmony_ci int top = end_point - size_of_case_value; 645425bb815Sopenharmony_ci 646425bb815Sopenharmony_ci while (bottom <= top) 647425bb815Sopenharmony_ci { 648425bb815Sopenharmony_ci int middle = (bottom + top) / 2; 649425bb815Sopenharmony_ci 650425bb815Sopenharmony_ci middle -= ((middle - bottom) % (size_of_case_value + 1)); 651425bb815Sopenharmony_ci 652425bb815Sopenharmony_ci ecma_char_t current = array[middle]; 653425bb815Sopenharmony_ci 654425bb815Sopenharmony_ci if (current == character) 655425bb815Sopenharmony_ci { 656425bb815Sopenharmony_ci ecma_length_t char_sequence = 1; 657425bb815Sopenharmony_ci 658425bb815Sopenharmony_ci switch (size_of_case_value) 659425bb815Sopenharmony_ci { 660425bb815Sopenharmony_ci case 3: 661425bb815Sopenharmony_ci { 662425bb815Sopenharmony_ci output_buffer_p[2] = array[middle + 3]; 663425bb815Sopenharmony_ci char_sequence++; 664425bb815Sopenharmony_ci /* FALLTHRU */ 665425bb815Sopenharmony_ci } 666425bb815Sopenharmony_ci case 2: 667425bb815Sopenharmony_ci { 668425bb815Sopenharmony_ci output_buffer_p[1] = array[middle + 2]; 669425bb815Sopenharmony_ci char_sequence++; 670425bb815Sopenharmony_ci /* FALLTHRU */ 671425bb815Sopenharmony_ci } 672425bb815Sopenharmony_ci default: 673425bb815Sopenharmony_ci { 674425bb815Sopenharmony_ci output_buffer_p[0] = array[middle + 1]; 675425bb815Sopenharmony_ci return char_sequence; 676425bb815Sopenharmony_ci } 677425bb815Sopenharmony_ci } 678425bb815Sopenharmony_ci } 679425bb815Sopenharmony_ci 680425bb815Sopenharmony_ci if (character < current) 681425bb815Sopenharmony_ci { 682425bb815Sopenharmony_ci top = middle - (size_of_case_value + 1); 683425bb815Sopenharmony_ci } 684425bb815Sopenharmony_ci else 685425bb815Sopenharmony_ci { 686425bb815Sopenharmony_ci bottom = middle + (size_of_case_value + 1); 687425bb815Sopenharmony_ci } 688425bb815Sopenharmony_ci } 689425bb815Sopenharmony_ci } 690425bb815Sopenharmony_ci 691425bb815Sopenharmony_ci return 0; 692425bb815Sopenharmony_ci} /* search_in_conversion_table */ 693425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */ 694425bb815Sopenharmony_ci 695425bb815Sopenharmony_ci/** 696425bb815Sopenharmony_ci * Returns the lowercase character sequence of an ecma character. 697425bb815Sopenharmony_ci * 698425bb815Sopenharmony_ci * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters. 699425bb815Sopenharmony_ci * 700425bb815Sopenharmony_ci * @return the length of the lowercase character sequence 701425bb815Sopenharmony_ci * which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH. 702425bb815Sopenharmony_ci */ 703425bb815Sopenharmony_ciecma_length_t 704425bb815Sopenharmony_cilit_char_to_lower_case (ecma_char_t character, /**< input character value */ 705425bb815Sopenharmony_ci ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */ 706425bb815Sopenharmony_ci ecma_length_t buffer_size) /**< buffer size */ 707425bb815Sopenharmony_ci{ 708425bb815Sopenharmony_ci JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH); 709425bb815Sopenharmony_ci 710425bb815Sopenharmony_ci if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z) 711425bb815Sopenharmony_ci { 712425bb815Sopenharmony_ci output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A)); 713425bb815Sopenharmony_ci return 1; 714425bb815Sopenharmony_ci } 715425bb815Sopenharmony_ci 716425bb815Sopenharmony_ci#if ENABLED (JERRY_UNICODE_CASE_CONVERSION) 717425bb815Sopenharmony_ci 718425bb815Sopenharmony_ci ecma_length_t lowercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, true); 719425bb815Sopenharmony_ci 720425bb815Sopenharmony_ci if (lowercase_sequence != 0) 721425bb815Sopenharmony_ci { 722425bb815Sopenharmony_ci return lowercase_sequence; 723425bb815Sopenharmony_ci } 724425bb815Sopenharmony_ci 725425bb815Sopenharmony_ci int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_lower_case_ranges); 726425bb815Sopenharmony_ci 727425bb815Sopenharmony_ci for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++) 728425bb815Sopenharmony_ci { 729425bb815Sopenharmony_ci int range_length = lit_lower_case_range_lengths[j] - 1; 730425bb815Sopenharmony_ci ecma_char_t start_point = lit_lower_case_ranges[i]; 731425bb815Sopenharmony_ci 732425bb815Sopenharmony_ci if (start_point <= character && character <= start_point + range_length) 733425bb815Sopenharmony_ci { 734425bb815Sopenharmony_ci output_buffer_p[0] = (ecma_char_t) (lit_lower_case_ranges[i + 1] + (character - start_point)); 735425bb815Sopenharmony_ci return 1; 736425bb815Sopenharmony_ci } 737425bb815Sopenharmony_ci } 738425bb815Sopenharmony_ci 739425bb815Sopenharmony_ci lowercase_sequence = search_in_conversion_table (character, 740425bb815Sopenharmony_ci output_buffer_p, 741425bb815Sopenharmony_ci lit_lower_case_conversions, 742425bb815Sopenharmony_ci lit_lower_case_conversion_counters); 743425bb815Sopenharmony_ci 744425bb815Sopenharmony_ci if (lowercase_sequence != 0) 745425bb815Sopenharmony_ci { 746425bb815Sopenharmony_ci return lowercase_sequence; 747425bb815Sopenharmony_ci } 748425bb815Sopenharmony_ci 749425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */ 750425bb815Sopenharmony_ci 751425bb815Sopenharmony_ci output_buffer_p[0] = character; 752425bb815Sopenharmony_ci return 1; 753425bb815Sopenharmony_ci} /* lit_char_to_lower_case */ 754425bb815Sopenharmony_ci 755425bb815Sopenharmony_ci/** 756425bb815Sopenharmony_ci * Returns the uppercase character sequence of an ecma character. 757425bb815Sopenharmony_ci * 758425bb815Sopenharmony_ci * Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters. 759425bb815Sopenharmony_ci * 760425bb815Sopenharmony_ci * @return the length of the uppercase character sequence 761425bb815Sopenharmony_ci * which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH. 762425bb815Sopenharmony_ci */ 763425bb815Sopenharmony_ciecma_length_t 764425bb815Sopenharmony_cilit_char_to_upper_case (ecma_char_t character, /**< input character value */ 765425bb815Sopenharmony_ci ecma_char_t *output_buffer_p, /**< buffer for the result characters */ 766425bb815Sopenharmony_ci ecma_length_t buffer_size) /**< buffer size */ 767425bb815Sopenharmony_ci{ 768425bb815Sopenharmony_ci JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH); 769425bb815Sopenharmony_ci 770425bb815Sopenharmony_ci if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z) 771425bb815Sopenharmony_ci { 772425bb815Sopenharmony_ci output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A)); 773425bb815Sopenharmony_ci return 1; 774425bb815Sopenharmony_ci } 775425bb815Sopenharmony_ci 776425bb815Sopenharmony_ci#if ENABLED (JERRY_UNICODE_CASE_CONVERSION) 777425bb815Sopenharmony_ci 778425bb815Sopenharmony_ci ecma_length_t uppercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, false); 779425bb815Sopenharmony_ci 780425bb815Sopenharmony_ci if (uppercase_sequence != 0) 781425bb815Sopenharmony_ci { 782425bb815Sopenharmony_ci return uppercase_sequence; 783425bb815Sopenharmony_ci } 784425bb815Sopenharmony_ci 785425bb815Sopenharmony_ci int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_upper_case_special_ranges); 786425bb815Sopenharmony_ci 787425bb815Sopenharmony_ci for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++) 788425bb815Sopenharmony_ci { 789425bb815Sopenharmony_ci int range_length = lit_upper_case_special_range_lengths[j]; 790425bb815Sopenharmony_ci ecma_char_t start_point = lit_upper_case_special_ranges[i]; 791425bb815Sopenharmony_ci 792425bb815Sopenharmony_ci if (start_point <= character && character <= start_point + range_length) 793425bb815Sopenharmony_ci { 794425bb815Sopenharmony_ci output_buffer_p[0] = (ecma_char_t) (lit_upper_case_special_ranges[i + 1] + (character - start_point)); 795425bb815Sopenharmony_ci output_buffer_p[1] = (ecma_char_t) (lit_upper_case_special_ranges[i + 2]); 796425bb815Sopenharmony_ci return 2; 797425bb815Sopenharmony_ci } 798425bb815Sopenharmony_ci } 799425bb815Sopenharmony_ci 800425bb815Sopenharmony_ci uppercase_sequence = search_in_conversion_table (character, 801425bb815Sopenharmony_ci output_buffer_p, 802425bb815Sopenharmony_ci lit_upper_case_conversions, 803425bb815Sopenharmony_ci lit_upper_case_conversion_counters); 804425bb815Sopenharmony_ci 805425bb815Sopenharmony_ci if (uppercase_sequence != 0) 806425bb815Sopenharmony_ci { 807425bb815Sopenharmony_ci return uppercase_sequence; 808425bb815Sopenharmony_ci } 809425bb815Sopenharmony_ci 810425bb815Sopenharmony_ci#endif /* ENABLED (JERRY_UNICODE_CASE_CONVERSION) */ 811425bb815Sopenharmony_ci 812425bb815Sopenharmony_ci output_buffer_p[0] = character; 813425bb815Sopenharmony_ci return 1; 814425bb815Sopenharmony_ci} /* lit_char_to_upper_case */ 815