19596a2c1Sopenharmony_ci/* 29596a2c1Sopenharmony_ci * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 39596a2c1Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 49596a2c1Sopenharmony_ci * you may not use this file except in compliance with the License. 59596a2c1Sopenharmony_ci * You may obtain a copy of the License at 69596a2c1Sopenharmony_ci * 79596a2c1Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 89596a2c1Sopenharmony_ci * 99596a2c1Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 109596a2c1Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 119596a2c1Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 129596a2c1Sopenharmony_ci * See the License for the specific language governing permissions and 139596a2c1Sopenharmony_ci * limitations under the License. 149596a2c1Sopenharmony_ci */ 159596a2c1Sopenharmony_ci#include "character.h" 169596a2c1Sopenharmony_ci 179596a2c1Sopenharmony_ci#include <sys/types.h> 189596a2c1Sopenharmony_ci#include <set> 199596a2c1Sopenharmony_ci#include <string> 209596a2c1Sopenharmony_ci 219596a2c1Sopenharmony_ci#include "cctype" 229596a2c1Sopenharmony_ci#include "map" 239596a2c1Sopenharmony_ci#include "string" 249596a2c1Sopenharmony_ci#include "unicode/umachine.h" 259596a2c1Sopenharmony_ci#include "unicode/unistr.h" 269596a2c1Sopenharmony_ci#include "unicode/urename.h" 279596a2c1Sopenharmony_ci 289596a2c1Sopenharmony_cinamespace OHOS { 299596a2c1Sopenharmony_cinamespace Global { 309596a2c1Sopenharmony_cinamespace I18n { 319596a2c1Sopenharmony_cistatic std::set<UCharDirection> RTLDirectionSet = { 329596a2c1Sopenharmony_ci U_RIGHT_TO_LEFT, 339596a2c1Sopenharmony_ci U_RIGHT_TO_LEFT_ARABIC, 349596a2c1Sopenharmony_ci U_RIGHT_TO_LEFT_EMBEDDING, 359596a2c1Sopenharmony_ci U_RIGHT_TO_LEFT_OVERRIDE, 369596a2c1Sopenharmony_ci U_RIGHT_TO_LEFT_ISOLATE 379596a2c1Sopenharmony_ci}; 389596a2c1Sopenharmony_ci 399596a2c1Sopenharmony_cibool IsDigit(const std::string &character) 409596a2c1Sopenharmony_ci{ 419596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 429596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 439596a2c1Sopenharmony_ci return u_isdigit(char32); 449596a2c1Sopenharmony_ci} 459596a2c1Sopenharmony_ci 469596a2c1Sopenharmony_cibool IsSpaceChar(const std::string &character) 479596a2c1Sopenharmony_ci{ 489596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 499596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 509596a2c1Sopenharmony_ci return u_isJavaSpaceChar(char32); 519596a2c1Sopenharmony_ci} 529596a2c1Sopenharmony_ci 539596a2c1Sopenharmony_cibool IsWhiteSpace(const std::string &character) 549596a2c1Sopenharmony_ci{ 559596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 569596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 579596a2c1Sopenharmony_ci return u_isWhitespace(char32); 589596a2c1Sopenharmony_ci} 599596a2c1Sopenharmony_ci 609596a2c1Sopenharmony_cibool IsRTLCharacter(const std::string &character) 619596a2c1Sopenharmony_ci{ 629596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 639596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 649596a2c1Sopenharmony_ci UCharDirection direction = u_charDirection(char32); 659596a2c1Sopenharmony_ci if (RTLDirectionSet.find(direction) != RTLDirectionSet.end()) { 669596a2c1Sopenharmony_ci return true; 679596a2c1Sopenharmony_ci } 689596a2c1Sopenharmony_ci return false; 699596a2c1Sopenharmony_ci} 709596a2c1Sopenharmony_ci 719596a2c1Sopenharmony_cibool IsIdeoGraphic(const std::string &character) 729596a2c1Sopenharmony_ci{ 739596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 749596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 759596a2c1Sopenharmony_ci return u_hasBinaryProperty(char32, UCHAR_IDEOGRAPHIC); 769596a2c1Sopenharmony_ci} 779596a2c1Sopenharmony_ci 789596a2c1Sopenharmony_cibool IsLetter(const std::string &character) 799596a2c1Sopenharmony_ci{ 809596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 819596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 829596a2c1Sopenharmony_ci return isalpha(char32); 839596a2c1Sopenharmony_ci} 849596a2c1Sopenharmony_ci 859596a2c1Sopenharmony_cibool IsLowerCase(const std::string &character) 869596a2c1Sopenharmony_ci{ 879596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 889596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 899596a2c1Sopenharmony_ci return u_islower(char32); 909596a2c1Sopenharmony_ci} 919596a2c1Sopenharmony_ci 929596a2c1Sopenharmony_cibool IsUpperCase(const std::string &character) 939596a2c1Sopenharmony_ci{ 949596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 959596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 969596a2c1Sopenharmony_ci return u_isupper(char32); 979596a2c1Sopenharmony_ci} 989596a2c1Sopenharmony_ci 999596a2c1Sopenharmony_cistd::map<UCharCategory, std::string> categoryMap = { 1009596a2c1Sopenharmony_ci { U_UNASSIGNED, "U_UNASSIGNED" }, 1019596a2c1Sopenharmony_ci { U_GENERAL_OTHER_TYPES, "U_GENERAL_OTHER_TYPES" }, 1029596a2c1Sopenharmony_ci { U_UPPERCASE_LETTER, "U_UPPERCASE_LETTER" }, 1039596a2c1Sopenharmony_ci { U_LOWERCASE_LETTER, "U_LOWERCASE_LETTER" }, 1049596a2c1Sopenharmony_ci { U_TITLECASE_LETTER, "U_TITLECASE_LETTER" }, 1059596a2c1Sopenharmony_ci { U_MODIFIER_LETTER, "U_MODIFIER_LETTER" }, 1069596a2c1Sopenharmony_ci { U_OTHER_LETTER, "U_OTHER_LETTER" }, 1079596a2c1Sopenharmony_ci { U_NON_SPACING_MARK, "U_NON_SPACING_MARK" }, 1089596a2c1Sopenharmony_ci { U_ENCLOSING_MARK, "U_ENCLOSING_MARK" }, 1099596a2c1Sopenharmony_ci { U_COMBINING_SPACING_MARK, "U_COMBINING_SPACING_MARK" }, 1109596a2c1Sopenharmony_ci { U_DECIMAL_DIGIT_NUMBER, "U_DECIMAL_DIGIT_NUMBER" }, 1119596a2c1Sopenharmony_ci { U_LETTER_NUMBER, "U_LETTER_NUMBER" }, 1129596a2c1Sopenharmony_ci { U_OTHER_NUMBER, "U_OTHER_NUMBER" }, 1139596a2c1Sopenharmony_ci { U_SPACE_SEPARATOR, "U_SPACE_SEPARATOR" }, 1149596a2c1Sopenharmony_ci { U_LINE_SEPARATOR, "U_LINE_SEPARATOR" }, 1159596a2c1Sopenharmony_ci { U_PARAGRAPH_SEPARATOR, "U_PARAGRAPH_SEPARATOR" }, 1169596a2c1Sopenharmony_ci { U_CONTROL_CHAR, "U_CONTROL_CHAR" }, 1179596a2c1Sopenharmony_ci { U_FORMAT_CHAR, "U_FORMAT_CHAR" }, 1189596a2c1Sopenharmony_ci { U_PRIVATE_USE_CHAR, "U_PRIVATE_USE_CHAR" }, 1199596a2c1Sopenharmony_ci { U_SURROGATE, "U_SURROGATE" }, 1209596a2c1Sopenharmony_ci { U_DASH_PUNCTUATION, "U_DASH_PUNCTUATION" }, 1219596a2c1Sopenharmony_ci { U_START_PUNCTUATION, "U_START_PUNCTUATION" }, 1229596a2c1Sopenharmony_ci { U_END_PUNCTUATION, "U_END_PUNCTUATION" }, 1239596a2c1Sopenharmony_ci { U_CONNECTOR_PUNCTUATION, "U_CONNECTOR_PUNCTUATION" }, 1249596a2c1Sopenharmony_ci { U_OTHER_PUNCTUATION, "U_OTHER_PUNCTUATION" }, 1259596a2c1Sopenharmony_ci { U_MATH_SYMBOL, "U_MATH_SYMBOL" }, 1269596a2c1Sopenharmony_ci { U_CURRENCY_SYMBOL, "U_CURRENCY_SYMBOL" }, 1279596a2c1Sopenharmony_ci { U_MODIFIER_SYMBOL, "U_MODIFIER_SYMBOL" }, 1289596a2c1Sopenharmony_ci { U_OTHER_SYMBOL, "U_OTHER_SYMBOL" }, 1299596a2c1Sopenharmony_ci { U_INITIAL_PUNCTUATION, "U_INITIAL_PUNCTUATION" }, 1309596a2c1Sopenharmony_ci { U_FINAL_PUNCTUATION, "U_FINAL_PUNCTUATION" }, 1319596a2c1Sopenharmony_ci { U_CHAR_CATEGORY_COUNT, "U_CHAR_CATEGORY_COUNT" }, 1329596a2c1Sopenharmony_ci}; 1339596a2c1Sopenharmony_ci 1349596a2c1Sopenharmony_cistd::string GetType(const std::string &character) 1359596a2c1Sopenharmony_ci{ 1369596a2c1Sopenharmony_ci icu::UnicodeString unicodeString(character.c_str()); 1379596a2c1Sopenharmony_ci UChar32 char32 = unicodeString.char32At(0); 1389596a2c1Sopenharmony_ci int8_t category = u_charType(char32); 1399596a2c1Sopenharmony_ci return categoryMap[UCharCategory(category)]; 1409596a2c1Sopenharmony_ci} 1419596a2c1Sopenharmony_ci} // namespace I18n 1429596a2c1Sopenharmony_ci} // namespace Global 1439596a2c1Sopenharmony_ci} // namespace OHOS