19596a2c1Sopenharmony_ci/*
29596a2c1Sopenharmony_ci * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
39596a2c1Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License");
49596a2c1Sopenharmony_ci * you may not use this file except in compliance with the License.
59596a2c1Sopenharmony_ci * You may obtain a copy of the License at
69596a2c1Sopenharmony_ci *
79596a2c1Sopenharmony_ci *     http://www.apache.org/licenses/LICENSE-2.0
89596a2c1Sopenharmony_ci *
99596a2c1Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software
109596a2c1Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS,
119596a2c1Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
129596a2c1Sopenharmony_ci * See the License for the specific language governing permissions and
139596a2c1Sopenharmony_ci * limitations under the License.
149596a2c1Sopenharmony_ci */
159596a2c1Sopenharmony_ci#include "character.h"
169596a2c1Sopenharmony_ci
179596a2c1Sopenharmony_ci#include <sys/types.h>
189596a2c1Sopenharmony_ci#include <set>
199596a2c1Sopenharmony_ci#include <string>
209596a2c1Sopenharmony_ci
219596a2c1Sopenharmony_ci#include "cctype"
229596a2c1Sopenharmony_ci#include "map"
239596a2c1Sopenharmony_ci#include "string"
249596a2c1Sopenharmony_ci#include "unicode/umachine.h"
259596a2c1Sopenharmony_ci#include "unicode/unistr.h"
269596a2c1Sopenharmony_ci#include "unicode/urename.h"
279596a2c1Sopenharmony_ci
289596a2c1Sopenharmony_cinamespace OHOS {
299596a2c1Sopenharmony_cinamespace Global {
309596a2c1Sopenharmony_cinamespace I18n {
319596a2c1Sopenharmony_cistatic std::set<UCharDirection> RTLDirectionSet = {
329596a2c1Sopenharmony_ci    U_RIGHT_TO_LEFT,
339596a2c1Sopenharmony_ci    U_RIGHT_TO_LEFT_ARABIC,
349596a2c1Sopenharmony_ci    U_RIGHT_TO_LEFT_EMBEDDING,
359596a2c1Sopenharmony_ci    U_RIGHT_TO_LEFT_OVERRIDE,
369596a2c1Sopenharmony_ci    U_RIGHT_TO_LEFT_ISOLATE
379596a2c1Sopenharmony_ci};
389596a2c1Sopenharmony_ci
399596a2c1Sopenharmony_cibool IsDigit(const std::string &character)
409596a2c1Sopenharmony_ci{
419596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
429596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
439596a2c1Sopenharmony_ci    return u_isdigit(char32);
449596a2c1Sopenharmony_ci}
459596a2c1Sopenharmony_ci
469596a2c1Sopenharmony_cibool IsSpaceChar(const std::string &character)
479596a2c1Sopenharmony_ci{
489596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
499596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
509596a2c1Sopenharmony_ci    return u_isJavaSpaceChar(char32);
519596a2c1Sopenharmony_ci}
529596a2c1Sopenharmony_ci
539596a2c1Sopenharmony_cibool IsWhiteSpace(const std::string &character)
549596a2c1Sopenharmony_ci{
559596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
569596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
579596a2c1Sopenharmony_ci    return u_isWhitespace(char32);
589596a2c1Sopenharmony_ci}
599596a2c1Sopenharmony_ci
609596a2c1Sopenharmony_cibool IsRTLCharacter(const std::string &character)
619596a2c1Sopenharmony_ci{
629596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
639596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
649596a2c1Sopenharmony_ci    UCharDirection direction = u_charDirection(char32);
659596a2c1Sopenharmony_ci    if (RTLDirectionSet.find(direction) != RTLDirectionSet.end()) {
669596a2c1Sopenharmony_ci        return true;
679596a2c1Sopenharmony_ci    }
689596a2c1Sopenharmony_ci    return false;
699596a2c1Sopenharmony_ci}
709596a2c1Sopenharmony_ci
719596a2c1Sopenharmony_cibool IsIdeoGraphic(const std::string &character)
729596a2c1Sopenharmony_ci{
739596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
749596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
759596a2c1Sopenharmony_ci    return u_hasBinaryProperty(char32, UCHAR_IDEOGRAPHIC);
769596a2c1Sopenharmony_ci}
779596a2c1Sopenharmony_ci
789596a2c1Sopenharmony_cibool IsLetter(const std::string &character)
799596a2c1Sopenharmony_ci{
809596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
819596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
829596a2c1Sopenharmony_ci    return isalpha(char32);
839596a2c1Sopenharmony_ci}
849596a2c1Sopenharmony_ci
859596a2c1Sopenharmony_cibool IsLowerCase(const std::string &character)
869596a2c1Sopenharmony_ci{
879596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
889596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
899596a2c1Sopenharmony_ci    return u_islower(char32);
909596a2c1Sopenharmony_ci}
919596a2c1Sopenharmony_ci
929596a2c1Sopenharmony_cibool IsUpperCase(const std::string &character)
939596a2c1Sopenharmony_ci{
949596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
959596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
969596a2c1Sopenharmony_ci    return u_isupper(char32);
979596a2c1Sopenharmony_ci}
989596a2c1Sopenharmony_ci
999596a2c1Sopenharmony_cistd::map<UCharCategory, std::string> categoryMap = {
1009596a2c1Sopenharmony_ci    { U_UNASSIGNED, "U_UNASSIGNED" },
1019596a2c1Sopenharmony_ci    { U_GENERAL_OTHER_TYPES, "U_GENERAL_OTHER_TYPES" },
1029596a2c1Sopenharmony_ci    { U_UPPERCASE_LETTER, "U_UPPERCASE_LETTER" },
1039596a2c1Sopenharmony_ci    { U_LOWERCASE_LETTER, "U_LOWERCASE_LETTER" },
1049596a2c1Sopenharmony_ci    { U_TITLECASE_LETTER, "U_TITLECASE_LETTER" },
1059596a2c1Sopenharmony_ci    { U_MODIFIER_LETTER, "U_MODIFIER_LETTER" },
1069596a2c1Sopenharmony_ci    { U_OTHER_LETTER, "U_OTHER_LETTER" },
1079596a2c1Sopenharmony_ci    { U_NON_SPACING_MARK, "U_NON_SPACING_MARK" },
1089596a2c1Sopenharmony_ci    { U_ENCLOSING_MARK, "U_ENCLOSING_MARK" },
1099596a2c1Sopenharmony_ci    { U_COMBINING_SPACING_MARK, "U_COMBINING_SPACING_MARK" },
1109596a2c1Sopenharmony_ci    { U_DECIMAL_DIGIT_NUMBER, "U_DECIMAL_DIGIT_NUMBER" },
1119596a2c1Sopenharmony_ci    { U_LETTER_NUMBER, "U_LETTER_NUMBER" },
1129596a2c1Sopenharmony_ci    { U_OTHER_NUMBER, "U_OTHER_NUMBER" },
1139596a2c1Sopenharmony_ci    { U_SPACE_SEPARATOR, "U_SPACE_SEPARATOR" },
1149596a2c1Sopenharmony_ci    { U_LINE_SEPARATOR, "U_LINE_SEPARATOR" },
1159596a2c1Sopenharmony_ci    { U_PARAGRAPH_SEPARATOR, "U_PARAGRAPH_SEPARATOR" },
1169596a2c1Sopenharmony_ci    { U_CONTROL_CHAR, "U_CONTROL_CHAR" },
1179596a2c1Sopenharmony_ci    { U_FORMAT_CHAR, "U_FORMAT_CHAR" },
1189596a2c1Sopenharmony_ci    { U_PRIVATE_USE_CHAR, "U_PRIVATE_USE_CHAR" },
1199596a2c1Sopenharmony_ci    { U_SURROGATE, "U_SURROGATE" },
1209596a2c1Sopenharmony_ci    { U_DASH_PUNCTUATION, "U_DASH_PUNCTUATION" },
1219596a2c1Sopenharmony_ci    { U_START_PUNCTUATION, "U_START_PUNCTUATION" },
1229596a2c1Sopenharmony_ci    { U_END_PUNCTUATION, "U_END_PUNCTUATION" },
1239596a2c1Sopenharmony_ci    { U_CONNECTOR_PUNCTUATION, "U_CONNECTOR_PUNCTUATION" },
1249596a2c1Sopenharmony_ci    { U_OTHER_PUNCTUATION, "U_OTHER_PUNCTUATION" },
1259596a2c1Sopenharmony_ci    { U_MATH_SYMBOL, "U_MATH_SYMBOL" },
1269596a2c1Sopenharmony_ci    { U_CURRENCY_SYMBOL, "U_CURRENCY_SYMBOL" },
1279596a2c1Sopenharmony_ci    { U_MODIFIER_SYMBOL, "U_MODIFIER_SYMBOL" },
1289596a2c1Sopenharmony_ci    { U_OTHER_SYMBOL, "U_OTHER_SYMBOL" },
1299596a2c1Sopenharmony_ci    { U_INITIAL_PUNCTUATION, "U_INITIAL_PUNCTUATION" },
1309596a2c1Sopenharmony_ci    { U_FINAL_PUNCTUATION, "U_FINAL_PUNCTUATION" },
1319596a2c1Sopenharmony_ci    { U_CHAR_CATEGORY_COUNT, "U_CHAR_CATEGORY_COUNT" },
1329596a2c1Sopenharmony_ci};
1339596a2c1Sopenharmony_ci
1349596a2c1Sopenharmony_cistd::string GetType(const std::string &character)
1359596a2c1Sopenharmony_ci{
1369596a2c1Sopenharmony_ci    icu::UnicodeString unicodeString(character.c_str());
1379596a2c1Sopenharmony_ci    UChar32 char32 = unicodeString.char32At(0);
1389596a2c1Sopenharmony_ci    int8_t category = u_charType(char32);
1399596a2c1Sopenharmony_ci    return categoryMap[UCharCategory(category)];
1409596a2c1Sopenharmony_ci}
1419596a2c1Sopenharmony_ci} // namespace I18n
1429596a2c1Sopenharmony_ci} // namespace Global
1439596a2c1Sopenharmony_ci} // namespace OHOS