19596a2c1Sopenharmony_ci/* 29596a2c1Sopenharmony_ci * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 39596a2c1Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 49596a2c1Sopenharmony_ci * you may not use this file except in compliance with the License. 59596a2c1Sopenharmony_ci * You may obtain a copy of the License at 69596a2c1Sopenharmony_ci * 79596a2c1Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 89596a2c1Sopenharmony_ci * 99596a2c1Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 109596a2c1Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 119596a2c1Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 129596a2c1Sopenharmony_ci * See the License for the specific language governing permissions and 139596a2c1Sopenharmony_ci * limitations under the License. 149596a2c1Sopenharmony_ci */ 159596a2c1Sopenharmony_ci#include "collator.h" 169596a2c1Sopenharmony_ci 179596a2c1Sopenharmony_ci#include <unicode/stringpiece.h> 189596a2c1Sopenharmony_ci 199596a2c1Sopenharmony_ci#include "locale_config.h" 209596a2c1Sopenharmony_ci#include "map" 219596a2c1Sopenharmony_ci#include "set" 229596a2c1Sopenharmony_ci#include "unicode/strenum.h" 239596a2c1Sopenharmony_ci#include "string" 249596a2c1Sopenharmony_ci#include "unicode/errorcode.h" 259596a2c1Sopenharmony_ci#include "unicode/locid.h" 269596a2c1Sopenharmony_ci#include "unicode/ucol.h" 279596a2c1Sopenharmony_ci#include "unicode/uloc.h" 289596a2c1Sopenharmony_ci#include "unicode/unistr.h" 299596a2c1Sopenharmony_ci#include "unicode/urename.h" 309596a2c1Sopenharmony_ci#include "utility" 319596a2c1Sopenharmony_ci#include "vector" 329596a2c1Sopenharmony_ci 339596a2c1Sopenharmony_cinamespace OHOS { 349596a2c1Sopenharmony_cinamespace Global { 359596a2c1Sopenharmony_cinamespace I18n { 369596a2c1Sopenharmony_cistd::string Collator::ParseOption(std::map<std::string, std::string> &options, const std::string &key) 379596a2c1Sopenharmony_ci{ 389596a2c1Sopenharmony_ci std::map<std::string, std::string>::iterator it = options.find(key); 399596a2c1Sopenharmony_ci if (it != options.end()) { 409596a2c1Sopenharmony_ci return it->second; 419596a2c1Sopenharmony_ci } else { 429596a2c1Sopenharmony_ci return ""; 439596a2c1Sopenharmony_ci } 449596a2c1Sopenharmony_ci} 459596a2c1Sopenharmony_ci 469596a2c1Sopenharmony_civoid Collator::ParseAllOptions(std::map<std::string, std::string> &options) 479596a2c1Sopenharmony_ci{ 489596a2c1Sopenharmony_ci localeMatcher = ParseOption(options, "localeMatcher"); 499596a2c1Sopenharmony_ci if (localeMatcher == "") { 509596a2c1Sopenharmony_ci localeMatcher = "best fit"; 519596a2c1Sopenharmony_ci } 529596a2c1Sopenharmony_ci 539596a2c1Sopenharmony_ci usage = ParseOption(options, "usage"); 549596a2c1Sopenharmony_ci if (usage == "") { 559596a2c1Sopenharmony_ci usage = "sort"; 569596a2c1Sopenharmony_ci } 579596a2c1Sopenharmony_ci 589596a2c1Sopenharmony_ci sensitivity = ParseOption(options, "sensitivity"); 599596a2c1Sopenharmony_ci if (sensitivity == "") { 609596a2c1Sopenharmony_ci sensitivity = "variant"; 619596a2c1Sopenharmony_ci } 629596a2c1Sopenharmony_ci 639596a2c1Sopenharmony_ci ignorePunctuation = ParseOption(options, "ignorePunctuation"); 649596a2c1Sopenharmony_ci if (ignorePunctuation == "") { 659596a2c1Sopenharmony_ci ignorePunctuation = "false"; 669596a2c1Sopenharmony_ci } 679596a2c1Sopenharmony_ci 689596a2c1Sopenharmony_ci numeric = ParseOption(options, "numeric"); 699596a2c1Sopenharmony_ci caseFirst = ParseOption(options, "caseFirst"); 709596a2c1Sopenharmony_ci collation = ParseOption(options, "collation"); 719596a2c1Sopenharmony_ci} 729596a2c1Sopenharmony_ci 739596a2c1Sopenharmony_ciCollator::Collator(std::vector<std::string> &localeTags, std::map<std::string, std::string> &options) 749596a2c1Sopenharmony_ci{ 759596a2c1Sopenharmony_ci ParseAllOptions(options); 769596a2c1Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 779596a2c1Sopenharmony_ci localeTags.push_back(LocaleConfig::GetSystemLocale()); 789596a2c1Sopenharmony_ci for (size_t i = 0; i < localeTags.size(); i++) { 799596a2c1Sopenharmony_ci std::string curLocale = localeTags[i]; 809596a2c1Sopenharmony_ci locale = icu::Locale::forLanguageTag(icu::StringPiece(curLocale), status); 819596a2c1Sopenharmony_ci if (U_FAILURE(status)) { 829596a2c1Sopenharmony_ci status = U_ZERO_ERROR; 839596a2c1Sopenharmony_ci continue; 849596a2c1Sopenharmony_ci } 859596a2c1Sopenharmony_ci if (LocaleInfo::allValidLocales.count(locale.getLanguage()) > 0) { 869596a2c1Sopenharmony_ci localeInfo = std::make_unique<LocaleInfo>(curLocale, options); 879596a2c1Sopenharmony_ci if (!localeInfo->InitSuccess()) { 889596a2c1Sopenharmony_ci continue; 899596a2c1Sopenharmony_ci } 909596a2c1Sopenharmony_ci locale = localeInfo->GetLocale(); 919596a2c1Sopenharmony_ci localeStr = localeInfo->GetBaseName(); 929596a2c1Sopenharmony_ci createSuccess = InitCollator(); 939596a2c1Sopenharmony_ci if (!createSuccess) { 949596a2c1Sopenharmony_ci continue; 959596a2c1Sopenharmony_ci } 969596a2c1Sopenharmony_ci break; 979596a2c1Sopenharmony_ci } 989596a2c1Sopenharmony_ci } 999596a2c1Sopenharmony_ci} 1009596a2c1Sopenharmony_ci 1019596a2c1Sopenharmony_cibool Collator::IsValidCollation(std::string &collation) 1029596a2c1Sopenharmony_ci{ 1039596a2c1Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 1049596a2c1Sopenharmony_ci const char *currentCollation = uloc_toLegacyType("collation", collation.c_str()); 1059596a2c1Sopenharmony_ci if (currentCollation != nullptr) { 1069596a2c1Sopenharmony_ci std::unique_ptr<icu::StringEnumeration> enumeration( 1079596a2c1Sopenharmony_ci icu::Collator::getKeywordValuesForLocale("collation", icu::Locale(locale.getBaseName()), false, status)); 1089596a2c1Sopenharmony_ci if (!U_SUCCESS(status)) { 1099596a2c1Sopenharmony_ci return false; 1109596a2c1Sopenharmony_ci } 1119596a2c1Sopenharmony_ci int length; 1129596a2c1Sopenharmony_ci const char *validCollations = nullptr; 1139596a2c1Sopenharmony_ci if (enumeration != nullptr) { 1149596a2c1Sopenharmony_ci validCollations = enumeration->next(&length, status); 1159596a2c1Sopenharmony_ci } 1169596a2c1Sopenharmony_ci while (validCollations != nullptr) { 1179596a2c1Sopenharmony_ci if (!strcmp(validCollations, currentCollation)) { 1189596a2c1Sopenharmony_ci return true; 1199596a2c1Sopenharmony_ci } 1209596a2c1Sopenharmony_ci if (enumeration != nullptr) { 1219596a2c1Sopenharmony_ci validCollations = enumeration->next(&length, status); 1229596a2c1Sopenharmony_ci } 1239596a2c1Sopenharmony_ci } 1249596a2c1Sopenharmony_ci } 1259596a2c1Sopenharmony_ci return false; 1269596a2c1Sopenharmony_ci} 1279596a2c1Sopenharmony_ci 1289596a2c1Sopenharmony_civoid Collator::SetCollation() 1299596a2c1Sopenharmony_ci{ 1309596a2c1Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 1319596a2c1Sopenharmony_ci if (collation != "") { 1329596a2c1Sopenharmony_ci if (IsValidCollation(collation)) { 1339596a2c1Sopenharmony_ci locale.setUnicodeKeywordValue("co", collation, status); 1349596a2c1Sopenharmony_ci } else { 1359596a2c1Sopenharmony_ci collation = "default"; 1369596a2c1Sopenharmony_ci locale.setUnicodeKeywordValue("co", nullptr, status); 1379596a2c1Sopenharmony_ci } 1389596a2c1Sopenharmony_ci } else { 1399596a2c1Sopenharmony_ci collation = localeInfo->GetCollation(); 1409596a2c1Sopenharmony_ci if (collation != "") { 1419596a2c1Sopenharmony_ci if (IsValidCollation(collation)) { 1429596a2c1Sopenharmony_ci locale.setUnicodeKeywordValue("co", collation, status); 1439596a2c1Sopenharmony_ci } else { 1449596a2c1Sopenharmony_ci locale.setUnicodeKeywordValue("co", nullptr, status); 1459596a2c1Sopenharmony_ci collation = "default"; 1469596a2c1Sopenharmony_ci } 1479596a2c1Sopenharmony_ci } else { 1489596a2c1Sopenharmony_ci locale.setUnicodeKeywordValue("co", nullptr, status); 1499596a2c1Sopenharmony_ci collation = "default"; 1509596a2c1Sopenharmony_ci } 1519596a2c1Sopenharmony_ci } 1529596a2c1Sopenharmony_ci} 1539596a2c1Sopenharmony_ci 1549596a2c1Sopenharmony_civoid Collator::SetUsage() 1559596a2c1Sopenharmony_ci{ 1569596a2c1Sopenharmony_ci if (usage == "search") { 1579596a2c1Sopenharmony_ci collation = "default"; 1589596a2c1Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 1599596a2c1Sopenharmony_ci locale.setUnicodeKeywordValue("co", nullptr, status); 1609596a2c1Sopenharmony_ci } 1619596a2c1Sopenharmony_ci} 1629596a2c1Sopenharmony_ci 1639596a2c1Sopenharmony_civoid Collator::SetNumeric() 1649596a2c1Sopenharmony_ci{ 1659596a2c1Sopenharmony_ci if (!collatorPtr) { 1669596a2c1Sopenharmony_ci return; 1679596a2c1Sopenharmony_ci } 1689596a2c1Sopenharmony_ci if (numeric == "") { 1699596a2c1Sopenharmony_ci numeric = localeInfo->GetNumeric(); 1709596a2c1Sopenharmony_ci if (numeric != "true" && numeric != "false") { 1719596a2c1Sopenharmony_ci numeric = "false"; 1729596a2c1Sopenharmony_ci } 1739596a2c1Sopenharmony_ci } 1749596a2c1Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 1759596a2c1Sopenharmony_ci if (numeric == "true") { 1769596a2c1Sopenharmony_ci collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION, 1779596a2c1Sopenharmony_ci UColAttributeValue::UCOL_ON, status); 1789596a2c1Sopenharmony_ci } else { 1799596a2c1Sopenharmony_ci collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION, 1809596a2c1Sopenharmony_ci UColAttributeValue::UCOL_OFF, status); 1819596a2c1Sopenharmony_ci } 1829596a2c1Sopenharmony_ci} 1839596a2c1Sopenharmony_ci 1849596a2c1Sopenharmony_civoid Collator::SetCaseFirst() 1859596a2c1Sopenharmony_ci{ 1869596a2c1Sopenharmony_ci if (!collatorPtr) { 1879596a2c1Sopenharmony_ci return; 1889596a2c1Sopenharmony_ci } 1899596a2c1Sopenharmony_ci if (caseFirst == "") { 1909596a2c1Sopenharmony_ci caseFirst = localeInfo->GetCaseFirst(); 1919596a2c1Sopenharmony_ci if (caseFirst != "upper" && caseFirst != "lower" && caseFirst != "false") { 1929596a2c1Sopenharmony_ci caseFirst = "false"; 1939596a2c1Sopenharmony_ci } 1949596a2c1Sopenharmony_ci } 1959596a2c1Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 1969596a2c1Sopenharmony_ci if (caseFirst == "upper") { 1979596a2c1Sopenharmony_ci collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST, 1989596a2c1Sopenharmony_ci UColAttributeValue::UCOL_UPPER_FIRST, status); 1999596a2c1Sopenharmony_ci } else if (caseFirst == "lower") { 2009596a2c1Sopenharmony_ci collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST, 2019596a2c1Sopenharmony_ci UColAttributeValue::UCOL_LOWER_FIRST, status); 2029596a2c1Sopenharmony_ci } else { 2039596a2c1Sopenharmony_ci collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST, 2049596a2c1Sopenharmony_ci UColAttributeValue::UCOL_OFF, status); 2059596a2c1Sopenharmony_ci } 2069596a2c1Sopenharmony_ci} 2079596a2c1Sopenharmony_ci 2089596a2c1Sopenharmony_civoid Collator::SetSensitivity() 2099596a2c1Sopenharmony_ci{ 2109596a2c1Sopenharmony_ci if (!collatorPtr) { 2119596a2c1Sopenharmony_ci return; 2129596a2c1Sopenharmony_ci } 2139596a2c1Sopenharmony_ci if (sensitivity == "base") { 2149596a2c1Sopenharmony_ci collatorPtr->setStrength(icu::Collator::PRIMARY); 2159596a2c1Sopenharmony_ci } else if (sensitivity == "accent") { 2169596a2c1Sopenharmony_ci collatorPtr->setStrength(icu::Collator::SECONDARY); 2179596a2c1Sopenharmony_ci } else if (sensitivity == "case") { 2189596a2c1Sopenharmony_ci collatorPtr->setStrength(icu::Collator::PRIMARY); 2199596a2c1Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2209596a2c1Sopenharmony_ci collatorPtr->setAttribute(UColAttribute::UCOL_CASE_LEVEL, 2219596a2c1Sopenharmony_ci UColAttributeValue::UCOL_ON, status); 2229596a2c1Sopenharmony_ci } else { 2239596a2c1Sopenharmony_ci collatorPtr->setStrength(icu::Collator::TERTIARY); 2249596a2c1Sopenharmony_ci } 2259596a2c1Sopenharmony_ci} 2269596a2c1Sopenharmony_ci 2279596a2c1Sopenharmony_civoid Collator::SetIgnorePunctuation() 2289596a2c1Sopenharmony_ci{ 2299596a2c1Sopenharmony_ci if (!collatorPtr) { 2309596a2c1Sopenharmony_ci return; 2319596a2c1Sopenharmony_ci } 2329596a2c1Sopenharmony_ci if (ignorePunctuation == "true") { 2339596a2c1Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2349596a2c1Sopenharmony_ci collatorPtr->setAttribute(UColAttribute::UCOL_ALTERNATE_HANDLING, 2359596a2c1Sopenharmony_ci UColAttributeValue::UCOL_SHIFTED, status); 2369596a2c1Sopenharmony_ci } 2379596a2c1Sopenharmony_ci} 2389596a2c1Sopenharmony_ci 2399596a2c1Sopenharmony_cibool Collator::InitCollator() 2409596a2c1Sopenharmony_ci{ 2419596a2c1Sopenharmony_ci SetCollation(); 2429596a2c1Sopenharmony_ci SetUsage(); 2439596a2c1Sopenharmony_ci UErrorCode status = UErrorCode::U_ZERO_ERROR; 2449596a2c1Sopenharmony_ci collatorPtr = icu::Collator::createInstance(locale, status); 2459596a2c1Sopenharmony_ci if (!U_SUCCESS(status) || collatorPtr == nullptr) { 2469596a2c1Sopenharmony_ci if (collatorPtr != nullptr) { 2479596a2c1Sopenharmony_ci delete collatorPtr; 2489596a2c1Sopenharmony_ci collatorPtr = nullptr; 2499596a2c1Sopenharmony_ci } 2509596a2c1Sopenharmony_ci return false; 2519596a2c1Sopenharmony_ci } 2529596a2c1Sopenharmony_ci SetNumeric(); 2539596a2c1Sopenharmony_ci SetCaseFirst(); 2549596a2c1Sopenharmony_ci SetSensitivity(); 2559596a2c1Sopenharmony_ci SetIgnorePunctuation(); 2569596a2c1Sopenharmony_ci return true; 2579596a2c1Sopenharmony_ci} 2589596a2c1Sopenharmony_ci 2599596a2c1Sopenharmony_ciCollator::~Collator() 2609596a2c1Sopenharmony_ci{ 2619596a2c1Sopenharmony_ci if (collatorPtr != nullptr) { 2629596a2c1Sopenharmony_ci delete collatorPtr; 2639596a2c1Sopenharmony_ci collatorPtr = nullptr; 2649596a2c1Sopenharmony_ci } 2659596a2c1Sopenharmony_ci} 2669596a2c1Sopenharmony_ci 2679596a2c1Sopenharmony_ciCompareResult Collator::Compare(const std::string &first, const std::string &second) 2689596a2c1Sopenharmony_ci{ 2699596a2c1Sopenharmony_ci if (!collatorPtr) { 2709596a2c1Sopenharmony_ci return CompareResult::INVALID; 2719596a2c1Sopenharmony_ci } 2729596a2c1Sopenharmony_ci icu::Collator::EComparisonResult result = collatorPtr->compare(icu::UnicodeString(first.data(), first.length()), 2739596a2c1Sopenharmony_ci icu::UnicodeString(second.data(), second.length())); 2749596a2c1Sopenharmony_ci if (result == icu::Collator::EComparisonResult::LESS) { 2759596a2c1Sopenharmony_ci return CompareResult::SMALLER; 2769596a2c1Sopenharmony_ci } else if (result == icu::Collator::EComparisonResult::EQUAL) { 2779596a2c1Sopenharmony_ci return CompareResult::EQUAL; 2789596a2c1Sopenharmony_ci } else { 2799596a2c1Sopenharmony_ci return CompareResult::GREATER; 2809596a2c1Sopenharmony_ci } 2819596a2c1Sopenharmony_ci} 2829596a2c1Sopenharmony_ci 2839596a2c1Sopenharmony_civoid Collator::ResolvedOptions(std::map<std::string, std::string> &options) 2849596a2c1Sopenharmony_ci{ 2859596a2c1Sopenharmony_ci options.insert(std::pair<std::string, std::string>("localeMatcher", localeMatcher)); 2869596a2c1Sopenharmony_ci options.insert(std::pair<std::string, std::string>("locale", localeStr)); 2879596a2c1Sopenharmony_ci options.insert(std::pair<std::string, std::string>("usage", usage)); 2889596a2c1Sopenharmony_ci options.insert(std::pair<std::string, std::string>("sensitivity", sensitivity)); 2899596a2c1Sopenharmony_ci options.insert(std::pair<std::string, std::string>("ignorePunctuation", ignorePunctuation)); 2909596a2c1Sopenharmony_ci options.insert(std::pair<std::string, std::string>("numeric", numeric)); 2919596a2c1Sopenharmony_ci options.insert(std::pair<std::string, std::string>("caseFirst", caseFirst)); 2929596a2c1Sopenharmony_ci options.insert(std::pair<std::string, std::string>("collation", collation)); 2939596a2c1Sopenharmony_ci} 2949596a2c1Sopenharmony_ci} // namespace I18n 2959596a2c1Sopenharmony_ci} // namespace Global 2969596a2c1Sopenharmony_ci} // namespace OHOS 297