19596a2c1Sopenharmony_ci/*
29596a2c1Sopenharmony_ci * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
39596a2c1Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License");
49596a2c1Sopenharmony_ci * you may not use this file except in compliance with the License.
59596a2c1Sopenharmony_ci * You may obtain a copy of the License at
69596a2c1Sopenharmony_ci *
79596a2c1Sopenharmony_ci *     http://www.apache.org/licenses/LICENSE-2.0
89596a2c1Sopenharmony_ci *
99596a2c1Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software
109596a2c1Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS,
119596a2c1Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
129596a2c1Sopenharmony_ci * See the License for the specific language governing permissions and
139596a2c1Sopenharmony_ci * limitations under the License.
149596a2c1Sopenharmony_ci */
159596a2c1Sopenharmony_ci#include "collator.h"
169596a2c1Sopenharmony_ci
179596a2c1Sopenharmony_ci#include <unicode/stringpiece.h>
189596a2c1Sopenharmony_ci
199596a2c1Sopenharmony_ci#include "locale_config.h"
209596a2c1Sopenharmony_ci#include "map"
219596a2c1Sopenharmony_ci#include "set"
229596a2c1Sopenharmony_ci#include "unicode/strenum.h"
239596a2c1Sopenharmony_ci#include "string"
249596a2c1Sopenharmony_ci#include "unicode/errorcode.h"
259596a2c1Sopenharmony_ci#include "unicode/locid.h"
269596a2c1Sopenharmony_ci#include "unicode/ucol.h"
279596a2c1Sopenharmony_ci#include "unicode/uloc.h"
289596a2c1Sopenharmony_ci#include "unicode/unistr.h"
299596a2c1Sopenharmony_ci#include "unicode/urename.h"
309596a2c1Sopenharmony_ci#include "utility"
319596a2c1Sopenharmony_ci#include "vector"
329596a2c1Sopenharmony_ci
339596a2c1Sopenharmony_cinamespace OHOS {
349596a2c1Sopenharmony_cinamespace Global {
359596a2c1Sopenharmony_cinamespace I18n {
369596a2c1Sopenharmony_cistd::string Collator::ParseOption(std::map<std::string, std::string> &options, const std::string &key)
379596a2c1Sopenharmony_ci{
389596a2c1Sopenharmony_ci    std::map<std::string, std::string>::iterator it = options.find(key);
399596a2c1Sopenharmony_ci    if (it != options.end()) {
409596a2c1Sopenharmony_ci        return it->second;
419596a2c1Sopenharmony_ci    } else {
429596a2c1Sopenharmony_ci        return "";
439596a2c1Sopenharmony_ci    }
449596a2c1Sopenharmony_ci}
459596a2c1Sopenharmony_ci
469596a2c1Sopenharmony_civoid Collator::ParseAllOptions(std::map<std::string, std::string> &options)
479596a2c1Sopenharmony_ci{
489596a2c1Sopenharmony_ci    localeMatcher = ParseOption(options, "localeMatcher");
499596a2c1Sopenharmony_ci    if (localeMatcher == "") {
509596a2c1Sopenharmony_ci        localeMatcher = "best fit";
519596a2c1Sopenharmony_ci    }
529596a2c1Sopenharmony_ci
539596a2c1Sopenharmony_ci    usage = ParseOption(options, "usage");
549596a2c1Sopenharmony_ci    if (usage == "") {
559596a2c1Sopenharmony_ci        usage = "sort";
569596a2c1Sopenharmony_ci    }
579596a2c1Sopenharmony_ci
589596a2c1Sopenharmony_ci    sensitivity = ParseOption(options, "sensitivity");
599596a2c1Sopenharmony_ci    if (sensitivity == "") {
609596a2c1Sopenharmony_ci        sensitivity = "variant";
619596a2c1Sopenharmony_ci    }
629596a2c1Sopenharmony_ci
639596a2c1Sopenharmony_ci    ignorePunctuation = ParseOption(options, "ignorePunctuation");
649596a2c1Sopenharmony_ci    if (ignorePunctuation == "") {
659596a2c1Sopenharmony_ci        ignorePunctuation = "false";
669596a2c1Sopenharmony_ci    }
679596a2c1Sopenharmony_ci
689596a2c1Sopenharmony_ci    numeric = ParseOption(options, "numeric");
699596a2c1Sopenharmony_ci    caseFirst = ParseOption(options, "caseFirst");
709596a2c1Sopenharmony_ci    collation = ParseOption(options, "collation");
719596a2c1Sopenharmony_ci}
729596a2c1Sopenharmony_ci
739596a2c1Sopenharmony_ciCollator::Collator(std::vector<std::string> &localeTags, std::map<std::string, std::string> &options)
749596a2c1Sopenharmony_ci{
759596a2c1Sopenharmony_ci    ParseAllOptions(options);
769596a2c1Sopenharmony_ci    UErrorCode status = U_ZERO_ERROR;
779596a2c1Sopenharmony_ci    localeTags.push_back(LocaleConfig::GetSystemLocale());
789596a2c1Sopenharmony_ci    for (size_t i = 0; i < localeTags.size(); i++) {
799596a2c1Sopenharmony_ci        std::string curLocale = localeTags[i];
809596a2c1Sopenharmony_ci        locale = icu::Locale::forLanguageTag(icu::StringPiece(curLocale), status);
819596a2c1Sopenharmony_ci        if (U_FAILURE(status)) {
829596a2c1Sopenharmony_ci            status = U_ZERO_ERROR;
839596a2c1Sopenharmony_ci            continue;
849596a2c1Sopenharmony_ci        }
859596a2c1Sopenharmony_ci        if (LocaleInfo::allValidLocales.count(locale.getLanguage()) > 0) {
869596a2c1Sopenharmony_ci            localeInfo = std::make_unique<LocaleInfo>(curLocale, options);
879596a2c1Sopenharmony_ci            if (!localeInfo->InitSuccess()) {
889596a2c1Sopenharmony_ci                continue;
899596a2c1Sopenharmony_ci            }
909596a2c1Sopenharmony_ci            locale = localeInfo->GetLocale();
919596a2c1Sopenharmony_ci            localeStr = localeInfo->GetBaseName();
929596a2c1Sopenharmony_ci            createSuccess = InitCollator();
939596a2c1Sopenharmony_ci            if (!createSuccess) {
949596a2c1Sopenharmony_ci                continue;
959596a2c1Sopenharmony_ci            }
969596a2c1Sopenharmony_ci            break;
979596a2c1Sopenharmony_ci        }
989596a2c1Sopenharmony_ci    }
999596a2c1Sopenharmony_ci}
1009596a2c1Sopenharmony_ci
1019596a2c1Sopenharmony_cibool Collator::IsValidCollation(std::string &collation)
1029596a2c1Sopenharmony_ci{
1039596a2c1Sopenharmony_ci    UErrorCode status = U_ZERO_ERROR;
1049596a2c1Sopenharmony_ci    const char *currentCollation = uloc_toLegacyType("collation", collation.c_str());
1059596a2c1Sopenharmony_ci    if (currentCollation != nullptr) {
1069596a2c1Sopenharmony_ci        std::unique_ptr<icu::StringEnumeration> enumeration(
1079596a2c1Sopenharmony_ci            icu::Collator::getKeywordValuesForLocale("collation", icu::Locale(locale.getBaseName()), false, status));
1089596a2c1Sopenharmony_ci        if (!U_SUCCESS(status)) {
1099596a2c1Sopenharmony_ci            return false;
1109596a2c1Sopenharmony_ci        }
1119596a2c1Sopenharmony_ci        int length;
1129596a2c1Sopenharmony_ci        const char *validCollations = nullptr;
1139596a2c1Sopenharmony_ci        if (enumeration != nullptr) {
1149596a2c1Sopenharmony_ci            validCollations = enumeration->next(&length, status);
1159596a2c1Sopenharmony_ci        }
1169596a2c1Sopenharmony_ci        while (validCollations != nullptr) {
1179596a2c1Sopenharmony_ci            if (!strcmp(validCollations, currentCollation)) {
1189596a2c1Sopenharmony_ci                return true;
1199596a2c1Sopenharmony_ci            }
1209596a2c1Sopenharmony_ci            if (enumeration != nullptr) {
1219596a2c1Sopenharmony_ci                validCollations = enumeration->next(&length, status);
1229596a2c1Sopenharmony_ci            }
1239596a2c1Sopenharmony_ci        }
1249596a2c1Sopenharmony_ci    }
1259596a2c1Sopenharmony_ci    return false;
1269596a2c1Sopenharmony_ci}
1279596a2c1Sopenharmony_ci
1289596a2c1Sopenharmony_civoid Collator::SetCollation()
1299596a2c1Sopenharmony_ci{
1309596a2c1Sopenharmony_ci    UErrorCode status = U_ZERO_ERROR;
1319596a2c1Sopenharmony_ci    if (collation != "") {
1329596a2c1Sopenharmony_ci        if (IsValidCollation(collation)) {
1339596a2c1Sopenharmony_ci            locale.setUnicodeKeywordValue("co", collation, status);
1349596a2c1Sopenharmony_ci        } else {
1359596a2c1Sopenharmony_ci            collation = "default";
1369596a2c1Sopenharmony_ci            locale.setUnicodeKeywordValue("co", nullptr, status);
1379596a2c1Sopenharmony_ci        }
1389596a2c1Sopenharmony_ci    } else {
1399596a2c1Sopenharmony_ci        collation = localeInfo->GetCollation();
1409596a2c1Sopenharmony_ci        if (collation != "") {
1419596a2c1Sopenharmony_ci            if (IsValidCollation(collation)) {
1429596a2c1Sopenharmony_ci                locale.setUnicodeKeywordValue("co", collation, status);
1439596a2c1Sopenharmony_ci            } else {
1449596a2c1Sopenharmony_ci                locale.setUnicodeKeywordValue("co", nullptr, status);
1459596a2c1Sopenharmony_ci                collation = "default";
1469596a2c1Sopenharmony_ci            }
1479596a2c1Sopenharmony_ci        } else {
1489596a2c1Sopenharmony_ci            locale.setUnicodeKeywordValue("co", nullptr, status);
1499596a2c1Sopenharmony_ci            collation = "default";
1509596a2c1Sopenharmony_ci        }
1519596a2c1Sopenharmony_ci    }
1529596a2c1Sopenharmony_ci}
1539596a2c1Sopenharmony_ci
1549596a2c1Sopenharmony_civoid Collator::SetUsage()
1559596a2c1Sopenharmony_ci{
1569596a2c1Sopenharmony_ci    if (usage == "search") {
1579596a2c1Sopenharmony_ci        collation = "default";
1589596a2c1Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
1599596a2c1Sopenharmony_ci        locale.setUnicodeKeywordValue("co", nullptr, status);
1609596a2c1Sopenharmony_ci    }
1619596a2c1Sopenharmony_ci}
1629596a2c1Sopenharmony_ci
1639596a2c1Sopenharmony_civoid Collator::SetNumeric()
1649596a2c1Sopenharmony_ci{
1659596a2c1Sopenharmony_ci    if (!collatorPtr) {
1669596a2c1Sopenharmony_ci        return;
1679596a2c1Sopenharmony_ci    }
1689596a2c1Sopenharmony_ci    if (numeric == "") {
1699596a2c1Sopenharmony_ci        numeric = localeInfo->GetNumeric();
1709596a2c1Sopenharmony_ci        if (numeric != "true" && numeric != "false") {
1719596a2c1Sopenharmony_ci            numeric = "false";
1729596a2c1Sopenharmony_ci        }
1739596a2c1Sopenharmony_ci    }
1749596a2c1Sopenharmony_ci    UErrorCode status = U_ZERO_ERROR;
1759596a2c1Sopenharmony_ci    if (numeric == "true") {
1769596a2c1Sopenharmony_ci        collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION,
1779596a2c1Sopenharmony_ci            UColAttributeValue::UCOL_ON, status);
1789596a2c1Sopenharmony_ci    } else {
1799596a2c1Sopenharmony_ci        collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION,
1809596a2c1Sopenharmony_ci            UColAttributeValue::UCOL_OFF, status);
1819596a2c1Sopenharmony_ci    }
1829596a2c1Sopenharmony_ci}
1839596a2c1Sopenharmony_ci
1849596a2c1Sopenharmony_civoid Collator::SetCaseFirst()
1859596a2c1Sopenharmony_ci{
1869596a2c1Sopenharmony_ci    if (!collatorPtr) {
1879596a2c1Sopenharmony_ci        return;
1889596a2c1Sopenharmony_ci    }
1899596a2c1Sopenharmony_ci    if (caseFirst == "") {
1909596a2c1Sopenharmony_ci        caseFirst = localeInfo->GetCaseFirst();
1919596a2c1Sopenharmony_ci        if (caseFirst != "upper" && caseFirst != "lower" && caseFirst != "false") {
1929596a2c1Sopenharmony_ci            caseFirst = "false";
1939596a2c1Sopenharmony_ci        }
1949596a2c1Sopenharmony_ci    }
1959596a2c1Sopenharmony_ci    UErrorCode status = U_ZERO_ERROR;
1969596a2c1Sopenharmony_ci    if (caseFirst == "upper") {
1979596a2c1Sopenharmony_ci        collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
1989596a2c1Sopenharmony_ci            UColAttributeValue::UCOL_UPPER_FIRST, status);
1999596a2c1Sopenharmony_ci    } else if (caseFirst == "lower") {
2009596a2c1Sopenharmony_ci        collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
2019596a2c1Sopenharmony_ci            UColAttributeValue::UCOL_LOWER_FIRST, status);
2029596a2c1Sopenharmony_ci    } else {
2039596a2c1Sopenharmony_ci        collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
2049596a2c1Sopenharmony_ci            UColAttributeValue::UCOL_OFF, status);
2059596a2c1Sopenharmony_ci    }
2069596a2c1Sopenharmony_ci}
2079596a2c1Sopenharmony_ci
2089596a2c1Sopenharmony_civoid Collator::SetSensitivity()
2099596a2c1Sopenharmony_ci{
2109596a2c1Sopenharmony_ci    if (!collatorPtr) {
2119596a2c1Sopenharmony_ci        return;
2129596a2c1Sopenharmony_ci    }
2139596a2c1Sopenharmony_ci    if (sensitivity == "base") {
2149596a2c1Sopenharmony_ci        collatorPtr->setStrength(icu::Collator::PRIMARY);
2159596a2c1Sopenharmony_ci    } else if (sensitivity == "accent") {
2169596a2c1Sopenharmony_ci        collatorPtr->setStrength(icu::Collator::SECONDARY);
2179596a2c1Sopenharmony_ci    } else if (sensitivity == "case") {
2189596a2c1Sopenharmony_ci        collatorPtr->setStrength(icu::Collator::PRIMARY);
2199596a2c1Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
2209596a2c1Sopenharmony_ci        collatorPtr->setAttribute(UColAttribute::UCOL_CASE_LEVEL,
2219596a2c1Sopenharmony_ci            UColAttributeValue::UCOL_ON, status);
2229596a2c1Sopenharmony_ci    } else {
2239596a2c1Sopenharmony_ci        collatorPtr->setStrength(icu::Collator::TERTIARY);
2249596a2c1Sopenharmony_ci    }
2259596a2c1Sopenharmony_ci}
2269596a2c1Sopenharmony_ci
2279596a2c1Sopenharmony_civoid Collator::SetIgnorePunctuation()
2289596a2c1Sopenharmony_ci{
2299596a2c1Sopenharmony_ci    if (!collatorPtr) {
2309596a2c1Sopenharmony_ci        return;
2319596a2c1Sopenharmony_ci    }
2329596a2c1Sopenharmony_ci    if (ignorePunctuation == "true") {
2339596a2c1Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
2349596a2c1Sopenharmony_ci        collatorPtr->setAttribute(UColAttribute::UCOL_ALTERNATE_HANDLING,
2359596a2c1Sopenharmony_ci            UColAttributeValue::UCOL_SHIFTED, status);
2369596a2c1Sopenharmony_ci    }
2379596a2c1Sopenharmony_ci}
2389596a2c1Sopenharmony_ci
2399596a2c1Sopenharmony_cibool Collator::InitCollator()
2409596a2c1Sopenharmony_ci{
2419596a2c1Sopenharmony_ci    SetCollation();
2429596a2c1Sopenharmony_ci    SetUsage();
2439596a2c1Sopenharmony_ci    UErrorCode status = UErrorCode::U_ZERO_ERROR;
2449596a2c1Sopenharmony_ci    collatorPtr = icu::Collator::createInstance(locale, status);
2459596a2c1Sopenharmony_ci    if (!U_SUCCESS(status) || collatorPtr == nullptr) {
2469596a2c1Sopenharmony_ci        if (collatorPtr != nullptr) {
2479596a2c1Sopenharmony_ci            delete collatorPtr;
2489596a2c1Sopenharmony_ci            collatorPtr = nullptr;
2499596a2c1Sopenharmony_ci        }
2509596a2c1Sopenharmony_ci        return false;
2519596a2c1Sopenharmony_ci    }
2529596a2c1Sopenharmony_ci    SetNumeric();
2539596a2c1Sopenharmony_ci    SetCaseFirst();
2549596a2c1Sopenharmony_ci    SetSensitivity();
2559596a2c1Sopenharmony_ci    SetIgnorePunctuation();
2569596a2c1Sopenharmony_ci    return true;
2579596a2c1Sopenharmony_ci}
2589596a2c1Sopenharmony_ci
2599596a2c1Sopenharmony_ciCollator::~Collator()
2609596a2c1Sopenharmony_ci{
2619596a2c1Sopenharmony_ci    if (collatorPtr != nullptr) {
2629596a2c1Sopenharmony_ci        delete collatorPtr;
2639596a2c1Sopenharmony_ci        collatorPtr = nullptr;
2649596a2c1Sopenharmony_ci    }
2659596a2c1Sopenharmony_ci}
2669596a2c1Sopenharmony_ci
2679596a2c1Sopenharmony_ciCompareResult Collator::Compare(const std::string &first, const std::string &second)
2689596a2c1Sopenharmony_ci{
2699596a2c1Sopenharmony_ci    if (!collatorPtr) {
2709596a2c1Sopenharmony_ci        return CompareResult::INVALID;
2719596a2c1Sopenharmony_ci    }
2729596a2c1Sopenharmony_ci    icu::Collator::EComparisonResult result = collatorPtr->compare(icu::UnicodeString(first.data(), first.length()),
2739596a2c1Sopenharmony_ci        icu::UnicodeString(second.data(), second.length()));
2749596a2c1Sopenharmony_ci    if (result == icu::Collator::EComparisonResult::LESS) {
2759596a2c1Sopenharmony_ci        return CompareResult::SMALLER;
2769596a2c1Sopenharmony_ci    } else if (result == icu::Collator::EComparisonResult::EQUAL) {
2779596a2c1Sopenharmony_ci        return CompareResult::EQUAL;
2789596a2c1Sopenharmony_ci    } else {
2799596a2c1Sopenharmony_ci        return CompareResult::GREATER;
2809596a2c1Sopenharmony_ci    }
2819596a2c1Sopenharmony_ci}
2829596a2c1Sopenharmony_ci
2839596a2c1Sopenharmony_civoid Collator::ResolvedOptions(std::map<std::string, std::string> &options)
2849596a2c1Sopenharmony_ci{
2859596a2c1Sopenharmony_ci    options.insert(std::pair<std::string, std::string>("localeMatcher", localeMatcher));
2869596a2c1Sopenharmony_ci    options.insert(std::pair<std::string, std::string>("locale", localeStr));
2879596a2c1Sopenharmony_ci    options.insert(std::pair<std::string, std::string>("usage", usage));
2889596a2c1Sopenharmony_ci    options.insert(std::pair<std::string, std::string>("sensitivity", sensitivity));
2899596a2c1Sopenharmony_ci    options.insert(std::pair<std::string, std::string>("ignorePunctuation", ignorePunctuation));
2909596a2c1Sopenharmony_ci    options.insert(std::pair<std::string, std::string>("numeric", numeric));
2919596a2c1Sopenharmony_ci    options.insert(std::pair<std::string, std::string>("caseFirst", caseFirst));
2929596a2c1Sopenharmony_ci    options.insert(std::pair<std::string, std::string>("collation", collation));
2939596a2c1Sopenharmony_ci}
2949596a2c1Sopenharmony_ci} // namespace I18n
2959596a2c1Sopenharmony_ci} // namespace Global
2969596a2c1Sopenharmony_ci} // namespace OHOS
297