14514f5e3Sopenharmony_ci/* 24514f5e3Sopenharmony_ci * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 34514f5e3Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 44514f5e3Sopenharmony_ci * you may not use this file except in compliance with the License. 54514f5e3Sopenharmony_ci * You may obtain a copy of the License at 64514f5e3Sopenharmony_ci * 74514f5e3Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 84514f5e3Sopenharmony_ci * 94514f5e3Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 104514f5e3Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 114514f5e3Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 124514f5e3Sopenharmony_ci * See the License for the specific language governing permissions and 134514f5e3Sopenharmony_ci * limitations under the License. 144514f5e3Sopenharmony_ci */ 154514f5e3Sopenharmony_ci 164514f5e3Sopenharmony_ci#include "ecmascript/js_collator.h" 174514f5e3Sopenharmony_ci 184514f5e3Sopenharmony_ci#include "ecmascript/ecma_context.h" 194514f5e3Sopenharmony_ci#include "ecmascript/intl/locale_helper.h" 204514f5e3Sopenharmony_ci#include "ecmascript/global_env.h" 214514f5e3Sopenharmony_ci#include "ecmascript/ecma_string-inl.h" 224514f5e3Sopenharmony_cinamespace panda::ecmascript { 234514f5e3Sopenharmony_ci// NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects) 244514f5e3Sopenharmony_ciconst CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll"; 254514f5e3Sopenharmony_ciconst std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = { 264514f5e3Sopenharmony_ci {"upper", CaseFirstOption::UPPER}, 274514f5e3Sopenharmony_ci {"lower", CaseFirstOption::LOWER}, 284514f5e3Sopenharmony_ci {"false", CaseFirstOption::FALSE_OPTION} 294514f5e3Sopenharmony_ci}; 304514f5e3Sopenharmony_ciconst std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = { 314514f5e3Sopenharmony_ci {CaseFirstOption::UPPER, UCOL_UPPER_FIRST}, 324514f5e3Sopenharmony_ci {CaseFirstOption::LOWER, UCOL_LOWER_FIRST}, 334514f5e3Sopenharmony_ci {CaseFirstOption::FALSE_OPTION, UCOL_OFF}, 344514f5e3Sopenharmony_ci {CaseFirstOption::UNDEFINED, UCOL_OFF} 354514f5e3Sopenharmony_ci}; 364514f5e3Sopenharmony_ciconst std::vector<LocaleMatcherOption> JSCollator::LOCALE_MATCHER_OPTION = { 374514f5e3Sopenharmony_ci LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT 384514f5e3Sopenharmony_ci}; 394514f5e3Sopenharmony_ciconst std::vector<std::string> JSCollator::LOCALE_MATCHER_OPTION_NAME = {"lookup", "best fit"}; 404514f5e3Sopenharmony_ci 414514f5e3Sopenharmony_ciconst std::vector<CaseFirstOption> JSCollator::CASE_FIRST_OPTION = { 424514f5e3Sopenharmony_ci CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION 434514f5e3Sopenharmony_ci}; 444514f5e3Sopenharmony_ciconst std::vector<std::string> JSCollator::CASE_FIRST_OPTION_NAME = {"upper", "lower", "false"}; 454514f5e3Sopenharmony_ci 464514f5e3Sopenharmony_ciconst std::set<std::string> JSCollator::RELEVANT_EXTENSION_KEYS = {"co", "kn", "kf"}; 474514f5e3Sopenharmony_ci 484514f5e3Sopenharmony_ciconst std::vector<SensitivityOption> JSCollator::SENSITIVITY_OPTION = { 494514f5e3Sopenharmony_ci SensitivityOption::BASE, SensitivityOption::ACCENT, 504514f5e3Sopenharmony_ci SensitivityOption::CASE, SensitivityOption::VARIANT 514514f5e3Sopenharmony_ci}; 524514f5e3Sopenharmony_ciconst std::vector<std::string> JSCollator::SENSITIVITY_OPTION_NAME = {"base", "accent", "case", "variant"}; 534514f5e3Sopenharmony_ci 544514f5e3Sopenharmony_ciconst std::vector<UsageOption> JSCollator::USAGE_OPTION = {UsageOption::SORT, UsageOption::SEARCH}; 554514f5e3Sopenharmony_ciconst std::vector<std::string> JSCollator::USAGE_OPTION_NAME = {"sort", "search"}; 564514f5e3Sopenharmony_ci 574514f5e3Sopenharmony_ci// All the available locales that are statically known to fulfill fast path conditions. 584514f5e3Sopenharmony_ciconst char* const JSCollator::FAST_LOCALE[] = { 594514f5e3Sopenharmony_ci "en-US", "en", "fr", "es", "de", "pt", "it", "ca", 604514f5e3Sopenharmony_ci "de-AT", "fi", "id", "id-ID", "ms", "nl", "pl", "ro", 614514f5e3Sopenharmony_ci "sl", "sv", "sw", "vi", "en-DE", "en-GB", 624514f5e3Sopenharmony_ci}; 634514f5e3Sopenharmony_ci 644514f5e3Sopenharmony_ci 654514f5e3Sopenharmony_ciJSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread, bool enableLocaleCache) 664514f5e3Sopenharmony_ci{ 674514f5e3Sopenharmony_ci const char *key = nullptr; 684514f5e3Sopenharmony_ci const char *path = JSCollator::uIcuDataColl.c_str(); 694514f5e3Sopenharmony_ci // key and path are const, so we can cache the result 704514f5e3Sopenharmony_ci if (enableLocaleCache) { 714514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> cachedLocales = thread->GlobalConstants()->GetHandledCachedJSCollatorLocales(); 724514f5e3Sopenharmony_ci if (cachedLocales->IsHeapObject()) { 734514f5e3Sopenharmony_ci return JSHandle<TaggedArray>(cachedLocales); 744514f5e3Sopenharmony_ci } 754514f5e3Sopenharmony_ci } 764514f5e3Sopenharmony_ci std::vector<std::string> availableStringLocales = intl::LocaleHelper::GetAvailableLocales(thread, key, path); 774514f5e3Sopenharmony_ci JSHandle<TaggedArray> availableLocales = JSLocale::ConstructLocaleList(thread, availableStringLocales); 784514f5e3Sopenharmony_ci if (enableLocaleCache) { 794514f5e3Sopenharmony_ci GlobalEnvConstants *constants = const_cast<GlobalEnvConstants *>(thread->GlobalConstants()); 804514f5e3Sopenharmony_ci constants->SetCachedLocales(availableLocales.GetTaggedValue()); 814514f5e3Sopenharmony_ci } 824514f5e3Sopenharmony_ci return availableLocales; 834514f5e3Sopenharmony_ci} 844514f5e3Sopenharmony_ci 854514f5e3Sopenharmony_ci/* static */ 864514f5e3Sopenharmony_civoid JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator, 874514f5e3Sopenharmony_ci icu::Collator *icuCollator, const NativePointerCallback &callback) 884514f5e3Sopenharmony_ci{ 894514f5e3Sopenharmony_ci EcmaVM *ecmaVm = thread->GetEcmaVM(); 904514f5e3Sopenharmony_ci ObjectFactory *factory = ecmaVm->GetFactory(); 914514f5e3Sopenharmony_ci 924514f5e3Sopenharmony_ci ASSERT(icuCollator != nullptr); 934514f5e3Sopenharmony_ci JSTaggedValue data = collator->GetIcuField(); 944514f5e3Sopenharmony_ci if (data.IsJSNativePointer()) { 954514f5e3Sopenharmony_ci JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject()); 964514f5e3Sopenharmony_ci native->ResetExternalPointer(thread, icuCollator); 974514f5e3Sopenharmony_ci return; 984514f5e3Sopenharmony_ci } 994514f5e3Sopenharmony_ci JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback); 1004514f5e3Sopenharmony_ci collator->SetIcuField(thread, pointer.GetTaggedValue()); 1014514f5e3Sopenharmony_ci} 1024514f5e3Sopenharmony_ci 1034514f5e3Sopenharmony_ciJSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread, 1044514f5e3Sopenharmony_ci const JSHandle<JSCollator> &collator, 1054514f5e3Sopenharmony_ci const JSHandle<JSTaggedValue> &locales, 1064514f5e3Sopenharmony_ci const JSHandle<JSTaggedValue> &options, 1074514f5e3Sopenharmony_ci bool forIcuCache, 1084514f5e3Sopenharmony_ci bool enableLocaleCache) 1094514f5e3Sopenharmony_ci{ 1104514f5e3Sopenharmony_ci EcmaVM *ecmaVm = thread->GetEcmaVM(); 1114514f5e3Sopenharmony_ci ObjectFactory *factory = ecmaVm->GetFactory(); 1124514f5e3Sopenharmony_ci const GlobalEnvConstants *globalConst = thread->GlobalConstants(); 1134514f5e3Sopenharmony_ci // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). 1144514f5e3Sopenharmony_ci JSHandle<TaggedArray> requestedLocales = intl::LocaleHelper::CanonicalizeLocaleList(thread, locales); 1154514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1164514f5e3Sopenharmony_ci 1174514f5e3Sopenharmony_ci // 2. If options is undefined, then 1184514f5e3Sopenharmony_ci // a. Let options be ObjectCreate(null). 1194514f5e3Sopenharmony_ci // 3. Else, 1204514f5e3Sopenharmony_ci // a. Let options be ? ToObject(options). 1214514f5e3Sopenharmony_ci JSHandle<JSObject> optionsObject; 1224514f5e3Sopenharmony_ci if (options->IsUndefined()) { 1234514f5e3Sopenharmony_ci optionsObject = factory->CreateNullJSObject(); 1244514f5e3Sopenharmony_ci } else { 1254514f5e3Sopenharmony_ci optionsObject = JSTaggedValue::ToObject(thread, options); 1264514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1274514f5e3Sopenharmony_ci } 1284514f5e3Sopenharmony_ci // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort"). 1294514f5e3Sopenharmony_ci auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(), 1304514f5e3Sopenharmony_ci JSCollator::USAGE_OPTION, JSCollator::USAGE_OPTION_NAME, 1314514f5e3Sopenharmony_ci UsageOption::SORT); 1324514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1334514f5e3Sopenharmony_ci collator->SetUsage(usage); 1344514f5e3Sopenharmony_ci 1354514f5e3Sopenharmony_ci // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit"). 1364514f5e3Sopenharmony_ci auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>( 1374514f5e3Sopenharmony_ci thread, optionsObject, globalConst->GetHandledLocaleMatcherString(), 1384514f5e3Sopenharmony_ci JSCollator::LOCALE_MATCHER_OPTION, JSCollator::LOCALE_MATCHER_OPTION_NAME, 1394514f5e3Sopenharmony_ci LocaleMatcherOption::BEST_FIT); 1404514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1414514f5e3Sopenharmony_ci 1424514f5e3Sopenharmony_ci // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined). 1434514f5e3Sopenharmony_ci // 7. If collation is not undefined, then 1444514f5e3Sopenharmony_ci // a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception. 1454514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> collation = 1464514f5e3Sopenharmony_ci JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING, 1474514f5e3Sopenharmony_ci globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined()); 1484514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1494514f5e3Sopenharmony_ci collator->SetCollation(thread, collation); 1504514f5e3Sopenharmony_ci std::string collationStr; 1514514f5e3Sopenharmony_ci if (!collation->IsUndefined()) { 1524514f5e3Sopenharmony_ci JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation); 1534514f5e3Sopenharmony_ci collationStr = intl::LocaleHelper::ConvertToStdString(collationEcmaStr); 1544514f5e3Sopenharmony_ci if (!JSLocale::IsWellAlphaNumList(collationStr)) { 1554514f5e3Sopenharmony_ci THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator); 1564514f5e3Sopenharmony_ci } 1574514f5e3Sopenharmony_ci } 1584514f5e3Sopenharmony_ci 1594514f5e3Sopenharmony_ci // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined). 1604514f5e3Sopenharmony_ci bool numeric = false; 1614514f5e3Sopenharmony_ci bool foundNumeric = 1624514f5e3Sopenharmony_ci JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric); 1634514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1644514f5e3Sopenharmony_ci collator->SetNumeric(numeric); 1654514f5e3Sopenharmony_ci 1664514f5e3Sopenharmony_ci // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined). 1674514f5e3Sopenharmony_ci CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>( 1684514f5e3Sopenharmony_ci thread, optionsObject, globalConst->GetHandledCaseFirstString(), 1694514f5e3Sopenharmony_ci JSCollator::CASE_FIRST_OPTION, JSCollator::CASE_FIRST_OPTION_NAME, 1704514f5e3Sopenharmony_ci CaseFirstOption::UNDEFINED); 1714514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1724514f5e3Sopenharmony_ci collator->SetCaseFirst(caseFirst); 1734514f5e3Sopenharmony_ci 1744514f5e3Sopenharmony_ci // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]]. 1754514f5e3Sopenharmony_ci 1764514f5e3Sopenharmony_ci // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt, 1774514f5e3Sopenharmony_ci // %Collator%.[[RelevantExtensionKeys]], localeData). 1784514f5e3Sopenharmony_ci JSHandle<TaggedArray> availableLocales; 1794514f5e3Sopenharmony_ci if (requestedLocales->GetLength() == 0) { 1804514f5e3Sopenharmony_ci availableLocales = factory->EmptyArray(); 1814514f5e3Sopenharmony_ci } else { 1824514f5e3Sopenharmony_ci availableLocales = GetAvailableLocales(thread, enableLocaleCache); 1834514f5e3Sopenharmony_ci } 1844514f5e3Sopenharmony_ci ResolvedLocale r = 1854514f5e3Sopenharmony_ci JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, RELEVANT_EXTENSION_KEYS); 1864514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1874514f5e3Sopenharmony_ci icu::Locale icuLocale = r.localeData; 1884514f5e3Sopenharmony_ci JSHandle<EcmaString> localeStr = intl::LocaleHelper::ToLanguageTag(thread, icuLocale); 1894514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 1904514f5e3Sopenharmony_ci collator->SetLocale(thread, localeStr.GetTaggedValue()); 1914514f5e3Sopenharmony_ci ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus"); 1924514f5e3Sopenharmony_ci 1934514f5e3Sopenharmony_ci // If collation is undefined iterate RelevantExtensionKeys to find "co" 1944514f5e3Sopenharmony_ci // if found, set ICU collator UnicodeKeyword to iterator->second 1954514f5e3Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 1964514f5e3Sopenharmony_ci if (!collation->IsUndefined()) { 1974514f5e3Sopenharmony_ci auto extensionIter = r.extensions.find("co"); 1984514f5e3Sopenharmony_ci if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) { 1994514f5e3Sopenharmony_ci icuLocale.setUnicodeKeywordValue("co", nullptr, status); 2004514f5e3Sopenharmony_ci ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed"); 2014514f5e3Sopenharmony_ci } 2024514f5e3Sopenharmony_ci } 2034514f5e3Sopenharmony_ci 2044514f5e3Sopenharmony_ci // If usage is serach set co-serach to icu locale key word value 2054514f5e3Sopenharmony_ci // Eles set collation string to icu locale key word value 2064514f5e3Sopenharmony_ci if (usage == UsageOption::SEARCH) { 2074514f5e3Sopenharmony_ci icuLocale.setUnicodeKeywordValue("co", "search", status); 2084514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 2094514f5e3Sopenharmony_ci } else { 2104514f5e3Sopenharmony_ci if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) { 2114514f5e3Sopenharmony_ci icuLocale.setUnicodeKeywordValue("co", collationStr, status); 2124514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 2134514f5e3Sopenharmony_ci } 2144514f5e3Sopenharmony_ci } 2154514f5e3Sopenharmony_ci 2164514f5e3Sopenharmony_ci std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status)); 2174514f5e3Sopenharmony_ci if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion) 2184514f5e3Sopenharmony_ci if (status == UErrorCode::U_MISSING_RESOURCE_ERROR) { 2194514f5e3Sopenharmony_ci THROW_REFERENCE_ERROR_AND_RETURN(thread, "can not find icu data resources", collator); 2204514f5e3Sopenharmony_ci } 2214514f5e3Sopenharmony_ci status = U_ZERO_ERROR; 2224514f5e3Sopenharmony_ci icu::Locale localeName(icuLocale.getBaseName()); 2234514f5e3Sopenharmony_ci icuCollator.reset(icu::Collator::createInstance(localeName, status)); 2244514f5e3Sopenharmony_ci if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion) 2254514f5e3Sopenharmony_ci THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator); 2264514f5e3Sopenharmony_ci } 2274514f5e3Sopenharmony_ci } 2284514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 2294514f5e3Sopenharmony_ci icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status)); 2304514f5e3Sopenharmony_ci 2314514f5e3Sopenharmony_ci icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 2324514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 2334514f5e3Sopenharmony_ci 2344514f5e3Sopenharmony_ci // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric 2354514f5e3Sopenharmony_ci // Else iterate RelevantExtensionKeys to find "kn" 2364514f5e3Sopenharmony_ci // if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second 2374514f5e3Sopenharmony_ci status = U_ZERO_ERROR; 2384514f5e3Sopenharmony_ci if (foundNumeric) { 2394514f5e3Sopenharmony_ci ASSERT(icuCollator.get() != nullptr); 2404514f5e3Sopenharmony_ci icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status); 2414514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 2424514f5e3Sopenharmony_ci } else { 2434514f5e3Sopenharmony_ci auto extensionIter = r.extensions.find("kn"); 2444514f5e3Sopenharmony_ci if (extensionIter != r.extensions.end()) { 2454514f5e3Sopenharmony_ci ASSERT(icuCollator.get() != nullptr); 2464514f5e3Sopenharmony_ci bool found = (extensionIter->second == "true"); 2474514f5e3Sopenharmony_ci collator->SetNumeric(found); 2484514f5e3Sopenharmony_ci icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status); 2494514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 2504514f5e3Sopenharmony_ci } 2514514f5e3Sopenharmony_ci } 2524514f5e3Sopenharmony_ci 2534514f5e3Sopenharmony_ci // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst 2544514f5e3Sopenharmony_ci // Else iterate RelevantExtensionKeys to find "kf" 2554514f5e3Sopenharmony_ci // if found, set ICU collator UColAttributeValue to iterator->second 2564514f5e3Sopenharmony_ci status = U_ZERO_ERROR; 2574514f5e3Sopenharmony_ci if (caseFirst != CaseFirstOption::UNDEFINED) { 2584514f5e3Sopenharmony_ci ASSERT(icuCollator.get() != nullptr); 2594514f5e3Sopenharmony_ci icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status); 2604514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 2614514f5e3Sopenharmony_ci } else { 2624514f5e3Sopenharmony_ci auto extensionIter = r.extensions.find("kf"); 2634514f5e3Sopenharmony_ci if (extensionIter != r.extensions.end()) { 2644514f5e3Sopenharmony_ci ASSERT(icuCollator.get() != nullptr); 2654514f5e3Sopenharmony_ci auto mapIter = caseFirstMap.find(extensionIter->second); 2664514f5e3Sopenharmony_ci if (mapIter != caseFirstMap.end()) { 2674514f5e3Sopenharmony_ci icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status); 2684514f5e3Sopenharmony_ci collator->SetCaseFirst(mapIter->second); 2694514f5e3Sopenharmony_ci } else { 2704514f5e3Sopenharmony_ci icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED), 2714514f5e3Sopenharmony_ci status); 2724514f5e3Sopenharmony_ci } 2734514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 2744514f5e3Sopenharmony_ci } 2754514f5e3Sopenharmony_ci } 2764514f5e3Sopenharmony_ci 2774514f5e3Sopenharmony_ci // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" », 2784514f5e3Sopenharmony_ci // undefined). 2794514f5e3Sopenharmony_ci SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>( 2804514f5e3Sopenharmony_ci thread, optionsObject, globalConst->GetHandledSensitivityString(), 2814514f5e3Sopenharmony_ci JSCollator::SENSITIVITY_OPTION, JSCollator::SENSITIVITY_OPTION_NAME, 2824514f5e3Sopenharmony_ci SensitivityOption::UNDEFINED); 2834514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread); 2844514f5e3Sopenharmony_ci // 25. If sensitivity is undefined, then 2854514f5e3Sopenharmony_ci // a. If usage is "sort", then 2864514f5e3Sopenharmony_ci // i. Let sensitivity be "variant". 2874514f5e3Sopenharmony_ci if (sensitivity == SensitivityOption::UNDEFINED) { 2884514f5e3Sopenharmony_ci if (usage == UsageOption::SORT) { 2894514f5e3Sopenharmony_ci sensitivity = SensitivityOption::VARIANT; 2904514f5e3Sopenharmony_ci } 2914514f5e3Sopenharmony_ci } 2924514f5e3Sopenharmony_ci collator->SetSensitivity(sensitivity); 2934514f5e3Sopenharmony_ci 2944514f5e3Sopenharmony_ci // Trans SensitivityOption to Icu strength option 2954514f5e3Sopenharmony_ci switch (sensitivity) { 2964514f5e3Sopenharmony_ci case SensitivityOption::BASE: 2974514f5e3Sopenharmony_ci icuCollator->setStrength(icu::Collator::PRIMARY); 2984514f5e3Sopenharmony_ci break; 2994514f5e3Sopenharmony_ci case SensitivityOption::ACCENT: 3004514f5e3Sopenharmony_ci icuCollator->setStrength(icu::Collator::SECONDARY); 3014514f5e3Sopenharmony_ci break; 3024514f5e3Sopenharmony_ci case SensitivityOption::CASE: 3034514f5e3Sopenharmony_ci icuCollator->setStrength(icu::Collator::PRIMARY); 3044514f5e3Sopenharmony_ci icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status); 3054514f5e3Sopenharmony_ci break; 3064514f5e3Sopenharmony_ci case SensitivityOption::VARIANT: 3074514f5e3Sopenharmony_ci icuCollator->setStrength(icu::Collator::TERTIARY); 3084514f5e3Sopenharmony_ci break; 3094514f5e3Sopenharmony_ci case SensitivityOption::UNDEFINED: 3104514f5e3Sopenharmony_ci break; 3114514f5e3Sopenharmony_ci case SensitivityOption::EXCEPTION: 3124514f5e3Sopenharmony_ci LOG_ECMA(FATAL) << "this branch is unreachable"; 3134514f5e3Sopenharmony_ci UNREACHABLE(); 3144514f5e3Sopenharmony_ci } 3154514f5e3Sopenharmony_ci 3164514f5e3Sopenharmony_ci // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false). 3174514f5e3Sopenharmony_ci // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation. 3184514f5e3Sopenharmony_ci bool ignorePunctuation = false; 3194514f5e3Sopenharmony_ci bool defaultIgnorePunctuation = false; 3204514f5e3Sopenharmony_ci // If the ignorePunctuation is not defined, which in "th" locale that is true but false on other locales. 3214514f5e3Sopenharmony_ci JSHandle<EcmaString> thKey = factory->NewFromUtf8("th"); 3224514f5e3Sopenharmony_ci if (JSTaggedValue::Equal(thread, JSHandle<JSTaggedValue>::Cast(thKey), locales)) { 3234514f5e3Sopenharmony_ci defaultIgnorePunctuation = true; 3244514f5e3Sopenharmony_ci } 3254514f5e3Sopenharmony_ci JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(), 3264514f5e3Sopenharmony_ci defaultIgnorePunctuation, &ignorePunctuation); 3274514f5e3Sopenharmony_ci collator->SetIgnorePunctuation(ignorePunctuation); 3284514f5e3Sopenharmony_ci if (ignorePunctuation) { 3294514f5e3Sopenharmony_ci status = U_ZERO_ERROR; 3304514f5e3Sopenharmony_ci icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 3314514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 3324514f5e3Sopenharmony_ci } 3334514f5e3Sopenharmony_ci 3344514f5e3Sopenharmony_ci if (forIcuCache) { 3354514f5e3Sopenharmony_ci std::string cacheEntry = 3364514f5e3Sopenharmony_ci locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString(); 3374514f5e3Sopenharmony_ci thread->GetCurrentEcmaContext()->SetIcuFormatterToCache(IcuFormatterType::COLLATOR, 3384514f5e3Sopenharmony_ci cacheEntry, icuCollator.release(), JSCollator::FreeIcuCollator); 3394514f5e3Sopenharmony_ci } else { 3404514f5e3Sopenharmony_ci SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator); 3414514f5e3Sopenharmony_ci } 3424514f5e3Sopenharmony_ci collator->SetBoundCompare(thread, JSTaggedValue::Undefined()); 3434514f5e3Sopenharmony_ci // 29. Return collator. 3444514f5e3Sopenharmony_ci return collator; 3454514f5e3Sopenharmony_ci} 3464514f5e3Sopenharmony_ci 3474514f5e3Sopenharmony_ciicu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSTaggedValue &locales) 3484514f5e3Sopenharmony_ci{ 3494514f5e3Sopenharmony_ci std::string cacheEntry = locales.IsUndefined() ? "" : EcmaStringAccessor(locales).ToStdString(); 3504514f5e3Sopenharmony_ci void *cachedCollator = 3514514f5e3Sopenharmony_ci thread->GetCurrentEcmaContext()->GetIcuFormatterFromCache(IcuFormatterType::COLLATOR, cacheEntry); 3524514f5e3Sopenharmony_ci if (cachedCollator != nullptr) { 3534514f5e3Sopenharmony_ci return reinterpret_cast<icu::Collator*>(cachedCollator); 3544514f5e3Sopenharmony_ci } 3554514f5e3Sopenharmony_ci return nullptr; 3564514f5e3Sopenharmony_ci} 3574514f5e3Sopenharmony_ci 3584514f5e3Sopenharmony_ciicu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales) 3594514f5e3Sopenharmony_ci{ 3604514f5e3Sopenharmony_ci return GetCachedIcuCollator(thread, locales.GetTaggedValue()); 3614514f5e3Sopenharmony_ci} 3624514f5e3Sopenharmony_ci 3634514f5e3Sopenharmony_ciUColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption) 3644514f5e3Sopenharmony_ci{ 3654514f5e3Sopenharmony_ci auto iter = uColAttributeValueMap.find(caseFirstOption); 3664514f5e3Sopenharmony_ci if (iter != uColAttributeValueMap.end()) { 3674514f5e3Sopenharmony_ci return iter->second; 3684514f5e3Sopenharmony_ci } 3694514f5e3Sopenharmony_ci LOG_ECMA(FATAL) << "this branch is unreachable"; 3704514f5e3Sopenharmony_ci UNREACHABLE(); 3714514f5e3Sopenharmony_ci} 3724514f5e3Sopenharmony_ci 3734514f5e3Sopenharmony_ciJSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage) 3744514f5e3Sopenharmony_ci{ 3754514f5e3Sopenharmony_ci JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined()); 3764514f5e3Sopenharmony_ci auto globalConst = thread->GlobalConstants(); 3774514f5e3Sopenharmony_ci switch (usage) { 3784514f5e3Sopenharmony_ci case UsageOption::SORT: 3794514f5e3Sopenharmony_ci result.Update(globalConst->GetSortString()); 3804514f5e3Sopenharmony_ci break; 3814514f5e3Sopenharmony_ci case UsageOption::SEARCH: 3824514f5e3Sopenharmony_ci result.Update(globalConst->GetSearchString()); 3834514f5e3Sopenharmony_ci break; 3844514f5e3Sopenharmony_ci default: 3854514f5e3Sopenharmony_ci LOG_ECMA(FATAL) << "this branch is unreachable"; 3864514f5e3Sopenharmony_ci UNREACHABLE(); 3874514f5e3Sopenharmony_ci } 3884514f5e3Sopenharmony_ci return result; 3894514f5e3Sopenharmony_ci} 3904514f5e3Sopenharmony_ci 3914514f5e3Sopenharmony_ciJSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity) 3924514f5e3Sopenharmony_ci{ 3934514f5e3Sopenharmony_ci JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined()); 3944514f5e3Sopenharmony_ci auto globalConst = thread->GlobalConstants(); 3954514f5e3Sopenharmony_ci switch (sensitivity) { 3964514f5e3Sopenharmony_ci case SensitivityOption::BASE: 3974514f5e3Sopenharmony_ci result.Update(globalConst->GetBaseString()); 3984514f5e3Sopenharmony_ci break; 3994514f5e3Sopenharmony_ci case SensitivityOption::ACCENT: 4004514f5e3Sopenharmony_ci result.Update(globalConst->GetAccentString()); 4014514f5e3Sopenharmony_ci break; 4024514f5e3Sopenharmony_ci case SensitivityOption::CASE: 4034514f5e3Sopenharmony_ci result.Update(globalConst->GetCaseString()); 4044514f5e3Sopenharmony_ci break; 4054514f5e3Sopenharmony_ci case SensitivityOption::VARIANT: 4064514f5e3Sopenharmony_ci result.Update(globalConst->GetVariantString()); 4074514f5e3Sopenharmony_ci break; 4084514f5e3Sopenharmony_ci case SensitivityOption::UNDEFINED: 4094514f5e3Sopenharmony_ci break; 4104514f5e3Sopenharmony_ci default: 4114514f5e3Sopenharmony_ci LOG_ECMA(FATAL) << "this branch is unreachable"; 4124514f5e3Sopenharmony_ci UNREACHABLE(); 4134514f5e3Sopenharmony_ci } 4144514f5e3Sopenharmony_ci return result; 4154514f5e3Sopenharmony_ci} 4164514f5e3Sopenharmony_ci 4174514f5e3Sopenharmony_ciJSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst) 4184514f5e3Sopenharmony_ci{ 4194514f5e3Sopenharmony_ci JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined()); 4204514f5e3Sopenharmony_ci auto globalConst = thread->GlobalConstants(); 4214514f5e3Sopenharmony_ci switch (caseFirst) { 4224514f5e3Sopenharmony_ci case CaseFirstOption::UPPER: 4234514f5e3Sopenharmony_ci result.Update(globalConst->GetUpperString()); 4244514f5e3Sopenharmony_ci break; 4254514f5e3Sopenharmony_ci case CaseFirstOption::LOWER: 4264514f5e3Sopenharmony_ci result.Update(globalConst->GetLowerString()); 4274514f5e3Sopenharmony_ci break; 4284514f5e3Sopenharmony_ci case CaseFirstOption::FALSE_OPTION: 4294514f5e3Sopenharmony_ci result.Update(globalConst->GetFalseString()); 4304514f5e3Sopenharmony_ci break; 4314514f5e3Sopenharmony_ci case CaseFirstOption::UNDEFINED: 4324514f5e3Sopenharmony_ci result.Update(globalConst->GetUpperString()); 4334514f5e3Sopenharmony_ci break; 4344514f5e3Sopenharmony_ci default: 4354514f5e3Sopenharmony_ci LOG_ECMA(FATAL) << "this branch is unreachable"; 4364514f5e3Sopenharmony_ci UNREACHABLE(); 4374514f5e3Sopenharmony_ci } 4384514f5e3Sopenharmony_ci return result; 4394514f5e3Sopenharmony_ci} 4404514f5e3Sopenharmony_ci 4414514f5e3Sopenharmony_ci// 11.3.4 Intl.Collator.prototype.resolvedOptions () 4424514f5e3Sopenharmony_ciJSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator) 4434514f5e3Sopenharmony_ci{ 4444514f5e3Sopenharmony_ci auto ecmaVm = thread->GetEcmaVM(); 4454514f5e3Sopenharmony_ci auto globalConst = thread->GlobalConstants(); 4464514f5e3Sopenharmony_ci ObjectFactory *factory = ecmaVm->GetFactory(); 4474514f5e3Sopenharmony_ci JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv(); 4484514f5e3Sopenharmony_ci JSHandle<JSFunction> funCtor(env->GetObjectFunction()); 4494514f5e3Sopenharmony_ci JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor)); 4504514f5e3Sopenharmony_ci 4514514f5e3Sopenharmony_ci // [[Locale]] 4524514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString(); 4534514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> locale(thread, collator->GetLocale()); 4544514f5e3Sopenharmony_ci JSObject::CreateDataPropertyOrThrow(thread, options, property, locale); 4554514f5e3Sopenharmony_ci RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSObject, thread); 4564514f5e3Sopenharmony_ci 4574514f5e3Sopenharmony_ci // [[Usage]] 4584514f5e3Sopenharmony_ci UsageOption usageOption = collator->GetUsage(); 4594514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption); 4604514f5e3Sopenharmony_ci JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue); 4614514f5e3Sopenharmony_ci 4624514f5e3Sopenharmony_ci // [[Sensitivity]] 4634514f5e3Sopenharmony_ci auto sentivityOption = collator->GetSensitivity(); 4644514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption); 4654514f5e3Sopenharmony_ci JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue); 4664514f5e3Sopenharmony_ci 4674514f5e3Sopenharmony_ci // [[IgnorePunctuation]] 4684514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation())); 4694514f5e3Sopenharmony_ci JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(), 4704514f5e3Sopenharmony_ci ignorePunctuationValue); 4714514f5e3Sopenharmony_ci 4724514f5e3Sopenharmony_ci // [[Collation]] 4734514f5e3Sopenharmony_ci JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation()); 4744514f5e3Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 4754514f5e3Sopenharmony_ci icu::Collator *icuCollator = collator->GetIcuCollator(); 4764514f5e3Sopenharmony_ci icu::Locale icu_locale(icuCollator->getLocale(ULOC_VALID_LOCALE, status)); 4774514f5e3Sopenharmony_ci std::string collation_value = 4784514f5e3Sopenharmony_ci icu_locale.getUnicodeKeywordValue<std::string>("co", status); 4794514f5e3Sopenharmony_ci if (collationValue->IsUndefined()) { 4804514f5e3Sopenharmony_ci if (collation_value != "search" && collation_value != "") { 4814514f5e3Sopenharmony_ci collationValue.Update(factory->NewFromStdString(collation_value).GetTaggedValue()); 4824514f5e3Sopenharmony_ci } else { 4834514f5e3Sopenharmony_ci collationValue.Update(globalConst->GetDefaultString()); 4844514f5e3Sopenharmony_ci } 4854514f5e3Sopenharmony_ci } 4864514f5e3Sopenharmony_ci JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue); 4874514f5e3Sopenharmony_ci 4884514f5e3Sopenharmony_ci // [[Numeric]] 4894514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric())); 4904514f5e3Sopenharmony_ci JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue); 4914514f5e3Sopenharmony_ci 4924514f5e3Sopenharmony_ci // [[CaseFirst]] 4934514f5e3Sopenharmony_ci CaseFirstOption caseFirstOption = collator->GetCaseFirst(); 4944514f5e3Sopenharmony_ci // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined 4954514f5e3Sopenharmony_ci // the requirement maybe change in the future 4964514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption); 4974514f5e3Sopenharmony_ci JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue); 4984514f5e3Sopenharmony_ci return options; 4994514f5e3Sopenharmony_ci} 5004514f5e3Sopenharmony_ci 5014514f5e3Sopenharmony_ciCompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread, 5024514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> locales) 5034514f5e3Sopenharmony_ci{ 5044514f5e3Sopenharmony_ci if (locales->IsUndefined()) { 5054514f5e3Sopenharmony_ci auto context = thread->GetCurrentEcmaContext(); 5064514f5e3Sopenharmony_ci auto defaultCompareOption = context->GetDefaultCompareStringsOption(); 5074514f5e3Sopenharmony_ci if (defaultCompareOption.has_value()) { 5084514f5e3Sopenharmony_ci return defaultCompareOption.value(); 5094514f5e3Sopenharmony_ci } 5104514f5e3Sopenharmony_ci auto defaultLocale = intl::LocaleHelper::StdStringDefaultLocale(thread); 5114514f5e3Sopenharmony_ci for (const char *fastLocale : FAST_LOCALE) { 5124514f5e3Sopenharmony_ci if (strcmp(fastLocale, defaultLocale.c_str()) == 0) { 5134514f5e3Sopenharmony_ci context->SetDefaultCompareStringsOption(CompareStringsOption::TRY_FAST_PATH); 5144514f5e3Sopenharmony_ci return CompareStringsOption::TRY_FAST_PATH; 5154514f5e3Sopenharmony_ci } 5164514f5e3Sopenharmony_ci } 5174514f5e3Sopenharmony_ci context->SetDefaultCompareStringsOption(CompareStringsOption::NONE); 5184514f5e3Sopenharmony_ci return CompareStringsOption::NONE; 5194514f5e3Sopenharmony_ci } 5204514f5e3Sopenharmony_ci 5214514f5e3Sopenharmony_ci if (!locales->IsString()) { 5224514f5e3Sopenharmony_ci return CompareStringsOption::NONE; 5234514f5e3Sopenharmony_ci } 5244514f5e3Sopenharmony_ci 5254514f5e3Sopenharmony_ci JSHandle<EcmaString> localesString = JSHandle<EcmaString>::Cast(locales); 5264514f5e3Sopenharmony_ci CString localesStr = ConvertToString(*localesString, StringConvertedUsage::LOGICOPERATION); 5274514f5e3Sopenharmony_ci for (const char *fastLocale : FAST_LOCALE) { 5284514f5e3Sopenharmony_ci if (strcmp(fastLocale, localesStr.c_str()) == 0) { 5294514f5e3Sopenharmony_ci return CompareStringsOption::TRY_FAST_PATH; 5304514f5e3Sopenharmony_ci } 5314514f5e3Sopenharmony_ci } 5324514f5e3Sopenharmony_ci 5334514f5e3Sopenharmony_ci return CompareStringsOption::NONE; 5344514f5e3Sopenharmony_ci} 5354514f5e3Sopenharmony_ci 5364514f5e3Sopenharmony_ciCompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread, 5374514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> locales, 5384514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> options) 5394514f5e3Sopenharmony_ci{ 5404514f5e3Sopenharmony_ci if (!options->IsUndefined()) { 5414514f5e3Sopenharmony_ci return CompareStringsOption::NONE; 5424514f5e3Sopenharmony_ci } 5434514f5e3Sopenharmony_ci return CompareStringsOptionFor(thread, locales); 5444514f5e3Sopenharmony_ci} 5454514f5e3Sopenharmony_ci 5464514f5e3Sopenharmony_ci// Anonymous namespace for ComapreStrings 5474514f5e3Sopenharmony_cinamespace { 5484514f5e3Sopenharmony_ciconstexpr uint8_t COLLATION_WEIGHT_L1[256] = { 5494514f5e3Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 5504514f5e3Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 12, 16, 28, 38, 29, 27, 15, 5514514f5e3Sopenharmony_ci 17, 18, 24, 32, 9, 8, 14, 25, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10, 5524514f5e3Sopenharmony_ci 33, 34, 35, 13, 23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 5534514f5e3Sopenharmony_ci 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7, 30, 49, 50, 51, 5544514f5e3Sopenharmony_ci 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 5554514f5e3Sopenharmony_ci 72, 73, 74, 21, 36, 22, 37, 0, 5564514f5e3Sopenharmony_ci}; 5574514f5e3Sopenharmony_ciconstexpr uint8_t COLLATION_WEIGHT_L3[256] = { 5584514f5e3Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 5594514f5e3Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 5604514f5e3Sopenharmony_ci 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5614514f5e3Sopenharmony_ci 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5624514f5e3Sopenharmony_ci 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5634514f5e3Sopenharmony_ci 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5644514f5e3Sopenharmony_ci 1, 1, 1, 1, 1, 1, 1, 0, 5654514f5e3Sopenharmony_ci}; 5664514f5e3Sopenharmony_ciconstexpr int COLLATION_WEIGHT_LENGTH = sizeof(COLLATION_WEIGHT_L1) / sizeof(COLLATION_WEIGHT_L1[0]); 5674514f5e3Sopenharmony_ci 5684514f5e3Sopenharmony_ciconstexpr UCollationResult ToUCollationResult(int delta) 5694514f5e3Sopenharmony_ci{ 5704514f5e3Sopenharmony_ci return delta < 0 ? UCollationResult::UCOL_LESS 5714514f5e3Sopenharmony_ci : (delta > 0 ? UCollationResult::UCOL_GREATER 5724514f5e3Sopenharmony_ci : UCollationResult::UCOL_EQUAL); 5734514f5e3Sopenharmony_ci} 5744514f5e3Sopenharmony_ci 5754514f5e3Sopenharmony_cistruct FastCompareStringsData { 5764514f5e3Sopenharmony_ci UCollationResult l1Result = UCollationResult::UCOL_EQUAL; 5774514f5e3Sopenharmony_ci UCollationResult l3Result = UCollationResult::UCOL_EQUAL; 5784514f5e3Sopenharmony_ci int processedUntil = 0; 5794514f5e3Sopenharmony_ci int firstDiffAt = 0; // The first relevant diff (L1 if exists, else L3). 5804514f5e3Sopenharmony_ci bool hasDiff = false; 5814514f5e3Sopenharmony_ci 5824514f5e3Sopenharmony_ci std::optional<UCollationResult> FastCompareFailed(int& processedUntilOut) const 5834514f5e3Sopenharmony_ci { 5844514f5e3Sopenharmony_ci if (hasDiff) { 5854514f5e3Sopenharmony_ci // Found some difference, continue there to ensure the generic algorithm picks it up. 5864514f5e3Sopenharmony_ci processedUntilOut = firstDiffAt; 5874514f5e3Sopenharmony_ci } else { 5884514f5e3Sopenharmony_ci // No difference found, reprocess the last processed character since it may be 5894514f5e3Sopenharmony_ci // followed by a unicode combining character. 5904514f5e3Sopenharmony_ci processedUntilOut = std::max(processedUntil - 1, 0); 5914514f5e3Sopenharmony_ci } 5924514f5e3Sopenharmony_ci return {}; 5934514f5e3Sopenharmony_ci } 5944514f5e3Sopenharmony_ci}; 5954514f5e3Sopenharmony_ci 5964514f5e3Sopenharmony_citemplate <class T> 5974514f5e3Sopenharmony_ciconstexpr bool CanFastCompare(T ch) 5984514f5e3Sopenharmony_ci{ 5994514f5e3Sopenharmony_ci return ch < COLLATION_WEIGHT_LENGTH && COLLATION_WEIGHT_L1[ch] != 0; 6004514f5e3Sopenharmony_ci} 6014514f5e3Sopenharmony_ci 6024514f5e3Sopenharmony_ci// Check canFastCompare, L1 weight, and L3 weight together. 6034514f5e3Sopenharmony_ci// Use FastCompareStringsData to store these results. 6044514f5e3Sopenharmony_citemplate <class T1, class T2> 6054514f5e3Sopenharmony_cibool FastCompareFlatString(const T1* lhs, const T2* rhs, int length, FastCompareStringsData& fastCompareData) 6064514f5e3Sopenharmony_ci{ 6074514f5e3Sopenharmony_ci for (int i = 0; i < length; i++) { 6084514f5e3Sopenharmony_ci const T1 l = lhs[i]; 6094514f5e3Sopenharmony_ci const T2 r = rhs[i]; 6104514f5e3Sopenharmony_ci if (!CanFastCompare(l) || !CanFastCompare(r)) { 6114514f5e3Sopenharmony_ci fastCompareData.processedUntil = i; 6124514f5e3Sopenharmony_ci return false; 6134514f5e3Sopenharmony_ci } 6144514f5e3Sopenharmony_ci auto l1Result = ToUCollationResult(COLLATION_WEIGHT_L1[l] - COLLATION_WEIGHT_L1[r]); 6154514f5e3Sopenharmony_ci if (l1Result != UCollationResult::UCOL_EQUAL) { 6164514f5e3Sopenharmony_ci fastCompareData.hasDiff = true; 6174514f5e3Sopenharmony_ci fastCompareData.firstDiffAt = i; 6184514f5e3Sopenharmony_ci fastCompareData.processedUntil = i; 6194514f5e3Sopenharmony_ci fastCompareData.l1Result = l1Result; 6204514f5e3Sopenharmony_ci return true; 6214514f5e3Sopenharmony_ci } 6224514f5e3Sopenharmony_ci if (l != r && fastCompareData.l3Result == UCollationResult::UCOL_EQUAL) { 6234514f5e3Sopenharmony_ci auto l3Result = ToUCollationResult(COLLATION_WEIGHT_L3[l] - COLLATION_WEIGHT_L3[r]); 6244514f5e3Sopenharmony_ci fastCompareData.l3Result = l3Result; 6254514f5e3Sopenharmony_ci if (!fastCompareData.hasDiff) { 6264514f5e3Sopenharmony_ci fastCompareData.hasDiff = true; 6274514f5e3Sopenharmony_ci fastCompareData.firstDiffAt = i; 6284514f5e3Sopenharmony_ci } 6294514f5e3Sopenharmony_ci } 6304514f5e3Sopenharmony_ci } 6314514f5e3Sopenharmony_ci fastCompareData.processedUntil = length; 6324514f5e3Sopenharmony_ci return true; 6334514f5e3Sopenharmony_ci} 6344514f5e3Sopenharmony_ci 6354514f5e3Sopenharmony_cibool FastCompareStringFlatContent(EcmaString* string1, EcmaString* string2, 6364514f5e3Sopenharmony_ci int length, FastCompareStringsData& fastCompareData) 6374514f5e3Sopenharmony_ci{ 6384514f5e3Sopenharmony_ci EcmaStringAccessor string1Acc(string1); 6394514f5e3Sopenharmony_ci EcmaStringAccessor string2Acc(string2); 6404514f5e3Sopenharmony_ci if (string1Acc.IsUtf8()) { 6414514f5e3Sopenharmony_ci auto l = EcmaStringAccessor::GetNonTreeUtf8Data(string1); 6424514f5e3Sopenharmony_ci if (string2Acc.IsUtf8()) { 6434514f5e3Sopenharmony_ci auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2); 6444514f5e3Sopenharmony_ci return FastCompareFlatString(l, r, length, fastCompareData); 6454514f5e3Sopenharmony_ci } else { 6464514f5e3Sopenharmony_ci auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2); 6474514f5e3Sopenharmony_ci return FastCompareFlatString(l, r, length, fastCompareData); 6484514f5e3Sopenharmony_ci } 6494514f5e3Sopenharmony_ci } else { 6504514f5e3Sopenharmony_ci auto l = EcmaStringAccessor::GetNonTreeUtf16Data(string1); 6514514f5e3Sopenharmony_ci if (string2Acc.IsUtf8()) { 6524514f5e3Sopenharmony_ci auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2); 6534514f5e3Sopenharmony_ci return FastCompareFlatString(l, r, length, fastCompareData); 6544514f5e3Sopenharmony_ci } else { 6554514f5e3Sopenharmony_ci auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2); 6564514f5e3Sopenharmony_ci return FastCompareFlatString(l, r, length, fastCompareData); 6574514f5e3Sopenharmony_ci } 6584514f5e3Sopenharmony_ci } 6594514f5e3Sopenharmony_ci UNREACHABLE(); 6604514f5e3Sopenharmony_ci} 6614514f5e3Sopenharmony_ci 6624514f5e3Sopenharmony_cibool CharIsAsciiOrOutOfBounds(EcmaString* string, int stringLength, int index) 6634514f5e3Sopenharmony_ci{ 6644514f5e3Sopenharmony_ci return index >= stringLength || EcmaStringAccessor::IsASCIICharacter(EcmaStringAccessor(string).Get<false>(index)); 6654514f5e3Sopenharmony_ci} 6664514f5e3Sopenharmony_ci 6674514f5e3Sopenharmony_cibool CharCanFastCompareOrOutOfBounds(EcmaString* string, int stringLength, int index) 6684514f5e3Sopenharmony_ci{ 6694514f5e3Sopenharmony_ci return index >= stringLength || CanFastCompare(EcmaStringAccessor(string).Get<false>(index)); 6704514f5e3Sopenharmony_ci} 6714514f5e3Sopenharmony_ci 6724514f5e3Sopenharmony_ci// Pseudo-code for simplified multi-pass algorithm is: 6734514f5e3Sopenharmony_ci// // Only a certain subset of the ASCII range can be fast-compared. 6744514f5e3Sopenharmony_ci// // In the actual single-pass algorithm below, we tolerate non-ASCII contents. 6754514f5e3Sopenharmony_ci// 1. Check string1 and string2 can fastcompare. 6764514f5e3Sopenharmony_ci// 2. Compare L1 weight for each char, the greater wins. 6774514f5e3Sopenharmony_ci// 3. Is two strings are L1 equal in common length, the longer wins. 6784514f5e3Sopenharmony_ci// 4. Compare L3 weight for each char, the greater wins. 6794514f5e3Sopenharmony_ci// 5. If all equal, return equal. 6804514f5e3Sopenharmony_ci// 6. Once some chars cannot be fastcompared, use icu. 6814514f5e3Sopenharmony_ci 6824514f5e3Sopenharmony_cistd::optional<UCollationResult> TryFastCompareStrings([[maybe_unused]] const icu::Collator* icuCollator, 6834514f5e3Sopenharmony_ci EcmaString* string1, EcmaString* string2, 6844514f5e3Sopenharmony_ci int& processedUntilOut) 6854514f5e3Sopenharmony_ci{ 6864514f5e3Sopenharmony_ci processedUntilOut = 0; 6874514f5e3Sopenharmony_ci 6884514f5e3Sopenharmony_ci const auto length1 = static_cast<int>(EcmaStringAccessor(string1).GetLength()); 6894514f5e3Sopenharmony_ci const auto length2 = static_cast<int>(EcmaStringAccessor(string2).GetLength()); 6904514f5e3Sopenharmony_ci int commonLength = std::min(length1, length2); 6914514f5e3Sopenharmony_ci 6924514f5e3Sopenharmony_ci FastCompareStringsData fastCompareData; 6934514f5e3Sopenharmony_ci if (!FastCompareStringFlatContent(string1, string2, commonLength, fastCompareData)) { 6944514f5e3Sopenharmony_ci return fastCompareData.FastCompareFailed(processedUntilOut); 6954514f5e3Sopenharmony_ci } 6964514f5e3Sopenharmony_ci // The result is only valid if the last processed character is not followed 6974514f5e3Sopenharmony_ci // by a unicode combining character. 6984514f5e3Sopenharmony_ci if (!CharIsAsciiOrOutOfBounds(string1, length1, fastCompareData.processedUntil + 1) || 6994514f5e3Sopenharmony_ci !CharIsAsciiOrOutOfBounds(string2, length2, fastCompareData.processedUntil + 1)) { 7004514f5e3Sopenharmony_ci return fastCompareData.FastCompareFailed(processedUntilOut); 7014514f5e3Sopenharmony_ci } 7024514f5e3Sopenharmony_ci if (fastCompareData.l1Result != UCollationResult::UCOL_EQUAL) { 7034514f5e3Sopenharmony_ci return fastCompareData.l1Result; 7044514f5e3Sopenharmony_ci } 7054514f5e3Sopenharmony_ci // Strings are L1-equal up to their common length, length differences win. 7064514f5e3Sopenharmony_ci UCollationResult lengthResult = ToUCollationResult(length1 - length2); 7074514f5e3Sopenharmony_ci if (lengthResult != UCollationResult::UCOL_EQUAL) { 7084514f5e3Sopenharmony_ci // Strings of different lengths may still compare as equal if the longer 7094514f5e3Sopenharmony_ci // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}". 7104514f5e3Sopenharmony_ci if (!CharCanFastCompareOrOutOfBounds(string1, length1, commonLength) || 7114514f5e3Sopenharmony_ci !CharCanFastCompareOrOutOfBounds(string2, length2, commonLength)) { 7124514f5e3Sopenharmony_ci return fastCompareData.FastCompareFailed(processedUntilOut); 7134514f5e3Sopenharmony_ci } 7144514f5e3Sopenharmony_ci return lengthResult; 7154514f5e3Sopenharmony_ci } 7164514f5e3Sopenharmony_ci // L1-equal and same length, the L3 result wins. 7174514f5e3Sopenharmony_ci return fastCompareData.l3Result; 7184514f5e3Sopenharmony_ci} 7194514f5e3Sopenharmony_ci} // namespace 7204514f5e3Sopenharmony_ci 7214514f5e3Sopenharmony_ci//StringPiece is similar to std::string_view 7224514f5e3Sopenharmony_ciicu::StringPiece ToICUStringPiece(const JSHandle<EcmaString>& string, int offset = 0) 7234514f5e3Sopenharmony_ci{ 7244514f5e3Sopenharmony_ci EcmaStringAccessor stringAcc(string); 7254514f5e3Sopenharmony_ci ASSERT(stringAcc.IsUtf8()); 7264514f5e3Sopenharmony_ci ASSERT(!stringAcc.IsTreeString()); 7274514f5e3Sopenharmony_ci return icu::StringPiece(reinterpret_cast<const char*>(EcmaStringAccessor::GetNonTreeUtf8Data(*string)) + offset, 7284514f5e3Sopenharmony_ci static_cast<int>(stringAcc.GetLength()) - offset); 7294514f5e3Sopenharmony_ci} 7304514f5e3Sopenharmony_ci 7314514f5e3Sopenharmony_ci// Convert to a UTF16 string and partially convert to ICUUnicodeString 7324514f5e3Sopenharmony_ciicu::UnicodeString ToICUUnicodeString(const JSHandle<EcmaString> &string, int offset = 0) 7334514f5e3Sopenharmony_ci{ 7344514f5e3Sopenharmony_ci EcmaStringAccessor stringAcc(string); 7354514f5e3Sopenharmony_ci ASSERT(!stringAcc.IsTreeString()); 7364514f5e3Sopenharmony_ci int strLength = static_cast<int>(stringAcc.GetLength()); 7374514f5e3Sopenharmony_ci int partialLength = strLength - offset; 7384514f5e3Sopenharmony_ci if (stringAcc.IsUtf8()) { 7394514f5e3Sopenharmony_ci constexpr int shortStringLength = 80; // 80: short string length 7404514f5e3Sopenharmony_ci if (partialLength <= shortStringLength) { 7414514f5e3Sopenharmony_ci // short string on stack 7424514f5e3Sopenharmony_ci UChar shortStringBuffer[shortStringLength]; 7434514f5e3Sopenharmony_ci // utf8 is within ascii, std::copy_n from utf8 to utf16 is OK 7444514f5e3Sopenharmony_ci std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(*string) + offset, partialLength, shortStringBuffer); 7454514f5e3Sopenharmony_ci return icu::UnicodeString(shortStringBuffer, partialLength); 7464514f5e3Sopenharmony_ci } 7474514f5e3Sopenharmony_ci CVector<uint16_t> ucharBuffer(partialLength); 7484514f5e3Sopenharmony_ci std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(*string) + offset, partialLength, ucharBuffer.begin()); 7494514f5e3Sopenharmony_ci return icu::UnicodeString(ucharBuffer.data(), partialLength); 7504514f5e3Sopenharmony_ci } else { 7514514f5e3Sopenharmony_ci return icu::UnicodeString(EcmaStringAccessor::GetNonTreeUtf16Data(*string) + offset, partialLength); 7524514f5e3Sopenharmony_ci } 7534514f5e3Sopenharmony_ci} 7544514f5e3Sopenharmony_ci 7554514f5e3Sopenharmony_ciJSTaggedValue JSCollator::CompareStrings(JSThread *thread, const icu::Collator *icuCollator, 7564514f5e3Sopenharmony_ci const JSHandle<EcmaString> &string1, const JSHandle<EcmaString> &string2, 7574514f5e3Sopenharmony_ci [[maybe_unused]]CompareStringsOption csOption) 7584514f5e3Sopenharmony_ci{ 7594514f5e3Sopenharmony_ci if (*string1 == *string2) { 7604514f5e3Sopenharmony_ci return JSTaggedValue(UCollationResult::UCOL_EQUAL); 7614514f5e3Sopenharmony_ci } 7624514f5e3Sopenharmony_ci 7634514f5e3Sopenharmony_ci // Since Unicode has ignorable characters, 7644514f5e3Sopenharmony_ci // we cannot return early for 0-length strings. 7654514f5e3Sopenharmony_ci auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1)); 7664514f5e3Sopenharmony_ci auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2)); 7674514f5e3Sopenharmony_ci 7684514f5e3Sopenharmony_ci int processedUntil = 0; 7694514f5e3Sopenharmony_ci if (csOption == CompareStringsOption::TRY_FAST_PATH) { 7704514f5e3Sopenharmony_ci auto maybeResult = TryFastCompareStrings(icuCollator, *flatString1, *flatString2, processedUntil); 7714514f5e3Sopenharmony_ci if (maybeResult.has_value()) { 7724514f5e3Sopenharmony_ci return JSTaggedValue(maybeResult.value()); 7734514f5e3Sopenharmony_ci } 7744514f5e3Sopenharmony_ci } 7754514f5e3Sopenharmony_ci 7764514f5e3Sopenharmony_ci UCollationResult result; 7774514f5e3Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 7784514f5e3Sopenharmony_ci if (EcmaStringAccessor(flatString1).IsUtf8() && EcmaStringAccessor(flatString2).IsUtf8()) { 7794514f5e3Sopenharmony_ci auto string1Piece = ToICUStringPiece(flatString1, processedUntil); 7804514f5e3Sopenharmony_ci if (!string1Piece.empty()) { 7814514f5e3Sopenharmony_ci auto string2Piece = ToICUStringPiece(flatString2, processedUntil); 7824514f5e3Sopenharmony_ci if (!string2Piece.empty()) { 7834514f5e3Sopenharmony_ci result = icuCollator->compareUTF8(string1Piece, string2Piece, status); 7844514f5e3Sopenharmony_ci return JSTaggedValue(result); 7854514f5e3Sopenharmony_ci } 7864514f5e3Sopenharmony_ci } 7874514f5e3Sopenharmony_ci } 7884514f5e3Sopenharmony_ci 7894514f5e3Sopenharmony_ci auto uString1 = ToICUUnicodeString(flatString1, processedUntil); 7904514f5e3Sopenharmony_ci auto uString2 = ToICUUnicodeString(flatString2, processedUntil); 7914514f5e3Sopenharmony_ci result = icuCollator->compare(uString1, uString2, status); 7924514f5e3Sopenharmony_ci ASSERT(U_SUCCESS(status)); 7934514f5e3Sopenharmony_ci 7944514f5e3Sopenharmony_ci return JSTaggedValue(result); 7954514f5e3Sopenharmony_ci} 7964514f5e3Sopenharmony_ci} // namespace panda::ecmascript 797