1/* 2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "ecmascript/intl/locale_helper.h" 17 18#include "ecmascript/checkpoint/thread_state_transition.h" 19#include "ecmascript/ecma_context.h" 20#include "ecmascript/global_env.h" 21#include "ecmascript/checkpoint/thread_state_transition.h" 22 23#if defined(__clang__) 24#pragma clang diagnostic push 25#pragma clang diagnostic ignored "-Wshadow" 26#elif defined(__GNUC__) 27#pragma GCC diagnostic push 28#pragma GCC diagnostic ignored "-Wshadow" 29#endif 30#include "unicode/localebuilder.h" 31#if defined(__clang__) 32#pragma clang diagnostic pop 33#elif defined(__GNUC__) 34#pragma GCC diagnostic pop 35#endif 36 37namespace panda::ecmascript::intl { 38JSHandle<EcmaString> LocaleHelper::UStringToString(JSThread *thread, const icu::UnicodeString &string) 39{ 40 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 41 return factory->NewFromUtf16(reinterpret_cast<const uint16_t *>(string.getBuffer()), string.length()); 42} 43 44JSHandle<EcmaString> LocaleHelper::UStringToString(JSThread *thread, const icu::UnicodeString &string, int32_t begin, 45 int32_t end) 46{ 47 return UStringToString(thread, string.tempSubStringBetween(begin, end)); 48} 49 50// 9.2.1 CanonicalizeLocaleList ( locales ) 51JSHandle<TaggedArray> LocaleHelper::CanonicalizeLocaleList(JSThread *thread, const JSHandle<JSTaggedValue> &locales) 52{ 53 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 54 // 1. If locales is undefined, then 55 // a. Return a new empty List. 56 if (locales->IsUndefined()) { 57 return factory->EmptyArray(); 58 } 59 // 2. Let seen be a new empty List. 60 JSHandle<TaggedArray> localeSeen = factory->NewTaggedArray(1); 61 // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot, 62 // then 63 // a. Let O be CreateArrayFromList(« locales »). 64 // 4. Else, 65 // a.Let O be ? ToObject(locales). 66 if (locales->IsString()) { 67 JSHandle<EcmaString> tag = JSHandle<EcmaString>::Cast(locales); 68 JSHandle<TaggedArray> temp = factory->NewTaggedArray(1); 69 temp->Set(thread, 0, tag.GetTaggedValue()); 70 JSHandle<JSArray> obj = JSArray::CreateArrayFromList(thread, temp); 71 JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSArray>(thread, obj, localeSeen); 72 return finalSeen; 73#ifdef ARK_SUPPORT_INTL 74 } else if (locales->IsJSLocale()) { 75 JSHandle<EcmaString> tag = JSLocale::ToString(thread, JSHandle<JSLocale>::Cast(locales)); 76 JSHandle<TaggedArray> temp = factory->NewTaggedArray(1); 77 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 78 temp->Set(thread, 0, tag.GetTaggedValue()); 79 JSHandle<JSArray> obj = JSArray::CreateArrayFromList(thread, temp); 80 JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSArray>(thread, obj, localeSeen); 81 return finalSeen; 82#endif 83 } else { 84 JSHandle<JSObject> obj = JSTaggedValue::ToObject(thread, locales); 85 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 86 JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSObject>(thread, obj, localeSeen); 87 return finalSeen; 88 } 89 return localeSeen; 90} 91 92template<typename T> 93JSHandle<TaggedArray> LocaleHelper::CanonicalizeHelper(JSThread *thread, JSHandle<T> &obj, JSHandle<TaggedArray> &seen) 94{ 95 OperationResult operationResult = JSTaggedValue::GetProperty(thread, JSHandle<JSTaggedValue>::Cast(obj), 96 thread->GlobalConstants()->GetHandledLengthString()); 97 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 98 JSTaggedNumber len = JSTaggedValue::ToLength(thread, operationResult.GetValue()); 99 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 100 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 101 // 2. Let seen be a new empty List. 102 uint32_t requestedLocalesLen = len.ToUint32(); 103 seen = factory->NewTaggedArray(requestedLocalesLen); 104 // 6. Let k be 0. 105 // 7. Repeat, while k < len 106 JSMutableHandle<JSTaggedValue> pk(thread, JSTaggedValue::Undefined()); 107 JSMutableHandle<JSTaggedValue> tag(thread, JSTaggedValue::Undefined()); 108 uint32_t index = 0; 109 JSHandle<JSTaggedValue> objTagged = JSHandle<JSTaggedValue>::Cast(obj); 110 for (uint32_t k = 0; k < requestedLocalesLen; k++) { 111 // a. Let Pk be ToString(k). 112 JSHandle<JSTaggedValue> kHandle(thread, JSTaggedValue(k)); 113 JSHandle<EcmaString> str = JSTaggedValue::ToString(thread, kHandle); 114 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 115 pk.Update(str.GetTaggedValue()); 116 // b. Let kPresent be ? HasProperty(O, Pk). 117 bool kPresent = JSTaggedValue::HasProperty(thread, objTagged, pk); 118 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 119 120 // c. If kPresent is true, then 121 if (kPresent) { 122 // i. Let kValue be ? Get(O, Pk). 123 OperationResult result = JSTaggedValue::GetProperty(thread, objTagged, pk); 124 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 125 JSHandle<JSTaggedValue> kValue = result.GetValue(); 126 // ii. If Type(kValue) is not String or Object, throw a TypeError exception. 127 if (!kValue->IsString() && !kValue->IsJSObject()) { 128 THROW_TYPE_ERROR_AND_RETURN(thread, "kValue is not String or Object.", factory->EmptyArray()); 129 } 130 // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then 131 // 1. Let tag be kValue.[[Locale]]. 132 // iv. Else, 133 // 1. Let tag be ? ToString(kValue). 134#ifdef ARK_SUPPORT_INTL 135 if (kValue->IsJSLocale()) { 136 JSHandle<EcmaString> kValueStr = JSLocale::ToString(thread, JSHandle<JSLocale>::Cast(kValue)); 137 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 138 tag.Update(kValueStr.GetTaggedValue()); 139 } else { 140 JSHandle<EcmaString> kValueString = JSTaggedValue::ToString(thread, kValue); 141 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 142 JSHandle<EcmaString> canonicalStr = CanonicalizeUnicodeLocaleId(thread, kValueString); 143 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 144 tag.Update(canonicalStr.GetTaggedValue()); 145 } 146#else 147 JSHandle<EcmaString> kValueString = JSTaggedValue::ToString(thread, kValue); 148 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 149 JSHandle<EcmaString> canonicalStr = CanonicalizeUnicodeLocaleId(thread, kValueString); 150 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread); 151 tag.Update(canonicalStr.GetTaggedValue()); 152#endif 153 // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen. 154 bool isExist = false; 155 uint32_t seenLen = seen->GetLength(); 156 for (uint32_t i = 0; i < seenLen; i++) { 157 if (JSTaggedValue::SameValue(seen->Get(thread, i), tag.GetTaggedValue())) { 158 isExist = true; 159 } 160 } 161 if (!isExist) { 162 seen->Set(thread, index++, JSHandle<JSTaggedValue>::Cast(tag)); 163 } 164 } 165 // d. Increase k by 1. 166 } 167 // set capacity 168 seen = TaggedArray::SetCapacity(thread, seen, index); 169 // 8. Return seen. 170 return seen; 171} 172 173// 6.2.3 CanonicalizeUnicodeLocaleId( locale ) 174JSHandle<EcmaString> LocaleHelper::CanonicalizeUnicodeLocaleId(JSThread *thread, const JSHandle<EcmaString> &locale) 175{ 176 [[maybe_unused]] ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 177 if (!IsStructurallyValidLanguageTag(locale)) { 178 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString()); 179 } 180 181 if (EcmaStringAccessor(locale).GetLength() == 0 || EcmaStringAccessor(locale).IsUtf16()) { 182 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString()); 183 } 184 185 std::string localeCStr = ConvertToStdString(locale); 186 std::transform(localeCStr.begin(), localeCStr.end(), localeCStr.begin(), AsciiAlphaToLower); 187 UErrorCode status = U_ZERO_ERROR; 188 icu::Locale formalLocale; 189 { 190 // Third party libs call can be in Native state 191 ThreadNativeScope nativeScope(thread); 192 formalLocale = icu::Locale::forLanguageTag(localeCStr.c_str(), status); 193 } 194 if ((U_FAILURE(status) != 0) || (formalLocale.isBogus() != 0)) { 195 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString()); 196 } 197 198 // Resets the LocaleBuilder to match the locale. 199 // Returns an instance of Locale created from the fields set on this builder. 200 formalLocale = icu::LocaleBuilder().setLocale(formalLocale).build(status); 201 // Canonicalize the locale ID of this object according to CLDR. 202 formalLocale.canonicalize(status); 203 if ((U_FAILURE(status) != 0) || (formalLocale.isBogus() != 0)) { 204 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString()); 205 } 206 JSHandle<EcmaString> languageTag = ToLanguageTag(thread, formalLocale); 207 RETURN_HANDLE_IF_ABRUPT_COMPLETION(EcmaString, thread); 208 return languageTag; 209} 210 211std::string LocaleHelper::ToStdStringLanguageTag(JSThread *thread, const icu::Locale &locale) 212{ 213 UErrorCode status = U_ZERO_ERROR; 214 auto result = locale.toLanguageTag<std::string>(status); 215 if (U_FAILURE(status) != 0) { 216 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", ""); 217 } 218 size_t findBeginning = result.find("-u-"); 219 std::string finalRes; 220 std::string tempRes; 221 if (findBeginning == std::string::npos) { 222 return result; 223 } 224 size_t specialBeginning = findBeginning + INTL_INDEX_THREE; 225 size_t specialCount = 0; 226 while ((specialBeginning < result.size()) && (result[specialBeginning] != '-')) { 227 specialCount++; 228 specialBeginning++; 229 } 230 thread->CheckSafepointIfSuspended(); 231 if (findBeginning != std::string::npos) { 232 // It begin with "-u-xx" or with more elements. 233 tempRes = result.substr(0, findBeginning + INTL_INDEX_THREE + specialCount); 234 if (result.size() <= findBeginning + INTL_INDEX_THREE + specialCount) { 235 return result; 236 } 237 std::string leftStr = result.substr(findBeginning + INTL_INDEX_THREE + specialCount + 1); 238 std::istringstream temp(leftStr); 239 std::string buffer; 240 std::vector<std::string> resContainer; 241 while (getline(temp, buffer, '-')) { 242 if (buffer != "true" && buffer != "yes") { 243 resContainer.push_back(buffer); 244 } 245 } 246 for (auto it = resContainer.begin(); it != resContainer.end(); it++) { 247 std::string tag = "-"; 248 tag += *it; 249 finalRes += tag; 250 } 251 } 252 if (!finalRes.empty()) { 253 tempRes += finalRes; 254 } 255 result = tempRes; 256 return result; 257} 258 259JSHandle<EcmaString> LocaleHelper::ToLanguageTag(JSThread *thread, const icu::Locale &locale) 260{ 261 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 262 return factory->NewFromStdString(ToStdStringLanguageTag(thread, locale)); 263} 264 265// 6.2.2 IsStructurallyValidLanguageTag( locale ) 266bool LocaleHelper::IsStructurallyValidLanguageTag(const JSHandle<EcmaString> &tag) 267{ 268 std::string tagCollection = ConvertToStdString(tag); 269 std::vector<std::string> containers; 270 std::string substring; 271 size_t address = 1; 272 for (auto it = tagCollection.begin(); it != tagCollection.end(); it++) { 273 if (*it != '-' && it != tagCollection.end() - 1) { 274 substring += *it; 275 } else { 276 if (it == tagCollection.end() - 1) { 277 substring += *it; 278 } 279 containers.push_back(substring); 280 if (!IsVariantSubtag(substring, containers)) { 281 return false; 282 } 283 substring.clear(); 284 } 285 } 286 bool result = DealwithLanguageTag(containers, address); 287 return result; 288} 289 290bool LocaleHelper::IsVariantSubtag(std::string substring, std::vector<std::string> containers) 291{ 292 if (IsVariantSubtag(substring)) { 293 std::transform(substring.begin(), substring.end(), substring.begin(), AsciiAlphaToLower); 294 // Ignore the first tag when checking for duplicate subtags. 295 if (std::count(containers.begin(), containers.end(), substring) > INTL_INDEX_TWO) { 296 return false; 297 } 298 } 299 return true; 300} 301 302std::string LocaleHelper::ConvertToStdString(const JSHandle<EcmaString> &ecmaStr) 303{ 304 return std::string(ConvertToString(*ecmaStr, StringConvertedUsage::LOGICOPERATION)); 305} 306 307bool LocaleHelper::DealwithLanguageTag(const std::vector<std::string> &containers, size_t &address) 308{ 309 // The abstract operation returns true if locale can be generated from the ABNF grammar in section 2.1 of the RFC, 310 // starting with Language-Tag, and does not contain duplicate variant or singleton subtags 311 // If language tag is empty, return false. 312 if (containers.empty()) { 313 return false; 314 } 315 316 // a. if the first tag is not language, return false. 317 if (!IsLanguageSubtag(containers[0])) { 318 return false; 319 } 320 321 // if the tag include language only, like "zh" or "de", return true; 322 if (containers.size() == 1) { 323 return true; 324 } 325 326 // Else, then 327 // if is unique singleton subtag, script and region tag. 328 if (IsExtensionSingleton(containers[1])) { 329 return true; 330 } 331 332 if (IsScriptSubtag(containers[address])) { 333 address++; 334 if (containers.size() == address) { 335 return true; 336 } 337 } 338 339 if (IsRegionSubtag(containers[address])) { 340 address++; 341 } 342 343 for (size_t i = address; i < containers.size(); i++) { 344 if (IsExtensionSingleton(containers[i])) { 345 return true; 346 } 347 if (!IsVariantSubtag(containers[i])) { 348 return false; 349 } 350 } 351 return true; 352} 353 354// 6.2.4 DefaultLocale () 355JSHandle<EcmaString> LocaleHelper::DefaultLocale(JSThread *thread) 356{ 357 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 358 return factory->NewFromStdString(StdStringDefaultLocale(thread)); 359} 360 361const std::string& LocaleHelper::StdStringDefaultLocale(JSThread *thread) 362{ 363 auto context = thread->GetCurrentEcmaContext(); 364 const std::string& cachedLocale = context->GetDefaultLocale(); 365 if (!cachedLocale.empty()) { 366 return cachedLocale; 367 } 368 icu::Locale defaultLocale; 369 if (strcmp(defaultLocale.getName(), "en_US_POSIX") == 0 || strcmp(defaultLocale.getName(), "c") == 0) { 370 context->SetDefaultLocale("en-US"); 371 } else if (defaultLocale.isBogus() != 0) { 372 context->SetDefaultLocale("und"); 373 } else { 374 context->SetDefaultLocale(ToStdStringLanguageTag(thread, defaultLocale)); 375 } 376 return context->GetDefaultLocale(); 377} 378 379void LocaleHelper::HandleLocaleExtension(size_t &start, size_t &extensionEnd, const std::string result, size_t len) 380{ 381 while (start < len - INTL_INDEX_TWO) { 382 if (result[start] != '-') { 383 start++; 384 continue; 385 } 386 if (result[start + INTL_INDEX_TWO] == '-') { 387 extensionEnd = start; 388 break; 389 } 390 start += INTL_INDEX_THREE; 391 } 392} 393 394LocaleHelper::ParsedLocale LocaleHelper::HandleLocale(const JSHandle<EcmaString> &localeString) 395{ 396 return LocaleHelper::HandleLocale(ConvertToStdString(localeString)); 397} 398 399LocaleHelper::ParsedLocale LocaleHelper::HandleLocale(const std::string &localeString) 400{ 401 size_t len = localeString.size(); 402 ParsedLocale parsedResult; 403 404 // a. The single-character subtag ’x’ as the primary subtag indicates 405 // that the language tag consists solely of subtags whose meaning is 406 // defined by private agreement. 407 // b. Extensions cannot be used in tags that are entirely private use. 408 if (IsPrivateSubTag(localeString, len)) { 409 parsedResult.base = localeString; 410 return parsedResult; 411 } 412 // If cannot find "-u-", return the whole string as base. 413 size_t foundExtension = localeString.find("-u-"); 414 if (foundExtension == std::string::npos) { 415 parsedResult.base = localeString; 416 return parsedResult; 417 } 418 // Let privateIndex be Call(%StringProto_indexOf%, foundLocale, « "-x-" »). 419 size_t privateIndex = localeString.find("-x-"); 420 if (privateIndex != std::string::npos && privateIndex < foundExtension) { 421 parsedResult.base = localeString; 422 return parsedResult; 423 } 424 const std::string basis = localeString.substr(0, foundExtension); 425 size_t extensionEnd = len; 426 ASSERT(len > INTL_INDEX_TWO); 427 size_t start = foundExtension + 1; 428 HandleLocaleExtension(start, extensionEnd, localeString, len); 429 const std::string end = localeString.substr(extensionEnd); 430 parsedResult.base = basis + end; 431 parsedResult.extension = localeString.substr(foundExtension, extensionEnd - foundExtension); 432 return parsedResult; 433} 434 435std::vector<std::string> LocaleHelper::GetAvailableLocales(JSThread *thread, const char *localeKey, 436 const char *localePath) 437{ 438 UErrorCode status = U_ZERO_ERROR; 439 auto globalConst = thread->GlobalConstants(); 440 JSHandle<EcmaString> specialValue = JSHandle<EcmaString>::Cast(globalConst->GetHandledEnUsPosixString()); 441 std::string specialString = ConvertToStdString(specialValue); 442 UEnumeration *uenum = nullptr; 443 { 444 ThreadNativeScope nativeScope(thread); 445 uenum = uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status); 446 } 447 std::vector<std::string> allLocales; 448 const char *loc = nullptr; 449 // Third party libs computing can be in Native state 450 ThreadNativeScope nativeScope(thread); 451 for (loc = uenum_next(uenum, nullptr, &status); loc != nullptr; loc = uenum_next(uenum, nullptr, &status)) { 452 ASSERT(U_SUCCESS(status)); 453 std::string locStr(loc); 454 std::replace(locStr.begin(), locStr.end(), '_', '-'); 455 if (locStr == specialString) { 456 locStr = "en-US-u-va-posix"; 457 } 458 459 if (localePath != nullptr || localeKey != nullptr) { 460 icu::Locale locale(locStr.c_str()); 461 bool res = false; 462 if (!CheckLocales(locale, localeKey, localePath, res)) { 463 continue; 464 } 465 } 466 allLocales.push_back(locStr); 467 icu::Locale formalLocale = icu::Locale::createCanonical(locStr.c_str()); 468 std::string scriptStr = formalLocale.getScript(); 469 if (!scriptStr.empty()) { 470 std::string languageStr = formalLocale.getLanguage(); 471 std::string countryStr = formalLocale.getCountry(); 472 std::string shortLocale = icu::Locale(languageStr.c_str(), countryStr.c_str()).getName(); 473 std::replace(shortLocale.begin(), shortLocale.end(), '_', '-'); 474 allLocales.push_back(shortLocale); 475 } 476 } 477 uenum_close(uenum); 478 return allLocales; 479} 480 481// 9.2.2 BestAvailableLocale ( availableLocales, locale ) 482std::string LocaleHelper::BestAvailableLocale(const std::vector<std::string> &availableLocales, 483 const std::string &locale) 484{ 485 // 1. Let candidate be locale. 486 std::string localeCandidate = locale; 487 std::string undefined = std::string(); 488 // 2. Repeat, 489 uint32_t length = availableLocales.size(); 490 while (true) { 491 // a. If availableLocales contains an element equal to candidate, return candidate. 492 for (uint32_t i = 0; i < length; ++i) { 493 std::string itemStr = availableLocales[i]; 494 if (itemStr == localeCandidate) { 495 return localeCandidate; 496 } 497 } 498 // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. 499 // If that character does not occur, return undefined. 500 size_t pos = localeCandidate.rfind('-'); 501 if (pos == std::string::npos) { 502 return undefined; 503 } 504 // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2. 505 if (pos >= INTL_INDEX_TWO && localeCandidate[pos - INTL_INDEX_TWO] == '-') { 506 pos -= INTL_INDEX_TWO; 507 } 508 // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive. 509 localeCandidate.resize(pos); 510 } 511} 512} // namespace panda::ecmascript::base