1/*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "ecmascript/intl/locale_helper.h"
17
18#include "ecmascript/checkpoint/thread_state_transition.h"
19#include "ecmascript/ecma_context.h"
20#include "ecmascript/global_env.h"
21#include "ecmascript/checkpoint/thread_state_transition.h"
22
23#if defined(__clang__)
24#pragma clang diagnostic push
25#pragma clang diagnostic ignored "-Wshadow"
26#elif defined(__GNUC__)
27#pragma GCC diagnostic push
28#pragma GCC diagnostic ignored "-Wshadow"
29#endif
30#include "unicode/localebuilder.h"
31#if defined(__clang__)
32#pragma clang diagnostic pop
33#elif defined(__GNUC__)
34#pragma GCC diagnostic pop
35#endif
36
37namespace panda::ecmascript::intl {
38JSHandle<EcmaString> LocaleHelper::UStringToString(JSThread *thread, const icu::UnicodeString &string)
39{
40    ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
41    return factory->NewFromUtf16(reinterpret_cast<const uint16_t *>(string.getBuffer()), string.length());
42}
43
44JSHandle<EcmaString> LocaleHelper::UStringToString(JSThread *thread, const icu::UnicodeString &string, int32_t begin,
45                                                   int32_t end)
46{
47    return UStringToString(thread, string.tempSubStringBetween(begin, end));
48}
49
50// 9.2.1 CanonicalizeLocaleList ( locales )
51JSHandle<TaggedArray> LocaleHelper::CanonicalizeLocaleList(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
52{
53    ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
54    // 1. If locales is undefined, then
55    //    a. Return a new empty List.
56    if (locales->IsUndefined()) {
57        return factory->EmptyArray();
58    }
59    // 2. Let seen be a new empty List.
60    JSHandle<TaggedArray> localeSeen = factory->NewTaggedArray(1);
61    // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot,
62    //    then
63    //    a. Let O be CreateArrayFromList(« locales »).
64    // 4. Else,
65    //    a.Let O be ? ToObject(locales).
66    if (locales->IsString()) {
67        JSHandle<EcmaString> tag = JSHandle<EcmaString>::Cast(locales);
68        JSHandle<TaggedArray> temp = factory->NewTaggedArray(1);
69        temp->Set(thread, 0, tag.GetTaggedValue());
70        JSHandle<JSArray> obj = JSArray::CreateArrayFromList(thread, temp);
71        JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSArray>(thread, obj, localeSeen);
72        return finalSeen;
73#ifdef ARK_SUPPORT_INTL
74    } else if (locales->IsJSLocale()) {
75        JSHandle<EcmaString> tag = JSLocale::ToString(thread, JSHandle<JSLocale>::Cast(locales));
76        JSHandle<TaggedArray> temp = factory->NewTaggedArray(1);
77        RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
78        temp->Set(thread, 0, tag.GetTaggedValue());
79        JSHandle<JSArray> obj = JSArray::CreateArrayFromList(thread, temp);
80        JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSArray>(thread, obj, localeSeen);
81        return finalSeen;
82#endif
83    } else {
84        JSHandle<JSObject> obj = JSTaggedValue::ToObject(thread, locales);
85        RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
86        JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSObject>(thread, obj, localeSeen);
87        return finalSeen;
88    }
89    return localeSeen;
90}
91
92template<typename T>
93JSHandle<TaggedArray> LocaleHelper::CanonicalizeHelper(JSThread *thread, JSHandle<T> &obj, JSHandle<TaggedArray> &seen)
94{
95    OperationResult operationResult = JSTaggedValue::GetProperty(thread, JSHandle<JSTaggedValue>::Cast(obj),
96                                                                 thread->GlobalConstants()->GetHandledLengthString());
97    RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
98    JSTaggedNumber len = JSTaggedValue::ToLength(thread, operationResult.GetValue());
99    RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
100    ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
101    // 2. Let seen be a new empty List.
102    uint32_t requestedLocalesLen = len.ToUint32();
103    seen = factory->NewTaggedArray(requestedLocalesLen);
104    // 6. Let k be 0.
105    // 7. Repeat, while k < len
106    JSMutableHandle<JSTaggedValue> pk(thread, JSTaggedValue::Undefined());
107    JSMutableHandle<JSTaggedValue> tag(thread, JSTaggedValue::Undefined());
108    uint32_t index = 0;
109    JSHandle<JSTaggedValue> objTagged = JSHandle<JSTaggedValue>::Cast(obj);
110    for (uint32_t k = 0; k < requestedLocalesLen; k++) {
111        // a. Let Pk be ToString(k).
112        JSHandle<JSTaggedValue> kHandle(thread, JSTaggedValue(k));
113        JSHandle<EcmaString> str = JSTaggedValue::ToString(thread, kHandle);
114        RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
115        pk.Update(str.GetTaggedValue());
116        // b. Let kPresent be ? HasProperty(O, Pk).
117        bool kPresent = JSTaggedValue::HasProperty(thread, objTagged, pk);
118        RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
119
120        // c. If kPresent is true, then
121        if (kPresent) {
122            // i. Let kValue be ? Get(O, Pk).
123            OperationResult result = JSTaggedValue::GetProperty(thread, objTagged, pk);
124            RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
125            JSHandle<JSTaggedValue> kValue = result.GetValue();
126            // ii. If Type(kValue) is not String or Object, throw a TypeError exception.
127            if (!kValue->IsString() && !kValue->IsJSObject()) {
128                THROW_TYPE_ERROR_AND_RETURN(thread, "kValue is not String or Object.", factory->EmptyArray());
129            }
130            // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then
131            //        1. Let tag be kValue.[[Locale]].
132            // iv.  Else,
133            //        1. Let tag be ? ToString(kValue).
134#ifdef ARK_SUPPORT_INTL
135            if (kValue->IsJSLocale()) {
136                JSHandle<EcmaString> kValueStr = JSLocale::ToString(thread, JSHandle<JSLocale>::Cast(kValue));
137                RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
138                tag.Update(kValueStr.GetTaggedValue());
139            } else {
140                JSHandle<EcmaString> kValueString = JSTaggedValue::ToString(thread, kValue);
141                RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
142                JSHandle<EcmaString> canonicalStr = CanonicalizeUnicodeLocaleId(thread, kValueString);
143                RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
144                tag.Update(canonicalStr.GetTaggedValue());
145            }
146#else
147            JSHandle<EcmaString> kValueString = JSTaggedValue::ToString(thread, kValue);
148            RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
149            JSHandle<EcmaString> canonicalStr = CanonicalizeUnicodeLocaleId(thread, kValueString);
150            RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
151            tag.Update(canonicalStr.GetTaggedValue());
152#endif
153            // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen.
154            bool isExist = false;
155            uint32_t seenLen = seen->GetLength();
156            for (uint32_t i = 0; i < seenLen; i++) {
157                if (JSTaggedValue::SameValue(seen->Get(thread, i), tag.GetTaggedValue())) {
158                    isExist = true;
159                }
160            }
161            if (!isExist) {
162                seen->Set(thread, index++, JSHandle<JSTaggedValue>::Cast(tag));
163            }
164        }
165        // d. Increase k by 1.
166    }
167    // set capacity
168    seen = TaggedArray::SetCapacity(thread, seen, index);
169    // 8. Return seen.
170    return seen;
171}
172
173// 6.2.3 CanonicalizeUnicodeLocaleId( locale )
174JSHandle<EcmaString> LocaleHelper::CanonicalizeUnicodeLocaleId(JSThread *thread, const JSHandle<EcmaString> &locale)
175{
176    [[maybe_unused]] ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
177    if (!IsStructurallyValidLanguageTag(locale)) {
178        THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
179    }
180
181    if (EcmaStringAccessor(locale).GetLength() == 0 || EcmaStringAccessor(locale).IsUtf16()) {
182        THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
183    }
184
185    std::string localeCStr = ConvertToStdString(locale);
186    std::transform(localeCStr.begin(), localeCStr.end(), localeCStr.begin(), AsciiAlphaToLower);
187    UErrorCode status = U_ZERO_ERROR;
188    icu::Locale formalLocale;
189    {
190        // Third party libs call can be in Native state
191        ThreadNativeScope nativeScope(thread);
192        formalLocale = icu::Locale::forLanguageTag(localeCStr.c_str(), status);
193    }
194    if ((U_FAILURE(status) != 0) || (formalLocale.isBogus() != 0)) {
195        THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
196    }
197
198    // Resets the LocaleBuilder to match the locale.
199    // Returns an instance of Locale created from the fields set on this builder.
200    formalLocale = icu::LocaleBuilder().setLocale(formalLocale).build(status);
201    // Canonicalize the locale ID of this object according to CLDR.
202    formalLocale.canonicalize(status);
203    if ((U_FAILURE(status) != 0) || (formalLocale.isBogus() != 0)) {
204        THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
205    }
206    JSHandle<EcmaString> languageTag = ToLanguageTag(thread, formalLocale);
207    RETURN_HANDLE_IF_ABRUPT_COMPLETION(EcmaString, thread);
208    return languageTag;
209}
210
211std::string LocaleHelper::ToStdStringLanguageTag(JSThread *thread, const icu::Locale &locale)
212{
213    UErrorCode status = U_ZERO_ERROR;
214    auto result = locale.toLanguageTag<std::string>(status);
215    if (U_FAILURE(status) != 0) {
216        THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", "");
217    }
218    size_t findBeginning = result.find("-u-");
219    std::string finalRes;
220    std::string tempRes;
221    if (findBeginning == std::string::npos) {
222        return result;
223    }
224    size_t specialBeginning = findBeginning + INTL_INDEX_THREE;
225    size_t specialCount = 0;
226    while ((specialBeginning < result.size()) && (result[specialBeginning] != '-')) {
227        specialCount++;
228        specialBeginning++;
229    }
230    thread->CheckSafepointIfSuspended();
231    if (findBeginning != std::string::npos) {
232        // It begin with "-u-xx" or with more elements.
233        tempRes = result.substr(0, findBeginning + INTL_INDEX_THREE + specialCount);
234        if (result.size() <= findBeginning + INTL_INDEX_THREE + specialCount) {
235            return result;
236        }
237        std::string leftStr = result.substr(findBeginning + INTL_INDEX_THREE + specialCount + 1);
238        std::istringstream temp(leftStr);
239        std::string buffer;
240        std::vector<std::string> resContainer;
241        while (getline(temp, buffer, '-')) {
242            if (buffer != "true" && buffer != "yes") {
243                resContainer.push_back(buffer);
244            }
245        }
246        for (auto it = resContainer.begin(); it != resContainer.end(); it++) {
247            std::string tag = "-";
248            tag += *it;
249            finalRes += tag;
250        }
251    }
252    if (!finalRes.empty()) {
253        tempRes += finalRes;
254    }
255    result = tempRes;
256    return result;
257}
258
259JSHandle<EcmaString> LocaleHelper::ToLanguageTag(JSThread *thread, const icu::Locale &locale)
260{
261    ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
262    return factory->NewFromStdString(ToStdStringLanguageTag(thread, locale));
263}
264
265// 6.2.2 IsStructurallyValidLanguageTag( locale )
266bool LocaleHelper::IsStructurallyValidLanguageTag(const JSHandle<EcmaString> &tag)
267{
268    std::string tagCollection = ConvertToStdString(tag);
269    std::vector<std::string> containers;
270    std::string substring;
271    size_t address = 1;
272    for (auto it = tagCollection.begin(); it != tagCollection.end(); it++) {
273        if (*it != '-' && it != tagCollection.end() - 1) {
274            substring += *it;
275        } else {
276            if (it == tagCollection.end() - 1) {
277                substring += *it;
278            }
279            containers.push_back(substring);
280            if (!IsVariantSubtag(substring, containers)) {
281                return false;
282            }
283            substring.clear();
284        }
285    }
286    bool result = DealwithLanguageTag(containers, address);
287    return result;
288}
289
290bool LocaleHelper::IsVariantSubtag(std::string substring, std::vector<std::string> containers)
291{
292    if (IsVariantSubtag(substring)) {
293        std::transform(substring.begin(), substring.end(), substring.begin(), AsciiAlphaToLower);
294        // Ignore the first tag when checking for duplicate subtags.
295        if (std::count(containers.begin(), containers.end(), substring) > INTL_INDEX_TWO) {
296            return false;
297        }
298    }
299    return true;
300}
301
302std::string LocaleHelper::ConvertToStdString(const JSHandle<EcmaString> &ecmaStr)
303{
304    return std::string(ConvertToString(*ecmaStr, StringConvertedUsage::LOGICOPERATION));
305}
306
307bool LocaleHelper::DealwithLanguageTag(const std::vector<std::string> &containers, size_t &address)
308{
309    // The abstract operation returns true if locale can be generated from the ABNF grammar in section 2.1 of the RFC,
310    // starting with Language-Tag, and does not contain duplicate variant or singleton subtags
311    // If language tag is empty, return false.
312    if (containers.empty()) {
313        return false;
314    }
315
316    // a. if the first tag is not language, return false.
317    if (!IsLanguageSubtag(containers[0])) {
318        return false;
319    }
320
321    // if the tag include language only, like "zh" or "de", return true;
322    if (containers.size() == 1) {
323        return true;
324    }
325
326    // Else, then
327    // if is unique singleton subtag, script and region tag.
328    if (IsExtensionSingleton(containers[1])) {
329        return true;
330    }
331
332    if (IsScriptSubtag(containers[address])) {
333        address++;
334        if (containers.size() == address) {
335            return true;
336        }
337    }
338
339    if (IsRegionSubtag(containers[address])) {
340        address++;
341    }
342
343    for (size_t i = address; i < containers.size(); i++) {
344        if (IsExtensionSingleton(containers[i])) {
345            return true;
346        }
347        if (!IsVariantSubtag(containers[i])) {
348            return false;
349        }
350    }
351    return true;
352}
353
354// 6.2.4 DefaultLocale ()
355JSHandle<EcmaString> LocaleHelper::DefaultLocale(JSThread *thread)
356{
357    ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
358    return factory->NewFromStdString(StdStringDefaultLocale(thread));
359}
360
361const std::string& LocaleHelper::StdStringDefaultLocale(JSThread *thread)
362{
363    auto context = thread->GetCurrentEcmaContext();
364    const std::string& cachedLocale = context->GetDefaultLocale();
365    if (!cachedLocale.empty()) {
366        return cachedLocale;
367    }
368    icu::Locale defaultLocale;
369    if (strcmp(defaultLocale.getName(), "en_US_POSIX") == 0 || strcmp(defaultLocale.getName(), "c") == 0) {
370        context->SetDefaultLocale("en-US");
371    } else if (defaultLocale.isBogus() != 0) {
372        context->SetDefaultLocale("und");
373    } else {
374        context->SetDefaultLocale(ToStdStringLanguageTag(thread, defaultLocale));
375    }
376    return context->GetDefaultLocale();
377}
378
379void LocaleHelper::HandleLocaleExtension(size_t &start, size_t &extensionEnd, const std::string result, size_t len)
380{
381    while (start < len - INTL_INDEX_TWO) {
382        if (result[start] != '-') {
383            start++;
384            continue;
385        }
386        if (result[start + INTL_INDEX_TWO] == '-') {
387            extensionEnd = start;
388            break;
389        }
390        start += INTL_INDEX_THREE;
391    }
392}
393
394LocaleHelper::ParsedLocale LocaleHelper::HandleLocale(const JSHandle<EcmaString> &localeString)
395{
396    return LocaleHelper::HandleLocale(ConvertToStdString(localeString));
397}
398
399LocaleHelper::ParsedLocale LocaleHelper::HandleLocale(const std::string &localeString)
400{
401    size_t len = localeString.size();
402    ParsedLocale parsedResult;
403
404    // a. The single-character subtag ’x’ as the primary subtag indicates
405    //    that the language tag consists solely of subtags whose meaning is
406    //    defined by private agreement.
407    // b. Extensions cannot be used in tags that are entirely private use.
408    if (IsPrivateSubTag(localeString, len)) {
409        parsedResult.base = localeString;
410        return parsedResult;
411    }
412    // If cannot find "-u-", return the whole string as base.
413    size_t foundExtension = localeString.find("-u-");
414    if (foundExtension == std::string::npos) {
415        parsedResult.base = localeString;
416        return parsedResult;
417    }
418    // Let privateIndex be Call(%StringProto_indexOf%, foundLocale, « "-x-" »).
419    size_t privateIndex = localeString.find("-x-");
420    if (privateIndex != std::string::npos && privateIndex < foundExtension) {
421        parsedResult.base = localeString;
422        return parsedResult;
423    }
424    const std::string basis = localeString.substr(0, foundExtension);
425    size_t extensionEnd = len;
426    ASSERT(len > INTL_INDEX_TWO);
427    size_t start = foundExtension + 1;
428    HandleLocaleExtension(start, extensionEnd, localeString, len);
429    const std::string end = localeString.substr(extensionEnd);
430    parsedResult.base = basis + end;
431    parsedResult.extension = localeString.substr(foundExtension, extensionEnd - foundExtension);
432    return parsedResult;
433}
434
435std::vector<std::string> LocaleHelper::GetAvailableLocales(JSThread *thread, const char *localeKey,
436                                                           const char *localePath)
437{
438    UErrorCode status = U_ZERO_ERROR;
439    auto globalConst = thread->GlobalConstants();
440    JSHandle<EcmaString> specialValue = JSHandle<EcmaString>::Cast(globalConst->GetHandledEnUsPosixString());
441    std::string specialString = ConvertToStdString(specialValue);
442    UEnumeration *uenum = nullptr;
443    {
444        ThreadNativeScope nativeScope(thread);
445        uenum = uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status);
446    }
447    std::vector<std::string> allLocales;
448    const char *loc = nullptr;
449    // Third party libs computing can be in Native state
450    ThreadNativeScope nativeScope(thread);
451    for (loc = uenum_next(uenum, nullptr, &status); loc != nullptr; loc = uenum_next(uenum, nullptr, &status)) {
452        ASSERT(U_SUCCESS(status));
453        std::string locStr(loc);
454        std::replace(locStr.begin(), locStr.end(), '_', '-');
455        if (locStr == specialString) {
456            locStr = "en-US-u-va-posix";
457        }
458
459        if (localePath != nullptr || localeKey != nullptr) {
460            icu::Locale locale(locStr.c_str());
461            bool res = false;
462            if (!CheckLocales(locale, localeKey, localePath, res)) {
463                continue;
464            }
465        }
466        allLocales.push_back(locStr);
467        icu::Locale formalLocale = icu::Locale::createCanonical(locStr.c_str());
468        std::string scriptStr = formalLocale.getScript();
469        if (!scriptStr.empty()) {
470            std::string languageStr = formalLocale.getLanguage();
471            std::string countryStr = formalLocale.getCountry();
472            std::string shortLocale = icu::Locale(languageStr.c_str(), countryStr.c_str()).getName();
473            std::replace(shortLocale.begin(), shortLocale.end(), '_', '-');
474            allLocales.push_back(shortLocale);
475        }
476    }
477    uenum_close(uenum);
478    return allLocales;
479}
480
481// 9.2.2 BestAvailableLocale ( availableLocales, locale )
482std::string LocaleHelper::BestAvailableLocale(const std::vector<std::string> &availableLocales,
483                                              const std::string &locale)
484{
485    // 1. Let candidate be locale.
486    std::string localeCandidate = locale;
487    std::string undefined = std::string();
488    // 2. Repeat,
489    uint32_t length = availableLocales.size();
490    while (true) {
491        // a. If availableLocales contains an element equal to candidate, return candidate.
492        for (uint32_t i = 0; i < length; ++i) {
493            std::string itemStr = availableLocales[i];
494            if (itemStr == localeCandidate) {
495                return localeCandidate;
496            }
497        }
498        // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate.
499        //    If that character does not occur, return undefined.
500        size_t pos = localeCandidate.rfind('-');
501        if (pos == std::string::npos) {
502            return undefined;
503        }
504        // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
505        if (pos >= INTL_INDEX_TWO && localeCandidate[pos - INTL_INDEX_TWO] == '-') {
506            pos -= INTL_INDEX_TWO;
507        }
508        // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive.
509        localeCandidate.resize(pos);
510    }
511}
} // namespace panda::ecmascript::intl