11cb0ef41Sopenharmony_ci// © 2018 and later: Unicode, Inc. and others.
21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
31cb0ef41Sopenharmony_ci
41cb0ef41Sopenharmony_ci#include "unicode/utypes.h"
51cb0ef41Sopenharmony_ci
61cb0ef41Sopenharmony_ci#if !UCONFIG_NO_FORMATTING
71cb0ef41Sopenharmony_ci
81cb0ef41Sopenharmony_ci// Allow implicit conversion from char16_t* to UnicodeString for this file:
91cb0ef41Sopenharmony_ci// Helpful in toString methods and elsewhere.
101cb0ef41Sopenharmony_ci#define UNISTR_FROM_STRING_EXPLICIT
111cb0ef41Sopenharmony_ci
121cb0ef41Sopenharmony_ci#include "static_unicode_sets.h"
131cb0ef41Sopenharmony_ci#include "umutex.h"
141cb0ef41Sopenharmony_ci#include "ucln_cmn.h"
151cb0ef41Sopenharmony_ci#include "unicode/uniset.h"
161cb0ef41Sopenharmony_ci#include "uresimp.h"
171cb0ef41Sopenharmony_ci#include "cstring.h"
181cb0ef41Sopenharmony_ci#include "uassert.h"
191cb0ef41Sopenharmony_ci
201cb0ef41Sopenharmony_ciusing namespace icu;
211cb0ef41Sopenharmony_ciusing namespace icu::unisets;
221cb0ef41Sopenharmony_ci
231cb0ef41Sopenharmony_ci
241cb0ef41Sopenharmony_cinamespace {
251cb0ef41Sopenharmony_ci
261cb0ef41Sopenharmony_ciUnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {};
271cb0ef41Sopenharmony_ci
281cb0ef41Sopenharmony_ci// Save the empty instance in static memory to have well-defined behavior if a
291cb0ef41Sopenharmony_ci// regular UnicodeSet cannot be allocated.
301cb0ef41Sopenharmony_cialignas(UnicodeSet)
311cb0ef41Sopenharmony_cichar gEmptyUnicodeSet[sizeof(UnicodeSet)];
321cb0ef41Sopenharmony_ci
331cb0ef41Sopenharmony_ci// Whether the gEmptyUnicodeSet is initialized and ready to use.
341cb0ef41Sopenharmony_ciUBool gEmptyUnicodeSetInitialized = false;
351cb0ef41Sopenharmony_ci
361cb0ef41Sopenharmony_ciinline UnicodeSet* getImpl(Key key) {
371cb0ef41Sopenharmony_ci    UnicodeSet* candidate = gUnicodeSets[key];
381cb0ef41Sopenharmony_ci    if (candidate == nullptr) {
391cb0ef41Sopenharmony_ci        return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
401cb0ef41Sopenharmony_ci    }
411cb0ef41Sopenharmony_ci    return candidate;
421cb0ef41Sopenharmony_ci}
431cb0ef41Sopenharmony_ci
441cb0ef41Sopenharmony_ciUnicodeSet* computeUnion(Key k1, Key k2) {
451cb0ef41Sopenharmony_ci    UnicodeSet* result = new UnicodeSet();
461cb0ef41Sopenharmony_ci    if (result == nullptr) {
471cb0ef41Sopenharmony_ci        return nullptr;
481cb0ef41Sopenharmony_ci    }
491cb0ef41Sopenharmony_ci    result->addAll(*getImpl(k1));
501cb0ef41Sopenharmony_ci    result->addAll(*getImpl(k2));
511cb0ef41Sopenharmony_ci    result->freeze();
521cb0ef41Sopenharmony_ci    return result;
531cb0ef41Sopenharmony_ci}
541cb0ef41Sopenharmony_ci
551cb0ef41Sopenharmony_ciUnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
561cb0ef41Sopenharmony_ci    UnicodeSet* result = new UnicodeSet();
571cb0ef41Sopenharmony_ci    if (result == nullptr) {
581cb0ef41Sopenharmony_ci        return nullptr;
591cb0ef41Sopenharmony_ci    }
601cb0ef41Sopenharmony_ci    result->addAll(*getImpl(k1));
611cb0ef41Sopenharmony_ci    result->addAll(*getImpl(k2));
621cb0ef41Sopenharmony_ci    result->addAll(*getImpl(k3));
631cb0ef41Sopenharmony_ci    result->freeze();
641cb0ef41Sopenharmony_ci    return result;
651cb0ef41Sopenharmony_ci}
661cb0ef41Sopenharmony_ci
671cb0ef41Sopenharmony_ci
681cb0ef41Sopenharmony_civoid saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) {
691cb0ef41Sopenharmony_ci    // assert unicodeSets.get(key) == null;
701cb0ef41Sopenharmony_ci    gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status);
711cb0ef41Sopenharmony_ci}
721cb0ef41Sopenharmony_ci
731cb0ef41Sopenharmony_ciclass ParseDataSink : public ResourceSink {
741cb0ef41Sopenharmony_ci  public:
751cb0ef41Sopenharmony_ci    void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) override {
761cb0ef41Sopenharmony_ci        ResourceTable contextsTable = value.getTable(status);
771cb0ef41Sopenharmony_ci        if (U_FAILURE(status)) { return; }
781cb0ef41Sopenharmony_ci        for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) {
791cb0ef41Sopenharmony_ci            if (uprv_strcmp(key, "date") == 0) {
801cb0ef41Sopenharmony_ci                // ignore
811cb0ef41Sopenharmony_ci            } else {
821cb0ef41Sopenharmony_ci                ResourceTable strictnessTable = value.getTable(status);
831cb0ef41Sopenharmony_ci                if (U_FAILURE(status)) { return; }
841cb0ef41Sopenharmony_ci                for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) {
851cb0ef41Sopenharmony_ci                    bool isLenient = (uprv_strcmp(key, "lenient") == 0);
861cb0ef41Sopenharmony_ci                    ResourceArray array = value.getArray(status);
871cb0ef41Sopenharmony_ci                    if (U_FAILURE(status)) { return; }
881cb0ef41Sopenharmony_ci                    for (int k = 0; k < array.getSize(); k++) {
891cb0ef41Sopenharmony_ci                        array.getValue(k, value);
901cb0ef41Sopenharmony_ci                        UnicodeString str = value.getUnicodeString(status);
911cb0ef41Sopenharmony_ci                        if (U_FAILURE(status)) { return; }
921cb0ef41Sopenharmony_ci                        // There is both lenient and strict data for comma/period,
931cb0ef41Sopenharmony_ci                        // but not for any of the other symbols.
941cb0ef41Sopenharmony_ci                        if (str.indexOf(u'.') != -1) {
951cb0ef41Sopenharmony_ci                            saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status);
961cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u',') != -1) {
971cb0ef41Sopenharmony_ci                            saveSet(isLenient ? COMMA : STRICT_COMMA, str, status);
981cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'+') != -1) {
991cb0ef41Sopenharmony_ci                            saveSet(PLUS_SIGN, str, status);
1001cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'-') != -1) {
1011cb0ef41Sopenharmony_ci                            saveSet(MINUS_SIGN, str, status);
1021cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'$') != -1) {
1031cb0ef41Sopenharmony_ci                            saveSet(DOLLAR_SIGN, str, status);
1041cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'£') != -1) {
1051cb0ef41Sopenharmony_ci                            saveSet(POUND_SIGN, str, status);
1061cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'₹') != -1) {
1071cb0ef41Sopenharmony_ci                            saveSet(RUPEE_SIGN, str, status);
1081cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'¥') != -1) {
1091cb0ef41Sopenharmony_ci                            saveSet(YEN_SIGN, str, status);
1101cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'₩') != -1) {
1111cb0ef41Sopenharmony_ci                            saveSet(WON_SIGN, str, status);
1121cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'%') != -1) {
1131cb0ef41Sopenharmony_ci                            saveSet(PERCENT_SIGN, str, status);
1141cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'‰') != -1) {
1151cb0ef41Sopenharmony_ci                            saveSet(PERMILLE_SIGN, str, status);
1161cb0ef41Sopenharmony_ci                        } else if (str.indexOf(u'’') != -1) {
1171cb0ef41Sopenharmony_ci                            saveSet(APOSTROPHE_SIGN, str, status);
1181cb0ef41Sopenharmony_ci                        } else {
1191cb0ef41Sopenharmony_ci                            // Unknown class of parse lenients
1201cb0ef41Sopenharmony_ci                            // TODO(ICU-20428): Make ICU automatically accept new classes?
1211cb0ef41Sopenharmony_ci                            U_ASSERT(false);
1221cb0ef41Sopenharmony_ci                        }
1231cb0ef41Sopenharmony_ci                        if (U_FAILURE(status)) { return; }
1241cb0ef41Sopenharmony_ci                    }
1251cb0ef41Sopenharmony_ci                }
1261cb0ef41Sopenharmony_ci            }
1271cb0ef41Sopenharmony_ci        }
1281cb0ef41Sopenharmony_ci    }
1291cb0ef41Sopenharmony_ci};
1301cb0ef41Sopenharmony_ci
1311cb0ef41Sopenharmony_ci
1321cb0ef41Sopenharmony_ciicu::UInitOnce gNumberParseUniSetsInitOnce {};
1331cb0ef41Sopenharmony_ci
1341cb0ef41Sopenharmony_ciUBool U_CALLCONV cleanupNumberParseUniSets() {
1351cb0ef41Sopenharmony_ci    if (gEmptyUnicodeSetInitialized) {
1361cb0ef41Sopenharmony_ci        reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet();
1371cb0ef41Sopenharmony_ci        gEmptyUnicodeSetInitialized = false;
1381cb0ef41Sopenharmony_ci    }
1391cb0ef41Sopenharmony_ci    for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) {
1401cb0ef41Sopenharmony_ci        delete gUnicodeSets[i];
1411cb0ef41Sopenharmony_ci        gUnicodeSets[i] = nullptr;
1421cb0ef41Sopenharmony_ci    }
1431cb0ef41Sopenharmony_ci    gNumberParseUniSetsInitOnce.reset();
1441cb0ef41Sopenharmony_ci    return true;
1451cb0ef41Sopenharmony_ci}
1461cb0ef41Sopenharmony_ci
1471cb0ef41Sopenharmony_civoid U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
1481cb0ef41Sopenharmony_ci    ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
1491cb0ef41Sopenharmony_ci
1501cb0ef41Sopenharmony_ci    // Initialize the empty instance for well-defined fallback behavior
1511cb0ef41Sopenharmony_ci    new(gEmptyUnicodeSet) UnicodeSet();
1521cb0ef41Sopenharmony_ci    reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze();
1531cb0ef41Sopenharmony_ci    gEmptyUnicodeSetInitialized = true;
1541cb0ef41Sopenharmony_ci
1551cb0ef41Sopenharmony_ci    // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
1561cb0ef41Sopenharmony_ci    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
1571cb0ef41Sopenharmony_ci    gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet(
1581cb0ef41Sopenharmony_ci            u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status);
1591cb0ef41Sopenharmony_ci    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status);
1601cb0ef41Sopenharmony_ci
1611cb0ef41Sopenharmony_ci    LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status));
1621cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) { return; }
1631cb0ef41Sopenharmony_ci    ParseDataSink sink;
1641cb0ef41Sopenharmony_ci    ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
1651cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) { return; }
1661cb0ef41Sopenharmony_ci
1671cb0ef41Sopenharmony_ci    // NOTE: It is OK for these assertions to fail if there was a no-data build.
1681cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[COMMA] != nullptr);
1691cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
1701cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
1711cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);
1721cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr);
1731cb0ef41Sopenharmony_ci
1741cb0ef41Sopenharmony_ci    LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet(
1751cb0ef41Sopenharmony_ci        u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]",
1761cb0ef41Sopenharmony_ci        status
1771cb0ef41Sopenharmony_ci    ), status);
1781cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) { return; }
1791cb0ef41Sopenharmony_ci    otherGrouping->addAll(*gUnicodeSets[APOSTROPHE_SIGN]);
1801cb0ef41Sopenharmony_ci    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan();
1811cb0ef41Sopenharmony_ci    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
1821cb0ef41Sopenharmony_ci    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
1831cb0ef41Sopenharmony_ci            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
1841cb0ef41Sopenharmony_ci
1851cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
1861cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);
1871cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
1881cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
1891cb0ef41Sopenharmony_ci
1901cb0ef41Sopenharmony_ci    gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
1911cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) { return; }
1921cb0ef41Sopenharmony_ci
1931cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
1941cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
1951cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
1961cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr);
1971cb0ef41Sopenharmony_ci    U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr);
1981cb0ef41Sopenharmony_ci
1991cb0ef41Sopenharmony_ci    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
2001cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) { return; }
2011cb0ef41Sopenharmony_ci    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
2021cb0ef41Sopenharmony_ci    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
2031cb0ef41Sopenharmony_ci
2041cb0ef41Sopenharmony_ci    for (auto* uniset : gUnicodeSets) {
2051cb0ef41Sopenharmony_ci        if (uniset != nullptr) {
2061cb0ef41Sopenharmony_ci            uniset->freeze();
2071cb0ef41Sopenharmony_ci        }
2081cb0ef41Sopenharmony_ci    }
2091cb0ef41Sopenharmony_ci}
2101cb0ef41Sopenharmony_ci
2111cb0ef41Sopenharmony_ci}
2121cb0ef41Sopenharmony_ci
2131cb0ef41Sopenharmony_ciconst UnicodeSet* unisets::get(Key key) {
2141cb0ef41Sopenharmony_ci    UErrorCode localStatus = U_ZERO_ERROR;
2151cb0ef41Sopenharmony_ci    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus);
2161cb0ef41Sopenharmony_ci    if (U_FAILURE(localStatus)) {
2171cb0ef41Sopenharmony_ci        return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
2181cb0ef41Sopenharmony_ci    }
2191cb0ef41Sopenharmony_ci    return getImpl(key);
2201cb0ef41Sopenharmony_ci}
2211cb0ef41Sopenharmony_ci
2221cb0ef41Sopenharmony_ciKey unisets::chooseFrom(UnicodeString str, Key key1) {
2231cb0ef41Sopenharmony_ci    return get(key1)->contains(str) ? key1 : NONE;
2241cb0ef41Sopenharmony_ci}
2251cb0ef41Sopenharmony_ci
2261cb0ef41Sopenharmony_ciKey unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
2271cb0ef41Sopenharmony_ci    return get(key1)->contains(str) ? key1 : chooseFrom(str, key2);
2281cb0ef41Sopenharmony_ci}
2291cb0ef41Sopenharmony_ci
2301cb0ef41Sopenharmony_ci//Key unisets::chooseCurrency(UnicodeString str) {
2311cb0ef41Sopenharmony_ci//    if (get(DOLLAR_SIGN)->contains(str)) {
2321cb0ef41Sopenharmony_ci//        return DOLLAR_SIGN;
2331cb0ef41Sopenharmony_ci//    } else if (get(POUND_SIGN)->contains(str)) {
2341cb0ef41Sopenharmony_ci//        return POUND_SIGN;
2351cb0ef41Sopenharmony_ci//    } else if (get(RUPEE_SIGN)->contains(str)) {
2361cb0ef41Sopenharmony_ci//        return RUPEE_SIGN;
2371cb0ef41Sopenharmony_ci//    } else if (get(YEN_SIGN)->contains(str)) {
2381cb0ef41Sopenharmony_ci//        return YEN_SIGN;
2391cb0ef41Sopenharmony_ci//    } else {
2401cb0ef41Sopenharmony_ci//        return NONE;
2411cb0ef41Sopenharmony_ci//    }
2421cb0ef41Sopenharmony_ci//}
2431cb0ef41Sopenharmony_ci
2441cb0ef41Sopenharmony_ci
2451cb0ef41Sopenharmony_ci#endif /* #if !UCONFIG_NO_FORMATTING */
246