// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* scriptset.cpp
*
* created on: 2013 Jan 7
* created by: Andy Heninger
*/
141cb0ef41Sopenharmony_ci
151cb0ef41Sopenharmony_ci#include "unicode/utypes.h"
161cb0ef41Sopenharmony_ci
171cb0ef41Sopenharmony_ci#include "unicode/uchar.h"
181cb0ef41Sopenharmony_ci#include "unicode/unistr.h"
191cb0ef41Sopenharmony_ci
201cb0ef41Sopenharmony_ci#include "scriptset.h"
211cb0ef41Sopenharmony_ci#include "uassert.h"
221cb0ef41Sopenharmony_ci#include "cmemory.h"
231cb0ef41Sopenharmony_ci
241cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN
251cb0ef41Sopenharmony_ci
//----------------------------------------------------------------------------
//
//  ScriptSet implementation
//
//----------------------------------------------------------------------------
311cb0ef41Sopenharmony_ciScriptSet::ScriptSet() {
321cb0ef41Sopenharmony_ci    uprv_memset(bits, 0, sizeof(bits));
331cb0ef41Sopenharmony_ci}
341cb0ef41Sopenharmony_ci
351cb0ef41Sopenharmony_ciScriptSet::~ScriptSet() {
361cb0ef41Sopenharmony_ci}
371cb0ef41Sopenharmony_ci
381cb0ef41Sopenharmony_ciScriptSet::ScriptSet(const ScriptSet &other) {
391cb0ef41Sopenharmony_ci    *this = other;
401cb0ef41Sopenharmony_ci}
411cb0ef41Sopenharmony_ci
421cb0ef41Sopenharmony_ciScriptSet & ScriptSet::operator =(const ScriptSet &other) {
431cb0ef41Sopenharmony_ci    uprv_memcpy(bits, other.bits, sizeof(bits));
441cb0ef41Sopenharmony_ci    return *this;
451cb0ef41Sopenharmony_ci}
461cb0ef41Sopenharmony_ci
471cb0ef41Sopenharmony_cibool ScriptSet::operator == (const ScriptSet &other) const {
481cb0ef41Sopenharmony_ci    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
491cb0ef41Sopenharmony_ci        if (bits[i] != other.bits[i]) {
501cb0ef41Sopenharmony_ci            return false;
511cb0ef41Sopenharmony_ci        }
521cb0ef41Sopenharmony_ci    }
531cb0ef41Sopenharmony_ci    return true;
541cb0ef41Sopenharmony_ci}
551cb0ef41Sopenharmony_ci
561cb0ef41Sopenharmony_ciUBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
571cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
581cb0ef41Sopenharmony_ci        return false;
591cb0ef41Sopenharmony_ci    }
601cb0ef41Sopenharmony_ci    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
611cb0ef41Sopenharmony_ci        status = U_ILLEGAL_ARGUMENT_ERROR;
621cb0ef41Sopenharmony_ci        return false;
631cb0ef41Sopenharmony_ci    }
641cb0ef41Sopenharmony_ci    uint32_t index = script / 32;
651cb0ef41Sopenharmony_ci    uint32_t bit   = 1 << (script & 31);
661cb0ef41Sopenharmony_ci    return ((bits[index] & bit) != 0);
671cb0ef41Sopenharmony_ci}
681cb0ef41Sopenharmony_ci
691cb0ef41Sopenharmony_ci
701cb0ef41Sopenharmony_ciScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
711cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
721cb0ef41Sopenharmony_ci        return *this;
731cb0ef41Sopenharmony_ci    }
741cb0ef41Sopenharmony_ci    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
751cb0ef41Sopenharmony_ci        status = U_ILLEGAL_ARGUMENT_ERROR;
761cb0ef41Sopenharmony_ci        return *this;
771cb0ef41Sopenharmony_ci    }
781cb0ef41Sopenharmony_ci    uint32_t index = script / 32;
791cb0ef41Sopenharmony_ci    uint32_t bit   = 1 << (script & 31);
801cb0ef41Sopenharmony_ci    bits[index] |= bit;
811cb0ef41Sopenharmony_ci    return *this;
821cb0ef41Sopenharmony_ci}
831cb0ef41Sopenharmony_ci
841cb0ef41Sopenharmony_ciScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
851cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
861cb0ef41Sopenharmony_ci        return *this;
871cb0ef41Sopenharmony_ci    }
881cb0ef41Sopenharmony_ci    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
891cb0ef41Sopenharmony_ci        status = U_ILLEGAL_ARGUMENT_ERROR;
901cb0ef41Sopenharmony_ci        return *this;
911cb0ef41Sopenharmony_ci    }
921cb0ef41Sopenharmony_ci    uint32_t index = script / 32;
931cb0ef41Sopenharmony_ci    uint32_t bit   = 1 << (script & 31);
941cb0ef41Sopenharmony_ci    bits[index] &= ~bit;
951cb0ef41Sopenharmony_ci    return *this;
961cb0ef41Sopenharmony_ci}
971cb0ef41Sopenharmony_ci
981cb0ef41Sopenharmony_ci
991cb0ef41Sopenharmony_ci
1001cb0ef41Sopenharmony_ciScriptSet &ScriptSet::Union(const ScriptSet &other) {
1011cb0ef41Sopenharmony_ci    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
1021cb0ef41Sopenharmony_ci        bits[i] |= other.bits[i];
1031cb0ef41Sopenharmony_ci    }
1041cb0ef41Sopenharmony_ci    return *this;
1051cb0ef41Sopenharmony_ci}
1061cb0ef41Sopenharmony_ci
1071cb0ef41Sopenharmony_ciScriptSet &ScriptSet::intersect(const ScriptSet &other) {
1081cb0ef41Sopenharmony_ci    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
1091cb0ef41Sopenharmony_ci        bits[i] &= other.bits[i];
1101cb0ef41Sopenharmony_ci    }
1111cb0ef41Sopenharmony_ci    return *this;
1121cb0ef41Sopenharmony_ci}
1131cb0ef41Sopenharmony_ci
1141cb0ef41Sopenharmony_ciScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
1151cb0ef41Sopenharmony_ci    ScriptSet t;
1161cb0ef41Sopenharmony_ci    t.set(script, status);
1171cb0ef41Sopenharmony_ci    if (U_SUCCESS(status)) {
1181cb0ef41Sopenharmony_ci        this->intersect(t);
1191cb0ef41Sopenharmony_ci    }
1201cb0ef41Sopenharmony_ci    return *this;
1211cb0ef41Sopenharmony_ci}
1221cb0ef41Sopenharmony_ci
1231cb0ef41Sopenharmony_ciUBool ScriptSet::intersects(const ScriptSet &other) const {
1241cb0ef41Sopenharmony_ci    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
1251cb0ef41Sopenharmony_ci        if ((bits[i] & other.bits[i]) != 0) {
1261cb0ef41Sopenharmony_ci            return true;
1271cb0ef41Sopenharmony_ci        }
1281cb0ef41Sopenharmony_ci    }
1291cb0ef41Sopenharmony_ci    return false;
1301cb0ef41Sopenharmony_ci}
1311cb0ef41Sopenharmony_ci
1321cb0ef41Sopenharmony_ciUBool ScriptSet::contains(const ScriptSet &other) const {
1331cb0ef41Sopenharmony_ci    ScriptSet t(*this);
1341cb0ef41Sopenharmony_ci    t.intersect(other);
1351cb0ef41Sopenharmony_ci    return (t == other);
1361cb0ef41Sopenharmony_ci}
1371cb0ef41Sopenharmony_ci
1381cb0ef41Sopenharmony_ci
1391cb0ef41Sopenharmony_ciScriptSet &ScriptSet::setAll() {
1401cb0ef41Sopenharmony_ci    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
1411cb0ef41Sopenharmony_ci        bits[i] = 0xffffffffu;
1421cb0ef41Sopenharmony_ci    }
1431cb0ef41Sopenharmony_ci    return *this;
1441cb0ef41Sopenharmony_ci}
1451cb0ef41Sopenharmony_ci
1461cb0ef41Sopenharmony_ci
1471cb0ef41Sopenharmony_ciScriptSet &ScriptSet::resetAll() {
1481cb0ef41Sopenharmony_ci    uprv_memset(bits, 0, sizeof(bits));
1491cb0ef41Sopenharmony_ci    return *this;
1501cb0ef41Sopenharmony_ci}
1511cb0ef41Sopenharmony_ci
1521cb0ef41Sopenharmony_ciint32_t ScriptSet::countMembers() const {
1531cb0ef41Sopenharmony_ci    // This bit counter is good for sparse numbers of '1's, which is
1541cb0ef41Sopenharmony_ci    //  very much the case that we will usually have.
1551cb0ef41Sopenharmony_ci    int32_t count = 0;
1561cb0ef41Sopenharmony_ci    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
1571cb0ef41Sopenharmony_ci        uint32_t x = bits[i];
1581cb0ef41Sopenharmony_ci        while (x > 0) {
1591cb0ef41Sopenharmony_ci            count++;
1601cb0ef41Sopenharmony_ci            x &= (x - 1);    // and off the least significant one bit.
1611cb0ef41Sopenharmony_ci        }
1621cb0ef41Sopenharmony_ci    }
1631cb0ef41Sopenharmony_ci    return count;
1641cb0ef41Sopenharmony_ci}
1651cb0ef41Sopenharmony_ci
1661cb0ef41Sopenharmony_ciint32_t ScriptSet::hashCode() const {
1671cb0ef41Sopenharmony_ci    int32_t hash = 0;
1681cb0ef41Sopenharmony_ci    for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
1691cb0ef41Sopenharmony_ci        hash ^= bits[i];
1701cb0ef41Sopenharmony_ci    }
1711cb0ef41Sopenharmony_ci    return hash;
1721cb0ef41Sopenharmony_ci}
1731cb0ef41Sopenharmony_ci
1741cb0ef41Sopenharmony_ciint32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
1751cb0ef41Sopenharmony_ci    // TODO: Wants a better implementation.
1761cb0ef41Sopenharmony_ci    if (fromIndex < 0) {
1771cb0ef41Sopenharmony_ci        return -1;
1781cb0ef41Sopenharmony_ci    }
1791cb0ef41Sopenharmony_ci    UErrorCode status = U_ZERO_ERROR;
1801cb0ef41Sopenharmony_ci    for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) {
1811cb0ef41Sopenharmony_ci        if (test((UScriptCode)scriptIndex, status)) {
1821cb0ef41Sopenharmony_ci            return scriptIndex;
1831cb0ef41Sopenharmony_ci        }
1841cb0ef41Sopenharmony_ci    }
1851cb0ef41Sopenharmony_ci    return -1;
1861cb0ef41Sopenharmony_ci}
1871cb0ef41Sopenharmony_ci
1881cb0ef41Sopenharmony_ciUBool ScriptSet::isEmpty() const {
1891cb0ef41Sopenharmony_ci    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
1901cb0ef41Sopenharmony_ci        if (bits[i] != 0) {
1911cb0ef41Sopenharmony_ci            return false;
1921cb0ef41Sopenharmony_ci        }
1931cb0ef41Sopenharmony_ci    }
1941cb0ef41Sopenharmony_ci    return true;
1951cb0ef41Sopenharmony_ci}
1961cb0ef41Sopenharmony_ci
1971cb0ef41Sopenharmony_ciUnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
1981cb0ef41Sopenharmony_ci    UBool firstTime = true;
1991cb0ef41Sopenharmony_ci    for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
2001cb0ef41Sopenharmony_ci        if (!firstTime) {
2011cb0ef41Sopenharmony_ci            dest.append((char16_t)0x20);
2021cb0ef41Sopenharmony_ci        }
2031cb0ef41Sopenharmony_ci        firstTime = false;
2041cb0ef41Sopenharmony_ci        const char *scriptName = uscript_getShortName((UScriptCode(i)));
2051cb0ef41Sopenharmony_ci        dest.append(UnicodeString(scriptName, -1, US_INV));
2061cb0ef41Sopenharmony_ci    }
2071cb0ef41Sopenharmony_ci    return dest;
2081cb0ef41Sopenharmony_ci}
2091cb0ef41Sopenharmony_ci
2101cb0ef41Sopenharmony_ciScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
2111cb0ef41Sopenharmony_ci    resetAll();
2121cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
2131cb0ef41Sopenharmony_ci        return *this;
2141cb0ef41Sopenharmony_ci    }
2151cb0ef41Sopenharmony_ci    UnicodeString oneScriptName;
2161cb0ef41Sopenharmony_ci    for (int32_t i=0; i<scriptString.length();) {
2171cb0ef41Sopenharmony_ci        UChar32 c = scriptString.char32At(i);
2181cb0ef41Sopenharmony_ci        i = scriptString.moveIndex32(i, 1);
2191cb0ef41Sopenharmony_ci        if (!u_isUWhiteSpace(c)) {
2201cb0ef41Sopenharmony_ci            oneScriptName.append(c);
2211cb0ef41Sopenharmony_ci            if (i < scriptString.length()) {
2221cb0ef41Sopenharmony_ci                continue;
2231cb0ef41Sopenharmony_ci            }
2241cb0ef41Sopenharmony_ci        }
2251cb0ef41Sopenharmony_ci        if (oneScriptName.length() > 0) {
2261cb0ef41Sopenharmony_ci            char buf[40];
2271cb0ef41Sopenharmony_ci            oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
2281cb0ef41Sopenharmony_ci            buf[sizeof(buf)-1] = 0;
2291cb0ef41Sopenharmony_ci            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
2301cb0ef41Sopenharmony_ci            if (sc == UCHAR_INVALID_CODE) {
2311cb0ef41Sopenharmony_ci                status = U_ILLEGAL_ARGUMENT_ERROR;
2321cb0ef41Sopenharmony_ci            } else {
2331cb0ef41Sopenharmony_ci                this->set((UScriptCode)sc, status);
2341cb0ef41Sopenharmony_ci            }
2351cb0ef41Sopenharmony_ci            if (U_FAILURE(status)) {
2361cb0ef41Sopenharmony_ci                return *this;
2371cb0ef41Sopenharmony_ci            }
2381cb0ef41Sopenharmony_ci            oneScriptName.remove();
2391cb0ef41Sopenharmony_ci        }
2401cb0ef41Sopenharmony_ci    }
2411cb0ef41Sopenharmony_ci    return *this;
2421cb0ef41Sopenharmony_ci}
2431cb0ef41Sopenharmony_ci
2441cb0ef41Sopenharmony_civoid ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
2451cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) { return; }
2461cb0ef41Sopenharmony_ci    static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20;
2471cb0ef41Sopenharmony_ci    MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
2481cb0ef41Sopenharmony_ci    UErrorCode internalStatus = U_ZERO_ERROR;
2491cb0ef41Sopenharmony_ci    int32_t script_count = -1;
2501cb0ef41Sopenharmony_ci
2511cb0ef41Sopenharmony_ci    while (true) {
2521cb0ef41Sopenharmony_ci        script_count = uscript_getScriptExtensions(
2531cb0ef41Sopenharmony_ci            codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
2541cb0ef41Sopenharmony_ci        if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
2551cb0ef41Sopenharmony_ci            // Need to allocate more space
2561cb0ef41Sopenharmony_ci            if (scripts.resize(script_count) == nullptr) {
2571cb0ef41Sopenharmony_ci                status = U_MEMORY_ALLOCATION_ERROR;
2581cb0ef41Sopenharmony_ci                return;
2591cb0ef41Sopenharmony_ci            }
2601cb0ef41Sopenharmony_ci            internalStatus = U_ZERO_ERROR;
2611cb0ef41Sopenharmony_ci        } else {
2621cb0ef41Sopenharmony_ci            break;
2631cb0ef41Sopenharmony_ci        }
2641cb0ef41Sopenharmony_ci    }
2651cb0ef41Sopenharmony_ci
2661cb0ef41Sopenharmony_ci    // Check if we failed for some reason other than buffer overflow
2671cb0ef41Sopenharmony_ci    if (U_FAILURE(internalStatus)) {
2681cb0ef41Sopenharmony_ci        status = internalStatus;
2691cb0ef41Sopenharmony_ci        return;
2701cb0ef41Sopenharmony_ci    }
2711cb0ef41Sopenharmony_ci
2721cb0ef41Sopenharmony_ci    // Load the scripts into the ScriptSet and return
2731cb0ef41Sopenharmony_ci    for (int32_t i = 0; i < script_count; i++) {
2741cb0ef41Sopenharmony_ci        this->set(scripts[i], status);
2751cb0ef41Sopenharmony_ci        if (U_FAILURE(status)) { return; }
2761cb0ef41Sopenharmony_ci    }
2771cb0ef41Sopenharmony_ci}
2781cb0ef41Sopenharmony_ci
2791cb0ef41Sopenharmony_ciU_NAMESPACE_END
2801cb0ef41Sopenharmony_ci
2811cb0ef41Sopenharmony_ciU_CAPI UBool U_EXPORT2
2821cb0ef41Sopenharmony_ciuhash_equalsScriptSet(const UElement key1, const UElement key2) {
2831cb0ef41Sopenharmony_ci    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
2841cb0ef41Sopenharmony_ci    icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
2851cb0ef41Sopenharmony_ci    return (*s1 == *s2);
2861cb0ef41Sopenharmony_ci}
2871cb0ef41Sopenharmony_ci
2881cb0ef41Sopenharmony_ciU_CAPI int8_t U_EXPORT2
2891cb0ef41Sopenharmony_ciuhash_compareScriptSet(UElement key0, UElement key1) {
2901cb0ef41Sopenharmony_ci    icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
2911cb0ef41Sopenharmony_ci    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
2921cb0ef41Sopenharmony_ci    int32_t diff = s0->countMembers() - s1->countMembers();
2931cb0ef41Sopenharmony_ci    if (diff != 0) return static_cast<UBool>(diff);
2941cb0ef41Sopenharmony_ci    int32_t i0 = s0->nextSetBit(0);
2951cb0ef41Sopenharmony_ci    int32_t i1 = s1->nextSetBit(0);
2961cb0ef41Sopenharmony_ci    while ((diff = i0-i1) == 0 && i0 > 0) {
2971cb0ef41Sopenharmony_ci        i0 = s0->nextSetBit(i0+1);
2981cb0ef41Sopenharmony_ci        i1 = s1->nextSetBit(i1+1);
2991cb0ef41Sopenharmony_ci    }
3001cb0ef41Sopenharmony_ci    return (int8_t)diff;
3011cb0ef41Sopenharmony_ci}
3021cb0ef41Sopenharmony_ci
3031cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2
3041cb0ef41Sopenharmony_ciuhash_hashScriptSet(const UElement key) {
3051cb0ef41Sopenharmony_ci    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
3061cb0ef41Sopenharmony_ci    return s->hashCode();
3071cb0ef41Sopenharmony_ci}
3081cb0ef41Sopenharmony_ci
3091cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2
3101cb0ef41Sopenharmony_ciuhash_deleteScriptSet(void *obj) {
3111cb0ef41Sopenharmony_ci    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
3121cb0ef41Sopenharmony_ci    delete s;
3131cb0ef41Sopenharmony_ci}
314