11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci********************************************************************** 51cb0ef41Sopenharmony_ci* Copyright (C) 2014, International Business Machines 61cb0ef41Sopenharmony_ci* Corporation and others. All Rights Reserved. 71cb0ef41Sopenharmony_ci********************************************************************** 81cb0ef41Sopenharmony_ci* 91cb0ef41Sopenharmony_ci* scriptset.cpp 101cb0ef41Sopenharmony_ci* 111cb0ef41Sopenharmony_ci* created on: 2013 Jan 7 121cb0ef41Sopenharmony_ci* created by: Andy Heninger 131cb0ef41Sopenharmony_ci*/ 141cb0ef41Sopenharmony_ci 151cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ci#include "unicode/uchar.h" 181cb0ef41Sopenharmony_ci#include "unicode/unistr.h" 191cb0ef41Sopenharmony_ci 201cb0ef41Sopenharmony_ci#include "scriptset.h" 211cb0ef41Sopenharmony_ci#include "uassert.h" 221cb0ef41Sopenharmony_ci#include "cmemory.h" 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------- 271cb0ef41Sopenharmony_ci// 281cb0ef41Sopenharmony_ci// ScriptSet implementation 291cb0ef41Sopenharmony_ci// 301cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------- 311cb0ef41Sopenharmony_ciScriptSet::ScriptSet() { 321cb0ef41Sopenharmony_ci uprv_memset(bits, 0, sizeof(bits)); 331cb0ef41Sopenharmony_ci} 341cb0ef41Sopenharmony_ci 351cb0ef41Sopenharmony_ciScriptSet::~ScriptSet() { 361cb0ef41Sopenharmony_ci} 371cb0ef41Sopenharmony_ci 381cb0ef41Sopenharmony_ciScriptSet::ScriptSet(const ScriptSet &other) { 391cb0ef41Sopenharmony_ci *this = other; 401cb0ef41Sopenharmony_ci} 411cb0ef41Sopenharmony_ci 421cb0ef41Sopenharmony_ciScriptSet & ScriptSet::operator =(const ScriptSet &other) { 431cb0ef41Sopenharmony_ci uprv_memcpy(bits, other.bits, sizeof(bits)); 441cb0ef41Sopenharmony_ci return *this; 451cb0ef41Sopenharmony_ci} 461cb0ef41Sopenharmony_ci 471cb0ef41Sopenharmony_cibool ScriptSet::operator == (const ScriptSet &other) const { 481cb0ef41Sopenharmony_ci for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 491cb0ef41Sopenharmony_ci if (bits[i] != other.bits[i]) { 501cb0ef41Sopenharmony_ci return false; 511cb0ef41Sopenharmony_ci } 521cb0ef41Sopenharmony_ci } 531cb0ef41Sopenharmony_ci return true; 541cb0ef41Sopenharmony_ci} 551cb0ef41Sopenharmony_ci 561cb0ef41Sopenharmony_ciUBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 571cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 581cb0ef41Sopenharmony_ci return false; 591cb0ef41Sopenharmony_ci } 601cb0ef41Sopenharmony_ci if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) { 611cb0ef41Sopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 621cb0ef41Sopenharmony_ci return false; 631cb0ef41Sopenharmony_ci } 641cb0ef41Sopenharmony_ci uint32_t index = script / 32; 651cb0ef41Sopenharmony_ci uint32_t bit = 1 << (script & 31); 661cb0ef41Sopenharmony_ci return ((bits[index] & bit) != 0); 671cb0ef41Sopenharmony_ci} 681cb0ef41Sopenharmony_ci 691cb0ef41Sopenharmony_ci 701cb0ef41Sopenharmony_ciScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 711cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 721cb0ef41Sopenharmony_ci return *this; 731cb0ef41Sopenharmony_ci } 741cb0ef41Sopenharmony_ci if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) { 751cb0ef41Sopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 761cb0ef41Sopenharmony_ci return *this; 771cb0ef41Sopenharmony_ci } 781cb0ef41Sopenharmony_ci uint32_t index = script / 32; 791cb0ef41Sopenharmony_ci uint32_t bit = 1 << (script & 31); 801cb0ef41Sopenharmony_ci bits[index] |= bit; 811cb0ef41Sopenharmony_ci return *this; 821cb0ef41Sopenharmony_ci} 831cb0ef41Sopenharmony_ci 841cb0ef41Sopenharmony_ciScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 851cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 861cb0ef41Sopenharmony_ci return *this; 871cb0ef41Sopenharmony_ci } 881cb0ef41Sopenharmony_ci if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) { 891cb0ef41Sopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 901cb0ef41Sopenharmony_ci return *this; 911cb0ef41Sopenharmony_ci } 921cb0ef41Sopenharmony_ci uint32_t index = script / 32; 931cb0ef41Sopenharmony_ci uint32_t bit = 1 << (script & 31); 941cb0ef41Sopenharmony_ci bits[index] &= ~bit; 951cb0ef41Sopenharmony_ci return *this; 961cb0ef41Sopenharmony_ci} 971cb0ef41Sopenharmony_ci 981cb0ef41Sopenharmony_ci 991cb0ef41Sopenharmony_ci 1001cb0ef41Sopenharmony_ciScriptSet &ScriptSet::Union(const ScriptSet &other) { 1011cb0ef41Sopenharmony_ci for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 1021cb0ef41Sopenharmony_ci bits[i] |= other.bits[i]; 1031cb0ef41Sopenharmony_ci } 1041cb0ef41Sopenharmony_ci return *this; 1051cb0ef41Sopenharmony_ci} 1061cb0ef41Sopenharmony_ci 1071cb0ef41Sopenharmony_ciScriptSet &ScriptSet::intersect(const ScriptSet &other) { 1081cb0ef41Sopenharmony_ci for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 1091cb0ef41Sopenharmony_ci bits[i] &= other.bits[i]; 1101cb0ef41Sopenharmony_ci } 1111cb0ef41Sopenharmony_ci return *this; 1121cb0ef41Sopenharmony_ci} 1131cb0ef41Sopenharmony_ci 1141cb0ef41Sopenharmony_ciScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 1151cb0ef41Sopenharmony_ci ScriptSet t; 1161cb0ef41Sopenharmony_ci t.set(script, status); 1171cb0ef41Sopenharmony_ci if (U_SUCCESS(status)) { 1181cb0ef41Sopenharmony_ci this->intersect(t); 1191cb0ef41Sopenharmony_ci } 1201cb0ef41Sopenharmony_ci return *this; 1211cb0ef41Sopenharmony_ci} 1221cb0ef41Sopenharmony_ci 1231cb0ef41Sopenharmony_ciUBool ScriptSet::intersects(const ScriptSet &other) const { 1241cb0ef41Sopenharmony_ci for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 1251cb0ef41Sopenharmony_ci if ((bits[i] & other.bits[i]) != 0) { 1261cb0ef41Sopenharmony_ci return true; 1271cb0ef41Sopenharmony_ci } 1281cb0ef41Sopenharmony_ci } 1291cb0ef41Sopenharmony_ci return false; 1301cb0ef41Sopenharmony_ci} 1311cb0ef41Sopenharmony_ci 1321cb0ef41Sopenharmony_ciUBool ScriptSet::contains(const ScriptSet &other) const { 1331cb0ef41Sopenharmony_ci ScriptSet t(*this); 1341cb0ef41Sopenharmony_ci t.intersect(other); 1351cb0ef41Sopenharmony_ci return (t == other); 1361cb0ef41Sopenharmony_ci} 1371cb0ef41Sopenharmony_ci 1381cb0ef41Sopenharmony_ci 1391cb0ef41Sopenharmony_ciScriptSet &ScriptSet::setAll() { 1401cb0ef41Sopenharmony_ci for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 1411cb0ef41Sopenharmony_ci bits[i] = 0xffffffffu; 1421cb0ef41Sopenharmony_ci } 1431cb0ef41Sopenharmony_ci return *this; 1441cb0ef41Sopenharmony_ci} 1451cb0ef41Sopenharmony_ci 1461cb0ef41Sopenharmony_ci 1471cb0ef41Sopenharmony_ciScriptSet &ScriptSet::resetAll() { 1481cb0ef41Sopenharmony_ci uprv_memset(bits, 0, sizeof(bits)); 1491cb0ef41Sopenharmony_ci return *this; 1501cb0ef41Sopenharmony_ci} 1511cb0ef41Sopenharmony_ci 1521cb0ef41Sopenharmony_ciint32_t ScriptSet::countMembers() const { 1531cb0ef41Sopenharmony_ci // This bit counter is good for sparse numbers of '1's, which is 1541cb0ef41Sopenharmony_ci // very much the case that we will usually have. 1551cb0ef41Sopenharmony_ci int32_t count = 0; 1561cb0ef41Sopenharmony_ci for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 1571cb0ef41Sopenharmony_ci uint32_t x = bits[i]; 1581cb0ef41Sopenharmony_ci while (x > 0) { 1591cb0ef41Sopenharmony_ci count++; 1601cb0ef41Sopenharmony_ci x &= (x - 1); // and off the least significant one bit. 1611cb0ef41Sopenharmony_ci } 1621cb0ef41Sopenharmony_ci } 1631cb0ef41Sopenharmony_ci return count; 1641cb0ef41Sopenharmony_ci} 1651cb0ef41Sopenharmony_ci 1661cb0ef41Sopenharmony_ciint32_t ScriptSet::hashCode() const { 1671cb0ef41Sopenharmony_ci int32_t hash = 0; 1681cb0ef41Sopenharmony_ci for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 1691cb0ef41Sopenharmony_ci hash ^= bits[i]; 1701cb0ef41Sopenharmony_ci } 1711cb0ef41Sopenharmony_ci return hash; 1721cb0ef41Sopenharmony_ci} 1731cb0ef41Sopenharmony_ci 1741cb0ef41Sopenharmony_ciint32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 1751cb0ef41Sopenharmony_ci // TODO: Wants a better implementation. 1761cb0ef41Sopenharmony_ci if (fromIndex < 0) { 1771cb0ef41Sopenharmony_ci return -1; 1781cb0ef41Sopenharmony_ci } 1791cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 1801cb0ef41Sopenharmony_ci for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) { 1811cb0ef41Sopenharmony_ci if (test((UScriptCode)scriptIndex, status)) { 1821cb0ef41Sopenharmony_ci return scriptIndex; 1831cb0ef41Sopenharmony_ci } 1841cb0ef41Sopenharmony_ci } 1851cb0ef41Sopenharmony_ci return -1; 1861cb0ef41Sopenharmony_ci} 1871cb0ef41Sopenharmony_ci 1881cb0ef41Sopenharmony_ciUBool ScriptSet::isEmpty() const { 1891cb0ef41Sopenharmony_ci for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 1901cb0ef41Sopenharmony_ci if (bits[i] != 0) { 1911cb0ef41Sopenharmony_ci return false; 1921cb0ef41Sopenharmony_ci } 1931cb0ef41Sopenharmony_ci } 1941cb0ef41Sopenharmony_ci return true; 1951cb0ef41Sopenharmony_ci} 1961cb0ef41Sopenharmony_ci 1971cb0ef41Sopenharmony_ciUnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 1981cb0ef41Sopenharmony_ci UBool firstTime = true; 1991cb0ef41Sopenharmony_ci for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 2001cb0ef41Sopenharmony_ci if (!firstTime) { 2011cb0ef41Sopenharmony_ci dest.append((char16_t)0x20); 2021cb0ef41Sopenharmony_ci } 2031cb0ef41Sopenharmony_ci firstTime = false; 2041cb0ef41Sopenharmony_ci const char *scriptName = uscript_getShortName((UScriptCode(i))); 2051cb0ef41Sopenharmony_ci dest.append(UnicodeString(scriptName, -1, US_INV)); 2061cb0ef41Sopenharmony_ci } 2071cb0ef41Sopenharmony_ci return dest; 2081cb0ef41Sopenharmony_ci} 2091cb0ef41Sopenharmony_ci 2101cb0ef41Sopenharmony_ciScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 2111cb0ef41Sopenharmony_ci resetAll(); 2121cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 2131cb0ef41Sopenharmony_ci return *this; 2141cb0ef41Sopenharmony_ci } 2151cb0ef41Sopenharmony_ci UnicodeString oneScriptName; 2161cb0ef41Sopenharmony_ci for (int32_t i=0; i<scriptString.length();) { 2171cb0ef41Sopenharmony_ci UChar32 c = scriptString.char32At(i); 2181cb0ef41Sopenharmony_ci i = scriptString.moveIndex32(i, 1); 2191cb0ef41Sopenharmony_ci if (!u_isUWhiteSpace(c)) { 2201cb0ef41Sopenharmony_ci oneScriptName.append(c); 2211cb0ef41Sopenharmony_ci if (i < scriptString.length()) { 2221cb0ef41Sopenharmony_ci continue; 2231cb0ef41Sopenharmony_ci } 2241cb0ef41Sopenharmony_ci } 2251cb0ef41Sopenharmony_ci if (oneScriptName.length() > 0) { 2261cb0ef41Sopenharmony_ci char buf[40]; 2271cb0ef41Sopenharmony_ci oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 2281cb0ef41Sopenharmony_ci buf[sizeof(buf)-1] = 0; 2291cb0ef41Sopenharmony_ci int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 2301cb0ef41Sopenharmony_ci if (sc == UCHAR_INVALID_CODE) { 2311cb0ef41Sopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 2321cb0ef41Sopenharmony_ci } else { 2331cb0ef41Sopenharmony_ci this->set((UScriptCode)sc, status); 2341cb0ef41Sopenharmony_ci } 2351cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 2361cb0ef41Sopenharmony_ci return *this; 2371cb0ef41Sopenharmony_ci } 2381cb0ef41Sopenharmony_ci oneScriptName.remove(); 2391cb0ef41Sopenharmony_ci } 2401cb0ef41Sopenharmony_ci } 2411cb0ef41Sopenharmony_ci return *this; 2421cb0ef41Sopenharmony_ci} 2431cb0ef41Sopenharmony_ci 2441cb0ef41Sopenharmony_civoid ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) { 2451cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { return; } 2461cb0ef41Sopenharmony_ci static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20; 2471cb0ef41Sopenharmony_ci MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts; 2481cb0ef41Sopenharmony_ci UErrorCode internalStatus = U_ZERO_ERROR; 2491cb0ef41Sopenharmony_ci int32_t script_count = -1; 2501cb0ef41Sopenharmony_ci 2511cb0ef41Sopenharmony_ci while (true) { 2521cb0ef41Sopenharmony_ci script_count = uscript_getScriptExtensions( 2531cb0ef41Sopenharmony_ci codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus); 2541cb0ef41Sopenharmony_ci if (internalStatus == U_BUFFER_OVERFLOW_ERROR) { 2551cb0ef41Sopenharmony_ci // Need to allocate more space 2561cb0ef41Sopenharmony_ci if (scripts.resize(script_count) == nullptr) { 2571cb0ef41Sopenharmony_ci status = U_MEMORY_ALLOCATION_ERROR; 2581cb0ef41Sopenharmony_ci return; 2591cb0ef41Sopenharmony_ci } 2601cb0ef41Sopenharmony_ci internalStatus = U_ZERO_ERROR; 2611cb0ef41Sopenharmony_ci } else { 2621cb0ef41Sopenharmony_ci break; 2631cb0ef41Sopenharmony_ci } 2641cb0ef41Sopenharmony_ci } 2651cb0ef41Sopenharmony_ci 2661cb0ef41Sopenharmony_ci // Check if we failed for some reason other than buffer overflow 2671cb0ef41Sopenharmony_ci if (U_FAILURE(internalStatus)) { 2681cb0ef41Sopenharmony_ci status = internalStatus; 2691cb0ef41Sopenharmony_ci return; 2701cb0ef41Sopenharmony_ci } 2711cb0ef41Sopenharmony_ci 2721cb0ef41Sopenharmony_ci // Load the scripts into the ScriptSet and return 2731cb0ef41Sopenharmony_ci for (int32_t i = 0; i < script_count; i++) { 2741cb0ef41Sopenharmony_ci this->set(scripts[i], status); 2751cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { return; } 2761cb0ef41Sopenharmony_ci } 2771cb0ef41Sopenharmony_ci} 2781cb0ef41Sopenharmony_ci 2791cb0ef41Sopenharmony_ciU_NAMESPACE_END 2801cb0ef41Sopenharmony_ci 2811cb0ef41Sopenharmony_ciU_CAPI UBool U_EXPORT2 2821cb0ef41Sopenharmony_ciuhash_equalsScriptSet(const UElement key1, const UElement key2) { 2831cb0ef41Sopenharmony_ci icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 2841cb0ef41Sopenharmony_ci icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 2851cb0ef41Sopenharmony_ci return (*s1 == *s2); 2861cb0ef41Sopenharmony_ci} 2871cb0ef41Sopenharmony_ci 2881cb0ef41Sopenharmony_ciU_CAPI int8_t U_EXPORT2 2891cb0ef41Sopenharmony_ciuhash_compareScriptSet(UElement key0, UElement key1) { 2901cb0ef41Sopenharmony_ci icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 2911cb0ef41Sopenharmony_ci icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 2921cb0ef41Sopenharmony_ci int32_t diff = s0->countMembers() - s1->countMembers(); 2931cb0ef41Sopenharmony_ci if (diff != 0) return static_cast<UBool>(diff); 2941cb0ef41Sopenharmony_ci int32_t i0 = s0->nextSetBit(0); 2951cb0ef41Sopenharmony_ci int32_t i1 = s1->nextSetBit(0); 2961cb0ef41Sopenharmony_ci while ((diff = i0-i1) == 0 && i0 > 0) { 2971cb0ef41Sopenharmony_ci i0 = s0->nextSetBit(i0+1); 2981cb0ef41Sopenharmony_ci i1 = s1->nextSetBit(i1+1); 2991cb0ef41Sopenharmony_ci } 3001cb0ef41Sopenharmony_ci return (int8_t)diff; 3011cb0ef41Sopenharmony_ci} 3021cb0ef41Sopenharmony_ci 3031cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 3041cb0ef41Sopenharmony_ciuhash_hashScriptSet(const UElement key) { 3051cb0ef41Sopenharmony_ci icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 3061cb0ef41Sopenharmony_ci return s->hashCode(); 3071cb0ef41Sopenharmony_ci} 3081cb0ef41Sopenharmony_ci 3091cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 3101cb0ef41Sopenharmony_ciuhash_deleteScriptSet(void *obj) { 3111cb0ef41Sopenharmony_ci icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 3121cb0ef41Sopenharmony_ci delete s; 3131cb0ef41Sopenharmony_ci} 314