1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4********************************************************************** 5* Copyright (C) 2014, International Business Machines 6* Corporation and others. All Rights Reserved. 7********************************************************************** 8* 9* scriptset.cpp 10* 11* created on: 2013 Jan 7 12* created by: Andy Heninger 13*/ 14 15#include "unicode/utypes.h" 16 17#include "unicode/uchar.h" 18#include "unicode/unistr.h" 19 20#include "scriptset.h" 21#include "uassert.h" 22#include "cmemory.h" 23 24U_NAMESPACE_BEGIN 25 26//---------------------------------------------------------------------------- 27// 28// ScriptSet implementation 29// 30//---------------------------------------------------------------------------- 31ScriptSet::ScriptSet() { 32 uprv_memset(bits, 0, sizeof(bits)); 33} 34 35ScriptSet::~ScriptSet() { 36} 37 38ScriptSet::ScriptSet(const ScriptSet &other) { 39 *this = other; 40} 41 42ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 43 uprv_memcpy(bits, other.bits, sizeof(bits)); 44 return *this; 45} 46 47bool ScriptSet::operator == (const ScriptSet &other) const { 48 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 49 if (bits[i] != other.bits[i]) { 50 return false; 51 } 52 } 53 return true; 54} 55 56UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 57 if (U_FAILURE(status)) { 58 return false; 59 } 60 if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) { 61 status = U_ILLEGAL_ARGUMENT_ERROR; 62 return false; 63 } 64 uint32_t index = script / 32; 65 uint32_t bit = 1 << (script & 31); 66 return ((bits[index] & bit) != 0); 67} 68 69 70ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 71 if (U_FAILURE(status)) { 72 return *this; 73 } 74 if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) { 75 status = U_ILLEGAL_ARGUMENT_ERROR; 76 return *this; 77 } 78 uint32_t index = script / 32; 79 uint32_t bit = 1 << (script & 31); 80 bits[index] |= bit; 81 return *this; 82} 83 84ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 85 if (U_FAILURE(status)) { 86 return *this; 87 } 88 if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) { 89 status = U_ILLEGAL_ARGUMENT_ERROR; 90 return *this; 91 } 92 uint32_t index = script / 32; 93 uint32_t bit = 1 << (script & 31); 94 bits[index] &= ~bit; 95 return *this; 96} 97 98 99 100ScriptSet &ScriptSet::Union(const ScriptSet &other) { 101 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 102 bits[i] |= other.bits[i]; 103 } 104 return *this; 105} 106 107ScriptSet &ScriptSet::intersect(const ScriptSet &other) { 108 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 109 bits[i] &= other.bits[i]; 110 } 111 return *this; 112} 113 114ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 115 ScriptSet t; 116 t.set(script, status); 117 if (U_SUCCESS(status)) { 118 this->intersect(t); 119 } 120 return *this; 121} 122 123UBool ScriptSet::intersects(const ScriptSet &other) const { 124 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 125 if ((bits[i] & other.bits[i]) != 0) { 126 return true; 127 } 128 } 129 return false; 130} 131 132UBool ScriptSet::contains(const ScriptSet &other) const { 133 ScriptSet t(*this); 134 t.intersect(other); 135 return (t == other); 136} 137 138 139ScriptSet &ScriptSet::setAll() { 140 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 141 bits[i] = 0xffffffffu; 142 } 143 return *this; 144} 145 146 147ScriptSet &ScriptSet::resetAll() { 148 uprv_memset(bits, 0, sizeof(bits)); 149 return *this; 150} 151 152int32_t ScriptSet::countMembers() const { 153 // This bit counter is good for sparse numbers of '1's, which is 154 // very much the case that we will usually have. 155 int32_t count = 0; 156 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 157 uint32_t x = bits[i]; 158 while (x > 0) { 159 count++; 160 x &= (x - 1); // and off the least significant one bit. 161 } 162 } 163 return count; 164} 165 166int32_t ScriptSet::hashCode() const { 167 int32_t hash = 0; 168 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 169 hash ^= bits[i]; 170 } 171 return hash; 172} 173 174int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 175 // TODO: Wants a better implementation. 176 if (fromIndex < 0) { 177 return -1; 178 } 179 UErrorCode status = U_ZERO_ERROR; 180 for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) { 181 if (test((UScriptCode)scriptIndex, status)) { 182 return scriptIndex; 183 } 184 } 185 return -1; 186} 187 188UBool ScriptSet::isEmpty() const { 189 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 190 if (bits[i] != 0) { 191 return false; 192 } 193 } 194 return true; 195} 196 197UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 198 UBool firstTime = true; 199 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 200 if (!firstTime) { 201 dest.append((char16_t)0x20); 202 } 203 firstTime = false; 204 const char *scriptName = uscript_getShortName((UScriptCode(i))); 205 dest.append(UnicodeString(scriptName, -1, US_INV)); 206 } 207 return dest; 208} 209 210ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 211 resetAll(); 212 if (U_FAILURE(status)) { 213 return *this; 214 } 215 UnicodeString oneScriptName; 216 for (int32_t i=0; i<scriptString.length();) { 217 UChar32 c = scriptString.char32At(i); 218 i = scriptString.moveIndex32(i, 1); 219 if (!u_isUWhiteSpace(c)) { 220 oneScriptName.append(c); 221 if (i < scriptString.length()) { 222 continue; 223 } 224 } 225 if (oneScriptName.length() > 0) { 226 char buf[40]; 227 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 228 buf[sizeof(buf)-1] = 0; 229 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 230 if (sc == UCHAR_INVALID_CODE) { 231 status = U_ILLEGAL_ARGUMENT_ERROR; 232 } else { 233 this->set((UScriptCode)sc, status); 234 } 235 if (U_FAILURE(status)) { 236 return *this; 237 } 238 oneScriptName.remove(); 239 } 240 } 241 return *this; 242} 243 244void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) { 245 if (U_FAILURE(status)) { return; } 246 static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20; 247 MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts; 248 UErrorCode internalStatus = U_ZERO_ERROR; 249 int32_t script_count = -1; 250 251 while (true) { 252 script_count = uscript_getScriptExtensions( 253 codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus); 254 if (internalStatus == U_BUFFER_OVERFLOW_ERROR) { 255 // Need to allocate more space 256 if (scripts.resize(script_count) == nullptr) { 257 status = U_MEMORY_ALLOCATION_ERROR; 258 return; 259 } 260 internalStatus = U_ZERO_ERROR; 261 } else { 262 break; 263 } 264 } 265 266 // Check if we failed for some reason other than buffer overflow 267 if (U_FAILURE(internalStatus)) { 268 status = internalStatus; 269 return; 270 } 271 272 // Load the scripts into the ScriptSet and return 273 for (int32_t i = 0; i < script_count; i++) { 274 this->set(scripts[i], status); 275 if (U_FAILURE(status)) { return; } 276 } 277} 278 279U_NAMESPACE_END 280 281U_CAPI UBool U_EXPORT2 282uhash_equalsScriptSet(const UElement key1, const UElement key2) { 283 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 284 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 285 return (*s1 == *s2); 286} 287 288U_CAPI int8_t U_EXPORT2 289uhash_compareScriptSet(UElement key0, UElement key1) { 290 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 291 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 292 int32_t diff = s0->countMembers() - s1->countMembers(); 293 if (diff != 0) return static_cast<UBool>(diff); 294 int32_t i0 = s0->nextSetBit(0); 295 int32_t i1 = s1->nextSetBit(0); 296 while ((diff = i0-i1) == 0 && i0 > 0) { 297 i0 = s0->nextSetBit(i0+1); 298 i1 = s1->nextSetBit(i1+1); 299 } 300 return (int8_t)diff; 301} 302 303U_CAPI int32_t U_EXPORT2 304uhash_hashScriptSet(const UElement key) { 305 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 306 return s->hashCode(); 307} 308 309U_CAPI void U_EXPORT2 310uhash_deleteScriptSet(void *obj) { 311 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 312 delete s; 313} 314