1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (C) 2014, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7**********************************************************************
8*
9* scriptset.cpp
10*
11* created on: 2013 Jan 7
12* created by: Andy Heninger
13*/
14
15#include "unicode/utypes.h"
16
17#include "unicode/uchar.h"
18#include "unicode/unistr.h"
19
20#include "scriptset.h"
21#include "uassert.h"
22#include "cmemory.h"
23
24U_NAMESPACE_BEGIN
25
26//----------------------------------------------------------------------------
27//
28//  ScriptSet implementation
29//
30//----------------------------------------------------------------------------
31ScriptSet::ScriptSet() {
32    uprv_memset(bits, 0, sizeof(bits));
33}
34
35ScriptSet::~ScriptSet() {
36}
37
38ScriptSet::ScriptSet(const ScriptSet &other) {
39    *this = other;
40}
41
42ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
43    uprv_memcpy(bits, other.bits, sizeof(bits));
44    return *this;
45}
46
47bool ScriptSet::operator == (const ScriptSet &other) const {
48    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
49        if (bits[i] != other.bits[i]) {
50            return false;
51        }
52    }
53    return true;
54}
55
56UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
57    if (U_FAILURE(status)) {
58        return false;
59    }
60    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
61        status = U_ILLEGAL_ARGUMENT_ERROR;
62        return false;
63    }
64    uint32_t index = script / 32;
65    uint32_t bit   = 1 << (script & 31);
66    return ((bits[index] & bit) != 0);
67}
68
69
70ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
71    if (U_FAILURE(status)) {
72        return *this;
73    }
74    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
75        status = U_ILLEGAL_ARGUMENT_ERROR;
76        return *this;
77    }
78    uint32_t index = script / 32;
79    uint32_t bit   = 1 << (script & 31);
80    bits[index] |= bit;
81    return *this;
82}
83
84ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
85    if (U_FAILURE(status)) {
86        return *this;
87    }
88    if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
89        status = U_ILLEGAL_ARGUMENT_ERROR;
90        return *this;
91    }
92    uint32_t index = script / 32;
93    uint32_t bit   = 1 << (script & 31);
94    bits[index] &= ~bit;
95    return *this;
96}
97
98
99
100ScriptSet &ScriptSet::Union(const ScriptSet &other) {
101    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
102        bits[i] |= other.bits[i];
103    }
104    return *this;
105}
106
107ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
108    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
109        bits[i] &= other.bits[i];
110    }
111    return *this;
112}
113
114ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
115    ScriptSet t;
116    t.set(script, status);
117    if (U_SUCCESS(status)) {
118        this->intersect(t);
119    }
120    return *this;
121}
122
123UBool ScriptSet::intersects(const ScriptSet &other) const {
124    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
125        if ((bits[i] & other.bits[i]) != 0) {
126            return true;
127        }
128    }
129    return false;
130}
131
132UBool ScriptSet::contains(const ScriptSet &other) const {
133    ScriptSet t(*this);
134    t.intersect(other);
135    return (t == other);
136}
137
138
139ScriptSet &ScriptSet::setAll() {
140    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
141        bits[i] = 0xffffffffu;
142    }
143    return *this;
144}
145
146
147ScriptSet &ScriptSet::resetAll() {
148    uprv_memset(bits, 0, sizeof(bits));
149    return *this;
150}
151
152int32_t ScriptSet::countMembers() const {
153    // This bit counter is good for sparse numbers of '1's, which is
154    //  very much the case that we will usually have.
155    int32_t count = 0;
156    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
157        uint32_t x = bits[i];
158        while (x > 0) {
159            count++;
160            x &= (x - 1);    // and off the least significant one bit.
161        }
162    }
163    return count;
164}
165
166int32_t ScriptSet::hashCode() const {
167    int32_t hash = 0;
168    for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
169        hash ^= bits[i];
170    }
171    return hash;
172}
173
174int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
175    // TODO: Wants a better implementation.
176    if (fromIndex < 0) {
177        return -1;
178    }
179    UErrorCode status = U_ZERO_ERROR;
180    for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) {
181        if (test((UScriptCode)scriptIndex, status)) {
182            return scriptIndex;
183        }
184    }
185    return -1;
186}
187
188UBool ScriptSet::isEmpty() const {
189    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
190        if (bits[i] != 0) {
191            return false;
192        }
193    }
194    return true;
195}
196
197UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
198    UBool firstTime = true;
199    for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
200        if (!firstTime) {
201            dest.append((char16_t)0x20);
202        }
203        firstTime = false;
204        const char *scriptName = uscript_getShortName((UScriptCode(i)));
205        dest.append(UnicodeString(scriptName, -1, US_INV));
206    }
207    return dest;
208}
209
210ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
211    resetAll();
212    if (U_FAILURE(status)) {
213        return *this;
214    }
215    UnicodeString oneScriptName;
216    for (int32_t i=0; i<scriptString.length();) {
217        UChar32 c = scriptString.char32At(i);
218        i = scriptString.moveIndex32(i, 1);
219        if (!u_isUWhiteSpace(c)) {
220            oneScriptName.append(c);
221            if (i < scriptString.length()) {
222                continue;
223            }
224        }
225        if (oneScriptName.length() > 0) {
226            char buf[40];
227            oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
228            buf[sizeof(buf)-1] = 0;
229            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
230            if (sc == UCHAR_INVALID_CODE) {
231                status = U_ILLEGAL_ARGUMENT_ERROR;
232            } else {
233                this->set((UScriptCode)sc, status);
234            }
235            if (U_FAILURE(status)) {
236                return *this;
237            }
238            oneScriptName.remove();
239        }
240    }
241    return *this;
242}
243
244void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
245    if (U_FAILURE(status)) { return; }
246    static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20;
247    MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
248    UErrorCode internalStatus = U_ZERO_ERROR;
249    int32_t script_count = -1;
250
251    while (true) {
252        script_count = uscript_getScriptExtensions(
253            codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
254        if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
255            // Need to allocate more space
256            if (scripts.resize(script_count) == nullptr) {
257                status = U_MEMORY_ALLOCATION_ERROR;
258                return;
259            }
260            internalStatus = U_ZERO_ERROR;
261        } else {
262            break;
263        }
264    }
265
266    // Check if we failed for some reason other than buffer overflow
267    if (U_FAILURE(internalStatus)) {
268        status = internalStatus;
269        return;
270    }
271
272    // Load the scripts into the ScriptSet and return
273    for (int32_t i = 0; i < script_count; i++) {
274        this->set(scripts[i], status);
275        if (U_FAILURE(status)) { return; }
276    }
277}
278
279U_NAMESPACE_END
280
281U_CAPI UBool U_EXPORT2
282uhash_equalsScriptSet(const UElement key1, const UElement key2) {
283    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
284    icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
285    return (*s1 == *s2);
286}
287
288U_CAPI int8_t U_EXPORT2
289uhash_compareScriptSet(UElement key0, UElement key1) {
290    icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
291    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
292    int32_t diff = s0->countMembers() - s1->countMembers();
293    if (diff != 0) return static_cast<UBool>(diff);
294    int32_t i0 = s0->nextSetBit(0);
295    int32_t i1 = s1->nextSetBit(0);
296    while ((diff = i0-i1) == 0 && i0 > 0) {
297        i0 = s0->nextSetBit(i0+1);
298        i1 = s1->nextSetBit(i1+1);
299    }
300    return (int8_t)diff;
301}
302
303U_CAPI int32_t U_EXPORT2
304uhash_hashScriptSet(const UElement key) {
305    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
306    return s->hashCode();
307}
308
309U_CAPI void U_EXPORT2
310uhash_deleteScriptSet(void *obj) {
311    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
312    delete s;
313}
314