1cb93a386Sopenharmony_ci/*
2cb93a386Sopenharmony_ci* Copyright 2020 Google Inc.
3cb93a386Sopenharmony_ci*
4cb93a386Sopenharmony_ci* Use of this source code is governed by a BSD-style license that can be
5cb93a386Sopenharmony_ci* found in the LICENSE file.
6cb93a386Sopenharmony_ci*/
7cb93a386Sopenharmony_ci
8cb93a386Sopenharmony_ci#include "include/core/SkString.h"
9cb93a386Sopenharmony_ci#include "include/core/SkTypes.h"
10cb93a386Sopenharmony_ci#include "include/private/SkBitmaskEnum.h"
11cb93a386Sopenharmony_ci#include "include/private/SkMutex.h"
12cb93a386Sopenharmony_ci#include "include/private/SkOnce.h"
13cb93a386Sopenharmony_ci#include "include/private/SkTArray.h"
14cb93a386Sopenharmony_ci#include "include/private/SkTemplates.h"
15cb93a386Sopenharmony_ci#include "include/private/SkTo.h"
16cb93a386Sopenharmony_ci#include "modules/skunicode/include/SkUnicode.h"
17cb93a386Sopenharmony_ci#include "modules/skunicode/src/SkUnicode_icu.h"
18cb93a386Sopenharmony_ci#include "modules/skunicode/src/SkUnicode_icu_bidi.h"
19cb93a386Sopenharmony_ci#include "src/utils/SkUTF.h"
20cb93a386Sopenharmony_ci#include "include/private/SkTHash.h"
21cb93a386Sopenharmony_ci#include <unicode/umachine.h>
22cb93a386Sopenharmony_ci#include <functional>
23cb93a386Sopenharmony_ci#include <string>
24cb93a386Sopenharmony_ci#include <utility>
25cb93a386Sopenharmony_ci#include <vector>
26cb93a386Sopenharmony_ci
27cb93a386Sopenharmony_ci#if defined(SK_USING_THIRD_PARTY_ICU)
28cb93a386Sopenharmony_ci#include "SkLoadICU.h"
29cb93a386Sopenharmony_ci#endif
30cb93a386Sopenharmony_ci
31cb93a386Sopenharmony_cistatic const SkICULib* ICULib() {
32cb93a386Sopenharmony_ci    static const auto gICU = SkLoadICULib();
33cb93a386Sopenharmony_ci
34cb93a386Sopenharmony_ci    return gICU.get();
35cb93a386Sopenharmony_ci}
36cb93a386Sopenharmony_ci
// sk_* wrappers for ICU funcs
//
// SKICU_FUNC(funcname) defines a variadic function template named sk_<funcname> that
// forwards its arguments (via std::forward) to the f_<funcname> member of the table
// returned by ICULib(). The return type is deduced from a call expression to <funcname>
// itself, so the real ICU declaration has to be visible where the macro is expanded.
//
// ICULib() is dereferenced without a null check here.
//
// SKICU_EMIT_FUNCS is not defined in this file; presumably it expands to one
// SKICU_FUNC(...) invocation per wrapped ICU function -- confirm in the header that
// defines it.
#define SKICU_FUNC(funcname)                                                                \
    template <typename... Args>                                                             \
    auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
        return ICULib()->f_##funcname(std::forward<Args>(args)...);                         \
    }                                                                                       \

SKICU_EMIT_FUNCS
#undef SKICU_FUNC
46cb93a386Sopenharmony_ci
47cb93a386Sopenharmony_ciconst char* SkUnicode_IcuBidi::errorName(UErrorCode status) {
48cb93a386Sopenharmony_ci    return sk_u_errorName(status);
49cb93a386Sopenharmony_ci}
50cb93a386Sopenharmony_ci
51cb93a386Sopenharmony_civoid SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) {
52cb93a386Sopenharmony_ci    sk_ubidi_close(bidi);
53cb93a386Sopenharmony_ci}
54cb93a386Sopenharmony_ciUBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) {
55cb93a386Sopenharmony_ci    return sk_ubidi_getDirection(bidi);
56cb93a386Sopenharmony_ci}
57cb93a386Sopenharmony_ciSkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) {
58cb93a386Sopenharmony_ci    return sk_ubidi_getLength(bidi);
59cb93a386Sopenharmony_ci}
60cb93a386Sopenharmony_ciSkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) {
61cb93a386Sopenharmony_ci    return sk_ubidi_getLevelAt(bidi, pos);
62cb93a386Sopenharmony_ci}
63cb93a386Sopenharmony_ciUBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) {
64cb93a386Sopenharmony_ci    return sk_ubidi_openSized(maxLength, maxRunCount, pErrorCode);
65cb93a386Sopenharmony_ci}
66cb93a386Sopenharmony_civoid SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi,
67cb93a386Sopenharmony_ci                         const UChar* text,
68cb93a386Sopenharmony_ci                         int32_t length,
69cb93a386Sopenharmony_ci                         UBiDiLevel paraLevel,
70cb93a386Sopenharmony_ci                         UBiDiLevel* embeddingLevels,
71cb93a386Sopenharmony_ci                         UErrorCode* status) {
72cb93a386Sopenharmony_ci    return sk_ubidi_setPara(bidi, text, length, paraLevel, embeddingLevels, status);
73cb93a386Sopenharmony_ci}
74cb93a386Sopenharmony_civoid SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[],
75cb93a386Sopenharmony_ci                               int levelsCount,
76cb93a386Sopenharmony_ci                               int32_t logicalFromVisual[]) {
77cb93a386Sopenharmony_ci    sk_ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
78cb93a386Sopenharmony_ci}
79cb93a386Sopenharmony_ci
80cb93a386Sopenharmony_cistatic inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) {
81cb93a386Sopenharmony_ci    const auto* icu = ICULib();
82cb93a386Sopenharmony_ci    SkASSERT(icu->f_ubrk_clone_ || icu->f_ubrk_safeClone_);
83cb93a386Sopenharmony_ci    return icu->f_ubrk_clone_
84cb93a386Sopenharmony_ci        ? icu->f_ubrk_clone_(bi, status)
85cb93a386Sopenharmony_ci        : icu->f_ubrk_safeClone_(bi, nullptr, nullptr, status);
86cb93a386Sopenharmony_ci}
87cb93a386Sopenharmony_ci
88cb93a386Sopenharmony_cistatic UText* utext_close_wrapper(UText* ut) {
89cb93a386Sopenharmony_ci    return sk_utext_close(ut);
90cb93a386Sopenharmony_ci}
91cb93a386Sopenharmony_cistatic void ubrk_close_wrapper(UBreakIterator* bi) {
92cb93a386Sopenharmony_ci    sk_ubrk_close(bi);
93cb93a386Sopenharmony_ci}
94cb93a386Sopenharmony_ci
95cb93a386Sopenharmony_ciusing ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close),
96cb93a386Sopenharmony_ci                                                         utext_close_wrapper>>;
97cb93a386Sopenharmony_ciusing ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close),
98cb93a386Sopenharmony_ci                                                                           ubrk_close_wrapper>>;
99cb93a386Sopenharmony_ci/** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
100cb93a386Sopenharmony_cistatic inline SkUnichar utf8_next(const char** ptr, const char* end) {
101cb93a386Sopenharmony_ci    SkUnichar val = SkUTF::NextUTF8(ptr, end);
102cb93a386Sopenharmony_ci    return val < 0 ? 0xFFFD : val;
103cb93a386Sopenharmony_ci}
104cb93a386Sopenharmony_ci
105cb93a386Sopenharmony_cistatic UBreakIteratorType convertType(SkUnicode::BreakType type) {
106cb93a386Sopenharmony_ci    switch (type) {
107cb93a386Sopenharmony_ci        case SkUnicode::BreakType::kLines: return UBRK_LINE;
108cb93a386Sopenharmony_ci        case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
109cb93a386Sopenharmony_ci        case SkUnicode::BreakType::kWords: return UBRK_WORD;
110cb93a386Sopenharmony_ci        default:
111cb93a386Sopenharmony_ci            return UBRK_CHARACTER;
112cb93a386Sopenharmony_ci    }
113cb93a386Sopenharmony_ci}
114cb93a386Sopenharmony_ci
115cb93a386Sopenharmony_ciclass SkBreakIterator_icu : public SkBreakIterator {
116cb93a386Sopenharmony_ci    ICUBreakIterator fBreakIterator;
117cb93a386Sopenharmony_ci    Position fLastResult;
118cb93a386Sopenharmony_ci public:
119cb93a386Sopenharmony_ci    explicit SkBreakIterator_icu(ICUBreakIterator iter)
120cb93a386Sopenharmony_ci            : fBreakIterator(std::move(iter))
121cb93a386Sopenharmony_ci            , fLastResult(0) {}
122cb93a386Sopenharmony_ci    Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
123cb93a386Sopenharmony_ci    Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
124cb93a386Sopenharmony_ci    Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
125cb93a386Sopenharmony_ci    Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
126cb93a386Sopenharmony_ci    bool isDone() override { return fLastResult == UBRK_DONE; }
127cb93a386Sopenharmony_ci
128cb93a386Sopenharmony_ci    bool setText(const char utftext8[], int utf8Units) override {
129cb93a386Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
130cb93a386Sopenharmony_ci        ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status));
131cb93a386Sopenharmony_ci
132cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
133cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
134cb93a386Sopenharmony_ci            return false;
135cb93a386Sopenharmony_ci        }
136cb93a386Sopenharmony_ci        SkASSERT(text);
137cb93a386Sopenharmony_ci        sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
138cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
139cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
140cb93a386Sopenharmony_ci            return false;
141cb93a386Sopenharmony_ci        }
142cb93a386Sopenharmony_ci        fLastResult = 0;
143cb93a386Sopenharmony_ci        return true;
144cb93a386Sopenharmony_ci    }
145cb93a386Sopenharmony_ci    bool setText(const char16_t utftext16[], int utf16Units) override {
146cb93a386Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
147cb93a386Sopenharmony_ci        ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]),
148cb93a386Sopenharmony_ci                                          utf16Units, &status));
149cb93a386Sopenharmony_ci
150cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
151cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
152cb93a386Sopenharmony_ci            return false;
153cb93a386Sopenharmony_ci        }
154cb93a386Sopenharmony_ci        SkASSERT(text);
155cb93a386Sopenharmony_ci        sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status);
156cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
157cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
158cb93a386Sopenharmony_ci            return false;
159cb93a386Sopenharmony_ci        }
160cb93a386Sopenharmony_ci        fLastResult = 0;
161cb93a386Sopenharmony_ci        return true;
162cb93a386Sopenharmony_ci    }
163cb93a386Sopenharmony_ci};
164cb93a386Sopenharmony_ci
165cb93a386Sopenharmony_ciclass SkIcuBreakIteratorCache {
166cb93a386Sopenharmony_ci    SkTHashMap<SkUnicode::BreakType, ICUBreakIterator> fBreakCache;
167cb93a386Sopenharmony_ci    SkMutex fBreakCacheMutex;
168cb93a386Sopenharmony_ci
169cb93a386Sopenharmony_ci public:
170cb93a386Sopenharmony_ci    static SkIcuBreakIteratorCache& get() {
171cb93a386Sopenharmony_ci        static SkIcuBreakIteratorCache instance;
172cb93a386Sopenharmony_ci        return instance;
173cb93a386Sopenharmony_ci    }
174cb93a386Sopenharmony_ci
175cb93a386Sopenharmony_ci    ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type) {
176cb93a386Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
177cb93a386Sopenharmony_ci        ICUBreakIterator* cachedIterator;
178cb93a386Sopenharmony_ci        {
179cb93a386Sopenharmony_ci            SkAutoMutexExclusive lock(fBreakCacheMutex);
180cb93a386Sopenharmony_ci            cachedIterator = fBreakCache.find(type);
181cb93a386Sopenharmony_ci            if (!cachedIterator) {
182cb93a386Sopenharmony_ci                ICUBreakIterator newIterator(sk_ubrk_open(convertType(type), sk_uloc_getDefault(),
183cb93a386Sopenharmony_ci                                                          nullptr, 0, &status));
184cb93a386Sopenharmony_ci                if (U_FAILURE(status)) {
185cb93a386Sopenharmony_ci                    SkDEBUGF("Break error: %s", sk_u_errorName(status));
186cb93a386Sopenharmony_ci                } else {
187cb93a386Sopenharmony_ci                    cachedIterator = fBreakCache.set(type, std::move(newIterator));
188cb93a386Sopenharmony_ci                }
189cb93a386Sopenharmony_ci            }
190cb93a386Sopenharmony_ci        }
191cb93a386Sopenharmony_ci        ICUBreakIterator iterator;
192cb93a386Sopenharmony_ci        if (cachedIterator) {
193cb93a386Sopenharmony_ci            iterator.reset(sk_ubrk_clone(cachedIterator->get(), &status));
194cb93a386Sopenharmony_ci            if (U_FAILURE(status)) {
195cb93a386Sopenharmony_ci                SkDEBUGF("Break error: %s", sk_u_errorName(status));
196cb93a386Sopenharmony_ci            }
197cb93a386Sopenharmony_ci        }
198cb93a386Sopenharmony_ci        return iterator;
199cb93a386Sopenharmony_ci    }
200cb93a386Sopenharmony_ci};
201cb93a386Sopenharmony_ci
202cb93a386Sopenharmony_ciclass SkUnicode_icu : public SkUnicode {
203cb93a386Sopenharmony_ci
204cb93a386Sopenharmony_ci    std::unique_ptr<SkUnicode> copy() override {
205cb93a386Sopenharmony_ci        return std::make_unique<SkUnicode_icu>();
206cb93a386Sopenharmony_ci    }
207cb93a386Sopenharmony_ci
208cb93a386Sopenharmony_ci    static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale,  std::vector<Position>* words) {
209cb93a386Sopenharmony_ci
210cb93a386Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
211cb93a386Sopenharmony_ci
212cb93a386Sopenharmony_ci        ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(BreakType::kWords);
213cb93a386Sopenharmony_ci        if (!iterator) {
214cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
215cb93a386Sopenharmony_ci            return false;
216cb93a386Sopenharmony_ci        }
217cb93a386Sopenharmony_ci        SkASSERT(iterator);
218cb93a386Sopenharmony_ci
219cb93a386Sopenharmony_ci        ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status));
220cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
221cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
222cb93a386Sopenharmony_ci            return false;
223cb93a386Sopenharmony_ci        }
224cb93a386Sopenharmony_ci
225cb93a386Sopenharmony_ci        sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
226cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
227cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
228cb93a386Sopenharmony_ci            return false;
229cb93a386Sopenharmony_ci        }
230cb93a386Sopenharmony_ci
231cb93a386Sopenharmony_ci        // Get the words
232cb93a386Sopenharmony_ci        int32_t pos = sk_ubrk_first(iterator.get());
233cb93a386Sopenharmony_ci        while (pos != UBRK_DONE) {
234cb93a386Sopenharmony_ci            words->emplace_back(pos);
235cb93a386Sopenharmony_ci            pos = sk_ubrk_next(iterator.get());
236cb93a386Sopenharmony_ci        }
237cb93a386Sopenharmony_ci
238cb93a386Sopenharmony_ci        return true;
239cb93a386Sopenharmony_ci    }
240cb93a386Sopenharmony_ci
241cb93a386Sopenharmony_ci    static bool extractPositions
242cb93a386Sopenharmony_ci        (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> setBreak) {
243cb93a386Sopenharmony_ci
244cb93a386Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
245cb93a386Sopenharmony_ci        ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status));
246cb93a386Sopenharmony_ci
247cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
248cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
249cb93a386Sopenharmony_ci            return false;
250cb93a386Sopenharmony_ci        }
251cb93a386Sopenharmony_ci        SkASSERT(text);
252cb93a386Sopenharmony_ci
253cb93a386Sopenharmony_ci        ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type);
254cb93a386Sopenharmony_ci        if (!iterator) {
255cb93a386Sopenharmony_ci            return false;
256cb93a386Sopenharmony_ci        }
257cb93a386Sopenharmony_ci
258cb93a386Sopenharmony_ci        sk_ubrk_setUText(iterator.get(), text.get(), &status);
259cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
260cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
261cb93a386Sopenharmony_ci            return false;
262cb93a386Sopenharmony_ci        }
263cb93a386Sopenharmony_ci
264cb93a386Sopenharmony_ci        auto iter = iterator.get();
265cb93a386Sopenharmony_ci        int32_t pos = sk_ubrk_first(iter);
266cb93a386Sopenharmony_ci        while (pos != UBRK_DONE) {
267cb93a386Sopenharmony_ci            int s = type == SkUnicode::BreakType::kLines
268cb93a386Sopenharmony_ci                        ? UBRK_LINE_SOFT
269cb93a386Sopenharmony_ci                        : sk_ubrk_getRuleStatus(iter);
270cb93a386Sopenharmony_ci            setBreak(pos, s);
271cb93a386Sopenharmony_ci            pos = sk_ubrk_next(iter);
272cb93a386Sopenharmony_ci        }
273cb93a386Sopenharmony_ci
274cb93a386Sopenharmony_ci        if (type == SkUnicode::BreakType::kLines) {
275cb93a386Sopenharmony_ci            // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715
276cb93a386Sopenharmony_ci            // (ICU line break iterator does not work correctly on Thai text with new lines)
277cb93a386Sopenharmony_ci            // So, we only use the iterator to collect soft line breaks and
278cb93a386Sopenharmony_ci            // scan the text for all hard line breaks ourselves
279cb93a386Sopenharmony_ci            const char* end = utf8 + utf8Units;
280cb93a386Sopenharmony_ci            const char* ch = utf8;
281cb93a386Sopenharmony_ci            while (ch < end) {
282cb93a386Sopenharmony_ci                auto unichar = utf8_next(&ch, end);
283cb93a386Sopenharmony_ci                if (isHardLineBreak(unichar)) {
284cb93a386Sopenharmony_ci                    setBreak(ch - utf8, UBRK_LINE_HARD);
285cb93a386Sopenharmony_ci                }
286cb93a386Sopenharmony_ci            }
287cb93a386Sopenharmony_ci        }
288cb93a386Sopenharmony_ci        return true;
289cb93a386Sopenharmony_ci    }
290cb93a386Sopenharmony_ci
291cb93a386Sopenharmony_ci    static bool isControl(SkUnichar utf8) {
292cb93a386Sopenharmony_ci        return sk_u_iscntrl(utf8);
293cb93a386Sopenharmony_ci    }
294cb93a386Sopenharmony_ci
295cb93a386Sopenharmony_ci    static bool isWhitespace(SkUnichar utf8) {
296cb93a386Sopenharmony_ci        return sk_u_isWhitespace(utf8);
297cb93a386Sopenharmony_ci    }
298cb93a386Sopenharmony_ci
299cb93a386Sopenharmony_ci    static bool isSpace(SkUnichar utf8) {
300cb93a386Sopenharmony_ci        return sk_u_isspace(utf8);
301cb93a386Sopenharmony_ci    }
302cb93a386Sopenharmony_ci
303cb93a386Sopenharmony_ci    static bool isTabulation(SkUnichar utf8) {
304cb93a386Sopenharmony_ci        return utf8 == '\t';
305cb93a386Sopenharmony_ci    }
306cb93a386Sopenharmony_ci
307cb93a386Sopenharmony_ci    static bool isHardBreak(SkUnichar utf8) {
308cb93a386Sopenharmony_ci        auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
309cb93a386Sopenharmony_ci        return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
310cb93a386Sopenharmony_ci    }
311cb93a386Sopenharmony_ci
312cb93a386Sopenharmony_ci    static bool isIdeographic(SkUnichar unichar) {
313cb93a386Sopenharmony_ci        return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC);
314cb93a386Sopenharmony_ci    }
315cb93a386Sopenharmony_ci
316cb93a386Sopenharmony_cipublic:
317cb93a386Sopenharmony_ci    ~SkUnicode_icu() override { }
318cb93a386Sopenharmony_ci    std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
319cb93a386Sopenharmony_ci                                                     SkBidiIterator::Direction dir) override {
320cb93a386Sopenharmony_ci        return SkUnicode::makeBidiIterator(text, count, dir);
321cb93a386Sopenharmony_ci    }
322cb93a386Sopenharmony_ci    std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
323cb93a386Sopenharmony_ci                                                     int count,
324cb93a386Sopenharmony_ci                                                     SkBidiIterator::Direction dir) override {
325cb93a386Sopenharmony_ci        return SkUnicode::makeBidiIterator(text, count, dir);
326cb93a386Sopenharmony_ci    }
327cb93a386Sopenharmony_ci    std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
328cb93a386Sopenharmony_ci                                                       BreakType breakType) override {
329cb93a386Sopenharmony_ci        UErrorCode status = U_ZERO_ERROR;
330cb93a386Sopenharmony_ci        ICUBreakIterator iterator(sk_ubrk_open(convertType(breakType), locale, nullptr, 0,
331cb93a386Sopenharmony_ci                                               &status));
332cb93a386Sopenharmony_ci        if (U_FAILURE(status)) {
333cb93a386Sopenharmony_ci            SkDEBUGF("Break error: %s", sk_u_errorName(status));
334cb93a386Sopenharmony_ci            return nullptr;
335cb93a386Sopenharmony_ci        }
336cb93a386Sopenharmony_ci        return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
337cb93a386Sopenharmony_ci    }
338cb93a386Sopenharmony_ci    std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override {
339cb93a386Sopenharmony_ci        return makeBreakIterator(sk_uloc_getDefault(), breakType);
340cb93a386Sopenharmony_ci    }
341cb93a386Sopenharmony_ci
342cb93a386Sopenharmony_ci    static bool isHardLineBreak(SkUnichar utf8) {
343cb93a386Sopenharmony_ci        auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
344cb93a386Sopenharmony_ci        return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK;
345cb93a386Sopenharmony_ci    }
346cb93a386Sopenharmony_ci
347cb93a386Sopenharmony_ci    SkString toUpper(const SkString& str) override {
348cb93a386Sopenharmony_ci        // Convert to UTF16 since that's what ICU wants.
349cb93a386Sopenharmony_ci        auto str16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size());
350cb93a386Sopenharmony_ci
351cb93a386Sopenharmony_ci        UErrorCode icu_err = U_ZERO_ERROR;
352cb93a386Sopenharmony_ci        const auto upper16len = sk_u_strToUpper(nullptr, 0, (UChar*)(str16.c_str()), str16.size(),
353cb93a386Sopenharmony_ci                                                nullptr, &icu_err);
354cb93a386Sopenharmony_ci        if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) {
355cb93a386Sopenharmony_ci            return SkString();
356cb93a386Sopenharmony_ci        }
357cb93a386Sopenharmony_ci
358cb93a386Sopenharmony_ci        SkAutoSTArray<128, uint16_t> upper16(upper16len);
359cb93a386Sopenharmony_ci        icu_err = U_ZERO_ERROR;
360cb93a386Sopenharmony_ci        sk_u_strToUpper((UChar*)(upper16.get()), SkToS32(upper16.size()),
361cb93a386Sopenharmony_ci                        (UChar*)(str16.c_str()), str16.size(),
362cb93a386Sopenharmony_ci                        nullptr, &icu_err);
363cb93a386Sopenharmony_ci        SkASSERT(!U_FAILURE(icu_err));
364cb93a386Sopenharmony_ci
365cb93a386Sopenharmony_ci        // ... and back to utf8 'cause that's what we want.
366cb93a386Sopenharmony_ci        return convertUtf16ToUtf8((char16_t*)upper16.get(), upper16.size());
367cb93a386Sopenharmony_ci    }
368cb93a386Sopenharmony_ci
369cb93a386Sopenharmony_ci    bool getBidiRegions(const char utf8[],
370cb93a386Sopenharmony_ci                        int utf8Units,
371cb93a386Sopenharmony_ci                        TextDirection dir,
372cb93a386Sopenharmony_ci                        std::vector<BidiRegion>* results) override {
373cb93a386Sopenharmony_ci        return SkUnicode::extractBidi(utf8, utf8Units, dir, results);
374cb93a386Sopenharmony_ci    }
375cb93a386Sopenharmony_ci
376cb93a386Sopenharmony_ci    bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
377cb93a386Sopenharmony_ci
378cb93a386Sopenharmony_ci        // Convert to UTF16 since we want the results in utf16
379cb93a386Sopenharmony_ci        auto utf16 = convertUtf8ToUtf16(utf8, utf8Units);
380cb93a386Sopenharmony_ci        return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
381cb93a386Sopenharmony_ci    }
382cb93a386Sopenharmony_ci
383cb93a386Sopenharmony_ci    bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs,
384cb93a386Sopenharmony_ci                          SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
385cb93a386Sopenharmony_ci        results->reset();
386cb93a386Sopenharmony_ci        results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
387cb93a386Sopenharmony_ci
388cb93a386Sopenharmony_ci        SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, [&](int pos,
389cb93a386Sopenharmony_ci                                                                       int status) {
390cb93a386Sopenharmony_ci            (*results)[pos] |= status == UBRK_LINE_HARD
391cb93a386Sopenharmony_ci                                    ? CodeUnitFlags::kHardLineBreakBefore
392cb93a386Sopenharmony_ci                                    : CodeUnitFlags::kSoftLineBreakBefore;
393cb93a386Sopenharmony_ci        });
394cb93a386Sopenharmony_ci
395cb93a386Sopenharmony_ci        SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, [&](int pos,
396cb93a386Sopenharmony_ci                                                                       int status) {
397cb93a386Sopenharmony_ci            (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
398cb93a386Sopenharmony_ci        });
399cb93a386Sopenharmony_ci
400cb93a386Sopenharmony_ci        const char* current = utf8;
401cb93a386Sopenharmony_ci        const char* end = utf8 + utf8Units;
402cb93a386Sopenharmony_ci        while (current < end) {
403cb93a386Sopenharmony_ci            auto before = current - utf8;
404cb93a386Sopenharmony_ci            SkUnichar unichar = SkUTF::NextUTF8(&current, end);
405cb93a386Sopenharmony_ci            if (unichar < 0) unichar = 0xFFFD;
406cb93a386Sopenharmony_ci            auto after = current - utf8;
407cb93a386Sopenharmony_ci            if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
408cb93a386Sopenharmony_ci                results->at(before) |= SkUnicode::kTabulation;
409cb93a386Sopenharmony_ci                if (replaceTabs) {
410cb93a386Sopenharmony_ci                    unichar = ' ';
411cb93a386Sopenharmony_ci                    utf8[before] = ' ';
412cb93a386Sopenharmony_ci                }
413cb93a386Sopenharmony_ci            }
414cb93a386Sopenharmony_ci            for (auto i = before; i < after; ++i) {
415cb93a386Sopenharmony_ci                if (SkUnicode_icu::isSpace(unichar)) {
416cb93a386Sopenharmony_ci                    results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
417cb93a386Sopenharmony_ci                }
418cb93a386Sopenharmony_ci                if (SkUnicode_icu::isWhitespace(unichar)) {
419cb93a386Sopenharmony_ci                    results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
420cb93a386Sopenharmony_ci                }
421cb93a386Sopenharmony_ci                if (SkUnicode_icu::isControl(unichar)) {
422cb93a386Sopenharmony_ci                    results->at(i) |= SkUnicode::kControl;
423cb93a386Sopenharmony_ci                }
424cb93a386Sopenharmony_ci                if (SkUnicode_icu::isIdeographic(unichar)) {
425cb93a386Sopenharmony_ci                    results->at(i) |= SkUnicode::kIdeographic;
426cb93a386Sopenharmony_ci                }
427cb93a386Sopenharmony_ci            }
428cb93a386Sopenharmony_ci        }
429cb93a386Sopenharmony_ci
430cb93a386Sopenharmony_ci        return true;
431cb93a386Sopenharmony_ci    }
432cb93a386Sopenharmony_ci
433cb93a386Sopenharmony_ci    bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
434cb93a386Sopenharmony_ci                          SkTArray<SkUnicode::CodeUnitFlags, true>* results) override {
435cb93a386Sopenharmony_ci        results->reset();
436cb93a386Sopenharmony_ci        results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
437cb93a386Sopenharmony_ci
438cb93a386Sopenharmony_ci        // Get white spaces
439cb93a386Sopenharmony_ci        this->forEachCodepoint((char16_t*)&utf16[0], utf16Units,
440cb93a386Sopenharmony_ci           [results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) {
441cb93a386Sopenharmony_ci                for (auto i = start; i < end; ++i) {
442cb93a386Sopenharmony_ci                    if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) {
443cb93a386Sopenharmony_ci                        results->at(i) |= SkUnicode::kTabulation;
444cb93a386Sopenharmony_ci                    if (replaceTabs) {
445cb93a386Sopenharmony_ci                            unichar = ' ';
446cb93a386Sopenharmony_ci                            utf16[start] = ' ';
447cb93a386Sopenharmony_ci                        }
448cb93a386Sopenharmony_ci                    }
449cb93a386Sopenharmony_ci                    if (SkUnicode_icu::isSpace(unichar)) {
450cb93a386Sopenharmony_ci                        results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
451cb93a386Sopenharmony_ci                    }
452cb93a386Sopenharmony_ci                    if (SkUnicode_icu::isWhitespace(unichar)) {
453cb93a386Sopenharmony_ci                        results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
454cb93a386Sopenharmony_ci                    }
455cb93a386Sopenharmony_ci                    if (SkUnicode_icu::isControl(unichar)) {
456cb93a386Sopenharmony_ci                        results->at(i) |= SkUnicode::kControl;
457cb93a386Sopenharmony_ci                    }
458cb93a386Sopenharmony_ci                }
459cb93a386Sopenharmony_ci           });
460cb93a386Sopenharmony_ci        // Get graphemes
461cb93a386Sopenharmony_ci        this->forEachBreak((char16_t*)&utf16[0],
462cb93a386Sopenharmony_ci                           utf16Units,
463cb93a386Sopenharmony_ci                           SkUnicode::BreakType::kGraphemes,
464cb93a386Sopenharmony_ci                           [results](SkBreakIterator::Position pos, SkBreakIterator::Status) {
465cb93a386Sopenharmony_ci                               (*results)[pos] |= CodeUnitFlags::kGraphemeStart;
466cb93a386Sopenharmony_ci                           });
467cb93a386Sopenharmony_ci        // Get line breaks
468cb93a386Sopenharmony_ci        this->forEachBreak(
469cb93a386Sopenharmony_ci                (char16_t*)&utf16[0],
470cb93a386Sopenharmony_ci                utf16Units,
471cb93a386Sopenharmony_ci                SkUnicode::BreakType::kLines,
472cb93a386Sopenharmony_ci                [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) {
473cb93a386Sopenharmony_ci                    if (status ==
474cb93a386Sopenharmony_ci                        (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) {
475cb93a386Sopenharmony_ci                        // Hard line breaks clears off all the other flags
476cb93a386Sopenharmony_ci                        // TODO: Treat \n as a formatting mark and do not pass it to SkShaper
477cb93a386Sopenharmony_ci                        (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore;
478cb93a386Sopenharmony_ci                    } else {
479cb93a386Sopenharmony_ci                        (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore;
480cb93a386Sopenharmony_ci                    }
481cb93a386Sopenharmony_ci                });
482cb93a386Sopenharmony_ci
483cb93a386Sopenharmony_ci        return true;
484cb93a386Sopenharmony_ci    }
485cb93a386Sopenharmony_ci
486cb93a386Sopenharmony_ci    void reorderVisual(const BidiLevel runLevels[],
487cb93a386Sopenharmony_ci                       int levelsCount,
488cb93a386Sopenharmony_ci                       int32_t logicalFromVisual[]) override {
489cb93a386Sopenharmony_ci        SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
490cb93a386Sopenharmony_ci    }
491cb93a386Sopenharmony_ci};
492cb93a386Sopenharmony_ci
493cb93a386Sopenharmony_cistd::unique_ptr<SkUnicode> SkUnicode::MakeIcuBasedUnicode() {
494cb93a386Sopenharmony_ci    #if defined(SK_USING_THIRD_PARTY_ICU)
495cb93a386Sopenharmony_ci    if (!SkLoadICU()) {
496cb93a386Sopenharmony_ci        static SkOnce once;
497cb93a386Sopenharmony_ci        once([] { SkDEBUGF("SkLoadICU() failed!\n"); });
498cb93a386Sopenharmony_ci        return nullptr;
499cb93a386Sopenharmony_ci    }
500cb93a386Sopenharmony_ci    #endif
501cb93a386Sopenharmony_ci
502cb93a386Sopenharmony_ci    return ICULib()
503cb93a386Sopenharmony_ci        ? std::make_unique<SkUnicode_icu>()
504cb93a386Sopenharmony_ci        : nullptr;
505cb93a386Sopenharmony_ci}
506