1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci* Copyright 2020 Google Inc. 3cb93a386Sopenharmony_ci* 4cb93a386Sopenharmony_ci* Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci* found in the LICENSE file. 6cb93a386Sopenharmony_ci*/ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#include "include/core/SkString.h" 9cb93a386Sopenharmony_ci#include "include/core/SkTypes.h" 10cb93a386Sopenharmony_ci#include "include/private/SkBitmaskEnum.h" 11cb93a386Sopenharmony_ci#include "include/private/SkMutex.h" 12cb93a386Sopenharmony_ci#include "include/private/SkOnce.h" 13cb93a386Sopenharmony_ci#include "include/private/SkTArray.h" 14cb93a386Sopenharmony_ci#include "include/private/SkTemplates.h" 15cb93a386Sopenharmony_ci#include "include/private/SkTo.h" 16cb93a386Sopenharmony_ci#include "modules/skunicode/include/SkUnicode.h" 17cb93a386Sopenharmony_ci#include "modules/skunicode/src/SkUnicode_icu.h" 18cb93a386Sopenharmony_ci#include "modules/skunicode/src/SkUnicode_icu_bidi.h" 19cb93a386Sopenharmony_ci#include "src/utils/SkUTF.h" 20cb93a386Sopenharmony_ci#include "include/private/SkTHash.h" 21cb93a386Sopenharmony_ci#include <unicode/umachine.h> 22cb93a386Sopenharmony_ci#include <functional> 23cb93a386Sopenharmony_ci#include <string> 24cb93a386Sopenharmony_ci#include <utility> 25cb93a386Sopenharmony_ci#include <vector> 26cb93a386Sopenharmony_ci 27cb93a386Sopenharmony_ci#if defined(SK_USING_THIRD_PARTY_ICU) 28cb93a386Sopenharmony_ci#include "SkLoadICU.h" 29cb93a386Sopenharmony_ci#endif 30cb93a386Sopenharmony_ci 31cb93a386Sopenharmony_cistatic const SkICULib* ICULib() { 32cb93a386Sopenharmony_ci static const auto gICU = SkLoadICULib(); 33cb93a386Sopenharmony_ci 34cb93a386Sopenharmony_ci return gICU.get(); 35cb93a386Sopenharmony_ci} 36cb93a386Sopenharmony_ci 37cb93a386Sopenharmony_ci// sk_* wrappers for ICU funcs 38cb93a386Sopenharmony_ci#define SKICU_FUNC(funcname) \ 39cb93a386Sopenharmony_ci template <typename... Args> \ 40cb93a386Sopenharmony_ci auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \ 41cb93a386Sopenharmony_ci return ICULib()->f_##funcname(std::forward<Args>(args)...); \ 42cb93a386Sopenharmony_ci } \ 43cb93a386Sopenharmony_ci 44cb93a386Sopenharmony_ciSKICU_EMIT_FUNCS 45cb93a386Sopenharmony_ci#undef SKICU_FUNC 46cb93a386Sopenharmony_ci 47cb93a386Sopenharmony_ciconst char* SkUnicode_IcuBidi::errorName(UErrorCode status) { 48cb93a386Sopenharmony_ci return sk_u_errorName(status); 49cb93a386Sopenharmony_ci} 50cb93a386Sopenharmony_ci 51cb93a386Sopenharmony_civoid SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) { 52cb93a386Sopenharmony_ci sk_ubidi_close(bidi); 53cb93a386Sopenharmony_ci} 54cb93a386Sopenharmony_ciUBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) { 55cb93a386Sopenharmony_ci return sk_ubidi_getDirection(bidi); 56cb93a386Sopenharmony_ci} 57cb93a386Sopenharmony_ciSkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) { 58cb93a386Sopenharmony_ci return sk_ubidi_getLength(bidi); 59cb93a386Sopenharmony_ci} 60cb93a386Sopenharmony_ciSkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) { 61cb93a386Sopenharmony_ci return sk_ubidi_getLevelAt(bidi, pos); 62cb93a386Sopenharmony_ci} 63cb93a386Sopenharmony_ciUBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) { 64cb93a386Sopenharmony_ci return sk_ubidi_openSized(maxLength, maxRunCount, pErrorCode); 65cb93a386Sopenharmony_ci} 66cb93a386Sopenharmony_civoid SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi, 67cb93a386Sopenharmony_ci const UChar* text, 68cb93a386Sopenharmony_ci int32_t length, 69cb93a386Sopenharmony_ci UBiDiLevel paraLevel, 70cb93a386Sopenharmony_ci UBiDiLevel* embeddingLevels, 71cb93a386Sopenharmony_ci UErrorCode* status) { 72cb93a386Sopenharmony_ci return sk_ubidi_setPara(bidi, text, length, paraLevel, embeddingLevels, status); 73cb93a386Sopenharmony_ci} 74cb93a386Sopenharmony_civoid SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[], 75cb93a386Sopenharmony_ci int levelsCount, 76cb93a386Sopenharmony_ci int32_t logicalFromVisual[]) { 77cb93a386Sopenharmony_ci sk_ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual); 78cb93a386Sopenharmony_ci} 79cb93a386Sopenharmony_ci 80cb93a386Sopenharmony_cistatic inline UBreakIterator* sk_ubrk_clone(const UBreakIterator* bi, UErrorCode* status) { 81cb93a386Sopenharmony_ci const auto* icu = ICULib(); 82cb93a386Sopenharmony_ci SkASSERT(icu->f_ubrk_clone_ || icu->f_ubrk_safeClone_); 83cb93a386Sopenharmony_ci return icu->f_ubrk_clone_ 84cb93a386Sopenharmony_ci ? icu->f_ubrk_clone_(bi, status) 85cb93a386Sopenharmony_ci : icu->f_ubrk_safeClone_(bi, nullptr, nullptr, status); 86cb93a386Sopenharmony_ci} 87cb93a386Sopenharmony_ci 88cb93a386Sopenharmony_cistatic UText* utext_close_wrapper(UText* ut) { 89cb93a386Sopenharmony_ci return sk_utext_close(ut); 90cb93a386Sopenharmony_ci} 91cb93a386Sopenharmony_cistatic void ubrk_close_wrapper(UBreakIterator* bi) { 92cb93a386Sopenharmony_ci sk_ubrk_close(bi); 93cb93a386Sopenharmony_ci} 94cb93a386Sopenharmony_ci 95cb93a386Sopenharmony_ciusing ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), 96cb93a386Sopenharmony_ci utext_close_wrapper>>; 97cb93a386Sopenharmony_ciusing ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), 98cb93a386Sopenharmony_ci ubrk_close_wrapper>>; 99cb93a386Sopenharmony_ci/** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */ 100cb93a386Sopenharmony_cistatic inline SkUnichar utf8_next(const char** ptr, const char* end) { 101cb93a386Sopenharmony_ci SkUnichar val = SkUTF::NextUTF8(ptr, end); 102cb93a386Sopenharmony_ci return val < 0 ? 0xFFFD : val; 103cb93a386Sopenharmony_ci} 104cb93a386Sopenharmony_ci 105cb93a386Sopenharmony_cistatic UBreakIteratorType convertType(SkUnicode::BreakType type) { 106cb93a386Sopenharmony_ci switch (type) { 107cb93a386Sopenharmony_ci case SkUnicode::BreakType::kLines: return UBRK_LINE; 108cb93a386Sopenharmony_ci case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER; 109cb93a386Sopenharmony_ci case SkUnicode::BreakType::kWords: return UBRK_WORD; 110cb93a386Sopenharmony_ci default: 111cb93a386Sopenharmony_ci return UBRK_CHARACTER; 112cb93a386Sopenharmony_ci } 113cb93a386Sopenharmony_ci} 114cb93a386Sopenharmony_ci 115cb93a386Sopenharmony_ciclass SkBreakIterator_icu : public SkBreakIterator { 116cb93a386Sopenharmony_ci ICUBreakIterator fBreakIterator; 117cb93a386Sopenharmony_ci Position fLastResult; 118cb93a386Sopenharmony_ci public: 119cb93a386Sopenharmony_ci explicit SkBreakIterator_icu(ICUBreakIterator iter) 120cb93a386Sopenharmony_ci : fBreakIterator(std::move(iter)) 121cb93a386Sopenharmony_ci , fLastResult(0) {} 122cb93a386Sopenharmony_ci Position first() override { return fLastResult = sk_ubrk_first(fBreakIterator.get()); } 123cb93a386Sopenharmony_ci Position current() override { return fLastResult = sk_ubrk_current(fBreakIterator.get()); } 124cb93a386Sopenharmony_ci Position next() override { return fLastResult = sk_ubrk_next(fBreakIterator.get()); } 125cb93a386Sopenharmony_ci Status status() override { return sk_ubrk_getRuleStatus(fBreakIterator.get()); } 126cb93a386Sopenharmony_ci bool isDone() override { return fLastResult == UBRK_DONE; } 127cb93a386Sopenharmony_ci 128cb93a386Sopenharmony_ci bool setText(const char utftext8[], int utf8Units) override { 129cb93a386Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 130cb93a386Sopenharmony_ci ICUUText text(sk_utext_openUTF8(nullptr, &utftext8[0], utf8Units, &status)); 131cb93a386Sopenharmony_ci 132cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 133cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 134cb93a386Sopenharmony_ci return false; 135cb93a386Sopenharmony_ci } 136cb93a386Sopenharmony_ci SkASSERT(text); 137cb93a386Sopenharmony_ci sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status); 138cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 139cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 140cb93a386Sopenharmony_ci return false; 141cb93a386Sopenharmony_ci } 142cb93a386Sopenharmony_ci fLastResult = 0; 143cb93a386Sopenharmony_ci return true; 144cb93a386Sopenharmony_ci } 145cb93a386Sopenharmony_ci bool setText(const char16_t utftext16[], int utf16Units) override { 146cb93a386Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 147cb93a386Sopenharmony_ci ICUUText text(sk_utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]), 148cb93a386Sopenharmony_ci utf16Units, &status)); 149cb93a386Sopenharmony_ci 150cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 151cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 152cb93a386Sopenharmony_ci return false; 153cb93a386Sopenharmony_ci } 154cb93a386Sopenharmony_ci SkASSERT(text); 155cb93a386Sopenharmony_ci sk_ubrk_setUText(fBreakIterator.get(), text.get(), &status); 156cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 157cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 158cb93a386Sopenharmony_ci return false; 159cb93a386Sopenharmony_ci } 160cb93a386Sopenharmony_ci fLastResult = 0; 161cb93a386Sopenharmony_ci return true; 162cb93a386Sopenharmony_ci } 163cb93a386Sopenharmony_ci}; 164cb93a386Sopenharmony_ci 165cb93a386Sopenharmony_ciclass SkIcuBreakIteratorCache { 166cb93a386Sopenharmony_ci SkTHashMap<SkUnicode::BreakType, ICUBreakIterator> fBreakCache; 167cb93a386Sopenharmony_ci SkMutex fBreakCacheMutex; 168cb93a386Sopenharmony_ci 169cb93a386Sopenharmony_ci public: 170cb93a386Sopenharmony_ci static SkIcuBreakIteratorCache& get() { 171cb93a386Sopenharmony_ci static SkIcuBreakIteratorCache instance; 172cb93a386Sopenharmony_ci return instance; 173cb93a386Sopenharmony_ci } 174cb93a386Sopenharmony_ci 175cb93a386Sopenharmony_ci ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type) { 176cb93a386Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 177cb93a386Sopenharmony_ci ICUBreakIterator* cachedIterator; 178cb93a386Sopenharmony_ci { 179cb93a386Sopenharmony_ci SkAutoMutexExclusive lock(fBreakCacheMutex); 180cb93a386Sopenharmony_ci cachedIterator = fBreakCache.find(type); 181cb93a386Sopenharmony_ci if (!cachedIterator) { 182cb93a386Sopenharmony_ci ICUBreakIterator newIterator(sk_ubrk_open(convertType(type), sk_uloc_getDefault(), 183cb93a386Sopenharmony_ci nullptr, 0, &status)); 184cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 185cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 186cb93a386Sopenharmony_ci } else { 187cb93a386Sopenharmony_ci cachedIterator = fBreakCache.set(type, std::move(newIterator)); 188cb93a386Sopenharmony_ci } 189cb93a386Sopenharmony_ci } 190cb93a386Sopenharmony_ci } 191cb93a386Sopenharmony_ci ICUBreakIterator iterator; 192cb93a386Sopenharmony_ci if (cachedIterator) { 193cb93a386Sopenharmony_ci iterator.reset(sk_ubrk_clone(cachedIterator->get(), &status)); 194cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 195cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 196cb93a386Sopenharmony_ci } 197cb93a386Sopenharmony_ci } 198cb93a386Sopenharmony_ci return iterator; 199cb93a386Sopenharmony_ci } 200cb93a386Sopenharmony_ci}; 201cb93a386Sopenharmony_ci 202cb93a386Sopenharmony_ciclass SkUnicode_icu : public SkUnicode { 203cb93a386Sopenharmony_ci 204cb93a386Sopenharmony_ci std::unique_ptr<SkUnicode> copy() override { 205cb93a386Sopenharmony_ci return std::make_unique<SkUnicode_icu>(); 206cb93a386Sopenharmony_ci } 207cb93a386Sopenharmony_ci 208cb93a386Sopenharmony_ci static bool extractWords(uint16_t utf16[], int utf16Units, const char* locale, std::vector<Position>* words) { 209cb93a386Sopenharmony_ci 210cb93a386Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 211cb93a386Sopenharmony_ci 212cb93a386Sopenharmony_ci ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(BreakType::kWords); 213cb93a386Sopenharmony_ci if (!iterator) { 214cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 215cb93a386Sopenharmony_ci return false; 216cb93a386Sopenharmony_ci } 217cb93a386Sopenharmony_ci SkASSERT(iterator); 218cb93a386Sopenharmony_ci 219cb93a386Sopenharmony_ci ICUUText utf16UText(sk_utext_openUChars(nullptr, (UChar*)utf16, utf16Units, &status)); 220cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 221cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 222cb93a386Sopenharmony_ci return false; 223cb93a386Sopenharmony_ci } 224cb93a386Sopenharmony_ci 225cb93a386Sopenharmony_ci sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status); 226cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 227cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 228cb93a386Sopenharmony_ci return false; 229cb93a386Sopenharmony_ci } 230cb93a386Sopenharmony_ci 231cb93a386Sopenharmony_ci // Get the words 232cb93a386Sopenharmony_ci int32_t pos = sk_ubrk_first(iterator.get()); 233cb93a386Sopenharmony_ci while (pos != UBRK_DONE) { 234cb93a386Sopenharmony_ci words->emplace_back(pos); 235cb93a386Sopenharmony_ci pos = sk_ubrk_next(iterator.get()); 236cb93a386Sopenharmony_ci } 237cb93a386Sopenharmony_ci 238cb93a386Sopenharmony_ci return true; 239cb93a386Sopenharmony_ci } 240cb93a386Sopenharmony_ci 241cb93a386Sopenharmony_ci static bool extractPositions 242cb93a386Sopenharmony_ci (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> setBreak) { 243cb93a386Sopenharmony_ci 244cb93a386Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 245cb93a386Sopenharmony_ci ICUUText text(sk_utext_openUTF8(nullptr, &utf8[0], utf8Units, &status)); 246cb93a386Sopenharmony_ci 247cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 248cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 249cb93a386Sopenharmony_ci return false; 250cb93a386Sopenharmony_ci } 251cb93a386Sopenharmony_ci SkASSERT(text); 252cb93a386Sopenharmony_ci 253cb93a386Sopenharmony_ci ICUBreakIterator iterator = SkIcuBreakIteratorCache::get().makeBreakIterator(type); 254cb93a386Sopenharmony_ci if (!iterator) { 255cb93a386Sopenharmony_ci return false; 256cb93a386Sopenharmony_ci } 257cb93a386Sopenharmony_ci 258cb93a386Sopenharmony_ci sk_ubrk_setUText(iterator.get(), text.get(), &status); 259cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 260cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 261cb93a386Sopenharmony_ci return false; 262cb93a386Sopenharmony_ci } 263cb93a386Sopenharmony_ci 264cb93a386Sopenharmony_ci auto iter = iterator.get(); 265cb93a386Sopenharmony_ci int32_t pos = sk_ubrk_first(iter); 266cb93a386Sopenharmony_ci while (pos != UBRK_DONE) { 267cb93a386Sopenharmony_ci int s = type == SkUnicode::BreakType::kLines 268cb93a386Sopenharmony_ci ? UBRK_LINE_SOFT 269cb93a386Sopenharmony_ci : sk_ubrk_getRuleStatus(iter); 270cb93a386Sopenharmony_ci setBreak(pos, s); 271cb93a386Sopenharmony_ci pos = sk_ubrk_next(iter); 272cb93a386Sopenharmony_ci } 273cb93a386Sopenharmony_ci 274cb93a386Sopenharmony_ci if (type == SkUnicode::BreakType::kLines) { 275cb93a386Sopenharmony_ci // This is a workaround for https://bugs.chromium.org/p/skia/issues/detail?id=10715 276cb93a386Sopenharmony_ci // (ICU line break iterator does not work correctly on Thai text with new lines) 277cb93a386Sopenharmony_ci // So, we only use the iterator to collect soft line breaks and 278cb93a386Sopenharmony_ci // scan the text for all hard line breaks ourselves 279cb93a386Sopenharmony_ci const char* end = utf8 + utf8Units; 280cb93a386Sopenharmony_ci const char* ch = utf8; 281cb93a386Sopenharmony_ci while (ch < end) { 282cb93a386Sopenharmony_ci auto unichar = utf8_next(&ch, end); 283cb93a386Sopenharmony_ci if (isHardLineBreak(unichar)) { 284cb93a386Sopenharmony_ci setBreak(ch - utf8, UBRK_LINE_HARD); 285cb93a386Sopenharmony_ci } 286cb93a386Sopenharmony_ci } 287cb93a386Sopenharmony_ci } 288cb93a386Sopenharmony_ci return true; 289cb93a386Sopenharmony_ci } 290cb93a386Sopenharmony_ci 291cb93a386Sopenharmony_ci static bool isControl(SkUnichar utf8) { 292cb93a386Sopenharmony_ci return sk_u_iscntrl(utf8); 293cb93a386Sopenharmony_ci } 294cb93a386Sopenharmony_ci 295cb93a386Sopenharmony_ci static bool isWhitespace(SkUnichar utf8) { 296cb93a386Sopenharmony_ci return sk_u_isWhitespace(utf8); 297cb93a386Sopenharmony_ci } 298cb93a386Sopenharmony_ci 299cb93a386Sopenharmony_ci static bool isSpace(SkUnichar utf8) { 300cb93a386Sopenharmony_ci return sk_u_isspace(utf8); 301cb93a386Sopenharmony_ci } 302cb93a386Sopenharmony_ci 303cb93a386Sopenharmony_ci static bool isTabulation(SkUnichar utf8) { 304cb93a386Sopenharmony_ci return utf8 == '\t'; 305cb93a386Sopenharmony_ci } 306cb93a386Sopenharmony_ci 307cb93a386Sopenharmony_ci static bool isHardBreak(SkUnichar utf8) { 308cb93a386Sopenharmony_ci auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK); 309cb93a386Sopenharmony_ci return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK; 310cb93a386Sopenharmony_ci } 311cb93a386Sopenharmony_ci 312cb93a386Sopenharmony_ci static bool isIdeographic(SkUnichar unichar) { 313cb93a386Sopenharmony_ci return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC); 314cb93a386Sopenharmony_ci } 315cb93a386Sopenharmony_ci 316cb93a386Sopenharmony_cipublic: 317cb93a386Sopenharmony_ci ~SkUnicode_icu() override { } 318cb93a386Sopenharmony_ci std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count, 319cb93a386Sopenharmony_ci SkBidiIterator::Direction dir) override { 320cb93a386Sopenharmony_ci return SkUnicode::makeBidiIterator(text, count, dir); 321cb93a386Sopenharmony_ci } 322cb93a386Sopenharmony_ci std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[], 323cb93a386Sopenharmony_ci int count, 324cb93a386Sopenharmony_ci SkBidiIterator::Direction dir) override { 325cb93a386Sopenharmony_ci return SkUnicode::makeBidiIterator(text, count, dir); 326cb93a386Sopenharmony_ci } 327cb93a386Sopenharmony_ci std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[], 328cb93a386Sopenharmony_ci BreakType breakType) override { 329cb93a386Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 330cb93a386Sopenharmony_ci ICUBreakIterator iterator(sk_ubrk_open(convertType(breakType), locale, nullptr, 0, 331cb93a386Sopenharmony_ci &status)); 332cb93a386Sopenharmony_ci if (U_FAILURE(status)) { 333cb93a386Sopenharmony_ci SkDEBUGF("Break error: %s", sk_u_errorName(status)); 334cb93a386Sopenharmony_ci return nullptr; 335cb93a386Sopenharmony_ci } 336cb93a386Sopenharmony_ci return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator))); 337cb93a386Sopenharmony_ci } 338cb93a386Sopenharmony_ci std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override { 339cb93a386Sopenharmony_ci return makeBreakIterator(sk_uloc_getDefault(), breakType); 340cb93a386Sopenharmony_ci } 341cb93a386Sopenharmony_ci 342cb93a386Sopenharmony_ci static bool isHardLineBreak(SkUnichar utf8) { 343cb93a386Sopenharmony_ci auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK); 344cb93a386Sopenharmony_ci return property == U_LB_LINE_FEED || property == U_LB_MANDATORY_BREAK; 345cb93a386Sopenharmony_ci } 346cb93a386Sopenharmony_ci 347cb93a386Sopenharmony_ci SkString toUpper(const SkString& str) override { 348cb93a386Sopenharmony_ci // Convert to UTF16 since that's what ICU wants. 349cb93a386Sopenharmony_ci auto str16 = SkUnicode::convertUtf8ToUtf16(str.c_str(), str.size()); 350cb93a386Sopenharmony_ci 351cb93a386Sopenharmony_ci UErrorCode icu_err = U_ZERO_ERROR; 352cb93a386Sopenharmony_ci const auto upper16len = sk_u_strToUpper(nullptr, 0, (UChar*)(str16.c_str()), str16.size(), 353cb93a386Sopenharmony_ci nullptr, &icu_err); 354cb93a386Sopenharmony_ci if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) { 355cb93a386Sopenharmony_ci return SkString(); 356cb93a386Sopenharmony_ci } 357cb93a386Sopenharmony_ci 358cb93a386Sopenharmony_ci SkAutoSTArray<128, uint16_t> upper16(upper16len); 359cb93a386Sopenharmony_ci icu_err = U_ZERO_ERROR; 360cb93a386Sopenharmony_ci sk_u_strToUpper((UChar*)(upper16.get()), SkToS32(upper16.size()), 361cb93a386Sopenharmony_ci (UChar*)(str16.c_str()), str16.size(), 362cb93a386Sopenharmony_ci nullptr, &icu_err); 363cb93a386Sopenharmony_ci SkASSERT(!U_FAILURE(icu_err)); 364cb93a386Sopenharmony_ci 365cb93a386Sopenharmony_ci // ... and back to utf8 'cause that's what we want. 366cb93a386Sopenharmony_ci return convertUtf16ToUtf8((char16_t*)upper16.get(), upper16.size()); 367cb93a386Sopenharmony_ci } 368cb93a386Sopenharmony_ci 369cb93a386Sopenharmony_ci bool getBidiRegions(const char utf8[], 370cb93a386Sopenharmony_ci int utf8Units, 371cb93a386Sopenharmony_ci TextDirection dir, 372cb93a386Sopenharmony_ci std::vector<BidiRegion>* results) override { 373cb93a386Sopenharmony_ci return SkUnicode::extractBidi(utf8, utf8Units, dir, results); 374cb93a386Sopenharmony_ci } 375cb93a386Sopenharmony_ci 376cb93a386Sopenharmony_ci bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override { 377cb93a386Sopenharmony_ci 378cb93a386Sopenharmony_ci // Convert to UTF16 since we want the results in utf16 379cb93a386Sopenharmony_ci auto utf16 = convertUtf8ToUtf16(utf8, utf8Units); 380cb93a386Sopenharmony_ci return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results); 381cb93a386Sopenharmony_ci } 382cb93a386Sopenharmony_ci 383cb93a386Sopenharmony_ci bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, 384cb93a386Sopenharmony_ci SkTArray<SkUnicode::CodeUnitFlags, true>* results) override { 385cb93a386Sopenharmony_ci results->reset(); 386cb93a386Sopenharmony_ci results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag); 387cb93a386Sopenharmony_ci 388cb93a386Sopenharmony_ci SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kLines, [&](int pos, 389cb93a386Sopenharmony_ci int status) { 390cb93a386Sopenharmony_ci (*results)[pos] |= status == UBRK_LINE_HARD 391cb93a386Sopenharmony_ci ? CodeUnitFlags::kHardLineBreakBefore 392cb93a386Sopenharmony_ci : CodeUnitFlags::kSoftLineBreakBefore; 393cb93a386Sopenharmony_ci }); 394cb93a386Sopenharmony_ci 395cb93a386Sopenharmony_ci SkUnicode_icu::extractPositions(utf8, utf8Units, BreakType::kGraphemes, [&](int pos, 396cb93a386Sopenharmony_ci int status) { 397cb93a386Sopenharmony_ci (*results)[pos] |= CodeUnitFlags::kGraphemeStart; 398cb93a386Sopenharmony_ci }); 399cb93a386Sopenharmony_ci 400cb93a386Sopenharmony_ci const char* current = utf8; 401cb93a386Sopenharmony_ci const char* end = utf8 + utf8Units; 402cb93a386Sopenharmony_ci while (current < end) { 403cb93a386Sopenharmony_ci auto before = current - utf8; 404cb93a386Sopenharmony_ci SkUnichar unichar = SkUTF::NextUTF8(¤t, end); 405cb93a386Sopenharmony_ci if (unichar < 0) unichar = 0xFFFD; 406cb93a386Sopenharmony_ci auto after = current - utf8; 407cb93a386Sopenharmony_ci if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) { 408cb93a386Sopenharmony_ci results->at(before) |= SkUnicode::kTabulation; 409cb93a386Sopenharmony_ci if (replaceTabs) { 410cb93a386Sopenharmony_ci unichar = ' '; 411cb93a386Sopenharmony_ci utf8[before] = ' '; 412cb93a386Sopenharmony_ci } 413cb93a386Sopenharmony_ci } 414cb93a386Sopenharmony_ci for (auto i = before; i < after; ++i) { 415cb93a386Sopenharmony_ci if (SkUnicode_icu::isSpace(unichar)) { 416cb93a386Sopenharmony_ci results->at(i) |= SkUnicode::kPartOfIntraWordBreak; 417cb93a386Sopenharmony_ci } 418cb93a386Sopenharmony_ci if (SkUnicode_icu::isWhitespace(unichar)) { 419cb93a386Sopenharmony_ci results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak; 420cb93a386Sopenharmony_ci } 421cb93a386Sopenharmony_ci if (SkUnicode_icu::isControl(unichar)) { 422cb93a386Sopenharmony_ci results->at(i) |= SkUnicode::kControl; 423cb93a386Sopenharmony_ci } 424cb93a386Sopenharmony_ci if (SkUnicode_icu::isIdeographic(unichar)) { 425cb93a386Sopenharmony_ci results->at(i) |= SkUnicode::kIdeographic; 426cb93a386Sopenharmony_ci } 427cb93a386Sopenharmony_ci } 428cb93a386Sopenharmony_ci } 429cb93a386Sopenharmony_ci 430cb93a386Sopenharmony_ci return true; 431cb93a386Sopenharmony_ci } 432cb93a386Sopenharmony_ci 433cb93a386Sopenharmony_ci bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, 434cb93a386Sopenharmony_ci SkTArray<SkUnicode::CodeUnitFlags, true>* results) override { 435cb93a386Sopenharmony_ci results->reset(); 436cb93a386Sopenharmony_ci results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag); 437cb93a386Sopenharmony_ci 438cb93a386Sopenharmony_ci // Get white spaces 439cb93a386Sopenharmony_ci this->forEachCodepoint((char16_t*)&utf16[0], utf16Units, 440cb93a386Sopenharmony_ci [results, replaceTabs, &utf16](SkUnichar unichar, int32_t start, int32_t end) { 441cb93a386Sopenharmony_ci for (auto i = start; i < end; ++i) { 442cb93a386Sopenharmony_ci if (replaceTabs && SkUnicode_icu::isTabulation(unichar)) { 443cb93a386Sopenharmony_ci results->at(i) |= SkUnicode::kTabulation; 444cb93a386Sopenharmony_ci if (replaceTabs) { 445cb93a386Sopenharmony_ci unichar = ' '; 446cb93a386Sopenharmony_ci utf16[start] = ' '; 447cb93a386Sopenharmony_ci } 448cb93a386Sopenharmony_ci } 449cb93a386Sopenharmony_ci if (SkUnicode_icu::isSpace(unichar)) { 450cb93a386Sopenharmony_ci results->at(i) |= SkUnicode::kPartOfIntraWordBreak; 451cb93a386Sopenharmony_ci } 452cb93a386Sopenharmony_ci if (SkUnicode_icu::isWhitespace(unichar)) { 453cb93a386Sopenharmony_ci results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak; 454cb93a386Sopenharmony_ci } 455cb93a386Sopenharmony_ci if (SkUnicode_icu::isControl(unichar)) { 456cb93a386Sopenharmony_ci results->at(i) |= SkUnicode::kControl; 457cb93a386Sopenharmony_ci } 458cb93a386Sopenharmony_ci } 459cb93a386Sopenharmony_ci }); 460cb93a386Sopenharmony_ci // Get graphemes 461cb93a386Sopenharmony_ci this->forEachBreak((char16_t*)&utf16[0], 462cb93a386Sopenharmony_ci utf16Units, 463cb93a386Sopenharmony_ci SkUnicode::BreakType::kGraphemes, 464cb93a386Sopenharmony_ci [results](SkBreakIterator::Position pos, SkBreakIterator::Status) { 465cb93a386Sopenharmony_ci (*results)[pos] |= CodeUnitFlags::kGraphemeStart; 466cb93a386Sopenharmony_ci }); 467cb93a386Sopenharmony_ci // Get line breaks 468cb93a386Sopenharmony_ci this->forEachBreak( 469cb93a386Sopenharmony_ci (char16_t*)&utf16[0], 470cb93a386Sopenharmony_ci utf16Units, 471cb93a386Sopenharmony_ci SkUnicode::BreakType::kLines, 472cb93a386Sopenharmony_ci [results](SkBreakIterator::Position pos, SkBreakIterator::Status status) { 473cb93a386Sopenharmony_ci if (status == 474cb93a386Sopenharmony_ci (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak) { 475cb93a386Sopenharmony_ci // Hard line breaks clears off all the other flags 476cb93a386Sopenharmony_ci // TODO: Treat \n as a formatting mark and do not pass it to SkShaper 477cb93a386Sopenharmony_ci (*results)[pos-1] = CodeUnitFlags::kHardLineBreakBefore; 478cb93a386Sopenharmony_ci } else { 479cb93a386Sopenharmony_ci (*results)[pos] |= CodeUnitFlags::kSoftLineBreakBefore; 480cb93a386Sopenharmony_ci } 481cb93a386Sopenharmony_ci }); 482cb93a386Sopenharmony_ci 483cb93a386Sopenharmony_ci return true; 484cb93a386Sopenharmony_ci } 485cb93a386Sopenharmony_ci 486cb93a386Sopenharmony_ci void reorderVisual(const BidiLevel runLevels[], 487cb93a386Sopenharmony_ci int levelsCount, 488cb93a386Sopenharmony_ci int32_t logicalFromVisual[]) override { 489cb93a386Sopenharmony_ci SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual); 490cb93a386Sopenharmony_ci } 491cb93a386Sopenharmony_ci}; 492cb93a386Sopenharmony_ci 493cb93a386Sopenharmony_cistd::unique_ptr<SkUnicode> SkUnicode::MakeIcuBasedUnicode() { 494cb93a386Sopenharmony_ci #if defined(SK_USING_THIRD_PARTY_ICU) 495cb93a386Sopenharmony_ci if (!SkLoadICU()) { 496cb93a386Sopenharmony_ci static SkOnce once; 497cb93a386Sopenharmony_ci once([] { SkDEBUGF("SkLoadICU() failed!\n"); }); 498cb93a386Sopenharmony_ci return nullptr; 499cb93a386Sopenharmony_ci } 500cb93a386Sopenharmony_ci #endif 501cb93a386Sopenharmony_ci 502cb93a386Sopenharmony_ci return ICULib() 503cb93a386Sopenharmony_ci ? std::make_unique<SkUnicode_icu>() 504cb93a386Sopenharmony_ci : nullptr; 505cb93a386Sopenharmony_ci} 506