1 /* 2* Copyright 2022 Google Inc. 3* 4* Use of this source code is governed by a BSD-style license that can be 5* found in the LICENSE file. 6*/ 7#include "include/core/SkSpan.h" 8#include "include/core/SkString.h" 9#include "include/core/SkTypes.h" 10#include "include/private/SkBitmaskEnum.h" 11#include "include/private/SkTArray.h" 12#include "include/private/SkTo.h" 13#include "modules/skunicode/include/SkUnicode.h" 14#include "modules/skunicode/src/SkUnicode_client.h" 15#include "modules/skunicode/src/SkUnicode_icu_bidi.h" 16#include "src/utils/SkUTF.h" 17 18#include <algorithm> 19#include <cstdint> 20#include <memory> 21#include <string> 22#include <utility> 23#include <vector> 24#include <array> 25#include <unicode/ubidi.h> 26#include <unicode/ubrk.h> 27#include <unicode/uchar.h> 28#include <unicode/uloc.h> 29#include <unicode/uscript.h> 30#include <unicode/ustring.h> 31#include <unicode/utext.h> 32#include <unicode/utypes.h> 33 34 35#ifndef SK_UNICODE_ICU_IMPLEMENTATION 36 37const char* SkUnicode_IcuBidi::errorName(UErrorCode status) { 38 return u_errorName_skia(status); 39} 40void SkUnicode_IcuBidi::bidi_close(UBiDi* bidi) { 41 ubidi_close_skia(bidi); 42} 43UBiDiDirection SkUnicode_IcuBidi::bidi_getDirection(const UBiDi* bidi) { 44 return ubidi_getDirection_skia(bidi); 45} 46SkBidiIterator::Position SkUnicode_IcuBidi::bidi_getLength(const UBiDi* bidi) { 47 return ubidi_getLength_skia(bidi); 48} 49SkBidiIterator::Level SkUnicode_IcuBidi::bidi_getLevelAt(const UBiDi* bidi, int pos) { 50 return ubidi_getLevelAt_skia(bidi, pos); 51} 52UBiDi* SkUnicode_IcuBidi::bidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode* pErrorCode) { 53 return ubidi_openSized_skia(maxLength, maxRunCount, pErrorCode); 54} 55void SkUnicode_IcuBidi::bidi_setPara(UBiDi* bidi, 56 const UChar* text, 57 int32_t length, 58 UBiDiLevel paraLevel, 59 UBiDiLevel* embeddingLevels, 60 UErrorCode* status) { 61 return ubidi_setPara_skia(bidi, text, length, paraLevel, embeddingLevels, status); 62} 63void SkUnicode_IcuBidi::bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[], 64 int levelsCount, 65 int32_t logicalFromVisual[]) { 66 ubidi_reorderVisual_skia(runLevels, levelsCount, logicalFromVisual); 67} 68#endif 69 70class SkUnicode_client : public SkUnicode { 71public: 72 struct Data { 73 SkSpan<const char> fText8; 74 SkSpan<const char16_t> fText16; 75 std::vector<Position> fWords; 76 std::vector<SkUnicode::Position> fGraphemeBreaks; 77 std::vector<SkUnicode::LineBreakBefore> fLineBreaks; 78 Data(SkSpan<char> text, 79 std::vector<SkUnicode::Position> words, 80 std::vector<SkUnicode::Position> graphemeBreaks, 81 std::vector<SkUnicode::LineBreakBefore> lineBreaks) 82 : fText8(text) 83 , fText16(SkSpan<const char16_t>(nullptr, 0)) 84 , fWords(std::move(words)) 85 , fGraphemeBreaks(std::move(graphemeBreaks)) 86 , fLineBreaks(std::move(lineBreaks)) { 87 } 88 89 void reset() { 90 fText8 = SkSpan<const char>(nullptr, 0); 91 fText16 = SkSpan<const char16_t>(nullptr, 0); 92 fGraphemeBreaks.clear(); 93 fLineBreaks.clear(); 94 } 95 }; 96 SkUnicode_client() = delete; 97 SkUnicode_client(SkSpan<char> text, 98 std::vector<SkUnicode::Position> words, 99 std::vector<SkUnicode::Position> graphemeBreaks, 100 std::vector<SkUnicode::LineBreakBefore> lineBreaks) 101 : fData(std::make_shared<Data>(text, 102 std::move(words), 103 std::move(graphemeBreaks), 104 std::move(lineBreaks))) { } 105 SkUnicode_client(const SkUnicode_client* origin) 106 : fData(origin->fData) {} 107 108 109 std::unique_ptr<SkUnicode> copy() override { 110 return std::make_unique<SkUnicode_client>(this); 111 } 112 113 ~SkUnicode_client() override = default; 114 115 void reset() { fData->reset(); } 116 // For SkShaper 117 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count, 118 SkBidiIterator::Direction dir) override; 119 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[], 120 int count, 121 SkBidiIterator::Direction dir) override; 122 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[], 123 BreakType breakType) override; 124 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override; 125 // For SkParagraph 126 bool getBidiRegions(const char utf8[], 127 int utf8Units, 128 TextDirection dir, 129 std::vector<BidiRegion>* results) override { 130 return SkUnicode::extractBidi(utf8, utf8Units, dir, results); 131 } 132 133 // TODO: Take if from the Client or hard code here? 134 static bool isControl(SkUnichar utf8) { 135 return (utf8 < ' ') || (utf8 >= 0x7f && utf8 <= 0x9f) || 136 (utf8 >= 0x200D && utf8 <= 0x200F) || 137 (utf8 >= 0x202A && utf8 <= 0x202E); 138 } 139 140 static bool isWhitespace(SkUnichar unichar) { 141 static constexpr std::array<SkUnichar, 21> whitespaces { 142 0x0009, // character tabulation 143 0x000A, // line feed 144 0x000B, // line tabulation 145 0x000C, // form feed 146 0x000D, // carriage return 147 0x0020, // space 148 //0x0085, // next line 149 //0x00A0, // no-break space 150 0x1680, // ogham space mark 151 0x2000, // en quad 152 0x2001, // em quad 153 0x2002, // en space 154 0x2003, // em space 155 0x2004, // three-per-em space 156 0x2005, // four-per-em space 157 0x2006, // six-per-em space 158 //0x2007, // figure space 159 0x2008, // punctuation space 160 0x2009, // thin space 161 0x200A, // hair space 162 0x2028, // line separator 163 0x2029, // paragraph separator 164 //0x202F, // narrow no-break space 165 0x205F, // medium mathematical space 166 0x3000};// ideographic space 167 return std::find(whitespaces.begin(), whitespaces.end(), unichar) != whitespaces.end(); 168 } 169 170 static bool isSpace(SkUnichar unichar) { 171 static constexpr std::array<SkUnichar, 25> spaces { 172 0x0009, // character tabulation 173 0x000A, // line feed 174 0x000B, // line tabulation 175 0x000C, // form feed 176 0x000D, // carriage return 177 0x0020, // space 178 0x0085, // next line 179 0x00A0, // no-break space 180 0x1680, // ogham space mark 181 0x2000, // en quad 182 0x2001, // em quad 183 0x2002, // en space 184 0x2003, // em space 185 0x2004, // three-per-em space 186 0x2005, // four-per-em space 187 0x2006, // six-per-em space 188 0x2007, // figure space 189 0x2008, // punctuation space 190 0x2009, // thin space 191 0x200A, // hair space 192 0x2028, // line separator 193 0x2029, // paragraph separator 194 0x202F, // narrow no-break space 195 0x205F, // medium mathematical space 196 0x3000}; // ideographic space 197 return std::find(spaces.begin(), spaces.end(), unichar) != spaces.end(); 198 } 199 200 static bool isTabulation(SkUnichar utf8) { 201 return utf8 == '\t'; 202 } 203 204 static bool isHardBreak(SkUnichar utf8) { 205 return utf8 == '\n'; 206 } 207 208 static bool isIdeographic(SkUnichar unichar) { 209 static constexpr std::array<std::pair<SkUnichar, SkUnichar>, 8> ranges {{ 210 {4352, 4607}, // Hangul Jamo 211 {11904, 42191}, // CJK_Radicals 212 {43072, 43135}, // Phags_Pa 213 {44032, 55215}, // Hangul_Syllables 214 {63744, 64255}, // CJK_Compatibility_Ideographs 215 {65072, 65103}, // CJK_Compatibility_Forms 216 {65381, 65500}, // Katakana_Hangul_Halfwidth 217 {131072, 196607} // Supplementary_Ideographic_Plane 218 }}; 219 for (auto range : ranges) { 220 if (range.first <= unichar && range.second > unichar) { 221 return true; 222 } 223 } 224 return false; 225 } 226 227 bool computeCodeUnitFlags(char utf8[], 228 int utf8Units, 229 bool replaceTabs, 230 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override { 231 results->clear(); 232 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag); 233 for (auto& lineBreak : fData->fLineBreaks) { 234 (*results)[lineBreak.pos] |= 235 lineBreak.breakType == LineBreakType::kHardLineBreak 236 ? CodeUnitFlags::kHardLineBreakBefore 237 : CodeUnitFlags::kSoftLineBreakBefore; 238 } 239 for (auto& grapheme : fData->fGraphemeBreaks) { 240 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart; 241 } 242 const char* current = utf8; 243 const char* end = utf8 + utf8Units; 244 while (current < end) { 245 auto before = current - utf8; 246 SkUnichar unichar = SkUTF::NextUTF8(¤t, end); 247 if (unichar < 0) unichar = 0xFFFD; 248 auto after = current - utf8; 249 if (replaceTabs && SkUnicode_client::isTabulation(unichar)) { 250 results->at(before) |= SkUnicode::kTabulation; 251 if (replaceTabs) { 252 unichar = ' '; 253 utf8[before] = ' '; 254 } 255 } 256 for (auto i = before; i < after; ++i) { 257 if (SkUnicode_client::isSpace(unichar)) { 258 results->at(i) |= SkUnicode::kPartOfIntraWordBreak; 259 } 260 if (SkUnicode_client::isWhitespace(unichar)) { 261 results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak; 262 } 263 if (SkUnicode_client::isControl(unichar)) { 264 results->at(i) |= SkUnicode::kControl; 265 } 266 if (SkUnicode_client::isIdeographic(unichar)) { 267 results->at(i) |= SkUnicode::kIdeographic; 268 } 269 } 270 } 271 return true; 272 } 273 274 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, 275 SkTArray<SkUnicode::CodeUnitFlags, true>* results) override { 276 results->clear(); 277 results->push_back_n(utf16Units + 1, CodeUnitFlags::kNoCodeUnitFlag); 278 for (auto& lineBreak : fData->fLineBreaks) { 279 (*results)[lineBreak.pos] |= 280 lineBreak.breakType == LineBreakType::kHardLineBreak 281 ? CodeUnitFlags::kHardLineBreakBefore 282 : CodeUnitFlags::kSoftLineBreakBefore; 283 } 284 for (auto& grapheme : fData->fGraphemeBreaks) { 285 (*results)[grapheme] |= CodeUnitFlags::kGraphemeStart; 286 } 287 return true; 288 } 289 290 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override { 291 *results = fData->fWords; 292 return true; 293 } 294 295 SkString toUpper(const SkString& str) override { 296 SkASSERT(false); 297 return SkString(fData->fText8.data(), fData->fText8.size()); 298 } 299 300 void reorderVisual(const BidiLevel runLevels[], 301 int levelsCount, 302 int32_t logicalFromVisual[]) override { 303 SkUnicode_IcuBidi::bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual); 304 } 305private: 306 friend class SkBreakIterator_client; 307 308 std::shared_ptr<Data> fData; 309}; 310 311class SkBreakIterator_client: public SkBreakIterator { 312 std::shared_ptr<SkUnicode_client::Data> fData; 313 Position fLastResult; 314 Position fStart; 315 Position fEnd; 316public: 317 explicit SkBreakIterator_client(std::shared_ptr<SkUnicode_client::Data> data) : fData(data) { } 318 Position first() override 319 { return fData->fLineBreaks[fStart + (fLastResult = 0)].pos; } 320 Position current() override 321 { return fData->fLineBreaks[fStart + fLastResult].pos; } 322 Position next() override 323 { return fData->fLineBreaks[fStart + fLastResult + 1].pos; } 324 Status status() override { 325 return fData->fLineBreaks[fStart + fLastResult].breakType == 326 SkUnicode::LineBreakType::kHardLineBreak 327 ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore 328 : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore; 329 } 330 bool isDone() override { return fStart + fLastResult == fEnd; } 331 bool setText(const char utftext8[], int utf8Units) override { 332 SkASSERT(utftext8 >= fData->fText8.data() && 333 utf8Units <= SkToS16(fData->fText8.size())); 334 fStart = utftext8 - fData->fText8.data(); 335 fEnd = fStart + utf8Units; 336 fLastResult = 0; 337 return true; 338 } 339 bool setText(const char16_t utftext16[], int utf16Units) override { 340 SkASSERT(utftext16 >= fData->fText16.data() && 341 utf16Units <= SkToS16(fData->fText16.size())); 342 fStart = utftext16 - fData->fText16.data(); 343 fEnd = fStart + utf16Units; 344 fLastResult = 0; 345 return true; 346 } 347}; 348std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const uint16_t text[], int count, 349 SkBidiIterator::Direction dir) { 350 return SkUnicode::makeBidiIterator(text, count, dir); 351} 352std::unique_ptr<SkBidiIterator> SkUnicode_client::makeBidiIterator(const char text[], 353 int count, 354 SkBidiIterator::Direction dir) { 355 return SkUnicode::makeBidiIterator(text, count, dir); 356} 357std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(const char locale[], 358 BreakType breakType) { 359 return std::make_unique<SkBreakIterator_client>(fData); 360} 361std::unique_ptr<SkBreakIterator> SkUnicode_client::makeBreakIterator(BreakType breakType) { 362 return std::make_unique<SkBreakIterator_client>(fData); 363} 364 365std::unique_ptr<SkUnicode> SkUnicode::MakeClientBasedUnicode( 366 SkSpan<char> text, 367 std::vector<SkUnicode::Position> words, 368 std::vector<SkUnicode::Position> graphemeBreaks, 369 std::vector<SkUnicode::LineBreakBefore> lineBreaks) { 370 return std::make_unique<SkUnicode_client>(text, words, graphemeBreaks, lineBreaks); 371} 372 373