1/* 2 * Copyright 2020 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7#ifndef SkUnicode_DEFINED 8#define SkUnicode_DEFINED 9#include "include/core/SkSpan.h" 10#include "include/core/SkString.h" 11#include "include/core/SkTypes.h" 12#include "include/private/SkBitmaskEnum.h" // IWYU pragma: keep 13#include "include/private/SkTArray.h" 14#include "include/private/SkTo.h" 15#include "src/utils/SkUTF.h" 16 17#include <cstddef> 18#include <cstdint> 19#include <memory> 20#include <string> 21#include <vector> 22 23#if !defined(SKUNICODE_IMPLEMENTATION) 24 #define SKUNICODE_IMPLEMENTATION 0 25#endif 26 27#if !defined(SKUNICODE_API) 28 #if defined(SKUNICODE_DLL) 29 #if defined(_MSC_VER) 30 #if SKUNICODE_IMPLEMENTATION 31 #define SKUNICODE_API __declspec(dllexport) 32 #else 33 #define SKUNICODE_API __declspec(dllimport) 34 #endif 35 #else 36 #define SKUNICODE_API __attribute__((visibility("default"))) 37 #endif 38 #else 39 #define SKUNICODE_API 40 #endif 41#endif 42 43class SKUNICODE_API SkBidiIterator { 44public: 45 typedef int32_t Position; 46 typedef uint8_t Level; 47 struct Region { 48 Region(Position start, Position end, Level level) 49 : start(start), end(end), level(level) { } 50 Position start; 51 Position end; 52 Level level; 53 }; 54 enum Direction { 55 kLTR, 56 kRTL, 57 }; 58 virtual ~SkBidiIterator() = default; 59 virtual Position getLength() = 0; 60 virtual Level getLevelAt(Position) = 0; 61}; 62 63class SKUNICODE_API SkBreakIterator { 64public: 65 typedef int32_t Position; 66 typedef int32_t Status; 67 virtual ~SkBreakIterator() = default; 68 virtual Position first() = 0; 69 virtual Position current() = 0; 70 virtual Position next() = 0; 71 virtual Status status() = 0; 72 virtual bool isDone() = 0; 73 virtual bool setText(const char utftext8[], int utf8Units) = 0; 74 virtual bool setText(const char16_t utftext16[], int utf16Units) = 0; 75}; 76 77class SKUNICODE_API SkUnicode { 78 public: 79 enum CodeUnitFlags { 80 kNoCodeUnitFlag = 0x00, 81 kPartOfWhiteSpaceBreak = 0x01, 82 kGraphemeStart = 0x02, 83 kSoftLineBreakBefore = 0x04, 84 kHardLineBreakBefore = 0x08, 85 kPartOfIntraWordBreak = 0x10, 86 kControl = 0x20, 87 kTabulation = 0x40, 88 kGlyphClusterStart = 0x80, 89 kIdeographic = 0x100, 90 }; 91 enum class TextDirection { 92 kLTR, 93 kRTL, 94 }; 95 typedef size_t Position; 96 typedef uint8_t BidiLevel; 97 struct BidiRegion { 98 BidiRegion(Position start, Position end, BidiLevel level) 99 : start(start), end(end), level(level) { } 100 Position start; 101 Position end; 102 BidiLevel level; 103 }; 104 enum class LineBreakType { 105 kSoftLineBreak = 0, 106 kHardLineBreak = 100, 107 }; 108 109 enum class BreakType { 110 kWords, 111 kGraphemes, 112 kLines 113 }; 114 struct LineBreakBefore { 115 LineBreakBefore(Position pos, LineBreakType breakType) 116 : pos(pos), breakType(breakType) { } 117 Position pos; 118 LineBreakType breakType; 119 }; 120 121 virtual ~SkUnicode() = default; 122 123 virtual SkString toUpper(const SkString&) = 0; 124 125 // Methods used in SkShaper and SkText 126 virtual std::unique_ptr<SkBidiIterator> makeBidiIterator 127 (const uint16_t text[], int count, SkBidiIterator::Direction) = 0; 128 virtual std::unique_ptr<SkBidiIterator> makeBidiIterator 129 (const char text[], int count, SkBidiIterator::Direction) = 0; 130 virtual std::unique_ptr<SkBreakIterator> makeBreakIterator 131 (const char locale[], BreakType breakType) = 0; 132 virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0; 133 134 // Methods used in SkParagraph 135 static bool isTabulation(SkUnicode::CodeUnitFlags flags); 136 static bool isHardLineBreak(SkUnicode::CodeUnitFlags flags); 137 static bool isSoftLineBreak(SkUnicode::CodeUnitFlags flags); 138 static bool isGraphemeStart(SkUnicode::CodeUnitFlags flags); 139 static bool isControl(SkUnicode::CodeUnitFlags flags); 140 static bool isPartOfWhiteSpaceBreak(SkUnicode::CodeUnitFlags flags); 141 static bool isIdeographic(SkUnichar utf8); 142 static bool extractBidi(const char utf8[], 143 int utf8Units, 144 TextDirection dir, 145 std::vector<BidiRegion>* bidiRegions); 146 virtual bool getBidiRegions(const char utf8[], 147 int utf8Units, 148 TextDirection dir, 149 std::vector<BidiRegion>* results) = 0; 150 virtual bool getWords(const char utf8[], int utf8Units, const char* locale, 151 std::vector<Position>* results) = 0; 152 virtual bool computeCodeUnitFlags( 153 char utf8[], int utf8Units, bool replaceTabs, 154 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 155 virtual bool computeCodeUnitFlags( 156 char16_t utf16[], int utf16Units, bool replaceTabs, 157 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0; 158 159 static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units); 160 static SkString convertUtf16ToUtf8(const std::u16string& utf16); 161 static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units); 162 static std::u16string convertUtf8ToUtf16(const SkString& utf8); 163 164 template <typename Appender8, typename Appender16> 165 static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) { 166 size_t size8 = 0; 167 size_t size16 = 0; 168 auto ptr = utf8.begin(); 169 auto end = utf8.end(); 170 while (ptr < end) { 171 172 size_t index = SkToSizeT(ptr - utf8.begin()); 173 SkUnichar u = SkUTF::NextUTF8(&ptr, end); 174 175 // All UTF8 code units refer to the same codepoint 176 size_t next = SkToSizeT(ptr - utf8.begin()); 177 for (auto i = index; i < next; ++i) { 178 //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); 179 appender16(size8); 180 ++size16; 181 } 182 //SkASSERT(fUTF16IndexForUTF8Index.size() == next); 183 SkASSERT(size16 == next); 184 if (size16 != next) { 185 return false; 186 } 187 188 // One or two UTF16 code units refer to the same codepoint 189 uint16_t buffer[2]; 190 size_t count = SkUTF::ToUTF16(u, buffer); 191 //fUTF8IndexForUTF16Index.emplace_back(index); 192 appender8(index); 193 ++size8; 194 if (count > 1) { 195 //fUTF8IndexForUTF16Index.emplace_back(index); 196 appender8(index); 197 ++size8; 198 } 199 } 200 //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size()); 201 appender16(size8); 202 ++size16; 203 //fUTF8IndexForUTF16Index.emplace_back(fText.size()); 204 appender8(utf8.size()); 205 ++size8; 206 207 return true; 208 } 209 210 template <typename Callback> 211 void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) { 212 const char* current = utf8; 213 const char* end = utf8 + utf8Units; 214 while (current < end) { 215 auto before = current - utf8; 216 SkUnichar unichar = SkUTF::NextUTF8(¤t, end); 217 if (unichar < 0) unichar = 0xFFFD; 218 auto after = current - utf8; 219 uint16_t buffer[2]; 220 size_t count = SkUTF::ToUTF16(unichar, buffer); 221 callback(unichar, before, after, count); 222 } 223 } 224 225 template <typename Callback> 226 void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) { 227 const char16_t* current = utf16; 228 const char16_t* end = utf16 + utf16Units; 229 while (current < end) { 230 auto before = current - utf16; 231 SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)¤t, (const uint16_t*)end); 232 auto after = current - utf16; 233 callback(unichar, before, after); 234 } 235 } 236 237 template <typename Callback> 238 void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) { 239 auto iter = makeBidiIterator(utf16, utf16Units, dir); 240 const uint16_t* start16 = utf16; 241 const uint16_t* end16 = utf16 + utf16Units; 242 SkBidiIterator::Level currentLevel = 0; 243 244 SkBidiIterator::Position pos16 = 0; 245 while (pos16 <= iter->getLength()) { 246 auto level = iter->getLevelAt(pos16); 247 if (pos16 == 0) { 248 currentLevel = level; 249 } else if (level != currentLevel) { 250 callback(pos16, start16 - utf16, currentLevel); 251 currentLevel = level; 252 } 253 if (start16 == end16) { 254 break; 255 } 256 SkUnichar u = SkUTF::NextUTF16(&start16, end16); 257 pos16 += SkUTF::ToUTF16(u); 258 } 259 } 260 261 template <typename Callback> 262 void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) { 263 auto iter = makeBreakIterator(type); 264 iter->setText(utf16, utf16Units); 265 auto pos = iter->first(); 266 do { 267 callback(pos, iter->status()); 268 pos = iter->next(); 269 } while (!iter->isDone()); 270 } 271 272 virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0; 273 274 virtual std::unique_ptr<SkUnicode> copy() = 0; 275 276 static std::unique_ptr<SkUnicode> Make(); 277 278 static std::unique_ptr<SkUnicode> MakeIcuBasedUnicode(); 279 280 static std::unique_ptr<SkUnicode> MakeClientBasedUnicode( 281 SkSpan<char> text, 282 std::vector<SkUnicode::Position> words, 283 std::vector<SkUnicode::Position> graphemeBreaks, 284 std::vector<SkUnicode::LineBreakBefore> lineBreaks); 285}; 286 287namespace sknonstd { 288 template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {}; 289} // namespace sknonstd 290#endif // SkUnicode_DEFINED 291