/*
 * Copyright 2020 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 #ifndef SkUnicode_DEFINED
8 #define SkUnicode_DEFINED
9 #include "include/core/SkSpan.h"
10 #include "include/core/SkString.h"
11 #include "include/core/SkTypes.h"
12 #include "include/private/SkBitmaskEnum.h" // IWYU pragma: keep
13 #include "include/private/SkTArray.h"
14 #include "include/private/SkTo.h"
15 #include "src/utils/SkUTF.h"
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <vector>
22 
// SKUNICODE_IMPLEMENTATION defaults to 0 unless the build has already defined it.
// It is only consulted below, to choose between dllexport and dllimport.
#if !defined(SKUNICODE_IMPLEMENTATION)
    #define SKUNICODE_IMPLEMENTATION 0
#endif

// SKUNICODE_API decorates the classes declared in this header.
// A definition supplied by the build is kept as-is. Otherwise:
//   - SKUNICODE_DLL not defined            -> expands to nothing
//   - SKUNICODE_DLL defined, _MSC_VER set  -> dllexport when SKUNICODE_IMPLEMENTATION
//                                             is non-zero, dllimport otherwise
//   - SKUNICODE_DLL defined, other compiler -> default symbol visibility
#if !defined(SKUNICODE_API)
    #if defined(SKUNICODE_DLL)
        #if defined(_MSC_VER)
            #if SKUNICODE_IMPLEMENTATION
                #define SKUNICODE_API __declspec(dllexport)
            #else
                #define SKUNICODE_API __declspec(dllimport)
            #endif
        #else
            #define SKUNICODE_API __attribute__((visibility("default")))
        #endif
    #else
        #define SKUNICODE_API
    #endif
#endif
42 
43 class SKUNICODE_API SkBidiIterator {
44 public:
45     typedef int32_t Position;
46     typedef uint8_t Level;
47     struct Region {
RegionSkBidiIterator::Region48         Region(Position start, Position end, Level level)
49             : start(start), end(end), level(level) { }
50         Position start;
51         Position end;
52         Level level;
53     };
54     enum Direction {
55         kLTR,
56         kRTL,
57     };
58     virtual ~SkBidiIterator() = default;
59     virtual Position getLength() = 0;
60     virtual Level getLevelAt(Position) = 0;
61 };
62 
63 class SKUNICODE_API SkBreakIterator {
64 public:
65     typedef int32_t Position;
66     typedef int32_t Status;
67     virtual ~SkBreakIterator() = default;
68     virtual Position first() = 0;
69     virtual Position current() = 0;
70     virtual Position next() = 0;
71     virtual Status status() = 0;
72     virtual bool isDone() = 0;
73     virtual bool setText(const char utftext8[], int utf8Units) = 0;
74     virtual bool setText(const char16_t utftext16[], int utf16Units) = 0;
75 };
76 
77 class SKUNICODE_API SkUnicode {
78     public:
79         enum CodeUnitFlags {
80             kNoCodeUnitFlag = 0x00,
81             kPartOfWhiteSpaceBreak = 0x01,
82             kGraphemeStart = 0x02,
83             kSoftLineBreakBefore = 0x04,
84             kHardLineBreakBefore = 0x08,
85             kPartOfIntraWordBreak = 0x10,
86             kControl = 0x20,
87             kTabulation = 0x40,
88             kGlyphClusterStart = 0x80,
89             kIdeographic = 0x100,
90         };
91         enum class TextDirection {
92             kLTR,
93             kRTL,
94         };
95         typedef size_t Position;
96         typedef uint8_t BidiLevel;
97         struct BidiRegion {
BidiRegionSkUnicode::BidiRegion98             BidiRegion(Position start, Position end, BidiLevel level)
99               : start(start), end(end), level(level) { }
100             Position start;
101             Position end;
102             BidiLevel level;
103         };
104         enum class LineBreakType {
105             kSoftLineBreak = 0,
106             kHardLineBreak = 100,
107         };
108 
109         enum class BreakType {
110             kWords,
111             kGraphemes,
112             kLines
113         };
114         struct LineBreakBefore {
LineBreakBeforeSkUnicode::LineBreakBefore115             LineBreakBefore(Position pos, LineBreakType breakType)
116               : pos(pos), breakType(breakType) { }
117             Position pos;
118             LineBreakType breakType;
119         };
120 
121         virtual ~SkUnicode() = default;
122 
123         virtual SkString toUpper(const SkString&) = 0;
124 
125         // Methods used in SkShaper and SkText
126         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
127             (const uint16_t text[], int count, SkBidiIterator::Direction) = 0;
128         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
129             (const char text[], int count, SkBidiIterator::Direction) = 0;
130         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator
131             (const char locale[], BreakType breakType) = 0;
132         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0;
133 
134         // Methods used in SkParagraph
135         static bool isTabulation(SkUnicode::CodeUnitFlags flags);
136         static bool isHardLineBreak(SkUnicode::CodeUnitFlags flags);
137         static bool isSoftLineBreak(SkUnicode::CodeUnitFlags flags);
138         static bool isGraphemeStart(SkUnicode::CodeUnitFlags flags);
139         static bool isControl(SkUnicode::CodeUnitFlags flags);
140         static bool isPartOfWhiteSpaceBreak(SkUnicode::CodeUnitFlags flags);
141         static bool isIdeographic(SkUnichar utf8);
142         static bool extractBidi(const char utf8[],
143                                 int utf8Units,
144                                 TextDirection dir,
145                                 std::vector<BidiRegion>* bidiRegions);
146         virtual bool getBidiRegions(const char utf8[],
147                                     int utf8Units,
148                                     TextDirection dir,
149                                     std::vector<BidiRegion>* results) = 0;
150         virtual bool getWords(const char utf8[], int utf8Units, const char* locale,
151                               std::vector<Position>* results) = 0;
152         virtual bool computeCodeUnitFlags(
153                 char utf8[], int utf8Units, bool replaceTabs,
154                 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
155         virtual bool computeCodeUnitFlags(
156                 char16_t utf16[], int utf16Units, bool replaceTabs,
157                 SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
158 
159         static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units);
160         static SkString convertUtf16ToUtf8(const std::u16string& utf16);
161         static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units);
162         static std::u16string convertUtf8ToUtf16(const SkString& utf8);
163 
164         template <typename Appender8, typename Appender16>
extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16)165         static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) {
166             size_t size8 = 0;
167             size_t size16 = 0;
168             auto ptr = utf8.begin();
169             auto end = utf8.end();
170             while (ptr < end) {
171 
172                 size_t index = SkToSizeT(ptr - utf8.begin());
173                 SkUnichar u = SkUTF::NextUTF8(&ptr, end);
174 
175                 // All UTF8 code units refer to the same codepoint
176                 size_t next = SkToSizeT(ptr - utf8.begin());
177                 for (auto i = index; i < next; ++i) {
178                     //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
179                     appender16(size8);
180                     ++size16;
181                 }
182                 //SkASSERT(fUTF16IndexForUTF8Index.size() == next);
183                 SkASSERT(size16 == next);
184                 if (size16 != next) {
185                     return false;
186                 }
187 
188                 // One or two UTF16 code units refer to the same codepoint
189                 uint16_t buffer[2];
190                 size_t count = SkUTF::ToUTF16(u, buffer);
191                 //fUTF8IndexForUTF16Index.emplace_back(index);
192                 appender8(index);
193                 ++size8;
194                 if (count > 1) {
195                     //fUTF8IndexForUTF16Index.emplace_back(index);
196                     appender8(index);
197                     ++size8;
198                 }
199             }
200             //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
201             appender16(size8);
202             ++size16;
203             //fUTF8IndexForUTF16Index.emplace_back(fText.size());
204             appender8(utf8.size());
205             ++size8;
206 
207             return true;
208         }
209 
210         template <typename Callback>
forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback)211         void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) {
212             const char* current = utf8;
213             const char* end = utf8 + utf8Units;
214             while (current < end) {
215                 auto before = current - utf8;
216                 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
217                 if (unichar < 0) unichar = 0xFFFD;
218                 auto after = current - utf8;
219                 uint16_t buffer[2];
220                 size_t count = SkUTF::ToUTF16(unichar, buffer);
221                 callback(unichar, before, after, count);
222             }
223         }
224 
225         template <typename Callback>
forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback)226         void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) {
227             const char16_t* current = utf16;
228             const char16_t* end = utf16 + utf16Units;
229             while (current < end) {
230                 auto before = current - utf16;
231                 SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)&current, (const uint16_t*)end);
232                 auto after = current - utf16;
233                 callback(unichar, before, after);
234             }
235         }
236 
237         template <typename Callback>
forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback)238         void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) {
239             auto iter = makeBidiIterator(utf16, utf16Units, dir);
240             const uint16_t* start16 = utf16;
241             const uint16_t* end16 = utf16 + utf16Units;
242             SkBidiIterator::Level currentLevel = 0;
243 
244             SkBidiIterator::Position pos16 = 0;
245             while (pos16 <= iter->getLength()) {
246                 auto level = iter->getLevelAt(pos16);
247                 if (pos16 == 0) {
248                     currentLevel = level;
249                 } else if (level != currentLevel) {
250                     callback(pos16, start16 - utf16, currentLevel);
251                     currentLevel = level;
252                 }
253                 if (start16 == end16) {
254                     break;
255                 }
256                 SkUnichar u = SkUTF::NextUTF16(&start16, end16);
257                 pos16 += SkUTF::ToUTF16(u);
258             }
259         }
260 
261         template <typename Callback>
forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback)262         void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) {
263             auto iter = makeBreakIterator(type);
264             iter->setText(utf16, utf16Units);
265             auto pos = iter->first();
266             do {
267                 callback(pos, iter->status());
268                 pos = iter->next();
269             } while (!iter->isDone());
270         }
271 
272         virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
273 
274         virtual std::unique_ptr<SkUnicode> copy() = 0;
275 
276         static std::unique_ptr<SkUnicode> Make();
277 
278         static std::unique_ptr<SkUnicode> MakeIcuBasedUnicode();
279 
280         static std::unique_ptr<SkUnicode> MakeClientBasedUnicode(
281                 SkSpan<char> text,
282                 std::vector<SkUnicode::Position> words,
283                 std::vector<SkUnicode::Position> graphemeBreaks,
284                 std::vector<SkUnicode::LineBreakBefore> lineBreaks);
285 };
286 
287 namespace sknonstd {
288     template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {};
289 }  // namespace sknonstd
290 #endif // SkUnicode_DEFINED
291