1/*
2 * Copyright 2020 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7#ifndef SkUnicode_DEFINED
8#define SkUnicode_DEFINED
9#include "include/core/SkSpan.h"
10#include "include/core/SkString.h"
11#include "include/core/SkTypes.h"
12#include "include/private/SkBitmaskEnum.h" // IWYU pragma: keep
13#include "include/private/SkTArray.h"
14#include "include/private/SkTo.h"
15#include "src/utils/SkUTF.h"
16
17#include <cstddef>
18#include <cstdint>
19#include <memory>
20#include <string>
21#include <vector>
22
// SKUNICODE_IMPLEMENTATION defaults to 0 unless the build already defined it.
// It is only consulted below, to choose between exporting and importing.
#ifndef SKUNICODE_IMPLEMENTATION
    #define SKUNICODE_IMPLEMENTATION 0
#endif

// SKUNICODE_API decorates the public classes declared in this header.
// A definition supplied by the build is left untouched; otherwise:
//   SKUNICODE_DLL undefined             -> expands to nothing
//   SKUNICODE_DLL defined, no _MSC_VER  -> default symbol visibility
//   SKUNICODE_DLL defined, _MSC_VER     -> dllexport when SKUNICODE_IMPLEMENTATION
//                                          is non-zero, dllimport otherwise
#ifndef SKUNICODE_API
    #if !defined(SKUNICODE_DLL)
        #define SKUNICODE_API
    #elif !defined(_MSC_VER)
        #define SKUNICODE_API __attribute__((visibility("default")))
    #elif SKUNICODE_IMPLEMENTATION
        #define SKUNICODE_API __declspec(dllexport)
    #else
        #define SKUNICODE_API __declspec(dllimport)
    #endif
#endif
42
43class SKUNICODE_API SkBidiIterator {
44public:
45    typedef int32_t Position;
46    typedef uint8_t Level;
47    struct Region {
48        Region(Position start, Position end, Level level)
49            : start(start), end(end), level(level) { }
50        Position start;
51        Position end;
52        Level level;
53    };
54    enum Direction {
55        kLTR,
56        kRTL,
57    };
58    virtual ~SkBidiIterator() = default;
59    virtual Position getLength() = 0;
60    virtual Level getLevelAt(Position) = 0;
61};
62
63class SKUNICODE_API SkBreakIterator {
64public:
65    typedef int32_t Position;
66    typedef int32_t Status;
67    virtual ~SkBreakIterator() = default;
68    virtual Position first() = 0;
69    virtual Position current() = 0;
70    virtual Position next() = 0;
71    virtual Status status() = 0;
72    virtual bool isDone() = 0;
73    virtual bool setText(const char utftext8[], int utf8Units) = 0;
74    virtual bool setText(const char16_t utftext16[], int utf16Units) = 0;
75};
76
77class SKUNICODE_API SkUnicode {
78    public:
79        enum CodeUnitFlags {
80            kNoCodeUnitFlag = 0x00,
81            kPartOfWhiteSpaceBreak = 0x01,
82            kGraphemeStart = 0x02,
83            kSoftLineBreakBefore = 0x04,
84            kHardLineBreakBefore = 0x08,
85            kPartOfIntraWordBreak = 0x10,
86            kControl = 0x20,
87            kTabulation = 0x40,
88            kGlyphClusterStart = 0x80,
89            kIdeographic = 0x100,
90        };
91        enum class TextDirection {
92            kLTR,
93            kRTL,
94        };
95        typedef size_t Position;
96        typedef uint8_t BidiLevel;
97        struct BidiRegion {
98            BidiRegion(Position start, Position end, BidiLevel level)
99              : start(start), end(end), level(level) { }
100            Position start;
101            Position end;
102            BidiLevel level;
103        };
104        enum class LineBreakType {
105            kSoftLineBreak = 0,
106            kHardLineBreak = 100,
107        };
108
109        enum class BreakType {
110            kWords,
111            kGraphemes,
112            kLines
113        };
114        struct LineBreakBefore {
115            LineBreakBefore(Position pos, LineBreakType breakType)
116              : pos(pos), breakType(breakType) { }
117            Position pos;
118            LineBreakType breakType;
119        };
120
121        virtual ~SkUnicode() = default;
122
123        virtual SkString toUpper(const SkString&) = 0;
124
125        // Methods used in SkShaper and SkText
126        virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
127            (const uint16_t text[], int count, SkBidiIterator::Direction) = 0;
128        virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
129            (const char text[], int count, SkBidiIterator::Direction) = 0;
130        virtual std::unique_ptr<SkBreakIterator> makeBreakIterator
131            (const char locale[], BreakType breakType) = 0;
132        virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0;
133
134        // Methods used in SkParagraph
135        static bool isTabulation(SkUnicode::CodeUnitFlags flags);
136        static bool isHardLineBreak(SkUnicode::CodeUnitFlags flags);
137        static bool isSoftLineBreak(SkUnicode::CodeUnitFlags flags);
138        static bool isGraphemeStart(SkUnicode::CodeUnitFlags flags);
139        static bool isControl(SkUnicode::CodeUnitFlags flags);
140        static bool isPartOfWhiteSpaceBreak(SkUnicode::CodeUnitFlags flags);
141        static bool isIdeographic(SkUnichar utf8);
142        static bool extractBidi(const char utf8[],
143                                int utf8Units,
144                                TextDirection dir,
145                                std::vector<BidiRegion>* bidiRegions);
146        virtual bool getBidiRegions(const char utf8[],
147                                    int utf8Units,
148                                    TextDirection dir,
149                                    std::vector<BidiRegion>* results) = 0;
150        virtual bool getWords(const char utf8[], int utf8Units, const char* locale,
151                              std::vector<Position>* results) = 0;
152        virtual bool computeCodeUnitFlags(
153                char utf8[], int utf8Units, bool replaceTabs,
154                SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
155        virtual bool computeCodeUnitFlags(
156                char16_t utf16[], int utf16Units, bool replaceTabs,
157                SkTArray<SkUnicode::CodeUnitFlags, true>* results) = 0;
158
159        static SkString convertUtf16ToUtf8(const char16_t * utf16, int utf16Units);
160        static SkString convertUtf16ToUtf8(const std::u16string& utf16);
161        static std::u16string convertUtf8ToUtf16(const char* utf8, int utf8Units);
162        static std::u16string convertUtf8ToUtf16(const SkString& utf8);
163
164        template <typename Appender8, typename Appender16>
165        static bool extractUtfConversionMapping(SkSpan<const char> utf8, Appender8&& appender8, Appender16&& appender16) {
166            size_t size8 = 0;
167            size_t size16 = 0;
168            auto ptr = utf8.begin();
169            auto end = utf8.end();
170            while (ptr < end) {
171
172                size_t index = SkToSizeT(ptr - utf8.begin());
173                SkUnichar u = SkUTF::NextUTF8(&ptr, end);
174
175                // All UTF8 code units refer to the same codepoint
176                size_t next = SkToSizeT(ptr - utf8.begin());
177                for (auto i = index; i < next; ++i) {
178                    //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
179                    appender16(size8);
180                    ++size16;
181                }
182                //SkASSERT(fUTF16IndexForUTF8Index.size() == next);
183                SkASSERT(size16 == next);
184                if (size16 != next) {
185                    return false;
186                }
187
188                // One or two UTF16 code units refer to the same codepoint
189                uint16_t buffer[2];
190                size_t count = SkUTF::ToUTF16(u, buffer);
191                //fUTF8IndexForUTF16Index.emplace_back(index);
192                appender8(index);
193                ++size8;
194                if (count > 1) {
195                    //fUTF8IndexForUTF16Index.emplace_back(index);
196                    appender8(index);
197                    ++size8;
198                }
199            }
200            //fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
201            appender16(size8);
202            ++size16;
203            //fUTF8IndexForUTF16Index.emplace_back(fText.size());
204            appender8(utf8.size());
205            ++size8;
206
207            return true;
208        }
209
210        template <typename Callback>
211        void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) {
212            const char* current = utf8;
213            const char* end = utf8 + utf8Units;
214            while (current < end) {
215                auto before = current - utf8;
216                SkUnichar unichar = SkUTF::NextUTF8(&current, end);
217                if (unichar < 0) unichar = 0xFFFD;
218                auto after = current - utf8;
219                uint16_t buffer[2];
220                size_t count = SkUTF::ToUTF16(unichar, buffer);
221                callback(unichar, before, after, count);
222            }
223        }
224
225        template <typename Callback>
226        void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) {
227            const char16_t* current = utf16;
228            const char16_t* end = utf16 + utf16Units;
229            while (current < end) {
230                auto before = current - utf16;
231                SkUnichar unichar = SkUTF::NextUTF16((const uint16_t**)&current, (const uint16_t*)end);
232                auto after = current - utf16;
233                callback(unichar, before, after);
234            }
235        }
236
237        template <typename Callback>
238        void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) {
239            auto iter = makeBidiIterator(utf16, utf16Units, dir);
240            const uint16_t* start16 = utf16;
241            const uint16_t* end16 = utf16 + utf16Units;
242            SkBidiIterator::Level currentLevel = 0;
243
244            SkBidiIterator::Position pos16 = 0;
245            while (pos16 <= iter->getLength()) {
246                auto level = iter->getLevelAt(pos16);
247                if (pos16 == 0) {
248                    currentLevel = level;
249                } else if (level != currentLevel) {
250                    callback(pos16, start16 - utf16, currentLevel);
251                    currentLevel = level;
252                }
253                if (start16 == end16) {
254                    break;
255                }
256                SkUnichar u = SkUTF::NextUTF16(&start16, end16);
257                pos16 += SkUTF::ToUTF16(u);
258            }
259        }
260
261        template <typename Callback>
262        void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) {
263            auto iter = makeBreakIterator(type);
264            iter->setText(utf16, utf16Units);
265            auto pos = iter->first();
266            do {
267                callback(pos, iter->status());
268                pos = iter->next();
269            } while (!iter->isDone());
270        }
271
272        virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
273
274        virtual std::unique_ptr<SkUnicode> copy() = 0;
275
276        static std::unique_ptr<SkUnicode> Make();
277
278        static std::unique_ptr<SkUnicode> MakeIcuBasedUnicode();
279
280        static std::unique_ptr<SkUnicode> MakeClientBasedUnicode(
281                SkSpan<char> text,
282                std::vector<SkUnicode::Position> words,
283                std::vector<SkUnicode::Position> graphemeBreaks,
284                std::vector<SkUnicode::LineBreakBefore> lineBreaks);
285};
286
287namespace sknonstd {
288    template <> struct is_bitmask_enum<SkUnicode::CodeUnitFlags> : std::true_type {};
289}  // namespace sknonstd
290#endif // SkUnicode_DEFINED
291