xref: /third_party/skia/src/utils/SkJSON.cpp (revision cb93a386)
1/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "src/utils/SkJSON.h"
9
10#include "include/core/SkStream.h"
11#include "include/core/SkString.h"
12#include "include/private/SkMalloc.h"
13#include "include/utils/SkParse.h"
14#include "src/utils/SkUTF.h"
15
16#include <cmath>
17#include <tuple>
18#include <vector>
19
20namespace skjson {
21
22// #define SK_JSON_REPORT_ERRORS
23
24static_assert( sizeof(Value) == 8, "");
25static_assert(alignof(Value) == 8, "");
26
27static constexpr size_t kRecAlign = alignof(Value);
28
29void Value::init_tagged(Tag t) {
30    memset(fData8, 0, sizeof(fData8));
31    fData8[0] = SkTo<uint8_t>(t);
32    SkASSERT(this->getTag() == t);
33}
34
35// Pointer values store a type (in the lower kTagBits bits) and a pointer.
36void Value::init_tagged_pointer(Tag t, void* p) {
37    if (sizeof(Value) == sizeof(uintptr_t)) {
38        *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
39        // For 64-bit, we rely on the pointer lower bits being zero.
40        SkASSERT(!(fData8[0] & kTagMask));
41        fData8[0] |= SkTo<uint8_t>(t);
42    } else {
43        // For 32-bit, we store the pointer in the upper word
44        SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
45        this->init_tagged(t);
46        *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
47    }
48
49    SkASSERT(this->getTag()    == t);
50    SkASSERT(this->ptr<void>() == p);
51}
52
53NullValue::NullValue() {
54    this->init_tagged(Tag::kNull);
55    SkASSERT(this->getTag() == Tag::kNull);
56}
57
58BoolValue::BoolValue(bool b) {
59    this->init_tagged(Tag::kBool);
60    *this->cast<bool>() = b;
61    SkASSERT(this->getTag() == Tag::kBool);
62}
63
64NumberValue::NumberValue(int32_t i) {
65    this->init_tagged(Tag::kInt);
66    *this->cast<int32_t>() = i;
67    SkASSERT(this->getTag() == Tag::kInt);
68}
69
70NumberValue::NumberValue(float f) {
71    this->init_tagged(Tag::kFloat);
72    *this->cast<float>() = f;
73    SkASSERT(this->getTag() == Tag::kFloat);
74}
75
76// Vector recs point to externally allocated slabs with the following layout:
77//
78//   [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
79//
80// Long strings use extra_alloc_size == 1 to store the \0 terminator.
81//
82template <typename T, size_t extra_alloc_size = 0>
83static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
84    // The Ts are already in memory, so their size should be safe.
85    const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
86    auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
87
88    *size_ptr = size;
89    sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
90
91    return size_ptr;
92}
93
94ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
95    this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
96    SkASSERT(this->getTag() == Tag::kArray);
97}
98
99// Strings have two flavors:
100//
// -- short strings (len <= 6) -> these are stored inline, in the record
//    (one byte reserved for the tag, one for the null terminator):
//
//        [tag] [str] [\0]
//
//    The tag byte and all unused trailing bytes are zero, so the null terminator
//    comes for free (this works 'cause kShortString == 0).
//
// -- long strings (len > 6) -> these are externally allocated vectors (VectorRec<char>).
110//
111// The string data plus a null-char terminator are copied over.
112//
113namespace {
114
115// An internal string builder with a fast 8 byte short string load path
116// (for the common case where the string is not at the end of the stream).
117class FastString final : public Value {
118public:
119    FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
120        SkASSERT(src <= eos);
121
122        if (size > kMaxInlineStringSize) {
123            this->initLongString(src, size, alloc);
124            SkASSERT(this->getTag() == Tag::kString);
125            return;
126        }
127
128        // initFastShortString is faster (doh), but requires access to 6 chars past src.
129        if (src && src + 6 <= eos) {
130            this->initFastShortString(src, size);
131        } else {
132            this->initShortString(src, size);
133        }
134
135        SkASSERT(this->getTag() == Tag::kShortString);
136    }
137
138private:
139    // first byte reserved for tagging, \0 terminator => 6 usable chars
140    inline static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2;
141
142    void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
143        SkASSERT(size > kMaxInlineStringSize);
144
145        this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
146
147        auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
148        const_cast<char*>(data)[size] = '\0';
149    }
150
151    void initShortString(const char* src, size_t size) {
152        SkASSERT(size <= kMaxInlineStringSize);
153
154        this->init_tagged(Tag::kShortString);
155        sk_careful_memcpy(this->cast<char>(), src, size);
156        // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
157    }
158
159    void initFastShortString(const char* src, size_t size) {
160        SkASSERT(size <= kMaxInlineStringSize);
161
162        uint64_t* s64 = this->cast<uint64_t>();
163
164        // Load 8 chars and mask out the tag and \0 terminator.
165        // Note: we picked kShortString == 0 to avoid setting explicitly below.
166        static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this");
167
168        // Since the first byte is occupied by the tag, we want the string chars [0..5] to land
169        // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the
170        // string requires a " prefix at the very least).
171        memcpy(s64, src - 1, 8);
172
173#if defined(SK_CPU_LENDIAN)
174        // The mask for a max-length string (6), with a leading tag and trailing \0 is
175        // 0x00ffffffffffff00.  Accounting for the final left-shift, this becomes
176        // 0x0000ffffffffffff.
177        *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s
178                    << 8;                                                      // tag byte
179#else
180        static_assert(false, "Big-endian builds are not supported at this time.");
181#endif
182    }
183};
184
185} // namespace
186
187StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
188    new (this) FastString(src, size, src, alloc);
189}
190
191ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
192    this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
193    SkASSERT(this->getTag() == Tag::kObject);
194}
195
196
197// Boring public Value glue.
198
// Equality-only string compare: returns 0 when the two \0-terminated strings are
// identical, and 1 otherwise (no ordering information, unlike strcmp).
static int inline_strcmp(const char a[], const char b[]) {
    while (*a != '\0') {
        if (*a != *b) {
            // Mismatch -- this also covers |b| ending before |a|.
            return 1;
        }
        ++a;
        ++b;
    }

    // |a| is exhausted: the strings match only if |b| ends here, too.
    return (*b != '\0') ? 1 : 0;
}
211
212const Value& ObjectValue::operator[](const char* key) const {
213    // Reverse search for duplicates resolution (policy: return last).
214    const auto* begin  = this->begin();
215    const auto* member = this->end();
216
217    while (member > begin) {
218        --member;
219        if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
220            return member->fValue;
221        }
222    }
223
224    static const Value g_null = NullValue();
225    return g_null;
226}
227
228namespace {
229
230// Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
231//
232// [1] https://github.com/Tencent/rapidjson/
233// [2] https://github.com/chadaustin/sajson
234// [3] https://pastebin.com/hnhSTL3h
235
236
// Per-character classification table.  Each entry is a combination of these bits:
//
//   0x01 - plain ASCII string character
//   0x02 - whitespace
//   0x04 - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
//   0x08 - 0-9
//   0x10 - 0-9 e E .
//   0x20 - scope terminator (} ])
static constexpr uint8_t g_token_flags[256] = {
 //    0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x04, 0x04, 0x06, 0x04, 0x04, // 0
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, // 1
    0x03, 0x01, 0x04, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, // 2
    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 3
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 4
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x25, 0x01, 0x01, // 5
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 6
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x25, 0x01, 0x01, // 7

 // 128-255: no flags set
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

// True when the table entry for |c| has any of the |flag| bits set.
static inline bool has_token_flag(char c, uint8_t flag) {
    return (g_token_flags[static_cast<uint8_t>(c)] & flag) != 0;
}

// Single-flag classifiers (see the bit legend above).
static inline bool is_ws(char c)       { return has_token_flag(c, 0x02); }
static inline bool is_eostring(char c) { return has_token_flag(c, 0x04); }
static inline bool is_digit(char c)    { return has_token_flag(c, 0x08); }
static inline bool is_numeric(char c)  { return has_token_flag(c, 0x10); }
static inline bool is_eoscope(char c)  { return has_token_flag(c, 0x20); }

// Returns a pointer to the first non-whitespace char at or after |p|.
static inline const char* skip_ws(const char* p) {
    for (; is_ws(*p); ++p) {}
    return p;
}
271
272static inline float pow10(int32_t exp) {
273    static constexpr float g_pow10_table[63] =
274    {
275       1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
276       1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
277       1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
278       1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
279       1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
280       1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
281       1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
282       1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
283    };
284
285    static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2;
286
287    // We only support negative exponents for now.
288    SkASSERT(exp <= 0);
289
290    return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
291                                  : std::pow(10.0f, static_cast<float>(exp));
292}
293
294class DOMParser {
295public:
296    explicit DOMParser(SkArenaAlloc& alloc)
297        : fAlloc(alloc) {
298        fValueStack.reserve(kValueStackReserve);
299        fUnescapeBuffer.reserve(kUnescapeBufferReserve);
300    }
301
302    Value parse(const char* p, size_t size) {
303        if (!size) {
304            return this->error(NullValue(), p, "invalid empty input");
305        }
306
307        const char* p_stop = p + size - 1;
308
309        // We're only checking for end-of-stream on object/array close('}',']'),
310        // so we must trim any whitespace from the buffer tail.
311        while (p_stop > p && is_ws(*p_stop)) --p_stop;
312
313        SkASSERT(p_stop >= p && p_stop < p + size);
314        if (!is_eoscope(*p_stop)) {
315            return this->error(NullValue(), p_stop, "invalid top-level value");
316        }
317
318        p = skip_ws(p);
319
320        switch (*p) {
321        case '{':
322            goto match_object;
323        case '[':
324            goto match_array;
325        default:
326            return this->error(NullValue(), p, "invalid top-level value");
327        }
328
329    match_object:
330        SkASSERT(*p == '{');
331        p = skip_ws(p + 1);
332
333        this->pushObjectScope();
334
335        if (*p == '}') goto pop_object;
336
337        // goto match_object_key;
338    match_object_key:
339        p = skip_ws(p);
340        if (*p != '"') return this->error(NullValue(), p, "expected object key");
341
342        p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
343            this->pushObjectKey(key, size, eos);
344        });
345        if (!p) return NullValue();
346
347        p = skip_ws(p);
348        if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");
349
350        ++p;
351
352        // goto match_value;
353    match_value:
354        p = skip_ws(p);
355
356        switch (*p) {
357        case '\0':
358            return this->error(NullValue(), p, "unexpected input end");
359        case '"':
360            p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
361                this->pushString(str, size, eos);
362            });
363            break;
364        case '[':
365            goto match_array;
366        case 'f':
367            p = this->matchFalse(p);
368            break;
369        case 'n':
370            p = this->matchNull(p);
371            break;
372        case 't':
373            p = this->matchTrue(p);
374            break;
375        case '{':
376            goto match_object;
377        default:
378            p = this->matchNumber(p);
379            break;
380        }
381
382        if (!p) return NullValue();
383
384        // goto match_post_value;
385    match_post_value:
386        SkASSERT(!this->inTopLevelScope());
387
388        p = skip_ws(p);
389        switch (*p) {
390        case ',':
391            ++p;
392            if (this->inObjectScope()) {
393                goto match_object_key;
394            } else {
395                SkASSERT(this->inArrayScope());
396                goto match_value;
397            }
398        case ']':
399            goto pop_array;
400        case '}':
401            goto pop_object;
402        default:
403            return this->error(NullValue(), p - 1, "unexpected value-trailing token");
404        }
405
406        // unreachable
407        SkASSERT(false);
408
409    pop_object:
410        SkASSERT(*p == '}');
411
412        if (this->inArrayScope()) {
413            return this->error(NullValue(), p, "unexpected object terminator");
414        }
415
416        this->popObjectScope();
417
418        // goto pop_common
419    pop_common:
420        SkASSERT(is_eoscope(*p));
421
422        if (this->inTopLevelScope()) {
423            SkASSERT(fValueStack.size() == 1);
424
425            // Success condition: parsed the top level element and reached the stop token.
426            return p == p_stop
427                ? fValueStack.front()
428                : this->error(NullValue(), p + 1, "trailing root garbage");
429        }
430
431        if (p == p_stop) {
432            return this->error(NullValue(), p, "unexpected end-of-input");
433        }
434
435        ++p;
436
437        goto match_post_value;
438
439    match_array:
440        SkASSERT(*p == '[');
441        p = skip_ws(p + 1);
442
443        this->pushArrayScope();
444
445        if (*p != ']') goto match_value;
446
447        // goto pop_array;
448    pop_array:
449        SkASSERT(*p == ']');
450
451        if (this->inObjectScope()) {
452            return this->error(NullValue(), p, "unexpected array terminator");
453        }
454
455        this->popArrayScope();
456
457        goto pop_common;
458
459        SkASSERT(false);
460        return NullValue();
461    }
462
463    std::tuple<const char*, const SkString> getError() const {
464        return std::make_tuple(fErrorToken, fErrorMessage);
465    }
466
467private:
468    SkArenaAlloc&         fAlloc;
469
470    // Pending values stack.
471    inline static constexpr size_t kValueStackReserve = 256;
472    std::vector<Value>    fValueStack;
473
474    // String unescape buffer.
475    inline static constexpr size_t kUnescapeBufferReserve = 512;
476    std::vector<char>     fUnescapeBuffer;
477
478    // Tracks the current object/array scope, as an index into fStack:
479    //
480    //   - for objects: fScopeIndex =  (index of first value in scope)
481    //   - for arrays : fScopeIndex = -(index of first value in scope)
482    //
483    // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
484    intptr_t              fScopeIndex = 0;
485
486    // Error reporting.
487    const char*           fErrorToken = nullptr;
488    SkString              fErrorMessage;
489
490    bool inTopLevelScope() const { return fScopeIndex == 0; }
491    bool inObjectScope()   const { return fScopeIndex >  0; }
492    bool inArrayScope()    const { return fScopeIndex <  0; }
493
494    // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
495    template <typename T>
496    class RawValue final : public Value {
497    public:
498        explicit RawValue(T v) {
499            static_assert(sizeof(T) <= sizeof(Value), "");
500            *this->cast<T>() = v;
501        }
502
503        T operator *() const { return *this->cast<T>(); }
504    };
505
506    template <typename VectorT>
507    void popScopeAsVec(size_t scope_start) {
508        SkASSERT(scope_start > 0);
509        SkASSERT(scope_start <= fValueStack.size());
510
511        using T = typename VectorT::ValueT;
512        static_assert( sizeof(T) >=  sizeof(Value), "");
513        static_assert( sizeof(T)  %  sizeof(Value) == 0, "");
514        static_assert(alignof(T) == alignof(Value), "");
515
516        const auto scope_count = fValueStack.size() - scope_start,
517                         count = scope_count / (sizeof(T) / sizeof(Value));
518        SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
519
520        const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
521
522        // Restore the previous scope index from saved placeholder value,
523        // and instantiate as a vector of values in scope.
524        auto& placeholder = fValueStack[scope_start - 1];
525        fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
526        placeholder = VectorT(begin, count, fAlloc);
527
528        // Drop the (consumed) values in scope.
529        fValueStack.resize(scope_start);
530    }
531
532    void pushObjectScope() {
533        // Save a scope index now, and then later we'll overwrite this value as the Object itself.
534        fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
535
536        // New object scope.
537        fScopeIndex = SkTo<intptr_t>(fValueStack.size());
538    }
539
540    void popObjectScope() {
541        SkASSERT(this->inObjectScope());
542        this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
543
544        SkDEBUGCODE(
545            const auto& obj = fValueStack.back().as<ObjectValue>();
546            SkASSERT(obj.is<ObjectValue>());
547            for (const auto& member : obj) {
548                SkASSERT(member.fKey.is<StringValue>());
549            }
550        )
551    }
552
553    void pushArrayScope() {
554        // Save a scope index now, and then later we'll overwrite this value as the Array itself.
555        fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
556
557        // New array scope.
558        fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
559    }
560
561    void popArrayScope() {
562        SkASSERT(this->inArrayScope());
563        this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
564
565        SkDEBUGCODE(
566            const auto& arr = fValueStack.back().as<ArrayValue>();
567            SkASSERT(arr.is<ArrayValue>());
568        )
569    }
570
571    void pushObjectKey(const char* key, size_t size, const char* eos) {
572        SkASSERT(this->inObjectScope());
573        SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
574        SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
575        this->pushString(key, size, eos);
576    }
577
578    void pushTrue() {
579        fValueStack.push_back(BoolValue(true));
580    }
581
582    void pushFalse() {
583        fValueStack.push_back(BoolValue(false));
584    }
585
586    void pushNull() {
587        fValueStack.push_back(NullValue());
588    }
589
590    void pushString(const char* s, size_t size, const char* eos) {
591        fValueStack.push_back(FastString(s, size, eos, fAlloc));
592    }
593
594    void pushInt32(int32_t i) {
595        fValueStack.push_back(NumberValue(i));
596    }
597
598    void pushFloat(float f) {
599        fValueStack.push_back(NumberValue(f));
600    }
601
602    template <typename T>
603    T error(T&& ret_val, const char* p, const char* msg) {
604#if defined(SK_JSON_REPORT_ERRORS)
605        fErrorToken = p;
606        fErrorMessage.set(msg);
607#endif
608        return ret_val;
609    }
610
611    const char* matchTrue(const char* p) {
612        SkASSERT(p[0] == 't');
613
614        if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
615            this->pushTrue();
616            return p + 4;
617        }
618
619        return this->error(nullptr, p, "invalid token");
620    }
621
622    const char* matchFalse(const char* p) {
623        SkASSERT(p[0] == 'f');
624
625        if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
626            this->pushFalse();
627            return p + 5;
628        }
629
630        return this->error(nullptr, p, "invalid token");
631    }
632
633    const char* matchNull(const char* p) {
634        SkASSERT(p[0] == 'n');
635
636        if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
637            this->pushNull();
638            return p + 4;
639        }
640
641        return this->error(nullptr, p, "invalid token");
642    }
643
644    const std::vector<char>* unescapeString(const char* begin, const char* end) {
645        fUnescapeBuffer.clear();
646
647        for (const auto* p = begin; p != end; ++p) {
648            if (*p != '\\') {
649                fUnescapeBuffer.push_back(*p);
650                continue;
651            }
652
653            if (++p == end) {
654                return nullptr;
655            }
656
657            switch (*p) {
658            case  '"': fUnescapeBuffer.push_back( '"'); break;
659            case '\\': fUnescapeBuffer.push_back('\\'); break;
660            case  '/': fUnescapeBuffer.push_back( '/'); break;
661            case  'b': fUnescapeBuffer.push_back('\b'); break;
662            case  'f': fUnescapeBuffer.push_back('\f'); break;
663            case  'n': fUnescapeBuffer.push_back('\n'); break;
664            case  'r': fUnescapeBuffer.push_back('\r'); break;
665            case  't': fUnescapeBuffer.push_back('\t'); break;
666            case  'u': {
667                if (p + 4 >= end) {
668                    return nullptr;
669                }
670
671                uint32_t hexed;
672                const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
673                const auto* eos = SkParse::FindHex(hex_str, &hexed);
674                if (!eos || *eos) {
675                    return nullptr;
676                }
677
678                char utf8[SkUTF::kMaxBytesInUTF8Sequence];
679                const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
680                fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
681                p += 4;
682            } break;
683            default: return nullptr;
684            }
685        }
686
687        return &fUnescapeBuffer;
688    }
689
690    template <typename MatchFunc>
691    const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
692        SkASSERT(*p == '"');
693        const auto* s_begin = p + 1;
694        bool requires_unescape = false;
695
696        do {
697            // Consume string chars.
698            // This is the fast path, and hopefully we only hit it once then quick-exit below.
699            for (p = p + 1; !is_eostring(*p); ++p);
700
701            if (*p == '"') {
702                // Valid string found.
703                if (!requires_unescape) {
704                    func(s_begin, p - s_begin, p_stop);
705                } else {
706                    // Slow unescape.  We could avoid this extra copy with some effort,
707                    // but in practice escaped strings should be rare.
708                    const auto* buf = this->unescapeString(s_begin, p);
709                    if (!buf) {
710                        break;
711                    }
712
713                    SkASSERT(!buf->empty());
714                    func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
715                }
716                return p + 1;
717            }
718
719            if (*p == '\\') {
720                requires_unescape = true;
721                ++p;
722                continue;
723            }
724
725            // End-of-scope chars are special: we use them to tag the end of the input.
726            // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
727            // end of the input.  To that effect, we treat them as string terminators above,
728            // then we catch them here.
729            if (is_eoscope(*p)) {
730                continue;
731            }
732
733            // Invalid/unexpected char.
734            break;
735        } while (p != p_stop);
736
737        // Premature end-of-input, or illegal string char.
738        return this->error(nullptr, s_begin - 1, "invalid string");
739    }
740
741    const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
742        SkASSERT(exp <= 0);
743
744        for (;;) {
745            if (!is_digit(*p)) break;
746            f = f * 10.f + (*p++ - '0'); --exp;
747            if (!is_digit(*p)) break;
748            f = f * 10.f + (*p++ - '0'); --exp;
749        }
750
751        const auto decimal_scale = pow10(exp);
752        if (is_numeric(*p) || !decimal_scale) {
753            SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
754            // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
755            return nullptr;
756        }
757
758        this->pushFloat(sign * f * decimal_scale);
759
760        return p;
761    }
762
763    const char* matchFastFloatPart(const char* p, int sign, float f) {
764        for (;;) {
765            if (!is_digit(*p)) break;
766            f = f * 10.f + (*p++ - '0');
767            if (!is_digit(*p)) break;
768            f = f * 10.f + (*p++ - '0');
769        }
770
771        if (!is_numeric(*p)) {
772            // Matched (integral) float.
773            this->pushFloat(sign * f);
774            return p;
775        }
776
777        return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
778                           : nullptr;
779    }
780
781    const char* matchFast32OrFloat(const char* p) {
782        int sign = 1;
783        if (*p == '-') {
784            sign = -1;
785            ++p;
786        }
787
788        const auto* digits_start = p;
789
790        int32_t n32 = 0;
791
792        // This is the largest absolute int32 value we can handle before
793        // risking overflow *on the next digit* (214748363).
794        static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
795
796        if (is_digit(*p)) {
797            n32 = (*p++ - '0');
798            for (;;) {
799                if (!is_digit(*p) || n32 > kMaxInt32) break;
800                n32 = n32 * 10 + (*p++ - '0');
801            }
802        }
803
804        if (!is_numeric(*p)) {
805            // Did we actually match any digits?
806            if (p > digits_start) {
807                this->pushInt32(sign * n32);
808                return p;
809            }
810            return nullptr;
811        }
812
813        if (*p == '.') {
814            const auto* decimals_start = ++p;
815
816            int exp = 0;
817
818            for (;;) {
819                if (!is_digit(*p) || n32 > kMaxInt32) break;
820                n32 = n32 * 10 + (*p++ - '0'); --exp;
821                if (!is_digit(*p) || n32 > kMaxInt32) break;
822                n32 = n32 * 10 + (*p++ - '0'); --exp;
823            }
824
825            if (!is_numeric(*p)) {
826                // Did we actually match any digits?
827                if (p > decimals_start) {
828                    this->pushFloat(sign * n32 * pow10(exp));
829                    return p;
830                }
831                return nullptr;
832            }
833
834            if (n32 > kMaxInt32) {
835                // we ran out on n32 bits
836                return this->matchFastFloatDecimalPart(p, sign, n32, exp);
837            }
838        }
839
840        return this->matchFastFloatPart(p, sign, n32);
841    }
842
843    const char* matchNumber(const char* p) {
844        if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
845
846        // slow fallback
847        char* matched;
848        float f = strtof(p, &matched);
849        if (matched > p) {
850            this->pushFloat(f);
851            return matched;
852        }
853        return this->error(nullptr, p, "invalid numeric token");
854    }
855};
856
857void Write(const Value& v, SkWStream* stream) {
858    switch (v.getType()) {
859    case Value::Type::kNull:
860        stream->writeText("null");
861        break;
862    case Value::Type::kBool:
863        stream->writeText(*v.as<BoolValue>() ? "true" : "false");
864        break;
865    case Value::Type::kNumber:
866        stream->writeScalarAsText(*v.as<NumberValue>());
867        break;
868    case Value::Type::kString:
869        stream->writeText("\"");
870        stream->writeText(v.as<StringValue>().begin());
871        stream->writeText("\"");
872        break;
873    case Value::Type::kArray: {
874        const auto& array = v.as<ArrayValue>();
875        stream->writeText("[");
876        bool first_value = true;
877        for (const auto& entry : array) {
878            if (!first_value) stream->writeText(",");
879            Write(entry, stream);
880            first_value = false;
881        }
882        stream->writeText("]");
883        break;
884    }
885    case Value::Type::kObject:
886        const auto& object = v.as<ObjectValue>();
887        stream->writeText("{");
888        bool first_member = true;
889        for (const auto& member : object) {
890            SkASSERT(member.fKey.getType() == Value::Type::kString);
891            if (!first_member) stream->writeText(",");
892            Write(member.fKey, stream);
893            stream->writeText(":");
894            Write(member.fValue, stream);
895            first_member = false;
896        }
897        stream->writeText("}");
898        break;
899    }
900}
901
902} // namespace
903
904SkString Value::toString() const {
905    SkDynamicMemoryWStream wstream;
906    Write(*this, &wstream);
907    const auto data = wstream.detachAsData();
908    // TODO: is there a better way to pass data around without copying?
909    return SkString(static_cast<const char*>(data->data()), data->size());
910}
911
912static constexpr size_t kMinChunkSize = 4096;
913
914DOM::DOM(const char* data, size_t size)
915    : fAlloc(kMinChunkSize) {
916    DOMParser parser(fAlloc);
917
918    fRoot = parser.parse(data, size);
919}
920
921void DOM::write(SkWStream* stream) const {
922    Write(fRoot, stream);
923}
924
925} // namespace skjson
926