1/* 2 * Copyright 2018 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "src/utils/SkJSON.h" 9 10#include "include/core/SkStream.h" 11#include "include/core/SkString.h" 12#include "include/private/SkMalloc.h" 13#include "include/utils/SkParse.h" 14#include "src/utils/SkUTF.h" 15 16#include <cmath> 17#include <tuple> 18#include <vector> 19 20namespace skjson { 21 22// #define SK_JSON_REPORT_ERRORS 23 24static_assert( sizeof(Value) == 8, ""); 25static_assert(alignof(Value) == 8, ""); 26 27static constexpr size_t kRecAlign = alignof(Value); 28 29void Value::init_tagged(Tag t) { 30 memset(fData8, 0, sizeof(fData8)); 31 fData8[0] = SkTo<uint8_t>(t); 32 SkASSERT(this->getTag() == t); 33} 34 35// Pointer values store a type (in the lower kTagBits bits) and a pointer. 36void Value::init_tagged_pointer(Tag t, void* p) { 37 if (sizeof(Value) == sizeof(uintptr_t)) { 38 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p); 39 // For 64-bit, we rely on the pointer lower bits being zero. 40 SkASSERT(!(fData8[0] & kTagMask)); 41 fData8[0] |= SkTo<uint8_t>(t); 42 } else { 43 // For 32-bit, we store the pointer in the upper word 44 SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2); 45 this->init_tagged(t); 46 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p); 47 } 48 49 SkASSERT(this->getTag() == t); 50 SkASSERT(this->ptr<void>() == p); 51} 52 53NullValue::NullValue() { 54 this->init_tagged(Tag::kNull); 55 SkASSERT(this->getTag() == Tag::kNull); 56} 57 58BoolValue::BoolValue(bool b) { 59 this->init_tagged(Tag::kBool); 60 *this->cast<bool>() = b; 61 SkASSERT(this->getTag() == Tag::kBool); 62} 63 64NumberValue::NumberValue(int32_t i) { 65 this->init_tagged(Tag::kInt); 66 *this->cast<int32_t>() = i; 67 SkASSERT(this->getTag() == Tag::kInt); 68} 69 70NumberValue::NumberValue(float f) { 71 this->init_tagged(Tag::kFloat); 72 *this->cast<float>() = f; 73 SkASSERT(this->getTag() == Tag::kFloat); 74} 75 76// Vector recs point to externally allocated slabs with the following layout: 77// 78// [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage] 79// 80// Long strings use extra_alloc_size == 1 to store the \0 terminator. 81// 82template <typename T, size_t extra_alloc_size = 0> 83static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) { 84 // The Ts are already in memory, so their size should be safe. 85 const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size; 86 auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign)); 87 88 *size_ptr = size; 89 sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T)); 90 91 return size_ptr; 92} 93 94ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) { 95 this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc)); 96 SkASSERT(this->getTag() == Tag::kArray); 97} 98 99// Strings have two flavors: 100// 101// -- short strings (len <= 7) -> these are stored inline, in the record 102// (one byte reserved for null terminator/type): 103// 104// [str] [\0]|[max_len - actual_len] 105// 106// Storing [max_len - actual_len] allows the 'len' field to double-up as a 107// null terminator when size == max_len (this works 'cause kShortString == 0). 108// 109// -- long strings (len > 7) -> these are externally allocated vectors (VectorRec<char>). 110// 111// The string data plus a null-char terminator are copied over. 112// 113namespace { 114 115// An internal string builder with a fast 8 byte short string load path 116// (for the common case where the string is not at the end of the stream). 117class FastString final : public Value { 118public: 119 FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) { 120 SkASSERT(src <= eos); 121 122 if (size > kMaxInlineStringSize) { 123 this->initLongString(src, size, alloc); 124 SkASSERT(this->getTag() == Tag::kString); 125 return; 126 } 127 128 // initFastShortString is faster (doh), but requires access to 6 chars past src. 129 if (src && src + 6 <= eos) { 130 this->initFastShortString(src, size); 131 } else { 132 this->initShortString(src, size); 133 } 134 135 SkASSERT(this->getTag() == Tag::kShortString); 136 } 137 138private: 139 // first byte reserved for tagging, \0 terminator => 6 usable chars 140 inline static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2; 141 142 void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) { 143 SkASSERT(size > kMaxInlineStringSize); 144 145 this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc)); 146 147 auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin(); 148 const_cast<char*>(data)[size] = '\0'; 149 } 150 151 void initShortString(const char* src, size_t size) { 152 SkASSERT(size <= kMaxInlineStringSize); 153 154 this->init_tagged(Tag::kShortString); 155 sk_careful_memcpy(this->cast<char>(), src, size); 156 // Null terminator provided by init_tagged() above (fData8 is zero-initialized). 157 } 158 159 void initFastShortString(const char* src, size_t size) { 160 SkASSERT(size <= kMaxInlineStringSize); 161 162 uint64_t* s64 = this->cast<uint64_t>(); 163 164 // Load 8 chars and mask out the tag and \0 terminator. 165 // Note: we picked kShortString == 0 to avoid setting explicitly below. 166 static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this"); 167 168 // Since the first byte is occupied by the tag, we want the string chars [0..5] to land 169 // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the 170 // string requires a " prefix at the very least). 171 memcpy(s64, src - 1, 8); 172 173#if defined(SK_CPU_LENDIAN) 174 // The mask for a max-length string (6), with a leading tag and trailing \0 is 175 // 0x00ffffffffffff00. Accounting for the final left-shift, this becomes 176 // 0x0000ffffffffffff. 177 *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s 178 << 8; // tag byte 179#else 180 static_assert(false, "Big-endian builds are not supported at this time."); 181#endif 182 } 183}; 184 185} // namespace 186 187StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) { 188 new (this) FastString(src, size, src, alloc); 189} 190 191ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) { 192 this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc)); 193 SkASSERT(this->getTag() == Tag::kObject); 194} 195 196 197// Boring public Value glue. 198 199static int inline_strcmp(const char a[], const char b[]) { 200 for (;;) { 201 char c = *a++; 202 if (c == 0) { 203 break; 204 } 205 if (c != *b++) { 206 return 1; 207 } 208 } 209 return *b != 0; 210} 211 212const Value& ObjectValue::operator[](const char* key) const { 213 // Reverse search for duplicates resolution (policy: return last). 214 const auto* begin = this->begin(); 215 const auto* member = this->end(); 216 217 while (member > begin) { 218 --member; 219 if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) { 220 return member->fValue; 221 } 222 } 223 224 static const Value g_null = NullValue(); 225 return g_null; 226} 227 228namespace { 229 230// Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3]. 231// 232// [1] https://github.com/Tencent/rapidjson/ 233// [2] https://github.com/chadaustin/sajson 234// [3] https://pastebin.com/hnhSTL3h 235 236 237// bit 0 (0x01) - plain ASCII string character 238// bit 1 (0x02) - whitespace 239// bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes) 240// bit 3 (0x08) - 0-9 241// bit 4 (0x10) - 0-9 e E . 242// bit 5 (0x20) - scope terminator (} ]) 243static constexpr uint8_t g_token_flags[256] = { 244 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 245 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 6, 4, 4, // 0 246 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 1 247 3, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0x11,1, // 2 248 0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19, 0x19,0x19, 1, 1, 1, 1, 1, 1, // 3 249 1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 250 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,0x25, 1, 1, // 5 251 1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 252 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,0x25, 1, 1, // 7 253 254 // 128-255 255 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 256 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 257 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 258 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0 259}; 260 261static inline bool is_ws(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; } 262static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; } 263static inline bool is_digit(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; } 264static inline bool is_numeric(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; } 265static inline bool is_eoscope(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; } 266 267static inline const char* skip_ws(const char* p) { 268 while (is_ws(*p)) ++p; 269 return p; 270} 271 272static inline float pow10(int32_t exp) { 273 static constexpr float g_pow10_table[63] = 274 { 275 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f, 276 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f, 277 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f, 278 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f, 279 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f, 280 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f, 281 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f, 282 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f 283 }; 284 285 static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2; 286 287 // We only support negative exponents for now. 288 SkASSERT(exp <= 0); 289 290 return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset] 291 : std::pow(10.0f, static_cast<float>(exp)); 292} 293 294class DOMParser { 295public: 296 explicit DOMParser(SkArenaAlloc& alloc) 297 : fAlloc(alloc) { 298 fValueStack.reserve(kValueStackReserve); 299 fUnescapeBuffer.reserve(kUnescapeBufferReserve); 300 } 301 302 Value parse(const char* p, size_t size) { 303 if (!size) { 304 return this->error(NullValue(), p, "invalid empty input"); 305 } 306 307 const char* p_stop = p + size - 1; 308 309 // We're only checking for end-of-stream on object/array close('}',']'), 310 // so we must trim any whitespace from the buffer tail. 311 while (p_stop > p && is_ws(*p_stop)) --p_stop; 312 313 SkASSERT(p_stop >= p && p_stop < p + size); 314 if (!is_eoscope(*p_stop)) { 315 return this->error(NullValue(), p_stop, "invalid top-level value"); 316 } 317 318 p = skip_ws(p); 319 320 switch (*p) { 321 case '{': 322 goto match_object; 323 case '[': 324 goto match_array; 325 default: 326 return this->error(NullValue(), p, "invalid top-level value"); 327 } 328 329 match_object: 330 SkASSERT(*p == '{'); 331 p = skip_ws(p + 1); 332 333 this->pushObjectScope(); 334 335 if (*p == '}') goto pop_object; 336 337 // goto match_object_key; 338 match_object_key: 339 p = skip_ws(p); 340 if (*p != '"') return this->error(NullValue(), p, "expected object key"); 341 342 p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) { 343 this->pushObjectKey(key, size, eos); 344 }); 345 if (!p) return NullValue(); 346 347 p = skip_ws(p); 348 if (*p != ':') return this->error(NullValue(), p, "expected ':' separator"); 349 350 ++p; 351 352 // goto match_value; 353 match_value: 354 p = skip_ws(p); 355 356 switch (*p) { 357 case '\0': 358 return this->error(NullValue(), p, "unexpected input end"); 359 case '"': 360 p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) { 361 this->pushString(str, size, eos); 362 }); 363 break; 364 case '[': 365 goto match_array; 366 case 'f': 367 p = this->matchFalse(p); 368 break; 369 case 'n': 370 p = this->matchNull(p); 371 break; 372 case 't': 373 p = this->matchTrue(p); 374 break; 375 case '{': 376 goto match_object; 377 default: 378 p = this->matchNumber(p); 379 break; 380 } 381 382 if (!p) return NullValue(); 383 384 // goto match_post_value; 385 match_post_value: 386 SkASSERT(!this->inTopLevelScope()); 387 388 p = skip_ws(p); 389 switch (*p) { 390 case ',': 391 ++p; 392 if (this->inObjectScope()) { 393 goto match_object_key; 394 } else { 395 SkASSERT(this->inArrayScope()); 396 goto match_value; 397 } 398 case ']': 399 goto pop_array; 400 case '}': 401 goto pop_object; 402 default: 403 return this->error(NullValue(), p - 1, "unexpected value-trailing token"); 404 } 405 406 // unreachable 407 SkASSERT(false); 408 409 pop_object: 410 SkASSERT(*p == '}'); 411 412 if (this->inArrayScope()) { 413 return this->error(NullValue(), p, "unexpected object terminator"); 414 } 415 416 this->popObjectScope(); 417 418 // goto pop_common 419 pop_common: 420 SkASSERT(is_eoscope(*p)); 421 422 if (this->inTopLevelScope()) { 423 SkASSERT(fValueStack.size() == 1); 424 425 // Success condition: parsed the top level element and reached the stop token. 426 return p == p_stop 427 ? fValueStack.front() 428 : this->error(NullValue(), p + 1, "trailing root garbage"); 429 } 430 431 if (p == p_stop) { 432 return this->error(NullValue(), p, "unexpected end-of-input"); 433 } 434 435 ++p; 436 437 goto match_post_value; 438 439 match_array: 440 SkASSERT(*p == '['); 441 p = skip_ws(p + 1); 442 443 this->pushArrayScope(); 444 445 if (*p != ']') goto match_value; 446 447 // goto pop_array; 448 pop_array: 449 SkASSERT(*p == ']'); 450 451 if (this->inObjectScope()) { 452 return this->error(NullValue(), p, "unexpected array terminator"); 453 } 454 455 this->popArrayScope(); 456 457 goto pop_common; 458 459 SkASSERT(false); 460 return NullValue(); 461 } 462 463 std::tuple<const char*, const SkString> getError() const { 464 return std::make_tuple(fErrorToken, fErrorMessage); 465 } 466 467private: 468 SkArenaAlloc& fAlloc; 469 470 // Pending values stack. 471 inline static constexpr size_t kValueStackReserve = 256; 472 std::vector<Value> fValueStack; 473 474 // String unescape buffer. 475 inline static constexpr size_t kUnescapeBufferReserve = 512; 476 std::vector<char> fUnescapeBuffer; 477 478 // Tracks the current object/array scope, as an index into fStack: 479 // 480 // - for objects: fScopeIndex = (index of first value in scope) 481 // - for arrays : fScopeIndex = -(index of first value in scope) 482 // 483 // fScopeIndex == 0 IFF we are at the top level (no current/active scope). 484 intptr_t fScopeIndex = 0; 485 486 // Error reporting. 487 const char* fErrorToken = nullptr; 488 SkString fErrorMessage; 489 490 bool inTopLevelScope() const { return fScopeIndex == 0; } 491 bool inObjectScope() const { return fScopeIndex > 0; } 492 bool inArrayScope() const { return fScopeIndex < 0; } 493 494 // Helper for masquerading raw primitive types as Values (bypassing tagging, etc). 495 template <typename T> 496 class RawValue final : public Value { 497 public: 498 explicit RawValue(T v) { 499 static_assert(sizeof(T) <= sizeof(Value), ""); 500 *this->cast<T>() = v; 501 } 502 503 T operator *() const { return *this->cast<T>(); } 504 }; 505 506 template <typename VectorT> 507 void popScopeAsVec(size_t scope_start) { 508 SkASSERT(scope_start > 0); 509 SkASSERT(scope_start <= fValueStack.size()); 510 511 using T = typename VectorT::ValueT; 512 static_assert( sizeof(T) >= sizeof(Value), ""); 513 static_assert( sizeof(T) % sizeof(Value) == 0, ""); 514 static_assert(alignof(T) == alignof(Value), ""); 515 516 const auto scope_count = fValueStack.size() - scope_start, 517 count = scope_count / (sizeof(T) / sizeof(Value)); 518 SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0); 519 520 const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start); 521 522 // Restore the previous scope index from saved placeholder value, 523 // and instantiate as a vector of values in scope. 524 auto& placeholder = fValueStack[scope_start - 1]; 525 fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder); 526 placeholder = VectorT(begin, count, fAlloc); 527 528 // Drop the (consumed) values in scope. 529 fValueStack.resize(scope_start); 530 } 531 532 void pushObjectScope() { 533 // Save a scope index now, and then later we'll overwrite this value as the Object itself. 534 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex)); 535 536 // New object scope. 537 fScopeIndex = SkTo<intptr_t>(fValueStack.size()); 538 } 539 540 void popObjectScope() { 541 SkASSERT(this->inObjectScope()); 542 this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex)); 543 544 SkDEBUGCODE( 545 const auto& obj = fValueStack.back().as<ObjectValue>(); 546 SkASSERT(obj.is<ObjectValue>()); 547 for (const auto& member : obj) { 548 SkASSERT(member.fKey.is<StringValue>()); 549 } 550 ) 551 } 552 553 void pushArrayScope() { 554 // Save a scope index now, and then later we'll overwrite this value as the Array itself. 555 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex)); 556 557 // New array scope. 558 fScopeIndex = -SkTo<intptr_t>(fValueStack.size()); 559 } 560 561 void popArrayScope() { 562 SkASSERT(this->inArrayScope()); 563 this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex)); 564 565 SkDEBUGCODE( 566 const auto& arr = fValueStack.back().as<ArrayValue>(); 567 SkASSERT(arr.is<ArrayValue>()); 568 ) 569 } 570 571 void pushObjectKey(const char* key, size_t size, const char* eos) { 572 SkASSERT(this->inObjectScope()); 573 SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex)); 574 SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1)); 575 this->pushString(key, size, eos); 576 } 577 578 void pushTrue() { 579 fValueStack.push_back(BoolValue(true)); 580 } 581 582 void pushFalse() { 583 fValueStack.push_back(BoolValue(false)); 584 } 585 586 void pushNull() { 587 fValueStack.push_back(NullValue()); 588 } 589 590 void pushString(const char* s, size_t size, const char* eos) { 591 fValueStack.push_back(FastString(s, size, eos, fAlloc)); 592 } 593 594 void pushInt32(int32_t i) { 595 fValueStack.push_back(NumberValue(i)); 596 } 597 598 void pushFloat(float f) { 599 fValueStack.push_back(NumberValue(f)); 600 } 601 602 template <typename T> 603 T error(T&& ret_val, const char* p, const char* msg) { 604#if defined(SK_JSON_REPORT_ERRORS) 605 fErrorToken = p; 606 fErrorMessage.set(msg); 607#endif 608 return ret_val; 609 } 610 611 const char* matchTrue(const char* p) { 612 SkASSERT(p[0] == 't'); 613 614 if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') { 615 this->pushTrue(); 616 return p + 4; 617 } 618 619 return this->error(nullptr, p, "invalid token"); 620 } 621 622 const char* matchFalse(const char* p) { 623 SkASSERT(p[0] == 'f'); 624 625 if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') { 626 this->pushFalse(); 627 return p + 5; 628 } 629 630 return this->error(nullptr, p, "invalid token"); 631 } 632 633 const char* matchNull(const char* p) { 634 SkASSERT(p[0] == 'n'); 635 636 if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') { 637 this->pushNull(); 638 return p + 4; 639 } 640 641 return this->error(nullptr, p, "invalid token"); 642 } 643 644 const std::vector<char>* unescapeString(const char* begin, const char* end) { 645 fUnescapeBuffer.clear(); 646 647 for (const auto* p = begin; p != end; ++p) { 648 if (*p != '\\') { 649 fUnescapeBuffer.push_back(*p); 650 continue; 651 } 652 653 if (++p == end) { 654 return nullptr; 655 } 656 657 switch (*p) { 658 case '"': fUnescapeBuffer.push_back( '"'); break; 659 case '\\': fUnescapeBuffer.push_back('\\'); break; 660 case '/': fUnescapeBuffer.push_back( '/'); break; 661 case 'b': fUnescapeBuffer.push_back('\b'); break; 662 case 'f': fUnescapeBuffer.push_back('\f'); break; 663 case 'n': fUnescapeBuffer.push_back('\n'); break; 664 case 'r': fUnescapeBuffer.push_back('\r'); break; 665 case 't': fUnescapeBuffer.push_back('\t'); break; 666 case 'u': { 667 if (p + 4 >= end) { 668 return nullptr; 669 } 670 671 uint32_t hexed; 672 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'}; 673 const auto* eos = SkParse::FindHex(hex_str, &hexed); 674 if (!eos || *eos) { 675 return nullptr; 676 } 677 678 char utf8[SkUTF::kMaxBytesInUTF8Sequence]; 679 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8); 680 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len); 681 p += 4; 682 } break; 683 default: return nullptr; 684 } 685 } 686 687 return &fUnescapeBuffer; 688 } 689 690 template <typename MatchFunc> 691 const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) { 692 SkASSERT(*p == '"'); 693 const auto* s_begin = p + 1; 694 bool requires_unescape = false; 695 696 do { 697 // Consume string chars. 698 // This is the fast path, and hopefully we only hit it once then quick-exit below. 699 for (p = p + 1; !is_eostring(*p); ++p); 700 701 if (*p == '"') { 702 // Valid string found. 703 if (!requires_unescape) { 704 func(s_begin, p - s_begin, p_stop); 705 } else { 706 // Slow unescape. We could avoid this extra copy with some effort, 707 // but in practice escaped strings should be rare. 708 const auto* buf = this->unescapeString(s_begin, p); 709 if (!buf) { 710 break; 711 } 712 713 SkASSERT(!buf->empty()); 714 func(buf->data(), buf->size(), buf->data() + buf->size() - 1); 715 } 716 return p + 1; 717 } 718 719 if (*p == '\\') { 720 requires_unescape = true; 721 ++p; 722 continue; 723 } 724 725 // End-of-scope chars are special: we use them to tag the end of the input. 726 // Thus they cannot be consumed indiscriminately -- we need to check if we hit the 727 // end of the input. To that effect, we treat them as string terminators above, 728 // then we catch them here. 729 if (is_eoscope(*p)) { 730 continue; 731 } 732 733 // Invalid/unexpected char. 734 break; 735 } while (p != p_stop); 736 737 // Premature end-of-input, or illegal string char. 738 return this->error(nullptr, s_begin - 1, "invalid string"); 739 } 740 741 const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) { 742 SkASSERT(exp <= 0); 743 744 for (;;) { 745 if (!is_digit(*p)) break; 746 f = f * 10.f + (*p++ - '0'); --exp; 747 if (!is_digit(*p)) break; 748 f = f * 10.f + (*p++ - '0'); --exp; 749 } 750 751 const auto decimal_scale = pow10(exp); 752 if (is_numeric(*p) || !decimal_scale) { 753 SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale); 754 // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor. 755 return nullptr; 756 } 757 758 this->pushFloat(sign * f * decimal_scale); 759 760 return p; 761 } 762 763 const char* matchFastFloatPart(const char* p, int sign, float f) { 764 for (;;) { 765 if (!is_digit(*p)) break; 766 f = f * 10.f + (*p++ - '0'); 767 if (!is_digit(*p)) break; 768 f = f * 10.f + (*p++ - '0'); 769 } 770 771 if (!is_numeric(*p)) { 772 // Matched (integral) float. 773 this->pushFloat(sign * f); 774 return p; 775 } 776 777 return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0) 778 : nullptr; 779 } 780 781 const char* matchFast32OrFloat(const char* p) { 782 int sign = 1; 783 if (*p == '-') { 784 sign = -1; 785 ++p; 786 } 787 788 const auto* digits_start = p; 789 790 int32_t n32 = 0; 791 792 // This is the largest absolute int32 value we can handle before 793 // risking overflow *on the next digit* (214748363). 794 static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10; 795 796 if (is_digit(*p)) { 797 n32 = (*p++ - '0'); 798 for (;;) { 799 if (!is_digit(*p) || n32 > kMaxInt32) break; 800 n32 = n32 * 10 + (*p++ - '0'); 801 } 802 } 803 804 if (!is_numeric(*p)) { 805 // Did we actually match any digits? 806 if (p > digits_start) { 807 this->pushInt32(sign * n32); 808 return p; 809 } 810 return nullptr; 811 } 812 813 if (*p == '.') { 814 const auto* decimals_start = ++p; 815 816 int exp = 0; 817 818 for (;;) { 819 if (!is_digit(*p) || n32 > kMaxInt32) break; 820 n32 = n32 * 10 + (*p++ - '0'); --exp; 821 if (!is_digit(*p) || n32 > kMaxInt32) break; 822 n32 = n32 * 10 + (*p++ - '0'); --exp; 823 } 824 825 if (!is_numeric(*p)) { 826 // Did we actually match any digits? 827 if (p > decimals_start) { 828 this->pushFloat(sign * n32 * pow10(exp)); 829 return p; 830 } 831 return nullptr; 832 } 833 834 if (n32 > kMaxInt32) { 835 // we ran out on n32 bits 836 return this->matchFastFloatDecimalPart(p, sign, n32, exp); 837 } 838 } 839 840 return this->matchFastFloatPart(p, sign, n32); 841 } 842 843 const char* matchNumber(const char* p) { 844 if (const auto* fast = this->matchFast32OrFloat(p)) return fast; 845 846 // slow fallback 847 char* matched; 848 float f = strtof(p, &matched); 849 if (matched > p) { 850 this->pushFloat(f); 851 return matched; 852 } 853 return this->error(nullptr, p, "invalid numeric token"); 854 } 855}; 856 857void Write(const Value& v, SkWStream* stream) { 858 switch (v.getType()) { 859 case Value::Type::kNull: 860 stream->writeText("null"); 861 break; 862 case Value::Type::kBool: 863 stream->writeText(*v.as<BoolValue>() ? "true" : "false"); 864 break; 865 case Value::Type::kNumber: 866 stream->writeScalarAsText(*v.as<NumberValue>()); 867 break; 868 case Value::Type::kString: 869 stream->writeText("\""); 870 stream->writeText(v.as<StringValue>().begin()); 871 stream->writeText("\""); 872 break; 873 case Value::Type::kArray: { 874 const auto& array = v.as<ArrayValue>(); 875 stream->writeText("["); 876 bool first_value = true; 877 for (const auto& entry : array) { 878 if (!first_value) stream->writeText(","); 879 Write(entry, stream); 880 first_value = false; 881 } 882 stream->writeText("]"); 883 break; 884 } 885 case Value::Type::kObject: 886 const auto& object = v.as<ObjectValue>(); 887 stream->writeText("{"); 888 bool first_member = true; 889 for (const auto& member : object) { 890 SkASSERT(member.fKey.getType() == Value::Type::kString); 891 if (!first_member) stream->writeText(","); 892 Write(member.fKey, stream); 893 stream->writeText(":"); 894 Write(member.fValue, stream); 895 first_member = false; 896 } 897 stream->writeText("}"); 898 break; 899 } 900} 901 902} // namespace 903 904SkString Value::toString() const { 905 SkDynamicMemoryWStream wstream; 906 Write(*this, &wstream); 907 const auto data = wstream.detachAsData(); 908 // TODO: is there a better way to pass data around without copying? 909 return SkString(static_cast<const char*>(data->data()), data->size()); 910} 911 912static constexpr size_t kMinChunkSize = 4096; 913 914DOM::DOM(const char* data, size_t size) 915 : fAlloc(kMinChunkSize) { 916 DOMParser parser(fAlloc); 917 918 fRoot = parser.parse(data, size); 919} 920 921void DOM::write(SkWStream* stream) const { 922 Write(fRoot, stream); 923} 924 925} // namespace skjson 926