1// Copyright 2019 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "encoding.h" 6 7#include <algorithm> 8#include <cassert> 9#include <cmath> 10#include <cstring> 11#include <limits> 12#include <stack> 13 14namespace v8_inspector_protocol_encoding { 15// ============================================================================= 16// Status and Error codes 17// ============================================================================= 18 19std::string Status::ToASCIIString() const { 20 switch (error) { 21 case Error::OK: 22 return "OK"; 23 case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS: 24 return ToASCIIString("JSON: unprocessed input remains"); 25 case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED: 26 return ToASCIIString("JSON: stack limit exceeded"); 27 case Error::JSON_PARSER_NO_INPUT: 28 return ToASCIIString("JSON: no input"); 29 case Error::JSON_PARSER_INVALID_TOKEN: 30 return ToASCIIString("JSON: invalid token"); 31 case Error::JSON_PARSER_INVALID_NUMBER: 32 return ToASCIIString("JSON: invalid number"); 33 case Error::JSON_PARSER_INVALID_STRING: 34 return ToASCIIString("JSON: invalid string"); 35 case Error::JSON_PARSER_UNEXPECTED_ARRAY_END: 36 return ToASCIIString("JSON: unexpected array end"); 37 case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED: 38 return ToASCIIString("JSON: comma or array end expected"); 39 case Error::JSON_PARSER_STRING_LITERAL_EXPECTED: 40 return ToASCIIString("JSON: string literal expected"); 41 case Error::JSON_PARSER_COLON_EXPECTED: 42 return ToASCIIString("JSON: colon expected"); 43 case Error::JSON_PARSER_UNEXPECTED_MAP_END: 44 return ToASCIIString("JSON: unexpected map end"); 45 case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED: 46 return ToASCIIString("JSON: comma or map end expected"); 47 case Error::JSON_PARSER_VALUE_EXPECTED: 48 return ToASCIIString("JSON: value expected"); 49 50 case Error::CBOR_INVALID_INT32: 51 return ToASCIIString("CBOR: invalid int32"); 52 case Error::CBOR_INVALID_DOUBLE: 53 return ToASCIIString("CBOR: invalid double"); 54 case Error::CBOR_INVALID_ENVELOPE: 55 return ToASCIIString("CBOR: invalid envelope"); 56 case Error::CBOR_INVALID_STRING8: 57 return ToASCIIString("CBOR: invalid string8"); 58 case Error::CBOR_INVALID_STRING16: 59 return ToASCIIString("CBOR: invalid string16"); 60 case Error::CBOR_INVALID_BINARY: 61 return ToASCIIString("CBOR: invalid binary"); 62 case Error::CBOR_UNSUPPORTED_VALUE: 63 return ToASCIIString("CBOR: unsupported value"); 64 case Error::CBOR_NO_INPUT: 65 return ToASCIIString("CBOR: no input"); 66 case Error::CBOR_INVALID_START_BYTE: 67 return ToASCIIString("CBOR: invalid start byte"); 68 case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE: 69 return ToASCIIString("CBOR: unexpected eof expected value"); 70 case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY: 71 return ToASCIIString("CBOR: unexpected eof in array"); 72 case Error::CBOR_UNEXPECTED_EOF_IN_MAP: 73 return ToASCIIString("CBOR: unexpected eof in map"); 74 case Error::CBOR_INVALID_MAP_KEY: 75 return ToASCIIString("CBOR: invalid map key"); 76 case Error::CBOR_STACK_LIMIT_EXCEEDED: 77 return ToASCIIString("CBOR: stack limit exceeded"); 78 case Error::CBOR_TRAILING_JUNK: 79 return ToASCIIString("CBOR: trailing junk"); 80 case Error::CBOR_MAP_START_EXPECTED: 81 return ToASCIIString("CBOR: map start expected"); 82 case Error::CBOR_MAP_STOP_EXPECTED: 83 return ToASCIIString("CBOR: map stop expected"); 84 case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED: 85 return ToASCIIString("CBOR: envelope size limit exceeded"); 86 } 87 // Some compilers can't figure out that we can't get here. 88 return "INVALID ERROR CODE"; 89} 90 91std::string Status::ToASCIIString(const char* msg) const { 92 return std::string(msg) + " at position " + std::to_string(pos); 93} 94 95namespace cbor { 96namespace { 97// Indicates the number of bits the "initial byte" needs to be shifted to the 98// right after applying |kMajorTypeMask| to produce the major type in the 99// lowermost bits. 100static constexpr uint8_t kMajorTypeBitShift = 5u; 101// Mask selecting the low-order 5 bits of the "initial byte", which is where 102// the additional information is encoded. 103static constexpr uint8_t kAdditionalInformationMask = 0x1f; 104// Mask selecting the high-order 3 bits of the "initial byte", which indicates 105// the major type of the encoded value. 106static constexpr uint8_t kMajorTypeMask = 0xe0; 107// Indicates the integer is in the following byte. 108static constexpr uint8_t kAdditionalInformation1Byte = 24u; 109// Indicates the integer is in the next 2 bytes. 110static constexpr uint8_t kAdditionalInformation2Bytes = 25u; 111// Indicates the integer is in the next 4 bytes. 112static constexpr uint8_t kAdditionalInformation4Bytes = 26u; 113// Indicates the integer is in the next 8 bytes. 114static constexpr uint8_t kAdditionalInformation8Bytes = 27u; 115 116// Encodes the initial byte, consisting of the |type| in the first 3 bits 117// followed by 5 bits of |additional_info|. 118constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) { 119 return (static_cast<uint8_t>(type) << kMajorTypeBitShift) | 120 (additional_info & kAdditionalInformationMask); 121} 122 123// TAG 24 indicates that what follows is a byte string which is 124// encoded in CBOR format. We use this as a wrapper for 125// maps and arrays, allowing us to skip them, because the 126// byte string carries its size (byte length). 127// https://tools.ietf.org/html/rfc7049#section-2.4.4.1 128static constexpr uint8_t kInitialByteForEnvelope = 129 EncodeInitialByte(MajorType::TAG, 24); 130// The initial byte for a byte string with at most 2^32 bytes 131// of payload. This is used for envelope encoding, even if 132// the byte string is shorter. 133static constexpr uint8_t kInitialByteFor32BitLengthByteString = 134 EncodeInitialByte(MajorType::BYTE_STRING, 26); 135 136// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional 137// info = 31. 138static constexpr uint8_t kInitialByteIndefiniteLengthArray = 139 EncodeInitialByte(MajorType::ARRAY, 31); 140static constexpr uint8_t kInitialByteIndefiniteLengthMap = 141 EncodeInitialByte(MajorType::MAP, 31); 142// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite 143// length maps / arrays. 144static constexpr uint8_t kStopByte = 145 EncodeInitialByte(MajorType::SIMPLE_VALUE, 31); 146 147// See RFC 7049 Section 2.3, Table 2. 148static constexpr uint8_t kEncodedTrue = 149 EncodeInitialByte(MajorType::SIMPLE_VALUE, 21); 150static constexpr uint8_t kEncodedFalse = 151 EncodeInitialByte(MajorType::SIMPLE_VALUE, 20); 152static constexpr uint8_t kEncodedNull = 153 EncodeInitialByte(MajorType::SIMPLE_VALUE, 22); 154static constexpr uint8_t kInitialByteForDouble = 155 EncodeInitialByte(MajorType::SIMPLE_VALUE, 27); 156 157// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for 158// arbitrary binary data encoded as BYTE_STRING. 159static constexpr uint8_t kExpectedConversionToBase64Tag = 160 EncodeInitialByte(MajorType::TAG, 22); 161 162// Writes the bytes for |v| to |out|, starting with the most significant byte. 163// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 164template <typename T, class C> 165void WriteBytesMostSignificantByteFirst(T v, C* out) { 166 for (int shift_bytes = sizeof(T) - 1; shift_bytes >= 0; --shift_bytes) 167 out->push_back(0xff & (v >> (shift_bytes * 8))); 168} 169 170// Extracts sizeof(T) bytes from |in| to extract a value of type T 171// (e.g. uint64_t, uint32_t, ...), most significant byte first. 172// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 173template <typename T> 174T ReadBytesMostSignificantByteFirst(span<uint8_t> in) { 175 assert(in.size() >= sizeof(T)); 176 T result = 0; 177 for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes) 178 result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8); 179 return result; 180} 181} // namespace 182 183namespace internals { 184// Reads the start of a token with definitive size from |bytes|. 185// |type| is the major type as specified in RFC 7049 Section 2.1. 186// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size 187// (e.g. for BYTE_STRING). 188// If successful, returns the number of bytes read. Otherwise returns 0. 189size_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) { 190 if (bytes.empty()) 191 return 0; 192 uint8_t initial_byte = bytes[0]; 193 *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift); 194 195 uint8_t additional_information = initial_byte & kAdditionalInformationMask; 196 if (additional_information < 24) { 197 // Values 0-23 are encoded directly into the additional info of the 198 // initial byte. 199 *value = additional_information; 200 return 1; 201 } 202 if (additional_information == kAdditionalInformation1Byte) { 203 // Values 24-255 are encoded with one initial byte, followed by the value. 204 if (bytes.size() < 2) 205 return 0; 206 *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1)); 207 return 2; 208 } 209 if (additional_information == kAdditionalInformation2Bytes) { 210 // Values 256-65535: 1 initial byte + 2 bytes payload. 211 if (bytes.size() < 1 + sizeof(uint16_t)) 212 return 0; 213 *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1)); 214 return 3; 215 } 216 if (additional_information == kAdditionalInformation4Bytes) { 217 // 32 bit uint: 1 initial byte + 4 bytes payload. 218 if (bytes.size() < 1 + sizeof(uint32_t)) 219 return 0; 220 *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1)); 221 return 5; 222 } 223 if (additional_information == kAdditionalInformation8Bytes) { 224 // 64 bit uint: 1 initial byte + 8 bytes payload. 225 if (bytes.size() < 1 + sizeof(uint64_t)) 226 return 0; 227 *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1)); 228 return 9; 229 } 230 return 0; 231} 232 233// Writes the start of a token with |type|. The |value| may indicate the size, 234// or it may be the payload if the value is an unsigned integer. 235template <typename C> 236void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) { 237 if (value < 24) { 238 // Values 0-23 are encoded directly into the additional info of the 239 // initial byte. 240 encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value)); 241 return; 242 } 243 if (value <= std::numeric_limits<uint8_t>::max()) { 244 // Values 24-255 are encoded with one initial byte, followed by the value. 245 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte)); 246 encoded->push_back(value); 247 return; 248 } 249 if (value <= std::numeric_limits<uint16_t>::max()) { 250 // Values 256-65535: 1 initial byte + 2 bytes payload. 251 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes)); 252 WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded); 253 return; 254 } 255 if (value <= std::numeric_limits<uint32_t>::max()) { 256 // 32 bit uint: 1 initial byte + 4 bytes payload. 257 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes)); 258 WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value), 259 encoded); 260 return; 261 } 262 // 64 bit uint: 1 initial byte + 8 bytes payload. 263 encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes)); 264 WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded); 265} 266void WriteTokenStart(MajorType type, 267 uint64_t value, 268 std::vector<uint8_t>* encoded) { 269 WriteTokenStartTmpl(type, value, encoded); 270} 271void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) { 272 WriteTokenStartTmpl(type, value, encoded); 273} 274} // namespace internals 275 276// ============================================================================= 277// Detecting CBOR content 278// ============================================================================= 279 280uint8_t InitialByteForEnvelope() { 281 return kInitialByteForEnvelope; 282} 283uint8_t InitialByteFor32BitLengthByteString() { 284 return kInitialByteFor32BitLengthByteString; 285} 286bool IsCBORMessage(span<uint8_t> msg) { 287 return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() && 288 msg[1] == InitialByteFor32BitLengthByteString(); 289} 290 291// ============================================================================= 292// Encoding invidiual CBOR items 293// ============================================================================= 294 295uint8_t EncodeTrue() { 296 return kEncodedTrue; 297} 298uint8_t EncodeFalse() { 299 return kEncodedFalse; 300} 301uint8_t EncodeNull() { 302 return kEncodedNull; 303} 304 305uint8_t EncodeIndefiniteLengthArrayStart() { 306 return kInitialByteIndefiniteLengthArray; 307} 308 309uint8_t EncodeIndefiniteLengthMapStart() { 310 return kInitialByteIndefiniteLengthMap; 311} 312 313uint8_t EncodeStop() { 314 return kStopByte; 315} 316 317template <typename C> 318void EncodeInt32Tmpl(int32_t value, C* out) { 319 if (value >= 0) { 320 internals::WriteTokenStart(MajorType::UNSIGNED, value, out); 321 } else { 322 uint64_t representation = static_cast<uint64_t>(-(value + 1)); 323 internals::WriteTokenStart(MajorType::NEGATIVE, representation, out); 324 } 325} 326void EncodeInt32(int32_t value, std::vector<uint8_t>* out) { 327 EncodeInt32Tmpl(value, out); 328} 329void EncodeInt32(int32_t value, std::string* out) { 330 EncodeInt32Tmpl(value, out); 331} 332 333template <typename C> 334void EncodeString16Tmpl(span<uint16_t> in, C* out) { 335 uint64_t byte_length = static_cast<uint64_t>(in.size_bytes()); 336 internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); 337 // When emitting UTF16 characters, we always write the least significant byte 338 // first; this is because it's the native representation for X86. 339 // TODO(johannes): Implement a more efficient thing here later, e.g. 340 // casting *iff* the machine has this byte order. 341 // The wire format for UTF16 chars will probably remain the same 342 // (least significant byte first) since this way we can have 343 // golden files, unittests, etc. that port easily and universally. 344 // See also: 345 // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html 346 for (const uint16_t two_bytes : in) { 347 out->push_back(two_bytes); 348 out->push_back(two_bytes >> 8); 349 } 350} 351void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) { 352 EncodeString16Tmpl(in, out); 353} 354void EncodeString16(span<uint16_t> in, std::string* out) { 355 EncodeString16Tmpl(in, out); 356} 357 358template <typename C> 359void EncodeString8Tmpl(span<uint8_t> in, C* out) { 360 internals::WriteTokenStart(MajorType::STRING, 361 static_cast<uint64_t>(in.size_bytes()), out); 362 out->insert(out->end(), in.begin(), in.end()); 363} 364void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) { 365 EncodeString8Tmpl(in, out); 366} 367void EncodeString8(span<uint8_t> in, std::string* out) { 368 EncodeString8Tmpl(in, out); 369} 370 371template <typename C> 372void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) { 373 for (size_t ii = 0; ii < latin1.size(); ++ii) { 374 if (latin1[ii] <= 127) 375 continue; 376 // If there's at least one non-ASCII char, convert to UTF8. 377 std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + ii); 378 for (; ii < latin1.size(); ++ii) { 379 if (latin1[ii] <= 127) { 380 utf8.push_back(latin1[ii]); 381 } else { 382 // 0xC0 means it's a UTF8 sequence with 2 bytes. 383 utf8.push_back((latin1[ii] >> 6) | 0xc0); 384 utf8.push_back((latin1[ii] | 0x80) & 0xbf); 385 } 386 } 387 EncodeString8(SpanFrom(utf8), out); 388 return; 389 } 390 EncodeString8(latin1, out); 391} 392void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) { 393 EncodeFromLatin1Tmpl(latin1, out); 394} 395void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) { 396 EncodeFromLatin1Tmpl(latin1, out); 397} 398 399template <typename C> 400void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) { 401 // If there's at least one non-ASCII char, encode as STRING16 (UTF16). 402 for (uint16_t ch : utf16) { 403 if (ch <= 127) 404 continue; 405 EncodeString16(utf16, out); 406 return; 407 } 408 // It's all US-ASCII, strip out every second byte and encode as UTF8. 409 internals::WriteTokenStart(MajorType::STRING, 410 static_cast<uint64_t>(utf16.size()), out); 411 out->insert(out->end(), utf16.begin(), utf16.end()); 412} 413void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) { 414 EncodeFromUTF16Tmpl(utf16, out); 415} 416void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) { 417 EncodeFromUTF16Tmpl(utf16, out); 418} 419 420template <typename C> 421void EncodeBinaryTmpl(span<uint8_t> in, C* out) { 422 out->push_back(kExpectedConversionToBase64Tag); 423 uint64_t byte_length = static_cast<uint64_t>(in.size_bytes()); 424 internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out); 425 out->insert(out->end(), in.begin(), in.end()); 426} 427void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) { 428 EncodeBinaryTmpl(in, out); 429} 430void EncodeBinary(span<uint8_t> in, std::string* out) { 431 EncodeBinaryTmpl(in, out); 432} 433 434// A double is encoded with a specific initial byte 435// (kInitialByteForDouble) plus the 64 bits of payload for its value. 436constexpr size_t kEncodedDoubleSize = 1 + sizeof(uint64_t); 437 438// An envelope is encoded with a specific initial byte 439// (kInitialByteForEnvelope), plus the start byte for a BYTE_STRING with a 32 440// bit wide length, plus a 32 bit length for that string. 441constexpr size_t kEncodedEnvelopeHeaderSize = 1 + 1 + sizeof(uint32_t); 442 443template <typename C> 444void EncodeDoubleTmpl(double value, C* out) { 445 // The additional_info=27 indicates 64 bits for the double follow. 446 // See RFC 7049 Section 2.3, Table 1. 447 out->push_back(kInitialByteForDouble); 448 union { 449 double from_double; 450 uint64_t to_uint64; 451 } reinterpret; 452 reinterpret.from_double = value; 453 WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out); 454} 455void EncodeDouble(double value, std::vector<uint8_t>* out) { 456 EncodeDoubleTmpl(value, out); 457} 458void EncodeDouble(double value, std::string* out) { 459 EncodeDoubleTmpl(value, out); 460} 461 462// ============================================================================= 463// cbor::EnvelopeEncoder - for wrapping submessages 464// ============================================================================= 465 466template <typename C> 467void EncodeStartTmpl(C* out, size_t* byte_size_pos) { 468 assert(*byte_size_pos == 0); 469 out->push_back(kInitialByteForEnvelope); 470 out->push_back(kInitialByteFor32BitLengthByteString); 471 *byte_size_pos = out->size(); 472 out->resize(out->size() + sizeof(uint32_t)); 473} 474 475void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) { 476 EncodeStartTmpl<std::vector<uint8_t>>(out, &byte_size_pos_); 477} 478 479void EnvelopeEncoder::EncodeStart(std::string* out) { 480 EncodeStartTmpl<std::string>(out, &byte_size_pos_); 481} 482 483template <typename C> 484bool EncodeStopTmpl(C* out, size_t* byte_size_pos) { 485 assert(*byte_size_pos != 0); 486 // The byte size is the size of the payload, that is, all the 487 // bytes that were written past the byte size position itself. 488 uint64_t byte_size = out->size() - (*byte_size_pos + sizeof(uint32_t)); 489 // We store exactly 4 bytes, so at most INT32MAX, with most significant 490 // byte first. 491 if (byte_size > std::numeric_limits<uint32_t>::max()) 492 return false; 493 for (int shift_bytes = sizeof(uint32_t) - 1; shift_bytes >= 0; 494 --shift_bytes) { 495 (*out)[(*byte_size_pos)++] = 0xff & (byte_size >> (shift_bytes * 8)); 496 } 497 return true; 498} 499 500bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) { 501 return EncodeStopTmpl(out, &byte_size_pos_); 502} 503 504bool EnvelopeEncoder::EncodeStop(std::string* out) { 505 return EncodeStopTmpl(out, &byte_size_pos_); 506} 507 508// ============================================================================= 509// cbor::NewCBOREncoder - for encoding from a streaming parser 510// ============================================================================= 511 512namespace { 513template <typename C> 514class CBOREncoder : public StreamingParserHandler { 515 public: 516 CBOREncoder(C* out, Status* status) : out_(out), status_(status) { 517 *status_ = Status(); 518 } 519 520 void HandleMapBegin() override { 521 if (!status_->ok()) 522 return; 523 envelopes_.emplace_back(); 524 envelopes_.back().EncodeStart(out_); 525 out_->push_back(kInitialByteIndefiniteLengthMap); 526 } 527 528 void HandleMapEnd() override { 529 if (!status_->ok()) 530 return; 531 out_->push_back(kStopByte); 532 assert(!envelopes_.empty()); 533 if (!envelopes_.back().EncodeStop(out_)) { 534 HandleError( 535 Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); 536 return; 537 } 538 envelopes_.pop_back(); 539 } 540 541 void HandleArrayBegin() override { 542 if (!status_->ok()) 543 return; 544 envelopes_.emplace_back(); 545 envelopes_.back().EncodeStart(out_); 546 out_->push_back(kInitialByteIndefiniteLengthArray); 547 } 548 549 void HandleArrayEnd() override { 550 if (!status_->ok()) 551 return; 552 out_->push_back(kStopByte); 553 assert(!envelopes_.empty()); 554 if (!envelopes_.back().EncodeStop(out_)) { 555 HandleError( 556 Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size())); 557 return; 558 } 559 envelopes_.pop_back(); 560 } 561 562 void HandleString8(span<uint8_t> chars) override { 563 if (!status_->ok()) 564 return; 565 EncodeString8(chars, out_); 566 } 567 568 void HandleString16(span<uint16_t> chars) override { 569 if (!status_->ok()) 570 return; 571 EncodeFromUTF16(chars, out_); 572 } 573 574 void HandleBinary(span<uint8_t> bytes) override { 575 if (!status_->ok()) 576 return; 577 EncodeBinary(bytes, out_); 578 } 579 580 void HandleDouble(double value) override { 581 if (!status_->ok()) 582 return; 583 EncodeDouble(value, out_); 584 } 585 586 void HandleInt32(int32_t value) override { 587 if (!status_->ok()) 588 return; 589 EncodeInt32(value, out_); 590 } 591 592 void HandleBool(bool value) override { 593 if (!status_->ok()) 594 return; 595 // See RFC 7049 Section 2.3, Table 2. 596 out_->push_back(value ? kEncodedTrue : kEncodedFalse); 597 } 598 599 void HandleNull() override { 600 if (!status_->ok()) 601 return; 602 // See RFC 7049 Section 2.3, Table 2. 603 out_->push_back(kEncodedNull); 604 } 605 606 void HandleError(Status error) override { 607 if (!status_->ok()) 608 return; 609 *status_ = error; 610 out_->clear(); 611 } 612 613 private: 614 C* out_; 615 std::vector<EnvelopeEncoder> envelopes_; 616 Status* status_; 617}; 618} // namespace 619 620std::unique_ptr<StreamingParserHandler> NewCBOREncoder( 621 std::vector<uint8_t>* out, 622 Status* status) { 623 return std::unique_ptr<StreamingParserHandler>( 624 new CBOREncoder<std::vector<uint8_t>>(out, status)); 625} 626std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out, 627 Status* status) { 628 return std::unique_ptr<StreamingParserHandler>( 629 new CBOREncoder<std::string>(out, status)); 630} 631 632// ============================================================================= 633// cbor::CBORTokenizer - for parsing individual CBOR items 634// ============================================================================= 635 636CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) { 637 ReadNextToken(/*enter_envelope=*/false); 638} 639CBORTokenizer::~CBORTokenizer() {} 640 641CBORTokenTag CBORTokenizer::TokenTag() const { 642 return token_tag_; 643} 644 645void CBORTokenizer::Next() { 646 if (token_tag_ == CBORTokenTag::ERROR_VALUE || 647 token_tag_ == CBORTokenTag::DONE) 648 return; 649 ReadNextToken(/*enter_envelope=*/false); 650} 651 652void CBORTokenizer::EnterEnvelope() { 653 assert(token_tag_ == CBORTokenTag::ENVELOPE); 654 ReadNextToken(/*enter_envelope=*/true); 655} 656 657Status CBORTokenizer::Status() const { 658 return status_; 659} 660 661// The following accessor functions ::GetInt32, ::GetDouble, 662// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents 663// assume that a particular token was recognized in ::ReadNextToken. 664// That's where all the error checking is done. By design, 665// the accessors (assuming the token was recognized) never produce 666// an error. 667 668int32_t CBORTokenizer::GetInt32() const { 669 assert(token_tag_ == CBORTokenTag::INT32); 670 // The range checks happen in ::ReadNextToken(). 671 return static_cast<int32_t>( 672 token_start_type_ == MajorType::UNSIGNED 673 ? token_start_internal_value_ 674 : -static_cast<int64_t>(token_start_internal_value_) - 1); 675} 676 677double CBORTokenizer::GetDouble() const { 678 assert(token_tag_ == CBORTokenTag::DOUBLE); 679 union { 680 uint64_t from_uint64; 681 double to_double; 682 } reinterpret; 683 reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst<uint64_t>( 684 bytes_.subspan(status_.pos + 1)); 685 return reinterpret.to_double; 686} 687 688span<uint8_t> CBORTokenizer::GetString8() const { 689 assert(token_tag_ == CBORTokenTag::STRING8); 690 auto length = static_cast<size_t>(token_start_internal_value_); 691 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 692} 693 694span<uint8_t> CBORTokenizer::GetString16WireRep() const { 695 assert(token_tag_ == CBORTokenTag::STRING16); 696 auto length = static_cast<size_t>(token_start_internal_value_); 697 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 698} 699 700span<uint8_t> CBORTokenizer::GetBinary() const { 701 assert(token_tag_ == CBORTokenTag::BINARY); 702 auto length = static_cast<size_t>(token_start_internal_value_); 703 return bytes_.subspan(status_.pos + (token_byte_length_ - length), length); 704} 705 706span<uint8_t> CBORTokenizer::GetEnvelopeContents() const { 707 assert(token_tag_ == CBORTokenTag::ENVELOPE); 708 auto length = static_cast<size_t>(token_start_internal_value_); 709 return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length); 710} 711 712// All error checking happens in ::ReadNextToken, so that the accessors 713// can avoid having to carry an error return value. 714// 715// With respect to checking the encoded lengths of strings, arrays, etc: 716// On the wire, CBOR uses 1,2,4, and 8 byte unsigned integers, so 717// we initially read them as uint64_t, usually into token_start_internal_value_. 718// 719// However, since these containers have a representation on the machine, 720// we need to do corresponding size computations on the input byte array, 721// output span (e.g. the payload for a string), etc., and size_t is 722// machine specific (in practice either 32 bit or 64 bit). 723// 724// Further, we must avoid overflowing size_t. Therefore, we use this 725// kMaxValidLength constant to: 726// - Reject values that are larger than the architecture specific 727// max size_t (differs between 32 bit and 64 bit arch). 728// - Reserve at least one bit so that we can check against overflows 729// when adding lengths (array / string length / etc.); we do this by 730// ensuring that the inputs to an addition are <= kMaxValidLength, 731// and then checking whether the sum went past it. 732// 733// See also 734// https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md 735static const uint64_t kMaxValidLength = 736 std::min<uint64_t>(std::numeric_limits<uint64_t>::max() >> 2, 737 std::numeric_limits<size_t>::max()); 738 739void CBORTokenizer::ReadNextToken(bool enter_envelope) { 740 if (enter_envelope) { 741 status_.pos += kEncodedEnvelopeHeaderSize; 742 } else { 743 status_.pos = 744 status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_; 745 } 746 status_.error = Error::OK; 747 if (status_.pos >= bytes_.size()) { 748 token_tag_ = CBORTokenTag::DONE; 749 return; 750 } 751 const size_t remaining_bytes = bytes_.size() - status_.pos; 752 switch (bytes_[status_.pos]) { 753 case kStopByte: 754 SetToken(CBORTokenTag::STOP, 1); 755 return; 756 case kInitialByteIndefiniteLengthMap: 757 SetToken(CBORTokenTag::MAP_START, 1); 758 return; 759 case kInitialByteIndefiniteLengthArray: 760 SetToken(CBORTokenTag::ARRAY_START, 1); 761 return; 762 case kEncodedTrue: 763 SetToken(CBORTokenTag::TRUE_VALUE, 1); 764 return; 765 case kEncodedFalse: 766 SetToken(CBORTokenTag::FALSE_VALUE, 1); 767 return; 768 case kEncodedNull: 769 SetToken(CBORTokenTag::NULL_VALUE, 1); 770 return; 771 case kExpectedConversionToBase64Tag: { // BINARY 772 const size_t bytes_read = internals::ReadTokenStart( 773 bytes_.subspan(status_.pos + 1), &token_start_type_, 774 &token_start_internal_value_); 775 if (!bytes_read || token_start_type_ != MajorType::BYTE_STRING || 776 token_start_internal_value_ > kMaxValidLength) { 777 SetError(Error::CBOR_INVALID_BINARY); 778 return; 779 } 780 const uint64_t token_byte_length = token_start_internal_value_ + 781 /* tag before token start: */ 1 + 782 /* token start: */ bytes_read; 783 if (token_byte_length > remaining_bytes) { 784 SetError(Error::CBOR_INVALID_BINARY); 785 return; 786 } 787 SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length)); 788 return; 789 } 790 case kInitialByteForDouble: { // DOUBLE 791 if (kEncodedDoubleSize > remaining_bytes) { 792 SetError(Error::CBOR_INVALID_DOUBLE); 793 return; 794 } 795 SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize); 796 return; 797 } 798 case kInitialByteForEnvelope: { // ENVELOPE 799 if (kEncodedEnvelopeHeaderSize > remaining_bytes) { 800 SetError(Error::CBOR_INVALID_ENVELOPE); 801 return; 802 } 803 // The envelope must be a byte string with 32 bit length. 804 if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) { 805 SetError(Error::CBOR_INVALID_ENVELOPE); 806 return; 807 } 808 // Read the length of the byte string. 809 token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>( 810 bytes_.subspan(status_.pos + 2)); 811 if (token_start_internal_value_ > kMaxValidLength) { 812 SetError(Error::CBOR_INVALID_ENVELOPE); 813 return; 814 } 815 uint64_t token_byte_length = 816 token_start_internal_value_ + kEncodedEnvelopeHeaderSize; 817 if (token_byte_length > remaining_bytes) { 818 SetError(Error::CBOR_INVALID_ENVELOPE); 819 return; 820 } 821 SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length)); 822 return; 823 } 824 default: { 825 const size_t bytes_read = internals::ReadTokenStart( 826 bytes_.subspan(status_.pos), &token_start_type_, 827 &token_start_internal_value_); 828 switch (token_start_type_) { 829 case MajorType::UNSIGNED: // INT32. 830 // INT32 is a signed int32 (int32 makes sense for the 831 // inspector_protocol, it's not a CBOR limitation), so we check 832 // against the signed max, so that the allowable values are 833 // 0, 1, 2, ... 2^31 - 1. 834 if (!bytes_read || 835 static_cast<int64_t>(std::numeric_limits<int32_t>::max()) < 836 static_cast<int64_t>(token_start_internal_value_)) { 837 SetError(Error::CBOR_INVALID_INT32); 838 return; 839 } 840 SetToken(CBORTokenTag::INT32, bytes_read); 841 return; 842 case MajorType::NEGATIVE: { // INT32. 843 // INT32 is a signed int32 (int32 makes sense for the 844 // inspector_protocol, it's not a CBOR limitation); in CBOR, the 845 // negative values for INT32 are represented as NEGATIVE, that is, -1 846 // INT32 is represented as 1 << 5 | 0 (major type 1, additional info 847 // value 0). 848 // The represented allowed values range is -1 to -2^31. 849 // They are mapped into the encoded range of 0 to 2^31-1. 850 // We check the the payload in token_start_internal_value_ against 851 // that range (2^31-1 is also known as 852 // std::numeric_limits<int32_t>::max()). 853 if (!bytes_read || 854 static_cast<int64_t>(token_start_internal_value_) > 855 static_cast<int64_t>(std::numeric_limits<int32_t>::max())) { 856 SetError(Error::CBOR_INVALID_INT32); 857 return; 858 } 859 SetToken(CBORTokenTag::INT32, bytes_read); 860 return; 861 } 862 case MajorType::STRING: { // STRING8. 863 if (!bytes_read || token_start_internal_value_ > kMaxValidLength) { 864 SetError(Error::CBOR_INVALID_STRING8); 865 return; 866 } 867 uint64_t token_byte_length = token_start_internal_value_ + bytes_read; 868 if (token_byte_length > remaining_bytes) { 869 SetError(Error::CBOR_INVALID_STRING8); 870 return; 871 } 872 SetToken(CBORTokenTag::STRING8, 873 static_cast<size_t>(token_byte_length)); 874 return; 875 } 876 case MajorType::BYTE_STRING: { // STRING16. 877 // Length must be divisible by 2 since UTF16 is 2 bytes per 878 // character, hence the &1 check. 879 if (!bytes_read || token_start_internal_value_ > kMaxValidLength || 880 token_start_internal_value_ & 1) { 881 SetError(Error::CBOR_INVALID_STRING16); 882 return; 883 } 884 uint64_t token_byte_length = token_start_internal_value_ + bytes_read; 885 if (token_byte_length > remaining_bytes) { 886 SetError(Error::CBOR_INVALID_STRING16); 887 return; 888 } 889 SetToken(CBORTokenTag::STRING16, 890 static_cast<size_t>(token_byte_length)); 891 return; 892 } 893 case MajorType::ARRAY: 894 case MajorType::MAP: 895 case MajorType::TAG: 896 case MajorType::SIMPLE_VALUE: 897 SetError(Error::CBOR_UNSUPPORTED_VALUE); 898 return; 899 } 900 } 901 } 902} 903 904void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) { 905 token_tag_ = token_tag; 906 token_byte_length_ = token_byte_length; 907} 908 909void CBORTokenizer::SetError(Error error) { 910 token_tag_ = CBORTokenTag::ERROR_VALUE; 911 status_.error = error; 912} 913 914// ============================================================================= 915// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages 916// ============================================================================= 917 918namespace { 919// When parsing CBOR, we limit recursion depth for objects and arrays 920// to this constant. 921static constexpr int kStackLimit = 300; 922 923// Below are three parsing routines for CBOR, which cover enough 924// to roundtrip JSON messages. 925bool ParseMap(int32_t stack_depth, 926 CBORTokenizer* tokenizer, 927 StreamingParserHandler* out); 928bool ParseArray(int32_t stack_depth, 929 CBORTokenizer* tokenizer, 930 StreamingParserHandler* out); 931bool ParseValue(int32_t stack_depth, 932 CBORTokenizer* tokenizer, 933 StreamingParserHandler* out); 934 935void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { 936 std::vector<uint16_t> value; 937 span<uint8_t> rep = tokenizer->GetString16WireRep(); 938 for (size_t ii = 0; ii < rep.size(); ii += 2) 939 value.push_back((rep[ii + 1] << 8) | rep[ii]); 940 out->HandleString16(span<uint16_t>(value.data(), value.size())); 941 tokenizer->Next(); 942} 943 944bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) { 945 assert(tokenizer->TokenTag() == CBORTokenTag::STRING8); 946 out->HandleString8(tokenizer->GetString8()); 947 tokenizer->Next(); 948 return true; 949} 950 951bool ParseValue(int32_t stack_depth, 952 CBORTokenizer* tokenizer, 953 StreamingParserHandler* out) { 954 if (stack_depth > kStackLimit) { 955 out->HandleError( 956 Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos}); 957 return false; 958 } 959 // Skip past the envelope to get to what's inside. 960 if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE) 961 tokenizer->EnterEnvelope(); 962 switch (tokenizer->TokenTag()) { 963 case CBORTokenTag::ERROR_VALUE: 964 out->HandleError(tokenizer->Status()); 965 return false; 966 case CBORTokenTag::DONE: 967 out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE, 968 tokenizer->Status().pos}); 969 return false; 970 case CBORTokenTag::TRUE_VALUE: 971 out->HandleBool(true); 972 tokenizer->Next(); 973 return true; 974 case CBORTokenTag::FALSE_VALUE: 975 out->HandleBool(false); 976 tokenizer->Next(); 977 return true; 978 case CBORTokenTag::NULL_VALUE: 979 out->HandleNull(); 980 tokenizer->Next(); 981 return true; 982 case CBORTokenTag::INT32: 983 out->HandleInt32(tokenizer->GetInt32()); 984 tokenizer->Next(); 985 return true; 986 case CBORTokenTag::DOUBLE: 987 out->HandleDouble(tokenizer->GetDouble()); 988 tokenizer->Next(); 989 return true; 990 case CBORTokenTag::STRING8: 991 return ParseUTF8String(tokenizer, out); 992 case CBORTokenTag::STRING16: 993 ParseUTF16String(tokenizer, out); 994 return true; 995 case CBORTokenTag::BINARY: { 996 out->HandleBinary(tokenizer->GetBinary()); 997 tokenizer->Next(); 998 return true; 999 } 1000 case CBORTokenTag::MAP_START: 1001 return ParseMap(stack_depth + 1, tokenizer, out); 1002 case CBORTokenTag::ARRAY_START: 1003 return ParseArray(stack_depth + 1, tokenizer, out); 1004 default: 1005 out->HandleError( 1006 Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos}); 1007 return false; 1008 } 1009} 1010 1011// |bytes| must start with the indefinite length array byte, so basically, 1012// ParseArray may only be called after an indefinite length array has been 1013// detected. 1014bool ParseArray(int32_t stack_depth, 1015 CBORTokenizer* tokenizer, 1016 StreamingParserHandler* out) { 1017 assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START); 1018 tokenizer->Next(); 1019 out->HandleArrayBegin(); 1020 while (tokenizer->TokenTag() != CBORTokenTag::STOP) { 1021 if (tokenizer->TokenTag() == CBORTokenTag::DONE) { 1022 out->HandleError( 1023 Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos}); 1024 return false; 1025 } 1026 if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { 1027 out->HandleError(tokenizer->Status()); 1028 return false; 1029 } 1030 // Parse value. 1031 if (!ParseValue(stack_depth, tokenizer, out)) 1032 return false; 1033 } 1034 out->HandleArrayEnd(); 1035 tokenizer->Next(); 1036 return true; 1037} 1038 1039// |bytes| must start with the indefinite length array byte, so basically, 1040// ParseArray may only be called after an indefinite length array has been 1041// detected. 1042bool ParseMap(int32_t stack_depth, 1043 CBORTokenizer* tokenizer, 1044 StreamingParserHandler* out) { 1045 assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START); 1046 out->HandleMapBegin(); 1047 tokenizer->Next(); 1048 while (tokenizer->TokenTag() != CBORTokenTag::STOP) { 1049 if (tokenizer->TokenTag() == CBORTokenTag::DONE) { 1050 out->HandleError( 1051 Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos}); 1052 return false; 1053 } 1054 if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) { 1055 out->HandleError(tokenizer->Status()); 1056 return false; 1057 } 1058 // Parse key. 1059 if (tokenizer->TokenTag() == CBORTokenTag::STRING8) { 1060 if (!ParseUTF8String(tokenizer, out)) 1061 return false; 1062 } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) { 1063 ParseUTF16String(tokenizer, out); 1064 } else { 1065 out->HandleError( 1066 Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos}); 1067 return false; 1068 } 1069 // Parse value. 1070 if (!ParseValue(stack_depth, tokenizer, out)) 1071 return false; 1072 } 1073 out->HandleMapEnd(); 1074 tokenizer->Next(); 1075 return true; 1076} 1077} // namespace 1078 1079void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) { 1080 if (bytes.empty()) { 1081 out->HandleError(Status{Error::CBOR_NO_INPUT, 0}); 1082 return; 1083 } 1084 if (bytes[0] != kInitialByteForEnvelope) { 1085 out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0}); 1086 return; 1087 } 1088 CBORTokenizer tokenizer(bytes); 1089 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { 1090 out->HandleError(tokenizer.Status()); 1091 return; 1092 } 1093 // We checked for the envelope start byte above, so the tokenizer 1094 // must agree here, since it's not an error. 1095 assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE); 1096 tokenizer.EnterEnvelope(); 1097 if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) { 1098 out->HandleError( 1099 Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos}); 1100 return; 1101 } 1102 if (!ParseMap(/*stack_depth=*/1, &tokenizer, out)) 1103 return; 1104 if (tokenizer.TokenTag() == CBORTokenTag::DONE) 1105 return; 1106 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) { 1107 out->HandleError(tokenizer.Status()); 1108 return; 1109 } 1110 out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos}); 1111} 1112 1113// ============================================================================= 1114// cbor::AppendString8EntryToMap - for limited in-place editing of messages 1115// ============================================================================= 1116 1117template <typename C> 1118Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key, 1119 span<uint8_t> string8_value, 1120 C* cbor) { 1121 // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since 1122 // it could be a char (signed!). Instead, use bytes. 1123 span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()), 1124 cbor->size()); 1125 CBORTokenizer tokenizer(bytes); 1126 if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) 1127 return tokenizer.Status(); 1128 if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE) 1129 return Status(Error::CBOR_INVALID_ENVELOPE, 0); 1130 size_t envelope_size = tokenizer.GetEnvelopeContents().size(); 1131 size_t old_size = cbor->size(); 1132 if (old_size != envelope_size + kEncodedEnvelopeHeaderSize) 1133 return Status(Error::CBOR_INVALID_ENVELOPE, 0); 1134 if (envelope_size == 0 || 1135 (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart())) 1136 return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize); 1137 if (bytes[bytes.size() - 1] != EncodeStop()) 1138 return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1); 1139 cbor->pop_back(); 1140 EncodeString8(string8_key, cbor); 1141 EncodeString8(string8_value, cbor); 1142 cbor->push_back(EncodeStop()); 1143 size_t new_envelope_size = envelope_size + (cbor->size() - old_size); 1144 if (new_envelope_size > std::numeric_limits<uint32_t>::max()) 1145 return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0); 1146 size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t); 1147 uint8_t* out = reinterpret_cast<uint8_t*>(&cbor->at(size_pos)); 1148 *(out++) = (new_envelope_size >> 24) & 0xff; 1149 *(out++) = (new_envelope_size >> 16) & 0xff; 1150 *(out++) = (new_envelope_size >> 8) & 0xff; 1151 *(out) = new_envelope_size & 0xff; 1152 return Status(); 1153} 1154Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 1155 span<uint8_t> string8_value, 1156 std::vector<uint8_t>* cbor) { 1157 return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); 1158} 1159Status AppendString8EntryToCBORMap(span<uint8_t> string8_key, 1160 span<uint8_t> string8_value, 1161 std::string* cbor) { 1162 return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor); 1163} 1164} // namespace cbor 1165 1166namespace json { 1167 1168// ============================================================================= 1169// json::NewJSONEncoder - for encoding streaming parser events as JSON 1170// ============================================================================= 1171 1172namespace { 1173// Prints |value| to |out| with 4 hex digits, most significant chunk first. 1174template <typename C> 1175void PrintHex(uint16_t value, C* out) { 1176 for (int ii = 3; ii >= 0; --ii) { 1177 int four_bits = 0xf & (value >> (4 * ii)); 1178 out->push_back(four_bits + ((four_bits <= 9) ? '0' : ('a' - 10))); 1179 } 1180} 1181 1182// In the writer below, we maintain a stack of State instances. 1183// It is just enough to emit the appropriate delimiters and brackets 1184// in JSON. 1185enum class Container { 1186 // Used for the top-level, initial state. 1187 NONE, 1188 // Inside a JSON object. 1189 MAP, 1190 // Inside a JSON array. 1191 ARRAY 1192}; 1193class State { 1194 public: 1195 explicit State(Container container) : container_(container) {} 1196 void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); } 1197 void StartElement(std::string* out) { StartElementTmpl(out); } 1198 Container container() const { return container_; } 1199 1200 private: 1201 template <typename C> 1202 void StartElementTmpl(C* out) { 1203 assert(container_ != Container::NONE || size_ == 0); 1204 if (size_ != 0) { 1205 char delim = (!(size_ & 1) || container_ == Container::ARRAY) ? ',' : ':'; 1206 out->push_back(delim); 1207 } 1208 ++size_; 1209 } 1210 1211 Container container_ = Container::NONE; 1212 int size_ = 0; 1213}; 1214 1215constexpr char kBase64Table[] = 1216 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 1217 "abcdefghijklmnopqrstuvwxyz0123456789+/"; 1218 1219template <typename C> 1220void Base64Encode(const span<uint8_t>& in, C* out) { 1221 // The following three cases are based on the tables in the example 1222 // section in https://en.wikipedia.org/wiki/Base64. We process three 1223 // input bytes at a time, emitting 4 output bytes at a time. 1224 size_t ii = 0; 1225 1226 // While possible, process three input bytes. 1227 for (; ii + 3 <= in.size(); ii += 3) { 1228 uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2]; 1229 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1230 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1231 out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); 1232 out->push_back(kBase64Table[twentyfour_bits & 0x3f]); 1233 } 1234 if (ii + 2 <= in.size()) { // Process two input bytes. 1235 uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8); 1236 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1237 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1238 out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]); 1239 out->push_back('='); // Emit padding. 1240 return; 1241 } 1242 if (ii + 1 <= in.size()) { // Process a single input byte. 1243 uint32_t twentyfour_bits = (in[ii] << 16); 1244 out->push_back(kBase64Table[(twentyfour_bits >> 18)]); 1245 out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]); 1246 out->push_back('='); // Emit padding. 1247 out->push_back('='); // Emit padding. 1248 } 1249} 1250 1251// Implements a handler for JSON parser events to emit a JSON string. 1252template <typename C> 1253class JSONEncoder : public StreamingParserHandler { 1254 public: 1255 JSONEncoder(const Platform* platform, C* out, Status* status) 1256 : platform_(platform), out_(out), status_(status) { 1257 *status_ = Status(); 1258 state_.emplace(Container::NONE); 1259 } 1260 1261 void HandleMapBegin() override { 1262 if (!status_->ok()) 1263 return; 1264 assert(!state_.empty()); 1265 state_.top().StartElement(out_); 1266 state_.emplace(Container::MAP); 1267 Emit('{'); 1268 } 1269 1270 void HandleMapEnd() override { 1271 if (!status_->ok()) 1272 return; 1273 assert(state_.size() >= 2 && state_.top().container() == Container::MAP); 1274 state_.pop(); 1275 Emit('}'); 1276 } 1277 1278 void HandleArrayBegin() override { 1279 if (!status_->ok()) 1280 return; 1281 state_.top().StartElement(out_); 1282 state_.emplace(Container::ARRAY); 1283 Emit('['); 1284 } 1285 1286 void HandleArrayEnd() override { 1287 if (!status_->ok()) 1288 return; 1289 assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY); 1290 state_.pop(); 1291 Emit(']'); 1292 } 1293 1294 void HandleString16(span<uint16_t> chars) override { 1295 if (!status_->ok()) 1296 return; 1297 state_.top().StartElement(out_); 1298 Emit('"'); 1299 for (const uint16_t ch : chars) { 1300 if (ch == '"') { 1301 Emit("\\\""); 1302 } else if (ch == '\\') { 1303 Emit("\\\\"); 1304 } else if (ch == '\b') { 1305 Emit("\\b"); 1306 } else if (ch == '\f') { 1307 Emit("\\f"); 1308 } else if (ch == '\n') { 1309 Emit("\\n"); 1310 } else if (ch == '\r') { 1311 Emit("\\r"); 1312 } else if (ch == '\t') { 1313 Emit("\\t"); 1314 } else if (ch >= 32 && ch <= 126) { 1315 Emit(ch); 1316 } else { 1317 Emit("\\u"); 1318 PrintHex(ch, out_); 1319 } 1320 } 1321 Emit('"'); 1322 } 1323 1324 void HandleString8(span<uint8_t> chars) override { 1325 if (!status_->ok()) 1326 return; 1327 state_.top().StartElement(out_); 1328 Emit('"'); 1329 for (size_t ii = 0; ii < chars.size(); ++ii) { 1330 uint8_t c = chars[ii]; 1331 if (c == '"') { 1332 Emit("\\\""); 1333 } else if (c == '\\') { 1334 Emit("\\\\"); 1335 } else if (c == '\b') { 1336 Emit("\\b"); 1337 } else if (c == '\f') { 1338 Emit("\\f"); 1339 } else if (c == '\n') { 1340 Emit("\\n"); 1341 } else if (c == '\r') { 1342 Emit("\\r"); 1343 } else if (c == '\t') { 1344 Emit("\\t"); 1345 } else if (c >= 32 && c <= 126) { 1346 Emit(c); 1347 } else if (c < 32) { 1348 Emit("\\u"); 1349 PrintHex(static_cast<uint16_t>(c), out_); 1350 } else { 1351 // Inspect the leading byte to figure out how long the utf8 1352 // byte sequence is; while doing this initialize |codepoint| 1353 // with the first few bits. 1354 // See table in: https://en.wikipedia.org/wiki/UTF-8 1355 // byte one is 110x xxxx -> 2 byte utf8 sequence 1356 // byte one is 1110 xxxx -> 3 byte utf8 sequence 1357 // byte one is 1111 0xxx -> 4 byte utf8 sequence 1358 uint32_t codepoint; 1359 int num_bytes_left; 1360 if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence 1361 num_bytes_left = 1; 1362 codepoint = c & 0x1f; 1363 } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence 1364 num_bytes_left = 2; 1365 codepoint = c & 0x0f; 1366 } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence 1367 codepoint = c & 0x07; 1368 num_bytes_left = 3; 1369 } else { 1370 continue; // invalid leading byte 1371 } 1372 1373 // If we have enough bytes in our input, decode the remaining ones 1374 // belonging to this Unicode character into |codepoint|. 1375 if (ii + num_bytes_left > chars.size()) 1376 continue; 1377 while (num_bytes_left > 0) { 1378 c = chars[++ii]; 1379 --num_bytes_left; 1380 // Check the next byte is a continuation byte, that is 10xx xxxx. 1381 if ((c & 0xc0) != 0x80) 1382 continue; 1383 codepoint = (codepoint << 6) | (c & 0x3f); 1384 } 1385 1386 // Disallow overlong encodings for ascii characters, as these 1387 // would include " and other characters significant to JSON 1388 // string termination / control. 1389 if (codepoint < 0x7f) 1390 continue; 1391 // Invalid in UTF8, and can't be represented in UTF16 anyway. 1392 if (codepoint > 0x10ffff) 1393 continue; 1394 1395 // So, now we transcode to UTF16, 1396 // using the math described at https://en.wikipedia.org/wiki/UTF-16, 1397 // for either one or two 16 bit characters. 1398 if (codepoint < 0xffff) { 1399 Emit("\\u"); 1400 PrintHex(static_cast<uint16_t>(codepoint), out_); 1401 continue; 1402 } 1403 codepoint -= 0x10000; 1404 // high surrogate 1405 Emit("\\u"); 1406 PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_); 1407 // low surrogate 1408 Emit("\\u"); 1409 PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_); 1410 } 1411 } 1412 Emit('"'); 1413 } 1414 1415 void HandleBinary(span<uint8_t> bytes) override { 1416 if (!status_->ok()) 1417 return; 1418 state_.top().StartElement(out_); 1419 Emit('"'); 1420 Base64Encode(bytes, out_); 1421 Emit('"'); 1422 } 1423 1424 void HandleDouble(double value) override { 1425 if (!status_->ok()) 1426 return; 1427 state_.top().StartElement(out_); 1428 // JSON cannot represent NaN or Infinity. So, for compatibility, 1429 // we behave like the JSON object in web browsers: emit 'null'. 1430 if (!std::isfinite(value)) { 1431 Emit("null"); 1432 return; 1433 } 1434 std::unique_ptr<char[]> str_value = platform_->DToStr(value); 1435 1436 // DToStr may fail to emit a 0 before the decimal dot. E.g. this is 1437 // the case in base::NumberToString in Chromium (which is based on 1438 // dmg_fp). So, much like 1439 // https://cs.chromium.org/chromium/src/base/json/json_writer.cc 1440 // we probe for this and emit the leading 0 anyway if necessary. 1441 const char* chars = str_value.get(); 1442 if (chars[0] == '.') { 1443 Emit('0'); 1444 } else if (chars[0] == '-' && chars[1] == '.') { 1445 Emit("-0"); 1446 ++chars; 1447 } 1448 Emit(chars); 1449 } 1450 1451 void HandleInt32(int32_t value) override { 1452 if (!status_->ok()) 1453 return; 1454 state_.top().StartElement(out_); 1455 Emit(std::to_string(value)); 1456 } 1457 1458 void HandleBool(bool value) override { 1459 if (!status_->ok()) 1460 return; 1461 state_.top().StartElement(out_); 1462 Emit(value ? "true" : "false"); 1463 } 1464 1465 void HandleNull() override { 1466 if (!status_->ok()) 1467 return; 1468 state_.top().StartElement(out_); 1469 Emit("null"); 1470 } 1471 1472 void HandleError(Status error) override { 1473 assert(!error.ok()); 1474 *status_ = error; 1475 out_->clear(); 1476 } 1477 1478 private: 1479 void Emit(char c) { out_->push_back(c); } 1480 void Emit(const char* str) { 1481 out_->insert(out_->end(), str, str + strlen(str)); 1482 } 1483 void Emit(const std::string& str) { 1484 out_->insert(out_->end(), str.begin(), str.end()); 1485 } 1486 1487 const Platform* platform_; 1488 C* out_; 1489 Status* status_; 1490 std::stack<State> state_; 1491}; 1492} // namespace 1493 1494std::unique_ptr<StreamingParserHandler> NewJSONEncoder( 1495 const Platform* platform, 1496 std::vector<uint8_t>* out, 1497 Status* status) { 1498 return std::unique_ptr<StreamingParserHandler>( 1499 new JSONEncoder<std::vector<uint8_t>>(platform, out, status)); 1500} 1501std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform, 1502 std::string* out, 1503 Status* status) { 1504 return std::unique_ptr<StreamingParserHandler>( 1505 new JSONEncoder<std::string>(platform, out, status)); 1506} 1507 1508// ============================================================================= 1509// json::ParseJSON - for receiving streaming parser events for JSON. 1510// ============================================================================= 1511 1512namespace { 1513const int kStackLimit = 300; 1514 1515enum Token { 1516 ObjectBegin, 1517 ObjectEnd, 1518 ArrayBegin, 1519 ArrayEnd, 1520 StringLiteral, 1521 Number, 1522 BoolTrue, 1523 BoolFalse, 1524 NullToken, 1525 ListSeparator, 1526 ObjectPairSeparator, 1527 InvalidToken, 1528 NoInput 1529}; 1530 1531const char* const kNullString = "null"; 1532const char* const kTrueString = "true"; 1533const char* const kFalseString = "false"; 1534 1535template <typename Char> 1536class JsonParser { 1537 public: 1538 JsonParser(const Platform* platform, StreamingParserHandler* handler) 1539 : platform_(platform), handler_(handler) {} 1540 1541 void Parse(const Char* start, size_t length) { 1542 start_pos_ = start; 1543 const Char* end = start + length; 1544 const Char* tokenEnd = nullptr; 1545 ParseValue(start, end, &tokenEnd, 0); 1546 if (error_) 1547 return; 1548 if (tokenEnd != end) { 1549 HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd); 1550 } 1551 } 1552 1553 private: 1554 bool CharsToDouble(const uint16_t* chars, size_t length, double* result) { 1555 std::string buffer; 1556 buffer.reserve(length + 1); 1557 for (size_t ii = 0; ii < length; ++ii) { 1558 bool is_ascii = !(chars[ii] & ~0x7F); 1559 if (!is_ascii) 1560 return false; 1561 buffer.push_back(static_cast<char>(chars[ii])); 1562 } 1563 return platform_->StrToD(buffer.c_str(), result); 1564 } 1565 1566 bool CharsToDouble(const uint8_t* chars, size_t length, double* result) { 1567 std::string buffer(reinterpret_cast<const char*>(chars), length); 1568 return platform_->StrToD(buffer.c_str(), result); 1569 } 1570 1571 static bool ParseConstToken(const Char* start, 1572 const Char* end, 1573 const Char** token_end, 1574 const char* token) { 1575 // |token| is \0 terminated, it's one of the constants at top of the file. 1576 while (start < end && *token != '\0' && *start++ == *token++) { 1577 } 1578 if (*token != '\0') 1579 return false; 1580 *token_end = start; 1581 return true; 1582 } 1583 1584 static bool ReadInt(const Char* start, 1585 const Char* end, 1586 const Char** token_end, 1587 bool allow_leading_zeros) { 1588 if (start == end) 1589 return false; 1590 bool has_leading_zero = '0' == *start; 1591 int length = 0; 1592 while (start < end && '0' <= *start && *start <= '9') { 1593 ++start; 1594 ++length; 1595 } 1596 if (!length) 1597 return false; 1598 if (!allow_leading_zeros && length > 1 && has_leading_zero) 1599 return false; 1600 *token_end = start; 1601 return true; 1602 } 1603 1604 static bool ParseNumberToken(const Char* start, 1605 const Char* end, 1606 const Char** token_end) { 1607 // We just grab the number here. We validate the size in DecodeNumber. 1608 // According to RFC4627, a valid number is: [minus] int [frac] [exp] 1609 if (start == end) 1610 return false; 1611 Char c = *start; 1612 if ('-' == c) 1613 ++start; 1614 1615 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false)) 1616 return false; 1617 if (start == end) { 1618 *token_end = start; 1619 return true; 1620 } 1621 1622 // Optional fraction part 1623 c = *start; 1624 if ('.' == c) { 1625 ++start; 1626 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) 1627 return false; 1628 if (start == end) { 1629 *token_end = start; 1630 return true; 1631 } 1632 c = *start; 1633 } 1634 1635 // Optional exponent part 1636 if ('e' == c || 'E' == c) { 1637 ++start; 1638 if (start == end) 1639 return false; 1640 c = *start; 1641 if ('-' == c || '+' == c) { 1642 ++start; 1643 if (start == end) 1644 return false; 1645 } 1646 if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true)) 1647 return false; 1648 } 1649 1650 *token_end = start; 1651 return true; 1652 } 1653 1654 static bool ReadHexDigits(const Char* start, 1655 const Char* end, 1656 const Char** token_end, 1657 int digits) { 1658 if (end - start < digits) 1659 return false; 1660 for (int i = 0; i < digits; ++i) { 1661 Char c = *start++; 1662 if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || 1663 ('A' <= c && c <= 'F'))) 1664 return false; 1665 } 1666 *token_end = start; 1667 return true; 1668 } 1669 1670 static bool ParseStringToken(const Char* start, 1671 const Char* end, 1672 const Char** token_end) { 1673 while (start < end) { 1674 Char c = *start++; 1675 if ('\\' == c) { 1676 if (start == end) 1677 return false; 1678 c = *start++; 1679 // Make sure the escaped char is valid. 1680 switch (c) { 1681 case 'x': 1682 if (!ReadHexDigits(start, end, &start, 2)) 1683 return false; 1684 break; 1685 case 'u': 1686 if (!ReadHexDigits(start, end, &start, 4)) 1687 return false; 1688 break; 1689 case '\\': 1690 case '/': 1691 case 'b': 1692 case 'f': 1693 case 'n': 1694 case 'r': 1695 case 't': 1696 case 'v': 1697 case '"': 1698 break; 1699 default: 1700 return false; 1701 } 1702 } else if ('"' == c) { 1703 *token_end = start; 1704 return true; 1705 } 1706 } 1707 return false; 1708 } 1709 1710 static bool SkipComment(const Char* start, 1711 const Char* end, 1712 const Char** comment_end) { 1713 if (start == end) 1714 return false; 1715 1716 if (*start != '/' || start + 1 >= end) 1717 return false; 1718 ++start; 1719 1720 if (*start == '/') { 1721 // Single line comment, read to newline. 1722 for (++start; start < end; ++start) { 1723 if (*start == '\n' || *start == '\r') { 1724 *comment_end = start + 1; 1725 return true; 1726 } 1727 } 1728 *comment_end = end; 1729 // Comment reaches end-of-input, which is fine. 1730 return true; 1731 } 1732 1733 if (*start == '*') { 1734 Char previous = '\0'; 1735 // Block comment, read until end marker. 1736 for (++start; start < end; previous = *start++) { 1737 if (previous == '*' && *start == '/') { 1738 *comment_end = start + 1; 1739 return true; 1740 } 1741 } 1742 // Block comment must close before end-of-input. 1743 return false; 1744 } 1745 1746 return false; 1747 } 1748 1749 static bool IsSpaceOrNewLine(Char c) { 1750 // \v = vertial tab; \f = form feed page break. 1751 return c == ' ' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || 1752 c == '\t'; 1753 } 1754 1755 static void SkipWhitespaceAndComments(const Char* start, 1756 const Char* end, 1757 const Char** whitespace_end) { 1758 while (start < end) { 1759 if (IsSpaceOrNewLine(*start)) { 1760 ++start; 1761 } else if (*start == '/') { 1762 const Char* comment_end = nullptr; 1763 if (!SkipComment(start, end, &comment_end)) 1764 break; 1765 start = comment_end; 1766 } else { 1767 break; 1768 } 1769 } 1770 *whitespace_end = start; 1771 } 1772 1773 static Token ParseToken(const Char* start, 1774 const Char* end, 1775 const Char** tokenStart, 1776 const Char** token_end) { 1777 SkipWhitespaceAndComments(start, end, tokenStart); 1778 start = *tokenStart; 1779 1780 if (start == end) 1781 return NoInput; 1782 1783 switch (*start) { 1784 case 'n': 1785 if (ParseConstToken(start, end, token_end, kNullString)) 1786 return NullToken; 1787 break; 1788 case 't': 1789 if (ParseConstToken(start, end, token_end, kTrueString)) 1790 return BoolTrue; 1791 break; 1792 case 'f': 1793 if (ParseConstToken(start, end, token_end, kFalseString)) 1794 return BoolFalse; 1795 break; 1796 case '[': 1797 *token_end = start + 1; 1798 return ArrayBegin; 1799 case ']': 1800 *token_end = start + 1; 1801 return ArrayEnd; 1802 case ',': 1803 *token_end = start + 1; 1804 return ListSeparator; 1805 case '{': 1806 *token_end = start + 1; 1807 return ObjectBegin; 1808 case '}': 1809 *token_end = start + 1; 1810 return ObjectEnd; 1811 case ':': 1812 *token_end = start + 1; 1813 return ObjectPairSeparator; 1814 case '0': 1815 case '1': 1816 case '2': 1817 case '3': 1818 case '4': 1819 case '5': 1820 case '6': 1821 case '7': 1822 case '8': 1823 case '9': 1824 case '-': 1825 if (ParseNumberToken(start, end, token_end)) 1826 return Number; 1827 break; 1828 case '"': 1829 if (ParseStringToken(start + 1, end, token_end)) 1830 return StringLiteral; 1831 break; 1832 } 1833 return InvalidToken; 1834 } 1835 1836 static int HexToInt(Char c) { 1837 if ('0' <= c && c <= '9') 1838 return c - '0'; 1839 if ('A' <= c && c <= 'F') 1840 return c - 'A' + 10; 1841 if ('a' <= c && c <= 'f') 1842 return c - 'a' + 10; 1843 assert(false); // Unreachable. 1844 return 0; 1845 } 1846 1847 static bool DecodeString(const Char* start, 1848 const Char* end, 1849 std::vector<uint16_t>* output) { 1850 if (start == end) 1851 return true; 1852 if (start > end) 1853 return false; 1854 output->reserve(end - start); 1855 while (start < end) { 1856 uint16_t c = *start++; 1857 // If the |Char| we're dealing with is really a byte, then 1858 // we have utf8 here, and we need to check for multibyte characters 1859 // and transcode them to utf16 (either one or two utf16 chars). 1860 if (sizeof(Char) == sizeof(uint8_t) && c > 0x7f) { 1861 // Inspect the leading byte to figure out how long the utf8 1862 // byte sequence is; while doing this initialize |codepoint| 1863 // with the first few bits. 1864 // See table in: https://en.wikipedia.org/wiki/UTF-8 1865 // byte one is 110x xxxx -> 2 byte utf8 sequence 1866 // byte one is 1110 xxxx -> 3 byte utf8 sequence 1867 // byte one is 1111 0xxx -> 4 byte utf8 sequence 1868 uint32_t codepoint; 1869 int num_bytes_left; 1870 if ((c & 0xe0) == 0xc0) { // 2 byte utf8 sequence 1871 num_bytes_left = 1; 1872 codepoint = c & 0x1f; 1873 } else if ((c & 0xf0) == 0xe0) { // 3 byte utf8 sequence 1874 num_bytes_left = 2; 1875 codepoint = c & 0x0f; 1876 } else if ((c & 0xf8) == 0xf0) { // 4 byte utf8 sequence 1877 codepoint = c & 0x07; 1878 num_bytes_left = 3; 1879 } else { 1880 return false; // invalid leading byte 1881 } 1882 1883 // If we have enough bytes in our inpput, decode the remaining ones 1884 // belonging to this Unicode character into |codepoint|. 1885 if (start + num_bytes_left > end) 1886 return false; 1887 while (num_bytes_left > 0) { 1888 c = *start++; 1889 --num_bytes_left; 1890 // Check the next byte is a continuation byte, that is 10xx xxxx. 1891 if ((c & 0xc0) != 0x80) 1892 return false; 1893 codepoint = (codepoint << 6) | (c & 0x3f); 1894 } 1895 1896 // Disallow overlong encodings for ascii characters, as these 1897 // would include " and other characters significant to JSON 1898 // string termination / control. 1899 if (codepoint <= 0x7f) 1900 return false; 1901 // Invalid in UTF8, and can't be represented in UTF16 anyway. 1902 if (codepoint > 0x10ffff) 1903 return false; 1904 1905 // So, now we transcode to UTF16, 1906 // using the math described at https://en.wikipedia.org/wiki/UTF-16, 1907 // for either one or two 16 bit characters. 1908 if (codepoint < 0xffff) { 1909 output->push_back(codepoint); 1910 continue; 1911 } 1912 codepoint -= 0x10000; 1913 output->push_back((codepoint >> 10) + 0xd800); // high surrogate 1914 output->push_back((codepoint & 0x3ff) + 0xdc00); // low surrogate 1915 continue; 1916 } 1917 if ('\\' != c) { 1918 output->push_back(c); 1919 continue; 1920 } 1921 if (start == end) 1922 return false; 1923 c = *start++; 1924 1925 if (c == 'x') { 1926 // \x is not supported. 1927 return false; 1928 } 1929 1930 switch (c) { 1931 case '"': 1932 case '/': 1933 case '\\': 1934 break; 1935 case 'b': 1936 c = '\b'; 1937 break; 1938 case 'f': 1939 c = '\f'; 1940 break; 1941 case 'n': 1942 c = '\n'; 1943 break; 1944 case 'r': 1945 c = '\r'; 1946 break; 1947 case 't': 1948 c = '\t'; 1949 break; 1950 case 'v': 1951 c = '\v'; 1952 break; 1953 case 'u': 1954 c = (HexToInt(*start) << 12) + (HexToInt(*(start + 1)) << 8) + 1955 (HexToInt(*(start + 2)) << 4) + HexToInt(*(start + 3)); 1956 start += 4; 1957 break; 1958 default: 1959 return false; 1960 } 1961 output->push_back(c); 1962 } 1963 return true; 1964 } 1965 1966 void ParseValue(const Char* start, 1967 const Char* end, 1968 const Char** value_token_end, 1969 int depth) { 1970 if (depth > kStackLimit) { 1971 HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start); 1972 return; 1973 } 1974 const Char* token_start = nullptr; 1975 const Char* token_end = nullptr; 1976 Token token = ParseToken(start, end, &token_start, &token_end); 1977 switch (token) { 1978 case NoInput: 1979 HandleError(Error::JSON_PARSER_NO_INPUT, token_start); 1980 return; 1981 case InvalidToken: 1982 HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start); 1983 return; 1984 case NullToken: 1985 handler_->HandleNull(); 1986 break; 1987 case BoolTrue: 1988 handler_->HandleBool(true); 1989 break; 1990 case BoolFalse: 1991 handler_->HandleBool(false); 1992 break; 1993 case Number: { 1994 double value; 1995 if (!CharsToDouble(token_start, token_end - token_start, &value)) { 1996 HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start); 1997 return; 1998 } 1999 if (value >= std::numeric_limits<int32_t>::min() && 2000 value <= std::numeric_limits<int32_t>::max() && 2001 static_cast<int32_t>(value) == value) 2002 handler_->HandleInt32(static_cast<int32_t>(value)); 2003 else 2004 handler_->HandleDouble(value); 2005 break; 2006 } 2007 case StringLiteral: { 2008 std::vector<uint16_t> value; 2009 bool ok = DecodeString(token_start + 1, token_end - 1, &value); 2010 if (!ok) { 2011 HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); 2012 return; 2013 } 2014 handler_->HandleString16(span<uint16_t>(value.data(), value.size())); 2015 break; 2016 } 2017 case ArrayBegin: { 2018 handler_->HandleArrayBegin(); 2019 start = token_end; 2020 token = ParseToken(start, end, &token_start, &token_end); 2021 while (token != ArrayEnd) { 2022 ParseValue(start, end, &token_end, depth + 1); 2023 if (error_) 2024 return; 2025 2026 // After a list value, we expect a comma or the end of the list. 2027 start = token_end; 2028 token = ParseToken(start, end, &token_start, &token_end); 2029 if (token == ListSeparator) { 2030 start = token_end; 2031 token = ParseToken(start, end, &token_start, &token_end); 2032 if (token == ArrayEnd) { 2033 HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start); 2034 return; 2035 } 2036 } else if (token != ArrayEnd) { 2037 // Unexpected value after list value. Bail out. 2038 HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED, 2039 token_start); 2040 return; 2041 } 2042 } 2043 handler_->HandleArrayEnd(); 2044 break; 2045 } 2046 case ObjectBegin: { 2047 handler_->HandleMapBegin(); 2048 start = token_end; 2049 token = ParseToken(start, end, &token_start, &token_end); 2050 while (token != ObjectEnd) { 2051 if (token != StringLiteral) { 2052 HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED, 2053 token_start); 2054 return; 2055 } 2056 std::vector<uint16_t> key; 2057 if (!DecodeString(token_start + 1, token_end - 1, &key)) { 2058 HandleError(Error::JSON_PARSER_INVALID_STRING, token_start); 2059 return; 2060 } 2061 handler_->HandleString16(span<uint16_t>(key.data(), key.size())); 2062 start = token_end; 2063 2064 token = ParseToken(start, end, &token_start, &token_end); 2065 if (token != ObjectPairSeparator) { 2066 HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start); 2067 return; 2068 } 2069 start = token_end; 2070 2071 ParseValue(start, end, &token_end, depth + 1); 2072 if (error_) 2073 return; 2074 start = token_end; 2075 2076 // After a key/value pair, we expect a comma or the end of the 2077 // object. 2078 token = ParseToken(start, end, &token_start, &token_end); 2079 if (token == ListSeparator) { 2080 start = token_end; 2081 token = ParseToken(start, end, &token_start, &token_end); 2082 if (token == ObjectEnd) { 2083 HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start); 2084 return; 2085 } 2086 } else if (token != ObjectEnd) { 2087 // Unexpected value after last object value. Bail out. 2088 HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED, 2089 token_start); 2090 return; 2091 } 2092 } 2093 handler_->HandleMapEnd(); 2094 break; 2095 } 2096 2097 default: 2098 // We got a token that's not a value. 2099 HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start); 2100 return; 2101 } 2102 2103 SkipWhitespaceAndComments(token_end, end, value_token_end); 2104 } 2105 2106 void HandleError(Error error, const Char* pos) { 2107 assert(error != Error::OK); 2108 if (!error_) { 2109 handler_->HandleError( 2110 Status{error, static_cast<size_t>(pos - start_pos_)}); 2111 error_ = true; 2112 } 2113 } 2114 2115 const Char* start_pos_ = nullptr; 2116 bool error_ = false; 2117 const Platform* platform_; 2118 StreamingParserHandler* handler_; 2119}; 2120} // namespace 2121 2122void ParseJSON(const Platform& platform, 2123 span<uint8_t> chars, 2124 StreamingParserHandler* handler) { 2125 JsonParser<uint8_t> parser(&platform, handler); 2126 parser.Parse(chars.data(), chars.size()); 2127} 2128 2129void ParseJSON(const Platform& platform, 2130 span<uint16_t> chars, 2131 StreamingParserHandler* handler) { 2132 JsonParser<uint16_t> parser(&platform, handler); 2133 parser.Parse(chars.data(), chars.size()); 2134} 2135 2136// ============================================================================= 2137// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding 2138// ============================================================================= 2139template <typename C> 2140Status ConvertCBORToJSONTmpl(const Platform& platform, 2141 span<uint8_t> cbor, 2142 C* json) { 2143 Status status; 2144 std::unique_ptr<StreamingParserHandler> json_writer = 2145 NewJSONEncoder(&platform, json, &status); 2146 cbor::ParseCBOR(cbor, json_writer.get()); 2147 return status; 2148} 2149 2150Status ConvertCBORToJSON(const Platform& platform, 2151 span<uint8_t> cbor, 2152 std::vector<uint8_t>* json) { 2153 return ConvertCBORToJSONTmpl(platform, cbor, json); 2154} 2155Status ConvertCBORToJSON(const Platform& platform, 2156 span<uint8_t> cbor, 2157 std::string* json) { 2158 return ConvertCBORToJSONTmpl(platform, cbor, json); 2159} 2160 2161template <typename T, typename C> 2162Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) { 2163 Status status; 2164 std::unique_ptr<StreamingParserHandler> encoder = 2165 cbor::NewCBOREncoder(cbor, &status); 2166 ParseJSON(platform, json, encoder.get()); 2167 return status; 2168} 2169Status ConvertJSONToCBOR(const Platform& platform, 2170 span<uint8_t> json, 2171 std::string* cbor) { 2172 return ConvertJSONToCBORTmpl(platform, json, cbor); 2173} 2174Status ConvertJSONToCBOR(const Platform& platform, 2175 span<uint16_t> json, 2176 std::string* cbor) { 2177 return ConvertJSONToCBORTmpl(platform, json, cbor); 2178} 2179Status ConvertJSONToCBOR(const Platform& platform, 2180 span<uint8_t> json, 2181 std::vector<uint8_t>* cbor) { 2182 return ConvertJSONToCBORTmpl(platform, json, cbor); 2183} 2184Status ConvertJSONToCBOR(const Platform& platform, 2185 span<uint16_t> json, 2186 std::vector<uint8_t>* cbor) { 2187 return ConvertJSONToCBORTmpl(platform, json, cbor); 2188} 2189} // namespace json 2190} // namespace v8_inspector_protocol_encoding 2191