1// Copyright 2019 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "encoding.h"
6
7#include <algorithm>
8#include <cassert>
9#include <cmath>
10#include <cstring>
11#include <limits>
12#include <stack>
13
14namespace v8_inspector_protocol_encoding {
15// =============================================================================
16// Status and Error codes
17// =============================================================================
18
19std::string Status::ToASCIIString() const {
20  switch (error) {
21    case Error::OK:
22      return "OK";
23    case Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS:
24      return ToASCIIString("JSON: unprocessed input remains");
25    case Error::JSON_PARSER_STACK_LIMIT_EXCEEDED:
26      return ToASCIIString("JSON: stack limit exceeded");
27    case Error::JSON_PARSER_NO_INPUT:
28      return ToASCIIString("JSON: no input");
29    case Error::JSON_PARSER_INVALID_TOKEN:
30      return ToASCIIString("JSON: invalid token");
31    case Error::JSON_PARSER_INVALID_NUMBER:
32      return ToASCIIString("JSON: invalid number");
33    case Error::JSON_PARSER_INVALID_STRING:
34      return ToASCIIString("JSON: invalid string");
35    case Error::JSON_PARSER_UNEXPECTED_ARRAY_END:
36      return ToASCIIString("JSON: unexpected array end");
37    case Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED:
38      return ToASCIIString("JSON: comma or array end expected");
39    case Error::JSON_PARSER_STRING_LITERAL_EXPECTED:
40      return ToASCIIString("JSON: string literal expected");
41    case Error::JSON_PARSER_COLON_EXPECTED:
42      return ToASCIIString("JSON: colon expected");
43    case Error::JSON_PARSER_UNEXPECTED_MAP_END:
44      return ToASCIIString("JSON: unexpected map end");
45    case Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED:
46      return ToASCIIString("JSON: comma or map end expected");
47    case Error::JSON_PARSER_VALUE_EXPECTED:
48      return ToASCIIString("JSON: value expected");
49
50    case Error::CBOR_INVALID_INT32:
51      return ToASCIIString("CBOR: invalid int32");
52    case Error::CBOR_INVALID_DOUBLE:
53      return ToASCIIString("CBOR: invalid double");
54    case Error::CBOR_INVALID_ENVELOPE:
55      return ToASCIIString("CBOR: invalid envelope");
56    case Error::CBOR_INVALID_STRING8:
57      return ToASCIIString("CBOR: invalid string8");
58    case Error::CBOR_INVALID_STRING16:
59      return ToASCIIString("CBOR: invalid string16");
60    case Error::CBOR_INVALID_BINARY:
61      return ToASCIIString("CBOR: invalid binary");
62    case Error::CBOR_UNSUPPORTED_VALUE:
63      return ToASCIIString("CBOR: unsupported value");
64    case Error::CBOR_NO_INPUT:
65      return ToASCIIString("CBOR: no input");
66    case Error::CBOR_INVALID_START_BYTE:
67      return ToASCIIString("CBOR: invalid start byte");
68    case Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE:
69      return ToASCIIString("CBOR: unexpected eof expected value");
70    case Error::CBOR_UNEXPECTED_EOF_IN_ARRAY:
71      return ToASCIIString("CBOR: unexpected eof in array");
72    case Error::CBOR_UNEXPECTED_EOF_IN_MAP:
73      return ToASCIIString("CBOR: unexpected eof in map");
74    case Error::CBOR_INVALID_MAP_KEY:
75      return ToASCIIString("CBOR: invalid map key");
76    case Error::CBOR_STACK_LIMIT_EXCEEDED:
77      return ToASCIIString("CBOR: stack limit exceeded");
78    case Error::CBOR_TRAILING_JUNK:
79      return ToASCIIString("CBOR: trailing junk");
80    case Error::CBOR_MAP_START_EXPECTED:
81      return ToASCIIString("CBOR: map start expected");
82    case Error::CBOR_MAP_STOP_EXPECTED:
83      return ToASCIIString("CBOR: map stop expected");
84    case Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED:
85      return ToASCIIString("CBOR: envelope size limit exceeded");
86  }
87  // Some compilers can't figure out that we can't get here.
88  return "INVALID ERROR CODE";
89}
90
91std::string Status::ToASCIIString(const char* msg) const {
92  return std::string(msg) + " at position " + std::to_string(pos);
93}
94
95namespace cbor {
96namespace {
// Layout of the CBOR "initial byte": the high-order 3 bits hold the major
// type, the low-order 5 bits hold the "additional information".

// How far the initial byte must be shifted to the right, after masking it
// with |kMajorTypeMask|, so that the major type ends up in the lowest bits.
constexpr uint8_t kMajorTypeBitShift = 5u;
// Selects the low-order 5 bits (the additional information) of the initial
// byte.
constexpr uint8_t kAdditionalInformationMask = 0x1f;
// Selects the high-order 3 bits (the major type) of the initial byte.
constexpr uint8_t kMajorTypeMask = 0xe0;

// Additional information values announcing that the integer is stored in
// the bytes following the initial byte:
// ... in the next 1 byte.
constexpr uint8_t kAdditionalInformation1Byte = 24u;
// ... in the next 2 bytes.
constexpr uint8_t kAdditionalInformation2Bytes = 25u;
// ... in the next 4 bytes.
constexpr uint8_t kAdditionalInformation4Bytes = 26u;
// ... in the next 8 bytes.
constexpr uint8_t kAdditionalInformation8Bytes = 27u;
115
116// Encodes the initial byte, consisting of the |type| in the first 3 bits
117// followed by 5 bits of |additional_info|.
118constexpr uint8_t EncodeInitialByte(MajorType type, uint8_t additional_info) {
119  return (static_cast<uint8_t>(type) << kMajorTypeBitShift) |
120         (additional_info & kAdditionalInformationMask);
121}
122
123// TAG 24 indicates that what follows is a byte string which is
124// encoded in CBOR format. We use this as a wrapper for
125// maps and arrays, allowing us to skip them, because the
126// byte string carries its size (byte length).
127// https://tools.ietf.org/html/rfc7049#section-2.4.4.1
128static constexpr uint8_t kInitialByteForEnvelope =
129    EncodeInitialByte(MajorType::TAG, 24);
130// The initial byte for a byte string with at most 2^32 bytes
131// of payload. This is used for envelope encoding, even if
132// the byte string is shorter.
133static constexpr uint8_t kInitialByteFor32BitLengthByteString =
134    EncodeInitialByte(MajorType::BYTE_STRING, 26);
135
136// See RFC 7049 Section 2.2.1, indefinite length arrays / maps have additional
137// info = 31.
138static constexpr uint8_t kInitialByteIndefiniteLengthArray =
139    EncodeInitialByte(MajorType::ARRAY, 31);
140static constexpr uint8_t kInitialByteIndefiniteLengthMap =
141    EncodeInitialByte(MajorType::MAP, 31);
142// See RFC 7049 Section 2.3, Table 1; this is used for finishing indefinite
143// length maps / arrays.
144static constexpr uint8_t kStopByte =
145    EncodeInitialByte(MajorType::SIMPLE_VALUE, 31);
146
147// See RFC 7049 Section 2.3, Table 2.
148static constexpr uint8_t kEncodedTrue =
149    EncodeInitialByte(MajorType::SIMPLE_VALUE, 21);
150static constexpr uint8_t kEncodedFalse =
151    EncodeInitialByte(MajorType::SIMPLE_VALUE, 20);
152static constexpr uint8_t kEncodedNull =
153    EncodeInitialByte(MajorType::SIMPLE_VALUE, 22);
154static constexpr uint8_t kInitialByteForDouble =
155    EncodeInitialByte(MajorType::SIMPLE_VALUE, 27);
156
157// See RFC 7049 Table 3 and Section 2.4.4.2. This is used as a prefix for
158// arbitrary binary data encoded as BYTE_STRING.
159static constexpr uint8_t kExpectedConversionToBase64Tag =
160    EncodeInitialByte(MajorType::TAG, 22);
161
// Appends the sizeof(T) bytes of |v| to |out| in big-endian order, that is,
// the most significant byte is pushed first. |out| only needs to provide
// push_back().
// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
template <typename T, class C>
void WriteBytesMostSignificantByteFirst(T v, C* out) {
  // Counts down from the most significant byte index to 0.
  size_t remaining = sizeof(T);
  while (remaining-- > 0)
    out->push_back(0xff & (v >> (remaining * 8)));
}
169
170// Extracts sizeof(T) bytes from |in| to extract a value of type T
171// (e.g. uint64_t, uint32_t, ...), most significant byte first.
172// See also: https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
173template <typename T>
174T ReadBytesMostSignificantByteFirst(span<uint8_t> in) {
175  assert(in.size() >= sizeof(T));
176  T result = 0;
177  for (size_t shift_bytes = 0; shift_bytes < sizeof(T); ++shift_bytes)
178    result |= T(in[sizeof(T) - 1 - shift_bytes]) << (shift_bytes * 8);
179  return result;
180}
181}  // namespace
182
183namespace internals {
184// Reads the start of a token with definitive size from |bytes|.
185// |type| is the major type as specified in RFC 7049 Section 2.1.
186// |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size
187// (e.g. for BYTE_STRING).
188// If successful, returns the number of bytes read. Otherwise returns 0.
189size_t ReadTokenStart(span<uint8_t> bytes, MajorType* type, uint64_t* value) {
190  if (bytes.empty())
191    return 0;
192  uint8_t initial_byte = bytes[0];
193  *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift);
194
195  uint8_t additional_information = initial_byte & kAdditionalInformationMask;
196  if (additional_information < 24) {
197    // Values 0-23 are encoded directly into the additional info of the
198    // initial byte.
199    *value = additional_information;
200    return 1;
201  }
202  if (additional_information == kAdditionalInformation1Byte) {
203    // Values 24-255 are encoded with one initial byte, followed by the value.
204    if (bytes.size() < 2)
205      return 0;
206    *value = ReadBytesMostSignificantByteFirst<uint8_t>(bytes.subspan(1));
207    return 2;
208  }
209  if (additional_information == kAdditionalInformation2Bytes) {
210    // Values 256-65535: 1 initial byte + 2 bytes payload.
211    if (bytes.size() < 1 + sizeof(uint16_t))
212      return 0;
213    *value = ReadBytesMostSignificantByteFirst<uint16_t>(bytes.subspan(1));
214    return 3;
215  }
216  if (additional_information == kAdditionalInformation4Bytes) {
217    // 32 bit uint: 1 initial byte + 4 bytes payload.
218    if (bytes.size() < 1 + sizeof(uint32_t))
219      return 0;
220    *value = ReadBytesMostSignificantByteFirst<uint32_t>(bytes.subspan(1));
221    return 5;
222  }
223  if (additional_information == kAdditionalInformation8Bytes) {
224    // 64 bit uint: 1 initial byte + 8 bytes payload.
225    if (bytes.size() < 1 + sizeof(uint64_t))
226      return 0;
227    *value = ReadBytesMostSignificantByteFirst<uint64_t>(bytes.subspan(1));
228    return 9;
229  }
230  return 0;
231}
232
233// Writes the start of a token with |type|. The |value| may indicate the size,
234// or it may be the payload if the value is an unsigned integer.
235template <typename C>
236void WriteTokenStartTmpl(MajorType type, uint64_t value, C* encoded) {
237  if (value < 24) {
238    // Values 0-23 are encoded directly into the additional info of the
239    // initial byte.
240    encoded->push_back(EncodeInitialByte(type, /*additional_info=*/value));
241    return;
242  }
243  if (value <= std::numeric_limits<uint8_t>::max()) {
244    // Values 24-255 are encoded with one initial byte, followed by the value.
245    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation1Byte));
246    encoded->push_back(value);
247    return;
248  }
249  if (value <= std::numeric_limits<uint16_t>::max()) {
250    // Values 256-65535: 1 initial byte + 2 bytes payload.
251    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation2Bytes));
252    WriteBytesMostSignificantByteFirst<uint16_t>(value, encoded);
253    return;
254  }
255  if (value <= std::numeric_limits<uint32_t>::max()) {
256    // 32 bit uint: 1 initial byte + 4 bytes payload.
257    encoded->push_back(EncodeInitialByte(type, kAdditionalInformation4Bytes));
258    WriteBytesMostSignificantByteFirst<uint32_t>(static_cast<uint32_t>(value),
259                                                 encoded);
260    return;
261  }
262  // 64 bit uint: 1 initial byte + 8 bytes payload.
263  encoded->push_back(EncodeInitialByte(type, kAdditionalInformation8Bytes));
264  WriteBytesMostSignificantByteFirst<uint64_t>(value, encoded);
265}
266void WriteTokenStart(MajorType type,
267                     uint64_t value,
268                     std::vector<uint8_t>* encoded) {
269  WriteTokenStartTmpl(type, value, encoded);
270}
271void WriteTokenStart(MajorType type, uint64_t value, std::string* encoded) {
272  WriteTokenStartTmpl(type, value, encoded);
273}
274}  // namespace internals
275
276// =============================================================================
277// Detecting CBOR content
278// =============================================================================
279
280uint8_t InitialByteForEnvelope() {
281  return kInitialByteForEnvelope;
282}
283uint8_t InitialByteFor32BitLengthByteString() {
284  return kInitialByteFor32BitLengthByteString;
285}
286bool IsCBORMessage(span<uint8_t> msg) {
287  return msg.size() >= 6 && msg[0] == InitialByteForEnvelope() &&
288         msg[1] == InitialByteFor32BitLengthByteString();
289}
290
291// =============================================================================
// Encoding individual CBOR items
293// =============================================================================
294
295uint8_t EncodeTrue() {
296  return kEncodedTrue;
297}
298uint8_t EncodeFalse() {
299  return kEncodedFalse;
300}
301uint8_t EncodeNull() {
302  return kEncodedNull;
303}
304
305uint8_t EncodeIndefiniteLengthArrayStart() {
306  return kInitialByteIndefiniteLengthArray;
307}
308
309uint8_t EncodeIndefiniteLengthMapStart() {
310  return kInitialByteIndefiniteLengthMap;
311}
312
313uint8_t EncodeStop() {
314  return kStopByte;
315}
316
317template <typename C>
318void EncodeInt32Tmpl(int32_t value, C* out) {
319  if (value >= 0) {
320    internals::WriteTokenStart(MajorType::UNSIGNED, value, out);
321  } else {
322    uint64_t representation = static_cast<uint64_t>(-(value + 1));
323    internals::WriteTokenStart(MajorType::NEGATIVE, representation, out);
324  }
325}
326void EncodeInt32(int32_t value, std::vector<uint8_t>* out) {
327  EncodeInt32Tmpl(value, out);
328}
329void EncodeInt32(int32_t value, std::string* out) {
330  EncodeInt32Tmpl(value, out);
331}
332
333template <typename C>
334void EncodeString16Tmpl(span<uint16_t> in, C* out) {
335  uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
336  internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
337  // When emitting UTF16 characters, we always write the least significant byte
338  // first; this is because it's the native representation for X86.
339  // TODO(johannes): Implement a more efficient thing here later, e.g.
340  // casting *iff* the machine has this byte order.
341  // The wire format for UTF16 chars will probably remain the same
342  // (least significant byte first) since this way we can have
343  // golden files, unittests, etc. that port easily and universally.
344  // See also:
345  // https://commandcenter.blogspot.com/2012/04/byte-order-fallacy.html
346  for (const uint16_t two_bytes : in) {
347    out->push_back(two_bytes);
348    out->push_back(two_bytes >> 8);
349  }
350}
351void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out) {
352  EncodeString16Tmpl(in, out);
353}
354void EncodeString16(span<uint16_t> in, std::string* out) {
355  EncodeString16Tmpl(in, out);
356}
357
358template <typename C>
359void EncodeString8Tmpl(span<uint8_t> in, C* out) {
360  internals::WriteTokenStart(MajorType::STRING,
361                             static_cast<uint64_t>(in.size_bytes()), out);
362  out->insert(out->end(), in.begin(), in.end());
363}
364void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out) {
365  EncodeString8Tmpl(in, out);
366}
367void EncodeString8(span<uint8_t> in, std::string* out) {
368  EncodeString8Tmpl(in, out);
369}
370
371template <typename C>
372void EncodeFromLatin1Tmpl(span<uint8_t> latin1, C* out) {
373  for (size_t ii = 0; ii < latin1.size(); ++ii) {
374    if (latin1[ii] <= 127)
375      continue;
376    // If there's at least one non-ASCII char, convert to UTF8.
377    std::vector<uint8_t> utf8(latin1.begin(), latin1.begin() + ii);
378    for (; ii < latin1.size(); ++ii) {
379      if (latin1[ii] <= 127) {
380        utf8.push_back(latin1[ii]);
381      } else {
382        // 0xC0 means it's a UTF8 sequence with 2 bytes.
383        utf8.push_back((latin1[ii] >> 6) | 0xc0);
384        utf8.push_back((latin1[ii] | 0x80) & 0xbf);
385      }
386    }
387    EncodeString8(SpanFrom(utf8), out);
388    return;
389  }
390  EncodeString8(latin1, out);
391}
392void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out) {
393  EncodeFromLatin1Tmpl(latin1, out);
394}
395void EncodeFromLatin1(span<uint8_t> latin1, std::string* out) {
396  EncodeFromLatin1Tmpl(latin1, out);
397}
398
399template <typename C>
400void EncodeFromUTF16Tmpl(span<uint16_t> utf16, C* out) {
401  // If there's at least one non-ASCII char, encode as STRING16 (UTF16).
402  for (uint16_t ch : utf16) {
403    if (ch <= 127)
404      continue;
405    EncodeString16(utf16, out);
406    return;
407  }
408  // It's all US-ASCII, strip out every second byte and encode as UTF8.
409  internals::WriteTokenStart(MajorType::STRING,
410                             static_cast<uint64_t>(utf16.size()), out);
411  out->insert(out->end(), utf16.begin(), utf16.end());
412}
413void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out) {
414  EncodeFromUTF16Tmpl(utf16, out);
415}
416void EncodeFromUTF16(span<uint16_t> utf16, std::string* out) {
417  EncodeFromUTF16Tmpl(utf16, out);
418}
419
420template <typename C>
421void EncodeBinaryTmpl(span<uint8_t> in, C* out) {
422  out->push_back(kExpectedConversionToBase64Tag);
423  uint64_t byte_length = static_cast<uint64_t>(in.size_bytes());
424  internals::WriteTokenStart(MajorType::BYTE_STRING, byte_length, out);
425  out->insert(out->end(), in.begin(), in.end());
426}
427void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out) {
428  EncodeBinaryTmpl(in, out);
429}
430void EncodeBinary(span<uint8_t> in, std::string* out) {
431  EncodeBinaryTmpl(in, out);
432}
433
// Size of an encoded double: the initial byte (kInitialByteForDouble)
// followed by the 64 bits of payload holding its value.
constexpr size_t kEncodedDoubleSize = sizeof(uint8_t) + sizeof(uint64_t);

// Size of an envelope header: the initial byte (kInitialByteForEnvelope),
// the start byte of a BYTE_STRING with a 32 bit wide length, and the 32 bit
// length itself.
constexpr size_t kEncodedEnvelopeHeaderSize =
    2 * sizeof(uint8_t) + sizeof(uint32_t);
442
443template <typename C>
444void EncodeDoubleTmpl(double value, C* out) {
445  // The additional_info=27 indicates 64 bits for the double follow.
446  // See RFC 7049 Section 2.3, Table 1.
447  out->push_back(kInitialByteForDouble);
448  union {
449    double from_double;
450    uint64_t to_uint64;
451  } reinterpret;
452  reinterpret.from_double = value;
453  WriteBytesMostSignificantByteFirst<uint64_t>(reinterpret.to_uint64, out);
454}
455void EncodeDouble(double value, std::vector<uint8_t>* out) {
456  EncodeDoubleTmpl(value, out);
457}
458void EncodeDouble(double value, std::string* out) {
459  EncodeDoubleTmpl(value, out);
460}
461
462// =============================================================================
463// cbor::EnvelopeEncoder - for wrapping submessages
464// =============================================================================
465
466template <typename C>
467void EncodeStartTmpl(C* out, size_t* byte_size_pos) {
468  assert(*byte_size_pos == 0);
469  out->push_back(kInitialByteForEnvelope);
470  out->push_back(kInitialByteFor32BitLengthByteString);
471  *byte_size_pos = out->size();
472  out->resize(out->size() + sizeof(uint32_t));
473}
474
475void EnvelopeEncoder::EncodeStart(std::vector<uint8_t>* out) {
476  EncodeStartTmpl<std::vector<uint8_t>>(out, &byte_size_pos_);
477}
478
479void EnvelopeEncoder::EncodeStart(std::string* out) {
480  EncodeStartTmpl<std::string>(out, &byte_size_pos_);
481}
482
// Fills in the 4 byte length field at offset |*byte_size_pos| of |out| with
// the number of bytes that follow that field, most significant byte first.
// |*byte_size_pos| must be non-zero on entry (checked with an assert only).
// Returns false, leaving |out| and |*byte_size_pos| untouched, if the payload
// size does not fit into 32 bits. On success |*byte_size_pos| has been
// advanced past the length field.
template <typename C>
bool EncodeStopTmpl(C* out, size_t* byte_size_pos) {
  assert(*byte_size_pos != 0);
  // The payload is everything written after the length field itself.
  const size_t payload_start = *byte_size_pos + sizeof(uint32_t);
  const uint64_t byte_size = out->size() - payload_start;
  // Exactly 4 bytes are available, so the size may be at most the maximum
  // uint32_t value.
  if (byte_size > std::numeric_limits<uint32_t>::max())
    return false;
  for (size_t index = 0; index < sizeof(uint32_t); ++index) {
    const size_t shift_bits = (sizeof(uint32_t) - 1 - index) * 8;
    (*out)[*byte_size_pos] = 0xff & (byte_size >> shift_bits);
    ++*byte_size_pos;
  }
  return true;
}
499
500bool EnvelopeEncoder::EncodeStop(std::vector<uint8_t>* out) {
501  return EncodeStopTmpl(out, &byte_size_pos_);
502}
503
504bool EnvelopeEncoder::EncodeStop(std::string* out) {
505  return EncodeStopTmpl(out, &byte_size_pos_);
506}
507
508// =============================================================================
509// cbor::NewCBOREncoder - for encoding from a streaming parser
510// =============================================================================
511
512namespace {
513template <typename C>
514class CBOREncoder : public StreamingParserHandler {
515 public:
516  CBOREncoder(C* out, Status* status) : out_(out), status_(status) {
517    *status_ = Status();
518  }
519
520  void HandleMapBegin() override {
521    if (!status_->ok())
522      return;
523    envelopes_.emplace_back();
524    envelopes_.back().EncodeStart(out_);
525    out_->push_back(kInitialByteIndefiniteLengthMap);
526  }
527
528  void HandleMapEnd() override {
529    if (!status_->ok())
530      return;
531    out_->push_back(kStopByte);
532    assert(!envelopes_.empty());
533    if (!envelopes_.back().EncodeStop(out_)) {
534      HandleError(
535          Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
536      return;
537    }
538    envelopes_.pop_back();
539  }
540
541  void HandleArrayBegin() override {
542    if (!status_->ok())
543      return;
544    envelopes_.emplace_back();
545    envelopes_.back().EncodeStart(out_);
546    out_->push_back(kInitialByteIndefiniteLengthArray);
547  }
548
549  void HandleArrayEnd() override {
550    if (!status_->ok())
551      return;
552    out_->push_back(kStopByte);
553    assert(!envelopes_.empty());
554    if (!envelopes_.back().EncodeStop(out_)) {
555      HandleError(
556          Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, out_->size()));
557      return;
558    }
559    envelopes_.pop_back();
560  }
561
562  void HandleString8(span<uint8_t> chars) override {
563    if (!status_->ok())
564      return;
565    EncodeString8(chars, out_);
566  }
567
568  void HandleString16(span<uint16_t> chars) override {
569    if (!status_->ok())
570      return;
571    EncodeFromUTF16(chars, out_);
572  }
573
574  void HandleBinary(span<uint8_t> bytes) override {
575    if (!status_->ok())
576      return;
577    EncodeBinary(bytes, out_);
578  }
579
580  void HandleDouble(double value) override {
581    if (!status_->ok())
582      return;
583    EncodeDouble(value, out_);
584  }
585
586  void HandleInt32(int32_t value) override {
587    if (!status_->ok())
588      return;
589    EncodeInt32(value, out_);
590  }
591
592  void HandleBool(bool value) override {
593    if (!status_->ok())
594      return;
595    // See RFC 7049 Section 2.3, Table 2.
596    out_->push_back(value ? kEncodedTrue : kEncodedFalse);
597  }
598
599  void HandleNull() override {
600    if (!status_->ok())
601      return;
602    // See RFC 7049 Section 2.3, Table 2.
603    out_->push_back(kEncodedNull);
604  }
605
606  void HandleError(Status error) override {
607    if (!status_->ok())
608      return;
609    *status_ = error;
610    out_->clear();
611  }
612
613 private:
614  C* out_;
615  std::vector<EnvelopeEncoder> envelopes_;
616  Status* status_;
617};
618}  // namespace
619
620std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
621    std::vector<uint8_t>* out,
622    Status* status) {
623  return std::unique_ptr<StreamingParserHandler>(
624      new CBOREncoder<std::vector<uint8_t>>(out, status));
625}
626std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
627                                                       Status* status) {
628  return std::unique_ptr<StreamingParserHandler>(
629      new CBOREncoder<std::string>(out, status));
630}
631
632// =============================================================================
633// cbor::CBORTokenizer - for parsing individual CBOR items
634// =============================================================================
635
636CBORTokenizer::CBORTokenizer(span<uint8_t> bytes) : bytes_(bytes) {
637  ReadNextToken(/*enter_envelope=*/false);
638}
639CBORTokenizer::~CBORTokenizer() {}
640
641CBORTokenTag CBORTokenizer::TokenTag() const {
642  return token_tag_;
643}
644
645void CBORTokenizer::Next() {
646  if (token_tag_ == CBORTokenTag::ERROR_VALUE ||
647      token_tag_ == CBORTokenTag::DONE)
648    return;
649  ReadNextToken(/*enter_envelope=*/false);
650}
651
652void CBORTokenizer::EnterEnvelope() {
653  assert(token_tag_ == CBORTokenTag::ENVELOPE);
654  ReadNextToken(/*enter_envelope=*/true);
655}
656
657Status CBORTokenizer::Status() const {
658  return status_;
659}
660
661// The following accessor functions ::GetInt32, ::GetDouble,
662// ::GetString8, ::GetString16WireRep, ::GetBinary, ::GetEnvelopeContents
663// assume that a particular token was recognized in ::ReadNextToken.
664// That's where all the error checking is done. By design,
665// the accessors (assuming the token was recognized) never produce
666// an error.
667
668int32_t CBORTokenizer::GetInt32() const {
669  assert(token_tag_ == CBORTokenTag::INT32);
670  // The range checks happen in ::ReadNextToken().
671  return static_cast<int32_t>(
672      token_start_type_ == MajorType::UNSIGNED
673          ? token_start_internal_value_
674          : -static_cast<int64_t>(token_start_internal_value_) - 1);
675}
676
677double CBORTokenizer::GetDouble() const {
678  assert(token_tag_ == CBORTokenTag::DOUBLE);
679  union {
680    uint64_t from_uint64;
681    double to_double;
682  } reinterpret;
683  reinterpret.from_uint64 = ReadBytesMostSignificantByteFirst<uint64_t>(
684      bytes_.subspan(status_.pos + 1));
685  return reinterpret.to_double;
686}
687
688span<uint8_t> CBORTokenizer::GetString8() const {
689  assert(token_tag_ == CBORTokenTag::STRING8);
690  auto length = static_cast<size_t>(token_start_internal_value_);
691  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
692}
693
694span<uint8_t> CBORTokenizer::GetString16WireRep() const {
695  assert(token_tag_ == CBORTokenTag::STRING16);
696  auto length = static_cast<size_t>(token_start_internal_value_);
697  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
698}
699
700span<uint8_t> CBORTokenizer::GetBinary() const {
701  assert(token_tag_ == CBORTokenTag::BINARY);
702  auto length = static_cast<size_t>(token_start_internal_value_);
703  return bytes_.subspan(status_.pos + (token_byte_length_ - length), length);
704}
705
706span<uint8_t> CBORTokenizer::GetEnvelopeContents() const {
707  assert(token_tag_ == CBORTokenTag::ENVELOPE);
708  auto length = static_cast<size_t>(token_start_internal_value_);
709  return bytes_.subspan(status_.pos + kEncodedEnvelopeHeaderSize, length);
710}
711
// ::ReadNextToken performs all error checking, which is why the accessors
// do not need to carry an error return value.
//
// About the encoded lengths of strings, arrays, etc.: on the wire, CBOR
// stores them as 1, 2, 4 or 8 byte unsigned integers, so they are first read
// as uint64_t, usually into token_start_internal_value_.
//
// These containers also have a representation on the machine, though, so the
// corresponding size computations on the input byte array, on output spans
// (e.g. the payload for a string), etc. involve size_t, which is machine
// specific (in practice either 32 bit or 64 bit).
//
// size_t must not overflow in those computations. kMaxValidLength therefore
// serves two purposes:
// - It rejects values larger than the architecture specific maximum of
//   size_t (which differs between 32 bit and 64 bit architectures).
// - It leaves headroom in uint64_t (the cap is at most the uint64_t maximum
//   shifted right by 2), so that overflow can be detected when lengths
//   (array / string length / etc.) are added: the inputs to an addition are
//   checked to be <= kMaxValidLength, and then the sum is checked for having
//   gone past it.
//
// See also
// https://chromium.googlesource.com/chromium/src/+/master/docs/security/integer-semantics.md
static const uint64_t kMaxValidLength =
    std::min<uint64_t>(std::numeric_limits<size_t>::max(),
                       std::numeric_limits<uint64_t>::max() >> 2);
738
739void CBORTokenizer::ReadNextToken(bool enter_envelope) {
740  if (enter_envelope) {
741    status_.pos += kEncodedEnvelopeHeaderSize;
742  } else {
743    status_.pos =
744        status_.pos == Status::npos() ? 0 : status_.pos + token_byte_length_;
745  }
746  status_.error = Error::OK;
747  if (status_.pos >= bytes_.size()) {
748    token_tag_ = CBORTokenTag::DONE;
749    return;
750  }
751  const size_t remaining_bytes = bytes_.size() - status_.pos;
752  switch (bytes_[status_.pos]) {
753    case kStopByte:
754      SetToken(CBORTokenTag::STOP, 1);
755      return;
756    case kInitialByteIndefiniteLengthMap:
757      SetToken(CBORTokenTag::MAP_START, 1);
758      return;
759    case kInitialByteIndefiniteLengthArray:
760      SetToken(CBORTokenTag::ARRAY_START, 1);
761      return;
762    case kEncodedTrue:
763      SetToken(CBORTokenTag::TRUE_VALUE, 1);
764      return;
765    case kEncodedFalse:
766      SetToken(CBORTokenTag::FALSE_VALUE, 1);
767      return;
768    case kEncodedNull:
769      SetToken(CBORTokenTag::NULL_VALUE, 1);
770      return;
771    case kExpectedConversionToBase64Tag: {  // BINARY
772      const size_t bytes_read = internals::ReadTokenStart(
773          bytes_.subspan(status_.pos + 1), &token_start_type_,
774          &token_start_internal_value_);
775      if (!bytes_read || token_start_type_ != MajorType::BYTE_STRING ||
776          token_start_internal_value_ > kMaxValidLength) {
777        SetError(Error::CBOR_INVALID_BINARY);
778        return;
779      }
780      const uint64_t token_byte_length = token_start_internal_value_ +
781                                         /* tag before token start: */ 1 +
782                                         /* token start: */ bytes_read;
783      if (token_byte_length > remaining_bytes) {
784        SetError(Error::CBOR_INVALID_BINARY);
785        return;
786      }
787      SetToken(CBORTokenTag::BINARY, static_cast<size_t>(token_byte_length));
788      return;
789    }
790    case kInitialByteForDouble: {  // DOUBLE
791      if (kEncodedDoubleSize > remaining_bytes) {
792        SetError(Error::CBOR_INVALID_DOUBLE);
793        return;
794      }
795      SetToken(CBORTokenTag::DOUBLE, kEncodedDoubleSize);
796      return;
797    }
798    case kInitialByteForEnvelope: {  // ENVELOPE
799      if (kEncodedEnvelopeHeaderSize > remaining_bytes) {
800        SetError(Error::CBOR_INVALID_ENVELOPE);
801        return;
802      }
803      // The envelope must be a byte string with 32 bit length.
804      if (bytes_[status_.pos + 1] != kInitialByteFor32BitLengthByteString) {
805        SetError(Error::CBOR_INVALID_ENVELOPE);
806        return;
807      }
808      // Read the length of the byte string.
809      token_start_internal_value_ = ReadBytesMostSignificantByteFirst<uint32_t>(
810          bytes_.subspan(status_.pos + 2));
811      if (token_start_internal_value_ > kMaxValidLength) {
812        SetError(Error::CBOR_INVALID_ENVELOPE);
813        return;
814      }
815      uint64_t token_byte_length =
816          token_start_internal_value_ + kEncodedEnvelopeHeaderSize;
817      if (token_byte_length > remaining_bytes) {
818        SetError(Error::CBOR_INVALID_ENVELOPE);
819        return;
820      }
821      SetToken(CBORTokenTag::ENVELOPE, static_cast<size_t>(token_byte_length));
822      return;
823    }
824    default: {
825      const size_t bytes_read = internals::ReadTokenStart(
826          bytes_.subspan(status_.pos), &token_start_type_,
827          &token_start_internal_value_);
828      switch (token_start_type_) {
829        case MajorType::UNSIGNED:  // INT32.
830          // INT32 is a signed int32 (int32 makes sense for the
831          // inspector_protocol, it's not a CBOR limitation), so we check
832          // against the signed max, so that the allowable values are
833          // 0, 1, 2, ... 2^31 - 1.
834          if (!bytes_read ||
835                static_cast<int64_t>(std::numeric_limits<int32_t>::max()) <
836                  static_cast<int64_t>(token_start_internal_value_)) {
837            SetError(Error::CBOR_INVALID_INT32);
838            return;
839          }
840          SetToken(CBORTokenTag::INT32, bytes_read);
841          return;
842        case MajorType::NEGATIVE: {  // INT32.
843          // INT32 is a signed int32 (int32 makes sense for the
844          // inspector_protocol, it's not a CBOR limitation); in CBOR, the
845          // negative values for INT32 are represented as NEGATIVE, that is, -1
846          // INT32 is represented as 1 << 5 | 0 (major type 1, additional info
847          // value 0).
848          // The represented allowed values range is -1 to -2^31.
849          // They are mapped into the encoded range of 0 to 2^31-1.
          // We check the payload in token_start_internal_value_ against
851          // that range (2^31-1 is also known as
852          // std::numeric_limits<int32_t>::max()).
853          if (!bytes_read ||
854	      static_cast<int64_t>(token_start_internal_value_) >
855                static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
856            SetError(Error::CBOR_INVALID_INT32);
857            return;
858          }
859          SetToken(CBORTokenTag::INT32, bytes_read);
860          return;
861        }
862        case MajorType::STRING: {  // STRING8.
863          if (!bytes_read || token_start_internal_value_ > kMaxValidLength) {
864            SetError(Error::CBOR_INVALID_STRING8);
865            return;
866          }
867          uint64_t token_byte_length = token_start_internal_value_ + bytes_read;
868          if (token_byte_length > remaining_bytes) {
869            SetError(Error::CBOR_INVALID_STRING8);
870            return;
871          }
872          SetToken(CBORTokenTag::STRING8,
873                   static_cast<size_t>(token_byte_length));
874          return;
875        }
876        case MajorType::BYTE_STRING: {  // STRING16.
877          // Length must be divisible by 2 since UTF16 is 2 bytes per
878          // character, hence the &1 check.
879          if (!bytes_read || token_start_internal_value_ > kMaxValidLength ||
880              token_start_internal_value_ & 1) {
881            SetError(Error::CBOR_INVALID_STRING16);
882            return;
883          }
884          uint64_t token_byte_length = token_start_internal_value_ + bytes_read;
885          if (token_byte_length > remaining_bytes) {
886            SetError(Error::CBOR_INVALID_STRING16);
887            return;
888          }
889          SetToken(CBORTokenTag::STRING16,
890                   static_cast<size_t>(token_byte_length));
891          return;
892        }
893        case MajorType::ARRAY:
894        case MajorType::MAP:
895        case MajorType::TAG:
896        case MajorType::SIMPLE_VALUE:
897          SetError(Error::CBOR_UNSUPPORTED_VALUE);
898          return;
899      }
900    }
901  }
902}
903
904void CBORTokenizer::SetToken(CBORTokenTag token_tag, size_t token_byte_length) {
905  token_tag_ = token_tag;
906  token_byte_length_ = token_byte_length;
907}
908
909void CBORTokenizer::SetError(Error error) {
910  token_tag_ = CBORTokenTag::ERROR_VALUE;
911  status_.error = error;
912}
913
914// =============================================================================
915// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
916// =============================================================================
917
918namespace {
// When parsing CBOR, we limit recursion depth for objects and arrays
// to this constant. ParseValue reports CBOR_STACK_LIMIT_EXCEEDED as soon as
// its |stack_depth| argument is greater than this value.
static constexpr int kStackLimit = 300;
922
// Below are three parsing routines for CBOR, which cover enough
// to roundtrip JSON messages.
// They are declared up front because they call each other recursively:
// ParseValue dispatches to ParseMap / ParseArray, which in turn call
// ParseValue for each contained element. Each returns false after reporting
// an error to |out|, and true otherwise.
bool ParseMap(int32_t stack_depth,
              CBORTokenizer* tokenizer,
              StreamingParserHandler* out);
bool ParseArray(int32_t stack_depth,
                CBORTokenizer* tokenizer,
                StreamingParserHandler* out);
bool ParseValue(int32_t stack_depth,
                CBORTokenizer* tokenizer,
                StreamingParserHandler* out);
934
935void ParseUTF16String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
936  std::vector<uint16_t> value;
937  span<uint8_t> rep = tokenizer->GetString16WireRep();
938  for (size_t ii = 0; ii < rep.size(); ii += 2)
939    value.push_back((rep[ii + 1] << 8) | rep[ii]);
940  out->HandleString16(span<uint16_t>(value.data(), value.size()));
941  tokenizer->Next();
942}
943
944bool ParseUTF8String(CBORTokenizer* tokenizer, StreamingParserHandler* out) {
945  assert(tokenizer->TokenTag() == CBORTokenTag::STRING8);
946  out->HandleString8(tokenizer->GetString8());
947  tokenizer->Next();
948  return true;
949}
950
951bool ParseValue(int32_t stack_depth,
952                CBORTokenizer* tokenizer,
953                StreamingParserHandler* out) {
954  if (stack_depth > kStackLimit) {
955    out->HandleError(
956        Status{Error::CBOR_STACK_LIMIT_EXCEEDED, tokenizer->Status().pos});
957    return false;
958  }
959  // Skip past the envelope to get to what's inside.
960  if (tokenizer->TokenTag() == CBORTokenTag::ENVELOPE)
961    tokenizer->EnterEnvelope();
962  switch (tokenizer->TokenTag()) {
963    case CBORTokenTag::ERROR_VALUE:
964      out->HandleError(tokenizer->Status());
965      return false;
966    case CBORTokenTag::DONE:
967      out->HandleError(Status{Error::CBOR_UNEXPECTED_EOF_EXPECTED_VALUE,
968                              tokenizer->Status().pos});
969      return false;
970    case CBORTokenTag::TRUE_VALUE:
971      out->HandleBool(true);
972      tokenizer->Next();
973      return true;
974    case CBORTokenTag::FALSE_VALUE:
975      out->HandleBool(false);
976      tokenizer->Next();
977      return true;
978    case CBORTokenTag::NULL_VALUE:
979      out->HandleNull();
980      tokenizer->Next();
981      return true;
982    case CBORTokenTag::INT32:
983      out->HandleInt32(tokenizer->GetInt32());
984      tokenizer->Next();
985      return true;
986    case CBORTokenTag::DOUBLE:
987      out->HandleDouble(tokenizer->GetDouble());
988      tokenizer->Next();
989      return true;
990    case CBORTokenTag::STRING8:
991      return ParseUTF8String(tokenizer, out);
992    case CBORTokenTag::STRING16:
993      ParseUTF16String(tokenizer, out);
994      return true;
995    case CBORTokenTag::BINARY: {
996      out->HandleBinary(tokenizer->GetBinary());
997      tokenizer->Next();
998      return true;
999    }
1000    case CBORTokenTag::MAP_START:
1001      return ParseMap(stack_depth + 1, tokenizer, out);
1002    case CBORTokenTag::ARRAY_START:
1003      return ParseArray(stack_depth + 1, tokenizer, out);
1004    default:
1005      out->HandleError(
1006          Status{Error::CBOR_UNSUPPORTED_VALUE, tokenizer->Status().pos});
1007      return false;
1008  }
1009}
1010
1011// |bytes| must start with the indefinite length array byte, so basically,
1012// ParseArray may only be called after an indefinite length array has been
1013// detected.
1014bool ParseArray(int32_t stack_depth,
1015                CBORTokenizer* tokenizer,
1016                StreamingParserHandler* out) {
1017  assert(tokenizer->TokenTag() == CBORTokenTag::ARRAY_START);
1018  tokenizer->Next();
1019  out->HandleArrayBegin();
1020  while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1021    if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1022      out->HandleError(
1023          Status{Error::CBOR_UNEXPECTED_EOF_IN_ARRAY, tokenizer->Status().pos});
1024      return false;
1025    }
1026    if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1027      out->HandleError(tokenizer->Status());
1028      return false;
1029    }
1030    // Parse value.
1031    if (!ParseValue(stack_depth, tokenizer, out))
1032      return false;
1033  }
1034  out->HandleArrayEnd();
1035  tokenizer->Next();
1036  return true;
1037}
1038
1039// |bytes| must start with the indefinite length array byte, so basically,
1040// ParseArray may only be called after an indefinite length array has been
1041// detected.
1042bool ParseMap(int32_t stack_depth,
1043              CBORTokenizer* tokenizer,
1044              StreamingParserHandler* out) {
1045  assert(tokenizer->TokenTag() == CBORTokenTag::MAP_START);
1046  out->HandleMapBegin();
1047  tokenizer->Next();
1048  while (tokenizer->TokenTag() != CBORTokenTag::STOP) {
1049    if (tokenizer->TokenTag() == CBORTokenTag::DONE) {
1050      out->HandleError(
1051          Status{Error::CBOR_UNEXPECTED_EOF_IN_MAP, tokenizer->Status().pos});
1052      return false;
1053    }
1054    if (tokenizer->TokenTag() == CBORTokenTag::ERROR_VALUE) {
1055      out->HandleError(tokenizer->Status());
1056      return false;
1057    }
1058    // Parse key.
1059    if (tokenizer->TokenTag() == CBORTokenTag::STRING8) {
1060      if (!ParseUTF8String(tokenizer, out))
1061        return false;
1062    } else if (tokenizer->TokenTag() == CBORTokenTag::STRING16) {
1063      ParseUTF16String(tokenizer, out);
1064    } else {
1065      out->HandleError(
1066          Status{Error::CBOR_INVALID_MAP_KEY, tokenizer->Status().pos});
1067      return false;
1068    }
1069    // Parse value.
1070    if (!ParseValue(stack_depth, tokenizer, out))
1071      return false;
1072  }
1073  out->HandleMapEnd();
1074  tokenizer->Next();
1075  return true;
1076}
1077}  // namespace
1078
1079void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out) {
1080  if (bytes.empty()) {
1081    out->HandleError(Status{Error::CBOR_NO_INPUT, 0});
1082    return;
1083  }
1084  if (bytes[0] != kInitialByteForEnvelope) {
1085    out->HandleError(Status{Error::CBOR_INVALID_START_BYTE, 0});
1086    return;
1087  }
1088  CBORTokenizer tokenizer(bytes);
1089  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1090    out->HandleError(tokenizer.Status());
1091    return;
1092  }
1093  // We checked for the envelope start byte above, so the tokenizer
1094  // must agree here, since it's not an error.
1095  assert(tokenizer.TokenTag() == CBORTokenTag::ENVELOPE);
1096  tokenizer.EnterEnvelope();
1097  if (tokenizer.TokenTag() != CBORTokenTag::MAP_START) {
1098    out->HandleError(
1099        Status{Error::CBOR_MAP_START_EXPECTED, tokenizer.Status().pos});
1100    return;
1101  }
1102  if (!ParseMap(/*stack_depth=*/1, &tokenizer, out))
1103    return;
1104  if (tokenizer.TokenTag() == CBORTokenTag::DONE)
1105    return;
1106  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE) {
1107    out->HandleError(tokenizer.Status());
1108    return;
1109  }
1110  out->HandleError(Status{Error::CBOR_TRAILING_JUNK, tokenizer.Status().pos});
1111}
1112
1113// =============================================================================
// cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages
1115// =============================================================================
1116
// Appends the entry |string8_key| -> |string8_value| (both encoded as
// STRING8) to the map held in |cbor|, editing the buffer in place.
//
// |cbor| must consist of exactly one envelope whose content begins with an
// indefinite length map start byte and whose last byte is the stop byte;
// otherwise an error Status is returned and |cbor| is left untouched.
// On success the stop byte is moved behind the new entry and the 32 bit
// envelope length is rewritten. C is the container type of the buffer (this
// file instantiates it for std::vector<uint8_t> and std::string).
template <typename C>
Status AppendString8EntryToCBORMapTmpl(span<uint8_t> string8_key,
                                       span<uint8_t> string8_value,
                                       C* cbor) {
  // Careful below: Don't compare (*cbor)[idx] with a uint8_t, since
  // it could be a char (signed!). Instead, use bytes.
  span<uint8_t> bytes(reinterpret_cast<const uint8_t*>(cbor->data()),
                      cbor->size());
  CBORTokenizer tokenizer(bytes);
  if (tokenizer.TokenTag() == CBORTokenTag::ERROR_VALUE)
    return tokenizer.Status();
  if (tokenizer.TokenTag() != CBORTokenTag::ENVELOPE)
    return Status(Error::CBOR_INVALID_ENVELOPE, 0);
  size_t envelope_size = tokenizer.GetEnvelopeContents().size();
  size_t old_size = cbor->size();
  // The envelope must span the entire buffer - no trailing bytes allowed.
  if (old_size != envelope_size + kEncodedEnvelopeHeaderSize)
    return Status(Error::CBOR_INVALID_ENVELOPE, 0);
  if (envelope_size == 0 ||
      (tokenizer.GetEnvelopeContents()[0] != EncodeIndefiniteLengthMapStart()))
    return Status(Error::CBOR_MAP_START_EXPECTED, kEncodedEnvelopeHeaderSize);
  if (bytes[bytes.size() - 1] != EncodeStop())
    return Status(Error::CBOR_MAP_STOP_EXPECTED, cbor->size() - 1);
  // From here on |cbor| is modified; |bytes| and |tokenizer| were built
  // over the buffer as it was before and are not used again below.
  // Replace the trailing stop byte with key, value, and a new stop byte.
  cbor->pop_back();
  EncodeString8(string8_key, cbor);
  EncodeString8(string8_value, cbor);
  cbor->push_back(EncodeStop());
  size_t new_envelope_size = envelope_size + (cbor->size() - old_size);
  // NOTE(review): on this error path |cbor| has already been extended but
  // its length field still holds the old value - confirm callers discard
  // the buffer when they see this error.
  if (new_envelope_size > std::numeric_limits<uint32_t>::max())
    return Status(Error::CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED, 0);
  // Locate the 4 length bytes that directly precede the envelope content
  // and overwrite them with the new size, most significant byte first.
  size_t size_pos = cbor->size() - new_envelope_size - sizeof(uint32_t);
  uint8_t* out = reinterpret_cast<uint8_t*>(&cbor->at(size_pos));
  *(out++) = (new_envelope_size >> 24) & 0xff;
  *(out++) = (new_envelope_size >> 16) & 0xff;
  *(out++) = (new_envelope_size >> 8) & 0xff;
  *(out) = new_envelope_size & 0xff;
  return Status();
}
1154Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1155                                   span<uint8_t> string8_value,
1156                                   std::vector<uint8_t>* cbor) {
1157  return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1158}
1159Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
1160                                   span<uint8_t> string8_value,
1161                                   std::string* cbor) {
1162  return AppendString8EntryToCBORMapTmpl(string8_key, string8_value, cbor);
1163}
1164}  // namespace cbor
1165
1166namespace json {
1167
1168// =============================================================================
1169// json::NewJSONEncoder - for encoding streaming parser events as JSON
1170// =============================================================================
1171
1172namespace {
// Appends |value| to |out| as exactly four lowercase hexadecimal digits,
// starting with the most significant nibble.
template <typename C>
void PrintHex(uint16_t value, C* out) {
  static constexpr char kHexDigits[] = "0123456789abcdef";
  for (int shift = 12; shift >= 0; shift -= 4)
    out->push_back(kHexDigits[(value >> shift) & 0xf]);
}
1181
// The JSON writer below keeps a stack of State instances, one per open
// container. A State knows which kind of container it stands for and how
// many elements have been started in it, which is all that is needed to
// decide which delimiter has to precede the next element.
enum class Container {
  // The top level, outside of any JSON object or array.
  NONE,
  // Within a JSON object.
  MAP,
  // Within a JSON array.
  ARRAY
};

class State {
 public:
  explicit State(Container container) : container_(container) {}

  // Registers the start of another element in this container and appends the
  // delimiter that must precede it (if any) to |out|.
  void StartElement(std::vector<uint8_t>* out) { StartElementTmpl(out); }
  void StartElement(std::string* out) { StartElementTmpl(out); }

  Container container() const { return container_; }

 private:
  template <typename C>
  void StartElementTmpl(C* out) {
    // The top level holds at most a single element.
    assert(container_ != Container::NONE || size_ == 0);
    const int already_started = size_++;
    if (already_started == 0)
      return;  // The first element is never preceded by a delimiter.
    // Outside of arrays, elements alternate between keys and values: after
    // an odd number of elements the next one is a value and follows a ':'.
    const bool follows_key =
        container_ != Container::ARRAY && (already_started & 1);
    out->push_back(follows_key ? ':' : ',');
  }

  Container container_ = Container::NONE;
  int size_ = 0;
};
1214
// Maps a 6 bit value (0..63) to its Base64 character: 'A'-'Z', 'a'-'z',
// '0'-'9', '+', '/'. Base64Encode indexes into this table.
constexpr char kBase64Table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz0123456789+/";
1218
1219template <typename C>
1220void Base64Encode(const span<uint8_t>& in, C* out) {
1221  // The following three cases are based on the tables in the example
1222  // section in https://en.wikipedia.org/wiki/Base64. We process three
1223  // input bytes at a time, emitting 4 output bytes at a time.
1224  size_t ii = 0;
1225
1226  // While possible, process three input bytes.
1227  for (; ii + 3 <= in.size(); ii += 3) {
1228    uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8) | in[ii + 2];
1229    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1230    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1231    out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1232    out->push_back(kBase64Table[twentyfour_bits & 0x3f]);
1233  }
1234  if (ii + 2 <= in.size()) {  // Process two input bytes.
1235    uint32_t twentyfour_bits = (in[ii] << 16) | (in[ii + 1] << 8);
1236    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1237    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1238    out->push_back(kBase64Table[(twentyfour_bits >> 6) & 0x3f]);
1239    out->push_back('=');  // Emit padding.
1240    return;
1241  }
1242  if (ii + 1 <= in.size()) {  // Process a single input byte.
1243    uint32_t twentyfour_bits = (in[ii] << 16);
1244    out->push_back(kBase64Table[(twentyfour_bits >> 18)]);
1245    out->push_back(kBase64Table[(twentyfour_bits >> 12) & 0x3f]);
1246    out->push_back('=');  // Emit padding.
1247    out->push_back('=');  // Emit padding.
1248  }
1249}
1250
1251// Implements a handler for JSON parser events to emit a JSON string.
1252template <typename C>
1253class JSONEncoder : public StreamingParserHandler {
1254 public:
1255  JSONEncoder(const Platform* platform, C* out, Status* status)
1256      : platform_(platform), out_(out), status_(status) {
1257    *status_ = Status();
1258    state_.emplace(Container::NONE);
1259  }
1260
1261  void HandleMapBegin() override {
1262    if (!status_->ok())
1263      return;
1264    assert(!state_.empty());
1265    state_.top().StartElement(out_);
1266    state_.emplace(Container::MAP);
1267    Emit('{');
1268  }
1269
1270  void HandleMapEnd() override {
1271    if (!status_->ok())
1272      return;
1273    assert(state_.size() >= 2 && state_.top().container() == Container::MAP);
1274    state_.pop();
1275    Emit('}');
1276  }
1277
1278  void HandleArrayBegin() override {
1279    if (!status_->ok())
1280      return;
1281    state_.top().StartElement(out_);
1282    state_.emplace(Container::ARRAY);
1283    Emit('[');
1284  }
1285
1286  void HandleArrayEnd() override {
1287    if (!status_->ok())
1288      return;
1289    assert(state_.size() >= 2 && state_.top().container() == Container::ARRAY);
1290    state_.pop();
1291    Emit(']');
1292  }
1293
1294  void HandleString16(span<uint16_t> chars) override {
1295    if (!status_->ok())
1296      return;
1297    state_.top().StartElement(out_);
1298    Emit('"');
1299    for (const uint16_t ch : chars) {
1300      if (ch == '"') {
1301        Emit("\\\"");
1302      } else if (ch == '\\') {
1303        Emit("\\\\");
1304      } else if (ch == '\b') {
1305        Emit("\\b");
1306      } else if (ch == '\f') {
1307        Emit("\\f");
1308      } else if (ch == '\n') {
1309        Emit("\\n");
1310      } else if (ch == '\r') {
1311        Emit("\\r");
1312      } else if (ch == '\t') {
1313        Emit("\\t");
1314      } else if (ch >= 32 && ch <= 126) {
1315        Emit(ch);
1316      } else {
1317        Emit("\\u");
1318        PrintHex(ch, out_);
1319      }
1320    }
1321    Emit('"');
1322  }
1323
1324  void HandleString8(span<uint8_t> chars) override {
1325    if (!status_->ok())
1326      return;
1327    state_.top().StartElement(out_);
1328    Emit('"');
1329    for (size_t ii = 0; ii < chars.size(); ++ii) {
1330      uint8_t c = chars[ii];
1331      if (c == '"') {
1332        Emit("\\\"");
1333      } else if (c == '\\') {
1334        Emit("\\\\");
1335      } else if (c == '\b') {
1336        Emit("\\b");
1337      } else if (c == '\f') {
1338        Emit("\\f");
1339      } else if (c == '\n') {
1340        Emit("\\n");
1341      } else if (c == '\r') {
1342        Emit("\\r");
1343      } else if (c == '\t') {
1344        Emit("\\t");
1345      } else if (c >= 32 && c <= 126) {
1346        Emit(c);
1347      } else if (c < 32) {
1348        Emit("\\u");
1349        PrintHex(static_cast<uint16_t>(c), out_);
1350      } else {
1351        // Inspect the leading byte to figure out how long the utf8
1352        // byte sequence is; while doing this initialize |codepoint|
1353        // with the first few bits.
1354        // See table in: https://en.wikipedia.org/wiki/UTF-8
1355        // byte one is 110x xxxx -> 2 byte utf8 sequence
1356        // byte one is 1110 xxxx -> 3 byte utf8 sequence
1357        // byte one is 1111 0xxx -> 4 byte utf8 sequence
1358        uint32_t codepoint;
1359        int num_bytes_left;
1360        if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
1361          num_bytes_left = 1;
1362          codepoint = c & 0x1f;
1363        } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
1364          num_bytes_left = 2;
1365          codepoint = c & 0x0f;
1366        } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
1367          codepoint = c & 0x07;
1368          num_bytes_left = 3;
1369        } else {
1370          continue;  // invalid leading byte
1371        }
1372
1373        // If we have enough bytes in our input, decode the remaining ones
1374        // belonging to this Unicode character into |codepoint|.
1375        if (ii + num_bytes_left > chars.size())
1376          continue;
1377        while (num_bytes_left > 0) {
1378          c = chars[++ii];
1379          --num_bytes_left;
1380          // Check the next byte is a continuation byte, that is 10xx xxxx.
1381          if ((c & 0xc0) != 0x80)
1382            continue;
1383          codepoint = (codepoint << 6) | (c & 0x3f);
1384        }
1385
1386        // Disallow overlong encodings for ascii characters, as these
1387        // would include " and other characters significant to JSON
1388        // string termination / control.
1389        if (codepoint < 0x7f)
1390          continue;
1391        // Invalid in UTF8, and can't be represented in UTF16 anyway.
1392        if (codepoint > 0x10ffff)
1393          continue;
1394
1395        // So, now we transcode to UTF16,
1396        // using the math described at https://en.wikipedia.org/wiki/UTF-16,
1397        // for either one or two 16 bit characters.
1398        if (codepoint < 0xffff) {
1399          Emit("\\u");
1400          PrintHex(static_cast<uint16_t>(codepoint), out_);
1401          continue;
1402        }
1403        codepoint -= 0x10000;
1404        // high surrogate
1405        Emit("\\u");
1406        PrintHex(static_cast<uint16_t>((codepoint >> 10) + 0xd800), out_);
1407        // low surrogate
1408        Emit("\\u");
1409        PrintHex(static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00), out_);
1410      }
1411    }
1412    Emit('"');
1413  }
1414
1415  void HandleBinary(span<uint8_t> bytes) override {
1416    if (!status_->ok())
1417      return;
1418    state_.top().StartElement(out_);
1419    Emit('"');
1420    Base64Encode(bytes, out_);
1421    Emit('"');
1422  }
1423
1424  void HandleDouble(double value) override {
1425    if (!status_->ok())
1426      return;
1427    state_.top().StartElement(out_);
1428    // JSON cannot represent NaN or Infinity. So, for compatibility,
1429    // we behave like the JSON object in web browsers: emit 'null'.
1430    if (!std::isfinite(value)) {
1431      Emit("null");
1432      return;
1433    }
1434    std::unique_ptr<char[]> str_value = platform_->DToStr(value);
1435
1436    // DToStr may fail to emit a 0 before the decimal dot. E.g. this is
1437    // the case in base::NumberToString in Chromium (which is based on
1438    // dmg_fp). So, much like
1439    // https://cs.chromium.org/chromium/src/base/json/json_writer.cc
1440    // we probe for this and emit the leading 0 anyway if necessary.
1441    const char* chars = str_value.get();
1442    if (chars[0] == '.') {
1443      Emit('0');
1444    } else if (chars[0] == '-' && chars[1] == '.') {
1445      Emit("-0");
1446      ++chars;
1447    }
1448    Emit(chars);
1449  }
1450
1451  void HandleInt32(int32_t value) override {
1452    if (!status_->ok())
1453      return;
1454    state_.top().StartElement(out_);
1455    Emit(std::to_string(value));
1456  }
1457
1458  void HandleBool(bool value) override {
1459    if (!status_->ok())
1460      return;
1461    state_.top().StartElement(out_);
1462    Emit(value ? "true" : "false");
1463  }
1464
1465  void HandleNull() override {
1466    if (!status_->ok())
1467      return;
1468    state_.top().StartElement(out_);
1469    Emit("null");
1470  }
1471
1472  void HandleError(Status error) override {
1473    assert(!error.ok());
1474    *status_ = error;
1475    out_->clear();
1476  }
1477
1478 private:
1479  void Emit(char c) { out_->push_back(c); }
1480  void Emit(const char* str) {
1481    out_->insert(out_->end(), str, str + strlen(str));
1482  }
1483  void Emit(const std::string& str) {
1484    out_->insert(out_->end(), str.begin(), str.end());
1485  }
1486
1487  const Platform* platform_;
1488  C* out_;
1489  Status* status_;
1490  std::stack<State> state_;
1491};
1492}  // namespace
1493
1494std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
1495    const Platform* platform,
1496    std::vector<uint8_t>* out,
1497    Status* status) {
1498  return std::unique_ptr<StreamingParserHandler>(
1499      new JSONEncoder<std::vector<uint8_t>>(platform, out, status));
1500}
1501std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
1502                                                       std::string* out,
1503                                                       Status* status) {
1504  return std::unique_ptr<StreamingParserHandler>(
1505      new JSONEncoder<std::string>(platform, out, status));
1506}
1507
1508// =============================================================================
1509// json::ParseJSON - for receiving streaming parser events for JSON.
1510// =============================================================================
1511
1512namespace {
// NOTE(review): presumably the maximum nesting depth of objects / arrays
// accepted by JsonParser (the error JSON_PARSER_STACK_LIMIT_EXCEEDED
// exists) - the use is not visible in this part of the file; confirm.
const int kStackLimit = 300;
1514
// The kinds of tokens distinguished when parsing JSON text.
// NOTE(review): the enumerators are presumably produced by a tokenizing
// routine of JsonParser that is not visible in this part of the file;
// ListSeparator / ObjectPairSeparator presumably stand for ',' and ':'.
enum Token {
  ObjectBegin,
  ObjectEnd,
  ArrayBegin,
  ArrayEnd,
  StringLiteral,
  Number,
  BoolTrue,
  BoolFalse,
  NullToken,
  ListSeparator,
  ObjectPairSeparator,
  InvalidToken,
  NoInput
};
1530
// The spellings of the three JSON literals, as \0 terminated strings
// suitable for the |token| parameter of JsonParser::ParseConstToken.
const char* const kNullString = "null";
const char* const kTrueString = "true";
const char* const kFalseString = "false";
1534
1535template <typename Char>
1536class JsonParser {
1537 public:
  // Stores |platform| (used by CharsToDouble for the string to double
  // conversion) and |handler| for later use.
  // NOTE(review): |handler| presumably receives the parse events and
  // errors - its use is not visible in this part of the file.
  JsonParser(const Platform* platform, StreamingParserHandler* handler)
      : platform_(platform), handler_(handler) {}
1540
1541  void Parse(const Char* start, size_t length) {
1542    start_pos_ = start;
1543    const Char* end = start + length;
1544    const Char* tokenEnd = nullptr;
1545    ParseValue(start, end, &tokenEnd, 0);
1546    if (error_)
1547      return;
1548    if (tokenEnd != end) {
1549      HandleError(Error::JSON_PARSER_UNPROCESSED_INPUT_REMAINS, tokenEnd);
1550    }
1551  }
1552
1553 private:
1554  bool CharsToDouble(const uint16_t* chars, size_t length, double* result) {
1555    std::string buffer;
1556    buffer.reserve(length + 1);
1557    for (size_t ii = 0; ii < length; ++ii) {
1558      bool is_ascii = !(chars[ii] & ~0x7F);
1559      if (!is_ascii)
1560        return false;
1561      buffer.push_back(static_cast<char>(chars[ii]));
1562    }
1563    return platform_->StrToD(buffer.c_str(), result);
1564  }
1565
1566  bool CharsToDouble(const uint8_t* chars, size_t length, double* result) {
1567    std::string buffer(reinterpret_cast<const char*>(chars), length);
1568    return platform_->StrToD(buffer.c_str(), result);
1569  }
1570
1571  static bool ParseConstToken(const Char* start,
1572                              const Char* end,
1573                              const Char** token_end,
1574                              const char* token) {
1575    // |token| is \0 terminated, it's one of the constants at top of the file.
1576    while (start < end && *token != '\0' && *start++ == *token++) {
1577    }
1578    if (*token != '\0')
1579      return false;
1580    *token_end = start;
1581    return true;
1582  }
1583
1584  static bool ReadInt(const Char* start,
1585                      const Char* end,
1586                      const Char** token_end,
1587                      bool allow_leading_zeros) {
1588    if (start == end)
1589      return false;
1590    bool has_leading_zero = '0' == *start;
1591    int length = 0;
1592    while (start < end && '0' <= *start && *start <= '9') {
1593      ++start;
1594      ++length;
1595    }
1596    if (!length)
1597      return false;
1598    if (!allow_leading_zeros && length > 1 && has_leading_zero)
1599      return false;
1600    *token_end = start;
1601    return true;
1602  }
1603
1604  static bool ParseNumberToken(const Char* start,
1605                               const Char* end,
1606                               const Char** token_end) {
1607    // We just grab the number here. We validate the size in DecodeNumber.
1608    // According to RFC4627, a valid number is: [minus] int [frac] [exp]
1609    if (start == end)
1610      return false;
1611    Char c = *start;
1612    if ('-' == c)
1613      ++start;
1614
1615    if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/false))
1616      return false;
1617    if (start == end) {
1618      *token_end = start;
1619      return true;
1620    }
1621
1622    // Optional fraction part
1623    c = *start;
1624    if ('.' == c) {
1625      ++start;
1626      if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1627        return false;
1628      if (start == end) {
1629        *token_end = start;
1630        return true;
1631      }
1632      c = *start;
1633    }
1634
1635    // Optional exponent part
1636    if ('e' == c || 'E' == c) {
1637      ++start;
1638      if (start == end)
1639        return false;
1640      c = *start;
1641      if ('-' == c || '+' == c) {
1642        ++start;
1643        if (start == end)
1644          return false;
1645      }
1646      if (!ReadInt(start, end, &start, /*allow_leading_zeros=*/true))
1647        return false;
1648    }
1649
1650    *token_end = start;
1651    return true;
1652  }
1653
1654  static bool ReadHexDigits(const Char* start,
1655                            const Char* end,
1656                            const Char** token_end,
1657                            int digits) {
1658    if (end - start < digits)
1659      return false;
1660    for (int i = 0; i < digits; ++i) {
1661      Char c = *start++;
1662      if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
1663            ('A' <= c && c <= 'F')))
1664        return false;
1665    }
1666    *token_end = start;
1667    return true;
1668  }
1669
1670  static bool ParseStringToken(const Char* start,
1671                               const Char* end,
1672                               const Char** token_end) {
1673    while (start < end) {
1674      Char c = *start++;
1675      if ('\\' == c) {
1676        if (start == end)
1677          return false;
1678        c = *start++;
1679        // Make sure the escaped char is valid.
1680        switch (c) {
1681          case 'x':
1682            if (!ReadHexDigits(start, end, &start, 2))
1683              return false;
1684            break;
1685          case 'u':
1686            if (!ReadHexDigits(start, end, &start, 4))
1687              return false;
1688            break;
1689          case '\\':
1690          case '/':
1691          case 'b':
1692          case 'f':
1693          case 'n':
1694          case 'r':
1695          case 't':
1696          case 'v':
1697          case '"':
1698            break;
1699          default:
1700            return false;
1701        }
1702      } else if ('"' == c) {
1703        *token_end = start;
1704        return true;
1705      }
1706    }
1707    return false;
1708  }
1709
// Skips one comment beginning at |start|. Two forms are recognized:
//   - "//" up to and including the first '\n' or '\r', or up to |end|;
//   - "/*" up to and including the matching "*/".
// On success, stores the position after the comment in |*comment_end| and
// returns true. Returns false if the input does not begin with a comment or
// if a block comment is not closed before |end|.
static bool SkipComment(const Char* start,
                        const Char* end,
                        const Char** comment_end) {
  // A comment opener needs two characters, the first of which is '/'.
  if (end - start < 2 || *start != '/')
    return false;

  const Char* cursor = start + 1;
  const Char marker = *cursor++;

  if (marker == '/') {
    // Line comment: stop right after the first line terminator.
    while (cursor < end) {
      const Char current = *cursor++;
      if (current == '\n' || current == '\r') {
        *comment_end = cursor;
        return true;
      }
    }
    // Reaching end-of-input inside a line comment is acceptable.
    *comment_end = end;
    return true;
  }

  if (marker != '*')
    return false;

  // Block comment: look for '*' immediately followed by '/'. The '*' of the
  // opener does not count, so "/*/" is not a closed comment.
  Char last_seen = '\0';
  while (cursor < end) {
    const Char current = *cursor;
    if (last_seen == '*' && current == '/') {
      *comment_end = cursor + 1;
      return true;
    }
    last_seen = current;
    ++cursor;
  }
  // A block comment has to be closed before end-of-input.
  return false;
}
1748
// Returns true if |c| is one of the whitespace characters the tokenizer
// skips: space, tab, line feed, vertical tab, form feed or carriage return.
static bool IsSpaceOrNewLine(Char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':  // vertical tab
    case '\f':  // form feed (page break)
    case '\r':
      return true;
    default:
      return false;
  }
}
1754
1755  static void SkipWhitespaceAndComments(const Char* start,
1756                                        const Char* end,
1757                                        const Char** whitespace_end) {
1758    while (start < end) {
1759      if (IsSpaceOrNewLine(*start)) {
1760        ++start;
1761      } else if (*start == '/') {
1762        const Char* comment_end = nullptr;
1763        if (!SkipComment(start, end, &comment_end))
1764          break;
1765        start = comment_end;
1766      } else {
1767        break;
1768      }
1769    }
1770    *whitespace_end = start;
1771  }
1772
1773  static Token ParseToken(const Char* start,
1774                          const Char* end,
1775                          const Char** tokenStart,
1776                          const Char** token_end) {
1777    SkipWhitespaceAndComments(start, end, tokenStart);
1778    start = *tokenStart;
1779
1780    if (start == end)
1781      return NoInput;
1782
1783    switch (*start) {
1784      case 'n':
1785        if (ParseConstToken(start, end, token_end, kNullString))
1786          return NullToken;
1787        break;
1788      case 't':
1789        if (ParseConstToken(start, end, token_end, kTrueString))
1790          return BoolTrue;
1791        break;
1792      case 'f':
1793        if (ParseConstToken(start, end, token_end, kFalseString))
1794          return BoolFalse;
1795        break;
1796      case '[':
1797        *token_end = start + 1;
1798        return ArrayBegin;
1799      case ']':
1800        *token_end = start + 1;
1801        return ArrayEnd;
1802      case ',':
1803        *token_end = start + 1;
1804        return ListSeparator;
1805      case '{':
1806        *token_end = start + 1;
1807        return ObjectBegin;
1808      case '}':
1809        *token_end = start + 1;
1810        return ObjectEnd;
1811      case ':':
1812        *token_end = start + 1;
1813        return ObjectPairSeparator;
1814      case '0':
1815      case '1':
1816      case '2':
1817      case '3':
1818      case '4':
1819      case '5':
1820      case '6':
1821      case '7':
1822      case '8':
1823      case '9':
1824      case '-':
1825        if (ParseNumberToken(start, end, token_end))
1826          return Number;
1827        break;
1828      case '"':
1829        if (ParseStringToken(start + 1, end, token_end))
1830          return StringLiteral;
1831        break;
1832    }
1833    return InvalidToken;
1834  }
1835
// Converts a single hexadecimal digit to its numeric value (0-15).
// Callers must pass a valid hex digit; any other character trips the assert
// and, when asserts are compiled out, yields 0.
static int HexToInt(Char c) {
  if ('0' <= c && c <= '9')
    return c - '0';
  if ('A' <= c && c <= 'F')
    return c - 'A' + 10;
  if ('a' <= c && c <= 'f')
    return c - 'a' + 10;
  assert(false);  // Unreachable.
  return 0;
}

// Decodes the contents of a string literal, i.e. the characters in
// [start, end) between the quotes, and appends the result to |output| as
// UTF-16 code units.
//
// When |Char| is byte sized the input is treated as UTF-8 and multi-byte
// sequences are transcoded to one or two UTF-16 code units. The escapes
// \" \/ \\ \b \f \n \r \t \v and \uXXXX are resolved; \x is rejected.
//
// Returns true on success (an empty range is valid). Returns false if
// |start| > |end|, on malformed UTF-8, on an unknown or truncated escape, or
// on a \u escape that is not followed by four hex digits. |output| may
// already hold partially decoded data when false is returned.
static bool DecodeString(const Char* start,
                         const Char* end,
                         std::vector<uint16_t>* output) {
  if (start == end)
    return true;
  if (start > end)
    return false;
  // Every input unit yields at most one output unit; account for anything
  // already stored in |output|.
  output->reserve(output->size() + static_cast<size_t>(end - start));
  while (start < end) {
    uint16_t c = *start++;
    // If the |Char| we're dealing with is really a byte, then
    // we have utf8 here, and we need to check for multibyte characters
    // and transcode them to utf16 (either one or two utf16 chars).
    if (sizeof(Char) == sizeof(uint8_t) && c > 0x7f) {
      // Inspect the leading byte to figure out how long the utf8
      // byte sequence is; while doing this initialize |codepoint|
      // with the first few bits.
      // See table in: https://en.wikipedia.org/wiki/UTF-8
      // byte one is 110x xxxx -> 2 byte utf8 sequence
      // byte one is 1110 xxxx -> 3 byte utf8 sequence
      // byte one is 1111 0xxx -> 4 byte utf8 sequence
      uint32_t codepoint;
      int num_bytes_left;
      if ((c & 0xe0) == 0xc0) {  // 2 byte utf8 sequence
        num_bytes_left = 1;
        codepoint = c & 0x1f;
      } else if ((c & 0xf0) == 0xe0) {  // 3 byte utf8 sequence
        num_bytes_left = 2;
        codepoint = c & 0x0f;
      } else if ((c & 0xf8) == 0xf0) {  // 4 byte utf8 sequence
        num_bytes_left = 3;
        codepoint = c & 0x07;
      } else {
        return false;  // invalid leading byte
      }

      // If we have enough bytes in our input, decode the remaining ones
      // belonging to this Unicode character into |codepoint|. Comparing
      // distances avoids forming a pointer beyond |end|.
      if (end - start < num_bytes_left)
        return false;
      while (num_bytes_left > 0) {
        c = *start++;
        --num_bytes_left;
        // Check the next byte is a continuation byte, that is 10xx xxxx.
        if ((c & 0xc0) != 0x80)
          return false;
        codepoint = (codepoint << 6) | (c & 0x3f);
      }

      // Disallow overlong encodings for ascii characters, as these
      // would include " and other characters significant to JSON
      // string termination / control.
      if (codepoint <= 0x7f)
        return false;
      // Invalid in UTF8, and can't be represented in UTF16 anyway.
      if (codepoint > 0x10ffff)
        return false;

      // So, now we transcode to UTF16,
      // using the math described at https://en.wikipedia.org/wiki/UTF-16,
      // for either one or two 16 bit characters. Everything up to and
      // including U+FFFF fits into a single code unit; only code points
      // from U+10000 upwards need a surrogate pair.
      if (codepoint <= 0xffff) {
        output->push_back(static_cast<uint16_t>(codepoint));
        continue;
      }
      codepoint -= 0x10000;
      output->push_back(
          static_cast<uint16_t>((codepoint >> 10) + 0xd800));  // high
      output->push_back(
          static_cast<uint16_t>((codepoint & 0x3ff) + 0xdc00));  // low
      continue;
    }
    if ('\\' != c) {
      output->push_back(c);
      continue;
    }
    // A backslash must be followed by an escape character.
    if (start == end)
      return false;
    c = *start++;

    switch (c) {
      case '"':
      case '/':
      case '\\':
        break;
      case 'b':
        c = '\b';
        break;
      case 'f':
        c = '\f';
        break;
      case 'n':
        c = '\n';
        break;
      case 'r':
        c = '\r';
        break;
      case 't':
        c = '\t';
        break;
      case 'v':
        c = '\v';
        break;
      case 'u': {
        // Exactly four hex digits must follow; verify this here instead of
        // relying on the caller having validated the token.
        if (end - start < 4)
          return false;
        uint16_t unit = 0;
        for (int i = 0; i < 4; ++i) {
          const Char digit = start[i];
          const bool is_hex = ('0' <= digit && digit <= '9') ||
                              ('a' <= digit && digit <= 'f') ||
                              ('A' <= digit && digit <= 'F');
          if (!is_hex)
            return false;
          unit = static_cast<uint16_t>((unit << 4) | HexToInt(digit));
        }
        start += 4;
        c = unit;
        break;
      }
      default:
        // Includes 'x': \x is not supported.
        return false;
    }
    output->push_back(c);
  }
  return true;
}
1965
1966  void ParseValue(const Char* start,
1967                  const Char* end,
1968                  const Char** value_token_end,
1969                  int depth) {
1970    if (depth > kStackLimit) {
1971      HandleError(Error::JSON_PARSER_STACK_LIMIT_EXCEEDED, start);
1972      return;
1973    }
1974    const Char* token_start = nullptr;
1975    const Char* token_end = nullptr;
1976    Token token = ParseToken(start, end, &token_start, &token_end);
1977    switch (token) {
1978      case NoInput:
1979        HandleError(Error::JSON_PARSER_NO_INPUT, token_start);
1980        return;
1981      case InvalidToken:
1982        HandleError(Error::JSON_PARSER_INVALID_TOKEN, token_start);
1983        return;
1984      case NullToken:
1985        handler_->HandleNull();
1986        break;
1987      case BoolTrue:
1988        handler_->HandleBool(true);
1989        break;
1990      case BoolFalse:
1991        handler_->HandleBool(false);
1992        break;
1993      case Number: {
1994        double value;
1995        if (!CharsToDouble(token_start, token_end - token_start, &value)) {
1996          HandleError(Error::JSON_PARSER_INVALID_NUMBER, token_start);
1997          return;
1998        }
1999        if (value >= std::numeric_limits<int32_t>::min() &&
2000            value <= std::numeric_limits<int32_t>::max() &&
2001            static_cast<int32_t>(value) == value)
2002          handler_->HandleInt32(static_cast<int32_t>(value));
2003        else
2004          handler_->HandleDouble(value);
2005        break;
2006      }
2007      case StringLiteral: {
2008        std::vector<uint16_t> value;
2009        bool ok = DecodeString(token_start + 1, token_end - 1, &value);
2010        if (!ok) {
2011          HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2012          return;
2013        }
2014        handler_->HandleString16(span<uint16_t>(value.data(), value.size()));
2015        break;
2016      }
2017      case ArrayBegin: {
2018        handler_->HandleArrayBegin();
2019        start = token_end;
2020        token = ParseToken(start, end, &token_start, &token_end);
2021        while (token != ArrayEnd) {
2022          ParseValue(start, end, &token_end, depth + 1);
2023          if (error_)
2024            return;
2025
2026          // After a list value, we expect a comma or the end of the list.
2027          start = token_end;
2028          token = ParseToken(start, end, &token_start, &token_end);
2029          if (token == ListSeparator) {
2030            start = token_end;
2031            token = ParseToken(start, end, &token_start, &token_end);
2032            if (token == ArrayEnd) {
2033              HandleError(Error::JSON_PARSER_UNEXPECTED_ARRAY_END, token_start);
2034              return;
2035            }
2036          } else if (token != ArrayEnd) {
2037            // Unexpected value after list value. Bail out.
2038            HandleError(Error::JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED,
2039                        token_start);
2040            return;
2041          }
2042        }
2043        handler_->HandleArrayEnd();
2044        break;
2045      }
2046      case ObjectBegin: {
2047        handler_->HandleMapBegin();
2048        start = token_end;
2049        token = ParseToken(start, end, &token_start, &token_end);
2050        while (token != ObjectEnd) {
2051          if (token != StringLiteral) {
2052            HandleError(Error::JSON_PARSER_STRING_LITERAL_EXPECTED,
2053                        token_start);
2054            return;
2055          }
2056          std::vector<uint16_t> key;
2057          if (!DecodeString(token_start + 1, token_end - 1, &key)) {
2058            HandleError(Error::JSON_PARSER_INVALID_STRING, token_start);
2059            return;
2060          }
2061          handler_->HandleString16(span<uint16_t>(key.data(), key.size()));
2062          start = token_end;
2063
2064          token = ParseToken(start, end, &token_start, &token_end);
2065          if (token != ObjectPairSeparator) {
2066            HandleError(Error::JSON_PARSER_COLON_EXPECTED, token_start);
2067            return;
2068          }
2069          start = token_end;
2070
2071          ParseValue(start, end, &token_end, depth + 1);
2072          if (error_)
2073            return;
2074          start = token_end;
2075
2076          // After a key/value pair, we expect a comma or the end of the
2077          // object.
2078          token = ParseToken(start, end, &token_start, &token_end);
2079          if (token == ListSeparator) {
2080            start = token_end;
2081            token = ParseToken(start, end, &token_start, &token_end);
2082            if (token == ObjectEnd) {
2083              HandleError(Error::JSON_PARSER_UNEXPECTED_MAP_END, token_start);
2084              return;
2085            }
2086          } else if (token != ObjectEnd) {
2087            // Unexpected value after last object value. Bail out.
2088            HandleError(Error::JSON_PARSER_COMMA_OR_MAP_END_EXPECTED,
2089                        token_start);
2090            return;
2091          }
2092        }
2093        handler_->HandleMapEnd();
2094        break;
2095      }
2096
2097      default:
2098        // We got a token that's not a value.
2099        HandleError(Error::JSON_PARSER_VALUE_EXPECTED, token_start);
2100        return;
2101    }
2102
2103    SkipWhitespaceAndComments(token_end, end, value_token_end);
2104  }
2105
2106  void HandleError(Error error, const Char* pos) {
2107    assert(error != Error::OK);
2108    if (!error_) {
2109      handler_->HandleError(
2110          Status{error, static_cast<size_t>(pos - start_pos_)});
2111      error_ = true;
2112    }
2113  }
2114
2115  const Char* start_pos_ = nullptr;
2116  bool error_ = false;
2117  const Platform* platform_;
2118  StreamingParserHandler* handler_;
2119};
2120}  // namespace
2121
2122void ParseJSON(const Platform& platform,
2123               span<uint8_t> chars,
2124               StreamingParserHandler* handler) {
2125  JsonParser<uint8_t> parser(&platform, handler);
2126  parser.Parse(chars.data(), chars.size());
2127}
2128
2129void ParseJSON(const Platform& platform,
2130               span<uint16_t> chars,
2131               StreamingParserHandler* handler) {
2132  JsonParser<uint16_t> parser(&platform, handler);
2133  parser.Parse(chars.data(), chars.size());
2134}
2135
2136// =============================================================================
2137// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
2138// =============================================================================
2139template <typename C>
2140Status ConvertCBORToJSONTmpl(const Platform& platform,
2141                             span<uint8_t> cbor,
2142                             C* json) {
2143  Status status;
2144  std::unique_ptr<StreamingParserHandler> json_writer =
2145      NewJSONEncoder(&platform, json, &status);
2146  cbor::ParseCBOR(cbor, json_writer.get());
2147  return status;
2148}
2149
2150Status ConvertCBORToJSON(const Platform& platform,
2151                         span<uint8_t> cbor,
2152                         std::vector<uint8_t>* json) {
2153  return ConvertCBORToJSONTmpl(platform, cbor, json);
2154}
2155Status ConvertCBORToJSON(const Platform& platform,
2156                         span<uint8_t> cbor,
2157                         std::string* json) {
2158  return ConvertCBORToJSONTmpl(platform, cbor, json);
2159}
2160
2161template <typename T, typename C>
2162Status ConvertJSONToCBORTmpl(const Platform& platform, span<T> json, C* cbor) {
2163  Status status;
2164  std::unique_ptr<StreamingParserHandler> encoder =
2165      cbor::NewCBOREncoder(cbor, &status);
2166  ParseJSON(platform, json, encoder.get());
2167  return status;
2168}
2169Status ConvertJSONToCBOR(const Platform& platform,
2170                         span<uint8_t> json,
2171                         std::string* cbor) {
2172  return ConvertJSONToCBORTmpl(platform, json, cbor);
2173}
2174Status ConvertJSONToCBOR(const Platform& platform,
2175                         span<uint16_t> json,
2176                         std::string* cbor) {
2177  return ConvertJSONToCBORTmpl(platform, json, cbor);
2178}
2179Status ConvertJSONToCBOR(const Platform& platform,
2180                         span<uint8_t> json,
2181                         std::vector<uint8_t>* cbor) {
2182  return ConvertJSONToCBORTmpl(platform, json, cbor);
2183}
2184Status ConvertJSONToCBOR(const Platform& platform,
2185                         span<uint16_t> json,
2186                         std::vector<uint8_t>* cbor) {
2187  return ConvertJSONToCBORTmpl(platform, json, cbor);
2188}
2189}  // namespace json
2190}  // namespace v8_inspector_protocol_encoding
2191