1// Copyright 2019 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
6#define V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
7
8#include <cstddef>
9#include <cstdint>
10#include <cstring>
11#include <limits>
12#include <memory>
13#include <string>
14#include <vector>
15
16namespace v8_inspector_protocol_encoding {
17
18// =============================================================================
19// span - sequence of bytes
20// =============================================================================
21
// A non-owning, read-only view over |size| contiguous elements of type T.
// This template is similar to std::span (C++20), but it only ever hands out
// const access to the elements. The span stores just a pointer and a length:
// it neither copies nor owns the elements, so the underlying storage must
// outlive every use of the span.
// No bounds checking is performed by operator[] or subspan(); passing an
// index / offset / count outside of the viewed range is the caller's error.
// All members are constexpr, so spans over static data can be constructed
// and inspected in constant expressions.
template <typename T>
class span {
 public:
  using index_type = size_t;

  // Constructs an empty span (null data pointer, size 0).
  constexpr span() : data_(nullptr), size_(0) {}
  // Constructs a span viewing |size| elements starting at |data|.
  constexpr span(const T* data, index_type size) : data_(data), size_(size) {}

  // Pointer to the first viewed element (nullptr for a default span).
  constexpr const T* data() const { return data_; }

  // Iterator pair, which makes the span usable in range-based for loops.
  constexpr const T* begin() const { return data_; }
  constexpr const T* end() const { return data_ + size_; }

  // Unchecked element access.
  constexpr const T& operator[](index_type idx) const { return data_[idx]; }

  // Returns the view of |count| elements starting at |offset| (unchecked).
  constexpr span<T> subspan(index_type offset, index_type count) const {
    return span(data_ + offset, count);
  }

  // Returns the view from |offset| up to the end of this span (unchecked;
  // |offset| must not exceed size(), otherwise the length wraps around).
  constexpr span<T> subspan(index_type offset) const {
    return span(data_ + offset, size_ - offset);
  }

  constexpr bool empty() const { return size_ == 0; }

  // Number of elements, and number of bytes, covered by the span.
  constexpr index_type size() const { return size_; }
  constexpr index_type size_bytes() const { return size_ * sizeof(T); }

 private:
  const T* data_;
  index_type size_;
};
55
56template <typename T>
57span<T> SpanFrom(const std::vector<T>& v) {
58  return span<T>(v.data(), v.size());
59}
60
61template <size_t N>
62span<uint8_t> SpanFrom(const char (&str)[N]) {
63  return span<uint8_t>(reinterpret_cast<const uint8_t*>(str), N - 1);
64}
65
66inline span<uint8_t> SpanFrom(const char* str) {
67  return str ? span<uint8_t>(reinterpret_cast<const uint8_t*>(str), strlen(str))
68             : span<uint8_t>();
69}
70
71inline span<uint8_t> SpanFrom(const std::string& v) {
72  return span<uint8_t>(reinterpret_cast<const uint8_t*>(v.data()), v.size());
73}
74
75// =============================================================================
76// Status and Error codes
77// =============================================================================
// Error codes carried by Status::error. OK (0) means "no error"; every other
// enumerator has an explicit, distinct numeric value.
78enum class Error {
79  OK = 0,
80  // JSON parsing errors - json_parser.{h,cc}.
81  JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
82  JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
83  JSON_PARSER_NO_INPUT = 0x03,
84  JSON_PARSER_INVALID_TOKEN = 0x04,
85  JSON_PARSER_INVALID_NUMBER = 0x05,
86  JSON_PARSER_INVALID_STRING = 0x06,
87  JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
88  JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
89  JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
90  JSON_PARSER_COLON_EXPECTED = 0x0a,
91  JSON_PARSER_UNEXPECTED_MAP_END = 0x0b,
92  JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c,
93  JSON_PARSER_VALUE_EXPECTED = 0x0d,
94
  // CBOR errors (all enumerators below carry the CBOR_ prefix).
95  CBOR_INVALID_INT32 = 0x0e,
96  CBOR_INVALID_DOUBLE = 0x0f,
97  CBOR_INVALID_ENVELOPE = 0x10,
98  CBOR_INVALID_STRING8 = 0x11,
99  CBOR_INVALID_STRING16 = 0x12,
100  CBOR_INVALID_BINARY = 0x13,
101  CBOR_UNSUPPORTED_VALUE = 0x14,
102  CBOR_NO_INPUT = 0x15,
103  CBOR_INVALID_START_BYTE = 0x16,
104  CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17,
105  CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18,
106  CBOR_UNEXPECTED_EOF_IN_MAP = 0x19,
107  CBOR_INVALID_MAP_KEY = 0x1a,
108  CBOR_STACK_LIMIT_EXCEEDED = 0x1b,
109  CBOR_TRAILING_JUNK = 0x1c,
110  CBOR_MAP_START_EXPECTED = 0x1d,
111  CBOR_MAP_STOP_EXPECTED = 0x1e,
112  CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f,
113};
114
115// A status value with position that can be copied. The default status
116// is OK. Usually, error status values should come with a valid position.
117struct Status {
118  static constexpr size_t npos() { return std::numeric_limits<size_t>::max(); }
119
120  bool ok() const { return error == Error::OK; }
121
122  Error error = Error::OK;
123  size_t pos = npos();
124  Status(Error error, size_t pos) : error(error), pos(pos) {}
125  Status() = default;
126
127  // Returns a 7 bit US-ASCII string, either "OK" or an error message
128  // that includes the position.
129  std::string ToASCIIString() const;
130
131 private:
132  std::string ToASCIIString(const char* msg) const;
133};
134
135// Handler interface for parser events emitted by a streaming parser.
136// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder,
137// json::ParseJSON.
// Every method is pure virtual; implementations receive one call per event.
138class StreamingParserHandler {
139 public:
140  virtual ~StreamingParserHandler() = default;
  // Structural events: the begin / end of a map or of an array.
141  virtual void HandleMapBegin() = 0;
142  virtual void HandleMapEnd() = 0;
143  virtual void HandleArrayBegin() = 0;
144  virtual void HandleArrayEnd() = 0;
  // Value events; the argument (if any) is the value being reported. The
  // string / binary payloads are passed as non-owning spans.
145  virtual void HandleString8(span<uint8_t> chars) = 0;
146  virtual void HandleString16(span<uint16_t> chars) = 0;
147  virtual void HandleBinary(span<uint8_t> bytes) = 0;
148  virtual void HandleDouble(double value) = 0;
149  virtual void HandleInt32(int32_t value) = 0;
150  virtual void HandleBool(bool value) = 0;
151  virtual void HandleNull() = 0;
152
153  // The parser may send one error even after other events have already
154  // been received. Client code is responsible for then discarding the
155  // already processed events.
156  // |error| must be an error, as in, |error.ok()| can't be true.
157  virtual void HandleError(Status error) = 0;
158};
159
160namespace cbor {
161// The binary encoding for the inspector protocol follows the CBOR specification
162// (RFC 7049). Additional constraints:
163// - Only indefinite length maps and arrays are supported.
164// - Maps and arrays are wrapped with an envelope, that is, a
165//   CBOR tag with value 24 followed by a byte string specifying
166//   the byte length of the enclosed map / array. The byte string
167//   must use a 32 bit wide length.
168// - At the top level, a message must be an indefinite length map
169//   wrapped by an envelope.
170// - Maximal size for messages is 2^32 (4 GiB).
171// - For scalars, we support only the int32_t range, encoded as
172//   UNSIGNED/NEGATIVE (major types 0 / 1).
173// - UTF16 strings, including with unbalanced surrogate pairs, are encoded
174//   as CBOR BYTE_STRING (major type 2). For such strings, the number of
175//   bytes encoded must be even.
176// - UTF8 strings (major type 3) are supported.
177// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never
178//   as UTF16 strings.
179// - Arbitrary byte arrays, in the inspector protocol called 'binary',
180//   are encoded as BYTE_STRING (major type 2), prefixed with a byte
181//   indicating base64 when rendered as JSON.
182
183// =============================================================================
184// Detecting CBOR content
185// =============================================================================
186
187// The first byte for an envelope, which we use for wrapping dictionaries
188// and arrays; and the byte that indicates a byte string with 32 bit length.
189// These two bytes start an envelope, and thereby also any CBOR message
190// produced or consumed by this protocol. See also |EnvelopeEncoder| below.
// Each function returns the corresponding single byte value.
191uint8_t InitialByteForEnvelope();
192uint8_t InitialByteFor32BitLengthByteString();
193
194// Checks whether |msg| is a cbor message.
// NOTE(review): presumably this tests whether |msg| starts with the two
// envelope bytes (InitialByteForEnvelope() followed by
// InitialByteFor32BitLengthByteString()) — confirm against the implementation.
195bool IsCBORMessage(span<uint8_t> msg);
196
197// =============================================================================
198// Encoding individual CBOR items
199// =============================================================================
200
201// Some constants for CBOR tokens that only take a single byte on the wire.
// Each function returns that byte; the caller writes it to its output.
202uint8_t EncodeTrue();
203uint8_t EncodeFalse();
204uint8_t EncodeNull();
205uint8_t EncodeIndefiniteLengthArrayStart();
206uint8_t EncodeIndefiniteLengthMapStart();
207uint8_t EncodeStop();
208
209// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE|
210// (major type 1) iff < 0.
// The two overloads differ only in the container type of |out|.
// NOTE(review): presumably the encoded bytes are appended to |out| (existing
// contents are kept) — confirm against the implementation.
211void EncodeInt32(int32_t value, std::vector<uint8_t>* out);
212void EncodeInt32(int32_t value, std::string* out);
213
214// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16
215// character in |in| is emitted with most significant byte first,
216// appending to |out|.
// NOTE(review): CBORTokenizer::GetString16WireRep() documents the STRING16
// wire representation as "low byte first (little endian)", which contradicts
// "most significant byte first" above — confirm against the implementation
// and correct whichever comment is wrong.
// The two overloads differ only in the container type of |out|.
217void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out);
218void EncodeString16(span<uint16_t> in, std::string* out);
219
220// Encodes a UTF8 string |in| as STRING (major type 3).
// The two overloads differ only in the container type of |out|.
// NOTE(review): presumably |in| is expected to already be valid UTF8 and is
// not validated here — confirm against the implementation.
221void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out);
222void EncodeString8(span<uint8_t> in, std::string* out);
223
224// Encodes the given |latin1| string as STRING8.
225// If any non-ASCII character is present, it will be represented
226// as a 2 byte UTF8 sequence.
// The two overloads differ only in the container type of |out|.
227void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out);
228void EncodeFromLatin1(span<uint8_t> latin1, std::string* out);
229
230// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII.
231// Otherwise, encodes as STRING16.
// The two overloads differ only in the container type of |out|.
232void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out);
233void EncodeFromUTF16(span<uint16_t> utf16, std::string* out);
234
235// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with
236// definite length, prefixed with tag 22 indicating expected conversion to
237// base64 (see RFC 7049, Table 3 and Section 2.4.4.2).
// The two overloads differ only in the container type of |out|.
238void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out);
239void EncodeBinary(span<uint8_t> in, std::string* out);
240
241// Encodes |value| as Major type 7 (SIMPLE_VALUE),
242// with additional info = 27, followed by 8 bytes in big endian.
// The two overloads differ only in the container type of |out|.
243void EncodeDouble(double value, std::vector<uint8_t>* out);
244void EncodeDouble(double value, std::string* out);
245
246// =============================================================================
247// cbor::EnvelopeEncoder - for wrapping submessages
248// =============================================================================
249
250// An envelope indicates the byte length of a wrapped item.
251// We use this for maps and arrays, which allows the decoder
252// to skip such (nested) values wholesale.
253// It's implemented as a CBOR tag (major type 6) with additional
254// info = 24, followed by a byte string with a 32 bit length value;
255// so the maximal structure that we can wrap is 2^32 bytes long.
256// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1
// Usage: call EncodeStart(), encode the wrapped item into the same |out|,
// then call EncodeStop() with that |out|.
257class EnvelopeEncoder {
258 public:
259  // Emits the envelope start bytes and records the position for the
260  // byte size in |byte_size_pos_|. Also emits empty bytes for the
261  // byte size so that encoding can continue.
262  void EncodeStart(std::vector<uint8_t>* out);
263  void EncodeStart(std::string* out);
264  // This records the current size in |out| at position byte_size_pos_.
265  // Returns true iff successful.
  // NOTE(review): presumably failure means the wrapped content does not fit
  // into the 32 bit length field — confirm against the implementation.
266  bool EncodeStop(std::vector<uint8_t>* out);
267  bool EncodeStop(std::string* out);
268
269 private:
  // Position within |out| of the byte size field, set by EncodeStart().
270  size_t byte_size_pos_ = 0;
271};
272
273// =============================================================================
274// cbor::NewCBOREncoder - for encoding from a streaming parser
275// =============================================================================
276
277// This can be used to convert to CBOR, by passing the return value to a parser
278// that drives it. The handler will encode into |out|, and iff an error occurs
279// it will set |status| to an error and clear |out|. Otherwise, |status.ok()|
280// will be |true|.
// The handler keeps the raw |out| and |status| pointers, so both must stay
// valid for as long as the returned handler is in use.
// The two overloads differ only in the container type of |out|.
281std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
282    std::vector<uint8_t>* out,
283    Status* status);
284std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
285                                                       Status* status);
286
287// =============================================================================
288// cbor::CBORTokenizer - for parsing individual CBOR items
289// =============================================================================
290
291// Tags for the tokens within a CBOR message that CBORTokenizer understands.
292// Note that this is not the same terminology as the CBOR spec (RFC 7049),
293// but rather, our adaptation. For instance, we lump unsigned and signed
294// major type into INT32 here (and disallow values outside the int32_t range).
295enum class CBORTokenTag {
296  // Encountered an error in the structure of the message. Consult
297  // status() for details.
298  ERROR_VALUE,
299  // Booleans and NULL.
300  TRUE_VALUE,
301  FALSE_VALUE,
302  NULL_VALUE,
303  // An int32_t (signed 32 bit integer).
304  INT32,
305  // A double (64 bit floating point).
306  DOUBLE,
307  // A UTF8 string.
308  STRING8,
309  // A UTF16 string.
310  STRING16,
311  // A binary string.
312  BINARY,
313  // Starts an indefinite length map; after the map start we expect
314  // alternating keys and values, followed by STOP.
315  MAP_START,
316  // Starts an indefinite length array; after the array start we
317  // expect values, followed by STOP.
318  ARRAY_START,
319  // Ends a map or an array.
320  STOP,
321  // An envelope indicator, wrapping a map or array.
322  // Internally this carries the byte length of the wrapped
323  // map or array. While CBORTokenizer::Next() will read / skip the entire
324  // envelope, CBORTokenizer::EnterEnvelope() reads the tokens
325  // inside of it.
326  ENVELOPE,
327  // We've reached the end; there is nothing else to read.
328  DONE,
329};
330
331// The major types from RFC 7049 Section 2.1.
// The enumerator values are the numeric major type values (0 through 7).
332enum class MajorType {
333  UNSIGNED = 0,
334  NEGATIVE = 1,
335  BYTE_STRING = 2,
336  STRING = 3,
337  ARRAY = 4,
338  MAP = 5,
339  TAG = 6,
340  SIMPLE_VALUE = 7
341};
342
343// CBORTokenizer segments a CBOR message, presenting the tokens therein as
344// numbers, strings, etc. This is not a complete CBOR parser, but makes it much
345// easier to implement one (e.g. ParseCBOR, below). It can also be used to parse
346// messages partially.
347class CBORTokenizer {
348 public:
  // Creates a tokenizer over |bytes|. |bytes| is a non-owning span, so the
  // underlying storage must outlive the tokenizer.
349  explicit CBORTokenizer(span<uint8_t> bytes);
350  ~CBORTokenizer();
351
352  // Identifies the current token that we're looking at,
353  // or ERROR_VALUE (in which case ::Status() has details)
354  // or DONE (if we're past the last token).
355  CBORTokenTag TokenTag() const;
356
357  // Advances to the next token.
358  void Next();
359  // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE.
360  // While Next() would skip past the entire envelope / what it's
361  // wrapping, EnterEnvelope positions the cursor inside of the envelope,
362  // letting the client explore the nested structure.
363  void EnterEnvelope();
364
365  // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes
366  // the error more precisely; otherwise it'll be set to Error::OK.
367  // In either case, Status().pos is the current position.
  // The elaborated type specifier |struct Status| is required here because
  // this member function's name hides the type name |Status| inside the class.
368  struct Status Status() const;
369
370  // The following methods retrieve the token values. They can only
371  // be called if TokenTag() matches.
372
373  // To be called only if ::TokenTag() == CBORTokenTag::INT32.
374  int32_t GetInt32() const;
375
376  // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE.
377  double GetDouble() const;
378
379  // To be called only if ::TokenTag() == CBORTokenTag::STRING8.
380  span<uint8_t> GetString8() const;
381
382  // Wire representation for STRING16 is low byte first (little endian).
383  // To be called only if ::TokenTag() == CBORTokenTag::STRING16.
  // Returns the raw bytes (not uint16_t code units).
384  span<uint8_t> GetString16WireRep() const;
385
386  // To be called only if ::TokenTag() == CBORTokenTag::BINARY.
387  span<uint8_t> GetBinary() const;
388
389  // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
390  span<uint8_t> GetEnvelopeContents() const;
391
392 private:
  // Internal helpers; see the implementation file for their exact behavior.
393  void ReadNextToken(bool enter_envelope);
394  void SetToken(CBORTokenTag token, size_t token_byte_length);
395  void SetError(Error error);
396
  // Tokenizer state.
  // NOTE(review): presumably |bytes_| is the input passed to the constructor
  // and the remaining fields describe the current token — confirm against
  // the implementation.
397  span<uint8_t> bytes_;
398  CBORTokenTag token_tag_;
399  struct Status status_;
400  size_t token_byte_length_;
401  MajorType token_start_type_;
402  uint64_t token_start_internal_value_;
403};
404
405// =============================================================================
406// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
407// =============================================================================
408
409// Parses a CBOR encoded message from |bytes|, sending events to
410// |out|. If an error occurs, sends |out->HandleError|, and parsing stops.
411// The client is responsible for discarding the already received information in
412// that case.
// |bytes| is a non-owning span over the message.
413void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out);
414
415// =============================================================================
416// cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages
417// =============================================================================
418
419// Modifies the |cbor| message by appending a new key/value entry at the end
420// of the map. Patches up the envelope size; Status.ok() iff successful.
421// If not successful, |cbor| may be corrupted after this call.
// Both |string8_key| and |string8_value| are STRING8 (UTF8) values, as the
// parameter names indicate.
// The two overloads differ only in the container type of |cbor|.
422Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
423                                   span<uint8_t> string8_value,
424                                   std::vector<uint8_t>* cbor);
425Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
426                                   span<uint8_t> string8_value,
427                                   std::string* cbor);
428
429namespace internals {  // Exposed only for writing tests.
// NOTE(review): presumably reads the start of a CBOR token from |bytes|,
// storing its major type in |type| and its associated value in |value|, and
// returns the number of bytes consumed — confirm the return value semantics
// (including the error case) against the implementation.
430size_t ReadTokenStart(span<uint8_t> bytes,
431                      cbor::MajorType* type,
432                      uint64_t* value);
433
// NOTE(review): presumably the counterpart of ReadTokenStart: writes the
// token start for |type| / |value| to |encoded| — confirm against the
// implementation. The two overloads differ only in the container type of
// |encoded|.
434void WriteTokenStart(cbor::MajorType type,
435                     uint64_t value,
436                     std::vector<uint8_t>* encoded);
437void WriteTokenStart(cbor::MajorType type,
438                     uint64_t value,
439                     std::string* encoded);
440}  // namespace internals
441}  // namespace cbor
442
443namespace json {
444// Client code must provide an instance. Implementation should delegate
445// to whatever is appropriate.
// This interface abstracts the conversion between doubles and their string
// representation, which the JSON encoder / parser below take as a parameter.
446class Platform {
447 public:
448  virtual ~Platform() = default;
449  // Parses |str| into |result|. Returns false iff there are
450  // leftover characters or parsing errors.
451  virtual bool StrToD(const char* str, double* result) const = 0;
452
453  // Prints |value| in a format suitable for JSON.
  // The caller owns the returned buffer.
  // NOTE(review): presumably the buffer is NUL-terminated — confirm.
454  virtual std::unique_ptr<char[]> DToStr(double value) const = 0;
455};
456
457// =============================================================================
458// json::NewJSONEncoder - for encoding streaming parser events as JSON
459// =============================================================================
460
461// Returns a handler object which will write ascii characters to |out|.
462// |status->ok()| will be false iff the handler routine HandleError() is called.
463// In that case, we'll stop emitting output.
464// Except for calling the HandleError routine at any time, the client
465// code must call the Handle* methods in an order in which they'd occur
466// in valid JSON; otherwise we may crash (the code uses assert).
// |platform|, |out| and |status| are passed as raw pointers and must stay
// valid for as long as the returned handler is in use.
// The two overloads differ only in the container type of |out|.
467std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
468    const Platform* platform,
469    std::vector<uint8_t>* out,
470    Status* status);
471std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
472                                                       std::string* out,
473                                                       Status* status);
474
475// =============================================================================
476// json::ParseJSON - for receiving streaming parser events for JSON
477// =============================================================================
478
// Parses the JSON text in |chars|, given either as 8 bit or as 16 bit code
// units, sending parser events to |handler|.
// NOTE(review): presumably errors are reported via |handler->HandleError|,
// mirroring cbor::ParseCBOR, and |platform| is used for number parsing —
// confirm against the implementation.
479void ParseJSON(const Platform& platform,
480               span<uint8_t> chars,
481               StreamingParserHandler* handler);
482void ParseJSON(const Platform& platform,
483               span<uint16_t> chars,
484               StreamingParserHandler* handler);
485
486// =============================================================================
487// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
488// =============================================================================
// Transcodes the CBOR message |cbor| to JSON, writing the result to |json|.
// The two overloads differ only in the container type of |json|.
// NOTE(review): presumably the returned Status is ok() iff the conversion
// succeeded — confirm against the implementation.
489Status ConvertCBORToJSON(const Platform& platform,
490                         span<uint8_t> cbor,
491                         std::string* json);
492Status ConvertCBORToJSON(const Platform& platform,
493                         span<uint8_t> cbor,
494                         std::vector<uint8_t>* json);
// Transcodes the JSON text |json| (8 bit or 16 bit code units) to CBOR,
// writing the result to |cbor| (a byte vector or a string).
// NOTE(review): presumably the returned Status is ok() iff the conversion
// succeeded — confirm against the implementation.
495Status ConvertJSONToCBOR(const Platform& platform,
496                         span<uint8_t> json,
497                         std::vector<uint8_t>* cbor);
498Status ConvertJSONToCBOR(const Platform& platform,
499                         span<uint16_t> json,
500                         std::vector<uint8_t>* cbor);
501Status ConvertJSONToCBOR(const Platform& platform,
502                         span<uint8_t> json,
503                         std::string* cbor);
504Status ConvertJSONToCBOR(const Platform& platform,
505                         span<uint16_t> json,
506                         std::string* cbor);
507}  // namespace json
508}  // namespace v8_inspector_protocol_encoding
509
510#endif  // V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
511