// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
#define V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <vector>

namespace v8_inspector_protocol_encoding {

// =============================================================================
// span - sequence of bytes
// =============================================================================

// This template is similar to std::span, which will be included in C++20.
//
// A span is a read-only view: it stores only a pointer and an element count
// and never owns or copies the elements, so the viewed memory must stay alive
// for as long as the span is used. None of the accessors perform bounds
// checks; passing an out-of-range index or offset is the caller's error.
template <typename T>
class span {
 public:
  using index_type = size_t;

  // Creates an empty span (nullptr, 0).
  span() : data_(nullptr), size_(0) {}
  // Creates a view onto |size| elements starting at |data|.
  span(const T* data, index_type size) : data_(data), size_(size) {}

  const T* data() const { return data_; }

  const T* begin() const { return data_; }
  const T* end() const { return data_ + size_; }

  const T& operator[](index_type idx) const { return data_[idx]; }

  // Returns the |count| elements starting at |offset|. Not range checked.
  span<T> subspan(index_type offset, index_type count) const {
    return span(data_ + offset, count);
  }

  // Returns everything from |offset| to the end. |offset| must not exceed
  // size(); otherwise the unsigned subtraction below wraps around.
  span<T> subspan(index_type offset) const {
    return span(data_ + offset, size_ - offset);
  }

  bool empty() const { return size_ == 0; }

  // Number of elements / number of bytes viewed.
  index_type size() const { return size_; }
  index_type size_bytes() const { return size_ * sizeof(T); }

 private:
  const T* data_;
  index_type size_;
};

// Views the elements of |v|. The span is only valid while |v| is alive and
// its storage is not reallocated.
template <typename T>
span<T> SpanFrom(const std::vector<T>& v) {
  return span<T>(v.data(), v.size());
}

// Views the characters of a char array, excluding its last element (the
// terminating 0 when |str| is a string literal).
template <size_t N>
span<uint8_t> SpanFrom(const char (&str)[N]) {
  return span<uint8_t>(reinterpret_cast<const uint8_t*>(str), N - 1);
}

// Views the characters of the 0-terminated string |str|, excluding the
// terminator. A null |str| yields an empty span.
inline span<uint8_t> SpanFrom(const char* str) {
  // std:: qualified: <cstring> only guarantees the name inside namespace std.
  return str ? span<uint8_t>(reinterpret_cast<const uint8_t*>(str),
                             std::strlen(str))
             : span<uint8_t>();
}

// Views the bytes of |v|. The span is only valid while |v| is alive and
// unmodified.
inline span<uint8_t> SpanFrom(const std::string& v) {
  return span<uint8_t>(reinterpret_cast<const uint8_t*>(v.data()), v.size());
}

// =============================================================================
// Status and Error codes
// =============================================================================

// Error codes reported through Status. The numeric values are spelled out
// explicitly; OK is 0.
enum class Error {
  OK = 0,
  // JSON parsing errors - json_parser.{h,cc}.
  JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
  JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
  JSON_PARSER_NO_INPUT = 0x03,
  JSON_PARSER_INVALID_TOKEN = 0x04,
  JSON_PARSER_INVALID_NUMBER = 0x05,
  JSON_PARSER_INVALID_STRING = 0x06,
  JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
  JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
  JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
  JSON_PARSER_COLON_EXPECTED = 0x0a,
  JSON_PARSER_UNEXPECTED_MAP_END = 0x0b,
  JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c,
  JSON_PARSER_VALUE_EXPECTED = 0x0d,

  // CBOR encoding / parsing errors.
  CBOR_INVALID_INT32 = 0x0e,
  CBOR_INVALID_DOUBLE = 0x0f,
  CBOR_INVALID_ENVELOPE = 0x10,
  CBOR_INVALID_STRING8 = 0x11,
  CBOR_INVALID_STRING16 = 0x12,
  CBOR_INVALID_BINARY = 0x13,
  CBOR_UNSUPPORTED_VALUE = 0x14,
  CBOR_NO_INPUT = 0x15,
  CBOR_INVALID_START_BYTE = 0x16,
  CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17,
  CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18,
  CBOR_UNEXPECTED_EOF_IN_MAP = 0x19,
  CBOR_INVALID_MAP_KEY = 0x1a,
  CBOR_STACK_LIMIT_EXCEEDED = 0x1b,
  CBOR_TRAILING_JUNK = 0x1c,
  CBOR_MAP_START_EXPECTED = 0x1d,
  CBOR_MAP_STOP_EXPECTED = 0x1e,
  CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f,
};

// A status value with position that can be copied. The default status
// is OK. Usually, error status values should come with a valid position.
struct Status {
  // Sentinel position meaning "no position"; the default value of |pos|.
  static constexpr size_t npos() { return std::numeric_limits<size_t>::max(); }

  // True iff this status carries no error.
  bool ok() const { return error == Error::OK; }

  Error error = Error::OK;
  size_t pos = npos();
  Status(Error error, size_t pos) : error(error), pos(pos) {}
  Status() = default;

  // Returns a 7 bit US-ASCII string, either "OK" or an error message
  // that includes the position.
  std::string ToASCIIString() const;

 private:
  std::string ToASCIIString(const char* msg) const;
};

// Handler interface for parser events emitted by a streaming parser.
// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder,
// json::ParseJSON.
class StreamingParserHandler {
 public:
  virtual ~StreamingParserHandler() = default;
  virtual void HandleMapBegin() = 0;
  virtual void HandleMapEnd() = 0;
  virtual void HandleArrayBegin() = 0;
  virtual void HandleArrayEnd() = 0;
  virtual void HandleString8(span<uint8_t> chars) = 0;
  virtual void HandleString16(span<uint16_t> chars) = 0;
  virtual void HandleBinary(span<uint8_t> bytes) = 0;
  virtual void HandleDouble(double value) = 0;
  virtual void HandleInt32(int32_t value) = 0;
  virtual void HandleBool(bool value) = 0;
  virtual void HandleNull() = 0;

  // The parser may send one error even after other events have already
  // been received. Client code is responsible to then discard the
  // already processed events.
  // |error| must be an error, as in, |error.ok()| can't be true.
  virtual void HandleError(Status error) = 0;
};

namespace cbor {
// The binary encoding for the inspector protocol follows the CBOR specification
// (RFC 7049). Additional constraints:
// - Only indefinite length maps and arrays are supported.
// - Maps and arrays are wrapped with an envelope, that is, a
//   CBOR tag with value 24 followed by a byte string specifying
//   the byte length of the enclosed map / array. The byte string
//   must use a 32 bit wide length.
// - At the top level, a message must be an indefinite length map
//   wrapped by an envelope.
// - Maximal size for messages is 2^32 (4 GiB).
// - For scalars, we support only the int32_t range, encoded as
//   UNSIGNED/NEGATIVE (major types 0 / 1).
// - UTF16 strings, including with unbalanced surrogate pairs, are encoded
//   as CBOR BYTE_STRING (major type 2). For such strings, the number of
//   bytes encoded must be even.
// - UTF8 strings (major type 3) are supported.
// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never
//   as UTF16 strings.
// - Arbitrary byte arrays, in the inspector protocol called 'binary',
//   are encoded as BYTE_STRING (major type 2), prefixed with a byte
//   indicating base64 when rendered as JSON.

// =============================================================================
// Detecting CBOR content
// =============================================================================

// The first byte for an envelope, which we use for wrapping dictionaries
// and arrays; and the byte that indicates a byte string with 32 bit length.
// These two bytes start an envelope, and thereby also any CBOR message
// produced or consumed by this protocol. See also |EnvelopeEncoder| below.
uint8_t InitialByteForEnvelope();
uint8_t InitialByteFor32BitLengthByteString();

// Checks whether |msg| is a cbor message.
bool IsCBORMessage(span<uint8_t> msg);

// =============================================================================
// Encoding individual CBOR items
// =============================================================================

// Some constants for CBOR tokens that only take a single byte on the wire.
uint8_t EncodeTrue();
uint8_t EncodeFalse();
uint8_t EncodeNull();
uint8_t EncodeIndefiniteLengthArrayStart();
uint8_t EncodeIndefiniteLengthMapStart();
uint8_t EncodeStop();

// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE|
// (major type 1) iff < 0.
void EncodeInt32(int32_t value, std::vector<uint8_t>* out);
void EncodeInt32(int32_t value, std::string* out);

// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16
// character in |in| is emitted with most significant byte first,
// appending to |out|.
// NOTE(review): CBORTokenizer::GetString16WireRep() documents the wire
// representation as low byte first; one of the two comments is wrong.
// Confirm against the implementation.
void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out);
void EncodeString16(span<uint16_t> in, std::string* out);

// Encodes a UTF8 string |in| as STRING (major type 3).
void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out);
void EncodeString8(span<uint8_t> in, std::string* out);

// Encodes the given |latin1| string as STRING8.
// If any non-ASCII character is present, it will be represented
// as a 2 byte UTF8 sequence.
void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out);
void EncodeFromLatin1(span<uint8_t> latin1, std::string* out);

// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII.
// Otherwise, encodes as STRING16.
void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out);
void EncodeFromUTF16(span<uint16_t> utf16, std::string* out);

// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with
// definite length, prefixed with tag 22 indicating expected conversion to
// base64 (see RFC 7049, Table 3 and Section 2.4.4.2).
void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out);
void EncodeBinary(span<uint8_t> in, std::string* out);

// Encodes a double as Major type 7 (SIMPLE_VALUE),
// with additional info = 27, followed by 8 bytes in big endian.
void EncodeDouble(double value, std::vector<uint8_t>* out);
void EncodeDouble(double value, std::string* out);

// =============================================================================
// cbor::EnvelopeEncoder - for wrapping submessages
// =============================================================================

// An envelope indicates the byte length of a wrapped item.
// We use this for maps and arrays, which allows the decoder
// to skip such (nested) values wholesale.
// It's implemented as a CBOR tag (major type 6) with additional
// info = 24, followed by a byte string with a 32 bit length value;
// so the maximal structure that we can wrap is 2^32 bytes long.
// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1
class EnvelopeEncoder {
 public:
  // Emits the envelope start bytes and records the position for the
  // byte size in |byte_size_pos_|. Also emits empty bytes for the
  // byte size so that encoding can continue.
  void EncodeStart(std::vector<uint8_t>* out);
  void EncodeStart(std::string* out);
  // This records the current size in |out| at position byte_size_pos_.
  // Returns true iff successful.
  bool EncodeStop(std::vector<uint8_t>* out);
  bool EncodeStop(std::string* out);

 private:
  size_t byte_size_pos_ = 0;
};

// =============================================================================
// cbor::NewCBOREncoder - for encoding from a streaming parser
// =============================================================================

// This can be used to convert to CBOR, by passing the return value to a parser
// that drives it. The handler will encode into |out|, and iff an error occurs
// it will set |status| to an error and clear |out|. Otherwise, |status.ok()|
// will be |true|.
std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
    std::vector<uint8_t>* out,
    Status* status);
std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
                                                       Status* status);

// =============================================================================
// cbor::CBORTokenizer - for parsing individual CBOR items
// =============================================================================

// Tags for the tokens within a CBOR message that CBORTokenizer understands.
// Note that this is not the same terminology as the CBOR spec (RFC 7049),
// but rather, our adaptation. For instance, we lump unsigned and signed
// major type into INT32 here (and disallow values outside the int32_t range).
enum class CBORTokenTag {
  // Encountered an error in the structure of the message. Consult
  // status() for details.
  ERROR_VALUE,
  // Booleans and NULL.
  TRUE_VALUE,
  FALSE_VALUE,
  NULL_VALUE,
  // An int32_t (signed 32 bit integer).
  INT32,
  // A double (64 bit floating point).
  DOUBLE,
  // A UTF8 string.
  STRING8,
  // A UTF16 string.
  STRING16,
  // A binary string.
  BINARY,
  // Starts an indefinite length map; after the map start we expect
  // alternating keys and values, followed by STOP.
  MAP_START,
  // Starts an indefinite length array; after the array start we
  // expect values, followed by STOP.
  ARRAY_START,
  // Ends a map or an array.
  STOP,
  // An envelope indicator, wrapping a map or array.
  // Internally this carries the byte length of the wrapped
  // map or array. While CBORTokenizer::Next() will read / skip the entire
  // envelope, CBORTokenizer::EnterEnvelope() reads the tokens
  // inside of it.
  ENVELOPE,
  // We've reached the end there is nothing else to read.
  DONE,
};

// The major types from RFC 7049 Section 2.1.
enum class MajorType {
  UNSIGNED = 0,
  NEGATIVE = 1,
  BYTE_STRING = 2,
  STRING = 3,
  ARRAY = 4,
  MAP = 5,
  TAG = 6,
  SIMPLE_VALUE = 7
};

// CBORTokenizer segments a CBOR message, presenting the tokens therein as
// numbers, strings, etc. This is not a complete CBOR parser, but makes it much
// easier to implement one (e.g. ParseCBOR, below). It can also be used to parse
// messages partially.
class CBORTokenizer {
 public:
  explicit CBORTokenizer(span<uint8_t> bytes);
  ~CBORTokenizer();

  // Identifies the current token that we're looking at,
  // or ERROR_VALUE (in which case ::Status() has details)
  // or DONE (if we're past the last token).
  CBORTokenTag TokenTag() const;

  // Advances to the next token.
  void Next();
  // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE.
  // While Next() would skip past the entire envelope / what it's
  // wrapping, EnterEnvelope positions the cursor inside of the envelope,
  // letting the client explore the nested structure.
  void EnterEnvelope();

  // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes
  // the error more precisely; otherwise it'll be set to Error::OK.
  // In either case, Status().pos is the current position.
  // (The elaborated |struct Status| is required because this member function
  // hides the type name inside the class.)
  struct Status Status() const;

  // The following methods retrieve the token values. They can only
  // be called if TokenTag() matches.

  // To be called only if ::TokenTag() == CBORTokenTag::INT32.
  int32_t GetInt32() const;

  // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE.
  double GetDouble() const;

  // To be called only if ::TokenTag() == CBORTokenTag::STRING8.
  span<uint8_t> GetString8() const;

  // Wire representation for STRING16 is low byte first (little endian).
  // NOTE(review): EncodeString16() documents most significant byte first;
  // one of the two comments is wrong. Confirm against the implementation.
  // To be called only if ::TokenTag() == CBORTokenTag::STRING16.
  span<uint8_t> GetString16WireRep() const;

  // To be called only if ::TokenTag() == CBORTokenTag::BINARY.
  span<uint8_t> GetBinary() const;

  // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
  span<uint8_t> GetEnvelopeContents() const;

 private:
  void ReadNextToken(bool enter_envelope);
  void SetToken(CBORTokenTag token, size_t token_byte_length);
  void SetError(Error error);

  span<uint8_t> bytes_;
  CBORTokenTag token_tag_;
  struct Status status_;
  size_t token_byte_length_;
  MajorType token_start_type_;
  uint64_t token_start_internal_value_;
};

// =============================================================================
// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
// =============================================================================

// Parses a CBOR encoded message from |bytes|, sending events to
// |out|. If an error occurs, sends |out->HandleError|, and parsing stops.
// The client is responsible for discarding the already received information in
// that case.
void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out);

// =============================================================================
// cbor::AppendString8EntryToCBORMap - for limited in-place editing of messages
// =============================================================================

// Modifies the |cbor| message by appending a new key/value entry at the end
// of the map. Patches up the envelope size; Status.ok() iff successful.
// If not successful, |cbor| may be corrupted after this call.
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
                                   span<uint8_t> string8_value,
                                   std::vector<uint8_t>* cbor);
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
                                   span<uint8_t> string8_value,
                                   std::string* cbor);

namespace internals {  // Exposed only for writing tests.
// Reads the start of a token from |bytes| into |type| / |value|.
// NOTE(review): the size_t result is presumably the number of bytes consumed;
// confirm against the implementation.
size_t ReadTokenStart(span<uint8_t> bytes,
                      cbor::MajorType* type,
                      uint64_t* value);

// Writes the start of a token with the given |type| and |value| to |encoded|.
// NOTE(review): presumably appends rather than overwrites; confirm against
// the implementation.
void WriteTokenStart(cbor::MajorType type,
                     uint64_t value,
                     std::vector<uint8_t>* encoded);
void WriteTokenStart(cbor::MajorType type,
                     uint64_t value,
                     std::string* encoded);
}  // namespace internals
}  // namespace cbor

namespace json {
// Client code must provide an instance. Implementation should delegate
// to whatever is appropriate.
class Platform {
 public:
  virtual ~Platform() = default;
  // Parses |str| into |result|. Returns false iff there are
  // leftover characters or parsing errors.
  virtual bool StrToD(const char* str, double* result) const = 0;

  // Prints |value| in a format suitable for JSON.
  virtual std::unique_ptr<char[]> DToStr(double value) const = 0;
};

// =============================================================================
// json::NewJSONEncoder - for encoding streaming parser events as JSON
// =============================================================================

// Returns a handler object which will write ascii characters to |out|.
// |status->ok()| will be false iff the handler routine HandleError() is called.
// In that case, we'll stop emitting output.
// Except for calling the HandleError routine at any time, the client
// code must call the Handle* methods in an order in which they'd occur
// in valid JSON; otherwise we may crash (the code uses assert).
std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
    const Platform* platform,
    std::vector<uint8_t>* out,
    Status* status);
std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
                                                       std::string* out,
                                                       Status* status);

// =============================================================================
// json::ParseJSON - for receiving streaming parser events for JSON
// =============================================================================

// Parses the JSON text in |chars| (8 bit or 16 bit code units), sending
// streaming parser events to |handler|.
void ParseJSON(const Platform& platform,
               span<uint8_t> chars,
               StreamingParserHandler* handler);
void ParseJSON(const Platform& platform,
               span<uint16_t> chars,
               StreamingParserHandler* handler);

// =============================================================================
// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
// =============================================================================

// Transcodes the message in |cbor| to JSON, written to |json|.
// NOTE(review): the returned Status presumably reports the first error
// encountered (ok() on success); confirm against the implementation.
Status ConvertCBORToJSON(const Platform& platform,
                         span<uint8_t> cbor,
                         std::string* json);
Status ConvertCBORToJSON(const Platform& platform,
                         span<uint8_t> cbor,
                         std::vector<uint8_t>* json);

// Transcodes the JSON text in |json| (8 bit or 16 bit code units) to CBOR,
// written to |cbor|.
// NOTE(review): the returned Status presumably reports the first error
// encountered (ok() on success); confirm against the implementation.
Status ConvertJSONToCBOR(const Platform& platform,
                         span<uint8_t> json,
                         std::vector<uint8_t>* cbor);
Status ConvertJSONToCBOR(const Platform& platform,
                         span<uint16_t> json,
                         std::vector<uint8_t>* cbor);
Status ConvertJSONToCBOR(const Platform& platform,
                         span<uint8_t> json,
                         std::string* cbor);
Status ConvertJSONToCBOR(const Platform& platform,
                         span<uint16_t> json,
                         std::string* cbor);
}  // namespace json
}  // namespace v8_inspector_protocol_encoding

#endif  // V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_