xref: /third_party/node/deps/v8/src/json/json-parser.h (revision 1cb0ef41)
1// Copyright 2011 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_JSON_JSON_PARSER_H_
6#define V8_JSON_JSON_PARSER_H_
7
8#include "include/v8-callbacks.h"
9#include "src/base/small-vector.h"
10#include "src/base/strings.h"
11#include "src/common/high-allocation-throughput-scope.h"
12#include "src/execution/isolate.h"
13#include "src/heap/factory.h"
14#include "src/objects/objects.h"
15#include "src/zone/zone-containers.h"
16
17namespace v8 {
18namespace internal {
19
// Two-state result code. Judging by the enumerator names it reports whether an
// element was found; its users are not visible in this header.
enum ParseElementResult {
  kElementFound,     // First enumerator, value 0.
  kElementNotFound,  // Second enumerator, value 1.
};
21
22class JsonString final {
23 public:
24  JsonString()
25      : start_(0),
26        length_(0),
27        needs_conversion_(false),
28        internalize_(false),
29        has_escape_(false),
30        is_index_(false) {}
31
32  explicit JsonString(uint32_t index)
33      : index_(index),
34        length_(0),
35        needs_conversion_(false),
36        internalize_(false),
37        has_escape_(false),
38        is_index_(true) {}
39
40  JsonString(int start, int length, bool needs_conversion,
41             bool needs_internalization, bool has_escape)
42      : start_(start),
43        length_(length),
44        needs_conversion_(needs_conversion),
45        internalize_(needs_internalization ||
46                     length_ <= kMaxInternalizedStringValueLength),
47        has_escape_(has_escape),
48        is_index_(false) {}
49
50  bool internalize() const {
51    DCHECK(!is_index_);
52    return internalize_;
53  }
54
55  bool needs_conversion() const {
56    DCHECK(!is_index_);
57    return needs_conversion_;
58  }
59
60  bool has_escape() const {
61    DCHECK(!is_index_);
62    return has_escape_;
63  }
64
65  int start() const {
66    DCHECK(!is_index_);
67    return start_;
68  }
69
70  int length() const {
71    DCHECK(!is_index_);
72    return length_;
73  }
74
75  uint32_t index() const {
76    DCHECK(is_index_);
77    return index_;
78  }
79
80  bool is_index() const { return is_index_; }
81
82 private:
83  static const int kMaxInternalizedStringValueLength = 10;
84
85  union {
86    const int start_;
87    const uint32_t index_;
88  };
89  const int length_;
90  const bool needs_conversion_ : 1;
91  const bool internalize_ : 1;
92  const bool has_escape_ : 1;
93  const bool is_index_ : 1;
94};
95
96struct JsonProperty {
97  JsonProperty() { UNREACHABLE(); }
98  explicit JsonProperty(const JsonString& string) : string(string) {}
99
100  JsonString string;
101  Handle<Object> value;
102};
103
104class JsonParseInternalizer {
105 public:
106  static MaybeHandle<Object> Internalize(Isolate* isolate,
107                                         Handle<Object> object,
108                                         Handle<Object> reviver);
109
110 private:
111  JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver)
112      : isolate_(isolate), reviver_(reviver) {}
113
114  MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder,
115                                              Handle<String> key);
116
117  bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name);
118
119  Isolate* isolate_;
120  Handle<JSReceiver> reviver_;
121};
122
// Token kinds used by JsonParser, which caches the current one in next_ and
// exposes it through peek(). Backed by uint8_t so a token occupies one byte.
// The enumerator order fixes the numeric values (NUMBER == 0 ... EOS == 13);
// the mapping from input characters to tokens lives outside this header.
enum class JsonToken : uint8_t {
  NUMBER,
  STRING,
  LBRACE,
  RBRACE,
  LBRACK,
  RBRACK,
  TRUE_LITERAL,
  FALSE_LITERAL,
  NULL_LITERAL,
  WHITESPACE,
  COLON,
  COMMA,
  ILLEGAL,
  EOS,
};
139
140// A simple json parser.
141template <typename Char>
142class JsonParser final {
143 public:
144  using SeqString = typename CharTraits<Char>::String;
145  using SeqExternalString = typename CharTraits<Char>::ExternalString;
146
147  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse(
148      Isolate* isolate, Handle<String> source, Handle<Object> reviver) {
149    HighAllocationThroughputScope high_throughput_scope(
150        V8::GetCurrentPlatform());
151    Handle<Object> result;
152    ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
153                               JsonParser(isolate, source).ParseJson(), Object);
154    if (reviver->IsCallable()) {
155      return JsonParseInternalizer::Internalize(isolate, result, reviver);
156    }
157    return result;
158  }
159
160  static constexpr base::uc32 kEndOfString = static_cast<base::uc32>(-1);
161  static constexpr base::uc32 kInvalidUnicodeCharacter =
162      static_cast<base::uc32>(-1);
163
164 private:
165  template <typename T>
166  using SmallVector = base::SmallVector<T, 16>;
167  struct JsonContinuation {
168    enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement };
169    JsonContinuation(Isolate* isolate, Type type, size_t index)
170        : scope(isolate),
171          type_(type),
172          index(static_cast<uint32_t>(index)),
173          max_index(0),
174          elements(0) {}
175
176    Type type() const { return static_cast<Type>(type_); }
177    void set_type(Type type) { type_ = static_cast<uint8_t>(type); }
178
179    HandleScope scope;
180    // Unfortunately GCC doesn't like packing Type in two bits.
181    uint32_t type_ : 2;
182    uint32_t index : 30;
183    uint32_t max_index;
184    uint32_t elements;
185  };
186
187  JsonParser(Isolate* isolate, Handle<String> source);
188  ~JsonParser();
189
190  // Parse a string containing a single JSON value.
191  MaybeHandle<Object> ParseJson();
192
193  void advance() { ++cursor_; }
194
195  base::uc32 CurrentCharacter() {
196    if (V8_UNLIKELY(is_at_end())) return kEndOfString;
197    return *cursor_;
198  }
199
200  base::uc32 NextCharacter() {
201    advance();
202    return CurrentCharacter();
203  }
204
205  void AdvanceToNonDecimal();
206
207  V8_INLINE JsonToken peek() const { return next_; }
208
209  void Consume(JsonToken token) {
210    DCHECK_EQ(peek(), token);
211    advance();
212  }
213
214  void Expect(JsonToken token) {
215    if (V8_LIKELY(peek() == token)) {
216      advance();
217    } else {
218      ReportUnexpectedToken(peek());
219    }
220  }
221
222  void ExpectNext(JsonToken token) {
223    SkipWhitespace();
224    Expect(token);
225  }
226
227  bool Check(JsonToken token) {
228    SkipWhitespace();
229    if (next_ != token) return false;
230    advance();
231    return true;
232  }
233
234  template <size_t N>
235  void ScanLiteral(const char (&s)[N]) {
236    DCHECK(!is_at_end());
237    // There's at least 1 character, we always consume a character and compare
238    // the next character. The first character was compared before we jumped
239    // to ScanLiteral.
240    STATIC_ASSERT(N > 2);
241    size_t remaining = static_cast<size_t>(end_ - cursor_);
242    if (V8_LIKELY(remaining >= N - 1 &&
243                  CompareCharsEqual(s + 1, cursor_ + 1, N - 2))) {
244      cursor_ += N - 1;
245      return;
246    }
247
248    cursor_++;
249    for (size_t i = 0; i < std::min(N - 2, remaining - 1); i++) {
250      if (*(s + 1 + i) != *cursor_) {
251        ReportUnexpectedCharacter(*cursor_);
252        return;
253      }
254      cursor_++;
255    }
256
257    DCHECK(is_at_end());
258    ReportUnexpectedToken(JsonToken::EOS);
259  }
260
261  // The JSON lexical grammar is specified in the ECMAScript 5 standard,
262  // section 15.12.1.1. The only allowed whitespace characters between tokens
263  // are tab, carriage-return, newline and space.
264  void SkipWhitespace();
265
266  // A JSON string (production JSONString) is subset of valid JavaScript string
267  // literals. The string must only be double-quoted (not single-quoted), and
268  // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
269  // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
270  JsonString ScanJsonString(bool needs_internalization);
271  JsonString ScanJsonPropertyKey(JsonContinuation* cont);
272  base::uc32 ScanUnicodeCharacter();
273  Handle<String> MakeString(const JsonString& string,
274                            Handle<String> hint = Handle<String>());
275
276  template <typename SinkChar>
277  void DecodeString(SinkChar* sink, int start, int length);
278
279  template <typename SinkSeqString>
280  Handle<String> DecodeString(const JsonString& string,
281                              Handle<SinkSeqString> intermediate,
282                              Handle<String> hint);
283
284  // A JSON number (production JSONNumber) is a subset of the valid JavaScript
285  // decimal number literals.
286  // It includes an optional minus sign, must have at least one
287  // digit before and after a decimal point, may not have prefixed zeros (unless
288  // the integer part is zero), and may include an exponent part (e.g., "e-10").
289  // Hexadecimal and octal numbers are not allowed.
290  Handle<Object> ParseJsonNumber();
291
292  // Parse a single JSON value from input (grammar production JSONValue).
293  // A JSON value is either a (double-quoted) string literal, a number literal,
294  // one of "true", "false", or "null", or an object or array literal.
295  MaybeHandle<Object> ParseJsonValue();
296
297  Handle<Object> BuildJsonObject(
298      const JsonContinuation& cont,
299      const SmallVector<JsonProperty>& property_stack, Handle<Map> feedback);
300  Handle<Object> BuildJsonArray(
301      const JsonContinuation& cont,
302      const SmallVector<Handle<Object>>& element_stack);
303
304  // Mark that a parsing error has happened at the current character.
305  void ReportUnexpectedCharacter(base::uc32 c);
306  // Mark that a parsing error has happened at the current token.
307  void ReportUnexpectedToken(JsonToken token);
308
309  inline Isolate* isolate() { return isolate_; }
310  inline Factory* factory() { return isolate_->factory(); }
311  inline Handle<JSFunction> object_constructor() { return object_constructor_; }
312
313  static const int kInitialSpecialStringLength = 32;
314
315  static void UpdatePointersCallback(void* parser) {
316    reinterpret_cast<JsonParser<Char>*>(parser)->UpdatePointers();
317  }
318
319  void UpdatePointers() {
320    DisallowGarbageCollection no_gc;
321    const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc);
322    if (chars_ != chars) {
323      size_t position = cursor_ - chars_;
324      size_t length = end_ - chars_;
325      chars_ = chars;
326      cursor_ = chars_ + position;
327      end_ = chars_ + length;
328    }
329  }
330
331 private:
332  static const bool kIsOneByte = sizeof(Char) == 1;
333
334  bool is_at_end() const {
335    DCHECK_LE(cursor_, end_);
336    return cursor_ == end_;
337  }
338
339  int position() const { return static_cast<int>(cursor_ - chars_); }
340
341  Isolate* isolate_;
342  const uint64_t hash_seed_;
343  JsonToken next_;
344  // Indicates whether the bytes underneath source_ can relocate during GC.
345  bool chars_may_relocate_;
346  Handle<JSFunction> object_constructor_;
347  const Handle<String> original_source_;
348  Handle<String> source_;
349
350  // Cached pointer to the raw chars in source. In case source is on-heap, we
351  // register an UpdatePointers callback. For this reason, chars_, cursor_ and
352  // end_ should never be locally cached across a possible allocation. The scope
353  // in which we cache chars has to be guarded by a DisallowGarbageCollection
354  // scope.
355  const Char* cursor_;
356  const Char* end_;
357  const Char* chars_;
358};
359
360// Explicit instantiation declarations.
361extern template class JsonParser<uint8_t>;
362extern template class JsonParser<uint16_t>;
363
364}  // namespace internal
365}  // namespace v8
366
367#endif  // V8_JSON_JSON_PARSER_H_
368