16d528ed9Sopenharmony_ci// Copyright (c) 2012 The Chromium Authors. All rights reserved. 26d528ed9Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 36d528ed9Sopenharmony_ci// found in the LICENSE file. 46d528ed9Sopenharmony_ci 56d528ed9Sopenharmony_ci#ifndef BASE_JSON_JSON_PARSER_H_ 66d528ed9Sopenharmony_ci#define BASE_JSON_JSON_PARSER_H_ 76d528ed9Sopenharmony_ci 86d528ed9Sopenharmony_ci#include <stddef.h> 96d528ed9Sopenharmony_ci#include <stdint.h> 106d528ed9Sopenharmony_ci 116d528ed9Sopenharmony_ci#include <memory> 126d528ed9Sopenharmony_ci#include <optional> 136d528ed9Sopenharmony_ci#include <string> 146d528ed9Sopenharmony_ci#include <string_view> 156d528ed9Sopenharmony_ci 166d528ed9Sopenharmony_ci#include "base/compiler_specific.h" 176d528ed9Sopenharmony_ci#include "base/gtest_prod_util.h" 186d528ed9Sopenharmony_ci#include "base/json/json_reader.h" 196d528ed9Sopenharmony_ci 206d528ed9Sopenharmony_cinamespace base { 216d528ed9Sopenharmony_ci 226d528ed9Sopenharmony_ciclass Value; 236d528ed9Sopenharmony_ci 246d528ed9Sopenharmony_cinamespace internal { 256d528ed9Sopenharmony_ci 266d528ed9Sopenharmony_ciclass JSONParserTest; 276d528ed9Sopenharmony_ci 286d528ed9Sopenharmony_ci// The implementation behind the JSONReader interface. This class is not meant 296d528ed9Sopenharmony_ci// to be used directly; it encapsulates logic that need not be exposed publicly. 306d528ed9Sopenharmony_ci// 316d528ed9Sopenharmony_ci// This parser guarantees O(n) time through the input string. Iteration happens 326d528ed9Sopenharmony_ci// on the byte level, with the functions ConsumeChars() and ConsumeChar(). The 336d528ed9Sopenharmony_ci// conversion from byte to JSON token happens without advancing the parser in 346d528ed9Sopenharmony_ci// GetNextToken/ParseToken, that is tokenization operates on the current parser 356d528ed9Sopenharmony_ci// position without advancing. 366d528ed9Sopenharmony_ci// 376d528ed9Sopenharmony_ci// Built on top of these are a family of Consume functions that iterate 386d528ed9Sopenharmony_ci// internally. Invariant: on entry of a Consume function, the parser is wound 396d528ed9Sopenharmony_ci// to the first byte of a valid JSON token. On exit, it is on the first byte 406d528ed9Sopenharmony_ci// after the token that was just consumed, which would likely be the first byte 416d528ed9Sopenharmony_ci// of the next token. 426d528ed9Sopenharmony_ciclass JSONParser { 436d528ed9Sopenharmony_ci public: 446d528ed9Sopenharmony_ci JSONParser(int options, int max_depth = JSONReader::kStackMaxDepth); 456d528ed9Sopenharmony_ci ~JSONParser(); 466d528ed9Sopenharmony_ci 476d528ed9Sopenharmony_ci // Parses the input string according to the set options and returns the 486d528ed9Sopenharmony_ci // result as a Value. 496d528ed9Sopenharmony_ci // Wrap this in base::FooValue::From() to check the Value is of type Foo and 506d528ed9Sopenharmony_ci // convert to a FooValue at the same time. 516d528ed9Sopenharmony_ci std::optional<Value> Parse(std::string_view input); 526d528ed9Sopenharmony_ci 536d528ed9Sopenharmony_ci // Returns the error code. 546d528ed9Sopenharmony_ci JSONReader::JsonParseError error_code() const; 556d528ed9Sopenharmony_ci 566d528ed9Sopenharmony_ci // Returns the human-friendly error message. 576d528ed9Sopenharmony_ci std::string GetErrorMessage() const; 586d528ed9Sopenharmony_ci 596d528ed9Sopenharmony_ci // Returns the error line number if parse error happened. Otherwise always 606d528ed9Sopenharmony_ci // returns 0. 616d528ed9Sopenharmony_ci int error_line() const; 626d528ed9Sopenharmony_ci 636d528ed9Sopenharmony_ci // Returns the error column number if parse error happened. Otherwise always 646d528ed9Sopenharmony_ci // returns 0. 656d528ed9Sopenharmony_ci int error_column() const; 666d528ed9Sopenharmony_ci 676d528ed9Sopenharmony_ci private: 686d528ed9Sopenharmony_ci enum Token { 696d528ed9Sopenharmony_ci T_OBJECT_BEGIN, // { 706d528ed9Sopenharmony_ci T_OBJECT_END, // } 716d528ed9Sopenharmony_ci T_ARRAY_BEGIN, // [ 726d528ed9Sopenharmony_ci T_ARRAY_END, // ] 736d528ed9Sopenharmony_ci T_STRING, 746d528ed9Sopenharmony_ci T_NUMBER, 756d528ed9Sopenharmony_ci T_BOOL_TRUE, // true 766d528ed9Sopenharmony_ci T_BOOL_FALSE, // false 776d528ed9Sopenharmony_ci T_NULL, // null 786d528ed9Sopenharmony_ci T_LIST_SEPARATOR, // , 796d528ed9Sopenharmony_ci T_OBJECT_PAIR_SEPARATOR, // : 806d528ed9Sopenharmony_ci T_END_OF_INPUT, 816d528ed9Sopenharmony_ci T_INVALID_TOKEN, 826d528ed9Sopenharmony_ci }; 836d528ed9Sopenharmony_ci 846d528ed9Sopenharmony_ci // A helper class used for parsing strings. One optimization performed is to 856d528ed9Sopenharmony_ci // create base::Value with a std::string_view to avoid unnecessary std::string 866d528ed9Sopenharmony_ci // copies. This is not possible if the input string needs to be decoded from 876d528ed9Sopenharmony_ci // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped. 886d528ed9Sopenharmony_ci // This class centralizes that logic. 896d528ed9Sopenharmony_ci class StringBuilder { 906d528ed9Sopenharmony_ci public: 916d528ed9Sopenharmony_ci // Empty constructor. Used for creating a builder with which to assign to. 926d528ed9Sopenharmony_ci StringBuilder(); 936d528ed9Sopenharmony_ci 946d528ed9Sopenharmony_ci // |pos| is the beginning of an input string, excluding the |"|. 956d528ed9Sopenharmony_ci explicit StringBuilder(const char* pos); 966d528ed9Sopenharmony_ci 976d528ed9Sopenharmony_ci ~StringBuilder(); 986d528ed9Sopenharmony_ci 996d528ed9Sopenharmony_ci StringBuilder& operator=(StringBuilder&& other); 1006d528ed9Sopenharmony_ci 1016d528ed9Sopenharmony_ci // Appends the Unicode code point |point| to the string, either by 1026d528ed9Sopenharmony_ci // increasing the |length_| of the string if the string has not been 1036d528ed9Sopenharmony_ci // converted, or by appending the UTF8 bytes for the code point. 1046d528ed9Sopenharmony_ci void Append(uint32_t point); 1056d528ed9Sopenharmony_ci 1066d528ed9Sopenharmony_ci // Converts the builder from its default std::string_view to a full 1076d528ed9Sopenharmony_ci // std::string, performing a copy. Once a builder is converted, it cannot be 1086d528ed9Sopenharmony_ci // made a std::string_view again. 1096d528ed9Sopenharmony_ci void Convert(); 1106d528ed9Sopenharmony_ci 1116d528ed9Sopenharmony_ci // Returns the builder as a string, invalidating all state. This allows 1126d528ed9Sopenharmony_ci // the internal string buffer representation to be destructively moved 1136d528ed9Sopenharmony_ci // in cases where the builder will not be needed any more. 1146d528ed9Sopenharmony_ci std::string DestructiveAsString(); 1156d528ed9Sopenharmony_ci 1166d528ed9Sopenharmony_ci private: 1176d528ed9Sopenharmony_ci // The beginning of the input string. 1186d528ed9Sopenharmony_ci const char* pos_; 1196d528ed9Sopenharmony_ci 1206d528ed9Sopenharmony_ci // Number of bytes in |pos_| that make up the string being built. 1216d528ed9Sopenharmony_ci size_t length_; 1226d528ed9Sopenharmony_ci 1236d528ed9Sopenharmony_ci // The copied string representation. Will be unset until Convert() is 1246d528ed9Sopenharmony_ci // called. 1256d528ed9Sopenharmony_ci std::optional<std::string> string_; 1266d528ed9Sopenharmony_ci }; 1276d528ed9Sopenharmony_ci 1286d528ed9Sopenharmony_ci // Returns the next |count| bytes of the input stream, or nullopt if fewer 1296d528ed9Sopenharmony_ci // than |count| bytes remain. 1306d528ed9Sopenharmony_ci std::optional<std::string_view> PeekChars(int count); 1316d528ed9Sopenharmony_ci 1326d528ed9Sopenharmony_ci // Calls PeekChars() with a |count| of 1. 1336d528ed9Sopenharmony_ci std::optional<char> PeekChar(); 1346d528ed9Sopenharmony_ci 1356d528ed9Sopenharmony_ci // Returns the next |count| bytes of the input stream, or nullopt if fewer 1366d528ed9Sopenharmony_ci // than |count| bytes remain, and advances the parser position by |count|. 1376d528ed9Sopenharmony_ci std::optional<std::string_view> ConsumeChars(int count); 1386d528ed9Sopenharmony_ci 1396d528ed9Sopenharmony_ci // Calls ConsumeChars() with a |count| of 1. 1406d528ed9Sopenharmony_ci std::optional<char> ConsumeChar(); 1416d528ed9Sopenharmony_ci 1426d528ed9Sopenharmony_ci // Returns a pointer to the current character position. 1436d528ed9Sopenharmony_ci const char* pos(); 1446d528ed9Sopenharmony_ci 1456d528ed9Sopenharmony_ci // Skips over whitespace and comments to find the next token in the stream. 1466d528ed9Sopenharmony_ci // This does not advance the parser for non-whitespace or comment chars. 1476d528ed9Sopenharmony_ci Token GetNextToken(); 1486d528ed9Sopenharmony_ci 1496d528ed9Sopenharmony_ci // Consumes whitespace characters and comments until the next non-that is 1506d528ed9Sopenharmony_ci // encountered. 1516d528ed9Sopenharmony_ci void EatWhitespaceAndComments(); 1526d528ed9Sopenharmony_ci // Helper function that consumes a comment, assuming that the parser is 1536d528ed9Sopenharmony_ci // currently wound to a '/'. 1546d528ed9Sopenharmony_ci bool EatComment(); 1556d528ed9Sopenharmony_ci 1566d528ed9Sopenharmony_ci // Calls GetNextToken() and then ParseToken(). 1576d528ed9Sopenharmony_ci std::optional<Value> ParseNextToken(); 1586d528ed9Sopenharmony_ci 1596d528ed9Sopenharmony_ci // Takes a token that represents the start of a Value ("a structural token" 1606d528ed9Sopenharmony_ci // in RFC terms) and consumes it, returning the result as a Value. 1616d528ed9Sopenharmony_ci std::optional<Value> ParseToken(Token token); 1626d528ed9Sopenharmony_ci 1636d528ed9Sopenharmony_ci // Assuming that the parser is currently wound to '{', this parses a JSON 1646d528ed9Sopenharmony_ci // object into a Value. 1656d528ed9Sopenharmony_ci std::optional<Value> ConsumeDictionary(); 1666d528ed9Sopenharmony_ci 1676d528ed9Sopenharmony_ci // Assuming that the parser is wound to '[', this parses a JSON list into a 1686d528ed9Sopenharmony_ci // Value. 1696d528ed9Sopenharmony_ci std::optional<Value> ConsumeList(); 1706d528ed9Sopenharmony_ci 1716d528ed9Sopenharmony_ci // Calls through ConsumeStringRaw and wraps it in a value. 1726d528ed9Sopenharmony_ci std::optional<Value> ConsumeString(); 1736d528ed9Sopenharmony_ci 1746d528ed9Sopenharmony_ci // Assuming that the parser is wound to a double quote, this parses a string, 1756d528ed9Sopenharmony_ci // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on 1766d528ed9Sopenharmony_ci // success and places result into |out|. Returns false on failure with 1776d528ed9Sopenharmony_ci // error information set. 1786d528ed9Sopenharmony_ci bool ConsumeStringRaw(StringBuilder* out); 1796d528ed9Sopenharmony_ci // Helper function for ConsumeStringRaw() that consumes the next four or 10 1806d528ed9Sopenharmony_ci // bytes (parser is wound to the first character of a HEX sequence, with the 1816d528ed9Sopenharmony_ci // potential for consuming another \uXXXX for a surrogate). Returns true on 1826d528ed9Sopenharmony_ci // success and places the code point |out_code_point|, and false on failure. 1836d528ed9Sopenharmony_ci bool DecodeUTF16(uint32_t* out_code_point); 1846d528ed9Sopenharmony_ci 1856d528ed9Sopenharmony_ci // Assuming that the parser is wound to the start of a valid JSON number, 1866d528ed9Sopenharmony_ci // this parses and converts it to either an int or double value. 1876d528ed9Sopenharmony_ci std::optional<Value> ConsumeNumber(); 1886d528ed9Sopenharmony_ci // Helper that reads characters that are ints. Returns true if a number was 1896d528ed9Sopenharmony_ci // read and false on error. 1906d528ed9Sopenharmony_ci bool ReadInt(bool allow_leading_zeros); 1916d528ed9Sopenharmony_ci 1926d528ed9Sopenharmony_ci // Consumes the literal values of |true|, |false|, and |null|, assuming the 1936d528ed9Sopenharmony_ci // parser is wound to the first character of any of those. 1946d528ed9Sopenharmony_ci std::optional<Value> ConsumeLiteral(); 1956d528ed9Sopenharmony_ci 1966d528ed9Sopenharmony_ci // Helper function that returns true if the byte sequence |match| can be 1976d528ed9Sopenharmony_ci // consumed at the current parser position. Returns false if there are fewer 1986d528ed9Sopenharmony_ci // than |match|-length bytes or if the sequence does not match, and the 1996d528ed9Sopenharmony_ci // parser state is unchanged. 2006d528ed9Sopenharmony_ci bool ConsumeIfMatch(std::string_view match); 2016d528ed9Sopenharmony_ci 2026d528ed9Sopenharmony_ci // Sets the error information to |code| at the current column, based on 2036d528ed9Sopenharmony_ci // |index_| and |index_last_line_|, with an optional positive/negative 2046d528ed9Sopenharmony_ci // adjustment by |column_adjust|. 2056d528ed9Sopenharmony_ci void ReportError(JSONReader::JsonParseError code, int column_adjust); 2066d528ed9Sopenharmony_ci 2076d528ed9Sopenharmony_ci // Given the line and column number of an error, formats one of the error 2086d528ed9Sopenharmony_ci // message contants from json_reader.h for human display. 2096d528ed9Sopenharmony_ci static std::string FormatErrorMessage(int line, 2106d528ed9Sopenharmony_ci int column, 2116d528ed9Sopenharmony_ci const std::string& description); 2126d528ed9Sopenharmony_ci 2136d528ed9Sopenharmony_ci // base::JSONParserOptions that control parsing. 2146d528ed9Sopenharmony_ci const int options_; 2156d528ed9Sopenharmony_ci 2166d528ed9Sopenharmony_ci // Maximum depth to parse. 2176d528ed9Sopenharmony_ci const int max_depth_; 2186d528ed9Sopenharmony_ci 2196d528ed9Sopenharmony_ci // The input stream being parsed. Note: Not guaranteed to NUL-terminated. 2206d528ed9Sopenharmony_ci std::string_view input_; 2216d528ed9Sopenharmony_ci 2226d528ed9Sopenharmony_ci // The index in the input stream to which the parser is wound. 2236d528ed9Sopenharmony_ci int index_; 2246d528ed9Sopenharmony_ci 2256d528ed9Sopenharmony_ci // The number of times the parser has recursed (current stack depth). 2266d528ed9Sopenharmony_ci int stack_depth_; 2276d528ed9Sopenharmony_ci 2286d528ed9Sopenharmony_ci // The line number that the parser is at currently. 2296d528ed9Sopenharmony_ci int line_number_; 2306d528ed9Sopenharmony_ci 2316d528ed9Sopenharmony_ci // The last value of |index_| on the previous line. 2326d528ed9Sopenharmony_ci int index_last_line_; 2336d528ed9Sopenharmony_ci 2346d528ed9Sopenharmony_ci // Error information. 2356d528ed9Sopenharmony_ci JSONReader::JsonParseError error_code_; 2366d528ed9Sopenharmony_ci int error_line_; 2376d528ed9Sopenharmony_ci int error_column_; 2386d528ed9Sopenharmony_ci 2396d528ed9Sopenharmony_ci friend class JSONParserTest; 2406d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar); 2416d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary); 2426d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList); 2436d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString); 2446d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals); 2456d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers); 2466d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages); 2476d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ReplaceInvalidCharacters); 2486d528ed9Sopenharmony_ci FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ReplaceInvalidUTF16EscapeSequence); 2496d528ed9Sopenharmony_ci 2506d528ed9Sopenharmony_ci JSONParser(const JSONParser&) = delete; 2516d528ed9Sopenharmony_ci JSONParser& operator=(const JSONParser&) = delete; 2526d528ed9Sopenharmony_ci}; 2536d528ed9Sopenharmony_ci 2546d528ed9Sopenharmony_ci// Used when decoding and an invalid utf-8 sequence is encountered. 2556d528ed9Sopenharmony_ciextern const char kUnicodeReplacementString[]; 2566d528ed9Sopenharmony_ci 2576d528ed9Sopenharmony_ci} // namespace internal 2586d528ed9Sopenharmony_ci} // namespace base 2596d528ed9Sopenharmony_ci 2606d528ed9Sopenharmony_ci#endif // BASE_JSON_JSON_PARSER_H_ 261