1c5f01b2fSopenharmony_ci// __ _____ _____ _____ 2c5f01b2fSopenharmony_ci// __| | __| | | | JSON for Modern C++ 3c5f01b2fSopenharmony_ci// | | |__ | | | | | | version 3.11.2 4c5f01b2fSopenharmony_ci// |_____|_____|_____|_|___| https://github.com/nlohmann/json 5c5f01b2fSopenharmony_ci// 6c5f01b2fSopenharmony_ci// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me> 7c5f01b2fSopenharmony_ci// SPDX-License-Identifier: MIT 8c5f01b2fSopenharmony_ci 9c5f01b2fSopenharmony_ci#pragma once 10c5f01b2fSopenharmony_ci 11c5f01b2fSopenharmony_ci#include <array> // array 12c5f01b2fSopenharmony_ci#include <clocale> // localeconv 13c5f01b2fSopenharmony_ci#include <cstddef> // size_t 14c5f01b2fSopenharmony_ci#include <cstdio> // snprintf 15c5f01b2fSopenharmony_ci#include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull 16c5f01b2fSopenharmony_ci#include <initializer_list> // initializer_list 17c5f01b2fSopenharmony_ci#include <string> // char_traits, string 18c5f01b2fSopenharmony_ci#include <utility> // move 19c5f01b2fSopenharmony_ci#include <vector> // vector 20c5f01b2fSopenharmony_ci 21c5f01b2fSopenharmony_ci#include <nlohmann/detail/input/input_adapters.hpp> 22c5f01b2fSopenharmony_ci#include <nlohmann/detail/input/position_t.hpp> 23c5f01b2fSopenharmony_ci#include <nlohmann/detail/macro_scope.hpp> 24c5f01b2fSopenharmony_ci 25c5f01b2fSopenharmony_ciNLOHMANN_JSON_NAMESPACE_BEGIN 26c5f01b2fSopenharmony_cinamespace detail 27c5f01b2fSopenharmony_ci{ 28c5f01b2fSopenharmony_ci 29c5f01b2fSopenharmony_ci/////////// 30c5f01b2fSopenharmony_ci// lexer // 31c5f01b2fSopenharmony_ci/////////// 32c5f01b2fSopenharmony_ci 33c5f01b2fSopenharmony_citemplate<typename BasicJsonType> 34c5f01b2fSopenharmony_ciclass lexer_base 35c5f01b2fSopenharmony_ci{ 36c5f01b2fSopenharmony_ci public: 37c5f01b2fSopenharmony_ci /// token types for the parser 38c5f01b2fSopenharmony_ci enum class token_type 39c5f01b2fSopenharmony_ci { 40c5f01b2fSopenharmony_ci uninitialized, ///< indicating the scanner 
is uninitialized 41c5f01b2fSopenharmony_ci literal_true, ///< the `true` literal 42c5f01b2fSopenharmony_ci literal_false, ///< the `false` literal 43c5f01b2fSopenharmony_ci literal_null, ///< the `null` literal 44c5f01b2fSopenharmony_ci value_string, ///< a string -- use get_string() for actual value 45c5f01b2fSopenharmony_ci value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value 46c5f01b2fSopenharmony_ci value_integer, ///< a signed integer -- use get_number_integer() for actual value 47c5f01b2fSopenharmony_ci value_float, ///< an floating point number -- use get_number_float() for actual value 48c5f01b2fSopenharmony_ci begin_array, ///< the character for array begin `[` 49c5f01b2fSopenharmony_ci begin_object, ///< the character for object begin `{` 50c5f01b2fSopenharmony_ci end_array, ///< the character for array end `]` 51c5f01b2fSopenharmony_ci end_object, ///< the character for object end `}` 52c5f01b2fSopenharmony_ci name_separator, ///< the name separator `:` 53c5f01b2fSopenharmony_ci value_separator, ///< the value separator `,` 54c5f01b2fSopenharmony_ci parse_error, ///< indicating a parse error 55c5f01b2fSopenharmony_ci end_of_input, ///< indicating the end of the input buffer 56c5f01b2fSopenharmony_ci literal_or_value ///< a literal or the begin of a value (only for diagnostics) 57c5f01b2fSopenharmony_ci }; 58c5f01b2fSopenharmony_ci 59c5f01b2fSopenharmony_ci /// return name of values of type token_type (only used for errors) 60c5f01b2fSopenharmony_ci JSON_HEDLEY_RETURNS_NON_NULL 61c5f01b2fSopenharmony_ci JSON_HEDLEY_CONST 62c5f01b2fSopenharmony_ci static const char* token_type_name(const token_type t) noexcept 63c5f01b2fSopenharmony_ci { 64c5f01b2fSopenharmony_ci switch (t) 65c5f01b2fSopenharmony_ci { 66c5f01b2fSopenharmony_ci case token_type::uninitialized: 67c5f01b2fSopenharmony_ci return "<uninitialized>"; 68c5f01b2fSopenharmony_ci case token_type::literal_true: 69c5f01b2fSopenharmony_ci return "true literal"; 
70c5f01b2fSopenharmony_ci case token_type::literal_false: 71c5f01b2fSopenharmony_ci return "false literal"; 72c5f01b2fSopenharmony_ci case token_type::literal_null: 73c5f01b2fSopenharmony_ci return "null literal"; 74c5f01b2fSopenharmony_ci case token_type::value_string: 75c5f01b2fSopenharmony_ci return "string literal"; 76c5f01b2fSopenharmony_ci case token_type::value_unsigned: 77c5f01b2fSopenharmony_ci case token_type::value_integer: 78c5f01b2fSopenharmony_ci case token_type::value_float: 79c5f01b2fSopenharmony_ci return "number literal"; 80c5f01b2fSopenharmony_ci case token_type::begin_array: 81c5f01b2fSopenharmony_ci return "'['"; 82c5f01b2fSopenharmony_ci case token_type::begin_object: 83c5f01b2fSopenharmony_ci return "'{'"; 84c5f01b2fSopenharmony_ci case token_type::end_array: 85c5f01b2fSopenharmony_ci return "']'"; 86c5f01b2fSopenharmony_ci case token_type::end_object: 87c5f01b2fSopenharmony_ci return "'}'"; 88c5f01b2fSopenharmony_ci case token_type::name_separator: 89c5f01b2fSopenharmony_ci return "':'"; 90c5f01b2fSopenharmony_ci case token_type::value_separator: 91c5f01b2fSopenharmony_ci return "','"; 92c5f01b2fSopenharmony_ci case token_type::parse_error: 93c5f01b2fSopenharmony_ci return "<parse error>"; 94c5f01b2fSopenharmony_ci case token_type::end_of_input: 95c5f01b2fSopenharmony_ci return "end of input"; 96c5f01b2fSopenharmony_ci case token_type::literal_or_value: 97c5f01b2fSopenharmony_ci return "'[', '{', or a literal"; 98c5f01b2fSopenharmony_ci // LCOV_EXCL_START 99c5f01b2fSopenharmony_ci default: // catch non-enum values 100c5f01b2fSopenharmony_ci return "unknown token"; 101c5f01b2fSopenharmony_ci // LCOV_EXCL_STOP 102c5f01b2fSopenharmony_ci } 103c5f01b2fSopenharmony_ci } 104c5f01b2fSopenharmony_ci}; 105c5f01b2fSopenharmony_ci/*! 106c5f01b2fSopenharmony_ci@brief lexical analysis 107c5f01b2fSopenharmony_ci 108c5f01b2fSopenharmony_ciThis class organizes the lexical analysis during JSON deserialization. 
109c5f01b2fSopenharmony_ci*/ 110c5f01b2fSopenharmony_citemplate<typename BasicJsonType, typename InputAdapterType> 111c5f01b2fSopenharmony_ciclass lexer : public lexer_base<BasicJsonType> 112c5f01b2fSopenharmony_ci{ 113c5f01b2fSopenharmony_ci using number_integer_t = typename BasicJsonType::number_integer_t; 114c5f01b2fSopenharmony_ci using number_unsigned_t = typename BasicJsonType::number_unsigned_t; 115c5f01b2fSopenharmony_ci using number_float_t = typename BasicJsonType::number_float_t; 116c5f01b2fSopenharmony_ci using string_t = typename BasicJsonType::string_t; 117c5f01b2fSopenharmony_ci using char_type = typename InputAdapterType::char_type; 118c5f01b2fSopenharmony_ci using char_int_type = typename std::char_traits<char_type>::int_type; 119c5f01b2fSopenharmony_ci 120c5f01b2fSopenharmony_ci public: 121c5f01b2fSopenharmony_ci using token_type = typename lexer_base<BasicJsonType>::token_type; 122c5f01b2fSopenharmony_ci 123c5f01b2fSopenharmony_ci explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept 124c5f01b2fSopenharmony_ci : ia(std::move(adapter)) 125c5f01b2fSopenharmony_ci , ignore_comments(ignore_comments_) 126c5f01b2fSopenharmony_ci , decimal_point_char(static_cast<char_int_type>(get_decimal_point())) 127c5f01b2fSopenharmony_ci {} 128c5f01b2fSopenharmony_ci 129c5f01b2fSopenharmony_ci // delete because of pointer members 130c5f01b2fSopenharmony_ci lexer(const lexer&) = delete; 131c5f01b2fSopenharmony_ci lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) 132c5f01b2fSopenharmony_ci lexer& operator=(lexer&) = delete; 133c5f01b2fSopenharmony_ci lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) 134c5f01b2fSopenharmony_ci ~lexer() = default; 135c5f01b2fSopenharmony_ci 136c5f01b2fSopenharmony_ci private: 137c5f01b2fSopenharmony_ci ///////////////////// 138c5f01b2fSopenharmony_ci // locales 139c5f01b2fSopenharmony_ci ///////////////////// 
140c5f01b2fSopenharmony_ci 141c5f01b2fSopenharmony_ci /// return the locale-dependent decimal point 142c5f01b2fSopenharmony_ci JSON_HEDLEY_PURE 143c5f01b2fSopenharmony_ci static char get_decimal_point() noexcept 144c5f01b2fSopenharmony_ci { 145c5f01b2fSopenharmony_ci const auto* loc = localeconv(); 146c5f01b2fSopenharmony_ci JSON_ASSERT(loc != nullptr); 147c5f01b2fSopenharmony_ci return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); 148c5f01b2fSopenharmony_ci } 149c5f01b2fSopenharmony_ci 150c5f01b2fSopenharmony_ci ///////////////////// 151c5f01b2fSopenharmony_ci // scan functions 152c5f01b2fSopenharmony_ci ///////////////////// 153c5f01b2fSopenharmony_ci 154c5f01b2fSopenharmony_ci /*! 155c5f01b2fSopenharmony_ci @brief get codepoint from 4 hex characters following `\u` 156c5f01b2fSopenharmony_ci 157c5f01b2fSopenharmony_ci For input "\u c1 c2 c3 c4" the codepoint is: 158c5f01b2fSopenharmony_ci (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 159c5f01b2fSopenharmony_ci = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) 160c5f01b2fSopenharmony_ci 161c5f01b2fSopenharmony_ci Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' 162c5f01b2fSopenharmony_ci must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The 163c5f01b2fSopenharmony_ci conversion is done by subtracting the offset (0x30, 0x37, and 0x57) 164c5f01b2fSopenharmony_ci between the ASCII value of the character and the desired integer value. 165c5f01b2fSopenharmony_ci 166c5f01b2fSopenharmony_ci @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. 
EOF or 167c5f01b2fSopenharmony_ci non-hex character) 168c5f01b2fSopenharmony_ci */ 169c5f01b2fSopenharmony_ci int get_codepoint() 170c5f01b2fSopenharmony_ci { 171c5f01b2fSopenharmony_ci // this function only makes sense after reading `\u` 172c5f01b2fSopenharmony_ci JSON_ASSERT(current == 'u'); 173c5f01b2fSopenharmony_ci int codepoint = 0; 174c5f01b2fSopenharmony_ci 175c5f01b2fSopenharmony_ci const auto factors = { 12u, 8u, 4u, 0u }; 176c5f01b2fSopenharmony_ci for (const auto factor : factors) 177c5f01b2fSopenharmony_ci { 178c5f01b2fSopenharmony_ci get(); 179c5f01b2fSopenharmony_ci 180c5f01b2fSopenharmony_ci if (current >= '0' && current <= '9') 181c5f01b2fSopenharmony_ci { 182c5f01b2fSopenharmony_ci codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x30u) << factor); 183c5f01b2fSopenharmony_ci } 184c5f01b2fSopenharmony_ci else if (current >= 'A' && current <= 'F') 185c5f01b2fSopenharmony_ci { 186c5f01b2fSopenharmony_ci codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x37u) << factor); 187c5f01b2fSopenharmony_ci } 188c5f01b2fSopenharmony_ci else if (current >= 'a' && current <= 'f') 189c5f01b2fSopenharmony_ci { 190c5f01b2fSopenharmony_ci codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x57u) << factor); 191c5f01b2fSopenharmony_ci } 192c5f01b2fSopenharmony_ci else 193c5f01b2fSopenharmony_ci { 194c5f01b2fSopenharmony_ci return -1; 195c5f01b2fSopenharmony_ci } 196c5f01b2fSopenharmony_ci } 197c5f01b2fSopenharmony_ci 198c5f01b2fSopenharmony_ci JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF); 199c5f01b2fSopenharmony_ci return codepoint; 200c5f01b2fSopenharmony_ci } 201c5f01b2fSopenharmony_ci 202c5f01b2fSopenharmony_ci /*! 203c5f01b2fSopenharmony_ci @brief check if the next byte(s) are inside a given range 204c5f01b2fSopenharmony_ci 205c5f01b2fSopenharmony_ci Adds the current byte and, for each passed range, reads a new byte and 206c5f01b2fSopenharmony_ci checks if it is inside the range. 
If a violation was detected, set up an 207c5f01b2fSopenharmony_ci error message and return false. Otherwise, return true. 208c5f01b2fSopenharmony_ci 209c5f01b2fSopenharmony_ci @param[in] ranges list of integers; interpreted as list of pairs of 210c5f01b2fSopenharmony_ci inclusive lower and upper bound, respectively 211c5f01b2fSopenharmony_ci 212c5f01b2fSopenharmony_ci @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, 213c5f01b2fSopenharmony_ci 1, 2, or 3 pairs. This precondition is enforced by an assertion. 214c5f01b2fSopenharmony_ci 215c5f01b2fSopenharmony_ci @return true if and only if no range violation was detected 216c5f01b2fSopenharmony_ci */ 217c5f01b2fSopenharmony_ci bool next_byte_in_range(std::initializer_list<char_int_type> ranges) 218c5f01b2fSopenharmony_ci { 219c5f01b2fSopenharmony_ci JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6); 220c5f01b2fSopenharmony_ci add(current); 221c5f01b2fSopenharmony_ci 222c5f01b2fSopenharmony_ci for (auto range = ranges.begin(); range != ranges.end(); ++range) 223c5f01b2fSopenharmony_ci { 224c5f01b2fSopenharmony_ci get(); 225c5f01b2fSopenharmony_ci if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) 226c5f01b2fSopenharmony_ci { 227c5f01b2fSopenharmony_ci add(current); 228c5f01b2fSopenharmony_ci } 229c5f01b2fSopenharmony_ci else 230c5f01b2fSopenharmony_ci { 231c5f01b2fSopenharmony_ci error_message = "invalid string: ill-formed UTF-8 byte"; 232c5f01b2fSopenharmony_ci return false; 233c5f01b2fSopenharmony_ci } 234c5f01b2fSopenharmony_ci } 235c5f01b2fSopenharmony_ci 236c5f01b2fSopenharmony_ci return true; 237c5f01b2fSopenharmony_ci } 238c5f01b2fSopenharmony_ci 239c5f01b2fSopenharmony_ci /*! 240c5f01b2fSopenharmony_ci @brief scan a string literal 241c5f01b2fSopenharmony_ci 242c5f01b2fSopenharmony_ci This function scans a string according to Sect. 7 of RFC 8259. While 243c5f01b2fSopenharmony_ci scanning, bytes are escaped and copied into buffer token_buffer. 
Then the 244c5f01b2fSopenharmony_ci function returns successfully, token_buffer is *not* null-terminated (as it 245c5f01b2fSopenharmony_ci may contain \0 bytes), and token_buffer.size() is the number of bytes in the 246c5f01b2fSopenharmony_ci string. 247c5f01b2fSopenharmony_ci 248c5f01b2fSopenharmony_ci @return token_type::value_string if string could be successfully scanned, 249c5f01b2fSopenharmony_ci token_type::parse_error otherwise 250c5f01b2fSopenharmony_ci 251c5f01b2fSopenharmony_ci @note In case of errors, variable error_message contains a textual 252c5f01b2fSopenharmony_ci description. 253c5f01b2fSopenharmony_ci */ 254c5f01b2fSopenharmony_ci token_type scan_string() 255c5f01b2fSopenharmony_ci { 256c5f01b2fSopenharmony_ci // reset token_buffer (ignore opening quote) 257c5f01b2fSopenharmony_ci reset(); 258c5f01b2fSopenharmony_ci 259c5f01b2fSopenharmony_ci // we entered the function by reading an open quote 260c5f01b2fSopenharmony_ci JSON_ASSERT(current == '\"'); 261c5f01b2fSopenharmony_ci 262c5f01b2fSopenharmony_ci while (true) 263c5f01b2fSopenharmony_ci { 264c5f01b2fSopenharmony_ci // get next character 265c5f01b2fSopenharmony_ci switch (get()) 266c5f01b2fSopenharmony_ci { 267c5f01b2fSopenharmony_ci // end of file while parsing string 268c5f01b2fSopenharmony_ci case std::char_traits<char_type>::eof(): 269c5f01b2fSopenharmony_ci { 270c5f01b2fSopenharmony_ci error_message = "invalid string: missing closing quote"; 271c5f01b2fSopenharmony_ci return token_type::parse_error; 272c5f01b2fSopenharmony_ci } 273c5f01b2fSopenharmony_ci 274c5f01b2fSopenharmony_ci // closing quote 275c5f01b2fSopenharmony_ci case '\"': 276c5f01b2fSopenharmony_ci { 277c5f01b2fSopenharmony_ci return token_type::value_string; 278c5f01b2fSopenharmony_ci } 279c5f01b2fSopenharmony_ci 280c5f01b2fSopenharmony_ci // escapes 281c5f01b2fSopenharmony_ci case '\\': 282c5f01b2fSopenharmony_ci { 283c5f01b2fSopenharmony_ci switch (get()) 284c5f01b2fSopenharmony_ci { 285c5f01b2fSopenharmony_ci // 
quotation mark 286c5f01b2fSopenharmony_ci case '\"': 287c5f01b2fSopenharmony_ci add('\"'); 288c5f01b2fSopenharmony_ci break; 289c5f01b2fSopenharmony_ci // reverse solidus 290c5f01b2fSopenharmony_ci case '\\': 291c5f01b2fSopenharmony_ci add('\\'); 292c5f01b2fSopenharmony_ci break; 293c5f01b2fSopenharmony_ci // solidus 294c5f01b2fSopenharmony_ci case '/': 295c5f01b2fSopenharmony_ci add('/'); 296c5f01b2fSopenharmony_ci break; 297c5f01b2fSopenharmony_ci // backspace 298c5f01b2fSopenharmony_ci case 'b': 299c5f01b2fSopenharmony_ci add('\b'); 300c5f01b2fSopenharmony_ci break; 301c5f01b2fSopenharmony_ci // form feed 302c5f01b2fSopenharmony_ci case 'f': 303c5f01b2fSopenharmony_ci add('\f'); 304c5f01b2fSopenharmony_ci break; 305c5f01b2fSopenharmony_ci // line feed 306c5f01b2fSopenharmony_ci case 'n': 307c5f01b2fSopenharmony_ci add('\n'); 308c5f01b2fSopenharmony_ci break; 309c5f01b2fSopenharmony_ci // carriage return 310c5f01b2fSopenharmony_ci case 'r': 311c5f01b2fSopenharmony_ci add('\r'); 312c5f01b2fSopenharmony_ci break; 313c5f01b2fSopenharmony_ci // tab 314c5f01b2fSopenharmony_ci case 't': 315c5f01b2fSopenharmony_ci add('\t'); 316c5f01b2fSopenharmony_ci break; 317c5f01b2fSopenharmony_ci 318c5f01b2fSopenharmony_ci // unicode escapes 319c5f01b2fSopenharmony_ci case 'u': 320c5f01b2fSopenharmony_ci { 321c5f01b2fSopenharmony_ci const int codepoint1 = get_codepoint(); 322c5f01b2fSopenharmony_ci int codepoint = codepoint1; // start with codepoint1 323c5f01b2fSopenharmony_ci 324c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1)) 325c5f01b2fSopenharmony_ci { 326c5f01b2fSopenharmony_ci error_message = "invalid string: '\\u' must be followed by 4 hex digits"; 327c5f01b2fSopenharmony_ci return token_type::parse_error; 328c5f01b2fSopenharmony_ci } 329c5f01b2fSopenharmony_ci 330c5f01b2fSopenharmony_ci // check if code point is a high surrogate 331c5f01b2fSopenharmony_ci if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF) 332c5f01b2fSopenharmony_ci { 
333c5f01b2fSopenharmony_ci // expect next \uxxxx entry 334c5f01b2fSopenharmony_ci if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u')) 335c5f01b2fSopenharmony_ci { 336c5f01b2fSopenharmony_ci const int codepoint2 = get_codepoint(); 337c5f01b2fSopenharmony_ci 338c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1)) 339c5f01b2fSopenharmony_ci { 340c5f01b2fSopenharmony_ci error_message = "invalid string: '\\u' must be followed by 4 hex digits"; 341c5f01b2fSopenharmony_ci return token_type::parse_error; 342c5f01b2fSopenharmony_ci } 343c5f01b2fSopenharmony_ci 344c5f01b2fSopenharmony_ci // check if codepoint2 is a low surrogate 345c5f01b2fSopenharmony_ci if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF)) 346c5f01b2fSopenharmony_ci { 347c5f01b2fSopenharmony_ci // overwrite codepoint 348c5f01b2fSopenharmony_ci codepoint = static_cast<int>( 349c5f01b2fSopenharmony_ci // high surrogate occupies the most significant 22 bits 350c5f01b2fSopenharmony_ci (static_cast<unsigned int>(codepoint1) << 10u) 351c5f01b2fSopenharmony_ci // low surrogate occupies the least significant 15 bits 352c5f01b2fSopenharmony_ci + static_cast<unsigned int>(codepoint2) 353c5f01b2fSopenharmony_ci // there is still the 0xD800, 0xDC00 and 0x10000 noise 354c5f01b2fSopenharmony_ci // in the result, so we have to subtract with: 355c5f01b2fSopenharmony_ci // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 356c5f01b2fSopenharmony_ci - 0x35FDC00u); 357c5f01b2fSopenharmony_ci } 358c5f01b2fSopenharmony_ci else 359c5f01b2fSopenharmony_ci { 360c5f01b2fSopenharmony_ci error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; 361c5f01b2fSopenharmony_ci return token_type::parse_error; 362c5f01b2fSopenharmony_ci } 363c5f01b2fSopenharmony_ci } 364c5f01b2fSopenharmony_ci else 365c5f01b2fSopenharmony_ci { 366c5f01b2fSopenharmony_ci error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; 367c5f01b2fSopenharmony_ci 
return token_type::parse_error; 368c5f01b2fSopenharmony_ci } 369c5f01b2fSopenharmony_ci } 370c5f01b2fSopenharmony_ci else 371c5f01b2fSopenharmony_ci { 372c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) 373c5f01b2fSopenharmony_ci { 374c5f01b2fSopenharmony_ci error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; 375c5f01b2fSopenharmony_ci return token_type::parse_error; 376c5f01b2fSopenharmony_ci } 377c5f01b2fSopenharmony_ci } 378c5f01b2fSopenharmony_ci 379c5f01b2fSopenharmony_ci // result of the above calculation yields a proper codepoint 380c5f01b2fSopenharmony_ci JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF); 381c5f01b2fSopenharmony_ci 382c5f01b2fSopenharmony_ci // translate codepoint into bytes 383c5f01b2fSopenharmony_ci if (codepoint < 0x80) 384c5f01b2fSopenharmony_ci { 385c5f01b2fSopenharmony_ci // 1-byte characters: 0xxxxxxx (ASCII) 386c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(codepoint)); 387c5f01b2fSopenharmony_ci } 388c5f01b2fSopenharmony_ci else if (codepoint <= 0x7FF) 389c5f01b2fSopenharmony_ci { 390c5f01b2fSopenharmony_ci // 2-byte characters: 110xxxxx 10xxxxxx 391c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u))); 392c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); 393c5f01b2fSopenharmony_ci } 394c5f01b2fSopenharmony_ci else if (codepoint <= 0xFFFF) 395c5f01b2fSopenharmony_ci { 396c5f01b2fSopenharmony_ci // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx 397c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u))); 398c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu))); 399c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); 400c5f01b2fSopenharmony_ci } 
401c5f01b2fSopenharmony_ci else 402c5f01b2fSopenharmony_ci { 403c5f01b2fSopenharmony_ci // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 404c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u))); 405c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu))); 406c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu))); 407c5f01b2fSopenharmony_ci add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); 408c5f01b2fSopenharmony_ci } 409c5f01b2fSopenharmony_ci 410c5f01b2fSopenharmony_ci break; 411c5f01b2fSopenharmony_ci } 412c5f01b2fSopenharmony_ci 413c5f01b2fSopenharmony_ci // other characters after escape 414c5f01b2fSopenharmony_ci default: 415c5f01b2fSopenharmony_ci error_message = "invalid string: forbidden character after backslash"; 416c5f01b2fSopenharmony_ci return token_type::parse_error; 417c5f01b2fSopenharmony_ci } 418c5f01b2fSopenharmony_ci 419c5f01b2fSopenharmony_ci break; 420c5f01b2fSopenharmony_ci } 421c5f01b2fSopenharmony_ci 422c5f01b2fSopenharmony_ci // invalid control characters 423c5f01b2fSopenharmony_ci case 0x00: 424c5f01b2fSopenharmony_ci { 425c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; 426c5f01b2fSopenharmony_ci return token_type::parse_error; 427c5f01b2fSopenharmony_ci } 428c5f01b2fSopenharmony_ci 429c5f01b2fSopenharmony_ci case 0x01: 430c5f01b2fSopenharmony_ci { 431c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; 432c5f01b2fSopenharmony_ci return token_type::parse_error; 433c5f01b2fSopenharmony_ci } 434c5f01b2fSopenharmony_ci 435c5f01b2fSopenharmony_ci case 0x02: 436c5f01b2fSopenharmony_ci { 437c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0002 (STX) must be 
escaped to \\u0002"; 438c5f01b2fSopenharmony_ci return token_type::parse_error; 439c5f01b2fSopenharmony_ci } 440c5f01b2fSopenharmony_ci 441c5f01b2fSopenharmony_ci case 0x03: 442c5f01b2fSopenharmony_ci { 443c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; 444c5f01b2fSopenharmony_ci return token_type::parse_error; 445c5f01b2fSopenharmony_ci } 446c5f01b2fSopenharmony_ci 447c5f01b2fSopenharmony_ci case 0x04: 448c5f01b2fSopenharmony_ci { 449c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; 450c5f01b2fSopenharmony_ci return token_type::parse_error; 451c5f01b2fSopenharmony_ci } 452c5f01b2fSopenharmony_ci 453c5f01b2fSopenharmony_ci case 0x05: 454c5f01b2fSopenharmony_ci { 455c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; 456c5f01b2fSopenharmony_ci return token_type::parse_error; 457c5f01b2fSopenharmony_ci } 458c5f01b2fSopenharmony_ci 459c5f01b2fSopenharmony_ci case 0x06: 460c5f01b2fSopenharmony_ci { 461c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; 462c5f01b2fSopenharmony_ci return token_type::parse_error; 463c5f01b2fSopenharmony_ci } 464c5f01b2fSopenharmony_ci 465c5f01b2fSopenharmony_ci case 0x07: 466c5f01b2fSopenharmony_ci { 467c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; 468c5f01b2fSopenharmony_ci return token_type::parse_error; 469c5f01b2fSopenharmony_ci } 470c5f01b2fSopenharmony_ci 471c5f01b2fSopenharmony_ci case 0x08: 472c5f01b2fSopenharmony_ci { 473c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; 474c5f01b2fSopenharmony_ci return token_type::parse_error; 475c5f01b2fSopenharmony_ci } 476c5f01b2fSopenharmony_ci 477c5f01b2fSopenharmony_ci case 0x09: 
478c5f01b2fSopenharmony_ci { 479c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; 480c5f01b2fSopenharmony_ci return token_type::parse_error; 481c5f01b2fSopenharmony_ci } 482c5f01b2fSopenharmony_ci 483c5f01b2fSopenharmony_ci case 0x0A: 484c5f01b2fSopenharmony_ci { 485c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; 486c5f01b2fSopenharmony_ci return token_type::parse_error; 487c5f01b2fSopenharmony_ci } 488c5f01b2fSopenharmony_ci 489c5f01b2fSopenharmony_ci case 0x0B: 490c5f01b2fSopenharmony_ci { 491c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; 492c5f01b2fSopenharmony_ci return token_type::parse_error; 493c5f01b2fSopenharmony_ci } 494c5f01b2fSopenharmony_ci 495c5f01b2fSopenharmony_ci case 0x0C: 496c5f01b2fSopenharmony_ci { 497c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; 498c5f01b2fSopenharmony_ci return token_type::parse_error; 499c5f01b2fSopenharmony_ci } 500c5f01b2fSopenharmony_ci 501c5f01b2fSopenharmony_ci case 0x0D: 502c5f01b2fSopenharmony_ci { 503c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; 504c5f01b2fSopenharmony_ci return token_type::parse_error; 505c5f01b2fSopenharmony_ci } 506c5f01b2fSopenharmony_ci 507c5f01b2fSopenharmony_ci case 0x0E: 508c5f01b2fSopenharmony_ci { 509c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; 510c5f01b2fSopenharmony_ci return token_type::parse_error; 511c5f01b2fSopenharmony_ci } 512c5f01b2fSopenharmony_ci 513c5f01b2fSopenharmony_ci case 0x0F: 514c5f01b2fSopenharmony_ci { 515c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; 
516c5f01b2fSopenharmony_ci return token_type::parse_error; 517c5f01b2fSopenharmony_ci } 518c5f01b2fSopenharmony_ci 519c5f01b2fSopenharmony_ci case 0x10: 520c5f01b2fSopenharmony_ci { 521c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; 522c5f01b2fSopenharmony_ci return token_type::parse_error; 523c5f01b2fSopenharmony_ci } 524c5f01b2fSopenharmony_ci 525c5f01b2fSopenharmony_ci case 0x11: 526c5f01b2fSopenharmony_ci { 527c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; 528c5f01b2fSopenharmony_ci return token_type::parse_error; 529c5f01b2fSopenharmony_ci } 530c5f01b2fSopenharmony_ci 531c5f01b2fSopenharmony_ci case 0x12: 532c5f01b2fSopenharmony_ci { 533c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; 534c5f01b2fSopenharmony_ci return token_type::parse_error; 535c5f01b2fSopenharmony_ci } 536c5f01b2fSopenharmony_ci 537c5f01b2fSopenharmony_ci case 0x13: 538c5f01b2fSopenharmony_ci { 539c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; 540c5f01b2fSopenharmony_ci return token_type::parse_error; 541c5f01b2fSopenharmony_ci } 542c5f01b2fSopenharmony_ci 543c5f01b2fSopenharmony_ci case 0x14: 544c5f01b2fSopenharmony_ci { 545c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; 546c5f01b2fSopenharmony_ci return token_type::parse_error; 547c5f01b2fSopenharmony_ci } 548c5f01b2fSopenharmony_ci 549c5f01b2fSopenharmony_ci case 0x15: 550c5f01b2fSopenharmony_ci { 551c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; 552c5f01b2fSopenharmony_ci return token_type::parse_error; 553c5f01b2fSopenharmony_ci } 554c5f01b2fSopenharmony_ci 555c5f01b2fSopenharmony_ci case 0x16: 556c5f01b2fSopenharmony_ci { 
557c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; 558c5f01b2fSopenharmony_ci return token_type::parse_error; 559c5f01b2fSopenharmony_ci } 560c5f01b2fSopenharmony_ci 561c5f01b2fSopenharmony_ci case 0x17: 562c5f01b2fSopenharmony_ci { 563c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; 564c5f01b2fSopenharmony_ci return token_type::parse_error; 565c5f01b2fSopenharmony_ci } 566c5f01b2fSopenharmony_ci 567c5f01b2fSopenharmony_ci case 0x18: 568c5f01b2fSopenharmony_ci { 569c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; 570c5f01b2fSopenharmony_ci return token_type::parse_error; 571c5f01b2fSopenharmony_ci } 572c5f01b2fSopenharmony_ci 573c5f01b2fSopenharmony_ci case 0x19: 574c5f01b2fSopenharmony_ci { 575c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; 576c5f01b2fSopenharmony_ci return token_type::parse_error; 577c5f01b2fSopenharmony_ci } 578c5f01b2fSopenharmony_ci 579c5f01b2fSopenharmony_ci case 0x1A: 580c5f01b2fSopenharmony_ci { 581c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; 582c5f01b2fSopenharmony_ci return token_type::parse_error; 583c5f01b2fSopenharmony_ci } 584c5f01b2fSopenharmony_ci 585c5f01b2fSopenharmony_ci case 0x1B: 586c5f01b2fSopenharmony_ci { 587c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; 588c5f01b2fSopenharmony_ci return token_type::parse_error; 589c5f01b2fSopenharmony_ci } 590c5f01b2fSopenharmony_ci 591c5f01b2fSopenharmony_ci case 0x1C: 592c5f01b2fSopenharmony_ci { 593c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; 594c5f01b2fSopenharmony_ci return token_type::parse_error; 
595c5f01b2fSopenharmony_ci } 596c5f01b2fSopenharmony_ci 597c5f01b2fSopenharmony_ci case 0x1D: 598c5f01b2fSopenharmony_ci { 599c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; 600c5f01b2fSopenharmony_ci return token_type::parse_error; 601c5f01b2fSopenharmony_ci } 602c5f01b2fSopenharmony_ci 603c5f01b2fSopenharmony_ci case 0x1E: 604c5f01b2fSopenharmony_ci { 605c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; 606c5f01b2fSopenharmony_ci return token_type::parse_error; 607c5f01b2fSopenharmony_ci } 608c5f01b2fSopenharmony_ci 609c5f01b2fSopenharmony_ci case 0x1F: 610c5f01b2fSopenharmony_ci { 611c5f01b2fSopenharmony_ci error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; 612c5f01b2fSopenharmony_ci return token_type::parse_error; 613c5f01b2fSopenharmony_ci } 614c5f01b2fSopenharmony_ci 615c5f01b2fSopenharmony_ci // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) 616c5f01b2fSopenharmony_ci case 0x20: 617c5f01b2fSopenharmony_ci case 0x21: 618c5f01b2fSopenharmony_ci case 0x23: 619c5f01b2fSopenharmony_ci case 0x24: 620c5f01b2fSopenharmony_ci case 0x25: 621c5f01b2fSopenharmony_ci case 0x26: 622c5f01b2fSopenharmony_ci case 0x27: 623c5f01b2fSopenharmony_ci case 0x28: 624c5f01b2fSopenharmony_ci case 0x29: 625c5f01b2fSopenharmony_ci case 0x2A: 626c5f01b2fSopenharmony_ci case 0x2B: 627c5f01b2fSopenharmony_ci case 0x2C: 628c5f01b2fSopenharmony_ci case 0x2D: 629c5f01b2fSopenharmony_ci case 0x2E: 630c5f01b2fSopenharmony_ci case 0x2F: 631c5f01b2fSopenharmony_ci case 0x30: 632c5f01b2fSopenharmony_ci case 0x31: 633c5f01b2fSopenharmony_ci case 0x32: 634c5f01b2fSopenharmony_ci case 0x33: 635c5f01b2fSopenharmony_ci case 0x34: 636c5f01b2fSopenharmony_ci case 0x35: 637c5f01b2fSopenharmony_ci case 0x36: 638c5f01b2fSopenharmony_ci case 0x37: 639c5f01b2fSopenharmony_ci case 0x38: 640c5f01b2fSopenharmony_ci case 
0x39: 641c5f01b2fSopenharmony_ci case 0x3A: 642c5f01b2fSopenharmony_ci case 0x3B: 643c5f01b2fSopenharmony_ci case 0x3C: 644c5f01b2fSopenharmony_ci case 0x3D: 645c5f01b2fSopenharmony_ci case 0x3E: 646c5f01b2fSopenharmony_ci case 0x3F: 647c5f01b2fSopenharmony_ci case 0x40: 648c5f01b2fSopenharmony_ci case 0x41: 649c5f01b2fSopenharmony_ci case 0x42: 650c5f01b2fSopenharmony_ci case 0x43: 651c5f01b2fSopenharmony_ci case 0x44: 652c5f01b2fSopenharmony_ci case 0x45: 653c5f01b2fSopenharmony_ci case 0x46: 654c5f01b2fSopenharmony_ci case 0x47: 655c5f01b2fSopenharmony_ci case 0x48: 656c5f01b2fSopenharmony_ci case 0x49: 657c5f01b2fSopenharmony_ci case 0x4A: 658c5f01b2fSopenharmony_ci case 0x4B: 659c5f01b2fSopenharmony_ci case 0x4C: 660c5f01b2fSopenharmony_ci case 0x4D: 661c5f01b2fSopenharmony_ci case 0x4E: 662c5f01b2fSopenharmony_ci case 0x4F: 663c5f01b2fSopenharmony_ci case 0x50: 664c5f01b2fSopenharmony_ci case 0x51: 665c5f01b2fSopenharmony_ci case 0x52: 666c5f01b2fSopenharmony_ci case 0x53: 667c5f01b2fSopenharmony_ci case 0x54: 668c5f01b2fSopenharmony_ci case 0x55: 669c5f01b2fSopenharmony_ci case 0x56: 670c5f01b2fSopenharmony_ci case 0x57: 671c5f01b2fSopenharmony_ci case 0x58: 672c5f01b2fSopenharmony_ci case 0x59: 673c5f01b2fSopenharmony_ci case 0x5A: 674c5f01b2fSopenharmony_ci case 0x5B: 675c5f01b2fSopenharmony_ci case 0x5D: 676c5f01b2fSopenharmony_ci case 0x5E: 677c5f01b2fSopenharmony_ci case 0x5F: 678c5f01b2fSopenharmony_ci case 0x60: 679c5f01b2fSopenharmony_ci case 0x61: 680c5f01b2fSopenharmony_ci case 0x62: 681c5f01b2fSopenharmony_ci case 0x63: 682c5f01b2fSopenharmony_ci case 0x64: 683c5f01b2fSopenharmony_ci case 0x65: 684c5f01b2fSopenharmony_ci case 0x66: 685c5f01b2fSopenharmony_ci case 0x67: 686c5f01b2fSopenharmony_ci case 0x68: 687c5f01b2fSopenharmony_ci case 0x69: 688c5f01b2fSopenharmony_ci case 0x6A: 689c5f01b2fSopenharmony_ci case 0x6B: 690c5f01b2fSopenharmony_ci case 0x6C: 691c5f01b2fSopenharmony_ci case 0x6D: 692c5f01b2fSopenharmony_ci case 0x6E: 
693c5f01b2fSopenharmony_ci case 0x6F: 694c5f01b2fSopenharmony_ci case 0x70: 695c5f01b2fSopenharmony_ci case 0x71: 696c5f01b2fSopenharmony_ci case 0x72: 697c5f01b2fSopenharmony_ci case 0x73: 698c5f01b2fSopenharmony_ci case 0x74: 699c5f01b2fSopenharmony_ci case 0x75: 700c5f01b2fSopenharmony_ci case 0x76: 701c5f01b2fSopenharmony_ci case 0x77: 702c5f01b2fSopenharmony_ci case 0x78: 703c5f01b2fSopenharmony_ci case 0x79: 704c5f01b2fSopenharmony_ci case 0x7A: 705c5f01b2fSopenharmony_ci case 0x7B: 706c5f01b2fSopenharmony_ci case 0x7C: 707c5f01b2fSopenharmony_ci case 0x7D: 708c5f01b2fSopenharmony_ci case 0x7E: 709c5f01b2fSopenharmony_ci case 0x7F: 710c5f01b2fSopenharmony_ci { 711c5f01b2fSopenharmony_ci add(current); 712c5f01b2fSopenharmony_ci break; 713c5f01b2fSopenharmony_ci } 714c5f01b2fSopenharmony_ci 715c5f01b2fSopenharmony_ci // U+0080..U+07FF: bytes C2..DF 80..BF 716c5f01b2fSopenharmony_ci case 0xC2: 717c5f01b2fSopenharmony_ci case 0xC3: 718c5f01b2fSopenharmony_ci case 0xC4: 719c5f01b2fSopenharmony_ci case 0xC5: 720c5f01b2fSopenharmony_ci case 0xC6: 721c5f01b2fSopenharmony_ci case 0xC7: 722c5f01b2fSopenharmony_ci case 0xC8: 723c5f01b2fSopenharmony_ci case 0xC9: 724c5f01b2fSopenharmony_ci case 0xCA: 725c5f01b2fSopenharmony_ci case 0xCB: 726c5f01b2fSopenharmony_ci case 0xCC: 727c5f01b2fSopenharmony_ci case 0xCD: 728c5f01b2fSopenharmony_ci case 0xCE: 729c5f01b2fSopenharmony_ci case 0xCF: 730c5f01b2fSopenharmony_ci case 0xD0: 731c5f01b2fSopenharmony_ci case 0xD1: 732c5f01b2fSopenharmony_ci case 0xD2: 733c5f01b2fSopenharmony_ci case 0xD3: 734c5f01b2fSopenharmony_ci case 0xD4: 735c5f01b2fSopenharmony_ci case 0xD5: 736c5f01b2fSopenharmony_ci case 0xD6: 737c5f01b2fSopenharmony_ci case 0xD7: 738c5f01b2fSopenharmony_ci case 0xD8: 739c5f01b2fSopenharmony_ci case 0xD9: 740c5f01b2fSopenharmony_ci case 0xDA: 741c5f01b2fSopenharmony_ci case 0xDB: 742c5f01b2fSopenharmony_ci case 0xDC: 743c5f01b2fSopenharmony_ci case 0xDD: 744c5f01b2fSopenharmony_ci case 0xDE: 
745c5f01b2fSopenharmony_ci case 0xDF: 746c5f01b2fSopenharmony_ci { 747c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF}))) 748c5f01b2fSopenharmony_ci { 749c5f01b2fSopenharmony_ci return token_type::parse_error; 750c5f01b2fSopenharmony_ci } 751c5f01b2fSopenharmony_ci break; 752c5f01b2fSopenharmony_ci } 753c5f01b2fSopenharmony_ci 754c5f01b2fSopenharmony_ci // U+0800..U+0FFF: bytes E0 A0..BF 80..BF 755c5f01b2fSopenharmony_ci case 0xE0: 756c5f01b2fSopenharmony_ci { 757c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) 758c5f01b2fSopenharmony_ci { 759c5f01b2fSopenharmony_ci return token_type::parse_error; 760c5f01b2fSopenharmony_ci } 761c5f01b2fSopenharmony_ci break; 762c5f01b2fSopenharmony_ci } 763c5f01b2fSopenharmony_ci 764c5f01b2fSopenharmony_ci // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF 765c5f01b2fSopenharmony_ci // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF 766c5f01b2fSopenharmony_ci case 0xE1: 767c5f01b2fSopenharmony_ci case 0xE2: 768c5f01b2fSopenharmony_ci case 0xE3: 769c5f01b2fSopenharmony_ci case 0xE4: 770c5f01b2fSopenharmony_ci case 0xE5: 771c5f01b2fSopenharmony_ci case 0xE6: 772c5f01b2fSopenharmony_ci case 0xE7: 773c5f01b2fSopenharmony_ci case 0xE8: 774c5f01b2fSopenharmony_ci case 0xE9: 775c5f01b2fSopenharmony_ci case 0xEA: 776c5f01b2fSopenharmony_ci case 0xEB: 777c5f01b2fSopenharmony_ci case 0xEC: 778c5f01b2fSopenharmony_ci case 0xEE: 779c5f01b2fSopenharmony_ci case 0xEF: 780c5f01b2fSopenharmony_ci { 781c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) 782c5f01b2fSopenharmony_ci { 783c5f01b2fSopenharmony_ci return token_type::parse_error; 784c5f01b2fSopenharmony_ci } 785c5f01b2fSopenharmony_ci break; 786c5f01b2fSopenharmony_ci } 787c5f01b2fSopenharmony_ci 788c5f01b2fSopenharmony_ci // U+D000..U+D7FF: bytes ED 80..9F 80..BF 789c5f01b2fSopenharmony_ci case 0xED: 790c5f01b2fSopenharmony_ci { 791c5f01b2fSopenharmony_ci if 
(JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) 792c5f01b2fSopenharmony_ci { 793c5f01b2fSopenharmony_ci return token_type::parse_error; 794c5f01b2fSopenharmony_ci } 795c5f01b2fSopenharmony_ci break; 796c5f01b2fSopenharmony_ci } 797c5f01b2fSopenharmony_ci 798c5f01b2fSopenharmony_ci // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF 799c5f01b2fSopenharmony_ci case 0xF0: 800c5f01b2fSopenharmony_ci { 801c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) 802c5f01b2fSopenharmony_ci { 803c5f01b2fSopenharmony_ci return token_type::parse_error; 804c5f01b2fSopenharmony_ci } 805c5f01b2fSopenharmony_ci break; 806c5f01b2fSopenharmony_ci } 807c5f01b2fSopenharmony_ci 808c5f01b2fSopenharmony_ci // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF 809c5f01b2fSopenharmony_ci case 0xF1: 810c5f01b2fSopenharmony_ci case 0xF2: 811c5f01b2fSopenharmony_ci case 0xF3: 812c5f01b2fSopenharmony_ci { 813c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) 814c5f01b2fSopenharmony_ci { 815c5f01b2fSopenharmony_ci return token_type::parse_error; 816c5f01b2fSopenharmony_ci } 817c5f01b2fSopenharmony_ci break; 818c5f01b2fSopenharmony_ci } 819c5f01b2fSopenharmony_ci 820c5f01b2fSopenharmony_ci // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF 821c5f01b2fSopenharmony_ci case 0xF4: 822c5f01b2fSopenharmony_ci { 823c5f01b2fSopenharmony_ci if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) 824c5f01b2fSopenharmony_ci { 825c5f01b2fSopenharmony_ci return token_type::parse_error; 826c5f01b2fSopenharmony_ci } 827c5f01b2fSopenharmony_ci break; 828c5f01b2fSopenharmony_ci } 829c5f01b2fSopenharmony_ci 830c5f01b2fSopenharmony_ci // remaining bytes (80..C1 and F5..FF) are ill-formed 831c5f01b2fSopenharmony_ci default: 832c5f01b2fSopenharmony_ci { 833c5f01b2fSopenharmony_ci error_message = "invalid string: ill-formed UTF-8 byte"; 834c5f01b2fSopenharmony_ci 
return token_type::parse_error; 835c5f01b2fSopenharmony_ci } 836c5f01b2fSopenharmony_ci } 837c5f01b2fSopenharmony_ci } 838c5f01b2fSopenharmony_ci } 839c5f01b2fSopenharmony_ci 840c5f01b2fSopenharmony_ci /*! 841c5f01b2fSopenharmony_ci * @brief scan a comment 842c5f01b2fSopenharmony_ci * @return whether comment could be scanned successfully 843c5f01b2fSopenharmony_ci */ 844c5f01b2fSopenharmony_ci bool scan_comment() 845c5f01b2fSopenharmony_ci { 846c5f01b2fSopenharmony_ci switch (get()) 847c5f01b2fSopenharmony_ci { 848c5f01b2fSopenharmony_ci // single-line comments skip input until a newline or EOF is read 849c5f01b2fSopenharmony_ci case '/': 850c5f01b2fSopenharmony_ci { 851c5f01b2fSopenharmony_ci while (true) 852c5f01b2fSopenharmony_ci { 853c5f01b2fSopenharmony_ci switch (get()) 854c5f01b2fSopenharmony_ci { 855c5f01b2fSopenharmony_ci case '\n': 856c5f01b2fSopenharmony_ci case '\r': 857c5f01b2fSopenharmony_ci case std::char_traits<char_type>::eof(): 858c5f01b2fSopenharmony_ci case '\0': 859c5f01b2fSopenharmony_ci return true; 860c5f01b2fSopenharmony_ci 861c5f01b2fSopenharmony_ci default: 862c5f01b2fSopenharmony_ci break; 863c5f01b2fSopenharmony_ci } 864c5f01b2fSopenharmony_ci } 865c5f01b2fSopenharmony_ci } 866c5f01b2fSopenharmony_ci 867c5f01b2fSopenharmony_ci // multi-line comments skip input until */ is read 868c5f01b2fSopenharmony_ci case '*': 869c5f01b2fSopenharmony_ci { 870c5f01b2fSopenharmony_ci while (true) 871c5f01b2fSopenharmony_ci { 872c5f01b2fSopenharmony_ci switch (get()) 873c5f01b2fSopenharmony_ci { 874c5f01b2fSopenharmony_ci case std::char_traits<char_type>::eof(): 875c5f01b2fSopenharmony_ci case '\0': 876c5f01b2fSopenharmony_ci { 877c5f01b2fSopenharmony_ci error_message = "invalid comment; missing closing '*/'"; 878c5f01b2fSopenharmony_ci return false; 879c5f01b2fSopenharmony_ci } 880c5f01b2fSopenharmony_ci 881c5f01b2fSopenharmony_ci case '*': 882c5f01b2fSopenharmony_ci { 883c5f01b2fSopenharmony_ci switch (get()) 884c5f01b2fSopenharmony_ci { 
885c5f01b2fSopenharmony_ci case '/': 886c5f01b2fSopenharmony_ci return true; 887c5f01b2fSopenharmony_ci 888c5f01b2fSopenharmony_ci default: 889c5f01b2fSopenharmony_ci { 890c5f01b2fSopenharmony_ci unget(); 891c5f01b2fSopenharmony_ci continue; 892c5f01b2fSopenharmony_ci } 893c5f01b2fSopenharmony_ci } 894c5f01b2fSopenharmony_ci } 895c5f01b2fSopenharmony_ci 896c5f01b2fSopenharmony_ci default: 897c5f01b2fSopenharmony_ci continue; 898c5f01b2fSopenharmony_ci } 899c5f01b2fSopenharmony_ci } 900c5f01b2fSopenharmony_ci } 901c5f01b2fSopenharmony_ci 902c5f01b2fSopenharmony_ci // unexpected character after reading '/' 903c5f01b2fSopenharmony_ci default: 904c5f01b2fSopenharmony_ci { 905c5f01b2fSopenharmony_ci error_message = "invalid comment; expecting '/' or '*' after '/'"; 906c5f01b2fSopenharmony_ci return false; 907c5f01b2fSopenharmony_ci } 908c5f01b2fSopenharmony_ci } 909c5f01b2fSopenharmony_ci } 910c5f01b2fSopenharmony_ci 911c5f01b2fSopenharmony_ci JSON_HEDLEY_NON_NULL(2) 912c5f01b2fSopenharmony_ci static void strtof(float& f, const char* str, char** endptr) noexcept 913c5f01b2fSopenharmony_ci { 914c5f01b2fSopenharmony_ci f = std::strtof(str, endptr); 915c5f01b2fSopenharmony_ci } 916c5f01b2fSopenharmony_ci 917c5f01b2fSopenharmony_ci JSON_HEDLEY_NON_NULL(2) 918c5f01b2fSopenharmony_ci static void strtof(double& f, const char* str, char** endptr) noexcept 919c5f01b2fSopenharmony_ci { 920c5f01b2fSopenharmony_ci f = std::strtod(str, endptr); 921c5f01b2fSopenharmony_ci } 922c5f01b2fSopenharmony_ci 923c5f01b2fSopenharmony_ci JSON_HEDLEY_NON_NULL(2) 924c5f01b2fSopenharmony_ci static void strtof(long double& f, const char* str, char** endptr) noexcept 925c5f01b2fSopenharmony_ci { 926c5f01b2fSopenharmony_ci f = std::strtold(str, endptr); 927c5f01b2fSopenharmony_ci } 928c5f01b2fSopenharmony_ci 929c5f01b2fSopenharmony_ci /*! 930c5f01b2fSopenharmony_ci @brief scan a number literal 931c5f01b2fSopenharmony_ci 932c5f01b2fSopenharmony_ci This function scans a string according to Sect. 
6 of RFC 8259. 933c5f01b2fSopenharmony_ci 934c5f01b2fSopenharmony_ci The function is realized with a deterministic finite state machine derived 935c5f01b2fSopenharmony_ci from the grammar described in RFC 8259. Starting in state "init", the 936c5f01b2fSopenharmony_ci input is read and used to determined the next state. Only state "done" 937c5f01b2fSopenharmony_ci accepts the number. State "error" is a trap state to model errors. In the 938c5f01b2fSopenharmony_ci table below, "anything" means any character but the ones listed before. 939c5f01b2fSopenharmony_ci 940c5f01b2fSopenharmony_ci state | 0 | 1-9 | e E | + | - | . | anything 941c5f01b2fSopenharmony_ci ---------|----------|----------|----------|---------|---------|----------|----------- 942c5f01b2fSopenharmony_ci init | zero | any1 | [error] | [error] | minus | [error] | [error] 943c5f01b2fSopenharmony_ci minus | zero | any1 | [error] | [error] | [error] | [error] | [error] 944c5f01b2fSopenharmony_ci zero | done | done | exponent | done | done | decimal1 | done 945c5f01b2fSopenharmony_ci any1 | any1 | any1 | exponent | done | done | decimal1 | done 946c5f01b2fSopenharmony_ci decimal1 | decimal2 | decimal2 | [error] | [error] | [error] | [error] | [error] 947c5f01b2fSopenharmony_ci decimal2 | decimal2 | decimal2 | exponent | done | done | done | done 948c5f01b2fSopenharmony_ci exponent | any2 | any2 | [error] | sign | sign | [error] | [error] 949c5f01b2fSopenharmony_ci sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] 950c5f01b2fSopenharmony_ci any2 | any2 | any2 | done | done | done | done | done 951c5f01b2fSopenharmony_ci 952c5f01b2fSopenharmony_ci The state machine is realized with one label per state (prefixed with 953c5f01b2fSopenharmony_ci "scan_number_") and `goto` statements between them. The state machine 954c5f01b2fSopenharmony_ci contains cycles, but any cycle can be left when EOF is read. Therefore, 955c5f01b2fSopenharmony_ci the function is guaranteed to terminate. 

During scanning, the read bytes are stored in token_buffer. This string is
then converted to a signed integer, an unsigned integer, or a
floating-point number.

@return token_type::value_unsigned, token_type::value_integer, or
        token_type::value_float if number could be successfully scanned,
        token_type::parse_error otherwise

@note The scanner is independent of the current locale. Internally, the
      locale's decimal point is used instead of `.` to work with the
      locale-dependent converters.
*/
token_type scan_number()  // lgtm [cpp/use-of-goto]
{
    // reset token_buffer to store the number's bytes
    reset();

    // the type of the parsed number; initially set to unsigned; will be
    // changed if minus sign, decimal point or exponent is read
    token_type number_type = token_type::value_unsigned;

    // state (init): we just found out we need to scan a number; the first
    // character is already in `current`, so it is inspected without get()
    switch (current)
    {
        case '-':
        {
            add(current);
            goto scan_number_minus;
        }

        case '0':
        {
            add(current);
            goto scan_number_zero;
        }

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any1;
        }

        // all other characters are rejected outside scan_number()
        default:            // LCOV_EXCL_LINE
            JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
    }

scan_number_minus:
    // state: we just parsed a leading minus sign; the number can no longer
    // be reported as unsigned
    number_type = token_type::value_integer;
    switch (get())
    {
        case '0':
        {
            add(current);
            goto scan_number_zero;
        }

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any1;
        }

        default:
        {
            error_message = "invalid number; expected digit after '-'";
            return token_type::parse_error;
        }
    }

scan_number_zero:
    // state: we just parsed a zero (maybe with a leading minus sign); a
    // further digit is not consumed here - like any other character it ends
    // the number via the default branch
    switch (get())
    {
        case '.':
        {
            // store the locale's decimal point instead of '.', so that the
            // locale-dependent conversion below accepts the buffer
            add(decimal_point_char);
            goto scan_number_decimal1;
        }

        case 'e':
        case 'E':
        {
            add(current);
            goto scan_number_exponent;
        }

        default:
            goto scan_number_done;
    }

scan_number_any1:
    // state: we just parsed a number 0-9 (maybe with a leading minus sign)
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any1;
        }

        case '.':
        {
            // see scan_number_zero: the locale's decimal point is stored
            add(decimal_point_char);
            goto scan_number_decimal1;
        }

        case 'e':
        case 'E':
        {
            add(current);
            goto scan_number_exponent;
        }

        default:
            goto scan_number_done;
    }

scan_number_decimal1:
    // state: we just parsed a decimal point; from here on the number is a
    // floating-point number, and at least one digit must follow
    number_type = token_type::value_float;
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_decimal2;
        }

        default:
        {
            error_message = "invalid number; expected digit after '.'";
            return token_type::parse_error;
        }
    }

scan_number_decimal2:
    // we just parsed at least one number after a decimal point
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_decimal2;
        }

        case 'e':
        case 'E':
        {
            add(current);
            goto scan_number_exponent;
        }

        default:
            goto scan_number_done;
    }

scan_number_exponent:
    // we just parsed an exponent; an exponent always makes the number a
    // floating-point number, even without a decimal point
    number_type = token_type::value_float;
    switch (get())
    {
        case '+':
        case '-':
        {
            add(current);
            goto scan_number_sign;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any2;
        }

        default:
        {
            error_message =
                "invalid number; expected '+', '-', or digit after exponent";
            return token_type::parse_error;
        }
    }

scan_number_sign:
    // we just parsed an exponent sign; at least one digit must follow
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any2;
        }

        default:
        {
            error_message = "invalid number; expected digit after exponent sign";
            return token_type::parse_error;
        }
    }

scan_number_any2:
    // we just parsed a number after the exponent or exponent sign
    switch (get())
    {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        {
            add(current);
            goto scan_number_any2;
        }

        default:
            goto scan_number_done;
    }

scan_number_done:
    // unget the character after the number (we only read it to know that
    // we are done scanning a number)
    unget();

    char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
    // clear errno so that the checks below only see errors raised by the
    // conversion calls in this function
    errno = 0;

    // try to parse integers first and fall back to floats
    if (number_type == token_type::value_unsigned)
    {
        const auto x = std::strtoull(token_buffer.data(), &endptr, 10);

        // we checked the number format before
        JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());

        if (errno == 0)
        {
            // accept the value only if it survives the conversion to
            // number_unsigned_t unchanged; otherwise fall through to the
            // floating-point conversion below
            value_unsigned = static_cast<number_unsigned_t>(x);
            if (value_unsigned == x)
            {
                return token_type::value_unsigned;
            }
        }
    }
    else if (number_type == token_type::value_integer)
    {
        const auto x = std::strtoll(token_buffer.data(), &endptr, 10);

        // we checked the number format before
        JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());

        if (errno == 0)
        {
            // same round-trip check as above, for number_integer_t
            value_integer = static_cast<number_integer_t>(x);
            if (value_integer == x)
            {
                return token_type::value_integer;
            }
        }
    }

    // this code is reached if we parse a floating-point number or if an
    // integer conversion above failed
    strtof(value_float, token_buffer.data(), &endptr);

    // we checked the number format before
    JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());

    return token_type::value_float;
}

/*!
@brief match the remaining characters of a fixed literal

The first character of the literal is expected to be in `current` already
(this is asserted). The remaining `length - 1` characters are read with
get() and compared one by one against @a literal_text.

@param[in] literal_text  the literal text to expect
@param[in] length        the length of the passed literal text
@param[in] return_type   the token type to return on success

@return @a return_type if every character matched; token_type::parse_error
        (with error_message set to "invalid literal") at the first mismatch
*/
JSON_HEDLEY_NON_NULL(2)
token_type scan_literal(const char_type* literal_text, const std::size_t length,
                        token_type return_type)
{
    JSON_ASSERT(std::char_traits<char_type>::to_char_type(current) == literal_text[0]);
    // start at index 1: index 0 was checked by the assertion above
    for (std::size_t i = 1; i < length; ++i)
    {
        if (JSON_HEDLEY_UNLIKELY(std::char_traits<char_type>::to_char_type(get()) != literal_text[i]))
        {
            error_message = "invalid literal";
            return token_type::parse_error;
        }
    }
    return return_type;
}

/////////////////////
// input management
/////////////////////

/// reset token_buffer; current character is beginning of token
/// (token_string is cleared as well and then seeded with `current`)
void reset() noexcept
{
    token_buffer.clear();
    token_string.clear();
    token_string.push_back(std::char_traits<char_type>::to_char_type(current));
}

/*
@brief get next character from the input

This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a
`std::char_traits<char>::eof()` in that case.  Stores the scanned characters
for use in error messages.
1334c5f01b2fSopenharmony_ci 1335c5f01b2fSopenharmony_ci @return character read from the input 1336c5f01b2fSopenharmony_ci */ 1337c5f01b2fSopenharmony_ci char_int_type get() 1338c5f01b2fSopenharmony_ci { 1339c5f01b2fSopenharmony_ci ++position.chars_read_total; 1340c5f01b2fSopenharmony_ci ++position.chars_read_current_line; 1341c5f01b2fSopenharmony_ci 1342c5f01b2fSopenharmony_ci if (next_unget) 1343c5f01b2fSopenharmony_ci { 1344c5f01b2fSopenharmony_ci // just reset the next_unget variable and work with current 1345c5f01b2fSopenharmony_ci next_unget = false; 1346c5f01b2fSopenharmony_ci } 1347c5f01b2fSopenharmony_ci else 1348c5f01b2fSopenharmony_ci { 1349c5f01b2fSopenharmony_ci current = ia.get_character(); 1350c5f01b2fSopenharmony_ci } 1351c5f01b2fSopenharmony_ci 1352c5f01b2fSopenharmony_ci if (JSON_HEDLEY_LIKELY(current != std::char_traits<char_type>::eof())) 1353c5f01b2fSopenharmony_ci { 1354c5f01b2fSopenharmony_ci token_string.push_back(std::char_traits<char_type>::to_char_type(current)); 1355c5f01b2fSopenharmony_ci } 1356c5f01b2fSopenharmony_ci 1357c5f01b2fSopenharmony_ci if (current == '\n') 1358c5f01b2fSopenharmony_ci { 1359c5f01b2fSopenharmony_ci ++position.lines_read; 1360c5f01b2fSopenharmony_ci position.chars_read_current_line = 0; 1361c5f01b2fSopenharmony_ci } 1362c5f01b2fSopenharmony_ci 1363c5f01b2fSopenharmony_ci return current; 1364c5f01b2fSopenharmony_ci } 1365c5f01b2fSopenharmony_ci 1366c5f01b2fSopenharmony_ci /*! 1367c5f01b2fSopenharmony_ci @brief unget current character (read it again on next get) 1368c5f01b2fSopenharmony_ci 1369c5f01b2fSopenharmony_ci We implement unget by setting variable next_unget to true. The input is not 1370c5f01b2fSopenharmony_ci changed - we just simulate ungetting by modifying chars_read_total, 1371c5f01b2fSopenharmony_ci chars_read_current_line, and token_string. The next call to get() will 1372c5f01b2fSopenharmony_ci behave as if the unget character is read again. 
1373c5f01b2fSopenharmony_ci */ 1374c5f01b2fSopenharmony_ci void unget() 1375c5f01b2fSopenharmony_ci { 1376c5f01b2fSopenharmony_ci next_unget = true; 1377c5f01b2fSopenharmony_ci 1378c5f01b2fSopenharmony_ci --position.chars_read_total; 1379c5f01b2fSopenharmony_ci 1380c5f01b2fSopenharmony_ci // in case we "unget" a newline, we have to also decrement the lines_read 1381c5f01b2fSopenharmony_ci if (position.chars_read_current_line == 0) 1382c5f01b2fSopenharmony_ci { 1383c5f01b2fSopenharmony_ci if (position.lines_read > 0) 1384c5f01b2fSopenharmony_ci { 1385c5f01b2fSopenharmony_ci --position.lines_read; 1386c5f01b2fSopenharmony_ci } 1387c5f01b2fSopenharmony_ci } 1388c5f01b2fSopenharmony_ci else 1389c5f01b2fSopenharmony_ci { 1390c5f01b2fSopenharmony_ci --position.chars_read_current_line; 1391c5f01b2fSopenharmony_ci } 1392c5f01b2fSopenharmony_ci 1393c5f01b2fSopenharmony_ci if (JSON_HEDLEY_LIKELY(current != std::char_traits<char_type>::eof())) 1394c5f01b2fSopenharmony_ci { 1395c5f01b2fSopenharmony_ci JSON_ASSERT(!token_string.empty()); 1396c5f01b2fSopenharmony_ci token_string.pop_back(); 1397c5f01b2fSopenharmony_ci } 1398c5f01b2fSopenharmony_ci } 1399c5f01b2fSopenharmony_ci 1400c5f01b2fSopenharmony_ci /// add a character to token_buffer 1401c5f01b2fSopenharmony_ci void add(char_int_type c) 1402c5f01b2fSopenharmony_ci { 1403c5f01b2fSopenharmony_ci token_buffer.push_back(static_cast<typename string_t::value_type>(c)); 1404c5f01b2fSopenharmony_ci } 1405c5f01b2fSopenharmony_ci 1406c5f01b2fSopenharmony_ci public: 1407c5f01b2fSopenharmony_ci ///////////////////// 1408c5f01b2fSopenharmony_ci // value getters 1409c5f01b2fSopenharmony_ci ///////////////////// 1410c5f01b2fSopenharmony_ci 1411c5f01b2fSopenharmony_ci /// return integer value 1412c5f01b2fSopenharmony_ci constexpr number_integer_t get_number_integer() const noexcept 1413c5f01b2fSopenharmony_ci { 1414c5f01b2fSopenharmony_ci return value_integer; 1415c5f01b2fSopenharmony_ci } 1416c5f01b2fSopenharmony_ci 
1417c5f01b2fSopenharmony_ci /// return unsigned integer value 1418c5f01b2fSopenharmony_ci constexpr number_unsigned_t get_number_unsigned() const noexcept 1419c5f01b2fSopenharmony_ci { 1420c5f01b2fSopenharmony_ci return value_unsigned; 1421c5f01b2fSopenharmony_ci } 1422c5f01b2fSopenharmony_ci 1423c5f01b2fSopenharmony_ci /// return floating-point value 1424c5f01b2fSopenharmony_ci constexpr number_float_t get_number_float() const noexcept 1425c5f01b2fSopenharmony_ci { 1426c5f01b2fSopenharmony_ci return value_float; 1427c5f01b2fSopenharmony_ci } 1428c5f01b2fSopenharmony_ci 1429c5f01b2fSopenharmony_ci /// return current string value (implicitly resets the token; useful only once) 1430c5f01b2fSopenharmony_ci string_t& get_string() 1431c5f01b2fSopenharmony_ci { 1432c5f01b2fSopenharmony_ci return token_buffer; 1433c5f01b2fSopenharmony_ci } 1434c5f01b2fSopenharmony_ci 1435c5f01b2fSopenharmony_ci ///////////////////// 1436c5f01b2fSopenharmony_ci // diagnostics 1437c5f01b2fSopenharmony_ci ///////////////////// 1438c5f01b2fSopenharmony_ci 1439c5f01b2fSopenharmony_ci /// return position of last read token 1440c5f01b2fSopenharmony_ci constexpr position_t get_position() const noexcept 1441c5f01b2fSopenharmony_ci { 1442c5f01b2fSopenharmony_ci return position; 1443c5f01b2fSopenharmony_ci } 1444c5f01b2fSopenharmony_ci 1445c5f01b2fSopenharmony_ci /// return the last read token (for errors only). Will never contain EOF 1446c5f01b2fSopenharmony_ci /// (an arbitrary value that is not a valid char value, often -1), because 1447c5f01b2fSopenharmony_ci /// 255 may legitimately occur. May contain NUL, which should be escaped. 
1448c5f01b2fSopenharmony_ci std::string get_token_string() const 1449c5f01b2fSopenharmony_ci { 1450c5f01b2fSopenharmony_ci // escape control characters 1451c5f01b2fSopenharmony_ci std::string result; 1452c5f01b2fSopenharmony_ci for (const auto c : token_string) 1453c5f01b2fSopenharmony_ci { 1454c5f01b2fSopenharmony_ci if (static_cast<unsigned char>(c) <= '\x1F') 1455c5f01b2fSopenharmony_ci { 1456c5f01b2fSopenharmony_ci // escape control characters 1457c5f01b2fSopenharmony_ci std::array<char, 9> cs{{}}; 1458c5f01b2fSopenharmony_ci static_cast<void>((std::snprintf)(cs.data(), cs.size(), "<U+%.4X>", static_cast<unsigned char>(c))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) 1459c5f01b2fSopenharmony_ci result += cs.data(); 1460c5f01b2fSopenharmony_ci } 1461c5f01b2fSopenharmony_ci else 1462c5f01b2fSopenharmony_ci { 1463c5f01b2fSopenharmony_ci // add character as is 1464c5f01b2fSopenharmony_ci result.push_back(static_cast<std::string::value_type>(c)); 1465c5f01b2fSopenharmony_ci } 1466c5f01b2fSopenharmony_ci } 1467c5f01b2fSopenharmony_ci 1468c5f01b2fSopenharmony_ci return result; 1469c5f01b2fSopenharmony_ci } 1470c5f01b2fSopenharmony_ci 1471c5f01b2fSopenharmony_ci /// return syntax error message 1472c5f01b2fSopenharmony_ci JSON_HEDLEY_RETURNS_NON_NULL 1473c5f01b2fSopenharmony_ci constexpr const char* get_error_message() const noexcept 1474c5f01b2fSopenharmony_ci { 1475c5f01b2fSopenharmony_ci return error_message; 1476c5f01b2fSopenharmony_ci } 1477c5f01b2fSopenharmony_ci 1478c5f01b2fSopenharmony_ci ///////////////////// 1479c5f01b2fSopenharmony_ci // actual scanner 1480c5f01b2fSopenharmony_ci ///////////////////// 1481c5f01b2fSopenharmony_ci 1482c5f01b2fSopenharmony_ci /*! 
1483c5f01b2fSopenharmony_ci @brief skip the UTF-8 byte order mark 1484c5f01b2fSopenharmony_ci @return true iff there is no BOM or the correct BOM has been skipped 1485c5f01b2fSopenharmony_ci */ 1486c5f01b2fSopenharmony_ci bool skip_bom() 1487c5f01b2fSopenharmony_ci { 1488c5f01b2fSopenharmony_ci if (get() == 0xEF) 1489c5f01b2fSopenharmony_ci { 1490c5f01b2fSopenharmony_ci // check if we completely parse the BOM 1491c5f01b2fSopenharmony_ci return get() == 0xBB && get() == 0xBF; 1492c5f01b2fSopenharmony_ci } 1493c5f01b2fSopenharmony_ci 1494c5f01b2fSopenharmony_ci // the first character is not the beginning of the BOM; unget it to 1495c5f01b2fSopenharmony_ci // process is later 1496c5f01b2fSopenharmony_ci unget(); 1497c5f01b2fSopenharmony_ci return true; 1498c5f01b2fSopenharmony_ci } 1499c5f01b2fSopenharmony_ci 1500c5f01b2fSopenharmony_ci void skip_whitespace() 1501c5f01b2fSopenharmony_ci { 1502c5f01b2fSopenharmony_ci do 1503c5f01b2fSopenharmony_ci { 1504c5f01b2fSopenharmony_ci get(); 1505c5f01b2fSopenharmony_ci } 1506c5f01b2fSopenharmony_ci while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); 1507c5f01b2fSopenharmony_ci } 1508c5f01b2fSopenharmony_ci 1509c5f01b2fSopenharmony_ci token_type scan() 1510c5f01b2fSopenharmony_ci { 1511c5f01b2fSopenharmony_ci // initially, skip the BOM 1512c5f01b2fSopenharmony_ci if (position.chars_read_total == 0 && !skip_bom()) 1513c5f01b2fSopenharmony_ci { 1514c5f01b2fSopenharmony_ci error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; 1515c5f01b2fSopenharmony_ci return token_type::parse_error; 1516c5f01b2fSopenharmony_ci } 1517c5f01b2fSopenharmony_ci 1518c5f01b2fSopenharmony_ci // read next character and ignore whitespace 1519c5f01b2fSopenharmony_ci skip_whitespace(); 1520c5f01b2fSopenharmony_ci 1521c5f01b2fSopenharmony_ci // ignore comments 1522c5f01b2fSopenharmony_ci while (ignore_comments && current == '/') 1523c5f01b2fSopenharmony_ci { 1524c5f01b2fSopenharmony_ci if (!scan_comment()) 
1525c5f01b2fSopenharmony_ci { 1526c5f01b2fSopenharmony_ci return token_type::parse_error; 1527c5f01b2fSopenharmony_ci } 1528c5f01b2fSopenharmony_ci 1529c5f01b2fSopenharmony_ci // skip following whitespace 1530c5f01b2fSopenharmony_ci skip_whitespace(); 1531c5f01b2fSopenharmony_ci } 1532c5f01b2fSopenharmony_ci 1533c5f01b2fSopenharmony_ci switch (current) 1534c5f01b2fSopenharmony_ci { 1535c5f01b2fSopenharmony_ci // structural characters 1536c5f01b2fSopenharmony_ci case '[': 1537c5f01b2fSopenharmony_ci return token_type::begin_array; 1538c5f01b2fSopenharmony_ci case ']': 1539c5f01b2fSopenharmony_ci return token_type::end_array; 1540c5f01b2fSopenharmony_ci case '{': 1541c5f01b2fSopenharmony_ci return token_type::begin_object; 1542c5f01b2fSopenharmony_ci case '}': 1543c5f01b2fSopenharmony_ci return token_type::end_object; 1544c5f01b2fSopenharmony_ci case ':': 1545c5f01b2fSopenharmony_ci return token_type::name_separator; 1546c5f01b2fSopenharmony_ci case ',': 1547c5f01b2fSopenharmony_ci return token_type::value_separator; 1548c5f01b2fSopenharmony_ci 1549c5f01b2fSopenharmony_ci // literals 1550c5f01b2fSopenharmony_ci case 't': 1551c5f01b2fSopenharmony_ci { 1552c5f01b2fSopenharmony_ci std::array<char_type, 4> true_literal = {{static_cast<char_type>('t'), static_cast<char_type>('r'), static_cast<char_type>('u'), static_cast<char_type>('e')}}; 1553c5f01b2fSopenharmony_ci return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true); 1554c5f01b2fSopenharmony_ci } 1555c5f01b2fSopenharmony_ci case 'f': 1556c5f01b2fSopenharmony_ci { 1557c5f01b2fSopenharmony_ci std::array<char_type, 5> false_literal = {{static_cast<char_type>('f'), static_cast<char_type>('a'), static_cast<char_type>('l'), static_cast<char_type>('s'), static_cast<char_type>('e')}}; 1558c5f01b2fSopenharmony_ci return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false); 1559c5f01b2fSopenharmony_ci } 1560c5f01b2fSopenharmony_ci case 'n': 
1561c5f01b2fSopenharmony_ci { 1562c5f01b2fSopenharmony_ci std::array<char_type, 4> null_literal = {{static_cast<char_type>('n'), static_cast<char_type>('u'), static_cast<char_type>('l'), static_cast<char_type>('l')}}; 1563c5f01b2fSopenharmony_ci return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null); 1564c5f01b2fSopenharmony_ci } 1565c5f01b2fSopenharmony_ci 1566c5f01b2fSopenharmony_ci // string 1567c5f01b2fSopenharmony_ci case '\"': 1568c5f01b2fSopenharmony_ci return scan_string(); 1569c5f01b2fSopenharmony_ci 1570c5f01b2fSopenharmony_ci // number 1571c5f01b2fSopenharmony_ci case '-': 1572c5f01b2fSopenharmony_ci case '0': 1573c5f01b2fSopenharmony_ci case '1': 1574c5f01b2fSopenharmony_ci case '2': 1575c5f01b2fSopenharmony_ci case '3': 1576c5f01b2fSopenharmony_ci case '4': 1577c5f01b2fSopenharmony_ci case '5': 1578c5f01b2fSopenharmony_ci case '6': 1579c5f01b2fSopenharmony_ci case '7': 1580c5f01b2fSopenharmony_ci case '8': 1581c5f01b2fSopenharmony_ci case '9': 1582c5f01b2fSopenharmony_ci return scan_number(); 1583c5f01b2fSopenharmony_ci 1584c5f01b2fSopenharmony_ci // end of input (the null byte is needed when parsing from 1585c5f01b2fSopenharmony_ci // string literals) 1586c5f01b2fSopenharmony_ci case '\0': 1587c5f01b2fSopenharmony_ci case std::char_traits<char_type>::eof(): 1588c5f01b2fSopenharmony_ci return token_type::end_of_input; 1589c5f01b2fSopenharmony_ci 1590c5f01b2fSopenharmony_ci // error 1591c5f01b2fSopenharmony_ci default: 1592c5f01b2fSopenharmony_ci error_message = "invalid literal"; 1593c5f01b2fSopenharmony_ci return token_type::parse_error; 1594c5f01b2fSopenharmony_ci } 1595c5f01b2fSopenharmony_ci } 1596c5f01b2fSopenharmony_ci 1597c5f01b2fSopenharmony_ci private: 1598c5f01b2fSopenharmony_ci /// input adapter 1599c5f01b2fSopenharmony_ci InputAdapterType ia; 1600c5f01b2fSopenharmony_ci 1601c5f01b2fSopenharmony_ci /// whether comments should be ignored (true) or signaled as errors (false) 1602c5f01b2fSopenharmony_ci const 
bool ignore_comments = false; 1603c5f01b2fSopenharmony_ci 1604c5f01b2fSopenharmony_ci /// the current character 1605c5f01b2fSopenharmony_ci char_int_type current = std::char_traits<char_type>::eof(); 1606c5f01b2fSopenharmony_ci 1607c5f01b2fSopenharmony_ci /// whether the next get() call should just return current 1608c5f01b2fSopenharmony_ci bool next_unget = false; 1609c5f01b2fSopenharmony_ci 1610c5f01b2fSopenharmony_ci /// the start position of the current token 1611c5f01b2fSopenharmony_ci position_t position {}; 1612c5f01b2fSopenharmony_ci 1613c5f01b2fSopenharmony_ci /// raw input token string (for error messages) 1614c5f01b2fSopenharmony_ci std::vector<char_type> token_string {}; 1615c5f01b2fSopenharmony_ci 1616c5f01b2fSopenharmony_ci /// buffer for variable-length tokens (numbers, strings) 1617c5f01b2fSopenharmony_ci string_t token_buffer {}; 1618c5f01b2fSopenharmony_ci 1619c5f01b2fSopenharmony_ci /// a description of occurred lexer errors 1620c5f01b2fSopenharmony_ci const char* error_message = ""; 1621c5f01b2fSopenharmony_ci 1622c5f01b2fSopenharmony_ci // number values 1623c5f01b2fSopenharmony_ci number_integer_t value_integer = 0; 1624c5f01b2fSopenharmony_ci number_unsigned_t value_unsigned = 0; 1625c5f01b2fSopenharmony_ci number_float_t value_float = 0; 1626c5f01b2fSopenharmony_ci 1627c5f01b2fSopenharmony_ci /// the decimal point 1628c5f01b2fSopenharmony_ci const char_int_type decimal_point_char = '.'; 1629c5f01b2fSopenharmony_ci}; 1630c5f01b2fSopenharmony_ci 1631c5f01b2fSopenharmony_ci} // namespace detail 1632c5f01b2fSopenharmony_ciNLOHMANN_JSON_NAMESPACE_END 1633