1c5f01b2fSopenharmony_ci//     __ _____ _____ _____
2c5f01b2fSopenharmony_ci//  __|  |   __|     |   | |  JSON for Modern C++
3c5f01b2fSopenharmony_ci// |  |  |__   |  |  | | | |  version 3.11.2
4c5f01b2fSopenharmony_ci// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
5c5f01b2fSopenharmony_ci//
6c5f01b2fSopenharmony_ci// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me>
7c5f01b2fSopenharmony_ci// SPDX-License-Identifier: MIT
8c5f01b2fSopenharmony_ci
9c5f01b2fSopenharmony_ci#pragma once
10c5f01b2fSopenharmony_ci
11c5f01b2fSopenharmony_ci#include <array> // array
12c5f01b2fSopenharmony_ci#include <clocale> // localeconv
13c5f01b2fSopenharmony_ci#include <cstddef> // size_t
14c5f01b2fSopenharmony_ci#include <cstdio> // snprintf
15c5f01b2fSopenharmony_ci#include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull
16c5f01b2fSopenharmony_ci#include <initializer_list> // initializer_list
17c5f01b2fSopenharmony_ci#include <string> // char_traits, string
18c5f01b2fSopenharmony_ci#include <utility> // move
19c5f01b2fSopenharmony_ci#include <vector> // vector
20c5f01b2fSopenharmony_ci
21c5f01b2fSopenharmony_ci#include <nlohmann/detail/input/input_adapters.hpp>
22c5f01b2fSopenharmony_ci#include <nlohmann/detail/input/position_t.hpp>
23c5f01b2fSopenharmony_ci#include <nlohmann/detail/macro_scope.hpp>
24c5f01b2fSopenharmony_ci
25c5f01b2fSopenharmony_ciNLOHMANN_JSON_NAMESPACE_BEGIN
26c5f01b2fSopenharmony_cinamespace detail
27c5f01b2fSopenharmony_ci{
28c5f01b2fSopenharmony_ci
29c5f01b2fSopenharmony_ci///////////
30c5f01b2fSopenharmony_ci// lexer //
31c5f01b2fSopenharmony_ci///////////
32c5f01b2fSopenharmony_ci
/// Base class of the lexer: defines the token vocabulary handed to the parser
/// and a helper that turns a token type into text for error messages.
///
/// The template parameter is not referenced inside this class body.
template<typename BasicJsonType>
class lexer_base
{
  public:
    /// token types for the parser
    enum class token_type
    {
        uninitialized,    ///< indicating the scanner is uninitialized
        literal_true,     ///< the `true` literal
        literal_false,    ///< the `false` literal
        literal_null,     ///< the `null` literal
        value_string,     ///< a string -- use get_string() for actual value
        value_unsigned,   ///< an unsigned integer -- use get_number_unsigned() for actual value
        value_integer,    ///< a signed integer -- use get_number_integer() for actual value
        value_float,      ///< a floating-point number -- use get_number_float() for actual value
        begin_array,      ///< the character for array begin `[`
        begin_object,     ///< the character for object begin `{`
        end_array,        ///< the character for array end `]`
        end_object,       ///< the character for object end `}`
        name_separator,   ///< the name separator `:`
        value_separator,  ///< the value separator `,`
        parse_error,      ///< indicating a parse error
        end_of_input,     ///< indicating the end of the input buffer
        literal_or_value  ///< a literal or the begin of a value (only for diagnostics)
    };

    /// @brief return the name of a token_type value (only used for errors)
    /// @param[in] t  token type to describe
    /// @return pointer to a string literal, never null; the three number
    ///         token types share the name "number literal", and a value
    ///         outside the enumeration yields "unknown token"
    JSON_HEDLEY_RETURNS_NON_NULL
    JSON_HEDLEY_CONST
    static const char* token_type_name(const token_type t) noexcept
    {
        switch (t)
        {
            case token_type::uninitialized:
                return "<uninitialized>";
            case token_type::literal_true:
                return "true literal";
            case token_type::literal_false:
                return "false literal";
            case token_type::literal_null:
                return "null literal";
            case token_type::value_string:
                return "string literal";
            // all numeric token types are reported under one name
            case token_type::value_unsigned:
            case token_type::value_integer:
            case token_type::value_float:
                return "number literal";
            case token_type::begin_array:
                return "'['";
            case token_type::begin_object:
                return "'{'";
            case token_type::end_array:
                return "']'";
            case token_type::end_object:
                return "'}'";
            case token_type::name_separator:
                return "':'";
            case token_type::value_separator:
                return "','";
            case token_type::parse_error:
                return "<parse error>";
            case token_type::end_of_input:
                return "end of input";
            case token_type::literal_or_value:
                return "'[', '{', or a literal";
            // LCOV_EXCL_START
            default: // catch non-enum values
                return "unknown token";
                // LCOV_EXCL_STOP
        }
    }
};
105c5f01b2fSopenharmony_ci/*!
106c5f01b2fSopenharmony_ci@brief lexical analysis
107c5f01b2fSopenharmony_ci
108c5f01b2fSopenharmony_ciThis class organizes the lexical analysis during JSON deserialization.
109c5f01b2fSopenharmony_ci*/
110c5f01b2fSopenharmony_citemplate<typename BasicJsonType, typename InputAdapterType>
111c5f01b2fSopenharmony_ciclass lexer : public lexer_base<BasicJsonType>
112c5f01b2fSopenharmony_ci{
113c5f01b2fSopenharmony_ci    using number_integer_t = typename BasicJsonType::number_integer_t;
114c5f01b2fSopenharmony_ci    using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
115c5f01b2fSopenharmony_ci    using number_float_t = typename BasicJsonType::number_float_t;
116c5f01b2fSopenharmony_ci    using string_t = typename BasicJsonType::string_t;
117c5f01b2fSopenharmony_ci    using char_type = typename InputAdapterType::char_type;
118c5f01b2fSopenharmony_ci    using char_int_type = typename std::char_traits<char_type>::int_type;
119c5f01b2fSopenharmony_ci
120c5f01b2fSopenharmony_ci  public:
121c5f01b2fSopenharmony_ci    using token_type = typename lexer_base<BasicJsonType>::token_type;
122c5f01b2fSopenharmony_ci
123c5f01b2fSopenharmony_ci    explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept
124c5f01b2fSopenharmony_ci        : ia(std::move(adapter))
125c5f01b2fSopenharmony_ci        , ignore_comments(ignore_comments_)
126c5f01b2fSopenharmony_ci        , decimal_point_char(static_cast<char_int_type>(get_decimal_point()))
127c5f01b2fSopenharmony_ci    {}
128c5f01b2fSopenharmony_ci
129c5f01b2fSopenharmony_ci    // delete because of pointer members
130c5f01b2fSopenharmony_ci    lexer(const lexer&) = delete;
131c5f01b2fSopenharmony_ci    lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
132c5f01b2fSopenharmony_ci    lexer& operator=(lexer&) = delete;
133c5f01b2fSopenharmony_ci    lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
134c5f01b2fSopenharmony_ci    ~lexer() = default;
135c5f01b2fSopenharmony_ci
136c5f01b2fSopenharmony_ci  private:
137c5f01b2fSopenharmony_ci    /////////////////////
138c5f01b2fSopenharmony_ci    // locales
139c5f01b2fSopenharmony_ci    /////////////////////
140c5f01b2fSopenharmony_ci
141c5f01b2fSopenharmony_ci    /// return the locale-dependent decimal point
JSON_HEDLEY_PURE
static char get_decimal_point() noexcept
{
    // ask the C library for the numeric formatting data of the active locale
    const auto* loc = std::localeconv();
    JSON_ASSERT(loc != nullptr);
    // only the first character of the decimal-point string is used;
    // '.' is the fallback when the locale provides no string at all
    return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point);
}
149c5f01b2fSopenharmony_ci
150c5f01b2fSopenharmony_ci    /////////////////////
151c5f01b2fSopenharmony_ci    // scan functions
152c5f01b2fSopenharmony_ci    /////////////////////
153c5f01b2fSopenharmony_ci
154c5f01b2fSopenharmony_ci    /*!
155c5f01b2fSopenharmony_ci    @brief get codepoint from 4 hex characters following `\u`
156c5f01b2fSopenharmony_ci
157c5f01b2fSopenharmony_ci    For input "\u c1 c2 c3 c4" the codepoint is:
158c5f01b2fSopenharmony_ci      (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4
159c5f01b2fSopenharmony_ci    = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0)
160c5f01b2fSopenharmony_ci
161c5f01b2fSopenharmony_ci    Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f'
162c5f01b2fSopenharmony_ci    must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The
163c5f01b2fSopenharmony_ci    conversion is done by subtracting the offset (0x30, 0x37, and 0x57)
164c5f01b2fSopenharmony_ci    between the ASCII value of the character and the desired integer value.
165c5f01b2fSopenharmony_ci
166c5f01b2fSopenharmony_ci    @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or
167c5f01b2fSopenharmony_ci            non-hex character)
168c5f01b2fSopenharmony_ci    */
169c5f01b2fSopenharmony_ci    int get_codepoint()
170c5f01b2fSopenharmony_ci    {
171c5f01b2fSopenharmony_ci        // this function only makes sense after reading `\u`
172c5f01b2fSopenharmony_ci        JSON_ASSERT(current == 'u');
173c5f01b2fSopenharmony_ci        int codepoint = 0;
174c5f01b2fSopenharmony_ci
175c5f01b2fSopenharmony_ci        const auto factors = { 12u, 8u, 4u, 0u };
176c5f01b2fSopenharmony_ci        for (const auto factor : factors)
177c5f01b2fSopenharmony_ci        {
178c5f01b2fSopenharmony_ci            get();
179c5f01b2fSopenharmony_ci
180c5f01b2fSopenharmony_ci            if (current >= '0' && current <= '9')
181c5f01b2fSopenharmony_ci            {
182c5f01b2fSopenharmony_ci                codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x30u) << factor);
183c5f01b2fSopenharmony_ci            }
184c5f01b2fSopenharmony_ci            else if (current >= 'A' && current <= 'F')
185c5f01b2fSopenharmony_ci            {
186c5f01b2fSopenharmony_ci                codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x37u) << factor);
187c5f01b2fSopenharmony_ci            }
188c5f01b2fSopenharmony_ci            else if (current >= 'a' && current <= 'f')
189c5f01b2fSopenharmony_ci            {
190c5f01b2fSopenharmony_ci                codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x57u) << factor);
191c5f01b2fSopenharmony_ci            }
192c5f01b2fSopenharmony_ci            else
193c5f01b2fSopenharmony_ci            {
194c5f01b2fSopenharmony_ci                return -1;
195c5f01b2fSopenharmony_ci            }
196c5f01b2fSopenharmony_ci        }
197c5f01b2fSopenharmony_ci
198c5f01b2fSopenharmony_ci        JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF);
199c5f01b2fSopenharmony_ci        return codepoint;
200c5f01b2fSopenharmony_ci    }
201c5f01b2fSopenharmony_ci
202c5f01b2fSopenharmony_ci    /*!
203c5f01b2fSopenharmony_ci    @brief check if the next byte(s) are inside a given range
204c5f01b2fSopenharmony_ci
205c5f01b2fSopenharmony_ci    Adds the current byte and, for each passed range, reads a new byte and
206c5f01b2fSopenharmony_ci    checks if it is inside the range. If a violation was detected, set up an
207c5f01b2fSopenharmony_ci    error message and return false. Otherwise, return true.
208c5f01b2fSopenharmony_ci
209c5f01b2fSopenharmony_ci    @param[in] ranges  list of integers; interpreted as list of pairs of
210c5f01b2fSopenharmony_ci                       inclusive lower and upper bound, respectively
211c5f01b2fSopenharmony_ci
212c5f01b2fSopenharmony_ci    @pre The passed list @a ranges must have 2, 4, or 6 elements; that is,
213c5f01b2fSopenharmony_ci         1, 2, or 3 pairs. This precondition is enforced by an assertion.
214c5f01b2fSopenharmony_ci
215c5f01b2fSopenharmony_ci    @return true if and only if no range violation was detected
216c5f01b2fSopenharmony_ci    */
217c5f01b2fSopenharmony_ci    bool next_byte_in_range(std::initializer_list<char_int_type> ranges)
218c5f01b2fSopenharmony_ci    {
219c5f01b2fSopenharmony_ci        JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6);
220c5f01b2fSopenharmony_ci        add(current);
221c5f01b2fSopenharmony_ci
222c5f01b2fSopenharmony_ci        for (auto range = ranges.begin(); range != ranges.end(); ++range)
223c5f01b2fSopenharmony_ci        {
224c5f01b2fSopenharmony_ci            get();
225c5f01b2fSopenharmony_ci            if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range)))
226c5f01b2fSopenharmony_ci            {
227c5f01b2fSopenharmony_ci                add(current);
228c5f01b2fSopenharmony_ci            }
229c5f01b2fSopenharmony_ci            else
230c5f01b2fSopenharmony_ci            {
231c5f01b2fSopenharmony_ci                error_message = "invalid string: ill-formed UTF-8 byte";
232c5f01b2fSopenharmony_ci                return false;
233c5f01b2fSopenharmony_ci            }
234c5f01b2fSopenharmony_ci        }
235c5f01b2fSopenharmony_ci
236c5f01b2fSopenharmony_ci        return true;
237c5f01b2fSopenharmony_ci    }
238c5f01b2fSopenharmony_ci
239c5f01b2fSopenharmony_ci    /*!
240c5f01b2fSopenharmony_ci    @brief scan a string literal
241c5f01b2fSopenharmony_ci
    This function scans a string according to Sect. 7 of RFC 8259. While
    scanning, escape sequences are decoded and the resulting bytes are copied
    into the buffer token_buffer. When the function returns successfully,
    token_buffer is *not* null-terminated (as it may contain \0 bytes), and
    token_buffer.size() is the number of bytes in the string.
247c5f01b2fSopenharmony_ci
248c5f01b2fSopenharmony_ci    @return token_type::value_string if string could be successfully scanned,
249c5f01b2fSopenharmony_ci            token_type::parse_error otherwise
250c5f01b2fSopenharmony_ci
251c5f01b2fSopenharmony_ci    @note In case of errors, variable error_message contains a textual
252c5f01b2fSopenharmony_ci          description.
253c5f01b2fSopenharmony_ci    */
254c5f01b2fSopenharmony_ci    token_type scan_string()
255c5f01b2fSopenharmony_ci    {
256c5f01b2fSopenharmony_ci        // reset token_buffer (ignore opening quote)
257c5f01b2fSopenharmony_ci        reset();
258c5f01b2fSopenharmony_ci
259c5f01b2fSopenharmony_ci        // we entered the function by reading an open quote
260c5f01b2fSopenharmony_ci        JSON_ASSERT(current == '\"');
261c5f01b2fSopenharmony_ci
262c5f01b2fSopenharmony_ci        while (true)
263c5f01b2fSopenharmony_ci        {
264c5f01b2fSopenharmony_ci            // get next character
265c5f01b2fSopenharmony_ci            switch (get())
266c5f01b2fSopenharmony_ci            {
267c5f01b2fSopenharmony_ci                // end of file while parsing string
268c5f01b2fSopenharmony_ci                case std::char_traits<char_type>::eof():
269c5f01b2fSopenharmony_ci                {
270c5f01b2fSopenharmony_ci                    error_message = "invalid string: missing closing quote";
271c5f01b2fSopenharmony_ci                    return token_type::parse_error;
272c5f01b2fSopenharmony_ci                }
273c5f01b2fSopenharmony_ci
274c5f01b2fSopenharmony_ci                // closing quote
275c5f01b2fSopenharmony_ci                case '\"':
276c5f01b2fSopenharmony_ci                {
277c5f01b2fSopenharmony_ci                    return token_type::value_string;
278c5f01b2fSopenharmony_ci                }
279c5f01b2fSopenharmony_ci
280c5f01b2fSopenharmony_ci                // escapes
281c5f01b2fSopenharmony_ci                case '\\':
282c5f01b2fSopenharmony_ci                {
283c5f01b2fSopenharmony_ci                    switch (get())
284c5f01b2fSopenharmony_ci                    {
285c5f01b2fSopenharmony_ci                        // quotation mark
286c5f01b2fSopenharmony_ci                        case '\"':
287c5f01b2fSopenharmony_ci                            add('\"');
288c5f01b2fSopenharmony_ci                            break;
289c5f01b2fSopenharmony_ci                        // reverse solidus
290c5f01b2fSopenharmony_ci                        case '\\':
291c5f01b2fSopenharmony_ci                            add('\\');
292c5f01b2fSopenharmony_ci                            break;
293c5f01b2fSopenharmony_ci                        // solidus
294c5f01b2fSopenharmony_ci                        case '/':
295c5f01b2fSopenharmony_ci                            add('/');
296c5f01b2fSopenharmony_ci                            break;
297c5f01b2fSopenharmony_ci                        // backspace
298c5f01b2fSopenharmony_ci                        case 'b':
299c5f01b2fSopenharmony_ci                            add('\b');
300c5f01b2fSopenharmony_ci                            break;
301c5f01b2fSopenharmony_ci                        // form feed
302c5f01b2fSopenharmony_ci                        case 'f':
303c5f01b2fSopenharmony_ci                            add('\f');
304c5f01b2fSopenharmony_ci                            break;
305c5f01b2fSopenharmony_ci                        // line feed
306c5f01b2fSopenharmony_ci                        case 'n':
307c5f01b2fSopenharmony_ci                            add('\n');
308c5f01b2fSopenharmony_ci                            break;
309c5f01b2fSopenharmony_ci                        // carriage return
310c5f01b2fSopenharmony_ci                        case 'r':
311c5f01b2fSopenharmony_ci                            add('\r');
312c5f01b2fSopenharmony_ci                            break;
313c5f01b2fSopenharmony_ci                        // tab
314c5f01b2fSopenharmony_ci                        case 't':
315c5f01b2fSopenharmony_ci                            add('\t');
316c5f01b2fSopenharmony_ci                            break;
317c5f01b2fSopenharmony_ci
318c5f01b2fSopenharmony_ci                        // unicode escapes
319c5f01b2fSopenharmony_ci                        case 'u':
320c5f01b2fSopenharmony_ci                        {
321c5f01b2fSopenharmony_ci                            const int codepoint1 = get_codepoint();
322c5f01b2fSopenharmony_ci                            int codepoint = codepoint1; // start with codepoint1
323c5f01b2fSopenharmony_ci
324c5f01b2fSopenharmony_ci                            if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1))
325c5f01b2fSopenharmony_ci                            {
326c5f01b2fSopenharmony_ci                                error_message = "invalid string: '\\u' must be followed by 4 hex digits";
327c5f01b2fSopenharmony_ci                                return token_type::parse_error;
328c5f01b2fSopenharmony_ci                            }
329c5f01b2fSopenharmony_ci
330c5f01b2fSopenharmony_ci                            // check if code point is a high surrogate
331c5f01b2fSopenharmony_ci                            if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF)
332c5f01b2fSopenharmony_ci                            {
333c5f01b2fSopenharmony_ci                                // expect next \uxxxx entry
334c5f01b2fSopenharmony_ci                                if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u'))
335c5f01b2fSopenharmony_ci                                {
336c5f01b2fSopenharmony_ci                                    const int codepoint2 = get_codepoint();
337c5f01b2fSopenharmony_ci
338c5f01b2fSopenharmony_ci                                    if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1))
339c5f01b2fSopenharmony_ci                                    {
340c5f01b2fSopenharmony_ci                                        error_message = "invalid string: '\\u' must be followed by 4 hex digits";
341c5f01b2fSopenharmony_ci                                        return token_type::parse_error;
342c5f01b2fSopenharmony_ci                                    }
343c5f01b2fSopenharmony_ci
344c5f01b2fSopenharmony_ci                                    // check if codepoint2 is a low surrogate
345c5f01b2fSopenharmony_ci                                    if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF))
346c5f01b2fSopenharmony_ci                                    {
347c5f01b2fSopenharmony_ci                                        // overwrite codepoint
348c5f01b2fSopenharmony_ci                                        codepoint = static_cast<int>(
349c5f01b2fSopenharmony_ci                                                        // high surrogate occupies the most significant 22 bits
350c5f01b2fSopenharmony_ci                                                        (static_cast<unsigned int>(codepoint1) << 10u)
351c5f01b2fSopenharmony_ci                                                        // low surrogate occupies the least significant 15 bits
352c5f01b2fSopenharmony_ci                                                        + static_cast<unsigned int>(codepoint2)
353c5f01b2fSopenharmony_ci                                                        // there is still the 0xD800, 0xDC00 and 0x10000 noise
354c5f01b2fSopenharmony_ci                                                        // in the result, so we have to subtract with:
355c5f01b2fSopenharmony_ci                                                        // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
356c5f01b2fSopenharmony_ci                                                        - 0x35FDC00u);
357c5f01b2fSopenharmony_ci                                    }
358c5f01b2fSopenharmony_ci                                    else
359c5f01b2fSopenharmony_ci                                    {
360c5f01b2fSopenharmony_ci                                        error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
361c5f01b2fSopenharmony_ci                                        return token_type::parse_error;
362c5f01b2fSopenharmony_ci                                    }
363c5f01b2fSopenharmony_ci                                }
364c5f01b2fSopenharmony_ci                                else
365c5f01b2fSopenharmony_ci                                {
366c5f01b2fSopenharmony_ci                                    error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
367c5f01b2fSopenharmony_ci                                    return token_type::parse_error;
368c5f01b2fSopenharmony_ci                                }
369c5f01b2fSopenharmony_ci                            }
370c5f01b2fSopenharmony_ci                            else
371c5f01b2fSopenharmony_ci                            {
372c5f01b2fSopenharmony_ci                                if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF))
373c5f01b2fSopenharmony_ci                                {
374c5f01b2fSopenharmony_ci                                    error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
375c5f01b2fSopenharmony_ci                                    return token_type::parse_error;
376c5f01b2fSopenharmony_ci                                }
377c5f01b2fSopenharmony_ci                            }
378c5f01b2fSopenharmony_ci
379c5f01b2fSopenharmony_ci                            // result of the above calculation yields a proper codepoint
380c5f01b2fSopenharmony_ci                            JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF);
381c5f01b2fSopenharmony_ci
382c5f01b2fSopenharmony_ci                            // translate codepoint into bytes
383c5f01b2fSopenharmony_ci                            if (codepoint < 0x80)
384c5f01b2fSopenharmony_ci                            {
385c5f01b2fSopenharmony_ci                                // 1-byte characters: 0xxxxxxx (ASCII)
386c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(codepoint));
387c5f01b2fSopenharmony_ci                            }
388c5f01b2fSopenharmony_ci                            else if (codepoint <= 0x7FF)
389c5f01b2fSopenharmony_ci                            {
390c5f01b2fSopenharmony_ci                                // 2-byte characters: 110xxxxx 10xxxxxx
391c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u)));
392c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
393c5f01b2fSopenharmony_ci                            }
394c5f01b2fSopenharmony_ci                            else if (codepoint <= 0xFFFF)
395c5f01b2fSopenharmony_ci                            {
396c5f01b2fSopenharmony_ci                                // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
397c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u)));
398c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
399c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
400c5f01b2fSopenharmony_ci                            }
401c5f01b2fSopenharmony_ci                            else
402c5f01b2fSopenharmony_ci                            {
403c5f01b2fSopenharmony_ci                                // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
404c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u)));
405c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu)));
406c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu)));
407c5f01b2fSopenharmony_ci                                add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu)));
408c5f01b2fSopenharmony_ci                            }
409c5f01b2fSopenharmony_ci
410c5f01b2fSopenharmony_ci                            break;
411c5f01b2fSopenharmony_ci                        }
412c5f01b2fSopenharmony_ci
413c5f01b2fSopenharmony_ci                        // other characters after escape
414c5f01b2fSopenharmony_ci                        default:
415c5f01b2fSopenharmony_ci                            error_message = "invalid string: forbidden character after backslash";
416c5f01b2fSopenharmony_ci                            return token_type::parse_error;
417c5f01b2fSopenharmony_ci                    }
418c5f01b2fSopenharmony_ci
419c5f01b2fSopenharmony_ci                    break;
420c5f01b2fSopenharmony_ci                }
421c5f01b2fSopenharmony_ci
422c5f01b2fSopenharmony_ci                // invalid control characters
423c5f01b2fSopenharmony_ci                case 0x00:
424c5f01b2fSopenharmony_ci                {
425c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000";
426c5f01b2fSopenharmony_ci                    return token_type::parse_error;
427c5f01b2fSopenharmony_ci                }
428c5f01b2fSopenharmony_ci
429c5f01b2fSopenharmony_ci                case 0x01:
430c5f01b2fSopenharmony_ci                {
431c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001";
432c5f01b2fSopenharmony_ci                    return token_type::parse_error;
433c5f01b2fSopenharmony_ci                }
434c5f01b2fSopenharmony_ci
435c5f01b2fSopenharmony_ci                case 0x02:
436c5f01b2fSopenharmony_ci                {
437c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002";
438c5f01b2fSopenharmony_ci                    return token_type::parse_error;
439c5f01b2fSopenharmony_ci                }
440c5f01b2fSopenharmony_ci
441c5f01b2fSopenharmony_ci                case 0x03:
442c5f01b2fSopenharmony_ci                {
443c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003";
444c5f01b2fSopenharmony_ci                    return token_type::parse_error;
445c5f01b2fSopenharmony_ci                }
446c5f01b2fSopenharmony_ci
447c5f01b2fSopenharmony_ci                case 0x04:
448c5f01b2fSopenharmony_ci                {
449c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004";
450c5f01b2fSopenharmony_ci                    return token_type::parse_error;
451c5f01b2fSopenharmony_ci                }
452c5f01b2fSopenharmony_ci
453c5f01b2fSopenharmony_ci                case 0x05:
454c5f01b2fSopenharmony_ci                {
455c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005";
456c5f01b2fSopenharmony_ci                    return token_type::parse_error;
457c5f01b2fSopenharmony_ci                }
458c5f01b2fSopenharmony_ci
459c5f01b2fSopenharmony_ci                case 0x06:
460c5f01b2fSopenharmony_ci                {
461c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006";
462c5f01b2fSopenharmony_ci                    return token_type::parse_error;
463c5f01b2fSopenharmony_ci                }
464c5f01b2fSopenharmony_ci
465c5f01b2fSopenharmony_ci                case 0x07:
466c5f01b2fSopenharmony_ci                {
467c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007";
468c5f01b2fSopenharmony_ci                    return token_type::parse_error;
469c5f01b2fSopenharmony_ci                }
470c5f01b2fSopenharmony_ci
471c5f01b2fSopenharmony_ci                case 0x08:
472c5f01b2fSopenharmony_ci                {
473c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b";
474c5f01b2fSopenharmony_ci                    return token_type::parse_error;
475c5f01b2fSopenharmony_ci                }
476c5f01b2fSopenharmony_ci
477c5f01b2fSopenharmony_ci                case 0x09:
478c5f01b2fSopenharmony_ci                {
479c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t";
480c5f01b2fSopenharmony_ci                    return token_type::parse_error;
481c5f01b2fSopenharmony_ci                }
482c5f01b2fSopenharmony_ci
483c5f01b2fSopenharmony_ci                case 0x0A:
484c5f01b2fSopenharmony_ci                {
485c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n";
486c5f01b2fSopenharmony_ci                    return token_type::parse_error;
487c5f01b2fSopenharmony_ci                }
488c5f01b2fSopenharmony_ci
489c5f01b2fSopenharmony_ci                case 0x0B:
490c5f01b2fSopenharmony_ci                {
491c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B";
492c5f01b2fSopenharmony_ci                    return token_type::parse_error;
493c5f01b2fSopenharmony_ci                }
494c5f01b2fSopenharmony_ci
495c5f01b2fSopenharmony_ci                case 0x0C:
496c5f01b2fSopenharmony_ci                {
497c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f";
498c5f01b2fSopenharmony_ci                    return token_type::parse_error;
499c5f01b2fSopenharmony_ci                }
500c5f01b2fSopenharmony_ci
501c5f01b2fSopenharmony_ci                case 0x0D:
502c5f01b2fSopenharmony_ci                {
503c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r";
504c5f01b2fSopenharmony_ci                    return token_type::parse_error;
505c5f01b2fSopenharmony_ci                }
506c5f01b2fSopenharmony_ci
507c5f01b2fSopenharmony_ci                case 0x0E:
508c5f01b2fSopenharmony_ci                {
509c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E";
510c5f01b2fSopenharmony_ci                    return token_type::parse_error;
511c5f01b2fSopenharmony_ci                }
512c5f01b2fSopenharmony_ci
513c5f01b2fSopenharmony_ci                case 0x0F:
514c5f01b2fSopenharmony_ci                {
515c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F";
516c5f01b2fSopenharmony_ci                    return token_type::parse_error;
517c5f01b2fSopenharmony_ci                }
518c5f01b2fSopenharmony_ci
519c5f01b2fSopenharmony_ci                case 0x10:
520c5f01b2fSopenharmony_ci                {
521c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010";
522c5f01b2fSopenharmony_ci                    return token_type::parse_error;
523c5f01b2fSopenharmony_ci                }
524c5f01b2fSopenharmony_ci
525c5f01b2fSopenharmony_ci                case 0x11:
526c5f01b2fSopenharmony_ci                {
527c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011";
528c5f01b2fSopenharmony_ci                    return token_type::parse_error;
529c5f01b2fSopenharmony_ci                }
530c5f01b2fSopenharmony_ci
531c5f01b2fSopenharmony_ci                case 0x12:
532c5f01b2fSopenharmony_ci                {
533c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012";
534c5f01b2fSopenharmony_ci                    return token_type::parse_error;
535c5f01b2fSopenharmony_ci                }
536c5f01b2fSopenharmony_ci
537c5f01b2fSopenharmony_ci                case 0x13:
538c5f01b2fSopenharmony_ci                {
539c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013";
540c5f01b2fSopenharmony_ci                    return token_type::parse_error;
541c5f01b2fSopenharmony_ci                }
542c5f01b2fSopenharmony_ci
543c5f01b2fSopenharmony_ci                case 0x14:
544c5f01b2fSopenharmony_ci                {
545c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014";
546c5f01b2fSopenharmony_ci                    return token_type::parse_error;
547c5f01b2fSopenharmony_ci                }
548c5f01b2fSopenharmony_ci
549c5f01b2fSopenharmony_ci                case 0x15:
550c5f01b2fSopenharmony_ci                {
551c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015";
552c5f01b2fSopenharmony_ci                    return token_type::parse_error;
553c5f01b2fSopenharmony_ci                }
554c5f01b2fSopenharmony_ci
555c5f01b2fSopenharmony_ci                case 0x16:
556c5f01b2fSopenharmony_ci                {
557c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016";
558c5f01b2fSopenharmony_ci                    return token_type::parse_error;
559c5f01b2fSopenharmony_ci                }
560c5f01b2fSopenharmony_ci
561c5f01b2fSopenharmony_ci                case 0x17:
562c5f01b2fSopenharmony_ci                {
563c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017";
564c5f01b2fSopenharmony_ci                    return token_type::parse_error;
565c5f01b2fSopenharmony_ci                }
566c5f01b2fSopenharmony_ci
567c5f01b2fSopenharmony_ci                case 0x18:
568c5f01b2fSopenharmony_ci                {
569c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018";
570c5f01b2fSopenharmony_ci                    return token_type::parse_error;
571c5f01b2fSopenharmony_ci                }
572c5f01b2fSopenharmony_ci
573c5f01b2fSopenharmony_ci                case 0x19:
574c5f01b2fSopenharmony_ci                {
575c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019";
576c5f01b2fSopenharmony_ci                    return token_type::parse_error;
577c5f01b2fSopenharmony_ci                }
578c5f01b2fSopenharmony_ci
579c5f01b2fSopenharmony_ci                case 0x1A:
580c5f01b2fSopenharmony_ci                {
581c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A";
582c5f01b2fSopenharmony_ci                    return token_type::parse_error;
583c5f01b2fSopenharmony_ci                }
584c5f01b2fSopenharmony_ci
585c5f01b2fSopenharmony_ci                case 0x1B:
586c5f01b2fSopenharmony_ci                {
587c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B";
588c5f01b2fSopenharmony_ci                    return token_type::parse_error;
589c5f01b2fSopenharmony_ci                }
590c5f01b2fSopenharmony_ci
591c5f01b2fSopenharmony_ci                case 0x1C:
592c5f01b2fSopenharmony_ci                {
593c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C";
594c5f01b2fSopenharmony_ci                    return token_type::parse_error;
595c5f01b2fSopenharmony_ci                }
596c5f01b2fSopenharmony_ci
597c5f01b2fSopenharmony_ci                case 0x1D:
598c5f01b2fSopenharmony_ci                {
599c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D";
600c5f01b2fSopenharmony_ci                    return token_type::parse_error;
601c5f01b2fSopenharmony_ci                }
602c5f01b2fSopenharmony_ci
603c5f01b2fSopenharmony_ci                case 0x1E:
604c5f01b2fSopenharmony_ci                {
605c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E";
606c5f01b2fSopenharmony_ci                    return token_type::parse_error;
607c5f01b2fSopenharmony_ci                }
608c5f01b2fSopenharmony_ci
609c5f01b2fSopenharmony_ci                case 0x1F:
610c5f01b2fSopenharmony_ci                {
611c5f01b2fSopenharmony_ci                    error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F";
612c5f01b2fSopenharmony_ci                    return token_type::parse_error;
613c5f01b2fSopenharmony_ci                }
614c5f01b2fSopenharmony_ci
615c5f01b2fSopenharmony_ci                // U+0020..U+007F (except U+0022 (quote) and U+005C (backslash))
616c5f01b2fSopenharmony_ci                case 0x20:
617c5f01b2fSopenharmony_ci                case 0x21:
618c5f01b2fSopenharmony_ci                case 0x23:
619c5f01b2fSopenharmony_ci                case 0x24:
620c5f01b2fSopenharmony_ci                case 0x25:
621c5f01b2fSopenharmony_ci                case 0x26:
622c5f01b2fSopenharmony_ci                case 0x27:
623c5f01b2fSopenharmony_ci                case 0x28:
624c5f01b2fSopenharmony_ci                case 0x29:
625c5f01b2fSopenharmony_ci                case 0x2A:
626c5f01b2fSopenharmony_ci                case 0x2B:
627c5f01b2fSopenharmony_ci                case 0x2C:
628c5f01b2fSopenharmony_ci                case 0x2D:
629c5f01b2fSopenharmony_ci                case 0x2E:
630c5f01b2fSopenharmony_ci                case 0x2F:
631c5f01b2fSopenharmony_ci                case 0x30:
632c5f01b2fSopenharmony_ci                case 0x31:
633c5f01b2fSopenharmony_ci                case 0x32:
634c5f01b2fSopenharmony_ci                case 0x33:
635c5f01b2fSopenharmony_ci                case 0x34:
636c5f01b2fSopenharmony_ci                case 0x35:
637c5f01b2fSopenharmony_ci                case 0x36:
638c5f01b2fSopenharmony_ci                case 0x37:
639c5f01b2fSopenharmony_ci                case 0x38:
640c5f01b2fSopenharmony_ci                case 0x39:
641c5f01b2fSopenharmony_ci                case 0x3A:
642c5f01b2fSopenharmony_ci                case 0x3B:
643c5f01b2fSopenharmony_ci                case 0x3C:
644c5f01b2fSopenharmony_ci                case 0x3D:
645c5f01b2fSopenharmony_ci                case 0x3E:
646c5f01b2fSopenharmony_ci                case 0x3F:
647c5f01b2fSopenharmony_ci                case 0x40:
648c5f01b2fSopenharmony_ci                case 0x41:
649c5f01b2fSopenharmony_ci                case 0x42:
650c5f01b2fSopenharmony_ci                case 0x43:
651c5f01b2fSopenharmony_ci                case 0x44:
652c5f01b2fSopenharmony_ci                case 0x45:
653c5f01b2fSopenharmony_ci                case 0x46:
654c5f01b2fSopenharmony_ci                case 0x47:
655c5f01b2fSopenharmony_ci                case 0x48:
656c5f01b2fSopenharmony_ci                case 0x49:
657c5f01b2fSopenharmony_ci                case 0x4A:
658c5f01b2fSopenharmony_ci                case 0x4B:
659c5f01b2fSopenharmony_ci                case 0x4C:
660c5f01b2fSopenharmony_ci                case 0x4D:
661c5f01b2fSopenharmony_ci                case 0x4E:
662c5f01b2fSopenharmony_ci                case 0x4F:
663c5f01b2fSopenharmony_ci                case 0x50:
664c5f01b2fSopenharmony_ci                case 0x51:
665c5f01b2fSopenharmony_ci                case 0x52:
666c5f01b2fSopenharmony_ci                case 0x53:
667c5f01b2fSopenharmony_ci                case 0x54:
668c5f01b2fSopenharmony_ci                case 0x55:
669c5f01b2fSopenharmony_ci                case 0x56:
670c5f01b2fSopenharmony_ci                case 0x57:
671c5f01b2fSopenharmony_ci                case 0x58:
672c5f01b2fSopenharmony_ci                case 0x59:
673c5f01b2fSopenharmony_ci                case 0x5A:
674c5f01b2fSopenharmony_ci                case 0x5B:
675c5f01b2fSopenharmony_ci                case 0x5D:
676c5f01b2fSopenharmony_ci                case 0x5E:
677c5f01b2fSopenharmony_ci                case 0x5F:
678c5f01b2fSopenharmony_ci                case 0x60:
679c5f01b2fSopenharmony_ci                case 0x61:
680c5f01b2fSopenharmony_ci                case 0x62:
681c5f01b2fSopenharmony_ci                case 0x63:
682c5f01b2fSopenharmony_ci                case 0x64:
683c5f01b2fSopenharmony_ci                case 0x65:
684c5f01b2fSopenharmony_ci                case 0x66:
685c5f01b2fSopenharmony_ci                case 0x67:
686c5f01b2fSopenharmony_ci                case 0x68:
687c5f01b2fSopenharmony_ci                case 0x69:
688c5f01b2fSopenharmony_ci                case 0x6A:
689c5f01b2fSopenharmony_ci                case 0x6B:
690c5f01b2fSopenharmony_ci                case 0x6C:
691c5f01b2fSopenharmony_ci                case 0x6D:
692c5f01b2fSopenharmony_ci                case 0x6E:
693c5f01b2fSopenharmony_ci                case 0x6F:
694c5f01b2fSopenharmony_ci                case 0x70:
695c5f01b2fSopenharmony_ci                case 0x71:
696c5f01b2fSopenharmony_ci                case 0x72:
697c5f01b2fSopenharmony_ci                case 0x73:
698c5f01b2fSopenharmony_ci                case 0x74:
699c5f01b2fSopenharmony_ci                case 0x75:
700c5f01b2fSopenharmony_ci                case 0x76:
701c5f01b2fSopenharmony_ci                case 0x77:
702c5f01b2fSopenharmony_ci                case 0x78:
703c5f01b2fSopenharmony_ci                case 0x79:
704c5f01b2fSopenharmony_ci                case 0x7A:
705c5f01b2fSopenharmony_ci                case 0x7B:
706c5f01b2fSopenharmony_ci                case 0x7C:
707c5f01b2fSopenharmony_ci                case 0x7D:
708c5f01b2fSopenharmony_ci                case 0x7E:
709c5f01b2fSopenharmony_ci                case 0x7F:
710c5f01b2fSopenharmony_ci                {
711c5f01b2fSopenharmony_ci                    add(current);
712c5f01b2fSopenharmony_ci                    break;
713c5f01b2fSopenharmony_ci                }
714c5f01b2fSopenharmony_ci
715c5f01b2fSopenharmony_ci                // U+0080..U+07FF: bytes C2..DF 80..BF
716c5f01b2fSopenharmony_ci                case 0xC2:
717c5f01b2fSopenharmony_ci                case 0xC3:
718c5f01b2fSopenharmony_ci                case 0xC4:
719c5f01b2fSopenharmony_ci                case 0xC5:
720c5f01b2fSopenharmony_ci                case 0xC6:
721c5f01b2fSopenharmony_ci                case 0xC7:
722c5f01b2fSopenharmony_ci                case 0xC8:
723c5f01b2fSopenharmony_ci                case 0xC9:
724c5f01b2fSopenharmony_ci                case 0xCA:
725c5f01b2fSopenharmony_ci                case 0xCB:
726c5f01b2fSopenharmony_ci                case 0xCC:
727c5f01b2fSopenharmony_ci                case 0xCD:
728c5f01b2fSopenharmony_ci                case 0xCE:
729c5f01b2fSopenharmony_ci                case 0xCF:
730c5f01b2fSopenharmony_ci                case 0xD0:
731c5f01b2fSopenharmony_ci                case 0xD1:
732c5f01b2fSopenharmony_ci                case 0xD2:
733c5f01b2fSopenharmony_ci                case 0xD3:
734c5f01b2fSopenharmony_ci                case 0xD4:
735c5f01b2fSopenharmony_ci                case 0xD5:
736c5f01b2fSopenharmony_ci                case 0xD6:
737c5f01b2fSopenharmony_ci                case 0xD7:
738c5f01b2fSopenharmony_ci                case 0xD8:
739c5f01b2fSopenharmony_ci                case 0xD9:
740c5f01b2fSopenharmony_ci                case 0xDA:
741c5f01b2fSopenharmony_ci                case 0xDB:
742c5f01b2fSopenharmony_ci                case 0xDC:
743c5f01b2fSopenharmony_ci                case 0xDD:
744c5f01b2fSopenharmony_ci                case 0xDE:
745c5f01b2fSopenharmony_ci                case 0xDF:
746c5f01b2fSopenharmony_ci                {
747c5f01b2fSopenharmony_ci                    if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF})))
748c5f01b2fSopenharmony_ci                    {
749c5f01b2fSopenharmony_ci                        return token_type::parse_error;
750c5f01b2fSopenharmony_ci                    }
751c5f01b2fSopenharmony_ci                    break;
752c5f01b2fSopenharmony_ci                }
753c5f01b2fSopenharmony_ci
754c5f01b2fSopenharmony_ci                // U+0800..U+0FFF: bytes E0 A0..BF 80..BF
755c5f01b2fSopenharmony_ci                case 0xE0:
756c5f01b2fSopenharmony_ci                {
757c5f01b2fSopenharmony_ci                    if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
758c5f01b2fSopenharmony_ci                    {
759c5f01b2fSopenharmony_ci                        return token_type::parse_error;
760c5f01b2fSopenharmony_ci                    }
761c5f01b2fSopenharmony_ci                    break;
762c5f01b2fSopenharmony_ci                }
763c5f01b2fSopenharmony_ci
764c5f01b2fSopenharmony_ci                // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF
765c5f01b2fSopenharmony_ci                // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF
766c5f01b2fSopenharmony_ci                case 0xE1:
767c5f01b2fSopenharmony_ci                case 0xE2:
768c5f01b2fSopenharmony_ci                case 0xE3:
769c5f01b2fSopenharmony_ci                case 0xE4:
770c5f01b2fSopenharmony_ci                case 0xE5:
771c5f01b2fSopenharmony_ci                case 0xE6:
772c5f01b2fSopenharmony_ci                case 0xE7:
773c5f01b2fSopenharmony_ci                case 0xE8:
774c5f01b2fSopenharmony_ci                case 0xE9:
775c5f01b2fSopenharmony_ci                case 0xEA:
776c5f01b2fSopenharmony_ci                case 0xEB:
777c5f01b2fSopenharmony_ci                case 0xEC:
778c5f01b2fSopenharmony_ci                case 0xEE:
779c5f01b2fSopenharmony_ci                case 0xEF:
780c5f01b2fSopenharmony_ci                {
781c5f01b2fSopenharmony_ci                    if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
782c5f01b2fSopenharmony_ci                    {
783c5f01b2fSopenharmony_ci                        return token_type::parse_error;
784c5f01b2fSopenharmony_ci                    }
785c5f01b2fSopenharmony_ci                    break;
786c5f01b2fSopenharmony_ci                }
787c5f01b2fSopenharmony_ci
788c5f01b2fSopenharmony_ci                // U+D000..U+D7FF: bytes ED 80..9F 80..BF
789c5f01b2fSopenharmony_ci                case 0xED:
790c5f01b2fSopenharmony_ci                {
791c5f01b2fSopenharmony_ci                    if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
792c5f01b2fSopenharmony_ci                    {
793c5f01b2fSopenharmony_ci                        return token_type::parse_error;
794c5f01b2fSopenharmony_ci                    }
795c5f01b2fSopenharmony_ci                    break;
796c5f01b2fSopenharmony_ci                }
797c5f01b2fSopenharmony_ci
798c5f01b2fSopenharmony_ci                // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
799c5f01b2fSopenharmony_ci                case 0xF0:
800c5f01b2fSopenharmony_ci                {
801c5f01b2fSopenharmony_ci                    if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
802c5f01b2fSopenharmony_ci                    {
803c5f01b2fSopenharmony_ci                        return token_type::parse_error;
804c5f01b2fSopenharmony_ci                    }
805c5f01b2fSopenharmony_ci                    break;
806c5f01b2fSopenharmony_ci                }
807c5f01b2fSopenharmony_ci
808c5f01b2fSopenharmony_ci                // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
809c5f01b2fSopenharmony_ci                case 0xF1:
810c5f01b2fSopenharmony_ci                case 0xF2:
811c5f01b2fSopenharmony_ci                case 0xF3:
812c5f01b2fSopenharmony_ci                {
813c5f01b2fSopenharmony_ci                    if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
814c5f01b2fSopenharmony_ci                    {
815c5f01b2fSopenharmony_ci                        return token_type::parse_error;
816c5f01b2fSopenharmony_ci                    }
817c5f01b2fSopenharmony_ci                    break;
818c5f01b2fSopenharmony_ci                }
819c5f01b2fSopenharmony_ci
820c5f01b2fSopenharmony_ci                // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
821c5f01b2fSopenharmony_ci                case 0xF4:
822c5f01b2fSopenharmony_ci                {
823c5f01b2fSopenharmony_ci                    if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
824c5f01b2fSopenharmony_ci                    {
825c5f01b2fSopenharmony_ci                        return token_type::parse_error;
826c5f01b2fSopenharmony_ci                    }
827c5f01b2fSopenharmony_ci                    break;
828c5f01b2fSopenharmony_ci                }
829c5f01b2fSopenharmony_ci
830c5f01b2fSopenharmony_ci                // remaining bytes (80..C1 and F5..FF) are ill-formed
831c5f01b2fSopenharmony_ci                default:
832c5f01b2fSopenharmony_ci                {
833c5f01b2fSopenharmony_ci                    error_message = "invalid string: ill-formed UTF-8 byte";
834c5f01b2fSopenharmony_ci                    return token_type::parse_error;
835c5f01b2fSopenharmony_ci                }
836c5f01b2fSopenharmony_ci            }
837c5f01b2fSopenharmony_ci        }
838c5f01b2fSopenharmony_ci    }
839c5f01b2fSopenharmony_ci
840c5f01b2fSopenharmony_ci    /*!
841c5f01b2fSopenharmony_ci     * @brief scan a comment
842c5f01b2fSopenharmony_ci     * @return whether comment could be scanned successfully
843c5f01b2fSopenharmony_ci     */
844c5f01b2fSopenharmony_ci    bool scan_comment()
845c5f01b2fSopenharmony_ci    {
846c5f01b2fSopenharmony_ci        switch (get())
847c5f01b2fSopenharmony_ci        {
848c5f01b2fSopenharmony_ci            // single-line comments skip input until a newline or EOF is read
849c5f01b2fSopenharmony_ci            case '/':
850c5f01b2fSopenharmony_ci            {
851c5f01b2fSopenharmony_ci                while (true)
852c5f01b2fSopenharmony_ci                {
853c5f01b2fSopenharmony_ci                    switch (get())
854c5f01b2fSopenharmony_ci                    {
855c5f01b2fSopenharmony_ci                        case '\n':
856c5f01b2fSopenharmony_ci                        case '\r':
857c5f01b2fSopenharmony_ci                        case std::char_traits<char_type>::eof():
858c5f01b2fSopenharmony_ci                        case '\0':
859c5f01b2fSopenharmony_ci                            return true;
860c5f01b2fSopenharmony_ci
861c5f01b2fSopenharmony_ci                        default:
862c5f01b2fSopenharmony_ci                            break;
863c5f01b2fSopenharmony_ci                    }
864c5f01b2fSopenharmony_ci                }
865c5f01b2fSopenharmony_ci            }
866c5f01b2fSopenharmony_ci
867c5f01b2fSopenharmony_ci            // multi-line comments skip input until */ is read
868c5f01b2fSopenharmony_ci            case '*':
869c5f01b2fSopenharmony_ci            {
870c5f01b2fSopenharmony_ci                while (true)
871c5f01b2fSopenharmony_ci                {
872c5f01b2fSopenharmony_ci                    switch (get())
873c5f01b2fSopenharmony_ci                    {
874c5f01b2fSopenharmony_ci                        case std::char_traits<char_type>::eof():
875c5f01b2fSopenharmony_ci                        case '\0':
876c5f01b2fSopenharmony_ci                        {
877c5f01b2fSopenharmony_ci                            error_message = "invalid comment; missing closing '*/'";
878c5f01b2fSopenharmony_ci                            return false;
879c5f01b2fSopenharmony_ci                        }
880c5f01b2fSopenharmony_ci
881c5f01b2fSopenharmony_ci                        case '*':
882c5f01b2fSopenharmony_ci                        {
883c5f01b2fSopenharmony_ci                            switch (get())
884c5f01b2fSopenharmony_ci                            {
885c5f01b2fSopenharmony_ci                                case '/':
886c5f01b2fSopenharmony_ci                                    return true;
887c5f01b2fSopenharmony_ci
888c5f01b2fSopenharmony_ci                                default:
889c5f01b2fSopenharmony_ci                                {
890c5f01b2fSopenharmony_ci                                    unget();
891c5f01b2fSopenharmony_ci                                    continue;
892c5f01b2fSopenharmony_ci                                }
893c5f01b2fSopenharmony_ci                            }
894c5f01b2fSopenharmony_ci                        }
895c5f01b2fSopenharmony_ci
896c5f01b2fSopenharmony_ci                        default:
897c5f01b2fSopenharmony_ci                            continue;
898c5f01b2fSopenharmony_ci                    }
899c5f01b2fSopenharmony_ci                }
900c5f01b2fSopenharmony_ci            }
901c5f01b2fSopenharmony_ci
902c5f01b2fSopenharmony_ci            // unexpected character after reading '/'
903c5f01b2fSopenharmony_ci            default:
904c5f01b2fSopenharmony_ci            {
905c5f01b2fSopenharmony_ci                error_message = "invalid comment; expecting '/' or '*' after '/'";
906c5f01b2fSopenharmony_ci                return false;
907c5f01b2fSopenharmony_ci            }
908c5f01b2fSopenharmony_ci        }
909c5f01b2fSopenharmony_ci    }
910c5f01b2fSopenharmony_ci
/// @brief convert the leading part of a C string to `float`
///
/// Overload selected by the type of @a f; it forwards to std::strtof and
/// stores the result in @a f instead of returning it.
///
/// @param[out] f       receives the value returned by std::strtof
/// @param[in]  str     null-terminated string to convert; flagged as
///                     non-null through JSON_HEDLEY_NON_NULL(2)
/// @param[out] endptr  passed through to std::strtof unchanged (it receives
///                     the address of the first unconverted character, and
///                     may be null)
JSON_HEDLEY_NON_NULL(2)
static void strtof(float& f, const char* str, char** endptr) noexcept
{
    f = std::strtof(str, endptr);
}
916c5f01b2fSopenharmony_ci
/// @brief convert the leading part of a C string to `double`
///
/// Overload selected by the type of @a f; it forwards to std::strtod and
/// stores the result in @a f instead of returning it.
///
/// @param[out] f       receives the value returned by std::strtod
/// @param[in]  str     null-terminated string to convert; flagged as
///                     non-null through JSON_HEDLEY_NON_NULL(2)
/// @param[out] endptr  passed through to std::strtod unchanged (it receives
///                     the address of the first unconverted character, and
///                     may be null)
JSON_HEDLEY_NON_NULL(2)
static void strtof(double& f, const char* str, char** endptr) noexcept
{
    f = std::strtod(str, endptr);
}
922c5f01b2fSopenharmony_ci
/// @brief convert the leading part of a C string to `long double`
///
/// Overload selected by the type of @a f; it forwards to std::strtold and
/// stores the result in @a f instead of returning it.
///
/// @param[out] f       receives the value returned by std::strtold
/// @param[in]  str     null-terminated string to convert; flagged as
///                     non-null through JSON_HEDLEY_NON_NULL(2)
/// @param[out] endptr  passed through to std::strtold unchanged (it receives
///                     the address of the first unconverted character, and
///                     may be null)
JSON_HEDLEY_NON_NULL(2)
static void strtof(long double& f, const char* str, char** endptr) noexcept
{
    f = std::strtold(str, endptr);
}
928c5f01b2fSopenharmony_ci
929c5f01b2fSopenharmony_ci    /*!
930c5f01b2fSopenharmony_ci    @brief scan a number literal
931c5f01b2fSopenharmony_ci
932c5f01b2fSopenharmony_ci    This function scans a string according to Sect. 6 of RFC 8259.
933c5f01b2fSopenharmony_ci
934c5f01b2fSopenharmony_ci    The function is realized with a deterministic finite state machine derived
935c5f01b2fSopenharmony_ci    from the grammar described in RFC 8259. Starting in state "init", the
936c5f01b2fSopenharmony_ci    input is read and used to determine the next state. Only state "done"
937c5f01b2fSopenharmony_ci    accepts the number. State "error" is a trap state to model errors. In the
938c5f01b2fSopenharmony_ci    table below, "anything" means any character but the ones listed before.
939c5f01b2fSopenharmony_ci
940c5f01b2fSopenharmony_ci    state    | 0        | 1-9      | e E      | +       | -       | .        | anything
941c5f01b2fSopenharmony_ci    ---------|----------|----------|----------|---------|---------|----------|-----------
942c5f01b2fSopenharmony_ci    init     | zero     | any1     | [error]  | [error] | minus   | [error]  | [error]
943c5f01b2fSopenharmony_ci    minus    | zero     | any1     | [error]  | [error] | [error] | [error]  | [error]
944c5f01b2fSopenharmony_ci    zero     | done     | done     | exponent | done    | done    | decimal1 | done
945c5f01b2fSopenharmony_ci    any1     | any1     | any1     | exponent | done    | done    | decimal1 | done
946c5f01b2fSopenharmony_ci    decimal1 | decimal2 | decimal2 | [error]  | [error] | [error] | [error]  | [error]
947c5f01b2fSopenharmony_ci    decimal2 | decimal2 | decimal2 | exponent | done    | done    | done     | done
948c5f01b2fSopenharmony_ci    exponent | any2     | any2     | [error]  | sign    | sign    | [error]  | [error]
949c5f01b2fSopenharmony_ci    sign     | any2     | any2     | [error]  | [error] | [error] | [error]  | [error]
950c5f01b2fSopenharmony_ci    any2     | any2     | any2     | done     | done    | done    | done     | done
951c5f01b2fSopenharmony_ci
952c5f01b2fSopenharmony_ci    The state machine is realized with one label per state (prefixed with
953c5f01b2fSopenharmony_ci    "scan_number_") and `goto` statements between them. The state machine
954c5f01b2fSopenharmony_ci    contains cycles, but any cycle can be left when EOF is read. Therefore,
955c5f01b2fSopenharmony_ci    the function is guaranteed to terminate.
956c5f01b2fSopenharmony_ci
957c5f01b2fSopenharmony_ci    During scanning, the read bytes are stored in token_buffer. This string is
958c5f01b2fSopenharmony_ci    then converted to a signed integer, an unsigned integer, or a
959c5f01b2fSopenharmony_ci    floating-point number.
960c5f01b2fSopenharmony_ci
961c5f01b2fSopenharmony_ci    @return token_type::value_unsigned, token_type::value_integer, or
962c5f01b2fSopenharmony_ci            token_type::value_float if number could be successfully scanned,
963c5f01b2fSopenharmony_ci            token_type::parse_error otherwise
964c5f01b2fSopenharmony_ci
965c5f01b2fSopenharmony_ci    @note The scanner is independent of the current locale. Internally, the
966c5f01b2fSopenharmony_ci          locale's decimal point is used instead of `.` to work with the
967c5f01b2fSopenharmony_ci          locale-dependent converters.
968c5f01b2fSopenharmony_ci    */
969c5f01b2fSopenharmony_ci    token_type scan_number()  // lgtm [cpp/use-of-goto]
970c5f01b2fSopenharmony_ci    {
971c5f01b2fSopenharmony_ci        // reset token_buffer to store the number's bytes
972c5f01b2fSopenharmony_ci        reset();
973c5f01b2fSopenharmony_ci
974c5f01b2fSopenharmony_ci        // the type of the parsed number; initially set to unsigned; will be
975c5f01b2fSopenharmony_ci        // changed if minus sign, decimal point or exponent is read
976c5f01b2fSopenharmony_ci        token_type number_type = token_type::value_unsigned;
977c5f01b2fSopenharmony_ci
978c5f01b2fSopenharmony_ci        // state (init): we just found out we need to scan a number
979c5f01b2fSopenharmony_ci        switch (current)
980c5f01b2fSopenharmony_ci        {
981c5f01b2fSopenharmony_ci            case '-':
982c5f01b2fSopenharmony_ci            {
983c5f01b2fSopenharmony_ci                add(current);
984c5f01b2fSopenharmony_ci                goto scan_number_minus;
985c5f01b2fSopenharmony_ci            }
986c5f01b2fSopenharmony_ci
987c5f01b2fSopenharmony_ci            case '0':
988c5f01b2fSopenharmony_ci            {
989c5f01b2fSopenharmony_ci                add(current);
990c5f01b2fSopenharmony_ci                goto scan_number_zero;
991c5f01b2fSopenharmony_ci            }
992c5f01b2fSopenharmony_ci
993c5f01b2fSopenharmony_ci            case '1':
994c5f01b2fSopenharmony_ci            case '2':
995c5f01b2fSopenharmony_ci            case '3':
996c5f01b2fSopenharmony_ci            case '4':
997c5f01b2fSopenharmony_ci            case '5':
998c5f01b2fSopenharmony_ci            case '6':
999c5f01b2fSopenharmony_ci            case '7':
1000c5f01b2fSopenharmony_ci            case '8':
1001c5f01b2fSopenharmony_ci            case '9':
1002c5f01b2fSopenharmony_ci            {
1003c5f01b2fSopenharmony_ci                add(current);
1004c5f01b2fSopenharmony_ci                goto scan_number_any1;
1005c5f01b2fSopenharmony_ci            }
1006c5f01b2fSopenharmony_ci
1007c5f01b2fSopenharmony_ci            // all other characters are rejected outside scan_number()
1008c5f01b2fSopenharmony_ci            default:            // LCOV_EXCL_LINE
1009c5f01b2fSopenharmony_ci                JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
1010c5f01b2fSopenharmony_ci        }
1011c5f01b2fSopenharmony_ci
1012c5f01b2fSopenharmony_ciscan_number_minus:
1013c5f01b2fSopenharmony_ci        // state: we just parsed a leading minus sign
1014c5f01b2fSopenharmony_ci        number_type = token_type::value_integer;
1015c5f01b2fSopenharmony_ci        switch (get())
1016c5f01b2fSopenharmony_ci        {
1017c5f01b2fSopenharmony_ci            case '0':
1018c5f01b2fSopenharmony_ci            {
1019c5f01b2fSopenharmony_ci                add(current);
1020c5f01b2fSopenharmony_ci                goto scan_number_zero;
1021c5f01b2fSopenharmony_ci            }
1022c5f01b2fSopenharmony_ci
1023c5f01b2fSopenharmony_ci            case '1':
1024c5f01b2fSopenharmony_ci            case '2':
1025c5f01b2fSopenharmony_ci            case '3':
1026c5f01b2fSopenharmony_ci            case '4':
1027c5f01b2fSopenharmony_ci            case '5':
1028c5f01b2fSopenharmony_ci            case '6':
1029c5f01b2fSopenharmony_ci            case '7':
1030c5f01b2fSopenharmony_ci            case '8':
1031c5f01b2fSopenharmony_ci            case '9':
1032c5f01b2fSopenharmony_ci            {
1033c5f01b2fSopenharmony_ci                add(current);
1034c5f01b2fSopenharmony_ci                goto scan_number_any1;
1035c5f01b2fSopenharmony_ci            }
1036c5f01b2fSopenharmony_ci
1037c5f01b2fSopenharmony_ci            default:
1038c5f01b2fSopenharmony_ci            {
1039c5f01b2fSopenharmony_ci                error_message = "invalid number; expected digit after '-'";
1040c5f01b2fSopenharmony_ci                return token_type::parse_error;
1041c5f01b2fSopenharmony_ci            }
1042c5f01b2fSopenharmony_ci        }
1043c5f01b2fSopenharmony_ci
1044c5f01b2fSopenharmony_ciscan_number_zero:
1045c5f01b2fSopenharmony_ci        // state: we just parse a zero (maybe with a leading minus sign)
1046c5f01b2fSopenharmony_ci        switch (get())
1047c5f01b2fSopenharmony_ci        {
1048c5f01b2fSopenharmony_ci            case '.':
1049c5f01b2fSopenharmony_ci            {
1050c5f01b2fSopenharmony_ci                add(decimal_point_char);
1051c5f01b2fSopenharmony_ci                goto scan_number_decimal1;
1052c5f01b2fSopenharmony_ci            }
1053c5f01b2fSopenharmony_ci
1054c5f01b2fSopenharmony_ci            case 'e':
1055c5f01b2fSopenharmony_ci            case 'E':
1056c5f01b2fSopenharmony_ci            {
1057c5f01b2fSopenharmony_ci                add(current);
1058c5f01b2fSopenharmony_ci                goto scan_number_exponent;
1059c5f01b2fSopenharmony_ci            }
1060c5f01b2fSopenharmony_ci
1061c5f01b2fSopenharmony_ci            default:
1062c5f01b2fSopenharmony_ci                goto scan_number_done;
1063c5f01b2fSopenharmony_ci        }
1064c5f01b2fSopenharmony_ci
1065c5f01b2fSopenharmony_ciscan_number_any1:
1066c5f01b2fSopenharmony_ci        // state: we just parsed a number 0-9 (maybe with a leading minus sign)
1067c5f01b2fSopenharmony_ci        switch (get())
1068c5f01b2fSopenharmony_ci        {
1069c5f01b2fSopenharmony_ci            case '0':
1070c5f01b2fSopenharmony_ci            case '1':
1071c5f01b2fSopenharmony_ci            case '2':
1072c5f01b2fSopenharmony_ci            case '3':
1073c5f01b2fSopenharmony_ci            case '4':
1074c5f01b2fSopenharmony_ci            case '5':
1075c5f01b2fSopenharmony_ci            case '6':
1076c5f01b2fSopenharmony_ci            case '7':
1077c5f01b2fSopenharmony_ci            case '8':
1078c5f01b2fSopenharmony_ci            case '9':
1079c5f01b2fSopenharmony_ci            {
1080c5f01b2fSopenharmony_ci                add(current);
1081c5f01b2fSopenharmony_ci                goto scan_number_any1;
1082c5f01b2fSopenharmony_ci            }
1083c5f01b2fSopenharmony_ci
1084c5f01b2fSopenharmony_ci            case '.':
1085c5f01b2fSopenharmony_ci            {
1086c5f01b2fSopenharmony_ci                add(decimal_point_char);
1087c5f01b2fSopenharmony_ci                goto scan_number_decimal1;
1088c5f01b2fSopenharmony_ci            }
1089c5f01b2fSopenharmony_ci
1090c5f01b2fSopenharmony_ci            case 'e':
1091c5f01b2fSopenharmony_ci            case 'E':
1092c5f01b2fSopenharmony_ci            {
1093c5f01b2fSopenharmony_ci                add(current);
1094c5f01b2fSopenharmony_ci                goto scan_number_exponent;
1095c5f01b2fSopenharmony_ci            }
1096c5f01b2fSopenharmony_ci
1097c5f01b2fSopenharmony_ci            default:
1098c5f01b2fSopenharmony_ci                goto scan_number_done;
1099c5f01b2fSopenharmony_ci        }
1100c5f01b2fSopenharmony_ci
1101c5f01b2fSopenharmony_ciscan_number_decimal1:
1102c5f01b2fSopenharmony_ci        // state: we just parsed a decimal point
1103c5f01b2fSopenharmony_ci        number_type = token_type::value_float;
1104c5f01b2fSopenharmony_ci        switch (get())
1105c5f01b2fSopenharmony_ci        {
1106c5f01b2fSopenharmony_ci            case '0':
1107c5f01b2fSopenharmony_ci            case '1':
1108c5f01b2fSopenharmony_ci            case '2':
1109c5f01b2fSopenharmony_ci            case '3':
1110c5f01b2fSopenharmony_ci            case '4':
1111c5f01b2fSopenharmony_ci            case '5':
1112c5f01b2fSopenharmony_ci            case '6':
1113c5f01b2fSopenharmony_ci            case '7':
1114c5f01b2fSopenharmony_ci            case '8':
1115c5f01b2fSopenharmony_ci            case '9':
1116c5f01b2fSopenharmony_ci            {
1117c5f01b2fSopenharmony_ci                add(current);
1118c5f01b2fSopenharmony_ci                goto scan_number_decimal2;
1119c5f01b2fSopenharmony_ci            }
1120c5f01b2fSopenharmony_ci
1121c5f01b2fSopenharmony_ci            default:
1122c5f01b2fSopenharmony_ci            {
1123c5f01b2fSopenharmony_ci                error_message = "invalid number; expected digit after '.'";
1124c5f01b2fSopenharmony_ci                return token_type::parse_error;
1125c5f01b2fSopenharmony_ci            }
1126c5f01b2fSopenharmony_ci        }
1127c5f01b2fSopenharmony_ci
1128c5f01b2fSopenharmony_ciscan_number_decimal2:
1129c5f01b2fSopenharmony_ci        // we just parsed at least one number after a decimal point
1130c5f01b2fSopenharmony_ci        switch (get())
1131c5f01b2fSopenharmony_ci        {
1132c5f01b2fSopenharmony_ci            case '0':
1133c5f01b2fSopenharmony_ci            case '1':
1134c5f01b2fSopenharmony_ci            case '2':
1135c5f01b2fSopenharmony_ci            case '3':
1136c5f01b2fSopenharmony_ci            case '4':
1137c5f01b2fSopenharmony_ci            case '5':
1138c5f01b2fSopenharmony_ci            case '6':
1139c5f01b2fSopenharmony_ci            case '7':
1140c5f01b2fSopenharmony_ci            case '8':
1141c5f01b2fSopenharmony_ci            case '9':
1142c5f01b2fSopenharmony_ci            {
1143c5f01b2fSopenharmony_ci                add(current);
1144c5f01b2fSopenharmony_ci                goto scan_number_decimal2;
1145c5f01b2fSopenharmony_ci            }
1146c5f01b2fSopenharmony_ci
1147c5f01b2fSopenharmony_ci            case 'e':
1148c5f01b2fSopenharmony_ci            case 'E':
1149c5f01b2fSopenharmony_ci            {
1150c5f01b2fSopenharmony_ci                add(current);
1151c5f01b2fSopenharmony_ci                goto scan_number_exponent;
1152c5f01b2fSopenharmony_ci            }
1153c5f01b2fSopenharmony_ci
1154c5f01b2fSopenharmony_ci            default:
1155c5f01b2fSopenharmony_ci                goto scan_number_done;
1156c5f01b2fSopenharmony_ci        }
1157c5f01b2fSopenharmony_ci
1158c5f01b2fSopenharmony_ciscan_number_exponent:
1159c5f01b2fSopenharmony_ci        // we just parsed an exponent
1160c5f01b2fSopenharmony_ci        number_type = token_type::value_float;
1161c5f01b2fSopenharmony_ci        switch (get())
1162c5f01b2fSopenharmony_ci        {
1163c5f01b2fSopenharmony_ci            case '+':
1164c5f01b2fSopenharmony_ci            case '-':
1165c5f01b2fSopenharmony_ci            {
1166c5f01b2fSopenharmony_ci                add(current);
1167c5f01b2fSopenharmony_ci                goto scan_number_sign;
1168c5f01b2fSopenharmony_ci            }
1169c5f01b2fSopenharmony_ci
1170c5f01b2fSopenharmony_ci            case '0':
1171c5f01b2fSopenharmony_ci            case '1':
1172c5f01b2fSopenharmony_ci            case '2':
1173c5f01b2fSopenharmony_ci            case '3':
1174c5f01b2fSopenharmony_ci            case '4':
1175c5f01b2fSopenharmony_ci            case '5':
1176c5f01b2fSopenharmony_ci            case '6':
1177c5f01b2fSopenharmony_ci            case '7':
1178c5f01b2fSopenharmony_ci            case '8':
1179c5f01b2fSopenharmony_ci            case '9':
1180c5f01b2fSopenharmony_ci            {
1181c5f01b2fSopenharmony_ci                add(current);
1182c5f01b2fSopenharmony_ci                goto scan_number_any2;
1183c5f01b2fSopenharmony_ci            }
1184c5f01b2fSopenharmony_ci
1185c5f01b2fSopenharmony_ci            default:
1186c5f01b2fSopenharmony_ci            {
1187c5f01b2fSopenharmony_ci                error_message =
1188c5f01b2fSopenharmony_ci                    "invalid number; expected '+', '-', or digit after exponent";
1189c5f01b2fSopenharmony_ci                return token_type::parse_error;
1190c5f01b2fSopenharmony_ci            }
1191c5f01b2fSopenharmony_ci        }
1192c5f01b2fSopenharmony_ci
1193c5f01b2fSopenharmony_ciscan_number_sign:
1194c5f01b2fSopenharmony_ci        // we just parsed an exponent sign
1195c5f01b2fSopenharmony_ci        switch (get())
1196c5f01b2fSopenharmony_ci        {
1197c5f01b2fSopenharmony_ci            case '0':
1198c5f01b2fSopenharmony_ci            case '1':
1199c5f01b2fSopenharmony_ci            case '2':
1200c5f01b2fSopenharmony_ci            case '3':
1201c5f01b2fSopenharmony_ci            case '4':
1202c5f01b2fSopenharmony_ci            case '5':
1203c5f01b2fSopenharmony_ci            case '6':
1204c5f01b2fSopenharmony_ci            case '7':
1205c5f01b2fSopenharmony_ci            case '8':
1206c5f01b2fSopenharmony_ci            case '9':
1207c5f01b2fSopenharmony_ci            {
1208c5f01b2fSopenharmony_ci                add(current);
1209c5f01b2fSopenharmony_ci                goto scan_number_any2;
1210c5f01b2fSopenharmony_ci            }
1211c5f01b2fSopenharmony_ci
1212c5f01b2fSopenharmony_ci            default:
1213c5f01b2fSopenharmony_ci            {
1214c5f01b2fSopenharmony_ci                error_message = "invalid number; expected digit after exponent sign";
1215c5f01b2fSopenharmony_ci                return token_type::parse_error;
1216c5f01b2fSopenharmony_ci            }
1217c5f01b2fSopenharmony_ci        }
1218c5f01b2fSopenharmony_ci
1219c5f01b2fSopenharmony_ciscan_number_any2:
1220c5f01b2fSopenharmony_ci        // we just parsed a number after the exponent or exponent sign
1221c5f01b2fSopenharmony_ci        switch (get())
1222c5f01b2fSopenharmony_ci        {
1223c5f01b2fSopenharmony_ci            case '0':
1224c5f01b2fSopenharmony_ci            case '1':
1225c5f01b2fSopenharmony_ci            case '2':
1226c5f01b2fSopenharmony_ci            case '3':
1227c5f01b2fSopenharmony_ci            case '4':
1228c5f01b2fSopenharmony_ci            case '5':
1229c5f01b2fSopenharmony_ci            case '6':
1230c5f01b2fSopenharmony_ci            case '7':
1231c5f01b2fSopenharmony_ci            case '8':
1232c5f01b2fSopenharmony_ci            case '9':
1233c5f01b2fSopenharmony_ci            {
1234c5f01b2fSopenharmony_ci                add(current);
1235c5f01b2fSopenharmony_ci                goto scan_number_any2;
1236c5f01b2fSopenharmony_ci            }
1237c5f01b2fSopenharmony_ci
1238c5f01b2fSopenharmony_ci            default:
1239c5f01b2fSopenharmony_ci                goto scan_number_done;
1240c5f01b2fSopenharmony_ci        }
1241c5f01b2fSopenharmony_ci
1242c5f01b2fSopenharmony_ciscan_number_done:
1243c5f01b2fSopenharmony_ci        // unget the character after the number (we only read it to know that
1244c5f01b2fSopenharmony_ci        // we are done scanning a number)
1245c5f01b2fSopenharmony_ci        unget();
1246c5f01b2fSopenharmony_ci
1247c5f01b2fSopenharmony_ci        char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
1248c5f01b2fSopenharmony_ci        errno = 0;
1249c5f01b2fSopenharmony_ci
1250c5f01b2fSopenharmony_ci        // try to parse integers first and fall back to floats
1251c5f01b2fSopenharmony_ci        if (number_type == token_type::value_unsigned)
1252c5f01b2fSopenharmony_ci        {
1253c5f01b2fSopenharmony_ci            const auto x = std::strtoull(token_buffer.data(), &endptr, 10);
1254c5f01b2fSopenharmony_ci
1255c5f01b2fSopenharmony_ci            // we checked the number format before
1256c5f01b2fSopenharmony_ci            JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1257c5f01b2fSopenharmony_ci
1258c5f01b2fSopenharmony_ci            if (errno == 0)
1259c5f01b2fSopenharmony_ci            {
1260c5f01b2fSopenharmony_ci                value_unsigned = static_cast<number_unsigned_t>(x);
1261c5f01b2fSopenharmony_ci                if (value_unsigned == x)
1262c5f01b2fSopenharmony_ci                {
1263c5f01b2fSopenharmony_ci                    return token_type::value_unsigned;
1264c5f01b2fSopenharmony_ci                }
1265c5f01b2fSopenharmony_ci            }
1266c5f01b2fSopenharmony_ci        }
1267c5f01b2fSopenharmony_ci        else if (number_type == token_type::value_integer)
1268c5f01b2fSopenharmony_ci        {
1269c5f01b2fSopenharmony_ci            const auto x = std::strtoll(token_buffer.data(), &endptr, 10);
1270c5f01b2fSopenharmony_ci
1271c5f01b2fSopenharmony_ci            // we checked the number format before
1272c5f01b2fSopenharmony_ci            JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1273c5f01b2fSopenharmony_ci
1274c5f01b2fSopenharmony_ci            if (errno == 0)
1275c5f01b2fSopenharmony_ci            {
1276c5f01b2fSopenharmony_ci                value_integer = static_cast<number_integer_t>(x);
1277c5f01b2fSopenharmony_ci                if (value_integer == x)
1278c5f01b2fSopenharmony_ci                {
1279c5f01b2fSopenharmony_ci                    return token_type::value_integer;
1280c5f01b2fSopenharmony_ci                }
1281c5f01b2fSopenharmony_ci            }
1282c5f01b2fSopenharmony_ci        }
1283c5f01b2fSopenharmony_ci
1284c5f01b2fSopenharmony_ci        // this code is reached if we parse a floating-point number or if an
1285c5f01b2fSopenharmony_ci        // integer conversion above failed
1286c5f01b2fSopenharmony_ci        strtof(value_float, token_buffer.data(), &endptr);
1287c5f01b2fSopenharmony_ci
1288c5f01b2fSopenharmony_ci        // we checked the number format before
1289c5f01b2fSopenharmony_ci        JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size());
1290c5f01b2fSopenharmony_ci
1291c5f01b2fSopenharmony_ci        return token_type::value_float;
1292c5f01b2fSopenharmony_ci    }
1293c5f01b2fSopenharmony_ci
1294c5f01b2fSopenharmony_ci    /*!
1295c5f01b2fSopenharmony_ci    @param[in] literal_text  the literal text to expect
1296c5f01b2fSopenharmony_ci    @param[in] length        the length of the passed literal text
1297c5f01b2fSopenharmony_ci    @param[in] return_type   the token type to return on success
1298c5f01b2fSopenharmony_ci    */
1299c5f01b2fSopenharmony_ci    JSON_HEDLEY_NON_NULL(2)
1300c5f01b2fSopenharmony_ci    token_type scan_literal(const char_type* literal_text, const std::size_t length,
1301c5f01b2fSopenharmony_ci                            token_type return_type)
1302c5f01b2fSopenharmony_ci    {
1303c5f01b2fSopenharmony_ci        JSON_ASSERT(std::char_traits<char_type>::to_char_type(current) == literal_text[0]);
1304c5f01b2fSopenharmony_ci        for (std::size_t i = 1; i < length; ++i)
1305c5f01b2fSopenharmony_ci        {
1306c5f01b2fSopenharmony_ci            if (JSON_HEDLEY_UNLIKELY(std::char_traits<char_type>::to_char_type(get()) != literal_text[i]))
1307c5f01b2fSopenharmony_ci            {
1308c5f01b2fSopenharmony_ci                error_message = "invalid literal";
1309c5f01b2fSopenharmony_ci                return token_type::parse_error;
1310c5f01b2fSopenharmony_ci            }
1311c5f01b2fSopenharmony_ci        }
1312c5f01b2fSopenharmony_ci        return return_type;
1313c5f01b2fSopenharmony_ci    }
1314c5f01b2fSopenharmony_ci
1315c5f01b2fSopenharmony_ci    /////////////////////
1316c5f01b2fSopenharmony_ci    // input management
1317c5f01b2fSopenharmony_ci    /////////////////////
1318c5f01b2fSopenharmony_ci
1319c5f01b2fSopenharmony_ci    /// reset token_buffer; current character is beginning of token
1320c5f01b2fSopenharmony_ci    void reset() noexcept
1321c5f01b2fSopenharmony_ci    {
1322c5f01b2fSopenharmony_ci        token_buffer.clear();
1323c5f01b2fSopenharmony_ci        token_string.clear();
1324c5f01b2fSopenharmony_ci        token_string.push_back(std::char_traits<char_type>::to_char_type(current));
1325c5f01b2fSopenharmony_ci    }
1326c5f01b2fSopenharmony_ci
1327c5f01b2fSopenharmony_ci    /*
1328c5f01b2fSopenharmony_ci    @brief get next character from the input
1329c5f01b2fSopenharmony_ci
1330c5f01b2fSopenharmony_ci    This function provides the interface to the used input adapter. It does
1331c5f01b2fSopenharmony_ci    not throw in case the input reached EOF, but returns a
1332c5f01b2fSopenharmony_ci    `std::char_traits<char>::eof()` in that case.  Stores the scanned characters
1333c5f01b2fSopenharmony_ci    for use in error messages.
1334c5f01b2fSopenharmony_ci
1335c5f01b2fSopenharmony_ci    @return character read from the input
1336c5f01b2fSopenharmony_ci    */
1337c5f01b2fSopenharmony_ci    char_int_type get()
1338c5f01b2fSopenharmony_ci    {
1339c5f01b2fSopenharmony_ci        ++position.chars_read_total;
1340c5f01b2fSopenharmony_ci        ++position.chars_read_current_line;
1341c5f01b2fSopenharmony_ci
1342c5f01b2fSopenharmony_ci        if (next_unget)
1343c5f01b2fSopenharmony_ci        {
1344c5f01b2fSopenharmony_ci            // just reset the next_unget variable and work with current
1345c5f01b2fSopenharmony_ci            next_unget = false;
1346c5f01b2fSopenharmony_ci        }
1347c5f01b2fSopenharmony_ci        else
1348c5f01b2fSopenharmony_ci        {
1349c5f01b2fSopenharmony_ci            current = ia.get_character();
1350c5f01b2fSopenharmony_ci        }
1351c5f01b2fSopenharmony_ci
1352c5f01b2fSopenharmony_ci        if (JSON_HEDLEY_LIKELY(current != std::char_traits<char_type>::eof()))
1353c5f01b2fSopenharmony_ci        {
1354c5f01b2fSopenharmony_ci            token_string.push_back(std::char_traits<char_type>::to_char_type(current));
1355c5f01b2fSopenharmony_ci        }
1356c5f01b2fSopenharmony_ci
1357c5f01b2fSopenharmony_ci        if (current == '\n')
1358c5f01b2fSopenharmony_ci        {
1359c5f01b2fSopenharmony_ci            ++position.lines_read;
1360c5f01b2fSopenharmony_ci            position.chars_read_current_line = 0;
1361c5f01b2fSopenharmony_ci        }
1362c5f01b2fSopenharmony_ci
1363c5f01b2fSopenharmony_ci        return current;
1364c5f01b2fSopenharmony_ci    }
1365c5f01b2fSopenharmony_ci
1366c5f01b2fSopenharmony_ci    /*!
1367c5f01b2fSopenharmony_ci    @brief unget current character (read it again on next get)
1368c5f01b2fSopenharmony_ci
1369c5f01b2fSopenharmony_ci    We implement unget by setting variable next_unget to true. The input is not
1370c5f01b2fSopenharmony_ci    changed - we just simulate ungetting by modifying chars_read_total,
1371c5f01b2fSopenharmony_ci    chars_read_current_line, and token_string. The next call to get() will
1372c5f01b2fSopenharmony_ci    behave as if the unget character is read again.
1373c5f01b2fSopenharmony_ci    */
1374c5f01b2fSopenharmony_ci    void unget()
1375c5f01b2fSopenharmony_ci    {
1376c5f01b2fSopenharmony_ci        next_unget = true;
1377c5f01b2fSopenharmony_ci
1378c5f01b2fSopenharmony_ci        --position.chars_read_total;
1379c5f01b2fSopenharmony_ci
1380c5f01b2fSopenharmony_ci        // in case we "unget" a newline, we have to also decrement the lines_read
1381c5f01b2fSopenharmony_ci        if (position.chars_read_current_line == 0)
1382c5f01b2fSopenharmony_ci        {
1383c5f01b2fSopenharmony_ci            if (position.lines_read > 0)
1384c5f01b2fSopenharmony_ci            {
1385c5f01b2fSopenharmony_ci                --position.lines_read;
1386c5f01b2fSopenharmony_ci            }
1387c5f01b2fSopenharmony_ci        }
1388c5f01b2fSopenharmony_ci        else
1389c5f01b2fSopenharmony_ci        {
1390c5f01b2fSopenharmony_ci            --position.chars_read_current_line;
1391c5f01b2fSopenharmony_ci        }
1392c5f01b2fSopenharmony_ci
1393c5f01b2fSopenharmony_ci        if (JSON_HEDLEY_LIKELY(current != std::char_traits<char_type>::eof()))
1394c5f01b2fSopenharmony_ci        {
1395c5f01b2fSopenharmony_ci            JSON_ASSERT(!token_string.empty());
1396c5f01b2fSopenharmony_ci            token_string.pop_back();
1397c5f01b2fSopenharmony_ci        }
1398c5f01b2fSopenharmony_ci    }
1399c5f01b2fSopenharmony_ci
1400c5f01b2fSopenharmony_ci    /// add a character to token_buffer
1401c5f01b2fSopenharmony_ci    void add(char_int_type c)
1402c5f01b2fSopenharmony_ci    {
1403c5f01b2fSopenharmony_ci        token_buffer.push_back(static_cast<typename string_t::value_type>(c));
1404c5f01b2fSopenharmony_ci    }
1405c5f01b2fSopenharmony_ci
1406c5f01b2fSopenharmony_ci  public:
1407c5f01b2fSopenharmony_ci    /////////////////////
1408c5f01b2fSopenharmony_ci    // value getters
1409c5f01b2fSopenharmony_ci    /////////////////////
1410c5f01b2fSopenharmony_ci
1411c5f01b2fSopenharmony_ci    /// return integer value
1412c5f01b2fSopenharmony_ci    constexpr number_integer_t get_number_integer() const noexcept
1413c5f01b2fSopenharmony_ci    {
1414c5f01b2fSopenharmony_ci        return value_integer;
1415c5f01b2fSopenharmony_ci    }
1416c5f01b2fSopenharmony_ci
1417c5f01b2fSopenharmony_ci    /// return unsigned integer value
1418c5f01b2fSopenharmony_ci    constexpr number_unsigned_t get_number_unsigned() const noexcept
1419c5f01b2fSopenharmony_ci    {
1420c5f01b2fSopenharmony_ci        return value_unsigned;
1421c5f01b2fSopenharmony_ci    }
1422c5f01b2fSopenharmony_ci
1423c5f01b2fSopenharmony_ci    /// return floating-point value
1424c5f01b2fSopenharmony_ci    constexpr number_float_t get_number_float() const noexcept
1425c5f01b2fSopenharmony_ci    {
1426c5f01b2fSopenharmony_ci        return value_float;
1427c5f01b2fSopenharmony_ci    }
1428c5f01b2fSopenharmony_ci
1429c5f01b2fSopenharmony_ci    /// return current string value (implicitly resets the token; useful only once)
1430c5f01b2fSopenharmony_ci    string_t& get_string()
1431c5f01b2fSopenharmony_ci    {
1432c5f01b2fSopenharmony_ci        return token_buffer;
1433c5f01b2fSopenharmony_ci    }
1434c5f01b2fSopenharmony_ci
1435c5f01b2fSopenharmony_ci    /////////////////////
1436c5f01b2fSopenharmony_ci    // diagnostics
1437c5f01b2fSopenharmony_ci    /////////////////////
1438c5f01b2fSopenharmony_ci
1439c5f01b2fSopenharmony_ci    /// return position of last read token
1440c5f01b2fSopenharmony_ci    constexpr position_t get_position() const noexcept
1441c5f01b2fSopenharmony_ci    {
1442c5f01b2fSopenharmony_ci        return position;
1443c5f01b2fSopenharmony_ci    }
1444c5f01b2fSopenharmony_ci
1445c5f01b2fSopenharmony_ci    /// return the last read token (for errors only).  Will never contain EOF
1446c5f01b2fSopenharmony_ci    /// (an arbitrary value that is not a valid char value, often -1), because
1447c5f01b2fSopenharmony_ci    /// 255 may legitimately occur.  May contain NUL, which should be escaped.
1448c5f01b2fSopenharmony_ci    std::string get_token_string() const
1449c5f01b2fSopenharmony_ci    {
1450c5f01b2fSopenharmony_ci        // escape control characters
1451c5f01b2fSopenharmony_ci        std::string result;
1452c5f01b2fSopenharmony_ci        for (const auto c : token_string)
1453c5f01b2fSopenharmony_ci        {
1454c5f01b2fSopenharmony_ci            if (static_cast<unsigned char>(c) <= '\x1F')
1455c5f01b2fSopenharmony_ci            {
1456c5f01b2fSopenharmony_ci                // escape control characters
1457c5f01b2fSopenharmony_ci                std::array<char, 9> cs{{}};
1458c5f01b2fSopenharmony_ci                static_cast<void>((std::snprintf)(cs.data(), cs.size(), "<U+%.4X>", static_cast<unsigned char>(c))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
1459c5f01b2fSopenharmony_ci                result += cs.data();
1460c5f01b2fSopenharmony_ci            }
1461c5f01b2fSopenharmony_ci            else
1462c5f01b2fSopenharmony_ci            {
1463c5f01b2fSopenharmony_ci                // add character as is
1464c5f01b2fSopenharmony_ci                result.push_back(static_cast<std::string::value_type>(c));
1465c5f01b2fSopenharmony_ci            }
1466c5f01b2fSopenharmony_ci        }
1467c5f01b2fSopenharmony_ci
1468c5f01b2fSopenharmony_ci        return result;
1469c5f01b2fSopenharmony_ci    }
1470c5f01b2fSopenharmony_ci
1471c5f01b2fSopenharmony_ci    /// return syntax error message
1472c5f01b2fSopenharmony_ci    JSON_HEDLEY_RETURNS_NON_NULL
1473c5f01b2fSopenharmony_ci    constexpr const char* get_error_message() const noexcept
1474c5f01b2fSopenharmony_ci    {
1475c5f01b2fSopenharmony_ci        return error_message;
1476c5f01b2fSopenharmony_ci    }
1477c5f01b2fSopenharmony_ci
1478c5f01b2fSopenharmony_ci    /////////////////////
1479c5f01b2fSopenharmony_ci    // actual scanner
1480c5f01b2fSopenharmony_ci    /////////////////////
1481c5f01b2fSopenharmony_ci
1482c5f01b2fSopenharmony_ci    /*!
1483c5f01b2fSopenharmony_ci    @brief skip the UTF-8 byte order mark
1484c5f01b2fSopenharmony_ci    @return true iff there is no BOM or the correct BOM has been skipped
1485c5f01b2fSopenharmony_ci    */
1486c5f01b2fSopenharmony_ci    bool skip_bom()
1487c5f01b2fSopenharmony_ci    {
1488c5f01b2fSopenharmony_ci        if (get() == 0xEF)
1489c5f01b2fSopenharmony_ci        {
1490c5f01b2fSopenharmony_ci            // check if we completely parse the BOM
1491c5f01b2fSopenharmony_ci            return get() == 0xBB && get() == 0xBF;
1492c5f01b2fSopenharmony_ci        }
1493c5f01b2fSopenharmony_ci
1494c5f01b2fSopenharmony_ci        // the first character is not the beginning of the BOM; unget it to
1495c5f01b2fSopenharmony_ci        // process is later
1496c5f01b2fSopenharmony_ci        unget();
1497c5f01b2fSopenharmony_ci        return true;
1498c5f01b2fSopenharmony_ci    }
1499c5f01b2fSopenharmony_ci
1500c5f01b2fSopenharmony_ci    void skip_whitespace()
1501c5f01b2fSopenharmony_ci    {
1502c5f01b2fSopenharmony_ci        do
1503c5f01b2fSopenharmony_ci        {
1504c5f01b2fSopenharmony_ci            get();
1505c5f01b2fSopenharmony_ci        }
1506c5f01b2fSopenharmony_ci        while (current == ' ' || current == '\t' || current == '\n' || current == '\r');
1507c5f01b2fSopenharmony_ci    }
1508c5f01b2fSopenharmony_ci
1509c5f01b2fSopenharmony_ci    token_type scan()
1510c5f01b2fSopenharmony_ci    {
1511c5f01b2fSopenharmony_ci        // initially, skip the BOM
1512c5f01b2fSopenharmony_ci        if (position.chars_read_total == 0 && !skip_bom())
1513c5f01b2fSopenharmony_ci        {
1514c5f01b2fSopenharmony_ci            error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given";
1515c5f01b2fSopenharmony_ci            return token_type::parse_error;
1516c5f01b2fSopenharmony_ci        }
1517c5f01b2fSopenharmony_ci
1518c5f01b2fSopenharmony_ci        // read next character and ignore whitespace
1519c5f01b2fSopenharmony_ci        skip_whitespace();
1520c5f01b2fSopenharmony_ci
1521c5f01b2fSopenharmony_ci        // ignore comments
1522c5f01b2fSopenharmony_ci        while (ignore_comments && current == '/')
1523c5f01b2fSopenharmony_ci        {
1524c5f01b2fSopenharmony_ci            if (!scan_comment())
1525c5f01b2fSopenharmony_ci            {
1526c5f01b2fSopenharmony_ci                return token_type::parse_error;
1527c5f01b2fSopenharmony_ci            }
1528c5f01b2fSopenharmony_ci
1529c5f01b2fSopenharmony_ci            // skip following whitespace
1530c5f01b2fSopenharmony_ci            skip_whitespace();
1531c5f01b2fSopenharmony_ci        }
1532c5f01b2fSopenharmony_ci
1533c5f01b2fSopenharmony_ci        switch (current)
1534c5f01b2fSopenharmony_ci        {
1535c5f01b2fSopenharmony_ci            // structural characters
1536c5f01b2fSopenharmony_ci            case '[':
1537c5f01b2fSopenharmony_ci                return token_type::begin_array;
1538c5f01b2fSopenharmony_ci            case ']':
1539c5f01b2fSopenharmony_ci                return token_type::end_array;
1540c5f01b2fSopenharmony_ci            case '{':
1541c5f01b2fSopenharmony_ci                return token_type::begin_object;
1542c5f01b2fSopenharmony_ci            case '}':
1543c5f01b2fSopenharmony_ci                return token_type::end_object;
1544c5f01b2fSopenharmony_ci            case ':':
1545c5f01b2fSopenharmony_ci                return token_type::name_separator;
1546c5f01b2fSopenharmony_ci            case ',':
1547c5f01b2fSopenharmony_ci                return token_type::value_separator;
1548c5f01b2fSopenharmony_ci
1549c5f01b2fSopenharmony_ci            // literals
1550c5f01b2fSopenharmony_ci            case 't':
1551c5f01b2fSopenharmony_ci            {
1552c5f01b2fSopenharmony_ci                std::array<char_type, 4> true_literal = {{static_cast<char_type>('t'), static_cast<char_type>('r'), static_cast<char_type>('u'), static_cast<char_type>('e')}};
1553c5f01b2fSopenharmony_ci                return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true);
1554c5f01b2fSopenharmony_ci            }
1555c5f01b2fSopenharmony_ci            case 'f':
1556c5f01b2fSopenharmony_ci            {
1557c5f01b2fSopenharmony_ci                std::array<char_type, 5> false_literal = {{static_cast<char_type>('f'), static_cast<char_type>('a'), static_cast<char_type>('l'), static_cast<char_type>('s'), static_cast<char_type>('e')}};
1558c5f01b2fSopenharmony_ci                return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false);
1559c5f01b2fSopenharmony_ci            }
1560c5f01b2fSopenharmony_ci            case 'n':
1561c5f01b2fSopenharmony_ci            {
1562c5f01b2fSopenharmony_ci                std::array<char_type, 4> null_literal = {{static_cast<char_type>('n'), static_cast<char_type>('u'), static_cast<char_type>('l'), static_cast<char_type>('l')}};
1563c5f01b2fSopenharmony_ci                return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null);
1564c5f01b2fSopenharmony_ci            }
1565c5f01b2fSopenharmony_ci
1566c5f01b2fSopenharmony_ci            // string
1567c5f01b2fSopenharmony_ci            case '\"':
1568c5f01b2fSopenharmony_ci                return scan_string();
1569c5f01b2fSopenharmony_ci
1570c5f01b2fSopenharmony_ci            // number
1571c5f01b2fSopenharmony_ci            case '-':
1572c5f01b2fSopenharmony_ci            case '0':
1573c5f01b2fSopenharmony_ci            case '1':
1574c5f01b2fSopenharmony_ci            case '2':
1575c5f01b2fSopenharmony_ci            case '3':
1576c5f01b2fSopenharmony_ci            case '4':
1577c5f01b2fSopenharmony_ci            case '5':
1578c5f01b2fSopenharmony_ci            case '6':
1579c5f01b2fSopenharmony_ci            case '7':
1580c5f01b2fSopenharmony_ci            case '8':
1581c5f01b2fSopenharmony_ci            case '9':
1582c5f01b2fSopenharmony_ci                return scan_number();
1583c5f01b2fSopenharmony_ci
1584c5f01b2fSopenharmony_ci            // end of input (the null byte is needed when parsing from
1585c5f01b2fSopenharmony_ci            // string literals)
1586c5f01b2fSopenharmony_ci            case '\0':
1587c5f01b2fSopenharmony_ci            case std::char_traits<char_type>::eof():
1588c5f01b2fSopenharmony_ci                return token_type::end_of_input;
1589c5f01b2fSopenharmony_ci
1590c5f01b2fSopenharmony_ci            // error
1591c5f01b2fSopenharmony_ci            default:
1592c5f01b2fSopenharmony_ci                error_message = "invalid literal";
1593c5f01b2fSopenharmony_ci                return token_type::parse_error;
1594c5f01b2fSopenharmony_ci        }
1595c5f01b2fSopenharmony_ci    }
1596c5f01b2fSopenharmony_ci
1597c5f01b2fSopenharmony_ci  private:
1598c5f01b2fSopenharmony_ci    /// input adapter
1599c5f01b2fSopenharmony_ci    InputAdapterType ia;
1600c5f01b2fSopenharmony_ci
1601c5f01b2fSopenharmony_ci    /// whether comments should be ignored (true) or signaled as errors (false)
1602c5f01b2fSopenharmony_ci    const bool ignore_comments = false;
1603c5f01b2fSopenharmony_ci
1604c5f01b2fSopenharmony_ci    /// the current character
1605c5f01b2fSopenharmony_ci    char_int_type current = std::char_traits<char_type>::eof();
1606c5f01b2fSopenharmony_ci
1607c5f01b2fSopenharmony_ci    /// whether the next get() call should just return current
1608c5f01b2fSopenharmony_ci    bool next_unget = false;
1609c5f01b2fSopenharmony_ci
1610c5f01b2fSopenharmony_ci    /// the start position of the current token
1611c5f01b2fSopenharmony_ci    position_t position {};
1612c5f01b2fSopenharmony_ci
1613c5f01b2fSopenharmony_ci    /// raw input token string (for error messages)
1614c5f01b2fSopenharmony_ci    std::vector<char_type> token_string {};
1615c5f01b2fSopenharmony_ci
1616c5f01b2fSopenharmony_ci    /// buffer for variable-length tokens (numbers, strings)
1617c5f01b2fSopenharmony_ci    string_t token_buffer {};
1618c5f01b2fSopenharmony_ci
1619c5f01b2fSopenharmony_ci    /// a description of occurred lexer errors
1620c5f01b2fSopenharmony_ci    const char* error_message = "";
1621c5f01b2fSopenharmony_ci
1622c5f01b2fSopenharmony_ci    // number values
1623c5f01b2fSopenharmony_ci    number_integer_t value_integer = 0;
1624c5f01b2fSopenharmony_ci    number_unsigned_t value_unsigned = 0;
1625c5f01b2fSopenharmony_ci    number_float_t value_float = 0;
1626c5f01b2fSopenharmony_ci
1627c5f01b2fSopenharmony_ci    /// the decimal point
1628c5f01b2fSopenharmony_ci    const char_int_type decimal_point_char = '.';
1629c5f01b2fSopenharmony_ci};
1630c5f01b2fSopenharmony_ci
1631c5f01b2fSopenharmony_ci}  // namespace detail
1632c5f01b2fSopenharmony_ciNLOHMANN_JSON_NAMESPACE_END
1633