1//     __ _____ _____ _____
2//  __|  |   __|     |   | |  JSON for Modern C++ (supporting code)
3// |  |  |__   |  |  | | | |  version 3.11.2
4// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
5//
6// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me>
7// SPDX-License-Identifier: MIT
8
9#include "doctest_compatibility.h"
10
11#define JSON_TESTS_PRIVATE
12#include <nlohmann/json.hpp>
13using nlohmann::json;
14
15namespace
16{
17// shortcut to scan a string literal
18json::lexer::token_type scan_string(const char* s, bool ignore_comments = false);
19json::lexer::token_type scan_string(const char* s, const bool ignore_comments)
20{
21    auto ia = nlohmann::detail::input_adapter(s);
22    return nlohmann::detail::lexer<json, decltype(ia)>(std::move(ia), ignore_comments).scan(); // NOLINT(hicpp-move-const-arg,performance-move-const-arg)
23}
24} // namespace
25
26std::string get_error_message(const char* s, bool ignore_comments = false);
27std::string get_error_message(const char* s, const bool ignore_comments)
28{
29    auto ia = nlohmann::detail::input_adapter(s);
30    auto lexer = nlohmann::detail::lexer<json, decltype(ia)>(std::move(ia), ignore_comments); // NOLINT(hicpp-move-const-arg,performance-move-const-arg)
31    lexer.scan();
32    return lexer.get_error_message();
33}
34
35TEST_CASE("lexer class")
36{
37    SECTION("scan")
38    {
39        SECTION("structural characters")
40        {
41            CHECK((scan_string("[") == json::lexer::token_type::begin_array));
42            CHECK((scan_string("]") == json::lexer::token_type::end_array));
43            CHECK((scan_string("{") == json::lexer::token_type::begin_object));
44            CHECK((scan_string("}") == json::lexer::token_type::end_object));
45            CHECK((scan_string(",") == json::lexer::token_type::value_separator));
46            CHECK((scan_string(":") == json::lexer::token_type::name_separator));
47        }
48
49        SECTION("literal names")
50        {
51            CHECK((scan_string("null") == json::lexer::token_type::literal_null));
52            CHECK((scan_string("true") == json::lexer::token_type::literal_true));
53            CHECK((scan_string("false") == json::lexer::token_type::literal_false));
54        }
55
56        SECTION("numbers")
57        {
58            CHECK((scan_string("0") == json::lexer::token_type::value_unsigned));
59            CHECK((scan_string("1") == json::lexer::token_type::value_unsigned));
60            CHECK((scan_string("2") == json::lexer::token_type::value_unsigned));
61            CHECK((scan_string("3") == json::lexer::token_type::value_unsigned));
62            CHECK((scan_string("4") == json::lexer::token_type::value_unsigned));
63            CHECK((scan_string("5") == json::lexer::token_type::value_unsigned));
64            CHECK((scan_string("6") == json::lexer::token_type::value_unsigned));
65            CHECK((scan_string("7") == json::lexer::token_type::value_unsigned));
66            CHECK((scan_string("8") == json::lexer::token_type::value_unsigned));
67            CHECK((scan_string("9") == json::lexer::token_type::value_unsigned));
68
69            CHECK((scan_string("-0") == json::lexer::token_type::value_integer));
70            CHECK((scan_string("-1") == json::lexer::token_type::value_integer));
71
72            CHECK((scan_string("1.1") == json::lexer::token_type::value_float));
73            CHECK((scan_string("-1.1") == json::lexer::token_type::value_float));
74            CHECK((scan_string("1E10") == json::lexer::token_type::value_float));
75        }
76
77        SECTION("whitespace")
78        {
79            // result is end_of_input, because not token is following
80            CHECK((scan_string(" ") == json::lexer::token_type::end_of_input));
81            CHECK((scan_string("\t") == json::lexer::token_type::end_of_input));
82            CHECK((scan_string("\n") == json::lexer::token_type::end_of_input));
83            CHECK((scan_string("\r") == json::lexer::token_type::end_of_input));
84            CHECK((scan_string(" \t\n\r\n\t ") == json::lexer::token_type::end_of_input));
85        }
86    }
87
88    SECTION("token_type_name")
89    {
90        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::uninitialized)) == "<uninitialized>"));
91        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_true)) == "true literal"));
92        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_false)) == "false literal"));
93        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::literal_null)) == "null literal"));
94        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_string)) == "string literal"));
95        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_unsigned)) == "number literal"));
96        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_integer)) == "number literal"));
97        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_float)) == "number literal"));
98        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::begin_array)) == "'['"));
99        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::begin_object)) == "'{'"));
100        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_array)) == "']'"));
101        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_object)) == "'}'"));
102        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::name_separator)) == "':'"));
103        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::value_separator)) == "','"));
104        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::parse_error)) == "<parse error>"));
105        CHECK((std::string(json::lexer::token_type_name(json::lexer::token_type::end_of_input)) == "end of input"));
106    }
107
108    SECTION("parse errors on first character")
109    {
110        for (int c = 1; c < 128; ++c)
111        {
112            // create string from the ASCII code
113            const auto s = std::string(1, static_cast<char>(c));
114            // store scan() result
115            const auto res = scan_string(s.c_str());
116
117            CAPTURE(s)
118
119            switch (c)
120            {
121                // single characters that are valid tokens
122                case ('['):
123                case (']'):
124                case ('{'):
125                case ('}'):
126                case (','):
127                case (':'):
128                case ('0'):
129                case ('1'):
130                case ('2'):
131                case ('3'):
132                case ('4'):
133                case ('5'):
134                case ('6'):
135                case ('7'):
136                case ('8'):
137                case ('9'):
138                {
139                    CHECK((res != json::lexer::token_type::parse_error));
140                    break;
141                }
142
143                // whitespace
144                case (' '):
145                case ('\t'):
146                case ('\n'):
147                case ('\r'):
148                {
149                    CHECK((res == json::lexer::token_type::end_of_input));
150                    break;
151                }
152
153                // anything else is not expected
154                default:
155                {
156                    CHECK((res == json::lexer::token_type::parse_error));
157                    break;
158                }
159            }
160        }
161    }
162
163    SECTION("very large string")
164    {
165        // strings larger than 1024 bytes yield a resize of the lexer's yytext buffer
166        std::string s("\"");
167        s += std::string(2048, 'x');
168        s += "\"";
169        CHECK((scan_string(s.c_str()) == json::lexer::token_type::value_string));
170    }
171
172    SECTION("fail on comments")
173    {
174        CHECK((scan_string("/", false) == json::lexer::token_type::parse_error));
175        CHECK(get_error_message("/", false) == "invalid literal");
176
177        CHECK((scan_string("/!", false) == json::lexer::token_type::parse_error));
178        CHECK(get_error_message("/!", false) == "invalid literal");
179        CHECK((scan_string("/*", false) == json::lexer::token_type::parse_error));
180        CHECK(get_error_message("/*", false) == "invalid literal");
181        CHECK((scan_string("/**", false) == json::lexer::token_type::parse_error));
182        CHECK(get_error_message("/**", false) == "invalid literal");
183
184        CHECK((scan_string("//", false) == json::lexer::token_type::parse_error));
185        CHECK(get_error_message("//", false) == "invalid literal");
186        CHECK((scan_string("/**/", false) == json::lexer::token_type::parse_error));
187        CHECK(get_error_message("/**/", false) == "invalid literal");
188        CHECK((scan_string("/** /", false) == json::lexer::token_type::parse_error));
189        CHECK(get_error_message("/** /", false) == "invalid literal");
190
191        CHECK((scan_string("/***/", false) == json::lexer::token_type::parse_error));
192        CHECK(get_error_message("/***/", false) == "invalid literal");
193        CHECK((scan_string("/* true */", false) == json::lexer::token_type::parse_error));
194        CHECK(get_error_message("/* true */", false) == "invalid literal");
195        CHECK((scan_string("/*/**/", false) == json::lexer::token_type::parse_error));
196        CHECK(get_error_message("/*/**/", false) == "invalid literal");
197        CHECK((scan_string("/*/* */", false) == json::lexer::token_type::parse_error));
198        CHECK(get_error_message("/*/* */", false) == "invalid literal");
199    }
200
201    SECTION("ignore comments")
202    {
203        CHECK((scan_string("/", true) == json::lexer::token_type::parse_error));
204        CHECK(get_error_message("/", true) == "invalid comment; expecting '/' or '*' after '/'");
205
206        CHECK((scan_string("/!", true) == json::lexer::token_type::parse_error));
207        CHECK(get_error_message("/!", true) == "invalid comment; expecting '/' or '*' after '/'");
208        CHECK((scan_string("/*", true) == json::lexer::token_type::parse_error));
209        CHECK(get_error_message("/*", true) == "invalid comment; missing closing '*/'");
210        CHECK((scan_string("/**", true) == json::lexer::token_type::parse_error));
211        CHECK(get_error_message("/**", true) == "invalid comment; missing closing '*/'");
212
213        CHECK((scan_string("//", true) == json::lexer::token_type::end_of_input));
214        CHECK((scan_string("/**/", true) == json::lexer::token_type::end_of_input));
215        CHECK((scan_string("/** /", true) == json::lexer::token_type::parse_error));
216        CHECK(get_error_message("/** /", true) == "invalid comment; missing closing '*/'");
217
218        CHECK((scan_string("/***/", true) == json::lexer::token_type::end_of_input));
219        CHECK((scan_string("/* true */", true) == json::lexer::token_type::end_of_input));
220        CHECK((scan_string("/*/**/", true) == json::lexer::token_type::end_of_input));
221        CHECK((scan_string("/*/* */", true) == json::lexer::token_type::end_of_input));
222
223        CHECK((scan_string("//\n//\n", true) == json::lexer::token_type::end_of_input));
224        CHECK((scan_string("/**//**//**/", true) == json::lexer::token_type::end_of_input));
225    }
226}
227