1//     __ _____ _____ _____
2//  __|  |   __|     |   | |  JSON for Modern C++ (supporting code)
3// |  |  |__   |  |  | | | |  version 3.11.2
4// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
5//
6// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me>
7// SPDX-License-Identifier: MIT
8
9#include "doctest_compatibility.h"
10
11// for some reason including this after the json header leads to linker errors with VS 2017...
12#include <locale>
13
14#include <nlohmann/json.hpp>
15using nlohmann::json;
16
17#include <fstream>
18#include <sstream>
19#include <iostream>
20#include <iomanip>
21#include "make_test_data_available.hpp"
22
23// this test suite uses static variables with non-trivial destructors
24DOCTEST_CLANG_SUPPRESS_WARNING_PUSH
25DOCTEST_CLANG_SUPPRESS_WARNING("-Wexit-time-destructors")
26
27namespace
28{
// Running total of strings handed to check_utf8string(); it drives the
// periodic progress message printed there. The variable is declared before it
// is defined (presumably to keep missing-declaration warnings quiet -- confirm
// against the project's warning flags).
extern size_t calls;
size_t calls{0};
31
32void check_utf8dump(bool success_expected, int byte1, int byte2, int byte3, int byte4);
33
34void check_utf8dump(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
35{
36    static std::string json_string;
37    json_string.clear();
38
39    CAPTURE(byte1)
40    CAPTURE(byte2)
41    CAPTURE(byte3)
42    CAPTURE(byte4)
43
44    json_string += std::string(1, static_cast<char>(byte1));
45
46    if (byte2 != -1)
47    {
48        json_string += std::string(1, static_cast<char>(byte2));
49    }
50
51    if (byte3 != -1)
52    {
53        json_string += std::string(1, static_cast<char>(byte3));
54    }
55
56    if (byte4 != -1)
57    {
58        json_string += std::string(1, static_cast<char>(byte4));
59    }
60
61    CAPTURE(json_string)
62
63    // store the string in a JSON value
64    static json j;
65    static json j2;
66    j = json_string;
67    j2 = "abc" + json_string + "xyz";
68
69    static std::string s_ignored;
70    static std::string s_ignored2;
71    static std::string s_ignored_ascii;
72    static std::string s_ignored2_ascii;
73    static std::string s_replaced;
74    static std::string s_replaced2;
75    static std::string s_replaced_ascii;
76    static std::string s_replaced2_ascii;
77
78    // dumping with ignore/replace must not throw in any case
79    s_ignored = j.dump(-1, ' ', false, json::error_handler_t::ignore);
80    s_ignored2 = j2.dump(-1, ' ', false, json::error_handler_t::ignore);
81    s_ignored_ascii = j.dump(-1, ' ', true, json::error_handler_t::ignore);
82    s_ignored2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::ignore);
83    s_replaced = j.dump(-1, ' ', false, json::error_handler_t::replace);
84    s_replaced2 = j2.dump(-1, ' ', false, json::error_handler_t::replace);
85    s_replaced_ascii = j.dump(-1, ' ', true, json::error_handler_t::replace);
86    s_replaced2_ascii = j2.dump(-1, ' ', true, json::error_handler_t::replace);
87
88    if (success_expected)
89    {
90        static std::string s_strict;
91        // strict mode must not throw if success is expected
92        s_strict = j.dump();
93        // all dumps should agree on the string
94        CHECK(s_strict == s_ignored);
95        CHECK(s_strict == s_replaced);
96    }
97    else
98    {
99        // strict mode must throw if success is not expected
100        CHECK_THROWS_AS(j.dump(), json::type_error&);
101        // ignore and replace must create different dumps
102        CHECK(s_ignored != s_replaced);
103
104        // check that replace string contains a replacement character
105        CHECK(s_replaced.find("\xEF\xBF\xBD") != std::string::npos);
106    }
107
108    // check that prefix and suffix are preserved
109    CHECK(s_ignored2.substr(1, 3) == "abc");
110    CHECK(s_ignored2.substr(s_ignored2.size() - 4, 3) == "xyz");
111    CHECK(s_ignored2_ascii.substr(1, 3) == "abc");
112    CHECK(s_ignored2_ascii.substr(s_ignored2_ascii.size() - 4, 3) == "xyz");
113    CHECK(s_replaced2.substr(1, 3) == "abc");
114    CHECK(s_replaced2.substr(s_replaced2.size() - 4, 3) == "xyz");
115    CHECK(s_replaced2_ascii.substr(1, 3) == "abc");
116    CHECK(s_replaced2_ascii.substr(s_replaced2_ascii.size() - 4, 3) == "xyz");
117}
118
119void check_utf8string(bool success_expected, int byte1, int byte2, int byte3, int byte4);
120
121// create and check a JSON string with up to four UTF-8 bytes
122void check_utf8string(bool success_expected, int byte1, int byte2 = -1, int byte3 = -1, int byte4 = -1)
123{
124    if (++calls % 100000 == 0)
125    {
126        std::cout << calls << " of 5517507 UTF-8 strings checked" << std::endl;
127    }
128
129    static std::string json_string;
130    json_string = "\"";
131
132    CAPTURE(byte1)
133    json_string += std::string(1, static_cast<char>(byte1));
134
135    if (byte2 != -1)
136    {
137        CAPTURE(byte2)
138        json_string += std::string(1, static_cast<char>(byte2));
139    }
140
141    if (byte3 != -1)
142    {
143        CAPTURE(byte3)
144        json_string += std::string(1, static_cast<char>(byte3));
145    }
146
147    if (byte4 != -1)
148    {
149        CAPTURE(byte4)
150        json_string += std::string(1, static_cast<char>(byte4));
151    }
152
153    json_string += "\"";
154
155    CAPTURE(json_string)
156
157    json _;
158    if (success_expected)
159    {
160        CHECK_NOTHROW(_ = json::parse(json_string));
161    }
162    else
163    {
164        CHECK_THROWS_AS(_ = json::parse(json_string), json::parse_error&);
165    }
166}
167} // namespace
168
169TEST_CASE("Unicode (4/5)" * doctest::skip())
170{
171    SECTION("RFC 3629")
172    {
173        /*
174        RFC 3629 describes in Sect. 4 the syntax of UTF-8 byte sequences as
175        follows:
176
177            A UTF-8 string is a sequence of octets representing a sequence of UCS
178            characters.  An octet sequence is valid UTF-8 only if it matches the
179            following syntax, which is derived from the rules for encoding UTF-8
180            and is expressed in the ABNF of [RFC2234].
181
182            UTF8-octets = *( UTF8-char )
183            UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
184            UTF8-1      = %x00-7F
185            UTF8-2      = %xC2-DF UTF8-tail
186            UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
187                          %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
188            UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
189                          %xF4 %x80-8F 2( UTF8-tail )
190            UTF8-tail   = %x80-BF
191        */
192
193        SECTION("UTF8-4 (xF1-F3 UTF8-tail UTF8-tail UTF8-tail)")
194        {
195            SECTION("well-formed")
196            {
197                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
198                {
199                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
200                    {
201                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
202                        {
203                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
204                            {
205                                check_utf8string(true, byte1, byte2, byte3, byte4);
206                                check_utf8dump(true, byte1, byte2, byte3, byte4);
207                            }
208                        }
209                    }
210                }
211            }
212
213            SECTION("ill-formed: missing second byte")
214            {
215                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
216                {
217                    check_utf8string(false, byte1);
218                    check_utf8dump(false, byte1);
219                }
220            }
221
222            SECTION("ill-formed: missing third byte")
223            {
224                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
225                {
226                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
227                    {
228                        check_utf8string(false, byte1, byte2);
229                        check_utf8dump(false, byte1, byte2);
230                    }
231                }
232            }
233
234            SECTION("ill-formed: missing fourth byte")
235            {
236                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
237                {
238                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
239                    {
240                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
241                        {
242                            check_utf8string(false, byte1, byte2, byte3);
243                            check_utf8dump(false, byte1, byte2, byte3);
244                        }
245                    }
246                }
247            }
248
249            SECTION("ill-formed: wrong second byte")
250            {
251                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
252                {
253                    for (int byte2 = 0x00; byte2 <= 0xFF; ++byte2)
254                    {
255                        // skip correct second byte
256                        if (0x80 <= byte2 && byte2 <= 0xBF)
257                        {
258                            continue;
259                        }
260
261                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
262                        {
263                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
264                            {
265                                check_utf8string(false, byte1, byte2, byte3, byte4);
266                                check_utf8dump(false, byte1, byte2, byte3, byte4);
267                            }
268                        }
269                    }
270                }
271            }
272
273            SECTION("ill-formed: wrong third byte")
274            {
275                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
276                {
277                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
278                    {
279                        for (int byte3 = 0x00; byte3 <= 0xFF; ++byte3)
280                        {
281                            // skip correct third byte
282                            if (0x80 <= byte3 && byte3 <= 0xBF)
283                            {
284                                continue;
285                            }
286
287                            for (int byte4 = 0x80; byte4 <= 0xBF; ++byte4)
288                            {
289                                check_utf8string(false, byte1, byte2, byte3, byte4);
290                                check_utf8dump(false, byte1, byte2, byte3, byte4);
291                            }
292                        }
293                    }
294                }
295            }
296
297            SECTION("ill-formed: wrong fourth byte")
298            {
299                for (int byte1 = 0xF1; byte1 <= 0xF3; ++byte1)
300                {
301                    for (int byte2 = 0x80; byte2 <= 0xBF; ++byte2)
302                    {
303                        for (int byte3 = 0x80; byte3 <= 0xBF; ++byte3)
304                        {
305                            for (int byte4 = 0x00; byte4 <= 0xFF; ++byte4)
306                            {
307                                // skip correct fourth byte
308                                if (0x80 <= byte3 && byte3 <= 0xBF)
309                                {
310                                    continue;
311                                }
312
313                                check_utf8string(false, byte1, byte2, byte3, byte4);
314                                check_utf8dump(false, byte1, byte2, byte3, byte4);
315                            }
316                        }
317                    }
318                }
319            }
320        }
321    }
322}
323
324DOCTEST_CLANG_SUPPRESS_WARNING_POP
325