1c5f01b2fSopenharmony_ci//     __ _____ _____ _____
2c5f01b2fSopenharmony_ci//  __|  |   __|     |   | |  JSON for Modern C++ (supporting code)
3c5f01b2fSopenharmony_ci// |  |  |__   |  |  | | | |  version 3.11.2
4c5f01b2fSopenharmony_ci// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
5c5f01b2fSopenharmony_ci//
6c5f01b2fSopenharmony_ci// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me>
7c5f01b2fSopenharmony_ci// SPDX-License-Identifier: MIT
8c5f01b2fSopenharmony_ci
9c5f01b2fSopenharmony_ci#include "doctest_compatibility.h"
10c5f01b2fSopenharmony_ci
11c5f01b2fSopenharmony_ci// for some reason including this after the json header leads to linker errors with VS 2017...
12c5f01b2fSopenharmony_ci#include <locale>
13c5f01b2fSopenharmony_ci#include <nlohmann/json.hpp>
14c5f01b2fSopenharmony_ciusing nlohmann::json;
15c5f01b2fSopenharmony_ci
16c5f01b2fSopenharmony_ci#include <fstream>
17c5f01b2fSopenharmony_ci#include <sstream>
18c5f01b2fSopenharmony_ci#include <iomanip>
19c5f01b2fSopenharmony_ci#include "make_test_data_available.hpp"
20c5f01b2fSopenharmony_ci
21c5f01b2fSopenharmony_ciTEST_CASE("Unicode (1/5)" * doctest::skip())
22c5f01b2fSopenharmony_ci{
23c5f01b2fSopenharmony_ci    SECTION("\\uxxxx sequences")
24c5f01b2fSopenharmony_ci    {
25c5f01b2fSopenharmony_ci        // create an escaped string from a code point
26c5f01b2fSopenharmony_ci        const auto codepoint_to_unicode = [](std::size_t cp)
27c5f01b2fSopenharmony_ci        {
28c5f01b2fSopenharmony_ci            // code points are represented as a six-character sequence: a
29c5f01b2fSopenharmony_ci            // reverse solidus, followed by the lowercase letter u, followed
30c5f01b2fSopenharmony_ci            // by four hexadecimal digits that encode the character's code
31c5f01b2fSopenharmony_ci            // point
32c5f01b2fSopenharmony_ci            std::stringstream ss;
33c5f01b2fSopenharmony_ci            ss << "\\u" << std::setw(4) << std::setfill('0') << std::hex << cp;
34c5f01b2fSopenharmony_ci            return ss.str();
35c5f01b2fSopenharmony_ci        };
36c5f01b2fSopenharmony_ci
37c5f01b2fSopenharmony_ci        SECTION("correct sequences")
38c5f01b2fSopenharmony_ci        {
39c5f01b2fSopenharmony_ci            // generate all UTF-8 code points; in total, 1112064 code points are
40c5f01b2fSopenharmony_ci            // generated: 0x1FFFFF code points - 2048 invalid values between
41c5f01b2fSopenharmony_ci            // 0xD800 and 0xDFFF.
42c5f01b2fSopenharmony_ci            for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp)
43c5f01b2fSopenharmony_ci            {
44c5f01b2fSopenharmony_ci                // string to store the code point as in \uxxxx format
45c5f01b2fSopenharmony_ci                std::string json_text = "\"";
46c5f01b2fSopenharmony_ci
47c5f01b2fSopenharmony_ci                // decide whether to use one or two \uxxxx sequences
48c5f01b2fSopenharmony_ci                if (cp < 0x10000u)
49c5f01b2fSopenharmony_ci                {
50c5f01b2fSopenharmony_ci                    // The Unicode standard permanently reserves these code point
51c5f01b2fSopenharmony_ci                    // values for UTF-16 encoding of the high and low surrogates, and
52c5f01b2fSopenharmony_ci                    // they will never be assigned a character, so there should be no
53c5f01b2fSopenharmony_ci                    // reason to encode them. The official Unicode standard says that
54c5f01b2fSopenharmony_ci                    // no UTF forms, including UTF-16, can encode these code points.
55c5f01b2fSopenharmony_ci                    if (cp >= 0xD800u && cp <= 0xDFFFu)
56c5f01b2fSopenharmony_ci                    {
57c5f01b2fSopenharmony_ci                        // if we would not skip these code points, we would get a
58c5f01b2fSopenharmony_ci                        // "missing low surrogate" exception
59c5f01b2fSopenharmony_ci                        continue;
60c5f01b2fSopenharmony_ci                    }
61c5f01b2fSopenharmony_ci
62c5f01b2fSopenharmony_ci                    // code points in the Basic Multilingual Plane can be
63c5f01b2fSopenharmony_ci                    // represented with one \uxxxx sequence
64c5f01b2fSopenharmony_ci                    json_text += codepoint_to_unicode(cp);
65c5f01b2fSopenharmony_ci                }
66c5f01b2fSopenharmony_ci                else
67c5f01b2fSopenharmony_ci                {
68c5f01b2fSopenharmony_ci                    // To escape an extended character that is not in the Basic
69c5f01b2fSopenharmony_ci                    // Multilingual Plane, the character is represented as a
70c5f01b2fSopenharmony_ci                    // 12-character sequence, encoding the UTF-16 surrogate pair
71c5f01b2fSopenharmony_ci                    const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu);
72c5f01b2fSopenharmony_ci                    const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu);
73c5f01b2fSopenharmony_ci                    json_text += codepoint_to_unicode(codepoint1) + codepoint_to_unicode(codepoint2);
74c5f01b2fSopenharmony_ci                }
75c5f01b2fSopenharmony_ci
76c5f01b2fSopenharmony_ci                json_text += "\"";
77c5f01b2fSopenharmony_ci                CAPTURE(json_text)
78c5f01b2fSopenharmony_ci                json _;
79c5f01b2fSopenharmony_ci                CHECK_NOTHROW(_ = json::parse(json_text));
80c5f01b2fSopenharmony_ci            }
81c5f01b2fSopenharmony_ci        }
82c5f01b2fSopenharmony_ci
83c5f01b2fSopenharmony_ci        SECTION("incorrect sequences")
84c5f01b2fSopenharmony_ci        {
85c5f01b2fSopenharmony_ci            SECTION("incorrect surrogate values")
86c5f01b2fSopenharmony_ci            {
87c5f01b2fSopenharmony_ci                json _;
88c5f01b2fSopenharmony_ci
89c5f01b2fSopenharmony_ci                CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uDC00\\uDC00\""), "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: '\"\\uDC00'", json::parse_error&);
90c5f01b2fSopenharmony_ci
91c5f01b2fSopenharmony_ci                CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD7FF\\uDC00\""), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: '\"\\uD7FF\\uDC00'", json::parse_error&);
92c5f01b2fSopenharmony_ci
93c5f01b2fSopenharmony_ci                CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800]\""), "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800]'", json::parse_error&);
94c5f01b2fSopenharmony_ci
95c5f01b2fSopenharmony_ci                CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800\\v\""), "[json.exception.parse_error.101] parse error at line 1, column 9: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\v'", json::parse_error&);
96c5f01b2fSopenharmony_ci
97c5f01b2fSopenharmony_ci                CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800\\u123\""), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\uD800\\u123\"'", json::parse_error&);
98c5f01b2fSopenharmony_ci
99c5f01b2fSopenharmony_ci                CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800\\uDBFF\""), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\uDBFF'", json::parse_error&);
100c5f01b2fSopenharmony_ci
101c5f01b2fSopenharmony_ci                CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800\\uE000\""), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\uE000'", json::parse_error&);
102c5f01b2fSopenharmony_ci            }
103c5f01b2fSopenharmony_ci        }
104c5f01b2fSopenharmony_ci
105c5f01b2fSopenharmony_ci#if 0
106c5f01b2fSopenharmony_ci        SECTION("incorrect sequences")
107c5f01b2fSopenharmony_ci        {
108c5f01b2fSopenharmony_ci            SECTION("high surrogate without low surrogate")
109c5f01b2fSopenharmony_ci            {
110c5f01b2fSopenharmony_ci                // D800..DBFF are high surrogates and must be followed by low
111c5f01b2fSopenharmony_ci                // surrogates DC00..DFFF; here, nothing follows
112c5f01b2fSopenharmony_ci                for (std::size_t cp = 0xD800u; cp <= 0xDBFFu; ++cp)
113c5f01b2fSopenharmony_ci                {
114c5f01b2fSopenharmony_ci                    std::string json_text = "\"" + codepoint_to_unicode(cp) + "\"";
115c5f01b2fSopenharmony_ci                    CAPTURE(json_text)
116c5f01b2fSopenharmony_ci                    CHECK_THROWS_AS(json::parse(json_text), json::parse_error&);
117c5f01b2fSopenharmony_ci                }
118c5f01b2fSopenharmony_ci            }
119c5f01b2fSopenharmony_ci
120c5f01b2fSopenharmony_ci            SECTION("high surrogate with wrong low surrogate")
121c5f01b2fSopenharmony_ci            {
122c5f01b2fSopenharmony_ci                // D800..DBFF are high surrogates and must be followed by low
123c5f01b2fSopenharmony_ci                // surrogates DC00..DFFF; here a different sequence follows
124c5f01b2fSopenharmony_ci                for (std::size_t cp1 = 0xD800u; cp1 <= 0xDBFFu; ++cp1)
125c5f01b2fSopenharmony_ci                {
126c5f01b2fSopenharmony_ci                    for (std::size_t cp2 = 0x0000u; cp2 <= 0xFFFFu; ++cp2)
127c5f01b2fSopenharmony_ci                    {
128c5f01b2fSopenharmony_ci                        if (0xDC00u <= cp2 && cp2 <= 0xDFFFu)
129c5f01b2fSopenharmony_ci                        {
130c5f01b2fSopenharmony_ci                            continue;
131c5f01b2fSopenharmony_ci                        }
132c5f01b2fSopenharmony_ci
133c5f01b2fSopenharmony_ci                        std::string json_text = "\"" + codepoint_to_unicode(cp1) + codepoint_to_unicode(cp2) + "\"";
134c5f01b2fSopenharmony_ci                        CAPTURE(json_text)
135c5f01b2fSopenharmony_ci                        CHECK_THROWS_AS(json::parse(json_text), json::parse_error&);
136c5f01b2fSopenharmony_ci                    }
137c5f01b2fSopenharmony_ci                }
138c5f01b2fSopenharmony_ci            }
139c5f01b2fSopenharmony_ci
140c5f01b2fSopenharmony_ci            SECTION("low surrogate without high surrogate")
141c5f01b2fSopenharmony_ci            {
142c5f01b2fSopenharmony_ci                // low surrogates DC00..DFFF must follow high surrogates; here,
143c5f01b2fSopenharmony_ci                // they occur alone
144c5f01b2fSopenharmony_ci                for (std::size_t cp = 0xDC00u; cp <= 0xDFFFu; ++cp)
145c5f01b2fSopenharmony_ci                {
146c5f01b2fSopenharmony_ci                    std::string json_text = "\"" + codepoint_to_unicode(cp) + "\"";
147c5f01b2fSopenharmony_ci                    CAPTURE(json_text)
148c5f01b2fSopenharmony_ci                    CHECK_THROWS_AS(json::parse(json_text), json::parse_error&);
149c5f01b2fSopenharmony_ci                }
150c5f01b2fSopenharmony_ci            }
151c5f01b2fSopenharmony_ci
152c5f01b2fSopenharmony_ci        }
153c5f01b2fSopenharmony_ci#endif
154c5f01b2fSopenharmony_ci    }
155c5f01b2fSopenharmony_ci
156c5f01b2fSopenharmony_ci    SECTION("read all unicode characters")
157c5f01b2fSopenharmony_ci    {
158c5f01b2fSopenharmony_ci        // read a file with all unicode characters stored as single-character
159c5f01b2fSopenharmony_ci        // strings in a JSON array
160c5f01b2fSopenharmony_ci        std::ifstream f(TEST_DATA_DIRECTORY "/json_nlohmann_tests/all_unicode.json");
161c5f01b2fSopenharmony_ci        json j;
162c5f01b2fSopenharmony_ci        CHECK_NOTHROW(f >> j);
163c5f01b2fSopenharmony_ci
164c5f01b2fSopenharmony_ci        // the array has 1112064 + 1 elements (a terminating "null" value)
165c5f01b2fSopenharmony_ci        // Note: 1112064 = 0x1FFFFF code points - 2048 invalid values between
166c5f01b2fSopenharmony_ci        // 0xD800 and 0xDFFF.
167c5f01b2fSopenharmony_ci        CHECK(j.size() == 1112065);
168c5f01b2fSopenharmony_ci
169c5f01b2fSopenharmony_ci        SECTION("check JSON Pointers")
170c5f01b2fSopenharmony_ci        {
171c5f01b2fSopenharmony_ci            for (const auto& s : j)
172c5f01b2fSopenharmony_ci            {
173c5f01b2fSopenharmony_ci                // skip non-string JSON values
174c5f01b2fSopenharmony_ci                if (!s.is_string())
175c5f01b2fSopenharmony_ci                {
176c5f01b2fSopenharmony_ci                    continue;
177c5f01b2fSopenharmony_ci                }
178c5f01b2fSopenharmony_ci
179c5f01b2fSopenharmony_ci                auto ptr = s.get<std::string>();
180c5f01b2fSopenharmony_ci
181c5f01b2fSopenharmony_ci                // tilde must be followed by 0 or 1
182c5f01b2fSopenharmony_ci                if (ptr == "~")
183c5f01b2fSopenharmony_ci                {
184c5f01b2fSopenharmony_ci                    ptr += "0";
185c5f01b2fSopenharmony_ci                }
186c5f01b2fSopenharmony_ci
187c5f01b2fSopenharmony_ci                // JSON Pointers must begin with "/"
188c5f01b2fSopenharmony_ci                ptr.insert(0, "/");
189c5f01b2fSopenharmony_ci
190c5f01b2fSopenharmony_ci                CHECK_NOTHROW(json::json_pointer("/" + ptr));
191c5f01b2fSopenharmony_ci
192c5f01b2fSopenharmony_ci                // check escape/unescape roundtrip
193c5f01b2fSopenharmony_ci                auto escaped = nlohmann::detail::escape(ptr);
194c5f01b2fSopenharmony_ci                nlohmann::detail::unescape(escaped);
195c5f01b2fSopenharmony_ci                CHECK(escaped == ptr);
196c5f01b2fSopenharmony_ci            }
197c5f01b2fSopenharmony_ci        }
198c5f01b2fSopenharmony_ci    }
199c5f01b2fSopenharmony_ci
200c5f01b2fSopenharmony_ci    SECTION("ignore byte-order-mark")
201c5f01b2fSopenharmony_ci    {
202c5f01b2fSopenharmony_ci        SECTION("in a stream")
203c5f01b2fSopenharmony_ci        {
204c5f01b2fSopenharmony_ci            // read a file with a UTF-8 BOM
205c5f01b2fSopenharmony_ci            std::ifstream f(TEST_DATA_DIRECTORY "/json_nlohmann_tests/bom.json");
206c5f01b2fSopenharmony_ci            json j;
207c5f01b2fSopenharmony_ci            CHECK_NOTHROW(f >> j);
208c5f01b2fSopenharmony_ci        }
209c5f01b2fSopenharmony_ci
210c5f01b2fSopenharmony_ci        SECTION("with an iterator")
211c5f01b2fSopenharmony_ci        {
212c5f01b2fSopenharmony_ci            std::string i = "\xef\xbb\xbf{\n   \"foo\": true\n}";
213c5f01b2fSopenharmony_ci            json _;
214c5f01b2fSopenharmony_ci            CHECK_NOTHROW(_ = json::parse(i.begin(), i.end()));
215c5f01b2fSopenharmony_ci        }
216c5f01b2fSopenharmony_ci    }
217c5f01b2fSopenharmony_ci
218c5f01b2fSopenharmony_ci    SECTION("error for incomplete/wrong BOM")
219c5f01b2fSopenharmony_ci    {
220c5f01b2fSopenharmony_ci        json _;
221c5f01b2fSopenharmony_ci        CHECK_THROWS_AS(_ = json::parse("\xef\xbb"), json::parse_error&);
222c5f01b2fSopenharmony_ci        CHECK_THROWS_AS(_ = json::parse("\xef\xbb\xbb"), json::parse_error&);
223c5f01b2fSopenharmony_ci    }
224c5f01b2fSopenharmony_ci}
225c5f01b2fSopenharmony_ci
226c5f01b2fSopenharmony_cinamespace
227c5f01b2fSopenharmony_ci{
228c5f01b2fSopenharmony_civoid roundtrip(bool success_expected, const std::string& s);
229c5f01b2fSopenharmony_ci
230c5f01b2fSopenharmony_civoid roundtrip(bool success_expected, const std::string& s)
231c5f01b2fSopenharmony_ci{
232c5f01b2fSopenharmony_ci    CAPTURE(s)
233c5f01b2fSopenharmony_ci    json _;
234c5f01b2fSopenharmony_ci
235c5f01b2fSopenharmony_ci    // create JSON string value
236c5f01b2fSopenharmony_ci    json j = s;
237c5f01b2fSopenharmony_ci    // create JSON text
238c5f01b2fSopenharmony_ci    std::string ps = std::string("\"") + s + "\"";
239c5f01b2fSopenharmony_ci
240c5f01b2fSopenharmony_ci    if (success_expected)
241c5f01b2fSopenharmony_ci    {
242c5f01b2fSopenharmony_ci        // serialization succeeds
243c5f01b2fSopenharmony_ci        CHECK_NOTHROW(j.dump());
244c5f01b2fSopenharmony_ci
245c5f01b2fSopenharmony_ci        // exclude parse test for U+0000
246c5f01b2fSopenharmony_ci        if (s[0] != '\0')
247c5f01b2fSopenharmony_ci        {
248c5f01b2fSopenharmony_ci            // parsing JSON text succeeds
249c5f01b2fSopenharmony_ci            CHECK_NOTHROW(_ = json::parse(ps));
250c5f01b2fSopenharmony_ci        }
251c5f01b2fSopenharmony_ci
252c5f01b2fSopenharmony_ci        // roundtrip succeeds
253c5f01b2fSopenharmony_ci        CHECK_NOTHROW(_ = json::parse(j.dump()));
254c5f01b2fSopenharmony_ci
255c5f01b2fSopenharmony_ci        // after roundtrip, the same string is stored
256c5f01b2fSopenharmony_ci        json jr = json::parse(j.dump());
257c5f01b2fSopenharmony_ci        CHECK(jr.get<std::string>() == s);
258c5f01b2fSopenharmony_ci    }
259c5f01b2fSopenharmony_ci    else
260c5f01b2fSopenharmony_ci    {
261c5f01b2fSopenharmony_ci        // serialization fails
262c5f01b2fSopenharmony_ci        CHECK_THROWS_AS(j.dump(), json::type_error&);
263c5f01b2fSopenharmony_ci
264c5f01b2fSopenharmony_ci        // parsing JSON text fails
265c5f01b2fSopenharmony_ci        CHECK_THROWS_AS(_ = json::parse(ps), json::parse_error&);
266c5f01b2fSopenharmony_ci    }
267c5f01b2fSopenharmony_ci}
268c5f01b2fSopenharmony_ci} // namespace
269c5f01b2fSopenharmony_ci
270c5f01b2fSopenharmony_ciTEST_CASE("Markus Kuhn's UTF-8 decoder capability and stress test")
271c5f01b2fSopenharmony_ci{
272c5f01b2fSopenharmony_ci    // Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> - 2015-08-28 - CC BY 4.0
273c5f01b2fSopenharmony_ci    // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
274c5f01b2fSopenharmony_ci
275c5f01b2fSopenharmony_ci    SECTION("1  Some correct UTF-8 text")
276c5f01b2fSopenharmony_ci    {
277c5f01b2fSopenharmony_ci        roundtrip(true, "κόσμε");
278c5f01b2fSopenharmony_ci    }
279c5f01b2fSopenharmony_ci
280c5f01b2fSopenharmony_ci    SECTION("2  Boundary condition test cases")
281c5f01b2fSopenharmony_ci    {
282c5f01b2fSopenharmony_ci        SECTION("2.1  First possible sequence of a certain length")
283c5f01b2fSopenharmony_ci        {
284c5f01b2fSopenharmony_ci            // 2.1.1  1 byte  (U-00000000)
285c5f01b2fSopenharmony_ci            roundtrip(true, std::string("\0", 1));
286c5f01b2fSopenharmony_ci            // 2.1.2  2 bytes (U-00000080)
287c5f01b2fSopenharmony_ci            roundtrip(true, "\xc2\x80");
288c5f01b2fSopenharmony_ci            // 2.1.3  3 bytes (U-00000800)
289c5f01b2fSopenharmony_ci            roundtrip(true, "\xe0\xa0\x80");
290c5f01b2fSopenharmony_ci            // 2.1.4  4 bytes (U-00010000)
291c5f01b2fSopenharmony_ci            roundtrip(true, "\xf0\x90\x80\x80");
292c5f01b2fSopenharmony_ci
293c5f01b2fSopenharmony_ci            // 2.1.5  5 bytes (U-00200000)
294c5f01b2fSopenharmony_ci            roundtrip(false, "\xF8\x88\x80\x80\x80");
295c5f01b2fSopenharmony_ci            // 2.1.6  6 bytes (U-04000000)
296c5f01b2fSopenharmony_ci            roundtrip(false, "\xFC\x84\x80\x80\x80\x80");
297c5f01b2fSopenharmony_ci        }
298c5f01b2fSopenharmony_ci
299c5f01b2fSopenharmony_ci        SECTION("2.2  Last possible sequence of a certain length")
300c5f01b2fSopenharmony_ci        {
301c5f01b2fSopenharmony_ci            // 2.2.1  1 byte  (U-0000007F)
302c5f01b2fSopenharmony_ci            roundtrip(true, "\x7f");
303c5f01b2fSopenharmony_ci            // 2.2.2  2 bytes (U-000007FF)
304c5f01b2fSopenharmony_ci            roundtrip(true, "\xdf\xbf");
305c5f01b2fSopenharmony_ci            // 2.2.3  3 bytes (U-0000FFFF)
306c5f01b2fSopenharmony_ci            roundtrip(true, "\xef\xbf\xbf");
307c5f01b2fSopenharmony_ci
308c5f01b2fSopenharmony_ci            // 2.2.4  4 bytes (U-001FFFFF)
309c5f01b2fSopenharmony_ci            roundtrip(false, "\xF7\xBF\xBF\xBF");
310c5f01b2fSopenharmony_ci            // 2.2.5  5 bytes (U-03FFFFFF)
311c5f01b2fSopenharmony_ci            roundtrip(false, "\xFB\xBF\xBF\xBF\xBF");
312c5f01b2fSopenharmony_ci            // 2.2.6  6 bytes (U-7FFFFFFF)
313c5f01b2fSopenharmony_ci            roundtrip(false, "\xFD\xBF\xBF\xBF\xBF\xBF");
314c5f01b2fSopenharmony_ci        }
315c5f01b2fSopenharmony_ci
316c5f01b2fSopenharmony_ci        SECTION("2.3  Other boundary conditions")
317c5f01b2fSopenharmony_ci        {
318c5f01b2fSopenharmony_ci            // 2.3.1  U-0000D7FF = ed 9f bf
319c5f01b2fSopenharmony_ci            roundtrip(true, "\xed\x9f\xbf");
320c5f01b2fSopenharmony_ci            // 2.3.2  U-0000E000 = ee 80 80
321c5f01b2fSopenharmony_ci            roundtrip(true, "\xee\x80\x80");
322c5f01b2fSopenharmony_ci            // 2.3.3  U-0000FFFD = ef bf bd
323c5f01b2fSopenharmony_ci            roundtrip(true, "\xef\xbf\xbd");
324c5f01b2fSopenharmony_ci            // 2.3.4  U-0010FFFF = f4 8f bf bf
325c5f01b2fSopenharmony_ci            roundtrip(true, "\xf4\x8f\xbf\xbf");
326c5f01b2fSopenharmony_ci
327c5f01b2fSopenharmony_ci            // 2.3.5  U-00110000 = f4 90 80 80
328c5f01b2fSopenharmony_ci            roundtrip(false, "\xf4\x90\x80\x80");
329c5f01b2fSopenharmony_ci        }
330c5f01b2fSopenharmony_ci    }
331c5f01b2fSopenharmony_ci
332c5f01b2fSopenharmony_ci    SECTION("3  Malformed sequences")
333c5f01b2fSopenharmony_ci    {
334c5f01b2fSopenharmony_ci        SECTION("3.1  Unexpected continuation bytes")
335c5f01b2fSopenharmony_ci        {
336c5f01b2fSopenharmony_ci            // Each unexpected continuation byte should be separately signalled as a
337c5f01b2fSopenharmony_ci            // malformed sequence of its own.
338c5f01b2fSopenharmony_ci
339c5f01b2fSopenharmony_ci            // 3.1.1  First continuation byte 0x80
340c5f01b2fSopenharmony_ci            roundtrip(false, "\x80");
341c5f01b2fSopenharmony_ci            // 3.1.2  Last  continuation byte 0xbf
342c5f01b2fSopenharmony_ci            roundtrip(false, "\xbf");
343c5f01b2fSopenharmony_ci
344c5f01b2fSopenharmony_ci            // 3.1.3  2 continuation bytes
345c5f01b2fSopenharmony_ci            roundtrip(false, "\x80\xbf");
346c5f01b2fSopenharmony_ci            // 3.1.4  3 continuation bytes
347c5f01b2fSopenharmony_ci            roundtrip(false, "\x80\xbf\x80");
348c5f01b2fSopenharmony_ci            // 3.1.5  4 continuation bytes
349c5f01b2fSopenharmony_ci            roundtrip(false, "\x80\xbf\x80\xbf");
350c5f01b2fSopenharmony_ci            // 3.1.6  5 continuation bytes
351c5f01b2fSopenharmony_ci            roundtrip(false, "\x80\xbf\x80\xbf\x80");
352c5f01b2fSopenharmony_ci            // 3.1.7  6 continuation bytes
353c5f01b2fSopenharmony_ci            roundtrip(false, "\x80\xbf\x80\xbf\x80\xbf");
354c5f01b2fSopenharmony_ci            // 3.1.8  7 continuation bytes
355c5f01b2fSopenharmony_ci            roundtrip(false, "\x80\xbf\x80\xbf\x80\xbf\x80");
356c5f01b2fSopenharmony_ci
357c5f01b2fSopenharmony_ci            // 3.1.9  Sequence of all 64 possible continuation bytes (0x80-0xbf)
358c5f01b2fSopenharmony_ci            roundtrip(false, "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf");
359c5f01b2fSopenharmony_ci        }
360c5f01b2fSopenharmony_ci
361c5f01b2fSopenharmony_ci        SECTION("3.2  Lonely start characters")
362c5f01b2fSopenharmony_ci        {
363c5f01b2fSopenharmony_ci            // 3.2.1  All 32 first bytes of 2-byte sequences (0xc0-0xdf)
364c5f01b2fSopenharmony_ci            roundtrip(false, "\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf");
365c5f01b2fSopenharmony_ci            // 3.2.2  All 16 first bytes of 3-byte sequences (0xe0-0xef)
366c5f01b2fSopenharmony_ci            roundtrip(false, "\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef");
367c5f01b2fSopenharmony_ci            // 3.2.3  All 8 first bytes of 4-byte sequences (0xf0-0xf7)
368c5f01b2fSopenharmony_ci            roundtrip(false, "\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7");
369c5f01b2fSopenharmony_ci            // 3.2.4  All 4 first bytes of 5-byte sequences (0xf8-0xfb)
370c5f01b2fSopenharmony_ci            roundtrip(false, "\xf8 \xf9 \xfa \xfb");
371c5f01b2fSopenharmony_ci            // 3.2.5  All 2 first bytes of 6-byte sequences (0xfc-0xfd)
372c5f01b2fSopenharmony_ci            roundtrip(false, "\xfc \xfd");
373c5f01b2fSopenharmony_ci        }
374c5f01b2fSopenharmony_ci
375c5f01b2fSopenharmony_ci        SECTION("3.3  Sequences with last continuation byte missing")
376c5f01b2fSopenharmony_ci        {
377c5f01b2fSopenharmony_ci            // All bytes of an incomplete sequence should be signalled as a single
378c5f01b2fSopenharmony_ci            // malformed sequence, i.e., you should see only a single replacement
379c5f01b2fSopenharmony_ci            // character in each of the next 10 tests. (Characters as in section 2)
380c5f01b2fSopenharmony_ci
381c5f01b2fSopenharmony_ci            // 3.3.1  2-byte sequence with last byte missing (U+0000)
382c5f01b2fSopenharmony_ci            roundtrip(false, "\xc0");
383c5f01b2fSopenharmony_ci            // 3.3.2  3-byte sequence with last byte missing (U+0000)
384c5f01b2fSopenharmony_ci            roundtrip(false, "\xe0\x80");
385c5f01b2fSopenharmony_ci            // 3.3.3  4-byte sequence with last byte missing (U+0000)
386c5f01b2fSopenharmony_ci            roundtrip(false, "\xf0\x80\x80");
387c5f01b2fSopenharmony_ci            // 3.3.4  5-byte sequence with last byte missing (U+0000)
388c5f01b2fSopenharmony_ci            roundtrip(false, "\xf8\x80\x80\x80");
389c5f01b2fSopenharmony_ci            // 3.3.5  6-byte sequence with last byte missing (U+0000)
390c5f01b2fSopenharmony_ci            roundtrip(false, "\xfc\x80\x80\x80\x80");
391c5f01b2fSopenharmony_ci            // 3.3.6  2-byte sequence with last byte missing (U-000007FF)
392c5f01b2fSopenharmony_ci            roundtrip(false, "\xdf");
393c5f01b2fSopenharmony_ci            // 3.3.7  3-byte sequence with last byte missing (U-0000FFFF)
394c5f01b2fSopenharmony_ci            roundtrip(false, "\xef\xbf");
395c5f01b2fSopenharmony_ci            // 3.3.8  4-byte sequence with last byte missing (U-001FFFFF)
396c5f01b2fSopenharmony_ci            roundtrip(false, "\xf7\xbf\xbf");
397c5f01b2fSopenharmony_ci            // 3.3.9  5-byte sequence with last byte missing (U-03FFFFFF)
398c5f01b2fSopenharmony_ci            roundtrip(false, "\xfb\xbf\xbf\xbf");
399c5f01b2fSopenharmony_ci            // 3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF)
400c5f01b2fSopenharmony_ci            roundtrip(false, "\xfd\xbf\xbf\xbf\xbf");
401c5f01b2fSopenharmony_ci        }
402c5f01b2fSopenharmony_ci
403c5f01b2fSopenharmony_ci        SECTION("3.4  Concatenation of incomplete sequences")
404c5f01b2fSopenharmony_ci        {
405c5f01b2fSopenharmony_ci            // All the 10 sequences of 3.3 concatenated, you should see 10 malformed
406c5f01b2fSopenharmony_ci            // sequences being signalled:
407c5f01b2fSopenharmony_ci            roundtrip(false, "\xc0\xe0\x80\xf0\x80\x80\xf8\x80\x80\x80\xfc\x80\x80\x80\x80\xdf\xef\xbf\xf7\xbf\xbf\xfb\xbf\xbf\xbf\xfd\xbf\xbf\xbf\xbf");
408c5f01b2fSopenharmony_ci        }
409c5f01b2fSopenharmony_ci
410c5f01b2fSopenharmony_ci        SECTION("3.5  Impossible bytes")
411c5f01b2fSopenharmony_ci        {
412c5f01b2fSopenharmony_ci            // The following two bytes cannot appear in a correct UTF-8 string
413c5f01b2fSopenharmony_ci
414c5f01b2fSopenharmony_ci            // 3.5.1  fe
415c5f01b2fSopenharmony_ci            roundtrip(false, "\xfe");
416c5f01b2fSopenharmony_ci            // 3.5.2  ff
417c5f01b2fSopenharmony_ci            roundtrip(false, "\xff");
418c5f01b2fSopenharmony_ci            // 3.5.3  fe fe ff ff
419c5f01b2fSopenharmony_ci            roundtrip(false, "\xfe\xfe\xff\xff");
420c5f01b2fSopenharmony_ci        }
421c5f01b2fSopenharmony_ci    }
422c5f01b2fSopenharmony_ci
423c5f01b2fSopenharmony_ci    SECTION("4  Overlong sequences")
424c5f01b2fSopenharmony_ci    {
425c5f01b2fSopenharmony_ci        // The following sequences are not malformed according to the letter of
426c5f01b2fSopenharmony_ci        // the Unicode 2.0 standard. However, they are longer then necessary and
427c5f01b2fSopenharmony_ci        // a correct UTF-8 encoder is not allowed to produce them. A "safe UTF-8
428c5f01b2fSopenharmony_ci        // decoder" should reject them just like malformed sequences for two
429c5f01b2fSopenharmony_ci        // reasons: (1) It helps to debug applications if overlong sequences are
430c5f01b2fSopenharmony_ci        // not treated as valid representations of characters, because this helps
431c5f01b2fSopenharmony_ci        // to spot problems more quickly. (2) Overlong sequences provide
432c5f01b2fSopenharmony_ci        // alternative representations of characters, that could maliciously be
433c5f01b2fSopenharmony_ci        // used to bypass filters that check only for ASCII characters. For
434c5f01b2fSopenharmony_ci        // instance, a 2-byte encoded line feed (LF) would not be caught by a
435c5f01b2fSopenharmony_ci        // line counter that counts only 0x0a bytes, but it would still be
436c5f01b2fSopenharmony_ci        // processed as a line feed by an unsafe UTF-8 decoder later in the
437c5f01b2fSopenharmony_ci        // pipeline. From a security point of view, ASCII compatibility of UTF-8
438c5f01b2fSopenharmony_ci        // sequences means also, that ASCII characters are *only* allowed to be
439c5f01b2fSopenharmony_ci        // represented by ASCII bytes in the range 0x00-0x7f. To ensure this
440c5f01b2fSopenharmony_ci        // aspect of ASCII compatibility, use only "safe UTF-8 decoders" that
441c5f01b2fSopenharmony_ci        // reject overlong UTF-8 sequences for which a shorter encoding exists.
442c5f01b2fSopenharmony_ci
443c5f01b2fSopenharmony_ci        SECTION("4.1  Examples of an overlong ASCII character")
444c5f01b2fSopenharmony_ci        {
445c5f01b2fSopenharmony_ci            // With a safe UTF-8 decoder, all of the following five overlong
446c5f01b2fSopenharmony_ci            // representations of the ASCII character slash ("/") should be rejected
447c5f01b2fSopenharmony_ci            // like a malformed UTF-8 sequence, for instance by substituting it with
448c5f01b2fSopenharmony_ci            // a replacement character. If you see a slash below, you do not have a
449c5f01b2fSopenharmony_ci            // safe UTF-8 decoder!
450c5f01b2fSopenharmony_ci
451c5f01b2fSopenharmony_ci            // 4.1.1 U+002F = c0 af
452c5f01b2fSopenharmony_ci            roundtrip(false, "\xc0\xaf");
453c5f01b2fSopenharmony_ci            // 4.1.2 U+002F = e0 80 af
454c5f01b2fSopenharmony_ci            roundtrip(false, "\xe0\x80\xaf");
455c5f01b2fSopenharmony_ci            // 4.1.3 U+002F = f0 80 80 af
456c5f01b2fSopenharmony_ci            roundtrip(false, "\xf0\x80\x80\xaf");
457c5f01b2fSopenharmony_ci            // 4.1.4 U+002F = f8 80 80 80 af
458c5f01b2fSopenharmony_ci            roundtrip(false, "\xf8\x80\x80\x80\xaf");
459c5f01b2fSopenharmony_ci            // 4.1.5 U+002F = fc 80 80 80 80 af
460c5f01b2fSopenharmony_ci            roundtrip(false, "\xfc\x80\x80\x80\x80\xaf");
461c5f01b2fSopenharmony_ci        }
462c5f01b2fSopenharmony_ci
463c5f01b2fSopenharmony_ci        SECTION("4.2  Maximum overlong sequences")
464c5f01b2fSopenharmony_ci        {
465c5f01b2fSopenharmony_ci            // Below you see the highest Unicode value that is still resulting in an
466c5f01b2fSopenharmony_ci            // overlong sequence if represented with the given number of bytes. This
467c5f01b2fSopenharmony_ci            // is a boundary test for safe UTF-8 decoders. All five characters should
468c5f01b2fSopenharmony_ci            // be rejected like malformed UTF-8 sequences.
469c5f01b2fSopenharmony_ci
470c5f01b2fSopenharmony_ci            // 4.2.1  U-0000007F = c1 bf
471c5f01b2fSopenharmony_ci            roundtrip(false, "\xc1\xbf");
472c5f01b2fSopenharmony_ci            // 4.2.2  U-000007FF = e0 9f bf
473c5f01b2fSopenharmony_ci            roundtrip(false, "\xe0\x9f\xbf");
474c5f01b2fSopenharmony_ci            // 4.2.3  U-0000FFFF = f0 8f bf bf
475c5f01b2fSopenharmony_ci            roundtrip(false, "\xf0\x8f\xbf\xbf");
476c5f01b2fSopenharmony_ci            // 4.2.4  U-001FFFFF = f8 87 bf bf bf
477c5f01b2fSopenharmony_ci            roundtrip(false, "\xf8\x87\xbf\xbf\xbf");
478c5f01b2fSopenharmony_ci            // 4.2.5  U-03FFFFFF = fc 83 bf bf bf bf
479c5f01b2fSopenharmony_ci            roundtrip(false, "\xfc\x83\xbf\xbf\xbf\xbf");
480c5f01b2fSopenharmony_ci        }
481c5f01b2fSopenharmony_ci
482c5f01b2fSopenharmony_ci        SECTION("4.3  Overlong representation of the NUL character")
483c5f01b2fSopenharmony_ci        {
484c5f01b2fSopenharmony_ci            // The following five sequences should also be rejected like malformed
485c5f01b2fSopenharmony_ci            // UTF-8 sequences and should not be treated like the ASCII NUL
486c5f01b2fSopenharmony_ci            // character.
487c5f01b2fSopenharmony_ci
488c5f01b2fSopenharmony_ci            // 4.3.1  U+0000 = c0 80
489c5f01b2fSopenharmony_ci            roundtrip(false, "\xc0\x80");
490c5f01b2fSopenharmony_ci            // 4.3.2  U+0000 = e0 80 80
491c5f01b2fSopenharmony_ci            roundtrip(false, "\xe0\x80\x80");
492c5f01b2fSopenharmony_ci            // 4.3.3  U+0000 = f0 80 80 80
493c5f01b2fSopenharmony_ci            roundtrip(false, "\xf0\x80\x80\x80");
494c5f01b2fSopenharmony_ci            // 4.3.4  U+0000 = f8 80 80 80 80
495c5f01b2fSopenharmony_ci            roundtrip(false, "\xf8\x80\x80\x80\x80");
496c5f01b2fSopenharmony_ci            // 4.3.5  U+0000 = fc 80 80 80 80 80
497c5f01b2fSopenharmony_ci            roundtrip(false, "\xfc\x80\x80\x80\x80\x80");
498c5f01b2fSopenharmony_ci        }
499c5f01b2fSopenharmony_ci    }
500c5f01b2fSopenharmony_ci
501c5f01b2fSopenharmony_ci    SECTION("5  Illegal code positions")
502c5f01b2fSopenharmony_ci    {
503c5f01b2fSopenharmony_ci        // The following UTF-8 sequences should be rejected like malformed
504c5f01b2fSopenharmony_ci        // sequences, because they never represent valid ISO 10646 characters and
505c5f01b2fSopenharmony_ci        // a UTF-8 decoder that accepts them might introduce security problems
506c5f01b2fSopenharmony_ci        // comparable to overlong UTF-8 sequences.
507c5f01b2fSopenharmony_ci
508c5f01b2fSopenharmony_ci        SECTION("5.1 Single UTF-16 surrogates")
509c5f01b2fSopenharmony_ci        {
510c5f01b2fSopenharmony_ci            // 5.1.1  U+D800 = ed a0 80
511c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xa0\x80");
512c5f01b2fSopenharmony_ci            // 5.1.2  U+DB7F = ed ad bf
513c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xad\xbf");
514c5f01b2fSopenharmony_ci            // 5.1.3  U+DB80 = ed ae 80
515c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xae\x80");
516c5f01b2fSopenharmony_ci            // 5.1.4  U+DBFF = ed af bf
517c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xaf\xbf");
518c5f01b2fSopenharmony_ci            // 5.1.5  U+DC00 = ed b0 80
519c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xb0\x80");
520c5f01b2fSopenharmony_ci            // 5.1.6  U+DF80 = ed be 80
521c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xbe\x80");
522c5f01b2fSopenharmony_ci            // 5.1.7  U+DFFF = ed bf bf
523c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xbf\xbf");
524c5f01b2fSopenharmony_ci        }
525c5f01b2fSopenharmony_ci
526c5f01b2fSopenharmony_ci        SECTION("5.2 Paired UTF-16 surrogates")
527c5f01b2fSopenharmony_ci        {
528c5f01b2fSopenharmony_ci            // 5.2.1  U+D800 U+DC00 = ed a0 80 ed b0 80
529c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xa0\x80\xed\xb0\x80");
530c5f01b2fSopenharmony_ci            // 5.2.2  U+D800 U+DFFF = ed a0 80 ed bf bf
531c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xa0\x80\xed\xbf\xbf");
532c5f01b2fSopenharmony_ci            // 5.2.3  U+DB7F U+DC00 = ed ad bf ed b0 80
533c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xad\xbf\xed\xb0\x80");
534c5f01b2fSopenharmony_ci            // 5.2.4  U+DB7F U+DFFF = ed ad bf ed bf bf
535c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xad\xbf\xed\xbf\xbf");
536c5f01b2fSopenharmony_ci            // 5.2.5  U+DB80 U+DC00 = ed ae 80 ed b0 80
537c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xae\x80\xed\xb0\x80");
538c5f01b2fSopenharmony_ci            // 5.2.6  U+DB80 U+DFFF = ed ae 80 ed bf bf
539c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xae\x80\xed\xbf\xbf");
540c5f01b2fSopenharmony_ci            // 5.2.7  U+DBFF U+DC00 = ed af bf ed b0 80
541c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xaf\xbf\xed\xb0\x80");
542c5f01b2fSopenharmony_ci            // 5.2.8  U+DBFF U+DFFF = ed af bf ed bf bf
543c5f01b2fSopenharmony_ci            roundtrip(false, "\xed\xaf\xbf\xed\xbf\xbf");
544c5f01b2fSopenharmony_ci        }
545c5f01b2fSopenharmony_ci
546c5f01b2fSopenharmony_ci        SECTION("5.3 Noncharacter code positions")
547c5f01b2fSopenharmony_ci        {
548c5f01b2fSopenharmony_ci            // The following "noncharacters" are "reserved for internal use" by
549c5f01b2fSopenharmony_ci            // applications, and according to older versions of the Unicode Standard
550c5f01b2fSopenharmony_ci            // "should never be interchanged". Unicode Corrigendum #9 dropped the
551c5f01b2fSopenharmony_ci            // latter restriction. Nevertheless, their presence in incoming UTF-8 data
552c5f01b2fSopenharmony_ci            // can remain a potential security risk, depending on what use is made of
553c5f01b2fSopenharmony_ci            // these codes subsequently. Examples of such internal use:
554c5f01b2fSopenharmony_ci            //
555c5f01b2fSopenharmony_ci            //  - Some file APIs with 16-bit characters may use the integer value -1
556c5f01b2fSopenharmony_ci            //    = U+FFFF to signal an end-of-file (EOF) or error condition.
557c5f01b2fSopenharmony_ci            //
558c5f01b2fSopenharmony_ci            //  - In some UTF-16 receivers, code point U+FFFE might trigger a
559c5f01b2fSopenharmony_ci            //    byte-swap operation (to convert between UTF-16LE and UTF-16BE).
560c5f01b2fSopenharmony_ci            //
561c5f01b2fSopenharmony_ci            // With such internal use of noncharacters, it may be desirable and safer
562c5f01b2fSopenharmony_ci            // to block those code points in UTF-8 decoders, as they should never
563c5f01b2fSopenharmony_ci            // occur legitimately in incoming UTF-8 data, and could trigger unsafe
564c5f01b2fSopenharmony_ci            // behaviour in subsequent processing.
565c5f01b2fSopenharmony_ci
566c5f01b2fSopenharmony_ci            // Particularly problematic noncharacters in 16-bit applications:
567c5f01b2fSopenharmony_ci
568c5f01b2fSopenharmony_ci            // 5.3.1  U+FFFE = ef bf be
569c5f01b2fSopenharmony_ci            roundtrip(true, "\xef\xbf\xbe");
570c5f01b2fSopenharmony_ci            // 5.3.2  U+FFFF = ef bf bf
571c5f01b2fSopenharmony_ci            roundtrip(true, "\xef\xbf\xbf");
572c5f01b2fSopenharmony_ci
573c5f01b2fSopenharmony_ci            // 5.3.3  U+FDD0 .. U+FDEF
574c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x90");
575c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x91");
576c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x92");
577c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x93");
578c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x94");
579c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x95");
580c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x96");
581c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x97");
582c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x98");
583c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x99");
584c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x9A");
585c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x9B");
586c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x9C");
587c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x9D");
588c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x9E");
589c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\x9F");
590c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA0");
591c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA1");
592c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA2");
593c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA3");
594c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA4");
595c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA5");
596c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA6");
597c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA7");
598c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA8");
599c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xA9");
600c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xAA");
601c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xAB");
602c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xAC");
603c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xAD");
604c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xAE");
605c5f01b2fSopenharmony_ci            roundtrip(true, "\xEF\xB7\xAF");
606c5f01b2fSopenharmony_ci
607c5f01b2fSopenharmony_ci            // 5.3.4  U+nFFFE U+nFFFF (for n = 1..10)
608c5f01b2fSopenharmony_ci            roundtrip(true, "\xF0\x9F\xBF\xBF");
609c5f01b2fSopenharmony_ci            roundtrip(true, "\xF0\xAF\xBF\xBF");
610c5f01b2fSopenharmony_ci            roundtrip(true, "\xF0\xBF\xBF\xBF");
611c5f01b2fSopenharmony_ci            roundtrip(true, "\xF1\x8F\xBF\xBF");
612c5f01b2fSopenharmony_ci            roundtrip(true, "\xF1\x9F\xBF\xBF");
613c5f01b2fSopenharmony_ci            roundtrip(true, "\xF1\xAF\xBF\xBF");
614c5f01b2fSopenharmony_ci            roundtrip(true, "\xF1\xBF\xBF\xBF");
615c5f01b2fSopenharmony_ci            roundtrip(true, "\xF2\x8F\xBF\xBF");
616c5f01b2fSopenharmony_ci            roundtrip(true, "\xF2\x9F\xBF\xBF");
617c5f01b2fSopenharmony_ci            roundtrip(true, "\xF2\xAF\xBF\xBF");
618c5f01b2fSopenharmony_ci        }
619c5f01b2fSopenharmony_ci    }
620c5f01b2fSopenharmony_ci}
621