1c5f01b2fSopenharmony_ci// __ _____ _____ _____ 2c5f01b2fSopenharmony_ci// __| | __| | | | JSON for Modern C++ (supporting code) 3c5f01b2fSopenharmony_ci// | | |__ | | | | | | version 3.11.2 4c5f01b2fSopenharmony_ci// |_____|_____|_____|_|___| https://github.com/nlohmann/json 5c5f01b2fSopenharmony_ci// 6c5f01b2fSopenharmony_ci// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me> 7c5f01b2fSopenharmony_ci// SPDX-License-Identifier: MIT 8c5f01b2fSopenharmony_ci 9c5f01b2fSopenharmony_ci#include "doctest_compatibility.h" 10c5f01b2fSopenharmony_ci 11c5f01b2fSopenharmony_ci// for some reason including this after the json header leads to linker errors with VS 2017... 12c5f01b2fSopenharmony_ci#include <locale> 13c5f01b2fSopenharmony_ci#include <nlohmann/json.hpp> 14c5f01b2fSopenharmony_ciusing nlohmann::json; 15c5f01b2fSopenharmony_ci 16c5f01b2fSopenharmony_ci#include <fstream> 17c5f01b2fSopenharmony_ci#include <sstream> 18c5f01b2fSopenharmony_ci#include <iomanip> 19c5f01b2fSopenharmony_ci#include "make_test_data_available.hpp" 20c5f01b2fSopenharmony_ci 21c5f01b2fSopenharmony_ciTEST_CASE("Unicode (1/5)" * doctest::skip()) 22c5f01b2fSopenharmony_ci{ 23c5f01b2fSopenharmony_ci SECTION("\\uxxxx sequences") 24c5f01b2fSopenharmony_ci { 25c5f01b2fSopenharmony_ci // create an escaped string from a code point 26c5f01b2fSopenharmony_ci const auto codepoint_to_unicode = [](std::size_t cp) 27c5f01b2fSopenharmony_ci { 28c5f01b2fSopenharmony_ci // code points are represented as a six-character sequence: a 29c5f01b2fSopenharmony_ci // reverse solidus, followed by the lowercase letter u, followed 30c5f01b2fSopenharmony_ci // by four hexadecimal digits that encode the character's code 31c5f01b2fSopenharmony_ci // point 32c5f01b2fSopenharmony_ci std::stringstream ss; 33c5f01b2fSopenharmony_ci ss << "\\u" << std::setw(4) << std::setfill('0') << std::hex << cp; 34c5f01b2fSopenharmony_ci return ss.str(); 35c5f01b2fSopenharmony_ci }; 36c5f01b2fSopenharmony_ci 37c5f01b2fSopenharmony_ci SECTION("correct sequences") 38c5f01b2fSopenharmony_ci { 39c5f01b2fSopenharmony_ci // generate all UTF-8 code points; in total, 1112064 code points are 40c5f01b2fSopenharmony_ci // generated: 0x1FFFFF code points - 2048 invalid values between 41c5f01b2fSopenharmony_ci // 0xD800 and 0xDFFF. 42c5f01b2fSopenharmony_ci for (std::size_t cp = 0; cp <= 0x10FFFFu; ++cp) 43c5f01b2fSopenharmony_ci { 44c5f01b2fSopenharmony_ci // string to store the code point as in \uxxxx format 45c5f01b2fSopenharmony_ci std::string json_text = "\""; 46c5f01b2fSopenharmony_ci 47c5f01b2fSopenharmony_ci // decide whether to use one or two \uxxxx sequences 48c5f01b2fSopenharmony_ci if (cp < 0x10000u) 49c5f01b2fSopenharmony_ci { 50c5f01b2fSopenharmony_ci // The Unicode standard permanently reserves these code point 51c5f01b2fSopenharmony_ci // values for UTF-16 encoding of the high and low surrogates, and 52c5f01b2fSopenharmony_ci // they will never be assigned a character, so there should be no 53c5f01b2fSopenharmony_ci // reason to encode them. The official Unicode standard says that 54c5f01b2fSopenharmony_ci // no UTF forms, including UTF-16, can encode these code points. 55c5f01b2fSopenharmony_ci if (cp >= 0xD800u && cp <= 0xDFFFu) 56c5f01b2fSopenharmony_ci { 57c5f01b2fSopenharmony_ci // if we would not skip these code points, we would get a 58c5f01b2fSopenharmony_ci // "missing low surrogate" exception 59c5f01b2fSopenharmony_ci continue; 60c5f01b2fSopenharmony_ci } 61c5f01b2fSopenharmony_ci 62c5f01b2fSopenharmony_ci // code points in the Basic Multilingual Plane can be 63c5f01b2fSopenharmony_ci // represented with one \uxxxx sequence 64c5f01b2fSopenharmony_ci json_text += codepoint_to_unicode(cp); 65c5f01b2fSopenharmony_ci } 66c5f01b2fSopenharmony_ci else 67c5f01b2fSopenharmony_ci { 68c5f01b2fSopenharmony_ci // To escape an extended character that is not in the Basic 69c5f01b2fSopenharmony_ci // Multilingual Plane, the character is represented as a 70c5f01b2fSopenharmony_ci // 12-character sequence, encoding the UTF-16 surrogate pair 71c5f01b2fSopenharmony_ci const auto codepoint1 = 0xd800u + (((cp - 0x10000u) >> 10) & 0x3ffu); 72c5f01b2fSopenharmony_ci const auto codepoint2 = 0xdc00u + ((cp - 0x10000u) & 0x3ffu); 73c5f01b2fSopenharmony_ci json_text += codepoint_to_unicode(codepoint1) + codepoint_to_unicode(codepoint2); 74c5f01b2fSopenharmony_ci } 75c5f01b2fSopenharmony_ci 76c5f01b2fSopenharmony_ci json_text += "\""; 77c5f01b2fSopenharmony_ci CAPTURE(json_text) 78c5f01b2fSopenharmony_ci json _; 79c5f01b2fSopenharmony_ci CHECK_NOTHROW(_ = json::parse(json_text)); 80c5f01b2fSopenharmony_ci } 81c5f01b2fSopenharmony_ci } 82c5f01b2fSopenharmony_ci 83c5f01b2fSopenharmony_ci SECTION("incorrect sequences") 84c5f01b2fSopenharmony_ci { 85c5f01b2fSopenharmony_ci SECTION("incorrect surrogate values") 86c5f01b2fSopenharmony_ci { 87c5f01b2fSopenharmony_ci json _; 88c5f01b2fSopenharmony_ci 89c5f01b2fSopenharmony_ci CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uDC00\\uDC00\""), "[json.exception.parse_error.101] parse error at line 1, column 7: syntax error while parsing value - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: '\"\\uDC00'", json::parse_error&); 90c5f01b2fSopenharmony_ci 91c5f01b2fSopenharmony_ci CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD7FF\\uDC00\""), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF; last read: '\"\\uD7FF\\uDC00'", json::parse_error&); 92c5f01b2fSopenharmony_ci 93c5f01b2fSopenharmony_ci CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800]\""), "[json.exception.parse_error.101] parse error at line 1, column 8: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800]'", json::parse_error&); 94c5f01b2fSopenharmony_ci 95c5f01b2fSopenharmony_ci CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800\\v\""), "[json.exception.parse_error.101] parse error at line 1, column 9: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\v'", json::parse_error&); 96c5f01b2fSopenharmony_ci 97c5f01b2fSopenharmony_ci CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800\\u123\""), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: '\\u' must be followed by 4 hex digits; last read: '\"\\uD800\\u123\"'", json::parse_error&); 98c5f01b2fSopenharmony_ci 99c5f01b2fSopenharmony_ci CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800\\uDBFF\""), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\uDBFF'", json::parse_error&); 100c5f01b2fSopenharmony_ci 101c5f01b2fSopenharmony_ci CHECK_THROWS_WITH_AS(_ = json::parse("\"\\uD800\\uE000\""), "[json.exception.parse_error.101] parse error at line 1, column 13: syntax error while parsing value - invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF; last read: '\"\\uD800\\uE000'", json::parse_error&); 102c5f01b2fSopenharmony_ci } 103c5f01b2fSopenharmony_ci } 104c5f01b2fSopenharmony_ci 105c5f01b2fSopenharmony_ci#if 0 106c5f01b2fSopenharmony_ci SECTION("incorrect sequences") 107c5f01b2fSopenharmony_ci { 108c5f01b2fSopenharmony_ci SECTION("high surrogate without low surrogate") 109c5f01b2fSopenharmony_ci { 110c5f01b2fSopenharmony_ci // D800..DBFF are high surrogates and must be followed by low 111c5f01b2fSopenharmony_ci // surrogates DC00..DFFF; here, nothing follows 112c5f01b2fSopenharmony_ci for (std::size_t cp = 0xD800u; cp <= 0xDBFFu; ++cp) 113c5f01b2fSopenharmony_ci { 114c5f01b2fSopenharmony_ci std::string json_text = "\"" + codepoint_to_unicode(cp) + "\""; 115c5f01b2fSopenharmony_ci CAPTURE(json_text) 116c5f01b2fSopenharmony_ci CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); 117c5f01b2fSopenharmony_ci } 118c5f01b2fSopenharmony_ci } 119c5f01b2fSopenharmony_ci 120c5f01b2fSopenharmony_ci SECTION("high surrogate with wrong low surrogate") 121c5f01b2fSopenharmony_ci { 122c5f01b2fSopenharmony_ci // D800..DBFF are high surrogates and must be followed by low 123c5f01b2fSopenharmony_ci // surrogates DC00..DFFF; here a different sequence follows 124c5f01b2fSopenharmony_ci for (std::size_t cp1 = 0xD800u; cp1 <= 0xDBFFu; ++cp1) 125c5f01b2fSopenharmony_ci { 126c5f01b2fSopenharmony_ci for (std::size_t cp2 = 0x0000u; cp2 <= 0xFFFFu; ++cp2) 127c5f01b2fSopenharmony_ci { 128c5f01b2fSopenharmony_ci if (0xDC00u <= cp2 && cp2 <= 0xDFFFu) 129c5f01b2fSopenharmony_ci { 130c5f01b2fSopenharmony_ci continue; 131c5f01b2fSopenharmony_ci } 132c5f01b2fSopenharmony_ci 133c5f01b2fSopenharmony_ci std::string json_text = "\"" + codepoint_to_unicode(cp1) + codepoint_to_unicode(cp2) + "\""; 134c5f01b2fSopenharmony_ci CAPTURE(json_text) 135c5f01b2fSopenharmony_ci CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); 136c5f01b2fSopenharmony_ci } 137c5f01b2fSopenharmony_ci } 138c5f01b2fSopenharmony_ci } 139c5f01b2fSopenharmony_ci 140c5f01b2fSopenharmony_ci SECTION("low surrogate without high surrogate") 141c5f01b2fSopenharmony_ci { 142c5f01b2fSopenharmony_ci // low surrogates DC00..DFFF must follow high surrogates; here, 143c5f01b2fSopenharmony_ci // they occur alone 144c5f01b2fSopenharmony_ci for (std::size_t cp = 0xDC00u; cp <= 0xDFFFu; ++cp) 145c5f01b2fSopenharmony_ci { 146c5f01b2fSopenharmony_ci std::string json_text = "\"" + codepoint_to_unicode(cp) + "\""; 147c5f01b2fSopenharmony_ci CAPTURE(json_text) 148c5f01b2fSopenharmony_ci CHECK_THROWS_AS(json::parse(json_text), json::parse_error&); 149c5f01b2fSopenharmony_ci } 150c5f01b2fSopenharmony_ci } 151c5f01b2fSopenharmony_ci 152c5f01b2fSopenharmony_ci } 153c5f01b2fSopenharmony_ci#endif 154c5f01b2fSopenharmony_ci } 155c5f01b2fSopenharmony_ci 156c5f01b2fSopenharmony_ci SECTION("read all unicode characters") 157c5f01b2fSopenharmony_ci { 158c5f01b2fSopenharmony_ci // read a file with all unicode characters stored as single-character 159c5f01b2fSopenharmony_ci // strings in a JSON array 160c5f01b2fSopenharmony_ci std::ifstream f(TEST_DATA_DIRECTORY "/json_nlohmann_tests/all_unicode.json"); 161c5f01b2fSopenharmony_ci json j; 162c5f01b2fSopenharmony_ci CHECK_NOTHROW(f >> j); 163c5f01b2fSopenharmony_ci 164c5f01b2fSopenharmony_ci // the array has 1112064 + 1 elements (a terminating "null" value) 165c5f01b2fSopenharmony_ci // Note: 1112064 = 0x1FFFFF code points - 2048 invalid values between 166c5f01b2fSopenharmony_ci // 0xD800 and 0xDFFF. 167c5f01b2fSopenharmony_ci CHECK(j.size() == 1112065); 168c5f01b2fSopenharmony_ci 169c5f01b2fSopenharmony_ci SECTION("check JSON Pointers") 170c5f01b2fSopenharmony_ci { 171c5f01b2fSopenharmony_ci for (const auto& s : j) 172c5f01b2fSopenharmony_ci { 173c5f01b2fSopenharmony_ci // skip non-string JSON values 174c5f01b2fSopenharmony_ci if (!s.is_string()) 175c5f01b2fSopenharmony_ci { 176c5f01b2fSopenharmony_ci continue; 177c5f01b2fSopenharmony_ci } 178c5f01b2fSopenharmony_ci 179c5f01b2fSopenharmony_ci auto ptr = s.get<std::string>(); 180c5f01b2fSopenharmony_ci 181c5f01b2fSopenharmony_ci // tilde must be followed by 0 or 1 182c5f01b2fSopenharmony_ci if (ptr == "~") 183c5f01b2fSopenharmony_ci { 184c5f01b2fSopenharmony_ci ptr += "0"; 185c5f01b2fSopenharmony_ci } 186c5f01b2fSopenharmony_ci 187c5f01b2fSopenharmony_ci // JSON Pointers must begin with "/" 188c5f01b2fSopenharmony_ci ptr.insert(0, "/"); 189c5f01b2fSopenharmony_ci 190c5f01b2fSopenharmony_ci CHECK_NOTHROW(json::json_pointer("/" + ptr)); 191c5f01b2fSopenharmony_ci 192c5f01b2fSopenharmony_ci // check escape/unescape roundtrip 193c5f01b2fSopenharmony_ci auto escaped = nlohmann::detail::escape(ptr); 194c5f01b2fSopenharmony_ci nlohmann::detail::unescape(escaped); 195c5f01b2fSopenharmony_ci CHECK(escaped == ptr); 196c5f01b2fSopenharmony_ci } 197c5f01b2fSopenharmony_ci } 198c5f01b2fSopenharmony_ci } 199c5f01b2fSopenharmony_ci 200c5f01b2fSopenharmony_ci SECTION("ignore byte-order-mark") 201c5f01b2fSopenharmony_ci { 202c5f01b2fSopenharmony_ci SECTION("in a stream") 203c5f01b2fSopenharmony_ci { 204c5f01b2fSopenharmony_ci // read a file with a UTF-8 BOM 205c5f01b2fSopenharmony_ci std::ifstream f(TEST_DATA_DIRECTORY "/json_nlohmann_tests/bom.json"); 206c5f01b2fSopenharmony_ci json j; 207c5f01b2fSopenharmony_ci CHECK_NOTHROW(f >> j); 208c5f01b2fSopenharmony_ci } 209c5f01b2fSopenharmony_ci 210c5f01b2fSopenharmony_ci SECTION("with an iterator") 211c5f01b2fSopenharmony_ci { 212c5f01b2fSopenharmony_ci std::string i = "\xef\xbb\xbf{\n \"foo\": true\n}"; 213c5f01b2fSopenharmony_ci json _; 214c5f01b2fSopenharmony_ci CHECK_NOTHROW(_ = json::parse(i.begin(), i.end())); 215c5f01b2fSopenharmony_ci } 216c5f01b2fSopenharmony_ci } 217c5f01b2fSopenharmony_ci 218c5f01b2fSopenharmony_ci SECTION("error for incomplete/wrong BOM") 219c5f01b2fSopenharmony_ci { 220c5f01b2fSopenharmony_ci json _; 221c5f01b2fSopenharmony_ci CHECK_THROWS_AS(_ = json::parse("\xef\xbb"), json::parse_error&); 222c5f01b2fSopenharmony_ci CHECK_THROWS_AS(_ = json::parse("\xef\xbb\xbb"), json::parse_error&); 223c5f01b2fSopenharmony_ci } 224c5f01b2fSopenharmony_ci} 225c5f01b2fSopenharmony_ci 226c5f01b2fSopenharmony_cinamespace 227c5f01b2fSopenharmony_ci{ 228c5f01b2fSopenharmony_civoid roundtrip(bool success_expected, const std::string& s); 229c5f01b2fSopenharmony_ci 230c5f01b2fSopenharmony_civoid roundtrip(bool success_expected, const std::string& s) 231c5f01b2fSopenharmony_ci{ 232c5f01b2fSopenharmony_ci CAPTURE(s) 233c5f01b2fSopenharmony_ci json _; 234c5f01b2fSopenharmony_ci 235c5f01b2fSopenharmony_ci // create JSON string value 236c5f01b2fSopenharmony_ci json j = s; 237c5f01b2fSopenharmony_ci // create JSON text 238c5f01b2fSopenharmony_ci std::string ps = std::string("\"") + s + "\""; 239c5f01b2fSopenharmony_ci 240c5f01b2fSopenharmony_ci if (success_expected) 241c5f01b2fSopenharmony_ci { 242c5f01b2fSopenharmony_ci // serialization succeeds 243c5f01b2fSopenharmony_ci CHECK_NOTHROW(j.dump()); 244c5f01b2fSopenharmony_ci 245c5f01b2fSopenharmony_ci // exclude parse test for U+0000 246c5f01b2fSopenharmony_ci if (s[0] != '\0') 247c5f01b2fSopenharmony_ci { 248c5f01b2fSopenharmony_ci // parsing JSON text succeeds 249c5f01b2fSopenharmony_ci CHECK_NOTHROW(_ = json::parse(ps)); 250c5f01b2fSopenharmony_ci } 251c5f01b2fSopenharmony_ci 252c5f01b2fSopenharmony_ci // roundtrip succeeds 253c5f01b2fSopenharmony_ci CHECK_NOTHROW(_ = json::parse(j.dump())); 254c5f01b2fSopenharmony_ci 255c5f01b2fSopenharmony_ci // after roundtrip, the same string is stored 256c5f01b2fSopenharmony_ci json jr = json::parse(j.dump()); 257c5f01b2fSopenharmony_ci CHECK(jr.get<std::string>() == s); 258c5f01b2fSopenharmony_ci } 259c5f01b2fSopenharmony_ci else 260c5f01b2fSopenharmony_ci { 261c5f01b2fSopenharmony_ci // serialization fails 262c5f01b2fSopenharmony_ci CHECK_THROWS_AS(j.dump(), json::type_error&); 263c5f01b2fSopenharmony_ci 264c5f01b2fSopenharmony_ci // parsing JSON text fails 265c5f01b2fSopenharmony_ci CHECK_THROWS_AS(_ = json::parse(ps), json::parse_error&); 266c5f01b2fSopenharmony_ci } 267c5f01b2fSopenharmony_ci} 268c5f01b2fSopenharmony_ci} // namespace 269c5f01b2fSopenharmony_ci 270c5f01b2fSopenharmony_ciTEST_CASE("Markus Kuhn's UTF-8 decoder capability and stress test") 271c5f01b2fSopenharmony_ci{ 272c5f01b2fSopenharmony_ci // Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> - 2015-08-28 - CC BY 4.0 273c5f01b2fSopenharmony_ci // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt 274c5f01b2fSopenharmony_ci 275c5f01b2fSopenharmony_ci SECTION("1 Some correct UTF-8 text") 276c5f01b2fSopenharmony_ci { 277c5f01b2fSopenharmony_ci roundtrip(true, "κόσμε"); 278c5f01b2fSopenharmony_ci } 279c5f01b2fSopenharmony_ci 280c5f01b2fSopenharmony_ci SECTION("2 Boundary condition test cases") 281c5f01b2fSopenharmony_ci { 282c5f01b2fSopenharmony_ci SECTION("2.1 First possible sequence of a certain length") 283c5f01b2fSopenharmony_ci { 284c5f01b2fSopenharmony_ci // 2.1.1 1 byte (U-00000000) 285c5f01b2fSopenharmony_ci roundtrip(true, std::string("\0", 1)); 286c5f01b2fSopenharmony_ci // 2.1.2 2 bytes (U-00000080) 287c5f01b2fSopenharmony_ci roundtrip(true, "\xc2\x80"); 288c5f01b2fSopenharmony_ci // 2.1.3 3 bytes (U-00000800) 289c5f01b2fSopenharmony_ci roundtrip(true, "\xe0\xa0\x80"); 290c5f01b2fSopenharmony_ci // 2.1.4 4 bytes (U-00010000) 291c5f01b2fSopenharmony_ci roundtrip(true, "\xf0\x90\x80\x80"); 292c5f01b2fSopenharmony_ci 293c5f01b2fSopenharmony_ci // 2.1.5 5 bytes (U-00200000) 294c5f01b2fSopenharmony_ci roundtrip(false, "\xF8\x88\x80\x80\x80"); 295c5f01b2fSopenharmony_ci // 2.1.6 6 bytes (U-04000000) 296c5f01b2fSopenharmony_ci roundtrip(false, "\xFC\x84\x80\x80\x80\x80"); 297c5f01b2fSopenharmony_ci } 298c5f01b2fSopenharmony_ci 299c5f01b2fSopenharmony_ci SECTION("2.2 Last possible sequence of a certain length") 300c5f01b2fSopenharmony_ci { 301c5f01b2fSopenharmony_ci // 2.2.1 1 byte (U-0000007F) 302c5f01b2fSopenharmony_ci roundtrip(true, "\x7f"); 303c5f01b2fSopenharmony_ci // 2.2.2 2 bytes (U-000007FF) 304c5f01b2fSopenharmony_ci roundtrip(true, "\xdf\xbf"); 305c5f01b2fSopenharmony_ci // 2.2.3 3 bytes (U-0000FFFF) 306c5f01b2fSopenharmony_ci roundtrip(true, "\xef\xbf\xbf"); 307c5f01b2fSopenharmony_ci 308c5f01b2fSopenharmony_ci // 2.2.4 4 bytes (U-001FFFFF) 309c5f01b2fSopenharmony_ci roundtrip(false, "\xF7\xBF\xBF\xBF"); 310c5f01b2fSopenharmony_ci // 2.2.5 5 bytes (U-03FFFFFF) 311c5f01b2fSopenharmony_ci roundtrip(false, "\xFB\xBF\xBF\xBF\xBF"); 312c5f01b2fSopenharmony_ci // 2.2.6 6 bytes (U-7FFFFFFF) 313c5f01b2fSopenharmony_ci roundtrip(false, "\xFD\xBF\xBF\xBF\xBF\xBF"); 314c5f01b2fSopenharmony_ci } 315c5f01b2fSopenharmony_ci 316c5f01b2fSopenharmony_ci SECTION("2.3 Other boundary conditions") 317c5f01b2fSopenharmony_ci { 318c5f01b2fSopenharmony_ci // 2.3.1 U-0000D7FF = ed 9f bf 319c5f01b2fSopenharmony_ci roundtrip(true, "\xed\x9f\xbf"); 320c5f01b2fSopenharmony_ci // 2.3.2 U-0000E000 = ee 80 80 321c5f01b2fSopenharmony_ci roundtrip(true, "\xee\x80\x80"); 322c5f01b2fSopenharmony_ci // 2.3.3 U-0000FFFD = ef bf bd 323c5f01b2fSopenharmony_ci roundtrip(true, "\xef\xbf\xbd"); 324c5f01b2fSopenharmony_ci // 2.3.4 U-0010FFFF = f4 8f bf bf 325c5f01b2fSopenharmony_ci roundtrip(true, "\xf4\x8f\xbf\xbf"); 326c5f01b2fSopenharmony_ci 327c5f01b2fSopenharmony_ci // 2.3.5 U-00110000 = f4 90 80 80 328c5f01b2fSopenharmony_ci roundtrip(false, "\xf4\x90\x80\x80"); 329c5f01b2fSopenharmony_ci } 330c5f01b2fSopenharmony_ci } 331c5f01b2fSopenharmony_ci 332c5f01b2fSopenharmony_ci SECTION("3 Malformed sequences") 333c5f01b2fSopenharmony_ci { 334c5f01b2fSopenharmony_ci SECTION("3.1 Unexpected continuation bytes") 335c5f01b2fSopenharmony_ci { 336c5f01b2fSopenharmony_ci // Each unexpected continuation byte should be separately signalled as a 337c5f01b2fSopenharmony_ci // malformed sequence of its own. 338c5f01b2fSopenharmony_ci 339c5f01b2fSopenharmony_ci // 3.1.1 First continuation byte 0x80 340c5f01b2fSopenharmony_ci roundtrip(false, "\x80"); 341c5f01b2fSopenharmony_ci // 3.1.2 Last continuation byte 0xbf 342c5f01b2fSopenharmony_ci roundtrip(false, "\xbf"); 343c5f01b2fSopenharmony_ci 344c5f01b2fSopenharmony_ci // 3.1.3 2 continuation bytes 345c5f01b2fSopenharmony_ci roundtrip(false, "\x80\xbf"); 346c5f01b2fSopenharmony_ci // 3.1.4 3 continuation bytes 347c5f01b2fSopenharmony_ci roundtrip(false, "\x80\xbf\x80"); 348c5f01b2fSopenharmony_ci // 3.1.5 4 continuation bytes 349c5f01b2fSopenharmony_ci roundtrip(false, "\x80\xbf\x80\xbf"); 350c5f01b2fSopenharmony_ci // 3.1.6 5 continuation bytes 351c5f01b2fSopenharmony_ci roundtrip(false, "\x80\xbf\x80\xbf\x80"); 352c5f01b2fSopenharmony_ci // 3.1.7 6 continuation bytes 353c5f01b2fSopenharmony_ci roundtrip(false, "\x80\xbf\x80\xbf\x80\xbf"); 354c5f01b2fSopenharmony_ci // 3.1.8 7 continuation bytes 355c5f01b2fSopenharmony_ci roundtrip(false, "\x80\xbf\x80\xbf\x80\xbf\x80"); 356c5f01b2fSopenharmony_ci 357c5f01b2fSopenharmony_ci // 3.1.9 Sequence of all 64 possible continuation bytes (0x80-0xbf) 358c5f01b2fSopenharmony_ci roundtrip(false, "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"); 359c5f01b2fSopenharmony_ci } 360c5f01b2fSopenharmony_ci 361c5f01b2fSopenharmony_ci SECTION("3.2 Lonely start characters") 362c5f01b2fSopenharmony_ci { 363c5f01b2fSopenharmony_ci // 3.2.1 All 32 first bytes of 2-byte sequences (0xc0-0xdf) 364c5f01b2fSopenharmony_ci roundtrip(false, "\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf"); 365c5f01b2fSopenharmony_ci // 3.2.2 All 16 first bytes of 3-byte sequences (0xe0-0xef) 366c5f01b2fSopenharmony_ci roundtrip(false, "\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef"); 367c5f01b2fSopenharmony_ci // 3.2.3 All 8 first bytes of 4-byte sequences (0xf0-0xf7) 368c5f01b2fSopenharmony_ci roundtrip(false, "\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7"); 369c5f01b2fSopenharmony_ci // 3.2.4 All 4 first bytes of 5-byte sequences (0xf8-0xfb) 370c5f01b2fSopenharmony_ci roundtrip(false, "\xf8 \xf9 \xfa \xfb"); 371c5f01b2fSopenharmony_ci // 3.2.5 All 2 first bytes of 6-byte sequences (0xfc-0xfd) 372c5f01b2fSopenharmony_ci roundtrip(false, "\xfc \xfd"); 373c5f01b2fSopenharmony_ci } 374c5f01b2fSopenharmony_ci 375c5f01b2fSopenharmony_ci SECTION("3.3 Sequences with last continuation byte missing") 376c5f01b2fSopenharmony_ci { 377c5f01b2fSopenharmony_ci // All bytes of an incomplete sequence should be signalled as a single 378c5f01b2fSopenharmony_ci // malformed sequence, i.e., you should see only a single replacement 379c5f01b2fSopenharmony_ci // character in each of the next 10 tests. (Characters as in section 2) 380c5f01b2fSopenharmony_ci 381c5f01b2fSopenharmony_ci // 3.3.1 2-byte sequence with last byte missing (U+0000) 382c5f01b2fSopenharmony_ci roundtrip(false, "\xc0"); 383c5f01b2fSopenharmony_ci // 3.3.2 3-byte sequence with last byte missing (U+0000) 384c5f01b2fSopenharmony_ci roundtrip(false, "\xe0\x80"); 385c5f01b2fSopenharmony_ci // 3.3.3 4-byte sequence with last byte missing (U+0000) 386c5f01b2fSopenharmony_ci roundtrip(false, "\xf0\x80\x80"); 387c5f01b2fSopenharmony_ci // 3.3.4 5-byte sequence with last byte missing (U+0000) 388c5f01b2fSopenharmony_ci roundtrip(false, "\xf8\x80\x80\x80"); 389c5f01b2fSopenharmony_ci // 3.3.5 6-byte sequence with last byte missing (U+0000) 390c5f01b2fSopenharmony_ci roundtrip(false, "\xfc\x80\x80\x80\x80"); 391c5f01b2fSopenharmony_ci // 3.3.6 2-byte sequence with last byte missing (U-000007FF) 392c5f01b2fSopenharmony_ci roundtrip(false, "\xdf"); 393c5f01b2fSopenharmony_ci // 3.3.7 3-byte sequence with last byte missing (U-0000FFFF) 394c5f01b2fSopenharmony_ci roundtrip(false, "\xef\xbf"); 395c5f01b2fSopenharmony_ci // 3.3.8 4-byte sequence with last byte missing (U-001FFFFF) 396c5f01b2fSopenharmony_ci roundtrip(false, "\xf7\xbf\xbf"); 397c5f01b2fSopenharmony_ci // 3.3.9 5-byte sequence with last byte missing (U-03FFFFFF) 398c5f01b2fSopenharmony_ci roundtrip(false, "\xfb\xbf\xbf\xbf"); 399c5f01b2fSopenharmony_ci // 3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF) 400c5f01b2fSopenharmony_ci roundtrip(false, "\xfd\xbf\xbf\xbf\xbf"); 401c5f01b2fSopenharmony_ci } 402c5f01b2fSopenharmony_ci 403c5f01b2fSopenharmony_ci SECTION("3.4 Concatenation of incomplete sequences") 404c5f01b2fSopenharmony_ci { 405c5f01b2fSopenharmony_ci // All the 10 sequences of 3.3 concatenated, you should see 10 malformed 406c5f01b2fSopenharmony_ci // sequences being signalled: 407c5f01b2fSopenharmony_ci roundtrip(false, "\xc0\xe0\x80\xf0\x80\x80\xf8\x80\x80\x80\xfc\x80\x80\x80\x80\xdf\xef\xbf\xf7\xbf\xbf\xfb\xbf\xbf\xbf\xfd\xbf\xbf\xbf\xbf"); 408c5f01b2fSopenharmony_ci } 409c5f01b2fSopenharmony_ci 410c5f01b2fSopenharmony_ci SECTION("3.5 Impossible bytes") 411c5f01b2fSopenharmony_ci { 412c5f01b2fSopenharmony_ci // The following two bytes cannot appear in a correct UTF-8 string 413c5f01b2fSopenharmony_ci 414c5f01b2fSopenharmony_ci // 3.5.1 fe 415c5f01b2fSopenharmony_ci roundtrip(false, "\xfe"); 416c5f01b2fSopenharmony_ci // 3.5.2 ff 417c5f01b2fSopenharmony_ci roundtrip(false, "\xff"); 418c5f01b2fSopenharmony_ci // 3.5.3 fe fe ff ff 419c5f01b2fSopenharmony_ci roundtrip(false, "\xfe\xfe\xff\xff"); 420c5f01b2fSopenharmony_ci } 421c5f01b2fSopenharmony_ci } 422c5f01b2fSopenharmony_ci 423c5f01b2fSopenharmony_ci SECTION("4 Overlong sequences") 424c5f01b2fSopenharmony_ci { 425c5f01b2fSopenharmony_ci // The following sequences are not malformed according to the letter of 426c5f01b2fSopenharmony_ci // the Unicode 2.0 standard. However, they are longer then necessary and 427c5f01b2fSopenharmony_ci // a correct UTF-8 encoder is not allowed to produce them. A "safe UTF-8 428c5f01b2fSopenharmony_ci // decoder" should reject them just like malformed sequences for two 429c5f01b2fSopenharmony_ci // reasons: (1) It helps to debug applications if overlong sequences are 430c5f01b2fSopenharmony_ci // not treated as valid representations of characters, because this helps 431c5f01b2fSopenharmony_ci // to spot problems more quickly. (2) Overlong sequences provide 432c5f01b2fSopenharmony_ci // alternative representations of characters, that could maliciously be 433c5f01b2fSopenharmony_ci // used to bypass filters that check only for ASCII characters. For 434c5f01b2fSopenharmony_ci // instance, a 2-byte encoded line feed (LF) would not be caught by a 435c5f01b2fSopenharmony_ci // line counter that counts only 0x0a bytes, but it would still be 436c5f01b2fSopenharmony_ci // processed as a line feed by an unsafe UTF-8 decoder later in the 437c5f01b2fSopenharmony_ci // pipeline. From a security point of view, ASCII compatibility of UTF-8 438c5f01b2fSopenharmony_ci // sequences means also, that ASCII characters are *only* allowed to be 439c5f01b2fSopenharmony_ci // represented by ASCII bytes in the range 0x00-0x7f. To ensure this 440c5f01b2fSopenharmony_ci // aspect of ASCII compatibility, use only "safe UTF-8 decoders" that 441c5f01b2fSopenharmony_ci // reject overlong UTF-8 sequences for which a shorter encoding exists. 442c5f01b2fSopenharmony_ci 443c5f01b2fSopenharmony_ci SECTION("4.1 Examples of an overlong ASCII character") 444c5f01b2fSopenharmony_ci { 445c5f01b2fSopenharmony_ci // With a safe UTF-8 decoder, all of the following five overlong 446c5f01b2fSopenharmony_ci // representations of the ASCII character slash ("/") should be rejected 447c5f01b2fSopenharmony_ci // like a malformed UTF-8 sequence, for instance by substituting it with 448c5f01b2fSopenharmony_ci // a replacement character. If you see a slash below, you do not have a 449c5f01b2fSopenharmony_ci // safe UTF-8 decoder! 450c5f01b2fSopenharmony_ci 451c5f01b2fSopenharmony_ci // 4.1.1 U+002F = c0 af 452c5f01b2fSopenharmony_ci roundtrip(false, "\xc0\xaf"); 453c5f01b2fSopenharmony_ci // 4.1.2 U+002F = e0 80 af 454c5f01b2fSopenharmony_ci roundtrip(false, "\xe0\x80\xaf"); 455c5f01b2fSopenharmony_ci // 4.1.3 U+002F = f0 80 80 af 456c5f01b2fSopenharmony_ci roundtrip(false, "\xf0\x80\x80\xaf"); 457c5f01b2fSopenharmony_ci // 4.1.4 U+002F = f8 80 80 80 af 458c5f01b2fSopenharmony_ci roundtrip(false, "\xf8\x80\x80\x80\xaf"); 459c5f01b2fSopenharmony_ci // 4.1.5 U+002F = fc 80 80 80 80 af 460c5f01b2fSopenharmony_ci roundtrip(false, "\xfc\x80\x80\x80\x80\xaf"); 461c5f01b2fSopenharmony_ci } 462c5f01b2fSopenharmony_ci 463c5f01b2fSopenharmony_ci SECTION("4.2 Maximum overlong sequences") 464c5f01b2fSopenharmony_ci { 465c5f01b2fSopenharmony_ci // Below you see the highest Unicode value that is still resulting in an 466c5f01b2fSopenharmony_ci // overlong sequence if represented with the given number of bytes. This 467c5f01b2fSopenharmony_ci // is a boundary test for safe UTF-8 decoders. All five characters should 468c5f01b2fSopenharmony_ci // be rejected like malformed UTF-8 sequences. 469c5f01b2fSopenharmony_ci 470c5f01b2fSopenharmony_ci // 4.2.1 U-0000007F = c1 bf 471c5f01b2fSopenharmony_ci roundtrip(false, "\xc1\xbf"); 472c5f01b2fSopenharmony_ci // 4.2.2 U-000007FF = e0 9f bf 473c5f01b2fSopenharmony_ci roundtrip(false, "\xe0\x9f\xbf"); 474c5f01b2fSopenharmony_ci // 4.2.3 U-0000FFFF = f0 8f bf bf 475c5f01b2fSopenharmony_ci roundtrip(false, "\xf0\x8f\xbf\xbf"); 476c5f01b2fSopenharmony_ci // 4.2.4 U-001FFFFF = f8 87 bf bf bf 477c5f01b2fSopenharmony_ci roundtrip(false, "\xf8\x87\xbf\xbf\xbf"); 478c5f01b2fSopenharmony_ci // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf 479c5f01b2fSopenharmony_ci roundtrip(false, "\xfc\x83\xbf\xbf\xbf\xbf"); 480c5f01b2fSopenharmony_ci } 481c5f01b2fSopenharmony_ci 482c5f01b2fSopenharmony_ci SECTION("4.3 Overlong representation of the NUL character") 483c5f01b2fSopenharmony_ci { 484c5f01b2fSopenharmony_ci // The following five sequences should also be rejected like malformed 485c5f01b2fSopenharmony_ci // UTF-8 sequences and should not be treated like the ASCII NUL 486c5f01b2fSopenharmony_ci // character. 487c5f01b2fSopenharmony_ci 488c5f01b2fSopenharmony_ci // 4.3.1 U+0000 = c0 80 489c5f01b2fSopenharmony_ci roundtrip(false, "\xc0\x80"); 490c5f01b2fSopenharmony_ci // 4.3.2 U+0000 = e0 80 80 491c5f01b2fSopenharmony_ci roundtrip(false, "\xe0\x80\x80"); 492c5f01b2fSopenharmony_ci // 4.3.3 U+0000 = f0 80 80 80 493c5f01b2fSopenharmony_ci roundtrip(false, "\xf0\x80\x80\x80"); 494c5f01b2fSopenharmony_ci // 4.3.4 U+0000 = f8 80 80 80 80 495c5f01b2fSopenharmony_ci roundtrip(false, "\xf8\x80\x80\x80\x80"); 496c5f01b2fSopenharmony_ci // 4.3.5 U+0000 = fc 80 80 80 80 80 497c5f01b2fSopenharmony_ci roundtrip(false, "\xfc\x80\x80\x80\x80\x80"); 498c5f01b2fSopenharmony_ci } 499c5f01b2fSopenharmony_ci } 500c5f01b2fSopenharmony_ci 501c5f01b2fSopenharmony_ci SECTION("5 Illegal code positions") 502c5f01b2fSopenharmony_ci { 503c5f01b2fSopenharmony_ci // The following UTF-8 sequences should be rejected like malformed 504c5f01b2fSopenharmony_ci // sequences, because they never represent valid ISO 10646 characters and 505c5f01b2fSopenharmony_ci // a UTF-8 decoder that accepts them might introduce security problems 506c5f01b2fSopenharmony_ci // comparable to overlong UTF-8 sequences. 507c5f01b2fSopenharmony_ci 508c5f01b2fSopenharmony_ci SECTION("5.1 Single UTF-16 surrogates") 509c5f01b2fSopenharmony_ci { 510c5f01b2fSopenharmony_ci // 5.1.1 U+D800 = ed a0 80 511c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xa0\x80"); 512c5f01b2fSopenharmony_ci // 5.1.2 U+DB7F = ed ad bf 513c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xad\xbf"); 514c5f01b2fSopenharmony_ci // 5.1.3 U+DB80 = ed ae 80 515c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xae\x80"); 516c5f01b2fSopenharmony_ci // 5.1.4 U+DBFF = ed af bf 517c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xaf\xbf"); 518c5f01b2fSopenharmony_ci // 5.1.5 U+DC00 = ed b0 80 519c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xb0\x80"); 520c5f01b2fSopenharmony_ci // 5.1.6 U+DF80 = ed be 80 521c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xbe\x80"); 522c5f01b2fSopenharmony_ci // 5.1.7 U+DFFF = ed bf bf 523c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xbf\xbf"); 524c5f01b2fSopenharmony_ci } 525c5f01b2fSopenharmony_ci 526c5f01b2fSopenharmony_ci SECTION("5.2 Paired UTF-16 surrogates") 527c5f01b2fSopenharmony_ci { 528c5f01b2fSopenharmony_ci // 5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 529c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xa0\x80\xed\xb0\x80"); 530c5f01b2fSopenharmony_ci // 5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf 531c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xa0\x80\xed\xbf\xbf"); 532c5f01b2fSopenharmony_ci // 5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 533c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xad\xbf\xed\xb0\x80"); 534c5f01b2fSopenharmony_ci // 5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf 535c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xad\xbf\xed\xbf\xbf"); 536c5f01b2fSopenharmony_ci // 5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 537c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xae\x80\xed\xb0\x80"); 538c5f01b2fSopenharmony_ci // 5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf 539c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xae\x80\xed\xbf\xbf"); 540c5f01b2fSopenharmony_ci // 5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 541c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xaf\xbf\xed\xb0\x80"); 542c5f01b2fSopenharmony_ci // 5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf 543c5f01b2fSopenharmony_ci roundtrip(false, "\xed\xaf\xbf\xed\xbf\xbf"); 544c5f01b2fSopenharmony_ci } 545c5f01b2fSopenharmony_ci 546c5f01b2fSopenharmony_ci SECTION("5.3 Noncharacter code positions") 547c5f01b2fSopenharmony_ci { 548c5f01b2fSopenharmony_ci // The following "noncharacters" are "reserved for internal use" by 549c5f01b2fSopenharmony_ci // applications, and according to older versions of the Unicode Standard 550c5f01b2fSopenharmony_ci // "should never be interchanged". Unicode Corrigendum #9 dropped the 551c5f01b2fSopenharmony_ci // latter restriction. Nevertheless, their presence in incoming UTF-8 data 552c5f01b2fSopenharmony_ci // can remain a potential security risk, depending on what use is made of 553c5f01b2fSopenharmony_ci // these codes subsequently. Examples of such internal use: 554c5f01b2fSopenharmony_ci // 555c5f01b2fSopenharmony_ci // - Some file APIs with 16-bit characters may use the integer value -1 556c5f01b2fSopenharmony_ci // = U+FFFF to signal an end-of-file (EOF) or error condition. 557c5f01b2fSopenharmony_ci // 558c5f01b2fSopenharmony_ci // - In some UTF-16 receivers, code point U+FFFE might trigger a 559c5f01b2fSopenharmony_ci // byte-swap operation (to convert between UTF-16LE and UTF-16BE). 560c5f01b2fSopenharmony_ci // 561c5f01b2fSopenharmony_ci // With such internal use of noncharacters, it may be desirable and safer 562c5f01b2fSopenharmony_ci // to block those code points in UTF-8 decoders, as they should never 563c5f01b2fSopenharmony_ci // occur legitimately in incoming UTF-8 data, and could trigger unsafe 564c5f01b2fSopenharmony_ci // behaviour in subsequent processing. 565c5f01b2fSopenharmony_ci 566c5f01b2fSopenharmony_ci // Particularly problematic noncharacters in 16-bit applications: 567c5f01b2fSopenharmony_ci 568c5f01b2fSopenharmony_ci // 5.3.1 U+FFFE = ef bf be 569c5f01b2fSopenharmony_ci roundtrip(true, "\xef\xbf\xbe"); 570c5f01b2fSopenharmony_ci // 5.3.2 U+FFFF = ef bf bf 571c5f01b2fSopenharmony_ci roundtrip(true, "\xef\xbf\xbf"); 572c5f01b2fSopenharmony_ci 573c5f01b2fSopenharmony_ci // 5.3.3 U+FDD0 .. U+FDEF 574c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x90"); 575c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x91"); 576c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x92"); 577c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x93"); 578c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x94"); 579c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x95"); 580c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x96"); 581c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x97"); 582c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x98"); 583c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x99"); 584c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x9A"); 585c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x9B"); 586c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x9C"); 587c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x9D"); 588c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x9E"); 589c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\x9F"); 590c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA0"); 591c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA1"); 592c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA2"); 593c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA3"); 594c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA4"); 595c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA5"); 596c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA6"); 597c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA7"); 598c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA8"); 599c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xA9"); 600c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xAA"); 601c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xAB"); 602c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xAC"); 603c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xAD"); 604c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xAE"); 605c5f01b2fSopenharmony_ci roundtrip(true, "\xEF\xB7\xAF"); 606c5f01b2fSopenharmony_ci 607c5f01b2fSopenharmony_ci // 5.3.4 U+nFFFE U+nFFFF (for n = 1..10) 608c5f01b2fSopenharmony_ci roundtrip(true, "\xF0\x9F\xBF\xBF"); 609c5f01b2fSopenharmony_ci roundtrip(true, "\xF0\xAF\xBF\xBF"); 610c5f01b2fSopenharmony_ci roundtrip(true, "\xF0\xBF\xBF\xBF"); 611c5f01b2fSopenharmony_ci roundtrip(true, "\xF1\x8F\xBF\xBF"); 612c5f01b2fSopenharmony_ci roundtrip(true, "\xF1\x9F\xBF\xBF"); 613c5f01b2fSopenharmony_ci roundtrip(true, "\xF1\xAF\xBF\xBF"); 614c5f01b2fSopenharmony_ci roundtrip(true, "\xF1\xBF\xBF\xBF"); 615c5f01b2fSopenharmony_ci roundtrip(true, "\xF2\x8F\xBF\xBF"); 616c5f01b2fSopenharmony_ci roundtrip(true, "\xF2\x9F\xBF\xBF"); 617c5f01b2fSopenharmony_ci roundtrip(true, "\xF2\xAF\xBF\xBF"); 618c5f01b2fSopenharmony_ci } 619c5f01b2fSopenharmony_ci } 620c5f01b2fSopenharmony_ci} 621