17db96d56Sopenharmony_ciimport unittest 27db96d56Sopenharmony_ciimport sys 37db96d56Sopenharmony_cifrom test import support 47db96d56Sopenharmony_cifrom test.support import import_helper 57db96d56Sopenharmony_ci 67db96d56Sopenharmony_citry: 77db96d56Sopenharmony_ci import _testcapi 87db96d56Sopenharmony_ciexcept ImportError: 97db96d56Sopenharmony_ci _testcapi = None 107db96d56Sopenharmony_ci 117db96d56Sopenharmony_ci 127db96d56Sopenharmony_ciclass CAPITest(unittest.TestCase): 137db96d56Sopenharmony_ci 147db96d56Sopenharmony_ci # Test PyUnicode_FromFormat() 157db96d56Sopenharmony_ci def test_from_format(self): 167db96d56Sopenharmony_ci import_helper.import_module('ctypes') 177db96d56Sopenharmony_ci from ctypes import ( 187db96d56Sopenharmony_ci c_char_p, 197db96d56Sopenharmony_ci pythonapi, py_object, sizeof, 207db96d56Sopenharmony_ci c_int, c_long, c_longlong, c_ssize_t, 217db96d56Sopenharmony_ci c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p) 227db96d56Sopenharmony_ci name = "PyUnicode_FromFormat" 237db96d56Sopenharmony_ci _PyUnicode_FromFormat = getattr(pythonapi, name) 247db96d56Sopenharmony_ci _PyUnicode_FromFormat.argtypes = (c_char_p,) 257db96d56Sopenharmony_ci _PyUnicode_FromFormat.restype = py_object 267db96d56Sopenharmony_ci 277db96d56Sopenharmony_ci def PyUnicode_FromFormat(format, *args): 287db96d56Sopenharmony_ci cargs = tuple( 297db96d56Sopenharmony_ci py_object(arg) if isinstance(arg, str) else arg 307db96d56Sopenharmony_ci for arg in args) 317db96d56Sopenharmony_ci return _PyUnicode_FromFormat(format, *cargs) 327db96d56Sopenharmony_ci 337db96d56Sopenharmony_ci def check_format(expected, format, *args): 347db96d56Sopenharmony_ci text = PyUnicode_FromFormat(format, *args) 357db96d56Sopenharmony_ci self.assertEqual(expected, text) 367db96d56Sopenharmony_ci 377db96d56Sopenharmony_ci # ascii format, non-ascii argument 387db96d56Sopenharmony_ci check_format('ascii\x7f=unicode\xe9', 397db96d56Sopenharmony_ci b'ascii\x7f=%U', 'unicode\xe9') 407db96d56Sopenharmony_ci 417db96d56Sopenharmony_ci # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV() 427db96d56Sopenharmony_ci # raises an error 437db96d56Sopenharmony_ci self.assertRaisesRegex(ValueError, 447db96d56Sopenharmony_ci r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format ' 457db96d56Sopenharmony_ci 'string, got a non-ASCII byte: 0xe9$', 467db96d56Sopenharmony_ci PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii') 477db96d56Sopenharmony_ci 487db96d56Sopenharmony_ci # test "%c" 497db96d56Sopenharmony_ci check_format('\uabcd', 507db96d56Sopenharmony_ci b'%c', c_int(0xabcd)) 517db96d56Sopenharmony_ci check_format('\U0010ffff', 527db96d56Sopenharmony_ci b'%c', c_int(0x10ffff)) 537db96d56Sopenharmony_ci with self.assertRaises(OverflowError): 547db96d56Sopenharmony_ci PyUnicode_FromFormat(b'%c', c_int(0x110000)) 557db96d56Sopenharmony_ci # Issue #18183 567db96d56Sopenharmony_ci check_format('\U00010000\U00100000', 577db96d56Sopenharmony_ci b'%c%c', c_int(0x10000), c_int(0x100000)) 587db96d56Sopenharmony_ci 597db96d56Sopenharmony_ci # test "%" 607db96d56Sopenharmony_ci check_format('%', 617db96d56Sopenharmony_ci b'%') 627db96d56Sopenharmony_ci check_format('%', 637db96d56Sopenharmony_ci b'%%') 647db96d56Sopenharmony_ci check_format('%s', 657db96d56Sopenharmony_ci b'%%s') 667db96d56Sopenharmony_ci check_format('[%]', 677db96d56Sopenharmony_ci b'[%%]') 687db96d56Sopenharmony_ci check_format('%abc', 697db96d56Sopenharmony_ci b'%%%s', b'abc') 707db96d56Sopenharmony_ci 717db96d56Sopenharmony_ci # truncated string 727db96d56Sopenharmony_ci check_format('abc', 737db96d56Sopenharmony_ci b'%.3s', b'abcdef') 747db96d56Sopenharmony_ci check_format('abc[\ufffd', 757db96d56Sopenharmony_ci b'%.5s', 'abc[\u20ac]'.encode('utf8')) 767db96d56Sopenharmony_ci check_format("'\\u20acABC'", 777db96d56Sopenharmony_ci b'%A', '\u20acABC') 787db96d56Sopenharmony_ci check_format("'\\u20", 797db96d56Sopenharmony_ci b'%.5A', '\u20acABCDEF') 807db96d56Sopenharmony_ci check_format("'\u20acABC'", 817db96d56Sopenharmony_ci b'%R', '\u20acABC') 827db96d56Sopenharmony_ci check_format("'\u20acA", 837db96d56Sopenharmony_ci b'%.3R', '\u20acABCDEF') 847db96d56Sopenharmony_ci check_format('\u20acAB', 857db96d56Sopenharmony_ci b'%.3S', '\u20acABCDEF') 867db96d56Sopenharmony_ci check_format('\u20acAB', 877db96d56Sopenharmony_ci b'%.3U', '\u20acABCDEF') 887db96d56Sopenharmony_ci check_format('\u20acAB', 897db96d56Sopenharmony_ci b'%.3V', '\u20acABCDEF', None) 907db96d56Sopenharmony_ci check_format('abc[\ufffd', 917db96d56Sopenharmony_ci b'%.5V', None, 'abc[\u20ac]'.encode('utf8')) 927db96d56Sopenharmony_ci 937db96d56Sopenharmony_ci # following tests comes from #7330 947db96d56Sopenharmony_ci # test width modifier and precision modifier with %S 957db96d56Sopenharmony_ci check_format("repr= abc", 967db96d56Sopenharmony_ci b'repr=%5S', 'abc') 977db96d56Sopenharmony_ci check_format("repr=ab", 987db96d56Sopenharmony_ci b'repr=%.2S', 'abc') 997db96d56Sopenharmony_ci check_format("repr= ab", 1007db96d56Sopenharmony_ci b'repr=%5.2S', 'abc') 1017db96d56Sopenharmony_ci 1027db96d56Sopenharmony_ci # test width modifier and precision modifier with %R 1037db96d56Sopenharmony_ci check_format("repr= 'abc'", 1047db96d56Sopenharmony_ci b'repr=%8R', 'abc') 1057db96d56Sopenharmony_ci check_format("repr='ab", 1067db96d56Sopenharmony_ci b'repr=%.3R', 'abc') 1077db96d56Sopenharmony_ci check_format("repr= 'ab", 1087db96d56Sopenharmony_ci b'repr=%5.3R', 'abc') 1097db96d56Sopenharmony_ci 1107db96d56Sopenharmony_ci # test width modifier and precision modifier with %A 1117db96d56Sopenharmony_ci check_format("repr= 'abc'", 1127db96d56Sopenharmony_ci b'repr=%8A', 'abc') 1137db96d56Sopenharmony_ci check_format("repr='ab", 1147db96d56Sopenharmony_ci b'repr=%.3A', 'abc') 1157db96d56Sopenharmony_ci check_format("repr= 'ab", 1167db96d56Sopenharmony_ci b'repr=%5.3A', 'abc') 1177db96d56Sopenharmony_ci 1187db96d56Sopenharmony_ci # test width modifier and precision modifier with %s 1197db96d56Sopenharmony_ci check_format("repr= abc", 1207db96d56Sopenharmony_ci b'repr=%5s', b'abc') 1217db96d56Sopenharmony_ci check_format("repr=ab", 1227db96d56Sopenharmony_ci b'repr=%.2s', b'abc') 1237db96d56Sopenharmony_ci check_format("repr= ab", 1247db96d56Sopenharmony_ci b'repr=%5.2s', b'abc') 1257db96d56Sopenharmony_ci 1267db96d56Sopenharmony_ci # test width modifier and precision modifier with %U 1277db96d56Sopenharmony_ci check_format("repr= abc", 1287db96d56Sopenharmony_ci b'repr=%5U', 'abc') 1297db96d56Sopenharmony_ci check_format("repr=ab", 1307db96d56Sopenharmony_ci b'repr=%.2U', 'abc') 1317db96d56Sopenharmony_ci check_format("repr= ab", 1327db96d56Sopenharmony_ci b'repr=%5.2U', 'abc') 1337db96d56Sopenharmony_ci 1347db96d56Sopenharmony_ci # test width modifier and precision modifier with %V 1357db96d56Sopenharmony_ci check_format("repr= abc", 1367db96d56Sopenharmony_ci b'repr=%5V', 'abc', b'123') 1377db96d56Sopenharmony_ci check_format("repr=ab", 1387db96d56Sopenharmony_ci b'repr=%.2V', 'abc', b'123') 1397db96d56Sopenharmony_ci check_format("repr= ab", 1407db96d56Sopenharmony_ci b'repr=%5.2V', 'abc', b'123') 1417db96d56Sopenharmony_ci check_format("repr= 123", 1427db96d56Sopenharmony_ci b'repr=%5V', None, b'123') 1437db96d56Sopenharmony_ci check_format("repr=12", 1447db96d56Sopenharmony_ci b'repr=%.2V', None, b'123') 1457db96d56Sopenharmony_ci check_format("repr= 12", 1467db96d56Sopenharmony_ci b'repr=%5.2V', None, b'123') 1477db96d56Sopenharmony_ci 1487db96d56Sopenharmony_ci # test integer formats (%i, %d, %u) 1497db96d56Sopenharmony_ci check_format('010', 1507db96d56Sopenharmony_ci b'%03i', c_int(10)) 1517db96d56Sopenharmony_ci check_format('0010', 1527db96d56Sopenharmony_ci b'%0.4i', c_int(10)) 1537db96d56Sopenharmony_ci check_format('-123', 1547db96d56Sopenharmony_ci b'%i', c_int(-123)) 1557db96d56Sopenharmony_ci check_format('-123', 1567db96d56Sopenharmony_ci b'%li', c_long(-123)) 1577db96d56Sopenharmony_ci check_format('-123', 1587db96d56Sopenharmony_ci b'%lli', c_longlong(-123)) 1597db96d56Sopenharmony_ci check_format('-123', 1607db96d56Sopenharmony_ci b'%zi', c_ssize_t(-123)) 1617db96d56Sopenharmony_ci 1627db96d56Sopenharmony_ci check_format('-123', 1637db96d56Sopenharmony_ci b'%d', c_int(-123)) 1647db96d56Sopenharmony_ci check_format('-123', 1657db96d56Sopenharmony_ci b'%ld', c_long(-123)) 1667db96d56Sopenharmony_ci check_format('-123', 1677db96d56Sopenharmony_ci b'%lld', c_longlong(-123)) 1687db96d56Sopenharmony_ci check_format('-123', 1697db96d56Sopenharmony_ci b'%zd', c_ssize_t(-123)) 1707db96d56Sopenharmony_ci 1717db96d56Sopenharmony_ci check_format('123', 1727db96d56Sopenharmony_ci b'%u', c_uint(123)) 1737db96d56Sopenharmony_ci check_format('123', 1747db96d56Sopenharmony_ci b'%lu', c_ulong(123)) 1757db96d56Sopenharmony_ci check_format('123', 1767db96d56Sopenharmony_ci b'%llu', c_ulonglong(123)) 1777db96d56Sopenharmony_ci check_format('123', 1787db96d56Sopenharmony_ci b'%zu', c_size_t(123)) 1797db96d56Sopenharmony_ci 1807db96d56Sopenharmony_ci # test long output 1817db96d56Sopenharmony_ci min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1)) 1827db96d56Sopenharmony_ci max_longlong = -min_longlong - 1 1837db96d56Sopenharmony_ci check_format(str(min_longlong), 1847db96d56Sopenharmony_ci b'%lld', c_longlong(min_longlong)) 1857db96d56Sopenharmony_ci check_format(str(max_longlong), 1867db96d56Sopenharmony_ci b'%lld', c_longlong(max_longlong)) 1877db96d56Sopenharmony_ci max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1 1887db96d56Sopenharmony_ci check_format(str(max_ulonglong), 1897db96d56Sopenharmony_ci b'%llu', c_ulonglong(max_ulonglong)) 1907db96d56Sopenharmony_ci PyUnicode_FromFormat(b'%p', c_void_p(-1)) 1917db96d56Sopenharmony_ci 1927db96d56Sopenharmony_ci # test padding (width and/or precision) 1937db96d56Sopenharmony_ci check_format('123'.rjust(10, '0'), 1947db96d56Sopenharmony_ci b'%010i', c_int(123)) 1957db96d56Sopenharmony_ci check_format('123'.rjust(100), 1967db96d56Sopenharmony_ci b'%100i', c_int(123)) 1977db96d56Sopenharmony_ci check_format('123'.rjust(100, '0'), 1987db96d56Sopenharmony_ci b'%.100i', c_int(123)) 1997db96d56Sopenharmony_ci check_format('123'.rjust(80, '0').rjust(100), 2007db96d56Sopenharmony_ci b'%100.80i', c_int(123)) 2017db96d56Sopenharmony_ci 2027db96d56Sopenharmony_ci check_format('123'.rjust(10, '0'), 2037db96d56Sopenharmony_ci b'%010u', c_uint(123)) 2047db96d56Sopenharmony_ci check_format('123'.rjust(100), 2057db96d56Sopenharmony_ci b'%100u', c_uint(123)) 2067db96d56Sopenharmony_ci check_format('123'.rjust(100, '0'), 2077db96d56Sopenharmony_ci b'%.100u', c_uint(123)) 2087db96d56Sopenharmony_ci check_format('123'.rjust(80, '0').rjust(100), 2097db96d56Sopenharmony_ci b'%100.80u', c_uint(123)) 2107db96d56Sopenharmony_ci 2117db96d56Sopenharmony_ci check_format('123'.rjust(10, '0'), 2127db96d56Sopenharmony_ci b'%010x', c_int(0x123)) 2137db96d56Sopenharmony_ci check_format('123'.rjust(100), 2147db96d56Sopenharmony_ci b'%100x', c_int(0x123)) 2157db96d56Sopenharmony_ci check_format('123'.rjust(100, '0'), 2167db96d56Sopenharmony_ci b'%.100x', c_int(0x123)) 2177db96d56Sopenharmony_ci check_format('123'.rjust(80, '0').rjust(100), 2187db96d56Sopenharmony_ci b'%100.80x', c_int(0x123)) 2197db96d56Sopenharmony_ci 2207db96d56Sopenharmony_ci # test %A 2217db96d56Sopenharmony_ci check_format(r"%A:'abc\xe9\uabcd\U0010ffff'", 2227db96d56Sopenharmony_ci b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') 2237db96d56Sopenharmony_ci 2247db96d56Sopenharmony_ci # test %V 2257db96d56Sopenharmony_ci check_format('repr=abc', 2267db96d56Sopenharmony_ci b'repr=%V', 'abc', b'xyz') 2277db96d56Sopenharmony_ci 2287db96d56Sopenharmony_ci # test %p 2297db96d56Sopenharmony_ci # We cannot test the exact result, 2307db96d56Sopenharmony_ci # because it returns a hex representation of a C pointer, 2317db96d56Sopenharmony_ci # which is going to be different each time. But, we can test the format. 2327db96d56Sopenharmony_ci p_format_regex = r'^0x[a-zA-Z0-9]{3,}$' 2337db96d56Sopenharmony_ci p_format1 = PyUnicode_FromFormat(b'%p', 'abc') 2347db96d56Sopenharmony_ci self.assertIsInstance(p_format1, str) 2357db96d56Sopenharmony_ci self.assertRegex(p_format1, p_format_regex) 2367db96d56Sopenharmony_ci 2377db96d56Sopenharmony_ci p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz') 2387db96d56Sopenharmony_ci self.assertIsInstance(p_format2, str) 2397db96d56Sopenharmony_ci self.assertRegex(p_format2, 2407db96d56Sopenharmony_ci r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}') 2417db96d56Sopenharmony_ci 2427db96d56Sopenharmony_ci # Extra args are ignored: 2437db96d56Sopenharmony_ci p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz') 2447db96d56Sopenharmony_ci self.assertIsInstance(p_format3, str) 2457db96d56Sopenharmony_ci self.assertRegex(p_format3, p_format_regex) 2467db96d56Sopenharmony_ci 2477db96d56Sopenharmony_ci # Test string decode from parameter of %s using utf-8. 2487db96d56Sopenharmony_ci # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of 2497db96d56Sopenharmony_ci # '\u4eba\u6c11' 2507db96d56Sopenharmony_ci check_format('repr=\u4eba\u6c11', 2517db96d56Sopenharmony_ci b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91') 2527db96d56Sopenharmony_ci 2537db96d56Sopenharmony_ci #Test replace error handler. 2547db96d56Sopenharmony_ci check_format('repr=abc\ufffd', 2557db96d56Sopenharmony_ci b'repr=%V', None, b'abc\xff') 2567db96d56Sopenharmony_ci 2577db96d56Sopenharmony_ci # not supported: copy the raw format string. these tests are just here 2587db96d56Sopenharmony_ci # to check for crashes and should not be considered as specifications 2597db96d56Sopenharmony_ci check_format('%s', 2607db96d56Sopenharmony_ci b'%1%s', b'abc') 2617db96d56Sopenharmony_ci check_format('%1abc', 2627db96d56Sopenharmony_ci b'%1abc') 2637db96d56Sopenharmony_ci check_format('%+i', 2647db96d56Sopenharmony_ci b'%+i', c_int(10)) 2657db96d56Sopenharmony_ci check_format('%.%s', 2667db96d56Sopenharmony_ci b'%.%s', b'abc') 2677db96d56Sopenharmony_ci 2687db96d56Sopenharmony_ci # Issue #33817: empty strings 2697db96d56Sopenharmony_ci check_format('', 2707db96d56Sopenharmony_ci b'') 2717db96d56Sopenharmony_ci check_format('', 2727db96d56Sopenharmony_ci b'%s', b'') 2737db96d56Sopenharmony_ci 2747db96d56Sopenharmony_ci # Test PyUnicode_AsWideChar() 2757db96d56Sopenharmony_ci @support.cpython_only 2767db96d56Sopenharmony_ci @unittest.skipIf(_testcapi is None, 'need _testcapi module') 2777db96d56Sopenharmony_ci def test_aswidechar(self): 2787db96d56Sopenharmony_ci from _testcapi import unicode_aswidechar 2797db96d56Sopenharmony_ci import_helper.import_module('ctypes') 2807db96d56Sopenharmony_ci from ctypes import c_wchar, sizeof 2817db96d56Sopenharmony_ci 2827db96d56Sopenharmony_ci wchar, size = unicode_aswidechar('abcdef', 2) 2837db96d56Sopenharmony_ci self.assertEqual(size, 2) 2847db96d56Sopenharmony_ci self.assertEqual(wchar, 'ab') 2857db96d56Sopenharmony_ci 2867db96d56Sopenharmony_ci wchar, size = unicode_aswidechar('abc', 3) 2877db96d56Sopenharmony_ci self.assertEqual(size, 3) 2887db96d56Sopenharmony_ci self.assertEqual(wchar, 'abc') 2897db96d56Sopenharmony_ci 2907db96d56Sopenharmony_ci wchar, size = unicode_aswidechar('abc', 4) 2917db96d56Sopenharmony_ci self.assertEqual(size, 3) 2927db96d56Sopenharmony_ci self.assertEqual(wchar, 'abc\0') 2937db96d56Sopenharmony_ci 2947db96d56Sopenharmony_ci wchar, size = unicode_aswidechar('abc', 10) 2957db96d56Sopenharmony_ci self.assertEqual(size, 3) 2967db96d56Sopenharmony_ci self.assertEqual(wchar, 'abc\0') 2977db96d56Sopenharmony_ci 2987db96d56Sopenharmony_ci wchar, size = unicode_aswidechar('abc\0def', 20) 2997db96d56Sopenharmony_ci self.assertEqual(size, 7) 3007db96d56Sopenharmony_ci self.assertEqual(wchar, 'abc\0def\0') 3017db96d56Sopenharmony_ci 3027db96d56Sopenharmony_ci nonbmp = chr(0x10ffff) 3037db96d56Sopenharmony_ci if sizeof(c_wchar) == 2: 3047db96d56Sopenharmony_ci buflen = 3 3057db96d56Sopenharmony_ci nchar = 2 3067db96d56Sopenharmony_ci else: # sizeof(c_wchar) == 4 3077db96d56Sopenharmony_ci buflen = 2 3087db96d56Sopenharmony_ci nchar = 1 3097db96d56Sopenharmony_ci wchar, size = unicode_aswidechar(nonbmp, buflen) 3107db96d56Sopenharmony_ci self.assertEqual(size, nchar) 3117db96d56Sopenharmony_ci self.assertEqual(wchar, nonbmp + '\0') 3127db96d56Sopenharmony_ci 3137db96d56Sopenharmony_ci # Test PyUnicode_AsWideCharString() 3147db96d56Sopenharmony_ci @support.cpython_only 3157db96d56Sopenharmony_ci @unittest.skipIf(_testcapi is None, 'need _testcapi module') 3167db96d56Sopenharmony_ci def test_aswidecharstring(self): 3177db96d56Sopenharmony_ci from _testcapi import unicode_aswidecharstring 3187db96d56Sopenharmony_ci import_helper.import_module('ctypes') 3197db96d56Sopenharmony_ci from ctypes import c_wchar, sizeof 3207db96d56Sopenharmony_ci 3217db96d56Sopenharmony_ci wchar, size = unicode_aswidecharstring('abc') 3227db96d56Sopenharmony_ci self.assertEqual(size, 3) 3237db96d56Sopenharmony_ci self.assertEqual(wchar, 'abc\0') 3247db96d56Sopenharmony_ci 3257db96d56Sopenharmony_ci wchar, size = unicode_aswidecharstring('abc\0def') 3267db96d56Sopenharmony_ci self.assertEqual(size, 7) 3277db96d56Sopenharmony_ci self.assertEqual(wchar, 'abc\0def\0') 3287db96d56Sopenharmony_ci 3297db96d56Sopenharmony_ci nonbmp = chr(0x10ffff) 3307db96d56Sopenharmony_ci if sizeof(c_wchar) == 2: 3317db96d56Sopenharmony_ci nchar = 2 3327db96d56Sopenharmony_ci else: # sizeof(c_wchar) == 4 3337db96d56Sopenharmony_ci nchar = 1 3347db96d56Sopenharmony_ci wchar, size = unicode_aswidecharstring(nonbmp) 3357db96d56Sopenharmony_ci self.assertEqual(size, nchar) 3367db96d56Sopenharmony_ci self.assertEqual(wchar, nonbmp + '\0') 3377db96d56Sopenharmony_ci 3387db96d56Sopenharmony_ci # Test PyUnicode_AsUCS4() 3397db96d56Sopenharmony_ci @support.cpython_only 3407db96d56Sopenharmony_ci @unittest.skipIf(_testcapi is None, 'need _testcapi module') 3417db96d56Sopenharmony_ci def test_asucs4(self): 3427db96d56Sopenharmony_ci from _testcapi import unicode_asucs4 3437db96d56Sopenharmony_ci for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', 3447db96d56Sopenharmony_ci 'a\ud800b\udfffc', '\ud834\udd1e']: 3457db96d56Sopenharmony_ci l = len(s) 3467db96d56Sopenharmony_ci self.assertEqual(unicode_asucs4(s, l, True), s+'\0') 3477db96d56Sopenharmony_ci self.assertEqual(unicode_asucs4(s, l, False), s+'\uffff') 3487db96d56Sopenharmony_ci self.assertEqual(unicode_asucs4(s, l+1, True), s+'\0\uffff') 3497db96d56Sopenharmony_ci self.assertEqual(unicode_asucs4(s, l+1, False), s+'\0\uffff') 3507db96d56Sopenharmony_ci self.assertRaises(SystemError, unicode_asucs4, s, l-1, True) 3517db96d56Sopenharmony_ci self.assertRaises(SystemError, unicode_asucs4, s, l-2, False) 3527db96d56Sopenharmony_ci s = '\0'.join([s, s]) 3537db96d56Sopenharmony_ci self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0') 3547db96d56Sopenharmony_ci self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff') 3557db96d56Sopenharmony_ci 3567db96d56Sopenharmony_ci # Test PyUnicode_AsUTF8() 3577db96d56Sopenharmony_ci @support.cpython_only 3587db96d56Sopenharmony_ci @unittest.skipIf(_testcapi is None, 'need _testcapi module') 3597db96d56Sopenharmony_ci def test_asutf8(self): 3607db96d56Sopenharmony_ci from _testcapi import unicode_asutf8 3617db96d56Sopenharmony_ci 3627db96d56Sopenharmony_ci bmp = '\u0100' 3637db96d56Sopenharmony_ci bmp2 = '\uffff' 3647db96d56Sopenharmony_ci nonbmp = chr(0x10ffff) 3657db96d56Sopenharmony_ci 3667db96d56Sopenharmony_ci self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80') 3677db96d56Sopenharmony_ci self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf') 3687db96d56Sopenharmony_ci self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf') 3697db96d56Sopenharmony_ci self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc') 3707db96d56Sopenharmony_ci 3717db96d56Sopenharmony_ci # Test PyUnicode_AsUTF8AndSize() 3727db96d56Sopenharmony_ci @support.cpython_only 3737db96d56Sopenharmony_ci @unittest.skipIf(_testcapi is None, 'need _testcapi module') 3747db96d56Sopenharmony_ci def test_asutf8andsize(self): 3757db96d56Sopenharmony_ci from _testcapi import unicode_asutf8andsize 3767db96d56Sopenharmony_ci 3777db96d56Sopenharmony_ci bmp = '\u0100' 3787db96d56Sopenharmony_ci bmp2 = '\uffff' 3797db96d56Sopenharmony_ci nonbmp = chr(0x10ffff) 3807db96d56Sopenharmony_ci 3817db96d56Sopenharmony_ci self.assertEqual(unicode_asutf8andsize(bmp), (b'\xc4\x80', 2)) 3827db96d56Sopenharmony_ci self.assertEqual(unicode_asutf8andsize(bmp2), (b'\xef\xbf\xbf', 3)) 3837db96d56Sopenharmony_ci self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4)) 3847db96d56Sopenharmony_ci self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc') 3857db96d56Sopenharmony_ci 3867db96d56Sopenharmony_ci # Test PyUnicode_FindChar() 3877db96d56Sopenharmony_ci @support.cpython_only 3887db96d56Sopenharmony_ci @unittest.skipIf(_testcapi is None, 'need _testcapi module') 3897db96d56Sopenharmony_ci def test_findchar(self): 3907db96d56Sopenharmony_ci from _testcapi import unicode_findchar 3917db96d56Sopenharmony_ci 3927db96d56Sopenharmony_ci for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": 3937db96d56Sopenharmony_ci for i, ch in enumerate(str): 3947db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i) 3957db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i) 3967db96d56Sopenharmony_ci 3977db96d56Sopenharmony_ci str = "!>_<!" 3987db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1) 3997db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1) 4007db96d56Sopenharmony_ci # start < end 4017db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4) 4027db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4) 4037db96d56Sopenharmony_ci # start >= end 4047db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1) 4057db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1) 4067db96d56Sopenharmony_ci # negative 4077db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0) 4087db96d56Sopenharmony_ci self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0) 4097db96d56Sopenharmony_ci 4107db96d56Sopenharmony_ci # Test PyUnicode_CopyCharacters() 4117db96d56Sopenharmony_ci @support.cpython_only 4127db96d56Sopenharmony_ci @unittest.skipIf(_testcapi is None, 'need _testcapi module') 4137db96d56Sopenharmony_ci def test_copycharacters(self): 4147db96d56Sopenharmony_ci from _testcapi import unicode_copycharacters 4157db96d56Sopenharmony_ci 4167db96d56Sopenharmony_ci strings = [ 4177db96d56Sopenharmony_ci 'abcde', '\xa1\xa2\xa3\xa4\xa5', 4187db96d56Sopenharmony_ci '\u4f60\u597d\u4e16\u754c\uff01', 4197db96d56Sopenharmony_ci '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604' 4207db96d56Sopenharmony_ci ] 4217db96d56Sopenharmony_ci 4227db96d56Sopenharmony_ci for idx, from_ in enumerate(strings): 4237db96d56Sopenharmony_ci # wide -> narrow: exceed maxchar limitation 4247db96d56Sopenharmony_ci for to in strings[:idx]: 4257db96d56Sopenharmony_ci self.assertRaises( 4267db96d56Sopenharmony_ci SystemError, 4277db96d56Sopenharmony_ci unicode_copycharacters, to, 0, from_, 0, 5 4287db96d56Sopenharmony_ci ) 4297db96d56Sopenharmony_ci # same kind 4307db96d56Sopenharmony_ci for from_start in range(5): 4317db96d56Sopenharmony_ci self.assertEqual( 4327db96d56Sopenharmony_ci unicode_copycharacters(from_, 0, from_, from_start, 5), 4337db96d56Sopenharmony_ci (from_[from_start:from_start+5].ljust(5, '\0'), 4347db96d56Sopenharmony_ci 5-from_start) 4357db96d56Sopenharmony_ci ) 4367db96d56Sopenharmony_ci for to_start in range(5): 4377db96d56Sopenharmony_ci self.assertEqual( 4387db96d56Sopenharmony_ci unicode_copycharacters(from_, to_start, from_, to_start, 5), 4397db96d56Sopenharmony_ci (from_[to_start:to_start+5].rjust(5, '\0'), 4407db96d56Sopenharmony_ci 5-to_start) 4417db96d56Sopenharmony_ci ) 4427db96d56Sopenharmony_ci # narrow -> wide 4437db96d56Sopenharmony_ci # Tests omitted since this creates invalid strings. 4447db96d56Sopenharmony_ci 4457db96d56Sopenharmony_ci s = strings[0] 4467db96d56Sopenharmony_ci self.assertRaises(IndexError, unicode_copycharacters, s, 6, s, 0, 5) 4477db96d56Sopenharmony_ci self.assertRaises(IndexError, unicode_copycharacters, s, -1, s, 0, 5) 4487db96d56Sopenharmony_ci self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, 6, 5) 4497db96d56Sopenharmony_ci self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, -1, 5) 4507db96d56Sopenharmony_ci self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, 5) 4517db96d56Sopenharmony_ci self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1) 4527db96d56Sopenharmony_ci self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0) 4537db96d56Sopenharmony_ci 4547db96d56Sopenharmony_ci @support.cpython_only 4557db96d56Sopenharmony_ci @unittest.skipIf(_testcapi is None, 'need _testcapi module') 4567db96d56Sopenharmony_ci def test_pep393_utf8_caching_bug(self): 4577db96d56Sopenharmony_ci # Issue #25709: Problem with string concatenation and utf-8 cache 4587db96d56Sopenharmony_ci from _testcapi import getargs_s_hash 4597db96d56Sopenharmony_ci for k in 0x24, 0xa4, 0x20ac, 0x1f40d: 4607db96d56Sopenharmony_ci s = '' 4617db96d56Sopenharmony_ci for i in range(5): 4627db96d56Sopenharmony_ci # Due to CPython specific optimization the 's' string can be 4637db96d56Sopenharmony_ci # resized in-place. 4647db96d56Sopenharmony_ci s += chr(k) 4657db96d56Sopenharmony_ci # Parsing with the "s#" format code calls indirectly 4667db96d56Sopenharmony_ci # PyUnicode_AsUTF8AndSize() which creates the UTF-8 4677db96d56Sopenharmony_ci # encoded string cached in the Unicode object. 4687db96d56Sopenharmony_ci self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) 4697db96d56Sopenharmony_ci # Check that the second call returns the same result 4707db96d56Sopenharmony_ci self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) 4717db96d56Sopenharmony_ci 4727db96d56Sopenharmony_ci 4737db96d56Sopenharmony_ciif __name__ == "__main__": 4747db96d56Sopenharmony_ci unittest.main() 475