17db96d56Sopenharmony_ciimport unittest
27db96d56Sopenharmony_ciimport sys
37db96d56Sopenharmony_cifrom test import support
47db96d56Sopenharmony_cifrom test.support import import_helper
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_citry:
77db96d56Sopenharmony_ci    import _testcapi
87db96d56Sopenharmony_ciexcept ImportError:
97db96d56Sopenharmony_ci    _testcapi = None
107db96d56Sopenharmony_ci
117db96d56Sopenharmony_ci
127db96d56Sopenharmony_ciclass CAPITest(unittest.TestCase):
137db96d56Sopenharmony_ci
147db96d56Sopenharmony_ci    # Test PyUnicode_FromFormat()
157db96d56Sopenharmony_ci    def test_from_format(self):
167db96d56Sopenharmony_ci        import_helper.import_module('ctypes')
177db96d56Sopenharmony_ci        from ctypes import (
187db96d56Sopenharmony_ci            c_char_p,
197db96d56Sopenharmony_ci            pythonapi, py_object, sizeof,
207db96d56Sopenharmony_ci            c_int, c_long, c_longlong, c_ssize_t,
217db96d56Sopenharmony_ci            c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
227db96d56Sopenharmony_ci        name = "PyUnicode_FromFormat"
237db96d56Sopenharmony_ci        _PyUnicode_FromFormat = getattr(pythonapi, name)
247db96d56Sopenharmony_ci        _PyUnicode_FromFormat.argtypes = (c_char_p,)
257db96d56Sopenharmony_ci        _PyUnicode_FromFormat.restype = py_object
267db96d56Sopenharmony_ci
277db96d56Sopenharmony_ci        def PyUnicode_FromFormat(format, *args):
287db96d56Sopenharmony_ci            cargs = tuple(
297db96d56Sopenharmony_ci                py_object(arg) if isinstance(arg, str) else arg
307db96d56Sopenharmony_ci                for arg in args)
317db96d56Sopenharmony_ci            return _PyUnicode_FromFormat(format, *cargs)
327db96d56Sopenharmony_ci
337db96d56Sopenharmony_ci        def check_format(expected, format, *args):
347db96d56Sopenharmony_ci            text = PyUnicode_FromFormat(format, *args)
357db96d56Sopenharmony_ci            self.assertEqual(expected, text)
367db96d56Sopenharmony_ci
377db96d56Sopenharmony_ci        # ascii format, non-ascii argument
387db96d56Sopenharmony_ci        check_format('ascii\x7f=unicode\xe9',
397db96d56Sopenharmony_ci                     b'ascii\x7f=%U', 'unicode\xe9')
407db96d56Sopenharmony_ci
417db96d56Sopenharmony_ci        # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
427db96d56Sopenharmony_ci        # raises an error
437db96d56Sopenharmony_ci        self.assertRaisesRegex(ValueError,
447db96d56Sopenharmony_ci            r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
457db96d56Sopenharmony_ci            'string, got a non-ASCII byte: 0xe9$',
467db96d56Sopenharmony_ci            PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
477db96d56Sopenharmony_ci
487db96d56Sopenharmony_ci        # test "%c"
497db96d56Sopenharmony_ci        check_format('\uabcd',
507db96d56Sopenharmony_ci                     b'%c', c_int(0xabcd))
517db96d56Sopenharmony_ci        check_format('\U0010ffff',
527db96d56Sopenharmony_ci                     b'%c', c_int(0x10ffff))
537db96d56Sopenharmony_ci        with self.assertRaises(OverflowError):
547db96d56Sopenharmony_ci            PyUnicode_FromFormat(b'%c', c_int(0x110000))
557db96d56Sopenharmony_ci        # Issue #18183
567db96d56Sopenharmony_ci        check_format('\U00010000\U00100000',
577db96d56Sopenharmony_ci                     b'%c%c', c_int(0x10000), c_int(0x100000))
587db96d56Sopenharmony_ci
597db96d56Sopenharmony_ci        # test "%"
607db96d56Sopenharmony_ci        check_format('%',
617db96d56Sopenharmony_ci                     b'%')
627db96d56Sopenharmony_ci        check_format('%',
637db96d56Sopenharmony_ci                     b'%%')
647db96d56Sopenharmony_ci        check_format('%s',
657db96d56Sopenharmony_ci                     b'%%s')
667db96d56Sopenharmony_ci        check_format('[%]',
677db96d56Sopenharmony_ci                     b'[%%]')
687db96d56Sopenharmony_ci        check_format('%abc',
697db96d56Sopenharmony_ci                     b'%%%s', b'abc')
707db96d56Sopenharmony_ci
717db96d56Sopenharmony_ci        # truncated string
727db96d56Sopenharmony_ci        check_format('abc',
737db96d56Sopenharmony_ci                     b'%.3s', b'abcdef')
747db96d56Sopenharmony_ci        check_format('abc[\ufffd',
757db96d56Sopenharmony_ci                     b'%.5s', 'abc[\u20ac]'.encode('utf8'))
767db96d56Sopenharmony_ci        check_format("'\\u20acABC'",
777db96d56Sopenharmony_ci                     b'%A', '\u20acABC')
787db96d56Sopenharmony_ci        check_format("'\\u20",
797db96d56Sopenharmony_ci                     b'%.5A', '\u20acABCDEF')
807db96d56Sopenharmony_ci        check_format("'\u20acABC'",
817db96d56Sopenharmony_ci                     b'%R', '\u20acABC')
827db96d56Sopenharmony_ci        check_format("'\u20acA",
837db96d56Sopenharmony_ci                     b'%.3R', '\u20acABCDEF')
847db96d56Sopenharmony_ci        check_format('\u20acAB',
857db96d56Sopenharmony_ci                     b'%.3S', '\u20acABCDEF')
867db96d56Sopenharmony_ci        check_format('\u20acAB',
877db96d56Sopenharmony_ci                     b'%.3U', '\u20acABCDEF')
887db96d56Sopenharmony_ci        check_format('\u20acAB',
897db96d56Sopenharmony_ci                     b'%.3V', '\u20acABCDEF', None)
907db96d56Sopenharmony_ci        check_format('abc[\ufffd',
917db96d56Sopenharmony_ci                     b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))
927db96d56Sopenharmony_ci
937db96d56Sopenharmony_ci        # following tests comes from #7330
947db96d56Sopenharmony_ci        # test width modifier and precision modifier with %S
957db96d56Sopenharmony_ci        check_format("repr=  abc",
967db96d56Sopenharmony_ci                     b'repr=%5S', 'abc')
977db96d56Sopenharmony_ci        check_format("repr=ab",
987db96d56Sopenharmony_ci                     b'repr=%.2S', 'abc')
997db96d56Sopenharmony_ci        check_format("repr=   ab",
1007db96d56Sopenharmony_ci                     b'repr=%5.2S', 'abc')
1017db96d56Sopenharmony_ci
1027db96d56Sopenharmony_ci        # test width modifier and precision modifier with %R
1037db96d56Sopenharmony_ci        check_format("repr=   'abc'",
1047db96d56Sopenharmony_ci                     b'repr=%8R', 'abc')
1057db96d56Sopenharmony_ci        check_format("repr='ab",
1067db96d56Sopenharmony_ci                     b'repr=%.3R', 'abc')
1077db96d56Sopenharmony_ci        check_format("repr=  'ab",
1087db96d56Sopenharmony_ci                     b'repr=%5.3R', 'abc')
1097db96d56Sopenharmony_ci
1107db96d56Sopenharmony_ci        # test width modifier and precision modifier with %A
1117db96d56Sopenharmony_ci        check_format("repr=   'abc'",
1127db96d56Sopenharmony_ci                     b'repr=%8A', 'abc')
1137db96d56Sopenharmony_ci        check_format("repr='ab",
1147db96d56Sopenharmony_ci                     b'repr=%.3A', 'abc')
1157db96d56Sopenharmony_ci        check_format("repr=  'ab",
1167db96d56Sopenharmony_ci                     b'repr=%5.3A', 'abc')
1177db96d56Sopenharmony_ci
1187db96d56Sopenharmony_ci        # test width modifier and precision modifier with %s
1197db96d56Sopenharmony_ci        check_format("repr=  abc",
1207db96d56Sopenharmony_ci                     b'repr=%5s', b'abc')
1217db96d56Sopenharmony_ci        check_format("repr=ab",
1227db96d56Sopenharmony_ci                     b'repr=%.2s', b'abc')
1237db96d56Sopenharmony_ci        check_format("repr=   ab",
1247db96d56Sopenharmony_ci                     b'repr=%5.2s', b'abc')
1257db96d56Sopenharmony_ci
1267db96d56Sopenharmony_ci        # test width modifier and precision modifier with %U
1277db96d56Sopenharmony_ci        check_format("repr=  abc",
1287db96d56Sopenharmony_ci                     b'repr=%5U', 'abc')
1297db96d56Sopenharmony_ci        check_format("repr=ab",
1307db96d56Sopenharmony_ci                     b'repr=%.2U', 'abc')
1317db96d56Sopenharmony_ci        check_format("repr=   ab",
1327db96d56Sopenharmony_ci                     b'repr=%5.2U', 'abc')
1337db96d56Sopenharmony_ci
1347db96d56Sopenharmony_ci        # test width modifier and precision modifier with %V
1357db96d56Sopenharmony_ci        check_format("repr=  abc",
1367db96d56Sopenharmony_ci                     b'repr=%5V', 'abc', b'123')
1377db96d56Sopenharmony_ci        check_format("repr=ab",
1387db96d56Sopenharmony_ci                     b'repr=%.2V', 'abc', b'123')
1397db96d56Sopenharmony_ci        check_format("repr=   ab",
1407db96d56Sopenharmony_ci                     b'repr=%5.2V', 'abc', b'123')
1417db96d56Sopenharmony_ci        check_format("repr=  123",
1427db96d56Sopenharmony_ci                     b'repr=%5V', None, b'123')
1437db96d56Sopenharmony_ci        check_format("repr=12",
1447db96d56Sopenharmony_ci                     b'repr=%.2V', None, b'123')
1457db96d56Sopenharmony_ci        check_format("repr=   12",
1467db96d56Sopenharmony_ci                     b'repr=%5.2V', None, b'123')
1477db96d56Sopenharmony_ci
1487db96d56Sopenharmony_ci        # test integer formats (%i, %d, %u)
1497db96d56Sopenharmony_ci        check_format('010',
1507db96d56Sopenharmony_ci                     b'%03i', c_int(10))
1517db96d56Sopenharmony_ci        check_format('0010',
1527db96d56Sopenharmony_ci                     b'%0.4i', c_int(10))
1537db96d56Sopenharmony_ci        check_format('-123',
1547db96d56Sopenharmony_ci                     b'%i', c_int(-123))
1557db96d56Sopenharmony_ci        check_format('-123',
1567db96d56Sopenharmony_ci                     b'%li', c_long(-123))
1577db96d56Sopenharmony_ci        check_format('-123',
1587db96d56Sopenharmony_ci                     b'%lli', c_longlong(-123))
1597db96d56Sopenharmony_ci        check_format('-123',
1607db96d56Sopenharmony_ci                     b'%zi', c_ssize_t(-123))
1617db96d56Sopenharmony_ci
1627db96d56Sopenharmony_ci        check_format('-123',
1637db96d56Sopenharmony_ci                     b'%d', c_int(-123))
1647db96d56Sopenharmony_ci        check_format('-123',
1657db96d56Sopenharmony_ci                     b'%ld', c_long(-123))
1667db96d56Sopenharmony_ci        check_format('-123',
1677db96d56Sopenharmony_ci                     b'%lld', c_longlong(-123))
1687db96d56Sopenharmony_ci        check_format('-123',
1697db96d56Sopenharmony_ci                     b'%zd', c_ssize_t(-123))
1707db96d56Sopenharmony_ci
1717db96d56Sopenharmony_ci        check_format('123',
1727db96d56Sopenharmony_ci                     b'%u', c_uint(123))
1737db96d56Sopenharmony_ci        check_format('123',
1747db96d56Sopenharmony_ci                     b'%lu', c_ulong(123))
1757db96d56Sopenharmony_ci        check_format('123',
1767db96d56Sopenharmony_ci                     b'%llu', c_ulonglong(123))
1777db96d56Sopenharmony_ci        check_format('123',
1787db96d56Sopenharmony_ci                     b'%zu', c_size_t(123))
1797db96d56Sopenharmony_ci
1807db96d56Sopenharmony_ci        # test long output
1817db96d56Sopenharmony_ci        min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
1827db96d56Sopenharmony_ci        max_longlong = -min_longlong - 1
1837db96d56Sopenharmony_ci        check_format(str(min_longlong),
1847db96d56Sopenharmony_ci                     b'%lld', c_longlong(min_longlong))
1857db96d56Sopenharmony_ci        check_format(str(max_longlong),
1867db96d56Sopenharmony_ci                     b'%lld', c_longlong(max_longlong))
1877db96d56Sopenharmony_ci        max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
1887db96d56Sopenharmony_ci        check_format(str(max_ulonglong),
1897db96d56Sopenharmony_ci                     b'%llu', c_ulonglong(max_ulonglong))
1907db96d56Sopenharmony_ci        PyUnicode_FromFormat(b'%p', c_void_p(-1))
1917db96d56Sopenharmony_ci
1927db96d56Sopenharmony_ci        # test padding (width and/or precision)
1937db96d56Sopenharmony_ci        check_format('123'.rjust(10, '0'),
1947db96d56Sopenharmony_ci                     b'%010i', c_int(123))
1957db96d56Sopenharmony_ci        check_format('123'.rjust(100),
1967db96d56Sopenharmony_ci                     b'%100i', c_int(123))
1977db96d56Sopenharmony_ci        check_format('123'.rjust(100, '0'),
1987db96d56Sopenharmony_ci                     b'%.100i', c_int(123))
1997db96d56Sopenharmony_ci        check_format('123'.rjust(80, '0').rjust(100),
2007db96d56Sopenharmony_ci                     b'%100.80i', c_int(123))
2017db96d56Sopenharmony_ci
2027db96d56Sopenharmony_ci        check_format('123'.rjust(10, '0'),
2037db96d56Sopenharmony_ci                     b'%010u', c_uint(123))
2047db96d56Sopenharmony_ci        check_format('123'.rjust(100),
2057db96d56Sopenharmony_ci                     b'%100u', c_uint(123))
2067db96d56Sopenharmony_ci        check_format('123'.rjust(100, '0'),
2077db96d56Sopenharmony_ci                     b'%.100u', c_uint(123))
2087db96d56Sopenharmony_ci        check_format('123'.rjust(80, '0').rjust(100),
2097db96d56Sopenharmony_ci                     b'%100.80u', c_uint(123))
2107db96d56Sopenharmony_ci
2117db96d56Sopenharmony_ci        check_format('123'.rjust(10, '0'),
2127db96d56Sopenharmony_ci                     b'%010x', c_int(0x123))
2137db96d56Sopenharmony_ci        check_format('123'.rjust(100),
2147db96d56Sopenharmony_ci                     b'%100x', c_int(0x123))
2157db96d56Sopenharmony_ci        check_format('123'.rjust(100, '0'),
2167db96d56Sopenharmony_ci                     b'%.100x', c_int(0x123))
2177db96d56Sopenharmony_ci        check_format('123'.rjust(80, '0').rjust(100),
2187db96d56Sopenharmony_ci                     b'%100.80x', c_int(0x123))
2197db96d56Sopenharmony_ci
2207db96d56Sopenharmony_ci        # test %A
2217db96d56Sopenharmony_ci        check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
2227db96d56Sopenharmony_ci                     b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
2237db96d56Sopenharmony_ci
2247db96d56Sopenharmony_ci        # test %V
2257db96d56Sopenharmony_ci        check_format('repr=abc',
2267db96d56Sopenharmony_ci                     b'repr=%V', 'abc', b'xyz')
2277db96d56Sopenharmony_ci
2287db96d56Sopenharmony_ci        # test %p
2297db96d56Sopenharmony_ci        # We cannot test the exact result,
2307db96d56Sopenharmony_ci        # because it returns a hex representation of a C pointer,
2317db96d56Sopenharmony_ci        # which is going to be different each time. But, we can test the format.
2327db96d56Sopenharmony_ci        p_format_regex = r'^0x[a-zA-Z0-9]{3,}$'
2337db96d56Sopenharmony_ci        p_format1 = PyUnicode_FromFormat(b'%p', 'abc')
2347db96d56Sopenharmony_ci        self.assertIsInstance(p_format1, str)
2357db96d56Sopenharmony_ci        self.assertRegex(p_format1, p_format_regex)
2367db96d56Sopenharmony_ci
2377db96d56Sopenharmony_ci        p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz')
2387db96d56Sopenharmony_ci        self.assertIsInstance(p_format2, str)
2397db96d56Sopenharmony_ci        self.assertRegex(p_format2,
2407db96d56Sopenharmony_ci                         r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}')
2417db96d56Sopenharmony_ci
2427db96d56Sopenharmony_ci        # Extra args are ignored:
2437db96d56Sopenharmony_ci        p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz')
2447db96d56Sopenharmony_ci        self.assertIsInstance(p_format3, str)
2457db96d56Sopenharmony_ci        self.assertRegex(p_format3, p_format_regex)
2467db96d56Sopenharmony_ci
2477db96d56Sopenharmony_ci        # Test string decode from parameter of %s using utf-8.
2487db96d56Sopenharmony_ci        # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
2497db96d56Sopenharmony_ci        # '\u4eba\u6c11'
2507db96d56Sopenharmony_ci        check_format('repr=\u4eba\u6c11',
2517db96d56Sopenharmony_ci                     b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
2527db96d56Sopenharmony_ci
2537db96d56Sopenharmony_ci        #Test replace error handler.
2547db96d56Sopenharmony_ci        check_format('repr=abc\ufffd',
2557db96d56Sopenharmony_ci                     b'repr=%V', None, b'abc\xff')
2567db96d56Sopenharmony_ci
2577db96d56Sopenharmony_ci        # not supported: copy the raw format string. these tests are just here
2587db96d56Sopenharmony_ci        # to check for crashes and should not be considered as specifications
2597db96d56Sopenharmony_ci        check_format('%s',
2607db96d56Sopenharmony_ci                     b'%1%s', b'abc')
2617db96d56Sopenharmony_ci        check_format('%1abc',
2627db96d56Sopenharmony_ci                     b'%1abc')
2637db96d56Sopenharmony_ci        check_format('%+i',
2647db96d56Sopenharmony_ci                     b'%+i', c_int(10))
2657db96d56Sopenharmony_ci        check_format('%.%s',
2667db96d56Sopenharmony_ci                     b'%.%s', b'abc')
2677db96d56Sopenharmony_ci
2687db96d56Sopenharmony_ci        # Issue #33817: empty strings
2697db96d56Sopenharmony_ci        check_format('',
2707db96d56Sopenharmony_ci                     b'')
2717db96d56Sopenharmony_ci        check_format('',
2727db96d56Sopenharmony_ci                     b'%s', b'')
2737db96d56Sopenharmony_ci
2747db96d56Sopenharmony_ci    # Test PyUnicode_AsWideChar()
2757db96d56Sopenharmony_ci    @support.cpython_only
2767db96d56Sopenharmony_ci    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
2777db96d56Sopenharmony_ci    def test_aswidechar(self):
2787db96d56Sopenharmony_ci        from _testcapi import unicode_aswidechar
2797db96d56Sopenharmony_ci        import_helper.import_module('ctypes')
2807db96d56Sopenharmony_ci        from ctypes import c_wchar, sizeof
2817db96d56Sopenharmony_ci
2827db96d56Sopenharmony_ci        wchar, size = unicode_aswidechar('abcdef', 2)
2837db96d56Sopenharmony_ci        self.assertEqual(size, 2)
2847db96d56Sopenharmony_ci        self.assertEqual(wchar, 'ab')
2857db96d56Sopenharmony_ci
2867db96d56Sopenharmony_ci        wchar, size = unicode_aswidechar('abc', 3)
2877db96d56Sopenharmony_ci        self.assertEqual(size, 3)
2887db96d56Sopenharmony_ci        self.assertEqual(wchar, 'abc')
2897db96d56Sopenharmony_ci
2907db96d56Sopenharmony_ci        wchar, size = unicode_aswidechar('abc', 4)
2917db96d56Sopenharmony_ci        self.assertEqual(size, 3)
2927db96d56Sopenharmony_ci        self.assertEqual(wchar, 'abc\0')
2937db96d56Sopenharmony_ci
2947db96d56Sopenharmony_ci        wchar, size = unicode_aswidechar('abc', 10)
2957db96d56Sopenharmony_ci        self.assertEqual(size, 3)
2967db96d56Sopenharmony_ci        self.assertEqual(wchar, 'abc\0')
2977db96d56Sopenharmony_ci
2987db96d56Sopenharmony_ci        wchar, size = unicode_aswidechar('abc\0def', 20)
2997db96d56Sopenharmony_ci        self.assertEqual(size, 7)
3007db96d56Sopenharmony_ci        self.assertEqual(wchar, 'abc\0def\0')
3017db96d56Sopenharmony_ci
3027db96d56Sopenharmony_ci        nonbmp = chr(0x10ffff)
3037db96d56Sopenharmony_ci        if sizeof(c_wchar) == 2:
3047db96d56Sopenharmony_ci            buflen = 3
3057db96d56Sopenharmony_ci            nchar = 2
3067db96d56Sopenharmony_ci        else: # sizeof(c_wchar) == 4
3077db96d56Sopenharmony_ci            buflen = 2
3087db96d56Sopenharmony_ci            nchar = 1
3097db96d56Sopenharmony_ci        wchar, size = unicode_aswidechar(nonbmp, buflen)
3107db96d56Sopenharmony_ci        self.assertEqual(size, nchar)
3117db96d56Sopenharmony_ci        self.assertEqual(wchar, nonbmp + '\0')
3127db96d56Sopenharmony_ci
3137db96d56Sopenharmony_ci    # Test PyUnicode_AsWideCharString()
3147db96d56Sopenharmony_ci    @support.cpython_only
3157db96d56Sopenharmony_ci    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
3167db96d56Sopenharmony_ci    def test_aswidecharstring(self):
3177db96d56Sopenharmony_ci        from _testcapi import unicode_aswidecharstring
3187db96d56Sopenharmony_ci        import_helper.import_module('ctypes')
3197db96d56Sopenharmony_ci        from ctypes import c_wchar, sizeof
3207db96d56Sopenharmony_ci
3217db96d56Sopenharmony_ci        wchar, size = unicode_aswidecharstring('abc')
3227db96d56Sopenharmony_ci        self.assertEqual(size, 3)
3237db96d56Sopenharmony_ci        self.assertEqual(wchar, 'abc\0')
3247db96d56Sopenharmony_ci
3257db96d56Sopenharmony_ci        wchar, size = unicode_aswidecharstring('abc\0def')
3267db96d56Sopenharmony_ci        self.assertEqual(size, 7)
3277db96d56Sopenharmony_ci        self.assertEqual(wchar, 'abc\0def\0')
3287db96d56Sopenharmony_ci
3297db96d56Sopenharmony_ci        nonbmp = chr(0x10ffff)
3307db96d56Sopenharmony_ci        if sizeof(c_wchar) == 2:
3317db96d56Sopenharmony_ci            nchar = 2
3327db96d56Sopenharmony_ci        else: # sizeof(c_wchar) == 4
3337db96d56Sopenharmony_ci            nchar = 1
3347db96d56Sopenharmony_ci        wchar, size = unicode_aswidecharstring(nonbmp)
3357db96d56Sopenharmony_ci        self.assertEqual(size, nchar)
3367db96d56Sopenharmony_ci        self.assertEqual(wchar, nonbmp + '\0')
3377db96d56Sopenharmony_ci
3387db96d56Sopenharmony_ci    # Test PyUnicode_AsUCS4()
3397db96d56Sopenharmony_ci    @support.cpython_only
3407db96d56Sopenharmony_ci    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
3417db96d56Sopenharmony_ci    def test_asucs4(self):
3427db96d56Sopenharmony_ci        from _testcapi import unicode_asucs4
3437db96d56Sopenharmony_ci        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
3447db96d56Sopenharmony_ci                  'a\ud800b\udfffc', '\ud834\udd1e']:
3457db96d56Sopenharmony_ci            l = len(s)
3467db96d56Sopenharmony_ci            self.assertEqual(unicode_asucs4(s, l, True), s+'\0')
3477db96d56Sopenharmony_ci            self.assertEqual(unicode_asucs4(s, l, False), s+'\uffff')
3487db96d56Sopenharmony_ci            self.assertEqual(unicode_asucs4(s, l+1, True), s+'\0\uffff')
3497db96d56Sopenharmony_ci            self.assertEqual(unicode_asucs4(s, l+1, False), s+'\0\uffff')
3507db96d56Sopenharmony_ci            self.assertRaises(SystemError, unicode_asucs4, s, l-1, True)
3517db96d56Sopenharmony_ci            self.assertRaises(SystemError, unicode_asucs4, s, l-2, False)
3527db96d56Sopenharmony_ci            s = '\0'.join([s, s])
3537db96d56Sopenharmony_ci            self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0')
3547db96d56Sopenharmony_ci            self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff')
3557db96d56Sopenharmony_ci
3567db96d56Sopenharmony_ci    # Test PyUnicode_AsUTF8()
3577db96d56Sopenharmony_ci    @support.cpython_only
3587db96d56Sopenharmony_ci    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
3597db96d56Sopenharmony_ci    def test_asutf8(self):
3607db96d56Sopenharmony_ci        from _testcapi import unicode_asutf8
3617db96d56Sopenharmony_ci
3627db96d56Sopenharmony_ci        bmp = '\u0100'
3637db96d56Sopenharmony_ci        bmp2 = '\uffff'
3647db96d56Sopenharmony_ci        nonbmp = chr(0x10ffff)
3657db96d56Sopenharmony_ci
3667db96d56Sopenharmony_ci        self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80')
3677db96d56Sopenharmony_ci        self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf')
3687db96d56Sopenharmony_ci        self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf')
3697db96d56Sopenharmony_ci        self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc')
3707db96d56Sopenharmony_ci
3717db96d56Sopenharmony_ci    # Test PyUnicode_AsUTF8AndSize()
3727db96d56Sopenharmony_ci    @support.cpython_only
3737db96d56Sopenharmony_ci    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
3747db96d56Sopenharmony_ci    def test_asutf8andsize(self):
3757db96d56Sopenharmony_ci        from _testcapi import unicode_asutf8andsize
3767db96d56Sopenharmony_ci
3777db96d56Sopenharmony_ci        bmp = '\u0100'
3787db96d56Sopenharmony_ci        bmp2 = '\uffff'
3797db96d56Sopenharmony_ci        nonbmp = chr(0x10ffff)
3807db96d56Sopenharmony_ci
3817db96d56Sopenharmony_ci        self.assertEqual(unicode_asutf8andsize(bmp), (b'\xc4\x80', 2))
3827db96d56Sopenharmony_ci        self.assertEqual(unicode_asutf8andsize(bmp2), (b'\xef\xbf\xbf', 3))
3837db96d56Sopenharmony_ci        self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
3847db96d56Sopenharmony_ci        self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
3857db96d56Sopenharmony_ci
3867db96d56Sopenharmony_ci    # Test PyUnicode_FindChar()
3877db96d56Sopenharmony_ci    @support.cpython_only
3887db96d56Sopenharmony_ci    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
3897db96d56Sopenharmony_ci    def test_findchar(self):
3907db96d56Sopenharmony_ci        from _testcapi import unicode_findchar
3917db96d56Sopenharmony_ci
3927db96d56Sopenharmony_ci        for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
3937db96d56Sopenharmony_ci            for i, ch in enumerate(str):
3947db96d56Sopenharmony_ci                self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i)
3957db96d56Sopenharmony_ci                self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i)
3967db96d56Sopenharmony_ci
3977db96d56Sopenharmony_ci        str = "!>_<!"
3987db96d56Sopenharmony_ci        self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1)
3997db96d56Sopenharmony_ci        self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1)
4007db96d56Sopenharmony_ci        # start < end
4017db96d56Sopenharmony_ci        self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4)
4027db96d56Sopenharmony_ci        self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4)
4037db96d56Sopenharmony_ci        # start >= end
4047db96d56Sopenharmony_ci        self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1)
4057db96d56Sopenharmony_ci        self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1)
4067db96d56Sopenharmony_ci        # negative
4077db96d56Sopenharmony_ci        self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
4087db96d56Sopenharmony_ci        self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
4097db96d56Sopenharmony_ci
4107db96d56Sopenharmony_ci    # Test PyUnicode_CopyCharacters()
4117db96d56Sopenharmony_ci    @support.cpython_only
4127db96d56Sopenharmony_ci    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
4137db96d56Sopenharmony_ci    def test_copycharacters(self):
4147db96d56Sopenharmony_ci        from _testcapi import unicode_copycharacters
4157db96d56Sopenharmony_ci
4167db96d56Sopenharmony_ci        strings = [
4177db96d56Sopenharmony_ci            'abcde', '\xa1\xa2\xa3\xa4\xa5',
4187db96d56Sopenharmony_ci            '\u4f60\u597d\u4e16\u754c\uff01',
4197db96d56Sopenharmony_ci            '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
4207db96d56Sopenharmony_ci        ]
4217db96d56Sopenharmony_ci
4227db96d56Sopenharmony_ci        for idx, from_ in enumerate(strings):
4237db96d56Sopenharmony_ci            # wide -> narrow: exceed maxchar limitation
4247db96d56Sopenharmony_ci            for to in strings[:idx]:
4257db96d56Sopenharmony_ci                self.assertRaises(
4267db96d56Sopenharmony_ci                    SystemError,
4277db96d56Sopenharmony_ci                    unicode_copycharacters, to, 0, from_, 0, 5
4287db96d56Sopenharmony_ci                )
4297db96d56Sopenharmony_ci            # same kind
4307db96d56Sopenharmony_ci            for from_start in range(5):
4317db96d56Sopenharmony_ci                self.assertEqual(
4327db96d56Sopenharmony_ci                    unicode_copycharacters(from_, 0, from_, from_start, 5),
4337db96d56Sopenharmony_ci                    (from_[from_start:from_start+5].ljust(5, '\0'),
4347db96d56Sopenharmony_ci                     5-from_start)
4357db96d56Sopenharmony_ci                )
4367db96d56Sopenharmony_ci            for to_start in range(5):
4377db96d56Sopenharmony_ci                self.assertEqual(
4387db96d56Sopenharmony_ci                    unicode_copycharacters(from_, to_start, from_, to_start, 5),
4397db96d56Sopenharmony_ci                    (from_[to_start:to_start+5].rjust(5, '\0'),
4407db96d56Sopenharmony_ci                     5-to_start)
4417db96d56Sopenharmony_ci                )
4427db96d56Sopenharmony_ci            # narrow -> wide
4437db96d56Sopenharmony_ci            # Tests omitted since this creates invalid strings.
4447db96d56Sopenharmony_ci
4457db96d56Sopenharmony_ci        s = strings[0]
4467db96d56Sopenharmony_ci        self.assertRaises(IndexError, unicode_copycharacters, s, 6, s, 0, 5)
4477db96d56Sopenharmony_ci        self.assertRaises(IndexError, unicode_copycharacters, s, -1, s, 0, 5)
4487db96d56Sopenharmony_ci        self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, 6, 5)
4497db96d56Sopenharmony_ci        self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, -1, 5)
4507db96d56Sopenharmony_ci        self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, 5)
4517db96d56Sopenharmony_ci        self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1)
4527db96d56Sopenharmony_ci        self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)
4537db96d56Sopenharmony_ci
4547db96d56Sopenharmony_ci    @support.cpython_only
4557db96d56Sopenharmony_ci    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
4567db96d56Sopenharmony_ci    def test_pep393_utf8_caching_bug(self):
4577db96d56Sopenharmony_ci        # Issue #25709: Problem with string concatenation and utf-8 cache
4587db96d56Sopenharmony_ci        from _testcapi import getargs_s_hash
4597db96d56Sopenharmony_ci        for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
4607db96d56Sopenharmony_ci            s = ''
4617db96d56Sopenharmony_ci            for i in range(5):
4627db96d56Sopenharmony_ci                # Due to CPython specific optimization the 's' string can be
4637db96d56Sopenharmony_ci                # resized in-place.
4647db96d56Sopenharmony_ci                s += chr(k)
4657db96d56Sopenharmony_ci                # Parsing with the "s#" format code calls indirectly
4667db96d56Sopenharmony_ci                # PyUnicode_AsUTF8AndSize() which creates the UTF-8
4677db96d56Sopenharmony_ci                # encoded string cached in the Unicode object.
4687db96d56Sopenharmony_ci                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
4697db96d56Sopenharmony_ci                # Check that the second call returns the same result
4707db96d56Sopenharmony_ci                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
4717db96d56Sopenharmony_ci
4727db96d56Sopenharmony_ci
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
475