"""
Test the implementation of PEP 540: the UTF-8 Mode.
"""
47db96d56Sopenharmony_ci
57db96d56Sopenharmony_ciimport locale
67db96d56Sopenharmony_ciimport subprocess
77db96d56Sopenharmony_ciimport sys
87db96d56Sopenharmony_ciimport textwrap
97db96d56Sopenharmony_ciimport unittest
107db96d56Sopenharmony_cifrom test import support
117db96d56Sopenharmony_cifrom test.support.script_helper import assert_python_ok, assert_python_failure
127db96d56Sopenharmony_cifrom test.support import os_helper
137db96d56Sopenharmony_ci
147db96d56Sopenharmony_ci
# Platform flags used to skip or adapt tests.
MS_WINDOWS = sys.platform == 'win32'
VXWORKS = sys.platform == "vxworks"
# Locale names treated as "the POSIX locale" by the tests below.
POSIX_LOCALES = ('C', 'POSIX')
187db96d56Sopenharmony_ci
class UTF8ModeTests(unittest.TestCase):
    """Check how the UTF-8 Mode gets enabled and what it changes.

    Most tests run a child interpreter through get_output() and compare
    what the child prints with the expected value.
    """

    # Keyword arguments passed to the script helpers for every child
    # interpreter; individual tests override them through get_output(**kw).
    DEFAULT_ENV = {
        'PYTHONUTF8': '',
        'PYTHONLEGACYWINDOWSFSENCODING': '',
        'PYTHONCOERCECLOCALE': '0',
    }

    def posix_locale(self):
        """Return True if the current LC_CTYPE locale is in POSIX_LOCALES."""
        loc = locale.setlocale(locale.LC_CTYPE, None)
        return loc in POSIX_LOCALES

    def get_output(self, *args, failure=False, **kw):
        """Run a child interpreter and return its decoded output.

        *args* are passed through to the script helper as the command line
        arguments of the child.  *kw* is merged over DEFAULT_ENV and passed
        as keyword arguments (the helpers treat them as environment
        variables).

        If *failure* is true, the child must fail and element [2] of the
        helper result (stderr) is returned; otherwise the child must
        succeed and element [1] (stdout) is returned.  Trailing newline
        and carriage return characters are stripped.
        """
        kw = dict(self.DEFAULT_ENV, **kw)
        if failure:
            out = assert_python_failure(*args, **kw)
            out = out[2]
        else:
            out = assert_python_ok(*args, **kw)
            out = out[1]
        return out.decode().rstrip("\n\r")

    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
    def test_posix_locale(self):
        # Running with LC_ALL set to a POSIX locale must enable the
        # UTF-8 Mode.
        code = 'import sys; print(sys.flags.utf8_mode)'

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-c', code, LC_ALL=loc)
                self.assertEqual(out, '1')

    def test_xoption(self):
        # The -X utf8 command line option controls sys.flags.utf8_mode.
        code = 'import sys; print(sys.flags.utf8_mode)'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, '1')

        # undocumented but accepted syntax: -X utf8=1
        out = self.get_output('-X', 'utf8=1', '-c', code)
        self.assertEqual(out, '1')

        out = self.get_output('-X', 'utf8=0', '-c', code)
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
            # and has the priority over -X utf8
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

    def test_env_var(self):
        # The PYTHONUTF8 environment variable controls sys.flags.utf8_mode.
        code = 'import sys; print(sys.flags.utf8_mode)'

        out = self.get_output('-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '1')

        out = self.get_output('-c', code, PYTHONUTF8='0')
        self.assertEqual(out, '0')

        # -X utf8 has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over PYTHONUTF8
            out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

        # Cannot test with the POSIX locale, since the POSIX locale enables
        # the UTF-8 mode
        if not self.posix_locale():
            # PYTHONUTF8 should be ignored if -E is used
            out = self.get_output('-E', '-c', code, PYTHONUTF8='1')
            self.assertEqual(out, '0')

        # invalid mode: the child must fail and report the bad value on
        # stderr (get_output() already stripped the trailing newline).
        out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True)
        self.assertIn('invalid PYTHONUTF8 environment variable value', out)

    def test_filesystemencoding(self):
        # With -X utf8 the filesystem encoding is UTF-8; the error handler
        # differs between Windows and other platforms.
        code = textwrap.dedent('''
            import sys
            print("{}/{}".format(sys.getfilesystemencoding(),
                                 sys.getfilesystemencodeerrors()))
        ''')

        if MS_WINDOWS:
            expected = 'utf-8/surrogatepass'
        else:
            expected = 'utf-8/surrogateescape'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, expected)

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over -X utf8 and PYTHONUTF8
            # NOTE(review): 'strict' is not one of the PYTHONUTF8 values
            # exercised by test_env_var ('0'/'1'); presumably it is never
            # parsed here because the legacy variable wins -- confirm.
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONUTF8='strict',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, 'mbcs/replace')

    def test_stdio(self):
        # Encoding and error handler of the standard streams with -X utf8,
        # and how PYTHONIOENCODING interacts with it.
        code = textwrap.dedent('''
            import sys
            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
        ''')

        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING='')
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/surrogateescape',
                          'stdout: utf-8/surrogateescape',
                          'stderr: utf-8/backslashreplace'])

        # PYTHONIOENCODING has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING="latin1")
        self.assertEqual(out.splitlines(),
                         ['stdin: iso8859-1/strict',
                          'stdout: iso8859-1/strict',
                          'stderr: iso8859-1/backslashreplace'])

        # Only the error handler is given: the encoding stays utf-8.
        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING=":namereplace")
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/namereplace',
                          'stdout: utf-8/namereplace',
                          'stderr: utf-8/backslashreplace'])

    def test_io(self):
        # open() without explicit encoding/errors uses utf-8/strict when
        # PYTHONUTF8=1.
        code = textwrap.dedent('''
            import sys
            filename = sys.argv[1]
            with open(filename) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')
        # Any existing readable file will do: use this test file.
        filename = __file__

        out = self.get_output('-c', code, filename, PYTHONUTF8='1')
        self.assertEqual(out.lower(), 'utf-8/strict')

    def _check_io_encoding(self, module, encoding=None, errors=None):
        """Check open() imported from *module* in a child with PYTHONUTF8=1.

        The child opens this test file, passing *encoding* and/or *errors*
        only when they are given, and prints the resulting
        "encoding/errors" pair.  An omitted *encoding* is expected to be
        reported as 'utf-8' and an omitted *errors* as 'strict'.
        """
        filename = __file__

        # Encoding explicitly set
        args = []
        if encoding:
            args.append(f'encoding={encoding!r}')
        if errors:
            args.append(f'errors={errors!r}')
        code = textwrap.dedent('''
            import sys
            from %s import open
            filename = sys.argv[1]
            with open(filename, %s) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''') % (module, ', '.join(args))
        out = self.get_output('-c', code, filename,
                              PYTHONUTF8='1')

        if not encoding:
            encoding = 'utf-8'
        if not errors:
            errors = 'strict'
        self.assertEqual(out.lower(), f'{encoding}/{errors}')

    def check_io_encoding(self, module):
        """Run _check_io_encoding() for *module* with explicit arguments.

        Covers an explicit encoding, an explicit error handler, and both
        at once.
        """
        self._check_io_encoding(module, encoding="latin1")
        self._check_io_encoding(module, errors="namereplace")
        self._check_io_encoding(module,
                                encoding="latin1", errors="namereplace")

    def test_io_encoding(self):
        self.check_io_encoding('io')

    def test_pyio_encoding(self):
        self.check_io_encoding('_pyio')

    def test_locale_getpreferredencoding(self):
        # With -X utf8, locale.getpreferredencoding() reports utf-8 for
        # both values of its argument, also when LC_ALL is a POSIX locale.
        code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, 'utf-8 utf-8')

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
                self.assertEqual(out, 'utf-8 utf-8')

    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
    def test_cmd_line(self):
        # How a non-ASCII bytes command line argument is decoded into
        # sys.argv, with the UTF-8 Mode enabled and disabled.
        arg = 'h\xe9\u20ac'.encode('utf-8')
        arg_utf8 = arg.decode('utf-8')
        arg_ascii = arg.decode('ascii', 'surrogateescape')
        code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'

        def check(utf8_opt, expected, **kw):
            out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
            # The child prints "<encoding>:<argv>"; only argv is compared,
            # the full output is used as the failure message.
            args = out.partition(':')[2].rstrip()
            self.assertEqual(args, ascii(expected), out)

        check('utf8', [arg_utf8])
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8', [arg_utf8], LC_ALL=loc)

        # With the UTF-8 Mode disabled under a POSIX locale, the expected
        # decoding of the argument depends on the platform.
        if sys.platform == 'darwin' or support.is_android or VXWORKS:
            c_arg = arg_utf8
        elif sys.platform.startswith("aix"):
            c_arg = arg.decode('iso-8859-1')
        else:
            c_arg = arg_ascii
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8=0', [c_arg], LC_ALL=loc)

    def test_optim_level(self):
        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
        # twice when -X utf8 requires to parse the configuration twice (when
        # the encoding changes after reading the configuration, the
        # configuration is read again with the new encoding).
        code = 'import sys; print(sys.flags.optimize)'
        out = self.get_output('-X', 'utf8', '-O', '-c', code)
        self.assertEqual(out, '1')
        out = self.get_output('-X', 'utf8', '-OO', '-c', code)
        self.assertEqual(out, '2')

        # Same check for the -E flag.
        code = 'import sys; print(sys.flags.ignore_environment)'
        out = self.get_output('-X', 'utf8', '-E', '-c', code)
        self.assertEqual(out, '1')

    @unittest.skipIf(MS_WINDOWS,
                     "os.device_encoding() doesn't implement "
                     "the UTF-8 Mode on Windows")
    @support.requires_subprocess()
    def test_device_encoding(self):
        # Use stdout as TTY
        if not sys.stdout.isatty():
            self.skipTest("sys.stdout is not a TTY")

        # The child writes its result to a file, since its stdout has to
        # stay connected to the TTY.
        filename = 'out.txt'
        self.addCleanup(os_helper.unlink, filename)

        code = (f'import os, sys; fd = sys.stdout.fileno(); '
                f'out = open({filename!r}, "w", encoding="utf-8"); '
                f'print(os.isatty(fd), os.device_encoding(fd), file=out); '
                f'out.close()')
        cmd = [sys.executable, '-X', 'utf8', '-c', code]
        # The stdout TTY is inherited to the child process
        proc = subprocess.run(cmd, text=True)
        self.assertEqual(proc.returncode, 0, proc)

        # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY
        with open(filename, encoding="utf8") as fp:
            out = fp.read().rstrip()
        self.assertEqual(out, 'True utf-8')
2817db96d56Sopenharmony_ci
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
284