17db96d56Sopenharmony_ciimport codecs
27db96d56Sopenharmony_ciimport html.entities
37db96d56Sopenharmony_ciimport itertools
47db96d56Sopenharmony_ciimport sys
57db96d56Sopenharmony_ciimport unicodedata
67db96d56Sopenharmony_ciimport unittest
77db96d56Sopenharmony_ci
87db96d56Sopenharmony_ci
class PosReturn:
    """Configurable error callback whose resume position is the ``pos`` attribute."""

    def __init__(self):
        self.pos = 0

    def handle(self, exc):
        """Return ``("<?>", pos)`` using the value ``pos`` had on entry.

        When that position (negative values count from the end of
        ``exc.object``) does not lie beyond ``exc.start``, ``pos`` is reset
        to ``len(exc.object)`` so the following call terminates instead of
        looping endlessly.
        """
        requested = self.pos
        size = len(exc.object)
        # Normalise a negative (end-relative) position to an absolute one.
        absolute = requested if requested >= 0 else size + requested
        if absolute <= exc.start:
            # No progress was made this time: finish on the next call.
            self.pos = size
        return ("<?>", requested)
257db96d56Sopenharmony_ci
class RepeatedPosReturn:
    """Error callback that returns ``pos`` as resume position ``count`` times."""

    def __init__(self, repl="<?>"):
        self.repl = repl
        self.pos = 0
        self.count = 0

    def handle(self, exc):
        """Return ``(repl, pos)`` while ``count`` is positive, else ``(repl, exc.end)``.

        Each call that returns ``pos`` decrements ``count`` by one.
        """
        if self.count <= 0:
            return (self.repl, exc.end)
        self.count -= 1
        return (self.repl, self.pos)
377db96d56Sopenharmony_ci
class BadStartUnicodeEncodeError(UnicodeEncodeError):
    """A UnicodeEncodeError whose ``start`` attribute is rebound to a list."""

    def __init__(self):
        super().__init__("ascii", "", 0, 1, "bad")
        self.start = []
437db96d56Sopenharmony_ci
class BadObjectUnicodeEncodeError(UnicodeEncodeError):
    """A UnicodeEncodeError whose ``object`` attribute is rebound to a list."""

    def __init__(self):
        super().__init__("ascii", "", 0, 1, "bad")
        self.object = []
497db96d56Sopenharmony_ci
class NoEndUnicodeDecodeError(UnicodeDecodeError):
    """A UnicodeDecodeError whose ``end`` attribute is deleted after construction."""

    def __init__(self):
        super().__init__("ascii", bytearray(b""), 0, 1, "bad")
        del self.end
557db96d56Sopenharmony_ci
class BadObjectUnicodeDecodeError(UnicodeDecodeError):
    """A UnicodeDecodeError whose ``object`` attribute is rebound to a list."""

    def __init__(self):
        super().__init__("ascii", bytearray(b""), 0, 1, "bad")
        self.object = []
617db96d56Sopenharmony_ci
class NoStartUnicodeTranslateError(UnicodeTranslateError):
    """A UnicodeTranslateError whose ``start`` attribute is deleted after construction."""

    def __init__(self):
        super().__init__("", 0, 1, "bad")
        del self.start
677db96d56Sopenharmony_ci
class NoEndUnicodeTranslateError(UnicodeTranslateError):
    """A UnicodeTranslateError whose ``end`` attribute is deleted after construction."""

    def __init__(self):
        super().__init__("", 0, 1, "bad")
        del self.end
737db96d56Sopenharmony_ci
class NoObjectUnicodeTranslateError(UnicodeTranslateError):
    """A UnicodeTranslateError whose ``object`` attribute is deleted after construction."""

    def __init__(self):
        super().__init__("", 0, 1, "bad")
        del self.object
797db96d56Sopenharmony_ci
807db96d56Sopenharmony_ciclass CodecCallbackTest(unittest.TestCase):
817db96d56Sopenharmony_ci
827db96d56Sopenharmony_ci    def test_xmlcharrefreplace(self):
837db96d56Sopenharmony_ci        # replace unencodable characters which numeric character entities.
847db96d56Sopenharmony_ci        # For ascii, latin-1 and charmaps this is completely implemented
857db96d56Sopenharmony_ci        # in C and should be reasonably fast.
867db96d56Sopenharmony_ci        s = "\u30b9\u30d1\u30e2 \xe4nd eggs"
877db96d56Sopenharmony_ci        self.assertEqual(
887db96d56Sopenharmony_ci            s.encode("ascii", "xmlcharrefreplace"),
897db96d56Sopenharmony_ci            b"&#12473;&#12497;&#12514; &#228;nd eggs"
907db96d56Sopenharmony_ci        )
917db96d56Sopenharmony_ci        self.assertEqual(
927db96d56Sopenharmony_ci            s.encode("latin-1", "xmlcharrefreplace"),
937db96d56Sopenharmony_ci            b"&#12473;&#12497;&#12514; \xe4nd eggs"
947db96d56Sopenharmony_ci        )
957db96d56Sopenharmony_ci
967db96d56Sopenharmony_ci    def test_xmlcharnamereplace(self):
977db96d56Sopenharmony_ci        # This time use a named character entity for unencodable
987db96d56Sopenharmony_ci        # characters, if one is available.
997db96d56Sopenharmony_ci
1007db96d56Sopenharmony_ci        def xmlcharnamereplace(exc):
1017db96d56Sopenharmony_ci            if not isinstance(exc, UnicodeEncodeError):
1027db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
1037db96d56Sopenharmony_ci            l = []
1047db96d56Sopenharmony_ci            for c in exc.object[exc.start:exc.end]:
1057db96d56Sopenharmony_ci                try:
1067db96d56Sopenharmony_ci                    l.append("&%s;" % html.entities.codepoint2name[ord(c)])
1077db96d56Sopenharmony_ci                except KeyError:
1087db96d56Sopenharmony_ci                    l.append("&#%d;" % ord(c))
1097db96d56Sopenharmony_ci            return ("".join(l), exc.end)
1107db96d56Sopenharmony_ci
1117db96d56Sopenharmony_ci        codecs.register_error(
1127db96d56Sopenharmony_ci            "test.xmlcharnamereplace", xmlcharnamereplace)
1137db96d56Sopenharmony_ci
1147db96d56Sopenharmony_ci        sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
1157db96d56Sopenharmony_ci        sout = b"&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
1167db96d56Sopenharmony_ci        self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
1177db96d56Sopenharmony_ci        sout = b"\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
1187db96d56Sopenharmony_ci        self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
1197db96d56Sopenharmony_ci        sout = b"\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
1207db96d56Sopenharmony_ci        self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
1217db96d56Sopenharmony_ci
1227db96d56Sopenharmony_ci    def test_uninamereplace(self):
1237db96d56Sopenharmony_ci        # We're using the names from the unicode database this time,
1247db96d56Sopenharmony_ci        # and we're doing "syntax highlighting" here, i.e. we include
1257db96d56Sopenharmony_ci        # the replaced text in ANSI escape sequences. For this it is
1267db96d56Sopenharmony_ci        # useful that the error handler is not called for every single
1277db96d56Sopenharmony_ci        # unencodable character, but for a complete sequence of
1287db96d56Sopenharmony_ci        # unencodable characters, otherwise we would output many
1297db96d56Sopenharmony_ci        # unnecessary escape sequences.
1307db96d56Sopenharmony_ci
1317db96d56Sopenharmony_ci        def uninamereplace(exc):
1327db96d56Sopenharmony_ci            if not isinstance(exc, UnicodeEncodeError):
1337db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
1347db96d56Sopenharmony_ci            l = []
1357db96d56Sopenharmony_ci            for c in exc.object[exc.start:exc.end]:
1367db96d56Sopenharmony_ci                l.append(unicodedata.name(c, "0x%x" % ord(c)))
1377db96d56Sopenharmony_ci            return ("\033[1m%s\033[0m" % ", ".join(l), exc.end)
1387db96d56Sopenharmony_ci
1397db96d56Sopenharmony_ci        codecs.register_error(
1407db96d56Sopenharmony_ci            "test.uninamereplace", uninamereplace)
1417db96d56Sopenharmony_ci
1427db96d56Sopenharmony_ci        sin = "\xac\u1234\u20ac\u8000"
1437db96d56Sopenharmony_ci        sout = b"\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
1447db96d56Sopenharmony_ci        self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
1457db96d56Sopenharmony_ci
1467db96d56Sopenharmony_ci        sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
1477db96d56Sopenharmony_ci        self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
1487db96d56Sopenharmony_ci
1497db96d56Sopenharmony_ci        sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
1507db96d56Sopenharmony_ci        self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
1517db96d56Sopenharmony_ci
1527db96d56Sopenharmony_ci    def test_backslashescape(self):
1537db96d56Sopenharmony_ci        # Does the same as the "unicode-escape" encoding, but with different
1547db96d56Sopenharmony_ci        # base encodings.
1557db96d56Sopenharmony_ci        sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
1567db96d56Sopenharmony_ci        sout = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
1577db96d56Sopenharmony_ci        self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
1587db96d56Sopenharmony_ci
1597db96d56Sopenharmony_ci        sout = b"a\xac\\u1234\\u20ac\\u8000\\U0010ffff"
1607db96d56Sopenharmony_ci        self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
1617db96d56Sopenharmony_ci
1627db96d56Sopenharmony_ci        sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
1637db96d56Sopenharmony_ci        self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
1647db96d56Sopenharmony_ci
1657db96d56Sopenharmony_ci    def test_nameescape(self):
1667db96d56Sopenharmony_ci        # Does the same as backslashescape, but prefers ``\N{...}`` escape
1677db96d56Sopenharmony_ci        # sequences.
1687db96d56Sopenharmony_ci        sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
1697db96d56Sopenharmony_ci        sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
1707db96d56Sopenharmony_ci                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
1717db96d56Sopenharmony_ci        self.assertEqual(sin.encode("ascii", "namereplace"), sout)
1727db96d56Sopenharmony_ci
1737db96d56Sopenharmony_ci        sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
1747db96d56Sopenharmony_ci                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
1757db96d56Sopenharmony_ci        self.assertEqual(sin.encode("latin-1", "namereplace"), sout)
1767db96d56Sopenharmony_ci
1777db96d56Sopenharmony_ci        sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4'
1787db96d56Sopenharmony_ci                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
1797db96d56Sopenharmony_ci        self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout)
1807db96d56Sopenharmony_ci
1817db96d56Sopenharmony_ci    def test_decoding_callbacks(self):
1827db96d56Sopenharmony_ci        # This is a test for a decoding callback handler
1837db96d56Sopenharmony_ci        # that allows the decoding of the invalid sequence
1847db96d56Sopenharmony_ci        # "\xc0\x80" and returns "\x00" instead of raising an error.
1857db96d56Sopenharmony_ci        # All other illegal sequences will be handled strictly.
1867db96d56Sopenharmony_ci        def relaxedutf8(exc):
1877db96d56Sopenharmony_ci            if not isinstance(exc, UnicodeDecodeError):
1887db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
1897db96d56Sopenharmony_ci            if exc.object[exc.start:exc.start+2] == b"\xc0\x80":
1907db96d56Sopenharmony_ci                return ("\x00", exc.start+2) # retry after two bytes
1917db96d56Sopenharmony_ci            else:
1927db96d56Sopenharmony_ci                raise exc
1937db96d56Sopenharmony_ci
1947db96d56Sopenharmony_ci        codecs.register_error("test.relaxedutf8", relaxedutf8)
1957db96d56Sopenharmony_ci
1967db96d56Sopenharmony_ci        # all the "\xc0\x80" will be decoded to "\x00"
1977db96d56Sopenharmony_ci        sin = b"a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
1987db96d56Sopenharmony_ci        sout = "a\x00b\x00c\xfc\x00\x00"
1997db96d56Sopenharmony_ci        self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
2007db96d56Sopenharmony_ci
2017db96d56Sopenharmony_ci        # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised
2027db96d56Sopenharmony_ci        sin = b"\xc0\x80\xc0\x81"
2037db96d56Sopenharmony_ci        self.assertRaises(UnicodeDecodeError, sin.decode,
2047db96d56Sopenharmony_ci                          "utf-8", "test.relaxedutf8")
2057db96d56Sopenharmony_ci
2067db96d56Sopenharmony_ci    def test_charmapencode(self):
2077db96d56Sopenharmony_ci        # For charmap encodings the replacement string will be
2087db96d56Sopenharmony_ci        # mapped through the encoding again. This means, that
2097db96d56Sopenharmony_ci        # to be able to use e.g. the "replace" handler, the
2107db96d56Sopenharmony_ci        # charmap has to have a mapping for "?".
2117db96d56Sopenharmony_ci        charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh")
2127db96d56Sopenharmony_ci        sin = "abc"
2137db96d56Sopenharmony_ci        sout = b"AABBCC"
2147db96d56Sopenharmony_ci        self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
2157db96d56Sopenharmony_ci
2167db96d56Sopenharmony_ci        sin = "abcA"
2177db96d56Sopenharmony_ci        self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
2187db96d56Sopenharmony_ci
2197db96d56Sopenharmony_ci        charmap[ord("?")] = b"XYZ"
2207db96d56Sopenharmony_ci        sin = "abcDEF"
2217db96d56Sopenharmony_ci        sout = b"AABBCCXYZXYZXYZ"
2227db96d56Sopenharmony_ci        self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
2237db96d56Sopenharmony_ci
2247db96d56Sopenharmony_ci        charmap[ord("?")] = "XYZ" # wrong type in mapping
2257db96d56Sopenharmony_ci        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
2267db96d56Sopenharmony_ci
2277db96d56Sopenharmony_ci    def test_callbacks(self):
2287db96d56Sopenharmony_ci        def handler1(exc):
2297db96d56Sopenharmony_ci            r = range(exc.start, exc.end)
2307db96d56Sopenharmony_ci            if isinstance(exc, UnicodeEncodeError):
2317db96d56Sopenharmony_ci                l = ["<%d>" % ord(exc.object[pos]) for pos in r]
2327db96d56Sopenharmony_ci            elif isinstance(exc, UnicodeDecodeError):
2337db96d56Sopenharmony_ci                l = ["<%d>" % exc.object[pos] for pos in r]
2347db96d56Sopenharmony_ci            else:
2357db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
2367db96d56Sopenharmony_ci            return ("[%s]" % "".join(l), exc.end)
2377db96d56Sopenharmony_ci
2387db96d56Sopenharmony_ci        codecs.register_error("test.handler1", handler1)
2397db96d56Sopenharmony_ci
2407db96d56Sopenharmony_ci        def handler2(exc):
2417db96d56Sopenharmony_ci            if not isinstance(exc, UnicodeDecodeError):
2427db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
2437db96d56Sopenharmony_ci            l = ["<%d>" % exc.object[pos] for pos in range(exc.start, exc.end)]
2447db96d56Sopenharmony_ci            return ("[%s]" % "".join(l), exc.end+1) # skip one character
2457db96d56Sopenharmony_ci
2467db96d56Sopenharmony_ci        codecs.register_error("test.handler2", handler2)
2477db96d56Sopenharmony_ci
2487db96d56Sopenharmony_ci        s = b"\x00\x81\x7f\x80\xff"
2497db96d56Sopenharmony_ci
2507db96d56Sopenharmony_ci        self.assertEqual(
2517db96d56Sopenharmony_ci            s.decode("ascii", "test.handler1"),
2527db96d56Sopenharmony_ci            "\x00[<129>]\x7f[<128>][<255>]"
2537db96d56Sopenharmony_ci        )
2547db96d56Sopenharmony_ci        self.assertEqual(
2557db96d56Sopenharmony_ci            s.decode("ascii", "test.handler2"),
2567db96d56Sopenharmony_ci            "\x00[<129>][<128>]"
2577db96d56Sopenharmony_ci        )
2587db96d56Sopenharmony_ci
2597db96d56Sopenharmony_ci        self.assertEqual(
2607db96d56Sopenharmony_ci            b"\\u3042\\u3xxx".decode("unicode-escape", "test.handler1"),
2617db96d56Sopenharmony_ci            "\u3042[<92><117><51>]xxx"
2627db96d56Sopenharmony_ci        )
2637db96d56Sopenharmony_ci
2647db96d56Sopenharmony_ci        self.assertEqual(
2657db96d56Sopenharmony_ci            b"\\u3042\\u3xx".decode("unicode-escape", "test.handler1"),
2667db96d56Sopenharmony_ci            "\u3042[<92><117><51>]xx"
2677db96d56Sopenharmony_ci        )
2687db96d56Sopenharmony_ci
2697db96d56Sopenharmony_ci        self.assertEqual(
2707db96d56Sopenharmony_ci            codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0],
2717db96d56Sopenharmony_ci            "z[<98>][<99>]"
2727db96d56Sopenharmony_ci        )
2737db96d56Sopenharmony_ci
2747db96d56Sopenharmony_ci        self.assertEqual(
2757db96d56Sopenharmony_ci            "g\xfc\xdfrk".encode("ascii", "test.handler1"),
2767db96d56Sopenharmony_ci            b"g[<252><223>]rk"
2777db96d56Sopenharmony_ci        )
2787db96d56Sopenharmony_ci
2797db96d56Sopenharmony_ci        self.assertEqual(
2807db96d56Sopenharmony_ci            "g\xfc\xdf".encode("ascii", "test.handler1"),
2817db96d56Sopenharmony_ci            b"g[<252><223>]"
2827db96d56Sopenharmony_ci        )
2837db96d56Sopenharmony_ci
2847db96d56Sopenharmony_ci    def test_longstrings(self):
2857db96d56Sopenharmony_ci        # test long strings to check for memory overflow problems
2867db96d56Sopenharmony_ci        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
2877db96d56Sopenharmony_ci                   "backslashreplace", "namereplace"]
2887db96d56Sopenharmony_ci        # register the handlers under different names,
2897db96d56Sopenharmony_ci        # to prevent the codec from recognizing the name
2907db96d56Sopenharmony_ci        for err in errors:
2917db96d56Sopenharmony_ci            codecs.register_error("test." + err, codecs.lookup_error(err))
2927db96d56Sopenharmony_ci        l = 1000
2937db96d56Sopenharmony_ci        errors += [ "test." + err for err in errors ]
2947db96d56Sopenharmony_ci        for uni in [ s*l for s in ("x", "\u3042", "a\xe4") ]:
2957db96d56Sopenharmony_ci            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
2967db96d56Sopenharmony_ci                        "utf-8", "utf-7", "utf-16", "utf-32"):
2977db96d56Sopenharmony_ci                for err in errors:
2987db96d56Sopenharmony_ci                    try:
2997db96d56Sopenharmony_ci                        uni.encode(enc, err)
3007db96d56Sopenharmony_ci                    except UnicodeError:
3017db96d56Sopenharmony_ci                        pass
3027db96d56Sopenharmony_ci
3037db96d56Sopenharmony_ci    def check_exceptionobjectargs(self, exctype, args, msg):
3047db96d56Sopenharmony_ci        # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
3057db96d56Sopenharmony_ci        # check with one missing argument
3067db96d56Sopenharmony_ci        self.assertRaises(TypeError, exctype, *args[:-1])
3077db96d56Sopenharmony_ci        # check with one argument too much
3087db96d56Sopenharmony_ci        self.assertRaises(TypeError, exctype, *(args + ["too much"]))
3097db96d56Sopenharmony_ci        # check with one argument of the wrong type
3107db96d56Sopenharmony_ci        wrongargs = [ "spam", b"eggs", b"spam", 42, 1.0, None ]
3117db96d56Sopenharmony_ci        for i in range(len(args)):
3127db96d56Sopenharmony_ci            for wrongarg in wrongargs:
3137db96d56Sopenharmony_ci                if type(wrongarg) is type(args[i]):
3147db96d56Sopenharmony_ci                    continue
3157db96d56Sopenharmony_ci                # build argument array
3167db96d56Sopenharmony_ci                callargs = []
3177db96d56Sopenharmony_ci                for j in range(len(args)):
3187db96d56Sopenharmony_ci                    if i==j:
3197db96d56Sopenharmony_ci                        callargs.append(wrongarg)
3207db96d56Sopenharmony_ci                    else:
3217db96d56Sopenharmony_ci                        callargs.append(args[i])
3227db96d56Sopenharmony_ci                self.assertRaises(TypeError, exctype, *callargs)
3237db96d56Sopenharmony_ci
3247db96d56Sopenharmony_ci        # check with the correct number and type of arguments
3257db96d56Sopenharmony_ci        exc = exctype(*args)
3267db96d56Sopenharmony_ci        self.assertEqual(str(exc), msg)
3277db96d56Sopenharmony_ci
3287db96d56Sopenharmony_ci    def test_unicodeencodeerror(self):
3297db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3307db96d56Sopenharmony_ci            UnicodeEncodeError,
3317db96d56Sopenharmony_ci            ["ascii", "g\xfcrk", 1, 2, "ouch"],
3327db96d56Sopenharmony_ci            "'ascii' codec can't encode character '\\xfc' in position 1: ouch"
3337db96d56Sopenharmony_ci        )
3347db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3357db96d56Sopenharmony_ci            UnicodeEncodeError,
3367db96d56Sopenharmony_ci            ["ascii", "g\xfcrk", 1, 4, "ouch"],
3377db96d56Sopenharmony_ci            "'ascii' codec can't encode characters in position 1-3: ouch"
3387db96d56Sopenharmony_ci        )
3397db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3407db96d56Sopenharmony_ci            UnicodeEncodeError,
3417db96d56Sopenharmony_ci            ["ascii", "\xfcx", 0, 1, "ouch"],
3427db96d56Sopenharmony_ci            "'ascii' codec can't encode character '\\xfc' in position 0: ouch"
3437db96d56Sopenharmony_ci        )
3447db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3457db96d56Sopenharmony_ci            UnicodeEncodeError,
3467db96d56Sopenharmony_ci            ["ascii", "\u0100x", 0, 1, "ouch"],
3477db96d56Sopenharmony_ci            "'ascii' codec can't encode character '\\u0100' in position 0: ouch"
3487db96d56Sopenharmony_ci        )
3497db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3507db96d56Sopenharmony_ci            UnicodeEncodeError,
3517db96d56Sopenharmony_ci            ["ascii", "\uffffx", 0, 1, "ouch"],
3527db96d56Sopenharmony_ci            "'ascii' codec can't encode character '\\uffff' in position 0: ouch"
3537db96d56Sopenharmony_ci        )
3547db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3557db96d56Sopenharmony_ci            UnicodeEncodeError,
3567db96d56Sopenharmony_ci            ["ascii", "\U00010000x", 0, 1, "ouch"],
3577db96d56Sopenharmony_ci            "'ascii' codec can't encode character '\\U00010000' in position 0: ouch"
3587db96d56Sopenharmony_ci        )
3597db96d56Sopenharmony_ci
3607db96d56Sopenharmony_ci    def test_unicodedecodeerror(self):
3617db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3627db96d56Sopenharmony_ci            UnicodeDecodeError,
3637db96d56Sopenharmony_ci            ["ascii", bytearray(b"g\xfcrk"), 1, 2, "ouch"],
3647db96d56Sopenharmony_ci            "'ascii' codec can't decode byte 0xfc in position 1: ouch"
3657db96d56Sopenharmony_ci        )
3667db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3677db96d56Sopenharmony_ci            UnicodeDecodeError,
3687db96d56Sopenharmony_ci            ["ascii", bytearray(b"g\xfcrk"), 1, 3, "ouch"],
3697db96d56Sopenharmony_ci            "'ascii' codec can't decode bytes in position 1-2: ouch"
3707db96d56Sopenharmony_ci        )
3717db96d56Sopenharmony_ci
3727db96d56Sopenharmony_ci    def test_unicodetranslateerror(self):
3737db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3747db96d56Sopenharmony_ci            UnicodeTranslateError,
3757db96d56Sopenharmony_ci            ["g\xfcrk", 1, 2, "ouch"],
3767db96d56Sopenharmony_ci            "can't translate character '\\xfc' in position 1: ouch"
3777db96d56Sopenharmony_ci        )
3787db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3797db96d56Sopenharmony_ci            UnicodeTranslateError,
3807db96d56Sopenharmony_ci            ["g\u0100rk", 1, 2, "ouch"],
3817db96d56Sopenharmony_ci            "can't translate character '\\u0100' in position 1: ouch"
3827db96d56Sopenharmony_ci        )
3837db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3847db96d56Sopenharmony_ci            UnicodeTranslateError,
3857db96d56Sopenharmony_ci            ["g\uffffrk", 1, 2, "ouch"],
3867db96d56Sopenharmony_ci            "can't translate character '\\uffff' in position 1: ouch"
3877db96d56Sopenharmony_ci        )
3887db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3897db96d56Sopenharmony_ci            UnicodeTranslateError,
3907db96d56Sopenharmony_ci            ["g\U00010000rk", 1, 2, "ouch"],
3917db96d56Sopenharmony_ci            "can't translate character '\\U00010000' in position 1: ouch"
3927db96d56Sopenharmony_ci        )
3937db96d56Sopenharmony_ci        self.check_exceptionobjectargs(
3947db96d56Sopenharmony_ci            UnicodeTranslateError,
3957db96d56Sopenharmony_ci            ["g\xfcrk", 1, 3, "ouch"],
3967db96d56Sopenharmony_ci            "can't translate characters in position 1-2: ouch"
3977db96d56Sopenharmony_ci        )
3987db96d56Sopenharmony_ci
3997db96d56Sopenharmony_ci    def test_badandgoodstrictexceptions(self):
4007db96d56Sopenharmony_ci        # "strict" complains about a non-exception passed in
4017db96d56Sopenharmony_ci        self.assertRaises(
4027db96d56Sopenharmony_ci            TypeError,
4037db96d56Sopenharmony_ci            codecs.strict_errors,
4047db96d56Sopenharmony_ci            42
4057db96d56Sopenharmony_ci        )
4067db96d56Sopenharmony_ci        # "strict" complains about the wrong exception type
4077db96d56Sopenharmony_ci        self.assertRaises(
4087db96d56Sopenharmony_ci            Exception,
4097db96d56Sopenharmony_ci            codecs.strict_errors,
4107db96d56Sopenharmony_ci            Exception("ouch")
4117db96d56Sopenharmony_ci        )
4127db96d56Sopenharmony_ci
4137db96d56Sopenharmony_ci        # If the correct exception is passed in, "strict" raises it
4147db96d56Sopenharmony_ci        self.assertRaises(
4157db96d56Sopenharmony_ci            UnicodeEncodeError,
4167db96d56Sopenharmony_ci            codecs.strict_errors,
4177db96d56Sopenharmony_ci            UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")
4187db96d56Sopenharmony_ci        )
4197db96d56Sopenharmony_ci        self.assertRaises(
4207db96d56Sopenharmony_ci            UnicodeDecodeError,
4217db96d56Sopenharmony_ci            codecs.strict_errors,
4227db96d56Sopenharmony_ci            UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
4237db96d56Sopenharmony_ci        )
4247db96d56Sopenharmony_ci        self.assertRaises(
4257db96d56Sopenharmony_ci            UnicodeTranslateError,
4267db96d56Sopenharmony_ci            codecs.strict_errors,
4277db96d56Sopenharmony_ci            UnicodeTranslateError("\u3042", 0, 1, "ouch")
4287db96d56Sopenharmony_ci        )
4297db96d56Sopenharmony_ci
4307db96d56Sopenharmony_ci    def test_badandgoodignoreexceptions(self):
4317db96d56Sopenharmony_ci        # "ignore" complains about a non-exception passed in
4327db96d56Sopenharmony_ci        self.assertRaises(
4337db96d56Sopenharmony_ci           TypeError,
4347db96d56Sopenharmony_ci           codecs.ignore_errors,
4357db96d56Sopenharmony_ci           42
4367db96d56Sopenharmony_ci        )
4377db96d56Sopenharmony_ci        # "ignore" complains about the wrong exception type
4387db96d56Sopenharmony_ci        self.assertRaises(
4397db96d56Sopenharmony_ci           TypeError,
4407db96d56Sopenharmony_ci           codecs.ignore_errors,
4417db96d56Sopenharmony_ci           UnicodeError("ouch")
4427db96d56Sopenharmony_ci        )
4437db96d56Sopenharmony_ci        # If the correct exception is passed in, "ignore" returns an empty replacement
4447db96d56Sopenharmony_ci        self.assertEqual(
4457db96d56Sopenharmony_ci            codecs.ignore_errors(
4467db96d56Sopenharmony_ci                UnicodeEncodeError("ascii", "a\u3042b", 1, 2, "ouch")),
4477db96d56Sopenharmony_ci            ("", 2)
4487db96d56Sopenharmony_ci        )
4497db96d56Sopenharmony_ci        self.assertEqual(
4507db96d56Sopenharmony_ci            codecs.ignore_errors(
4517db96d56Sopenharmony_ci                UnicodeDecodeError("ascii", bytearray(b"a\xffb"), 1, 2, "ouch")),
4527db96d56Sopenharmony_ci            ("", 2)
4537db96d56Sopenharmony_ci        )
4547db96d56Sopenharmony_ci        self.assertEqual(
4557db96d56Sopenharmony_ci            codecs.ignore_errors(
4567db96d56Sopenharmony_ci                UnicodeTranslateError("a\u3042b", 1, 2, "ouch")),
4577db96d56Sopenharmony_ci            ("", 2)
4587db96d56Sopenharmony_ci        )
4597db96d56Sopenharmony_ci
4607db96d56Sopenharmony_ci    def test_badandgoodreplaceexceptions(self):
4617db96d56Sopenharmony_ci        # "replace" complains about a non-exception passed in
4627db96d56Sopenharmony_ci        self.assertRaises(
4637db96d56Sopenharmony_ci           TypeError,
4647db96d56Sopenharmony_ci           codecs.replace_errors,
4657db96d56Sopenharmony_ci           42
4667db96d56Sopenharmony_ci        )
4677db96d56Sopenharmony_ci        # "replace" complains about the wrong exception type
4687db96d56Sopenharmony_ci        self.assertRaises(
4697db96d56Sopenharmony_ci           TypeError,
4707db96d56Sopenharmony_ci           codecs.replace_errors,
4717db96d56Sopenharmony_ci           UnicodeError("ouch")
4727db96d56Sopenharmony_ci        )
4737db96d56Sopenharmony_ci        self.assertRaises(
4747db96d56Sopenharmony_ci            TypeError,
4757db96d56Sopenharmony_ci            codecs.replace_errors,
4767db96d56Sopenharmony_ci            BadObjectUnicodeEncodeError()
4777db96d56Sopenharmony_ci        )
4787db96d56Sopenharmony_ci        self.assertRaises(
4797db96d56Sopenharmony_ci            TypeError,
4807db96d56Sopenharmony_ci            codecs.replace_errors,
4817db96d56Sopenharmony_ci            BadObjectUnicodeDecodeError()
4827db96d56Sopenharmony_ci        )
4837db96d56Sopenharmony_ci        # With the correct exception, "replace" returns an "?" or "\ufffd" replacement
4847db96d56Sopenharmony_ci        self.assertEqual(
4857db96d56Sopenharmony_ci            codecs.replace_errors(
4867db96d56Sopenharmony_ci                UnicodeEncodeError("ascii", "a\u3042b", 1, 2, "ouch")),
4877db96d56Sopenharmony_ci            ("?", 2)
4887db96d56Sopenharmony_ci        )
4897db96d56Sopenharmony_ci        self.assertEqual(
4907db96d56Sopenharmony_ci            codecs.replace_errors(
4917db96d56Sopenharmony_ci                UnicodeDecodeError("ascii", bytearray(b"a\xffb"), 1, 2, "ouch")),
4927db96d56Sopenharmony_ci            ("\ufffd", 2)
4937db96d56Sopenharmony_ci        )
4947db96d56Sopenharmony_ci        self.assertEqual(
4957db96d56Sopenharmony_ci            codecs.replace_errors(
4967db96d56Sopenharmony_ci                UnicodeTranslateError("a\u3042b", 1, 2, "ouch")),
4977db96d56Sopenharmony_ci            ("\ufffd", 2)
4987db96d56Sopenharmony_ci        )
4997db96d56Sopenharmony_ci
5007db96d56Sopenharmony_ci    def test_badandgoodxmlcharrefreplaceexceptions(self):
5017db96d56Sopenharmony_ci        # "xmlcharrefreplace" complains about a non-exception passed in
5027db96d56Sopenharmony_ci        self.assertRaises(
5037db96d56Sopenharmony_ci           TypeError,
5047db96d56Sopenharmony_ci           codecs.xmlcharrefreplace_errors,
5057db96d56Sopenharmony_ci           42
5067db96d56Sopenharmony_ci        )
5077db96d56Sopenharmony_ci        # "xmlcharrefreplace" complains about the wrong exception types
5087db96d56Sopenharmony_ci        self.assertRaises(
5097db96d56Sopenharmony_ci           TypeError,
5107db96d56Sopenharmony_ci           codecs.xmlcharrefreplace_errors,
5117db96d56Sopenharmony_ci           UnicodeError("ouch")
5127db96d56Sopenharmony_ci        )
5137db96d56Sopenharmony_ci        # "xmlcharrefreplace" can only be used for encoding
5147db96d56Sopenharmony_ci        self.assertRaises(
5157db96d56Sopenharmony_ci            TypeError,
5167db96d56Sopenharmony_ci            codecs.xmlcharrefreplace_errors,
5177db96d56Sopenharmony_ci            UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
5187db96d56Sopenharmony_ci        )
5197db96d56Sopenharmony_ci        self.assertRaises(
5207db96d56Sopenharmony_ci            TypeError,
5217db96d56Sopenharmony_ci            codecs.xmlcharrefreplace_errors,
5227db96d56Sopenharmony_ci            UnicodeTranslateError("\u3042", 0, 1, "ouch")
5237db96d56Sopenharmony_ci        )
5247db96d56Sopenharmony_ci        # Use the correct exception
5257db96d56Sopenharmony_ci        cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 99999, 100000,
5267db96d56Sopenharmony_ci              999999, 1000000)
5277db96d56Sopenharmony_ci        cs += (0xd800, 0xdfff)
5287db96d56Sopenharmony_ci        s = "".join(chr(c) for c in cs)
5297db96d56Sopenharmony_ci        self.assertEqual(
5307db96d56Sopenharmony_ci            codecs.xmlcharrefreplace_errors(
5317db96d56Sopenharmony_ci                UnicodeEncodeError("ascii", "a" + s + "b",
5327db96d56Sopenharmony_ci                                   1, 1 + len(s), "ouch")
5337db96d56Sopenharmony_ci            ),
5347db96d56Sopenharmony_ci            ("".join("&#%d;" % c for c in cs), 1 + len(s))
5357db96d56Sopenharmony_ci        )
5367db96d56Sopenharmony_ci
5377db96d56Sopenharmony_ci    def test_badandgoodbackslashreplaceexceptions(self):
5387db96d56Sopenharmony_ci        # "backslashreplace" complains about a non-exception passed in
5397db96d56Sopenharmony_ci        self.assertRaises(
5407db96d56Sopenharmony_ci           TypeError,
5417db96d56Sopenharmony_ci           codecs.backslashreplace_errors,
5427db96d56Sopenharmony_ci           42
5437db96d56Sopenharmony_ci        )
5447db96d56Sopenharmony_ci        # "backslashreplace" complains about the wrong exception types
5457db96d56Sopenharmony_ci        self.assertRaises(
5467db96d56Sopenharmony_ci           TypeError,
5477db96d56Sopenharmony_ci           codecs.backslashreplace_errors,
5487db96d56Sopenharmony_ci           UnicodeError("ouch")
5497db96d56Sopenharmony_ci        )
5507db96d56Sopenharmony_ci        # Use the correct exception
5517db96d56Sopenharmony_ci        tests = [
5527db96d56Sopenharmony_ci            ("\u3042", "\\u3042"),
5537db96d56Sopenharmony_ci            ("\n", "\\x0a"),
5547db96d56Sopenharmony_ci            ("a", "\\x61"),
5557db96d56Sopenharmony_ci            ("\x00", "\\x00"),
5567db96d56Sopenharmony_ci            ("\xff", "\\xff"),
5577db96d56Sopenharmony_ci            ("\u0100", "\\u0100"),
5587db96d56Sopenharmony_ci            ("\uffff", "\\uffff"),
5597db96d56Sopenharmony_ci            ("\U00010000", "\\U00010000"),
5607db96d56Sopenharmony_ci            ("\U0010ffff", "\\U0010ffff"),
5617db96d56Sopenharmony_ci            # Lone surrogates
5627db96d56Sopenharmony_ci            ("\ud800", "\\ud800"),
5637db96d56Sopenharmony_ci            ("\udfff", "\\udfff"),
5647db96d56Sopenharmony_ci            ("\ud800\udfff", "\\ud800\\udfff"),
5657db96d56Sopenharmony_ci        ]
5667db96d56Sopenharmony_ci        for s, r in tests:
5677db96d56Sopenharmony_ci            with self.subTest(str=s):
5687db96d56Sopenharmony_ci                self.assertEqual(
5697db96d56Sopenharmony_ci                    codecs.backslashreplace_errors(
5707db96d56Sopenharmony_ci                        UnicodeEncodeError("ascii", "a" + s + "b",
5717db96d56Sopenharmony_ci                                           1, 1 + len(s), "ouch")),
5727db96d56Sopenharmony_ci                    (r, 1 + len(s))
5737db96d56Sopenharmony_ci                )
5747db96d56Sopenharmony_ci                self.assertEqual(
5757db96d56Sopenharmony_ci                    codecs.backslashreplace_errors(
5767db96d56Sopenharmony_ci                        UnicodeTranslateError("a" + s + "b",
5777db96d56Sopenharmony_ci                                              1, 1 + len(s), "ouch")),
5787db96d56Sopenharmony_ci                    (r, 1 + len(s))
5797db96d56Sopenharmony_ci                )
5807db96d56Sopenharmony_ci        tests = [
5817db96d56Sopenharmony_ci            (b"a", "\\x61"),
5827db96d56Sopenharmony_ci            (b"\n", "\\x0a"),
5837db96d56Sopenharmony_ci            (b"\x00", "\\x00"),
5847db96d56Sopenharmony_ci            (b"\xff", "\\xff"),
5857db96d56Sopenharmony_ci        ]
5867db96d56Sopenharmony_ci        for b, r in tests:
5877db96d56Sopenharmony_ci            with self.subTest(bytes=b):
5887db96d56Sopenharmony_ci                self.assertEqual(
5897db96d56Sopenharmony_ci                    codecs.backslashreplace_errors(
5907db96d56Sopenharmony_ci                        UnicodeDecodeError("ascii", bytearray(b"a" + b + b"b"),
5917db96d56Sopenharmony_ci                                           1, 2, "ouch")),
5927db96d56Sopenharmony_ci                    (r, 2)
5937db96d56Sopenharmony_ci                )
5947db96d56Sopenharmony_ci
5957db96d56Sopenharmony_ci    def test_badandgoodnamereplaceexceptions(self):
5967db96d56Sopenharmony_ci        # "namereplace" complains about a non-exception passed in
5977db96d56Sopenharmony_ci        self.assertRaises(
5987db96d56Sopenharmony_ci           TypeError,
5997db96d56Sopenharmony_ci           codecs.namereplace_errors,
6007db96d56Sopenharmony_ci           42
6017db96d56Sopenharmony_ci        )
6027db96d56Sopenharmony_ci        # "namereplace" complains about the wrong exception types
6037db96d56Sopenharmony_ci        self.assertRaises(
6047db96d56Sopenharmony_ci           TypeError,
6057db96d56Sopenharmony_ci           codecs.namereplace_errors,
6067db96d56Sopenharmony_ci           UnicodeError("ouch")
6077db96d56Sopenharmony_ci        )
6087db96d56Sopenharmony_ci        # "namereplace" can only be used for encoding
6097db96d56Sopenharmony_ci        self.assertRaises(
6107db96d56Sopenharmony_ci            TypeError,
6117db96d56Sopenharmony_ci            codecs.namereplace_errors,
6127db96d56Sopenharmony_ci            UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
6137db96d56Sopenharmony_ci        )
6147db96d56Sopenharmony_ci        self.assertRaises(
6157db96d56Sopenharmony_ci            TypeError,
6167db96d56Sopenharmony_ci            codecs.namereplace_errors,
6177db96d56Sopenharmony_ci            UnicodeTranslateError("\u3042", 0, 1, "ouch")
6187db96d56Sopenharmony_ci        )
6197db96d56Sopenharmony_ci        # Use the correct exception
6207db96d56Sopenharmony_ci        tests = [
6217db96d56Sopenharmony_ci            ("\u3042", "\\N{HIRAGANA LETTER A}"),
6227db96d56Sopenharmony_ci            ("\x00", "\\x00"),
6237db96d56Sopenharmony_ci            ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH "
6247db96d56Sopenharmony_ci                       "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"),
6257db96d56Sopenharmony_ci            ("\U000e007f", "\\N{CANCEL TAG}"),
6267db96d56Sopenharmony_ci            ("\U0010ffff", "\\U0010ffff"),
6277db96d56Sopenharmony_ci            # Lone surrogates
6287db96d56Sopenharmony_ci            ("\ud800", "\\ud800"),
6297db96d56Sopenharmony_ci            ("\udfff", "\\udfff"),
6307db96d56Sopenharmony_ci            ("\ud800\udfff", "\\ud800\\udfff"),
6317db96d56Sopenharmony_ci        ]
6327db96d56Sopenharmony_ci        for s, r in tests:
6337db96d56Sopenharmony_ci            with self.subTest(str=s):
6347db96d56Sopenharmony_ci                self.assertEqual(
6357db96d56Sopenharmony_ci                    codecs.namereplace_errors(
6367db96d56Sopenharmony_ci                        UnicodeEncodeError("ascii", "a" + s + "b",
6377db96d56Sopenharmony_ci                                           1, 1 + len(s), "ouch")),
6387db96d56Sopenharmony_ci                    (r, 1 + len(s))
6397db96d56Sopenharmony_ci                )
6407db96d56Sopenharmony_ci
6417db96d56Sopenharmony_ci    def test_badandgoodsurrogateescapeexceptions(self):
6427db96d56Sopenharmony_ci        surrogateescape_errors = codecs.lookup_error('surrogateescape')
6437db96d56Sopenharmony_ci        # "surrogateescape" complains about a non-exception passed in
6447db96d56Sopenharmony_ci        self.assertRaises(
6457db96d56Sopenharmony_ci           TypeError,
6467db96d56Sopenharmony_ci           surrogateescape_errors,
6477db96d56Sopenharmony_ci           42
6487db96d56Sopenharmony_ci        )
6497db96d56Sopenharmony_ci        # "surrogateescape" complains about the wrong exception types
6507db96d56Sopenharmony_ci        self.assertRaises(
6517db96d56Sopenharmony_ci           TypeError,
6527db96d56Sopenharmony_ci           surrogateescape_errors,
6537db96d56Sopenharmony_ci           UnicodeError("ouch")
6547db96d56Sopenharmony_ci        )
6557db96d56Sopenharmony_ci        # "surrogateescape" can not be used for translating
6567db96d56Sopenharmony_ci        self.assertRaises(
6577db96d56Sopenharmony_ci            TypeError,
6587db96d56Sopenharmony_ci            surrogateescape_errors,
6597db96d56Sopenharmony_ci            UnicodeTranslateError("\udc80", 0, 1, "ouch")
6607db96d56Sopenharmony_ci        )
6617db96d56Sopenharmony_ci        # Use the correct exception
6627db96d56Sopenharmony_ci        for s in ("a", "\udc7f", "\udd00"):
6637db96d56Sopenharmony_ci            with self.subTest(str=s):
6647db96d56Sopenharmony_ci                self.assertRaises(
6657db96d56Sopenharmony_ci                    UnicodeEncodeError,
6667db96d56Sopenharmony_ci                    surrogateescape_errors,
6677db96d56Sopenharmony_ci                    UnicodeEncodeError("ascii", s, 0, 1, "ouch")
6687db96d56Sopenharmony_ci                )
6697db96d56Sopenharmony_ci        self.assertEqual(
6707db96d56Sopenharmony_ci            surrogateescape_errors(
6717db96d56Sopenharmony_ci                UnicodeEncodeError("ascii", "a\udc80b", 1, 2, "ouch")),
6727db96d56Sopenharmony_ci            (b"\x80", 2)
6737db96d56Sopenharmony_ci        )
6747db96d56Sopenharmony_ci        self.assertRaises(
6757db96d56Sopenharmony_ci            UnicodeDecodeError,
6767db96d56Sopenharmony_ci            surrogateescape_errors,
6777db96d56Sopenharmony_ci            UnicodeDecodeError("ascii", bytearray(b"a"), 0, 1, "ouch")
6787db96d56Sopenharmony_ci        )
6797db96d56Sopenharmony_ci        self.assertEqual(
6807db96d56Sopenharmony_ci            surrogateescape_errors(
6817db96d56Sopenharmony_ci                UnicodeDecodeError("ascii", bytearray(b"a\x80b"), 1, 2, "ouch")),
6827db96d56Sopenharmony_ci            ("\udc80", 2)
6837db96d56Sopenharmony_ci        )
6847db96d56Sopenharmony_ci
6857db96d56Sopenharmony_ci    def test_badandgoodsurrogatepassexceptions(self):
6867db96d56Sopenharmony_ci        surrogatepass_errors = codecs.lookup_error('surrogatepass')
6877db96d56Sopenharmony_ci        # "surrogatepass" complains about a non-exception passed in
6887db96d56Sopenharmony_ci        self.assertRaises(
6897db96d56Sopenharmony_ci           TypeError,
6907db96d56Sopenharmony_ci           surrogatepass_errors,
6917db96d56Sopenharmony_ci           42
6927db96d56Sopenharmony_ci        )
6937db96d56Sopenharmony_ci        # "surrogatepass" complains about the wrong exception types
6947db96d56Sopenharmony_ci        self.assertRaises(
6957db96d56Sopenharmony_ci           TypeError,
6967db96d56Sopenharmony_ci           surrogatepass_errors,
6977db96d56Sopenharmony_ci           UnicodeError("ouch")
6987db96d56Sopenharmony_ci        )
6997db96d56Sopenharmony_ci        # "surrogatepass" can not be used for translating
7007db96d56Sopenharmony_ci        self.assertRaises(
7017db96d56Sopenharmony_ci            TypeError,
7027db96d56Sopenharmony_ci            surrogatepass_errors,
7037db96d56Sopenharmony_ci            UnicodeTranslateError("\ud800", 0, 1, "ouch")
7047db96d56Sopenharmony_ci        )
7057db96d56Sopenharmony_ci        # Use the correct exception
7067db96d56Sopenharmony_ci        for enc in ("utf-8", "utf-16le", "utf-16be", "utf-32le", "utf-32be"):
7077db96d56Sopenharmony_ci            with self.subTest(encoding=enc):
7087db96d56Sopenharmony_ci                self.assertRaises(
7097db96d56Sopenharmony_ci                    UnicodeEncodeError,
7107db96d56Sopenharmony_ci                    surrogatepass_errors,
7117db96d56Sopenharmony_ci                    UnicodeEncodeError(enc, "a", 0, 1, "ouch")
7127db96d56Sopenharmony_ci                )
7137db96d56Sopenharmony_ci                self.assertRaises(
7147db96d56Sopenharmony_ci                    UnicodeDecodeError,
7157db96d56Sopenharmony_ci                    surrogatepass_errors,
7167db96d56Sopenharmony_ci                    UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch")
7177db96d56Sopenharmony_ci                )
7187db96d56Sopenharmony_ci        for s in ("\ud800", "\udfff", "\ud800\udfff"):
7197db96d56Sopenharmony_ci            with self.subTest(str=s):
7207db96d56Sopenharmony_ci                self.assertRaises(
7217db96d56Sopenharmony_ci                    UnicodeEncodeError,
7227db96d56Sopenharmony_ci                    surrogatepass_errors,
7237db96d56Sopenharmony_ci                    UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
7247db96d56Sopenharmony_ci                )
7257db96d56Sopenharmony_ci        tests = [
7267db96d56Sopenharmony_ci            ("utf-8", "\ud800", b'\xed\xa0\x80', 3),
7277db96d56Sopenharmony_ci            ("utf-16le", "\ud800", b'\x00\xd8', 2),
7287db96d56Sopenharmony_ci            ("utf-16be", "\ud800", b'\xd8\x00', 2),
7297db96d56Sopenharmony_ci            ("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4),
7307db96d56Sopenharmony_ci            ("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4),
7317db96d56Sopenharmony_ci            ("utf-8", "\udfff", b'\xed\xbf\xbf', 3),
7327db96d56Sopenharmony_ci            ("utf-16le", "\udfff", b'\xff\xdf', 2),
7337db96d56Sopenharmony_ci            ("utf-16be", "\udfff", b'\xdf\xff', 2),
7347db96d56Sopenharmony_ci            ("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4),
7357db96d56Sopenharmony_ci            ("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4),
7367db96d56Sopenharmony_ci            ("utf-8", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3),
7377db96d56Sopenharmony_ci            ("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2),
7387db96d56Sopenharmony_ci            ("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2),
7397db96d56Sopenharmony_ci            ("utf-32le", "\ud800\udfff", b'\x00\xd8\x00\x00\xff\xdf\x00\x00', 4),
7407db96d56Sopenharmony_ci            ("utf-32be", "\ud800\udfff", b'\x00\x00\xd8\x00\x00\x00\xdf\xff', 4),
7417db96d56Sopenharmony_ci        ]
7427db96d56Sopenharmony_ci        for enc, s, b, n in tests:
7437db96d56Sopenharmony_ci            with self.subTest(encoding=enc, str=s, bytes=b):
7447db96d56Sopenharmony_ci                self.assertEqual(
7457db96d56Sopenharmony_ci                    surrogatepass_errors(
7467db96d56Sopenharmony_ci                        UnicodeEncodeError(enc, "a" + s + "b",
7477db96d56Sopenharmony_ci                                           1, 1 + len(s), "ouch")),
7487db96d56Sopenharmony_ci                    (b, 1 + len(s))
7497db96d56Sopenharmony_ci                )
7507db96d56Sopenharmony_ci                self.assertEqual(
7517db96d56Sopenharmony_ci                    surrogatepass_errors(
7527db96d56Sopenharmony_ci                        UnicodeDecodeError(enc, bytearray(b"a" + b[:n] + b"b"),
7537db96d56Sopenharmony_ci                                           1, 1 + n, "ouch")),
7547db96d56Sopenharmony_ci                    (s[:1], 1 + n)
7557db96d56Sopenharmony_ci                )
7567db96d56Sopenharmony_ci
7577db96d56Sopenharmony_ci    def test_badhandlerresults(self):
7587db96d56Sopenharmony_ci        results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
7597db96d56Sopenharmony_ci        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
7607db96d56Sopenharmony_ci
7617db96d56Sopenharmony_ci        for res in results:
7627db96d56Sopenharmony_ci            codecs.register_error("test.badhandler", lambda x: res)
7637db96d56Sopenharmony_ci            for enc in encs:
7647db96d56Sopenharmony_ci                self.assertRaises(
7657db96d56Sopenharmony_ci                    TypeError,
7667db96d56Sopenharmony_ci                    "\u3042".encode,
7677db96d56Sopenharmony_ci                    enc,
7687db96d56Sopenharmony_ci                    "test.badhandler"
7697db96d56Sopenharmony_ci                )
7707db96d56Sopenharmony_ci            for (enc, bytes) in (
7717db96d56Sopenharmony_ci                ("ascii", b"\xff"),
7727db96d56Sopenharmony_ci                ("utf-8", b"\xff"),
7737db96d56Sopenharmony_ci                ("utf-7", b"+x-"),
7747db96d56Sopenharmony_ci            ):
7757db96d56Sopenharmony_ci                self.assertRaises(
7767db96d56Sopenharmony_ci                    TypeError,
7777db96d56Sopenharmony_ci                    bytes.decode,
7787db96d56Sopenharmony_ci                    enc,
7797db96d56Sopenharmony_ci                    "test.badhandler"
7807db96d56Sopenharmony_ci                )
7817db96d56Sopenharmony_ci
7827db96d56Sopenharmony_ci    def test_lookup(self):
7837db96d56Sopenharmony_ci        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
7847db96d56Sopenharmony_ci        self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
7857db96d56Sopenharmony_ci        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
7867db96d56Sopenharmony_ci        self.assertEqual(
7877db96d56Sopenharmony_ci            codecs.xmlcharrefreplace_errors,
7887db96d56Sopenharmony_ci            codecs.lookup_error("xmlcharrefreplace")
7897db96d56Sopenharmony_ci        )
7907db96d56Sopenharmony_ci        self.assertEqual(
7917db96d56Sopenharmony_ci            codecs.backslashreplace_errors,
7927db96d56Sopenharmony_ci            codecs.lookup_error("backslashreplace")
7937db96d56Sopenharmony_ci        )
7947db96d56Sopenharmony_ci        self.assertEqual(
7957db96d56Sopenharmony_ci            codecs.namereplace_errors,
7967db96d56Sopenharmony_ci            codecs.lookup_error("namereplace")
7977db96d56Sopenharmony_ci        )
7987db96d56Sopenharmony_ci
7997db96d56Sopenharmony_ci    def test_encode_nonascii_replacement(self):
8007db96d56Sopenharmony_ci        def handle(exc):
8017db96d56Sopenharmony_ci            if isinstance(exc, UnicodeEncodeError):
8027db96d56Sopenharmony_ci                return (repl, exc.end)
8037db96d56Sopenharmony_ci            raise TypeError("don't know how to handle %r" % exc)
8047db96d56Sopenharmony_ci        codecs.register_error("test.replacing", handle)
8057db96d56Sopenharmony_ci
8067db96d56Sopenharmony_ci        for enc, input, repl in (
8077db96d56Sopenharmony_ci                ("ascii", "[¤]", "abc"),
8087db96d56Sopenharmony_ci                ("iso-8859-1", "[€]", "½¾"),
8097db96d56Sopenharmony_ci                ("iso-8859-15", "[¤]", "œŸ"),
8107db96d56Sopenharmony_ci        ):
8117db96d56Sopenharmony_ci            res = input.encode(enc, "test.replacing")
8127db96d56Sopenharmony_ci            self.assertEqual(res, ("[" + repl + "]").encode(enc))
8137db96d56Sopenharmony_ci
8147db96d56Sopenharmony_ci        for enc, input, repl in (
8157db96d56Sopenharmony_ci                ("utf-8", "[\udc80]", "\U0001f40d"),
8167db96d56Sopenharmony_ci                ("utf-16", "[\udc80]", "\U0001f40d"),
8177db96d56Sopenharmony_ci                ("utf-32", "[\udc80]", "\U0001f40d"),
8187db96d56Sopenharmony_ci        ):
8197db96d56Sopenharmony_ci            with self.subTest(encoding=enc):
8207db96d56Sopenharmony_ci                with self.assertRaises(UnicodeEncodeError) as cm:
8217db96d56Sopenharmony_ci                    input.encode(enc, "test.replacing")
8227db96d56Sopenharmony_ci                exc = cm.exception
8237db96d56Sopenharmony_ci                self.assertEqual(exc.start, 1)
8247db96d56Sopenharmony_ci                self.assertEqual(exc.end, 2)
8257db96d56Sopenharmony_ci                self.assertEqual(exc.object, input)
8267db96d56Sopenharmony_ci
8277db96d56Sopenharmony_ci    def test_encode_unencodable_replacement(self):
8287db96d56Sopenharmony_ci        def unencrepl(exc):
8297db96d56Sopenharmony_ci            if isinstance(exc, UnicodeEncodeError):
8307db96d56Sopenharmony_ci                return (repl, exc.end)
8317db96d56Sopenharmony_ci            else:
8327db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
8337db96d56Sopenharmony_ci        codecs.register_error("test.unencreplhandler", unencrepl)
8347db96d56Sopenharmony_ci
8357db96d56Sopenharmony_ci        for enc, input, repl in (
8367db96d56Sopenharmony_ci                ("ascii", "[¤]", "½"),
8377db96d56Sopenharmony_ci                ("iso-8859-1", "[€]", "œ"),
8387db96d56Sopenharmony_ci                ("iso-8859-15", "[¤]", "½"),
8397db96d56Sopenharmony_ci                ("utf-8", "[\udc80]", "\udcff"),
8407db96d56Sopenharmony_ci                ("utf-16", "[\udc80]", "\udcff"),
8417db96d56Sopenharmony_ci                ("utf-32", "[\udc80]", "\udcff"),
8427db96d56Sopenharmony_ci        ):
8437db96d56Sopenharmony_ci            with self.subTest(encoding=enc):
8447db96d56Sopenharmony_ci                with self.assertRaises(UnicodeEncodeError) as cm:
8457db96d56Sopenharmony_ci                    input.encode(enc, "test.unencreplhandler")
8467db96d56Sopenharmony_ci                exc = cm.exception
8477db96d56Sopenharmony_ci                self.assertEqual(exc.start, 1)
8487db96d56Sopenharmony_ci                self.assertEqual(exc.end, 2)
8497db96d56Sopenharmony_ci                self.assertEqual(exc.object, input)
8507db96d56Sopenharmony_ci
8517db96d56Sopenharmony_ci    def test_encode_bytes_replacement(self):
8527db96d56Sopenharmony_ci        def handle(exc):
8537db96d56Sopenharmony_ci            if isinstance(exc, UnicodeEncodeError):
8547db96d56Sopenharmony_ci                return (repl, exc.end)
8557db96d56Sopenharmony_ci            raise TypeError("don't know how to handle %r" % exc)
8567db96d56Sopenharmony_ci        codecs.register_error("test.replacing", handle)
8577db96d56Sopenharmony_ci
8587db96d56Sopenharmony_ci        # It works even if the bytes sequence is not decodable.
8597db96d56Sopenharmony_ci        for enc, input, repl in (
8607db96d56Sopenharmony_ci                ("ascii", "[¤]", b"\xbd\xbe"),
8617db96d56Sopenharmony_ci                ("iso-8859-1", "[€]", b"\xbd\xbe"),
8627db96d56Sopenharmony_ci                ("iso-8859-15", "[¤]", b"\xbd\xbe"),
8637db96d56Sopenharmony_ci                ("utf-8", "[\udc80]", b"\xbd\xbe"),
8647db96d56Sopenharmony_ci                ("utf-16le", "[\udc80]", b"\xbd\xbe"),
8657db96d56Sopenharmony_ci                ("utf-16be", "[\udc80]", b"\xbd\xbe"),
8667db96d56Sopenharmony_ci                ("utf-32le", "[\udc80]", b"\xbc\xbd\xbe\xbf"),
8677db96d56Sopenharmony_ci                ("utf-32be", "[\udc80]", b"\xbc\xbd\xbe\xbf"),
8687db96d56Sopenharmony_ci        ):
8697db96d56Sopenharmony_ci            with self.subTest(encoding=enc):
8707db96d56Sopenharmony_ci                res = input.encode(enc, "test.replacing")
8717db96d56Sopenharmony_ci                self.assertEqual(res, "[".encode(enc) + repl + "]".encode(enc))
8727db96d56Sopenharmony_ci
8737db96d56Sopenharmony_ci    def test_encode_odd_bytes_replacement(self):
8747db96d56Sopenharmony_ci        def handle(exc):
8757db96d56Sopenharmony_ci            if isinstance(exc, UnicodeEncodeError):
8767db96d56Sopenharmony_ci                return (repl, exc.end)
8777db96d56Sopenharmony_ci            raise TypeError("don't know how to handle %r" % exc)
8787db96d56Sopenharmony_ci        codecs.register_error("test.replacing", handle)
8797db96d56Sopenharmony_ci
8807db96d56Sopenharmony_ci        input = "[\udc80]"
8817db96d56Sopenharmony_ci        # Tests in which the replacement bytestring contains not whole number
8827db96d56Sopenharmony_ci        # of code units.
8837db96d56Sopenharmony_ci        for enc, repl in (
8847db96d56Sopenharmony_ci            *itertools.product(("utf-16le", "utf-16be"),
8857db96d56Sopenharmony_ci                               [b"a", b"abc"]),
8867db96d56Sopenharmony_ci            *itertools.product(("utf-32le", "utf-32be"),
8877db96d56Sopenharmony_ci                               [b"a", b"ab", b"abc", b"abcde"]),
8887db96d56Sopenharmony_ci        ):
8897db96d56Sopenharmony_ci            with self.subTest(encoding=enc, repl=repl):
8907db96d56Sopenharmony_ci                with self.assertRaises(UnicodeEncodeError) as cm:
8917db96d56Sopenharmony_ci                    input.encode(enc, "test.replacing")
8927db96d56Sopenharmony_ci                exc = cm.exception
8937db96d56Sopenharmony_ci                self.assertEqual(exc.start, 1)
8947db96d56Sopenharmony_ci                self.assertEqual(exc.end, 2)
8957db96d56Sopenharmony_ci                self.assertEqual(exc.object, input)
8967db96d56Sopenharmony_ci                self.assertEqual(exc.reason, "surrogates not allowed")
8977db96d56Sopenharmony_ci
8987db96d56Sopenharmony_ci    def test_badregistercall(self):
8997db96d56Sopenharmony_ci        # enhance coverage of:
9007db96d56Sopenharmony_ci        # Modules/_codecsmodule.c::register_error()
9017db96d56Sopenharmony_ci        # Python/codecs.c::PyCodec_RegisterError()
9027db96d56Sopenharmony_ci        self.assertRaises(TypeError, codecs.register_error, 42)
9037db96d56Sopenharmony_ci        self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
9047db96d56Sopenharmony_ci
9057db96d56Sopenharmony_ci    def test_badlookupcall(self):
9067db96d56Sopenharmony_ci        # enhance coverage of:
9077db96d56Sopenharmony_ci        # Modules/_codecsmodule.c::lookup_error()
9087db96d56Sopenharmony_ci        self.assertRaises(TypeError, codecs.lookup_error)
9097db96d56Sopenharmony_ci
9107db96d56Sopenharmony_ci    def test_unknownhandler(self):
9117db96d56Sopenharmony_ci        # enhance coverage of:
9127db96d56Sopenharmony_ci        # Modules/_codecsmodule.c::lookup_error()
9137db96d56Sopenharmony_ci        self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
9147db96d56Sopenharmony_ci
9157db96d56Sopenharmony_ci    def test_xmlcharrefvalues(self):
9167db96d56Sopenharmony_ci        # enhance coverage of:
9177db96d56Sopenharmony_ci        # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
9187db96d56Sopenharmony_ci        # and inline implementations
9197db96d56Sopenharmony_ci        v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000,
9207db96d56Sopenharmony_ci             500000, 1000000)
9217db96d56Sopenharmony_ci        s = "".join([chr(x) for x in v])
9227db96d56Sopenharmony_ci        codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
9237db96d56Sopenharmony_ci        for enc in ("ascii", "iso-8859-15"):
9247db96d56Sopenharmony_ci            for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
9257db96d56Sopenharmony_ci                s.encode(enc, err)
9267db96d56Sopenharmony_ci
9277db96d56Sopenharmony_ci    def test_decodehelper(self):
9287db96d56Sopenharmony_ci        # enhance coverage of:
9297db96d56Sopenharmony_ci        # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
9307db96d56Sopenharmony_ci        # and callers
9317db96d56Sopenharmony_ci        self.assertRaises(LookupError, b"\xff".decode, "ascii", "test.unknown")
9327db96d56Sopenharmony_ci
9337db96d56Sopenharmony_ci        def baddecodereturn1(exc):
9347db96d56Sopenharmony_ci            return 42
9357db96d56Sopenharmony_ci        codecs.register_error("test.baddecodereturn1", baddecodereturn1)
9367db96d56Sopenharmony_ci        self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn1")
9377db96d56Sopenharmony_ci        self.assertRaises(TypeError, b"\\".decode, "unicode-escape", "test.baddecodereturn1")
9387db96d56Sopenharmony_ci        self.assertRaises(TypeError, b"\\x0".decode, "unicode-escape", "test.baddecodereturn1")
9397db96d56Sopenharmony_ci        self.assertRaises(TypeError, b"\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
9407db96d56Sopenharmony_ci        self.assertRaises(TypeError, b"\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
9417db96d56Sopenharmony_ci        self.assertRaises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
9427db96d56Sopenharmony_ci
9437db96d56Sopenharmony_ci        def baddecodereturn2(exc):
9447db96d56Sopenharmony_ci            return ("?", None)
9457db96d56Sopenharmony_ci        codecs.register_error("test.baddecodereturn2", baddecodereturn2)
9467db96d56Sopenharmony_ci        self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn2")
9477db96d56Sopenharmony_ci
9487db96d56Sopenharmony_ci        handler = PosReturn()
9497db96d56Sopenharmony_ci        codecs.register_error("test.posreturn", handler.handle)
9507db96d56Sopenharmony_ci
9517db96d56Sopenharmony_ci        # Valid negative position
9527db96d56Sopenharmony_ci        handler.pos = -1
9537db96d56Sopenharmony_ci        self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0")
9547db96d56Sopenharmony_ci
9557db96d56Sopenharmony_ci        # Valid negative position
9567db96d56Sopenharmony_ci        handler.pos = -2
9577db96d56Sopenharmony_ci        self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>")
9587db96d56Sopenharmony_ci
9597db96d56Sopenharmony_ci        # Negative position out of bounds
9607db96d56Sopenharmony_ci        handler.pos = -3
9617db96d56Sopenharmony_ci        self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn")
9627db96d56Sopenharmony_ci
9637db96d56Sopenharmony_ci        # Valid positive position
9647db96d56Sopenharmony_ci        handler.pos = 1
9657db96d56Sopenharmony_ci        self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0")
9667db96d56Sopenharmony_ci
9677db96d56Sopenharmony_ci        # Largest valid positive position (one beyond end of input)
9687db96d56Sopenharmony_ci        handler.pos = 2
9697db96d56Sopenharmony_ci        self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>")
9707db96d56Sopenharmony_ci
9717db96d56Sopenharmony_ci        # Invalid positive position
9727db96d56Sopenharmony_ci        handler.pos = 3
9737db96d56Sopenharmony_ci        self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn")
9747db96d56Sopenharmony_ci
9757db96d56Sopenharmony_ci        # Restart at the "0"
9767db96d56Sopenharmony_ci        handler.pos = 6
9777db96d56Sopenharmony_ci        self.assertEqual(b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0")
9787db96d56Sopenharmony_ci
9797db96d56Sopenharmony_ci        class D(dict):
9807db96d56Sopenharmony_ci            def __getitem__(self, key):
9817db96d56Sopenharmony_ci                raise ValueError
9827db96d56Sopenharmony_ci        self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None})
9837db96d56Sopenharmony_ci        self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D())
9847db96d56Sopenharmony_ci        self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1})
9857db96d56Sopenharmony_ci
9867db96d56Sopenharmony_ci    def test_encodehelper(self):
9877db96d56Sopenharmony_ci        # enhance coverage of:
9887db96d56Sopenharmony_ci        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
9897db96d56Sopenharmony_ci        # and callers
9907db96d56Sopenharmony_ci        self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown")
9917db96d56Sopenharmony_ci
9927db96d56Sopenharmony_ci        def badencodereturn1(exc):
9937db96d56Sopenharmony_ci            return 42
9947db96d56Sopenharmony_ci        codecs.register_error("test.badencodereturn1", badencodereturn1)
9957db96d56Sopenharmony_ci        self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1")
9967db96d56Sopenharmony_ci
9977db96d56Sopenharmony_ci        def badencodereturn2(exc):
9987db96d56Sopenharmony_ci            return ("?", None)
9997db96d56Sopenharmony_ci        codecs.register_error("test.badencodereturn2", badencodereturn2)
10007db96d56Sopenharmony_ci        self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2")
10017db96d56Sopenharmony_ci
10027db96d56Sopenharmony_ci        handler = PosReturn()
10037db96d56Sopenharmony_ci        codecs.register_error("test.posreturn", handler.handle)
10047db96d56Sopenharmony_ci
10057db96d56Sopenharmony_ci        # Valid negative position
10067db96d56Sopenharmony_ci        handler.pos = -1
10077db96d56Sopenharmony_ci        self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0")
10087db96d56Sopenharmony_ci
10097db96d56Sopenharmony_ci        # Valid negative position
10107db96d56Sopenharmony_ci        handler.pos = -2
10117db96d56Sopenharmony_ci        self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?><?>")
10127db96d56Sopenharmony_ci
10137db96d56Sopenharmony_ci        # Negative position out of bounds
10147db96d56Sopenharmony_ci        handler.pos = -3
10157db96d56Sopenharmony_ci        self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn")
10167db96d56Sopenharmony_ci
10177db96d56Sopenharmony_ci        # Valid positive position
10187db96d56Sopenharmony_ci        handler.pos = 1
10197db96d56Sopenharmony_ci        self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0")
10207db96d56Sopenharmony_ci
10217db96d56Sopenharmony_ci        # Largest valid positive position (one beyond end of input
10227db96d56Sopenharmony_ci        handler.pos = 2
10237db96d56Sopenharmony_ci        self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>")
10247db96d56Sopenharmony_ci
10257db96d56Sopenharmony_ci        # Invalid positive position
10267db96d56Sopenharmony_ci        handler.pos = 3
10277db96d56Sopenharmony_ci        self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn")
10287db96d56Sopenharmony_ci
10297db96d56Sopenharmony_ci        handler.pos = 0
10307db96d56Sopenharmony_ci
10317db96d56Sopenharmony_ci        class D(dict):
10327db96d56Sopenharmony_ci            def __getitem__(self, key):
10337db96d56Sopenharmony_ci                raise ValueError
10347db96d56Sopenharmony_ci        for err in ("strict", "replace", "xmlcharrefreplace",
10357db96d56Sopenharmony_ci                    "backslashreplace", "namereplace", "test.posreturn"):
10367db96d56Sopenharmony_ci            self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
10377db96d56Sopenharmony_ci            self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
10387db96d56Sopenharmony_ci            self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
10397db96d56Sopenharmony_ci
10407db96d56Sopenharmony_ci    def test_decodehelper_bug36819(self):
10417db96d56Sopenharmony_ci        handler = RepeatedPosReturn("x")
10427db96d56Sopenharmony_ci        codecs.register_error("test.bug36819", handler.handle)
10437db96d56Sopenharmony_ci
10447db96d56Sopenharmony_ci        testcases = [
10457db96d56Sopenharmony_ci            ("ascii", b"\xff"),
10467db96d56Sopenharmony_ci            ("utf-8", b"\xff"),
10477db96d56Sopenharmony_ci            ("utf-16be", b'\xdc\x80'),
10487db96d56Sopenharmony_ci            ("utf-32be", b'\x00\x00\xdc\x80'),
10497db96d56Sopenharmony_ci            ("iso-8859-6", b"\xff"),
10507db96d56Sopenharmony_ci        ]
10517db96d56Sopenharmony_ci        for enc, bad in testcases:
10527db96d56Sopenharmony_ci            input = "abcd".encode(enc) + bad
10537db96d56Sopenharmony_ci            with self.subTest(encoding=enc):
10547db96d56Sopenharmony_ci                handler.count = 50
10557db96d56Sopenharmony_ci                decoded = input.decode(enc, "test.bug36819")
10567db96d56Sopenharmony_ci                self.assertEqual(decoded, 'abcdx' * 51)
10577db96d56Sopenharmony_ci
10587db96d56Sopenharmony_ci    def test_encodehelper_bug36819(self):
10597db96d56Sopenharmony_ci        handler = RepeatedPosReturn()
10607db96d56Sopenharmony_ci        codecs.register_error("test.bug36819", handler.handle)
10617db96d56Sopenharmony_ci
10627db96d56Sopenharmony_ci        input = "abcd\udc80"
10637db96d56Sopenharmony_ci        encodings = ["ascii", "latin1", "utf-8", "utf-16", "utf-32"]  # built-in
10647db96d56Sopenharmony_ci        encodings += ["iso-8859-15"]  # charmap codec
10657db96d56Sopenharmony_ci        if sys.platform == 'win32':
10667db96d56Sopenharmony_ci            encodings = ["mbcs", "oem"]  # code page codecs
10677db96d56Sopenharmony_ci
10687db96d56Sopenharmony_ci        handler.repl = "\udcff"
10697db96d56Sopenharmony_ci        for enc in encodings:
10707db96d56Sopenharmony_ci            with self.subTest(encoding=enc):
10717db96d56Sopenharmony_ci                handler.count = 50
10727db96d56Sopenharmony_ci                with self.assertRaises(UnicodeEncodeError) as cm:
10737db96d56Sopenharmony_ci                    input.encode(enc, "test.bug36819")
10747db96d56Sopenharmony_ci                exc = cm.exception
10757db96d56Sopenharmony_ci                self.assertEqual(exc.start, 4)
10767db96d56Sopenharmony_ci                self.assertEqual(exc.end, 5)
10777db96d56Sopenharmony_ci                self.assertEqual(exc.object, input)
10787db96d56Sopenharmony_ci        if sys.platform == "win32":
10797db96d56Sopenharmony_ci            handler.count = 50
10807db96d56Sopenharmony_ci            with self.assertRaises(UnicodeEncodeError) as cm:
10817db96d56Sopenharmony_ci                codecs.code_page_encode(437, input, "test.bug36819")
10827db96d56Sopenharmony_ci            exc = cm.exception
10837db96d56Sopenharmony_ci            self.assertEqual(exc.start, 4)
10847db96d56Sopenharmony_ci            self.assertEqual(exc.end, 5)
10857db96d56Sopenharmony_ci            self.assertEqual(exc.object, input)
10867db96d56Sopenharmony_ci
10877db96d56Sopenharmony_ci        handler.repl = "x"
10887db96d56Sopenharmony_ci        for enc in encodings:
10897db96d56Sopenharmony_ci            with self.subTest(encoding=enc):
10907db96d56Sopenharmony_ci                # The interpreter should segfault after a handful of attempts.
10917db96d56Sopenharmony_ci                # 50 was chosen to try to ensure a segfault without a fix,
10927db96d56Sopenharmony_ci                # but not OOM a machine with one.
10937db96d56Sopenharmony_ci                handler.count = 50
10947db96d56Sopenharmony_ci                encoded = input.encode(enc, "test.bug36819")
10957db96d56Sopenharmony_ci                self.assertEqual(encoded.decode(enc), "abcdx" * 51)
10967db96d56Sopenharmony_ci        if sys.platform == "win32":
10977db96d56Sopenharmony_ci            handler.count = 50
10987db96d56Sopenharmony_ci            encoded = codecs.code_page_encode(437, input, "test.bug36819")
10997db96d56Sopenharmony_ci            self.assertEqual(encoded[0].decode(), "abcdx" * 51)
11007db96d56Sopenharmony_ci            self.assertEqual(encoded[1], len(input))
11017db96d56Sopenharmony_ci
11027db96d56Sopenharmony_ci    def test_translatehelper(self):
11037db96d56Sopenharmony_ci        # enhance coverage of:
11047db96d56Sopenharmony_ci        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
11057db96d56Sopenharmony_ci        # and callers
11067db96d56Sopenharmony_ci        # (Unfortunately the errors argument is not directly accessible
11077db96d56Sopenharmony_ci        # from Python, so we can't test that much)
11087db96d56Sopenharmony_ci        class D(dict):
11097db96d56Sopenharmony_ci            def __getitem__(self, key):
11107db96d56Sopenharmony_ci                raise ValueError
11117db96d56Sopenharmony_ci        #self.assertRaises(ValueError, "\xff".translate, D())
11127db96d56Sopenharmony_ci        self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1})
11137db96d56Sopenharmony_ci        self.assertRaises(TypeError, "\xff".translate, {0xff: ()})
11147db96d56Sopenharmony_ci
11157db96d56Sopenharmony_ci    def test_bug828737(self):
11167db96d56Sopenharmony_ci        charmap = {
11177db96d56Sopenharmony_ci            ord("&"): "&amp;",
11187db96d56Sopenharmony_ci            ord("<"): "&lt;",
11197db96d56Sopenharmony_ci            ord(">"): "&gt;",
11207db96d56Sopenharmony_ci            ord('"'): "&quot;",
11217db96d56Sopenharmony_ci        }
11227db96d56Sopenharmony_ci
11237db96d56Sopenharmony_ci        for n in (1, 10, 100, 1000):
11247db96d56Sopenharmony_ci            text = 'abc<def>ghi'*n
11257db96d56Sopenharmony_ci            text.translate(charmap)
11267db96d56Sopenharmony_ci
11277db96d56Sopenharmony_ci    def test_mutatingdecodehandler(self):
11287db96d56Sopenharmony_ci        baddata = [
11297db96d56Sopenharmony_ci            ("ascii", b"\xff"),
11307db96d56Sopenharmony_ci            ("utf-7", b"++"),
11317db96d56Sopenharmony_ci            ("utf-8",  b"\xff"),
11327db96d56Sopenharmony_ci            ("utf-16", b"\xff"),
11337db96d56Sopenharmony_ci            ("utf-32", b"\xff"),
11347db96d56Sopenharmony_ci            ("unicode-escape", b"\\u123g"),
11357db96d56Sopenharmony_ci            ("raw-unicode-escape", b"\\u123g"),
11367db96d56Sopenharmony_ci        ]
11377db96d56Sopenharmony_ci
11387db96d56Sopenharmony_ci        def replacing(exc):
11397db96d56Sopenharmony_ci            if isinstance(exc, UnicodeDecodeError):
11407db96d56Sopenharmony_ci                exc.object = 42
11417db96d56Sopenharmony_ci                return ("\u4242", 0)
11427db96d56Sopenharmony_ci            else:
11437db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
11447db96d56Sopenharmony_ci        codecs.register_error("test.replacing", replacing)
11457db96d56Sopenharmony_ci
11467db96d56Sopenharmony_ci        for (encoding, data) in baddata:
11477db96d56Sopenharmony_ci            with self.assertRaises(TypeError):
11487db96d56Sopenharmony_ci                data.decode(encoding, "test.replacing")
11497db96d56Sopenharmony_ci
11507db96d56Sopenharmony_ci        def mutating(exc):
11517db96d56Sopenharmony_ci            if isinstance(exc, UnicodeDecodeError):
11527db96d56Sopenharmony_ci                exc.object = b""
11537db96d56Sopenharmony_ci                return ("\u4242", 0)
11547db96d56Sopenharmony_ci            else:
11557db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
11567db96d56Sopenharmony_ci        codecs.register_error("test.mutating", mutating)
11577db96d56Sopenharmony_ci        # If the decoder doesn't pick up the modified input the following
11587db96d56Sopenharmony_ci        # will lead to an endless loop
11597db96d56Sopenharmony_ci        for (encoding, data) in baddata:
11607db96d56Sopenharmony_ci            self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
11617db96d56Sopenharmony_ci
11627db96d56Sopenharmony_ci    # issue32583
11637db96d56Sopenharmony_ci    def test_crashing_decode_handler(self):
11647db96d56Sopenharmony_ci        # better generating one more character to fill the extra space slot
11657db96d56Sopenharmony_ci        # so in debug build it can steadily fail
11667db96d56Sopenharmony_ci        def forward_shorter_than_end(exc):
11677db96d56Sopenharmony_ci            if isinstance(exc, UnicodeDecodeError):
11687db96d56Sopenharmony_ci                # size one character, 0 < forward < exc.end
11697db96d56Sopenharmony_ci                return ('\ufffd', exc.start+1)
11707db96d56Sopenharmony_ci            else:
11717db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
11727db96d56Sopenharmony_ci        codecs.register_error(
11737db96d56Sopenharmony_ci            "test.forward_shorter_than_end", forward_shorter_than_end)
11747db96d56Sopenharmony_ci
11757db96d56Sopenharmony_ci        self.assertEqual(
11767db96d56Sopenharmony_ci            b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode(
11777db96d56Sopenharmony_ci                'utf-16-le', 'test.forward_shorter_than_end'),
11787db96d56Sopenharmony_ci            '\ufffd\ufffd\ufffd\ufffd\xd8\x00'
11797db96d56Sopenharmony_ci        )
11807db96d56Sopenharmony_ci        self.assertEqual(
11817db96d56Sopenharmony_ci            b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode(
11827db96d56Sopenharmony_ci                'utf-16-be', 'test.forward_shorter_than_end'),
11837db96d56Sopenharmony_ci            '\ufffd\ufffd\ufffd\ufffd\xd8\x00'
11847db96d56Sopenharmony_ci        )
11857db96d56Sopenharmony_ci        self.assertEqual(
11867db96d56Sopenharmony_ci            b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode(
11877db96d56Sopenharmony_ci                'utf-32-le', 'test.forward_shorter_than_end'),
11887db96d56Sopenharmony_ci            '\ufffd\ufffd\ufffd\u1111\x00'
11897db96d56Sopenharmony_ci        )
11907db96d56Sopenharmony_ci        self.assertEqual(
11917db96d56Sopenharmony_ci            b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode(
11927db96d56Sopenharmony_ci                'utf-32-be', 'test.forward_shorter_than_end'),
11937db96d56Sopenharmony_ci            '\ufffd\ufffd\ufffd\u1111\x00'
11947db96d56Sopenharmony_ci        )
11957db96d56Sopenharmony_ci
11967db96d56Sopenharmony_ci        def replace_with_long(exc):
11977db96d56Sopenharmony_ci            if isinstance(exc, UnicodeDecodeError):
11987db96d56Sopenharmony_ci                exc.object = b"\x00" * 8
11997db96d56Sopenharmony_ci                return ('\ufffd', exc.start)
12007db96d56Sopenharmony_ci            else:
12017db96d56Sopenharmony_ci                raise TypeError("don't know how to handle %r" % exc)
12027db96d56Sopenharmony_ci        codecs.register_error("test.replace_with_long", replace_with_long)
12037db96d56Sopenharmony_ci
12047db96d56Sopenharmony_ci        self.assertEqual(
12057db96d56Sopenharmony_ci            b'\x00'.decode('utf-16', 'test.replace_with_long'),
12067db96d56Sopenharmony_ci            '\ufffd\x00\x00\x00\x00'
12077db96d56Sopenharmony_ci        )
12087db96d56Sopenharmony_ci        self.assertEqual(
12097db96d56Sopenharmony_ci            b'\x00'.decode('utf-32', 'test.replace_with_long'),
12107db96d56Sopenharmony_ci            '\ufffd\x00\x00'
12117db96d56Sopenharmony_ci        )
12127db96d56Sopenharmony_ci
12137db96d56Sopenharmony_ci
12147db96d56Sopenharmony_ci    def test_fake_error_class(self):
12157db96d56Sopenharmony_ci        handlers = [
12167db96d56Sopenharmony_ci            codecs.strict_errors,
12177db96d56Sopenharmony_ci            codecs.ignore_errors,
12187db96d56Sopenharmony_ci            codecs.replace_errors,
12197db96d56Sopenharmony_ci            codecs.backslashreplace_errors,
12207db96d56Sopenharmony_ci            codecs.namereplace_errors,
12217db96d56Sopenharmony_ci            codecs.xmlcharrefreplace_errors,
12227db96d56Sopenharmony_ci            codecs.lookup_error('surrogateescape'),
12237db96d56Sopenharmony_ci            codecs.lookup_error('surrogatepass'),
12247db96d56Sopenharmony_ci        ]
12257db96d56Sopenharmony_ci        for cls in UnicodeEncodeError, UnicodeDecodeError, UnicodeTranslateError:
12267db96d56Sopenharmony_ci            class FakeUnicodeError(str):
12277db96d56Sopenharmony_ci                __class__ = cls
12287db96d56Sopenharmony_ci            for handler in handlers:
12297db96d56Sopenharmony_ci                with self.subTest(handler=handler, error_class=cls):
12307db96d56Sopenharmony_ci                    self.assertRaises(TypeError, handler, FakeUnicodeError())
12317db96d56Sopenharmony_ci            class FakeUnicodeError(Exception):
12327db96d56Sopenharmony_ci                __class__ = cls
12337db96d56Sopenharmony_ci            for handler in handlers:
12347db96d56Sopenharmony_ci                with self.subTest(handler=handler, error_class=cls):
12357db96d56Sopenharmony_ci                    with self.assertRaises((TypeError, FakeUnicodeError)):
12367db96d56Sopenharmony_ci                        handler(FakeUnicodeError())
12377db96d56Sopenharmony_ci
12387db96d56Sopenharmony_ci
# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
1241