import codecs
import html.entities
import itertools
import sys
import unicodedata
import unittest


class PosReturn:
    """Configurable error callback that resumes at a caller-chosen position.

    Set ``pos`` before triggering the codec; ``handle`` is the function to
    register with ``codecs.register_error``.
    """
    # this can be used for configurable callbacks

    def __init__(self):
        self.pos = 0

    def handle(self, exc):
        """Return ``("<?>", pos)`` using the position configured on entry.

        A negative ``pos`` is interpreted relative to the end of
        ``exc.object`` when deciding whether the codec would advance.
        """
        oldpos = self.pos
        realpos = oldpos
        if realpos < 0:
            realpos = len(exc.object) + realpos
        # if we don't advance this time, terminate on the next call
        # otherwise we'd get an endless loop
        if realpos <= exc.start:
            self.pos = len(exc.object)
        return ("<?>", oldpos)


class RepeatedPosReturn:
    """Error callback that returns a fixed position a limited number of times.

    While ``count`` is positive each call decrements it and resumes at
    ``pos``; afterwards the callback resumes at ``exc.end``.
    """

    def __init__(self, repl="<?>"):
        self.repl = repl
        self.pos = 0
        self.count = 0

    def handle(self, exc):
        """Return ``(repl, pos)`` while ``count`` lasts, else ``(repl, exc.end)``."""
        if self.count > 0:
            self.count -= 1
            return (self.repl, self.pos)
        return (self.repl, exc.end)


# A UnicodeEncodeError object with a bad start attribute
# NOTE(review): ``start`` looks like a typed integer slot on current CPython,
# so the list assignment below may itself raise TypeError -- confirm before
# instantiating this class in a new test.
class BadStartUnicodeEncodeError(UnicodeEncodeError):
    def __init__(self):
        UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
        self.start = []

447db96d56Sopenharmony_ci# A UnicodeEncodeError object with a bad object attribute 457db96d56Sopenharmony_ciclass BadObjectUnicodeEncodeError(UnicodeEncodeError): 467db96d56Sopenharmony_ci def __init__(self): 477db96d56Sopenharmony_ci UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") 487db96d56Sopenharmony_ci self.object = [] 497db96d56Sopenharmony_ci 507db96d56Sopenharmony_ci# A UnicodeDecodeError object without an end attribute 517db96d56Sopenharmony_ciclass NoEndUnicodeDecodeError(UnicodeDecodeError): 527db96d56Sopenharmony_ci def __init__(self): 537db96d56Sopenharmony_ci UnicodeDecodeError.__init__(self, "ascii", bytearray(b""), 0, 1, "bad") 547db96d56Sopenharmony_ci del self.end 557db96d56Sopenharmony_ci 567db96d56Sopenharmony_ci# A UnicodeDecodeError object with a bad object attribute 577db96d56Sopenharmony_ciclass BadObjectUnicodeDecodeError(UnicodeDecodeError): 587db96d56Sopenharmony_ci def __init__(self): 597db96d56Sopenharmony_ci UnicodeDecodeError.__init__(self, "ascii", bytearray(b""), 0, 1, "bad") 607db96d56Sopenharmony_ci self.object = [] 617db96d56Sopenharmony_ci 627db96d56Sopenharmony_ci# A UnicodeTranslateError object without a start attribute 637db96d56Sopenharmony_ciclass NoStartUnicodeTranslateError(UnicodeTranslateError): 647db96d56Sopenharmony_ci def __init__(self): 657db96d56Sopenharmony_ci UnicodeTranslateError.__init__(self, "", 0, 1, "bad") 667db96d56Sopenharmony_ci del self.start 677db96d56Sopenharmony_ci 687db96d56Sopenharmony_ci# A UnicodeTranslateError object without an end attribute 697db96d56Sopenharmony_ciclass NoEndUnicodeTranslateError(UnicodeTranslateError): 707db96d56Sopenharmony_ci def __init__(self): 717db96d56Sopenharmony_ci UnicodeTranslateError.__init__(self, "", 0, 1, "bad") 727db96d56Sopenharmony_ci del self.end 737db96d56Sopenharmony_ci 747db96d56Sopenharmony_ci# A UnicodeTranslateError object without an object attribute 757db96d56Sopenharmony_ciclass NoObjectUnicodeTranslateError(UnicodeTranslateError): 
767db96d56Sopenharmony_ci def __init__(self): 777db96d56Sopenharmony_ci UnicodeTranslateError.__init__(self, "", 0, 1, "bad") 787db96d56Sopenharmony_ci del self.object 797db96d56Sopenharmony_ci 807db96d56Sopenharmony_ciclass CodecCallbackTest(unittest.TestCase): 817db96d56Sopenharmony_ci 827db96d56Sopenharmony_ci def test_xmlcharrefreplace(self): 837db96d56Sopenharmony_ci # replace unencodable characters which numeric character entities. 847db96d56Sopenharmony_ci # For ascii, latin-1 and charmaps this is completely implemented 857db96d56Sopenharmony_ci # in C and should be reasonably fast. 867db96d56Sopenharmony_ci s = "\u30b9\u30d1\u30e2 \xe4nd eggs" 877db96d56Sopenharmony_ci self.assertEqual( 887db96d56Sopenharmony_ci s.encode("ascii", "xmlcharrefreplace"), 897db96d56Sopenharmony_ci b"スパモ änd eggs" 907db96d56Sopenharmony_ci ) 917db96d56Sopenharmony_ci self.assertEqual( 927db96d56Sopenharmony_ci s.encode("latin-1", "xmlcharrefreplace"), 937db96d56Sopenharmony_ci b"スパモ \xe4nd eggs" 947db96d56Sopenharmony_ci ) 957db96d56Sopenharmony_ci 967db96d56Sopenharmony_ci def test_xmlcharnamereplace(self): 977db96d56Sopenharmony_ci # This time use a named character entity for unencodable 987db96d56Sopenharmony_ci # characters, if one is available. 
997db96d56Sopenharmony_ci 1007db96d56Sopenharmony_ci def xmlcharnamereplace(exc): 1017db96d56Sopenharmony_ci if not isinstance(exc, UnicodeEncodeError): 1027db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 1037db96d56Sopenharmony_ci l = [] 1047db96d56Sopenharmony_ci for c in exc.object[exc.start:exc.end]: 1057db96d56Sopenharmony_ci try: 1067db96d56Sopenharmony_ci l.append("&%s;" % html.entities.codepoint2name[ord(c)]) 1077db96d56Sopenharmony_ci except KeyError: 1087db96d56Sopenharmony_ci l.append("&#%d;" % ord(c)) 1097db96d56Sopenharmony_ci return ("".join(l), exc.end) 1107db96d56Sopenharmony_ci 1117db96d56Sopenharmony_ci codecs.register_error( 1127db96d56Sopenharmony_ci "test.xmlcharnamereplace", xmlcharnamereplace) 1137db96d56Sopenharmony_ci 1147db96d56Sopenharmony_ci sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" 1157db96d56Sopenharmony_ci sout = b"«ℜ» = ⟨ሴ€⟩" 1167db96d56Sopenharmony_ci self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) 1177db96d56Sopenharmony_ci sout = b"\xabℜ\xbb = ⟨ሴ€⟩" 1187db96d56Sopenharmony_ci self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) 1197db96d56Sopenharmony_ci sout = b"\xabℜ\xbb = ⟨ሴ\xa4⟩" 1207db96d56Sopenharmony_ci self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout) 1217db96d56Sopenharmony_ci 1227db96d56Sopenharmony_ci def test_uninamereplace(self): 1237db96d56Sopenharmony_ci # We're using the names from the unicode database this time, 1247db96d56Sopenharmony_ci # and we're doing "syntax highlighting" here, i.e. we include 1257db96d56Sopenharmony_ci # the replaced text in ANSI escape sequences. For this it is 1267db96d56Sopenharmony_ci # useful that the error handler is not called for every single 1277db96d56Sopenharmony_ci # unencodable character, but for a complete sequence of 1287db96d56Sopenharmony_ci # unencodable characters, otherwise we would output many 1297db96d56Sopenharmony_ci # unnecessary escape sequences. 
1307db96d56Sopenharmony_ci 1317db96d56Sopenharmony_ci def uninamereplace(exc): 1327db96d56Sopenharmony_ci if not isinstance(exc, UnicodeEncodeError): 1337db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 1347db96d56Sopenharmony_ci l = [] 1357db96d56Sopenharmony_ci for c in exc.object[exc.start:exc.end]: 1367db96d56Sopenharmony_ci l.append(unicodedata.name(c, "0x%x" % ord(c))) 1377db96d56Sopenharmony_ci return ("\033[1m%s\033[0m" % ", ".join(l), exc.end) 1387db96d56Sopenharmony_ci 1397db96d56Sopenharmony_ci codecs.register_error( 1407db96d56Sopenharmony_ci "test.uninamereplace", uninamereplace) 1417db96d56Sopenharmony_ci 1427db96d56Sopenharmony_ci sin = "\xac\u1234\u20ac\u8000" 1437db96d56Sopenharmony_ci sout = b"\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" 1447db96d56Sopenharmony_ci self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) 1457db96d56Sopenharmony_ci 1467db96d56Sopenharmony_ci sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" 1477db96d56Sopenharmony_ci self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) 1487db96d56Sopenharmony_ci 1497db96d56Sopenharmony_ci sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" 1507db96d56Sopenharmony_ci self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout) 1517db96d56Sopenharmony_ci 1527db96d56Sopenharmony_ci def test_backslashescape(self): 1537db96d56Sopenharmony_ci # Does the same as the "unicode-escape" encoding, but with different 1547db96d56Sopenharmony_ci # base encodings. 
1557db96d56Sopenharmony_ci sin = "a\xac\u1234\u20ac\u8000\U0010ffff" 1567db96d56Sopenharmony_ci sout = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" 1577db96d56Sopenharmony_ci self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) 1587db96d56Sopenharmony_ci 1597db96d56Sopenharmony_ci sout = b"a\xac\\u1234\\u20ac\\u8000\\U0010ffff" 1607db96d56Sopenharmony_ci self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) 1617db96d56Sopenharmony_ci 1627db96d56Sopenharmony_ci sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" 1637db96d56Sopenharmony_ci self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) 1647db96d56Sopenharmony_ci 1657db96d56Sopenharmony_ci def test_nameescape(self): 1667db96d56Sopenharmony_ci # Does the same as backslashescape, but prefers ``\N{...}`` escape 1677db96d56Sopenharmony_ci # sequences. 1687db96d56Sopenharmony_ci sin = "a\xac\u1234\u20ac\u8000\U0010ffff" 1697db96d56Sopenharmony_ci sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' 1707db96d56Sopenharmony_ci b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') 1717db96d56Sopenharmony_ci self.assertEqual(sin.encode("ascii", "namereplace"), sout) 1727db96d56Sopenharmony_ci 1737db96d56Sopenharmony_ci sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' 1747db96d56Sopenharmony_ci b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') 1757db96d56Sopenharmony_ci self.assertEqual(sin.encode("latin-1", "namereplace"), sout) 1767db96d56Sopenharmony_ci 1777db96d56Sopenharmony_ci sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4' 1787db96d56Sopenharmony_ci b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') 1797db96d56Sopenharmony_ci self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout) 1807db96d56Sopenharmony_ci 1817db96d56Sopenharmony_ci def test_decoding_callbacks(self): 1827db96d56Sopenharmony_ci # This is a test for a decoding callback handler 1837db96d56Sopenharmony_ci # that allows the decoding of the invalid sequence 1847db96d56Sopenharmony_ci # "\xc0\x80" and returns 
"\x00" instead of raising an error. 1857db96d56Sopenharmony_ci # All other illegal sequences will be handled strictly. 1867db96d56Sopenharmony_ci def relaxedutf8(exc): 1877db96d56Sopenharmony_ci if not isinstance(exc, UnicodeDecodeError): 1887db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 1897db96d56Sopenharmony_ci if exc.object[exc.start:exc.start+2] == b"\xc0\x80": 1907db96d56Sopenharmony_ci return ("\x00", exc.start+2) # retry after two bytes 1917db96d56Sopenharmony_ci else: 1927db96d56Sopenharmony_ci raise exc 1937db96d56Sopenharmony_ci 1947db96d56Sopenharmony_ci codecs.register_error("test.relaxedutf8", relaxedutf8) 1957db96d56Sopenharmony_ci 1967db96d56Sopenharmony_ci # all the "\xc0\x80" will be decoded to "\x00" 1977db96d56Sopenharmony_ci sin = b"a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" 1987db96d56Sopenharmony_ci sout = "a\x00b\x00c\xfc\x00\x00" 1997db96d56Sopenharmony_ci self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) 2007db96d56Sopenharmony_ci 2017db96d56Sopenharmony_ci # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised 2027db96d56Sopenharmony_ci sin = b"\xc0\x80\xc0\x81" 2037db96d56Sopenharmony_ci self.assertRaises(UnicodeDecodeError, sin.decode, 2047db96d56Sopenharmony_ci "utf-8", "test.relaxedutf8") 2057db96d56Sopenharmony_ci 2067db96d56Sopenharmony_ci def test_charmapencode(self): 2077db96d56Sopenharmony_ci # For charmap encodings the replacement string will be 2087db96d56Sopenharmony_ci # mapped through the encoding again. This means, that 2097db96d56Sopenharmony_ci # to be able to use e.g. the "replace" handler, the 2107db96d56Sopenharmony_ci # charmap has to have a mapping for "?". 
2117db96d56Sopenharmony_ci charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh") 2127db96d56Sopenharmony_ci sin = "abc" 2137db96d56Sopenharmony_ci sout = b"AABBCC" 2147db96d56Sopenharmony_ci self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout) 2157db96d56Sopenharmony_ci 2167db96d56Sopenharmony_ci sin = "abcA" 2177db96d56Sopenharmony_ci self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) 2187db96d56Sopenharmony_ci 2197db96d56Sopenharmony_ci charmap[ord("?")] = b"XYZ" 2207db96d56Sopenharmony_ci sin = "abcDEF" 2217db96d56Sopenharmony_ci sout = b"AABBCCXYZXYZXYZ" 2227db96d56Sopenharmony_ci self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout) 2237db96d56Sopenharmony_ci 2247db96d56Sopenharmony_ci charmap[ord("?")] = "XYZ" # wrong type in mapping 2257db96d56Sopenharmony_ci self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) 2267db96d56Sopenharmony_ci 2277db96d56Sopenharmony_ci def test_callbacks(self): 2287db96d56Sopenharmony_ci def handler1(exc): 2297db96d56Sopenharmony_ci r = range(exc.start, exc.end) 2307db96d56Sopenharmony_ci if isinstance(exc, UnicodeEncodeError): 2317db96d56Sopenharmony_ci l = ["<%d>" % ord(exc.object[pos]) for pos in r] 2327db96d56Sopenharmony_ci elif isinstance(exc, UnicodeDecodeError): 2337db96d56Sopenharmony_ci l = ["<%d>" % exc.object[pos] for pos in r] 2347db96d56Sopenharmony_ci else: 2357db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 2367db96d56Sopenharmony_ci return ("[%s]" % "".join(l), exc.end) 2377db96d56Sopenharmony_ci 2387db96d56Sopenharmony_ci codecs.register_error("test.handler1", handler1) 2397db96d56Sopenharmony_ci 2407db96d56Sopenharmony_ci def handler2(exc): 2417db96d56Sopenharmony_ci if not isinstance(exc, UnicodeDecodeError): 2427db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 2437db96d56Sopenharmony_ci l = ["<%d>" % exc.object[pos] for pos in range(exc.start, 
exc.end)] 2447db96d56Sopenharmony_ci return ("[%s]" % "".join(l), exc.end+1) # skip one character 2457db96d56Sopenharmony_ci 2467db96d56Sopenharmony_ci codecs.register_error("test.handler2", handler2) 2477db96d56Sopenharmony_ci 2487db96d56Sopenharmony_ci s = b"\x00\x81\x7f\x80\xff" 2497db96d56Sopenharmony_ci 2507db96d56Sopenharmony_ci self.assertEqual( 2517db96d56Sopenharmony_ci s.decode("ascii", "test.handler1"), 2527db96d56Sopenharmony_ci "\x00[<129>]\x7f[<128>][<255>]" 2537db96d56Sopenharmony_ci ) 2547db96d56Sopenharmony_ci self.assertEqual( 2557db96d56Sopenharmony_ci s.decode("ascii", "test.handler2"), 2567db96d56Sopenharmony_ci "\x00[<129>][<128>]" 2577db96d56Sopenharmony_ci ) 2587db96d56Sopenharmony_ci 2597db96d56Sopenharmony_ci self.assertEqual( 2607db96d56Sopenharmony_ci b"\\u3042\\u3xxx".decode("unicode-escape", "test.handler1"), 2617db96d56Sopenharmony_ci "\u3042[<92><117><51>]xxx" 2627db96d56Sopenharmony_ci ) 2637db96d56Sopenharmony_ci 2647db96d56Sopenharmony_ci self.assertEqual( 2657db96d56Sopenharmony_ci b"\\u3042\\u3xx".decode("unicode-escape", "test.handler1"), 2667db96d56Sopenharmony_ci "\u3042[<92><117><51>]xx" 2677db96d56Sopenharmony_ci ) 2687db96d56Sopenharmony_ci 2697db96d56Sopenharmony_ci self.assertEqual( 2707db96d56Sopenharmony_ci codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0], 2717db96d56Sopenharmony_ci "z[<98>][<99>]" 2727db96d56Sopenharmony_ci ) 2737db96d56Sopenharmony_ci 2747db96d56Sopenharmony_ci self.assertEqual( 2757db96d56Sopenharmony_ci "g\xfc\xdfrk".encode("ascii", "test.handler1"), 2767db96d56Sopenharmony_ci b"g[<252><223>]rk" 2777db96d56Sopenharmony_ci ) 2787db96d56Sopenharmony_ci 2797db96d56Sopenharmony_ci self.assertEqual( 2807db96d56Sopenharmony_ci "g\xfc\xdf".encode("ascii", "test.handler1"), 2817db96d56Sopenharmony_ci b"g[<252><223>]" 2827db96d56Sopenharmony_ci ) 2837db96d56Sopenharmony_ci 2847db96d56Sopenharmony_ci def test_longstrings(self): 2857db96d56Sopenharmony_ci # test long strings to check for 
memory overflow problems 2867db96d56Sopenharmony_ci errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", 2877db96d56Sopenharmony_ci "backslashreplace", "namereplace"] 2887db96d56Sopenharmony_ci # register the handlers under different names, 2897db96d56Sopenharmony_ci # to prevent the codec from recognizing the name 2907db96d56Sopenharmony_ci for err in errors: 2917db96d56Sopenharmony_ci codecs.register_error("test." + err, codecs.lookup_error(err)) 2927db96d56Sopenharmony_ci l = 1000 2937db96d56Sopenharmony_ci errors += [ "test." + err for err in errors ] 2947db96d56Sopenharmony_ci for uni in [ s*l for s in ("x", "\u3042", "a\xe4") ]: 2957db96d56Sopenharmony_ci for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", 2967db96d56Sopenharmony_ci "utf-8", "utf-7", "utf-16", "utf-32"): 2977db96d56Sopenharmony_ci for err in errors: 2987db96d56Sopenharmony_ci try: 2997db96d56Sopenharmony_ci uni.encode(enc, err) 3007db96d56Sopenharmony_ci except UnicodeError: 3017db96d56Sopenharmony_ci pass 3027db96d56Sopenharmony_ci 3037db96d56Sopenharmony_ci def check_exceptionobjectargs(self, exctype, args, msg): 3047db96d56Sopenharmony_ci # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion 3057db96d56Sopenharmony_ci # check with one missing argument 3067db96d56Sopenharmony_ci self.assertRaises(TypeError, exctype, *args[:-1]) 3077db96d56Sopenharmony_ci # check with one argument too much 3087db96d56Sopenharmony_ci self.assertRaises(TypeError, exctype, *(args + ["too much"])) 3097db96d56Sopenharmony_ci # check with one argument of the wrong type 3107db96d56Sopenharmony_ci wrongargs = [ "spam", b"eggs", b"spam", 42, 1.0, None ] 3117db96d56Sopenharmony_ci for i in range(len(args)): 3127db96d56Sopenharmony_ci for wrongarg in wrongargs: 3137db96d56Sopenharmony_ci if type(wrongarg) is type(args[i]): 3147db96d56Sopenharmony_ci continue 3157db96d56Sopenharmony_ci # build argument array 3167db96d56Sopenharmony_ci callargs = [] 
3177db96d56Sopenharmony_ci for j in range(len(args)): 3187db96d56Sopenharmony_ci if i==j: 3197db96d56Sopenharmony_ci callargs.append(wrongarg) 3207db96d56Sopenharmony_ci else: 3217db96d56Sopenharmony_ci callargs.append(args[i]) 3227db96d56Sopenharmony_ci self.assertRaises(TypeError, exctype, *callargs) 3237db96d56Sopenharmony_ci 3247db96d56Sopenharmony_ci # check with the correct number and type of arguments 3257db96d56Sopenharmony_ci exc = exctype(*args) 3267db96d56Sopenharmony_ci self.assertEqual(str(exc), msg) 3277db96d56Sopenharmony_ci 3287db96d56Sopenharmony_ci def test_unicodeencodeerror(self): 3297db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3307db96d56Sopenharmony_ci UnicodeEncodeError, 3317db96d56Sopenharmony_ci ["ascii", "g\xfcrk", 1, 2, "ouch"], 3327db96d56Sopenharmony_ci "'ascii' codec can't encode character '\\xfc' in position 1: ouch" 3337db96d56Sopenharmony_ci ) 3347db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3357db96d56Sopenharmony_ci UnicodeEncodeError, 3367db96d56Sopenharmony_ci ["ascii", "g\xfcrk", 1, 4, "ouch"], 3377db96d56Sopenharmony_ci "'ascii' codec can't encode characters in position 1-3: ouch" 3387db96d56Sopenharmony_ci ) 3397db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3407db96d56Sopenharmony_ci UnicodeEncodeError, 3417db96d56Sopenharmony_ci ["ascii", "\xfcx", 0, 1, "ouch"], 3427db96d56Sopenharmony_ci "'ascii' codec can't encode character '\\xfc' in position 0: ouch" 3437db96d56Sopenharmony_ci ) 3447db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3457db96d56Sopenharmony_ci UnicodeEncodeError, 3467db96d56Sopenharmony_ci ["ascii", "\u0100x", 0, 1, "ouch"], 3477db96d56Sopenharmony_ci "'ascii' codec can't encode character '\\u0100' in position 0: ouch" 3487db96d56Sopenharmony_ci ) 3497db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3507db96d56Sopenharmony_ci UnicodeEncodeError, 3517db96d56Sopenharmony_ci ["ascii", "\uffffx", 0, 1, "ouch"], 3527db96d56Sopenharmony_ci "'ascii' codec can't encode 
character '\\uffff' in position 0: ouch" 3537db96d56Sopenharmony_ci ) 3547db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3557db96d56Sopenharmony_ci UnicodeEncodeError, 3567db96d56Sopenharmony_ci ["ascii", "\U00010000x", 0, 1, "ouch"], 3577db96d56Sopenharmony_ci "'ascii' codec can't encode character '\\U00010000' in position 0: ouch" 3587db96d56Sopenharmony_ci ) 3597db96d56Sopenharmony_ci 3607db96d56Sopenharmony_ci def test_unicodedecodeerror(self): 3617db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3627db96d56Sopenharmony_ci UnicodeDecodeError, 3637db96d56Sopenharmony_ci ["ascii", bytearray(b"g\xfcrk"), 1, 2, "ouch"], 3647db96d56Sopenharmony_ci "'ascii' codec can't decode byte 0xfc in position 1: ouch" 3657db96d56Sopenharmony_ci ) 3667db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3677db96d56Sopenharmony_ci UnicodeDecodeError, 3687db96d56Sopenharmony_ci ["ascii", bytearray(b"g\xfcrk"), 1, 3, "ouch"], 3697db96d56Sopenharmony_ci "'ascii' codec can't decode bytes in position 1-2: ouch" 3707db96d56Sopenharmony_ci ) 3717db96d56Sopenharmony_ci 3727db96d56Sopenharmony_ci def test_unicodetranslateerror(self): 3737db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3747db96d56Sopenharmony_ci UnicodeTranslateError, 3757db96d56Sopenharmony_ci ["g\xfcrk", 1, 2, "ouch"], 3767db96d56Sopenharmony_ci "can't translate character '\\xfc' in position 1: ouch" 3777db96d56Sopenharmony_ci ) 3787db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3797db96d56Sopenharmony_ci UnicodeTranslateError, 3807db96d56Sopenharmony_ci ["g\u0100rk", 1, 2, "ouch"], 3817db96d56Sopenharmony_ci "can't translate character '\\u0100' in position 1: ouch" 3827db96d56Sopenharmony_ci ) 3837db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3847db96d56Sopenharmony_ci UnicodeTranslateError, 3857db96d56Sopenharmony_ci ["g\uffffrk", 1, 2, "ouch"], 3867db96d56Sopenharmony_ci "can't translate character '\\uffff' in position 1: ouch" 3877db96d56Sopenharmony_ci ) 
3887db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3897db96d56Sopenharmony_ci UnicodeTranslateError, 3907db96d56Sopenharmony_ci ["g\U00010000rk", 1, 2, "ouch"], 3917db96d56Sopenharmony_ci "can't translate character '\\U00010000' in position 1: ouch" 3927db96d56Sopenharmony_ci ) 3937db96d56Sopenharmony_ci self.check_exceptionobjectargs( 3947db96d56Sopenharmony_ci UnicodeTranslateError, 3957db96d56Sopenharmony_ci ["g\xfcrk", 1, 3, "ouch"], 3967db96d56Sopenharmony_ci "can't translate characters in position 1-2: ouch" 3977db96d56Sopenharmony_ci ) 3987db96d56Sopenharmony_ci 3997db96d56Sopenharmony_ci def test_badandgoodstrictexceptions(self): 4007db96d56Sopenharmony_ci # "strict" complains about a non-exception passed in 4017db96d56Sopenharmony_ci self.assertRaises( 4027db96d56Sopenharmony_ci TypeError, 4037db96d56Sopenharmony_ci codecs.strict_errors, 4047db96d56Sopenharmony_ci 42 4057db96d56Sopenharmony_ci ) 4067db96d56Sopenharmony_ci # "strict" complains about the wrong exception type 4077db96d56Sopenharmony_ci self.assertRaises( 4087db96d56Sopenharmony_ci Exception, 4097db96d56Sopenharmony_ci codecs.strict_errors, 4107db96d56Sopenharmony_ci Exception("ouch") 4117db96d56Sopenharmony_ci ) 4127db96d56Sopenharmony_ci 4137db96d56Sopenharmony_ci # If the correct exception is passed in, "strict" raises it 4147db96d56Sopenharmony_ci self.assertRaises( 4157db96d56Sopenharmony_ci UnicodeEncodeError, 4167db96d56Sopenharmony_ci codecs.strict_errors, 4177db96d56Sopenharmony_ci UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch") 4187db96d56Sopenharmony_ci ) 4197db96d56Sopenharmony_ci self.assertRaises( 4207db96d56Sopenharmony_ci UnicodeDecodeError, 4217db96d56Sopenharmony_ci codecs.strict_errors, 4227db96d56Sopenharmony_ci UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") 4237db96d56Sopenharmony_ci ) 4247db96d56Sopenharmony_ci self.assertRaises( 4257db96d56Sopenharmony_ci UnicodeTranslateError, 4267db96d56Sopenharmony_ci codecs.strict_errors, 
4277db96d56Sopenharmony_ci UnicodeTranslateError("\u3042", 0, 1, "ouch") 4287db96d56Sopenharmony_ci ) 4297db96d56Sopenharmony_ci 4307db96d56Sopenharmony_ci def test_badandgoodignoreexceptions(self): 4317db96d56Sopenharmony_ci # "ignore" complains about a non-exception passed in 4327db96d56Sopenharmony_ci self.assertRaises( 4337db96d56Sopenharmony_ci TypeError, 4347db96d56Sopenharmony_ci codecs.ignore_errors, 4357db96d56Sopenharmony_ci 42 4367db96d56Sopenharmony_ci ) 4377db96d56Sopenharmony_ci # "ignore" complains about the wrong exception type 4387db96d56Sopenharmony_ci self.assertRaises( 4397db96d56Sopenharmony_ci TypeError, 4407db96d56Sopenharmony_ci codecs.ignore_errors, 4417db96d56Sopenharmony_ci UnicodeError("ouch") 4427db96d56Sopenharmony_ci ) 4437db96d56Sopenharmony_ci # If the correct exception is passed in, "ignore" returns an empty replacement 4447db96d56Sopenharmony_ci self.assertEqual( 4457db96d56Sopenharmony_ci codecs.ignore_errors( 4467db96d56Sopenharmony_ci UnicodeEncodeError("ascii", "a\u3042b", 1, 2, "ouch")), 4477db96d56Sopenharmony_ci ("", 2) 4487db96d56Sopenharmony_ci ) 4497db96d56Sopenharmony_ci self.assertEqual( 4507db96d56Sopenharmony_ci codecs.ignore_errors( 4517db96d56Sopenharmony_ci UnicodeDecodeError("ascii", bytearray(b"a\xffb"), 1, 2, "ouch")), 4527db96d56Sopenharmony_ci ("", 2) 4537db96d56Sopenharmony_ci ) 4547db96d56Sopenharmony_ci self.assertEqual( 4557db96d56Sopenharmony_ci codecs.ignore_errors( 4567db96d56Sopenharmony_ci UnicodeTranslateError("a\u3042b", 1, 2, "ouch")), 4577db96d56Sopenharmony_ci ("", 2) 4587db96d56Sopenharmony_ci ) 4597db96d56Sopenharmony_ci 4607db96d56Sopenharmony_ci def test_badandgoodreplaceexceptions(self): 4617db96d56Sopenharmony_ci # "replace" complains about a non-exception passed in 4627db96d56Sopenharmony_ci self.assertRaises( 4637db96d56Sopenharmony_ci TypeError, 4647db96d56Sopenharmony_ci codecs.replace_errors, 4657db96d56Sopenharmony_ci 42 4667db96d56Sopenharmony_ci ) 4677db96d56Sopenharmony_ci # 
"replace" complains about the wrong exception type 4687db96d56Sopenharmony_ci self.assertRaises( 4697db96d56Sopenharmony_ci TypeError, 4707db96d56Sopenharmony_ci codecs.replace_errors, 4717db96d56Sopenharmony_ci UnicodeError("ouch") 4727db96d56Sopenharmony_ci ) 4737db96d56Sopenharmony_ci self.assertRaises( 4747db96d56Sopenharmony_ci TypeError, 4757db96d56Sopenharmony_ci codecs.replace_errors, 4767db96d56Sopenharmony_ci BadObjectUnicodeEncodeError() 4777db96d56Sopenharmony_ci ) 4787db96d56Sopenharmony_ci self.assertRaises( 4797db96d56Sopenharmony_ci TypeError, 4807db96d56Sopenharmony_ci codecs.replace_errors, 4817db96d56Sopenharmony_ci BadObjectUnicodeDecodeError() 4827db96d56Sopenharmony_ci ) 4837db96d56Sopenharmony_ci # With the correct exception, "replace" returns an "?" or "\ufffd" replacement 4847db96d56Sopenharmony_ci self.assertEqual( 4857db96d56Sopenharmony_ci codecs.replace_errors( 4867db96d56Sopenharmony_ci UnicodeEncodeError("ascii", "a\u3042b", 1, 2, "ouch")), 4877db96d56Sopenharmony_ci ("?", 2) 4887db96d56Sopenharmony_ci ) 4897db96d56Sopenharmony_ci self.assertEqual( 4907db96d56Sopenharmony_ci codecs.replace_errors( 4917db96d56Sopenharmony_ci UnicodeDecodeError("ascii", bytearray(b"a\xffb"), 1, 2, "ouch")), 4927db96d56Sopenharmony_ci ("\ufffd", 2) 4937db96d56Sopenharmony_ci ) 4947db96d56Sopenharmony_ci self.assertEqual( 4957db96d56Sopenharmony_ci codecs.replace_errors( 4967db96d56Sopenharmony_ci UnicodeTranslateError("a\u3042b", 1, 2, "ouch")), 4977db96d56Sopenharmony_ci ("\ufffd", 2) 4987db96d56Sopenharmony_ci ) 4997db96d56Sopenharmony_ci 5007db96d56Sopenharmony_ci def test_badandgoodxmlcharrefreplaceexceptions(self): 5017db96d56Sopenharmony_ci # "xmlcharrefreplace" complains about a non-exception passed in 5027db96d56Sopenharmony_ci self.assertRaises( 5037db96d56Sopenharmony_ci TypeError, 5047db96d56Sopenharmony_ci codecs.xmlcharrefreplace_errors, 5057db96d56Sopenharmony_ci 42 5067db96d56Sopenharmony_ci ) 5077db96d56Sopenharmony_ci # 
"xmlcharrefreplace" complains about the wrong exception types 5087db96d56Sopenharmony_ci self.assertRaises( 5097db96d56Sopenharmony_ci TypeError, 5107db96d56Sopenharmony_ci codecs.xmlcharrefreplace_errors, 5117db96d56Sopenharmony_ci UnicodeError("ouch") 5127db96d56Sopenharmony_ci ) 5137db96d56Sopenharmony_ci # "xmlcharrefreplace" can only be used for encoding 5147db96d56Sopenharmony_ci self.assertRaises( 5157db96d56Sopenharmony_ci TypeError, 5167db96d56Sopenharmony_ci codecs.xmlcharrefreplace_errors, 5177db96d56Sopenharmony_ci UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") 5187db96d56Sopenharmony_ci ) 5197db96d56Sopenharmony_ci self.assertRaises( 5207db96d56Sopenharmony_ci TypeError, 5217db96d56Sopenharmony_ci codecs.xmlcharrefreplace_errors, 5227db96d56Sopenharmony_ci UnicodeTranslateError("\u3042", 0, 1, "ouch") 5237db96d56Sopenharmony_ci ) 5247db96d56Sopenharmony_ci # Use the correct exception 5257db96d56Sopenharmony_ci cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 99999, 100000, 5267db96d56Sopenharmony_ci 999999, 1000000) 5277db96d56Sopenharmony_ci cs += (0xd800, 0xdfff) 5287db96d56Sopenharmony_ci s = "".join(chr(c) for c in cs) 5297db96d56Sopenharmony_ci self.assertEqual( 5307db96d56Sopenharmony_ci codecs.xmlcharrefreplace_errors( 5317db96d56Sopenharmony_ci UnicodeEncodeError("ascii", "a" + s + "b", 5327db96d56Sopenharmony_ci 1, 1 + len(s), "ouch") 5337db96d56Sopenharmony_ci ), 5347db96d56Sopenharmony_ci ("".join("&#%d;" % c for c in cs), 1 + len(s)) 5357db96d56Sopenharmony_ci ) 5367db96d56Sopenharmony_ci 5377db96d56Sopenharmony_ci def test_badandgoodbackslashreplaceexceptions(self): 5387db96d56Sopenharmony_ci # "backslashreplace" complains about a non-exception passed in 5397db96d56Sopenharmony_ci self.assertRaises( 5407db96d56Sopenharmony_ci TypeError, 5417db96d56Sopenharmony_ci codecs.backslashreplace_errors, 5427db96d56Sopenharmony_ci 42 5437db96d56Sopenharmony_ci ) 5447db96d56Sopenharmony_ci # "backslashreplace" complains about the 
wrong exception types 5457db96d56Sopenharmony_ci self.assertRaises( 5467db96d56Sopenharmony_ci TypeError, 5477db96d56Sopenharmony_ci codecs.backslashreplace_errors, 5487db96d56Sopenharmony_ci UnicodeError("ouch") 5497db96d56Sopenharmony_ci ) 5507db96d56Sopenharmony_ci # Use the correct exception 5517db96d56Sopenharmony_ci tests = [ 5527db96d56Sopenharmony_ci ("\u3042", "\\u3042"), 5537db96d56Sopenharmony_ci ("\n", "\\x0a"), 5547db96d56Sopenharmony_ci ("a", "\\x61"), 5557db96d56Sopenharmony_ci ("\x00", "\\x00"), 5567db96d56Sopenharmony_ci ("\xff", "\\xff"), 5577db96d56Sopenharmony_ci ("\u0100", "\\u0100"), 5587db96d56Sopenharmony_ci ("\uffff", "\\uffff"), 5597db96d56Sopenharmony_ci ("\U00010000", "\\U00010000"), 5607db96d56Sopenharmony_ci ("\U0010ffff", "\\U0010ffff"), 5617db96d56Sopenharmony_ci # Lone surrogates 5627db96d56Sopenharmony_ci ("\ud800", "\\ud800"), 5637db96d56Sopenharmony_ci ("\udfff", "\\udfff"), 5647db96d56Sopenharmony_ci ("\ud800\udfff", "\\ud800\\udfff"), 5657db96d56Sopenharmony_ci ] 5667db96d56Sopenharmony_ci for s, r in tests: 5677db96d56Sopenharmony_ci with self.subTest(str=s): 5687db96d56Sopenharmony_ci self.assertEqual( 5697db96d56Sopenharmony_ci codecs.backslashreplace_errors( 5707db96d56Sopenharmony_ci UnicodeEncodeError("ascii", "a" + s + "b", 5717db96d56Sopenharmony_ci 1, 1 + len(s), "ouch")), 5727db96d56Sopenharmony_ci (r, 1 + len(s)) 5737db96d56Sopenharmony_ci ) 5747db96d56Sopenharmony_ci self.assertEqual( 5757db96d56Sopenharmony_ci codecs.backslashreplace_errors( 5767db96d56Sopenharmony_ci UnicodeTranslateError("a" + s + "b", 5777db96d56Sopenharmony_ci 1, 1 + len(s), "ouch")), 5787db96d56Sopenharmony_ci (r, 1 + len(s)) 5797db96d56Sopenharmony_ci ) 5807db96d56Sopenharmony_ci tests = [ 5817db96d56Sopenharmony_ci (b"a", "\\x61"), 5827db96d56Sopenharmony_ci (b"\n", "\\x0a"), 5837db96d56Sopenharmony_ci (b"\x00", "\\x00"), 5847db96d56Sopenharmony_ci (b"\xff", "\\xff"), 5857db96d56Sopenharmony_ci ] 5867db96d56Sopenharmony_ci for b, r in tests: 
5877db96d56Sopenharmony_ci with self.subTest(bytes=b): 5887db96d56Sopenharmony_ci self.assertEqual( 5897db96d56Sopenharmony_ci codecs.backslashreplace_errors( 5907db96d56Sopenharmony_ci UnicodeDecodeError("ascii", bytearray(b"a" + b + b"b"), 5917db96d56Sopenharmony_ci 1, 2, "ouch")), 5927db96d56Sopenharmony_ci (r, 2) 5937db96d56Sopenharmony_ci ) 5947db96d56Sopenharmony_ci 5957db96d56Sopenharmony_ci def test_badandgoodnamereplaceexceptions(self): 5967db96d56Sopenharmony_ci # "namereplace" complains about a non-exception passed in 5977db96d56Sopenharmony_ci self.assertRaises( 5987db96d56Sopenharmony_ci TypeError, 5997db96d56Sopenharmony_ci codecs.namereplace_errors, 6007db96d56Sopenharmony_ci 42 6017db96d56Sopenharmony_ci ) 6027db96d56Sopenharmony_ci # "namereplace" complains about the wrong exception types 6037db96d56Sopenharmony_ci self.assertRaises( 6047db96d56Sopenharmony_ci TypeError, 6057db96d56Sopenharmony_ci codecs.namereplace_errors, 6067db96d56Sopenharmony_ci UnicodeError("ouch") 6077db96d56Sopenharmony_ci ) 6087db96d56Sopenharmony_ci # "namereplace" can only be used for encoding 6097db96d56Sopenharmony_ci self.assertRaises( 6107db96d56Sopenharmony_ci TypeError, 6117db96d56Sopenharmony_ci codecs.namereplace_errors, 6127db96d56Sopenharmony_ci UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") 6137db96d56Sopenharmony_ci ) 6147db96d56Sopenharmony_ci self.assertRaises( 6157db96d56Sopenharmony_ci TypeError, 6167db96d56Sopenharmony_ci codecs.namereplace_errors, 6177db96d56Sopenharmony_ci UnicodeTranslateError("\u3042", 0, 1, "ouch") 6187db96d56Sopenharmony_ci ) 6197db96d56Sopenharmony_ci # Use the correct exception 6207db96d56Sopenharmony_ci tests = [ 6217db96d56Sopenharmony_ci ("\u3042", "\\N{HIRAGANA LETTER A}"), 6227db96d56Sopenharmony_ci ("\x00", "\\x00"), 6237db96d56Sopenharmony_ci ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH " 6247db96d56Sopenharmony_ci "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"), 6257db96d56Sopenharmony_ci 
("\U000e007f", "\\N{CANCEL TAG}"), 6267db96d56Sopenharmony_ci ("\U0010ffff", "\\U0010ffff"), 6277db96d56Sopenharmony_ci # Lone surrogates 6287db96d56Sopenharmony_ci ("\ud800", "\\ud800"), 6297db96d56Sopenharmony_ci ("\udfff", "\\udfff"), 6307db96d56Sopenharmony_ci ("\ud800\udfff", "\\ud800\\udfff"), 6317db96d56Sopenharmony_ci ] 6327db96d56Sopenharmony_ci for s, r in tests: 6337db96d56Sopenharmony_ci with self.subTest(str=s): 6347db96d56Sopenharmony_ci self.assertEqual( 6357db96d56Sopenharmony_ci codecs.namereplace_errors( 6367db96d56Sopenharmony_ci UnicodeEncodeError("ascii", "a" + s + "b", 6377db96d56Sopenharmony_ci 1, 1 + len(s), "ouch")), 6387db96d56Sopenharmony_ci (r, 1 + len(s)) 6397db96d56Sopenharmony_ci ) 6407db96d56Sopenharmony_ci 6417db96d56Sopenharmony_ci def test_badandgoodsurrogateescapeexceptions(self): 6427db96d56Sopenharmony_ci surrogateescape_errors = codecs.lookup_error('surrogateescape') 6437db96d56Sopenharmony_ci # "surrogateescape" complains about a non-exception passed in 6447db96d56Sopenharmony_ci self.assertRaises( 6457db96d56Sopenharmony_ci TypeError, 6467db96d56Sopenharmony_ci surrogateescape_errors, 6477db96d56Sopenharmony_ci 42 6487db96d56Sopenharmony_ci ) 6497db96d56Sopenharmony_ci # "surrogateescape" complains about the wrong exception types 6507db96d56Sopenharmony_ci self.assertRaises( 6517db96d56Sopenharmony_ci TypeError, 6527db96d56Sopenharmony_ci surrogateescape_errors, 6537db96d56Sopenharmony_ci UnicodeError("ouch") 6547db96d56Sopenharmony_ci ) 6557db96d56Sopenharmony_ci # "surrogateescape" can not be used for translating 6567db96d56Sopenharmony_ci self.assertRaises( 6577db96d56Sopenharmony_ci TypeError, 6587db96d56Sopenharmony_ci surrogateescape_errors, 6597db96d56Sopenharmony_ci UnicodeTranslateError("\udc80", 0, 1, "ouch") 6607db96d56Sopenharmony_ci ) 6617db96d56Sopenharmony_ci # Use the correct exception 6627db96d56Sopenharmony_ci for s in ("a", "\udc7f", "\udd00"): 6637db96d56Sopenharmony_ci with self.subTest(str=s): 
6647db96d56Sopenharmony_ci self.assertRaises( 6657db96d56Sopenharmony_ci UnicodeEncodeError, 6667db96d56Sopenharmony_ci surrogateescape_errors, 6677db96d56Sopenharmony_ci UnicodeEncodeError("ascii", s, 0, 1, "ouch") 6687db96d56Sopenharmony_ci ) 6697db96d56Sopenharmony_ci self.assertEqual( 6707db96d56Sopenharmony_ci surrogateescape_errors( 6717db96d56Sopenharmony_ci UnicodeEncodeError("ascii", "a\udc80b", 1, 2, "ouch")), 6727db96d56Sopenharmony_ci (b"\x80", 2) 6737db96d56Sopenharmony_ci ) 6747db96d56Sopenharmony_ci self.assertRaises( 6757db96d56Sopenharmony_ci UnicodeDecodeError, 6767db96d56Sopenharmony_ci surrogateescape_errors, 6777db96d56Sopenharmony_ci UnicodeDecodeError("ascii", bytearray(b"a"), 0, 1, "ouch") 6787db96d56Sopenharmony_ci ) 6797db96d56Sopenharmony_ci self.assertEqual( 6807db96d56Sopenharmony_ci surrogateescape_errors( 6817db96d56Sopenharmony_ci UnicodeDecodeError("ascii", bytearray(b"a\x80b"), 1, 2, "ouch")), 6827db96d56Sopenharmony_ci ("\udc80", 2) 6837db96d56Sopenharmony_ci ) 6847db96d56Sopenharmony_ci 6857db96d56Sopenharmony_ci def test_badandgoodsurrogatepassexceptions(self): 6867db96d56Sopenharmony_ci surrogatepass_errors = codecs.lookup_error('surrogatepass') 6877db96d56Sopenharmony_ci # "surrogatepass" complains about a non-exception passed in 6887db96d56Sopenharmony_ci self.assertRaises( 6897db96d56Sopenharmony_ci TypeError, 6907db96d56Sopenharmony_ci surrogatepass_errors, 6917db96d56Sopenharmony_ci 42 6927db96d56Sopenharmony_ci ) 6937db96d56Sopenharmony_ci # "surrogatepass" complains about the wrong exception types 6947db96d56Sopenharmony_ci self.assertRaises( 6957db96d56Sopenharmony_ci TypeError, 6967db96d56Sopenharmony_ci surrogatepass_errors, 6977db96d56Sopenharmony_ci UnicodeError("ouch") 6987db96d56Sopenharmony_ci ) 6997db96d56Sopenharmony_ci # "surrogatepass" can not be used for translating 7007db96d56Sopenharmony_ci self.assertRaises( 7017db96d56Sopenharmony_ci TypeError, 7027db96d56Sopenharmony_ci surrogatepass_errors, 
7037db96d56Sopenharmony_ci UnicodeTranslateError("\ud800", 0, 1, "ouch") 7047db96d56Sopenharmony_ci ) 7057db96d56Sopenharmony_ci # Use the correct exception 7067db96d56Sopenharmony_ci for enc in ("utf-8", "utf-16le", "utf-16be", "utf-32le", "utf-32be"): 7077db96d56Sopenharmony_ci with self.subTest(encoding=enc): 7087db96d56Sopenharmony_ci self.assertRaises( 7097db96d56Sopenharmony_ci UnicodeEncodeError, 7107db96d56Sopenharmony_ci surrogatepass_errors, 7117db96d56Sopenharmony_ci UnicodeEncodeError(enc, "a", 0, 1, "ouch") 7127db96d56Sopenharmony_ci ) 7137db96d56Sopenharmony_ci self.assertRaises( 7147db96d56Sopenharmony_ci UnicodeDecodeError, 7157db96d56Sopenharmony_ci surrogatepass_errors, 7167db96d56Sopenharmony_ci UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch") 7177db96d56Sopenharmony_ci ) 7187db96d56Sopenharmony_ci for s in ("\ud800", "\udfff", "\ud800\udfff"): 7197db96d56Sopenharmony_ci with self.subTest(str=s): 7207db96d56Sopenharmony_ci self.assertRaises( 7217db96d56Sopenharmony_ci UnicodeEncodeError, 7227db96d56Sopenharmony_ci surrogatepass_errors, 7237db96d56Sopenharmony_ci UnicodeEncodeError("ascii", s, 0, len(s), "ouch") 7247db96d56Sopenharmony_ci ) 7257db96d56Sopenharmony_ci tests = [ 7267db96d56Sopenharmony_ci ("utf-8", "\ud800", b'\xed\xa0\x80', 3), 7277db96d56Sopenharmony_ci ("utf-16le", "\ud800", b'\x00\xd8', 2), 7287db96d56Sopenharmony_ci ("utf-16be", "\ud800", b'\xd8\x00', 2), 7297db96d56Sopenharmony_ci ("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4), 7307db96d56Sopenharmony_ci ("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4), 7317db96d56Sopenharmony_ci ("utf-8", "\udfff", b'\xed\xbf\xbf', 3), 7327db96d56Sopenharmony_ci ("utf-16le", "\udfff", b'\xff\xdf', 2), 7337db96d56Sopenharmony_ci ("utf-16be", "\udfff", b'\xdf\xff', 2), 7347db96d56Sopenharmony_ci ("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4), 7357db96d56Sopenharmony_ci ("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4), 7367db96d56Sopenharmony_ci ("utf-8", "\ud800\udfff", 
b'\xed\xa0\x80\xed\xbf\xbf', 3), 7377db96d56Sopenharmony_ci ("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2), 7387db96d56Sopenharmony_ci ("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2), 7397db96d56Sopenharmony_ci ("utf-32le", "\ud800\udfff", b'\x00\xd8\x00\x00\xff\xdf\x00\x00', 4), 7407db96d56Sopenharmony_ci ("utf-32be", "\ud800\udfff", b'\x00\x00\xd8\x00\x00\x00\xdf\xff', 4), 7417db96d56Sopenharmony_ci ] 7427db96d56Sopenharmony_ci for enc, s, b, n in tests: 7437db96d56Sopenharmony_ci with self.subTest(encoding=enc, str=s, bytes=b): 7447db96d56Sopenharmony_ci self.assertEqual( 7457db96d56Sopenharmony_ci surrogatepass_errors( 7467db96d56Sopenharmony_ci UnicodeEncodeError(enc, "a" + s + "b", 7477db96d56Sopenharmony_ci 1, 1 + len(s), "ouch")), 7487db96d56Sopenharmony_ci (b, 1 + len(s)) 7497db96d56Sopenharmony_ci ) 7507db96d56Sopenharmony_ci self.assertEqual( 7517db96d56Sopenharmony_ci surrogatepass_errors( 7527db96d56Sopenharmony_ci UnicodeDecodeError(enc, bytearray(b"a" + b[:n] + b"b"), 7537db96d56Sopenharmony_ci 1, 1 + n, "ouch")), 7547db96d56Sopenharmony_ci (s[:1], 1 + n) 7557db96d56Sopenharmony_ci ) 7567db96d56Sopenharmony_ci 7577db96d56Sopenharmony_ci def test_badhandlerresults(self): 7587db96d56Sopenharmony_ci results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) 7597db96d56Sopenharmony_ci encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") 7607db96d56Sopenharmony_ci 7617db96d56Sopenharmony_ci for res in results: 7627db96d56Sopenharmony_ci codecs.register_error("test.badhandler", lambda x: res) 7637db96d56Sopenharmony_ci for enc in encs: 7647db96d56Sopenharmony_ci self.assertRaises( 7657db96d56Sopenharmony_ci TypeError, 7667db96d56Sopenharmony_ci "\u3042".encode, 7677db96d56Sopenharmony_ci enc, 7687db96d56Sopenharmony_ci "test.badhandler" 7697db96d56Sopenharmony_ci ) 7707db96d56Sopenharmony_ci for (enc, bytes) in ( 7717db96d56Sopenharmony_ci ("ascii", b"\xff"), 7727db96d56Sopenharmony_ci 
("utf-8", b"\xff"), 7737db96d56Sopenharmony_ci ("utf-7", b"+x-"), 7747db96d56Sopenharmony_ci ): 7757db96d56Sopenharmony_ci self.assertRaises( 7767db96d56Sopenharmony_ci TypeError, 7777db96d56Sopenharmony_ci bytes.decode, 7787db96d56Sopenharmony_ci enc, 7797db96d56Sopenharmony_ci "test.badhandler" 7807db96d56Sopenharmony_ci ) 7817db96d56Sopenharmony_ci 7827db96d56Sopenharmony_ci def test_lookup(self): 7837db96d56Sopenharmony_ci self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 7847db96d56Sopenharmony_ci self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore")) 7857db96d56Sopenharmony_ci self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 7867db96d56Sopenharmony_ci self.assertEqual( 7877db96d56Sopenharmony_ci codecs.xmlcharrefreplace_errors, 7887db96d56Sopenharmony_ci codecs.lookup_error("xmlcharrefreplace") 7897db96d56Sopenharmony_ci ) 7907db96d56Sopenharmony_ci self.assertEqual( 7917db96d56Sopenharmony_ci codecs.backslashreplace_errors, 7927db96d56Sopenharmony_ci codecs.lookup_error("backslashreplace") 7937db96d56Sopenharmony_ci ) 7947db96d56Sopenharmony_ci self.assertEqual( 7957db96d56Sopenharmony_ci codecs.namereplace_errors, 7967db96d56Sopenharmony_ci codecs.lookup_error("namereplace") 7977db96d56Sopenharmony_ci ) 7987db96d56Sopenharmony_ci 7997db96d56Sopenharmony_ci def test_encode_nonascii_replacement(self): 8007db96d56Sopenharmony_ci def handle(exc): 8017db96d56Sopenharmony_ci if isinstance(exc, UnicodeEncodeError): 8027db96d56Sopenharmony_ci return (repl, exc.end) 8037db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 8047db96d56Sopenharmony_ci codecs.register_error("test.replacing", handle) 8057db96d56Sopenharmony_ci 8067db96d56Sopenharmony_ci for enc, input, repl in ( 8077db96d56Sopenharmony_ci ("ascii", "[¤]", "abc"), 8087db96d56Sopenharmony_ci ("iso-8859-1", "[€]", "½¾"), 8097db96d56Sopenharmony_ci ("iso-8859-15", "[¤]", "œŸ"), 8107db96d56Sopenharmony_ci ): 
8117db96d56Sopenharmony_ci res = input.encode(enc, "test.replacing") 8127db96d56Sopenharmony_ci self.assertEqual(res, ("[" + repl + "]").encode(enc)) 8137db96d56Sopenharmony_ci 8147db96d56Sopenharmony_ci for enc, input, repl in ( 8157db96d56Sopenharmony_ci ("utf-8", "[\udc80]", "\U0001f40d"), 8167db96d56Sopenharmony_ci ("utf-16", "[\udc80]", "\U0001f40d"), 8177db96d56Sopenharmony_ci ("utf-32", "[\udc80]", "\U0001f40d"), 8187db96d56Sopenharmony_ci ): 8197db96d56Sopenharmony_ci with self.subTest(encoding=enc): 8207db96d56Sopenharmony_ci with self.assertRaises(UnicodeEncodeError) as cm: 8217db96d56Sopenharmony_ci input.encode(enc, "test.replacing") 8227db96d56Sopenharmony_ci exc = cm.exception 8237db96d56Sopenharmony_ci self.assertEqual(exc.start, 1) 8247db96d56Sopenharmony_ci self.assertEqual(exc.end, 2) 8257db96d56Sopenharmony_ci self.assertEqual(exc.object, input) 8267db96d56Sopenharmony_ci 8277db96d56Sopenharmony_ci def test_encode_unencodable_replacement(self): 8287db96d56Sopenharmony_ci def unencrepl(exc): 8297db96d56Sopenharmony_ci if isinstance(exc, UnicodeEncodeError): 8307db96d56Sopenharmony_ci return (repl, exc.end) 8317db96d56Sopenharmony_ci else: 8327db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 8337db96d56Sopenharmony_ci codecs.register_error("test.unencreplhandler", unencrepl) 8347db96d56Sopenharmony_ci 8357db96d56Sopenharmony_ci for enc, input, repl in ( 8367db96d56Sopenharmony_ci ("ascii", "[¤]", "½"), 8377db96d56Sopenharmony_ci ("iso-8859-1", "[€]", "œ"), 8387db96d56Sopenharmony_ci ("iso-8859-15", "[¤]", "½"), 8397db96d56Sopenharmony_ci ("utf-8", "[\udc80]", "\udcff"), 8407db96d56Sopenharmony_ci ("utf-16", "[\udc80]", "\udcff"), 8417db96d56Sopenharmony_ci ("utf-32", "[\udc80]", "\udcff"), 8427db96d56Sopenharmony_ci ): 8437db96d56Sopenharmony_ci with self.subTest(encoding=enc): 8447db96d56Sopenharmony_ci with self.assertRaises(UnicodeEncodeError) as cm: 8457db96d56Sopenharmony_ci input.encode(enc, "test.unencreplhandler") 
8467db96d56Sopenharmony_ci exc = cm.exception 8477db96d56Sopenharmony_ci self.assertEqual(exc.start, 1) 8487db96d56Sopenharmony_ci self.assertEqual(exc.end, 2) 8497db96d56Sopenharmony_ci self.assertEqual(exc.object, input) 8507db96d56Sopenharmony_ci 8517db96d56Sopenharmony_ci def test_encode_bytes_replacement(self): 8527db96d56Sopenharmony_ci def handle(exc): 8537db96d56Sopenharmony_ci if isinstance(exc, UnicodeEncodeError): 8547db96d56Sopenharmony_ci return (repl, exc.end) 8557db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 8567db96d56Sopenharmony_ci codecs.register_error("test.replacing", handle) 8577db96d56Sopenharmony_ci 8587db96d56Sopenharmony_ci # It works even if the bytes sequence is not decodable. 8597db96d56Sopenharmony_ci for enc, input, repl in ( 8607db96d56Sopenharmony_ci ("ascii", "[¤]", b"\xbd\xbe"), 8617db96d56Sopenharmony_ci ("iso-8859-1", "[€]", b"\xbd\xbe"), 8627db96d56Sopenharmony_ci ("iso-8859-15", "[¤]", b"\xbd\xbe"), 8637db96d56Sopenharmony_ci ("utf-8", "[\udc80]", b"\xbd\xbe"), 8647db96d56Sopenharmony_ci ("utf-16le", "[\udc80]", b"\xbd\xbe"), 8657db96d56Sopenharmony_ci ("utf-16be", "[\udc80]", b"\xbd\xbe"), 8667db96d56Sopenharmony_ci ("utf-32le", "[\udc80]", b"\xbc\xbd\xbe\xbf"), 8677db96d56Sopenharmony_ci ("utf-32be", "[\udc80]", b"\xbc\xbd\xbe\xbf"), 8687db96d56Sopenharmony_ci ): 8697db96d56Sopenharmony_ci with self.subTest(encoding=enc): 8707db96d56Sopenharmony_ci res = input.encode(enc, "test.replacing") 8717db96d56Sopenharmony_ci self.assertEqual(res, "[".encode(enc) + repl + "]".encode(enc)) 8727db96d56Sopenharmony_ci 8737db96d56Sopenharmony_ci def test_encode_odd_bytes_replacement(self): 8747db96d56Sopenharmony_ci def handle(exc): 8757db96d56Sopenharmony_ci if isinstance(exc, UnicodeEncodeError): 8767db96d56Sopenharmony_ci return (repl, exc.end) 8777db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 8787db96d56Sopenharmony_ci codecs.register_error("test.replacing", handle) 
8797db96d56Sopenharmony_ci 8807db96d56Sopenharmony_ci input = "[\udc80]" 8817db96d56Sopenharmony_ci # Tests in which the replacement bytestring contains not whole number 8827db96d56Sopenharmony_ci # of code units. 8837db96d56Sopenharmony_ci for enc, repl in ( 8847db96d56Sopenharmony_ci *itertools.product(("utf-16le", "utf-16be"), 8857db96d56Sopenharmony_ci [b"a", b"abc"]), 8867db96d56Sopenharmony_ci *itertools.product(("utf-32le", "utf-32be"), 8877db96d56Sopenharmony_ci [b"a", b"ab", b"abc", b"abcde"]), 8887db96d56Sopenharmony_ci ): 8897db96d56Sopenharmony_ci with self.subTest(encoding=enc, repl=repl): 8907db96d56Sopenharmony_ci with self.assertRaises(UnicodeEncodeError) as cm: 8917db96d56Sopenharmony_ci input.encode(enc, "test.replacing") 8927db96d56Sopenharmony_ci exc = cm.exception 8937db96d56Sopenharmony_ci self.assertEqual(exc.start, 1) 8947db96d56Sopenharmony_ci self.assertEqual(exc.end, 2) 8957db96d56Sopenharmony_ci self.assertEqual(exc.object, input) 8967db96d56Sopenharmony_ci self.assertEqual(exc.reason, "surrogates not allowed") 8977db96d56Sopenharmony_ci 8987db96d56Sopenharmony_ci def test_badregistercall(self): 8997db96d56Sopenharmony_ci # enhance coverage of: 9007db96d56Sopenharmony_ci # Modules/_codecsmodule.c::register_error() 9017db96d56Sopenharmony_ci # Python/codecs.c::PyCodec_RegisterError() 9027db96d56Sopenharmony_ci self.assertRaises(TypeError, codecs.register_error, 42) 9037db96d56Sopenharmony_ci self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42) 9047db96d56Sopenharmony_ci 9057db96d56Sopenharmony_ci def test_badlookupcall(self): 9067db96d56Sopenharmony_ci # enhance coverage of: 9077db96d56Sopenharmony_ci # Modules/_codecsmodule.c::lookup_error() 9087db96d56Sopenharmony_ci self.assertRaises(TypeError, codecs.lookup_error) 9097db96d56Sopenharmony_ci 9107db96d56Sopenharmony_ci def test_unknownhandler(self): 9117db96d56Sopenharmony_ci # enhance coverage of: 9127db96d56Sopenharmony_ci # Modules/_codecsmodule.c::lookup_error() 
9137db96d56Sopenharmony_ci self.assertRaises(LookupError, codecs.lookup_error, "test.unknown") 9147db96d56Sopenharmony_ci 9157db96d56Sopenharmony_ci def test_xmlcharrefvalues(self): 9167db96d56Sopenharmony_ci # enhance coverage of: 9177db96d56Sopenharmony_ci # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() 9187db96d56Sopenharmony_ci # and inline implementations 9197db96d56Sopenharmony_ci v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000, 9207db96d56Sopenharmony_ci 500000, 1000000) 9217db96d56Sopenharmony_ci s = "".join([chr(x) for x in v]) 9227db96d56Sopenharmony_ci codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) 9237db96d56Sopenharmony_ci for enc in ("ascii", "iso-8859-15"): 9247db96d56Sopenharmony_ci for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): 9257db96d56Sopenharmony_ci s.encode(enc, err) 9267db96d56Sopenharmony_ci 9277db96d56Sopenharmony_ci def test_decodehelper(self): 9287db96d56Sopenharmony_ci # enhance coverage of: 9297db96d56Sopenharmony_ci # Objects/unicodeobject.c::unicode_decode_call_errorhandler() 9307db96d56Sopenharmony_ci # and callers 9317db96d56Sopenharmony_ci self.assertRaises(LookupError, b"\xff".decode, "ascii", "test.unknown") 9327db96d56Sopenharmony_ci 9337db96d56Sopenharmony_ci def baddecodereturn1(exc): 9347db96d56Sopenharmony_ci return 42 9357db96d56Sopenharmony_ci codecs.register_error("test.baddecodereturn1", baddecodereturn1) 9367db96d56Sopenharmony_ci self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn1") 9377db96d56Sopenharmony_ci self.assertRaises(TypeError, b"\\".decode, "unicode-escape", "test.baddecodereturn1") 9387db96d56Sopenharmony_ci self.assertRaises(TypeError, b"\\x0".decode, "unicode-escape", "test.baddecodereturn1") 9397db96d56Sopenharmony_ci self.assertRaises(TypeError, b"\\x0y".decode, "unicode-escape", "test.baddecodereturn1") 9407db96d56Sopenharmony_ci self.assertRaises(TypeError, b"\\Uffffeeee".decode, "unicode-escape", 
"test.baddecodereturn1") 9417db96d56Sopenharmony_ci self.assertRaises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") 9427db96d56Sopenharmony_ci 9437db96d56Sopenharmony_ci def baddecodereturn2(exc): 9447db96d56Sopenharmony_ci return ("?", None) 9457db96d56Sopenharmony_ci codecs.register_error("test.baddecodereturn2", baddecodereturn2) 9467db96d56Sopenharmony_ci self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn2") 9477db96d56Sopenharmony_ci 9487db96d56Sopenharmony_ci handler = PosReturn() 9497db96d56Sopenharmony_ci codecs.register_error("test.posreturn", handler.handle) 9507db96d56Sopenharmony_ci 9517db96d56Sopenharmony_ci # Valid negative position 9527db96d56Sopenharmony_ci handler.pos = -1 9537db96d56Sopenharmony_ci self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") 9547db96d56Sopenharmony_ci 9557db96d56Sopenharmony_ci # Valid negative position 9567db96d56Sopenharmony_ci handler.pos = -2 9577db96d56Sopenharmony_ci self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>") 9587db96d56Sopenharmony_ci 9597db96d56Sopenharmony_ci # Negative position out of bounds 9607db96d56Sopenharmony_ci handler.pos = -3 9617db96d56Sopenharmony_ci self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") 9627db96d56Sopenharmony_ci 9637db96d56Sopenharmony_ci # Valid positive position 9647db96d56Sopenharmony_ci handler.pos = 1 9657db96d56Sopenharmony_ci self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") 9667db96d56Sopenharmony_ci 9677db96d56Sopenharmony_ci # Largest valid positive position (one beyond end of input) 9687db96d56Sopenharmony_ci handler.pos = 2 9697db96d56Sopenharmony_ci self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>") 9707db96d56Sopenharmony_ci 9717db96d56Sopenharmony_ci # Invalid positive position 9727db96d56Sopenharmony_ci handler.pos = 3 9737db96d56Sopenharmony_ci self.assertRaises(IndexError, b"\xff0".decode, "ascii", 
"test.posreturn") 9747db96d56Sopenharmony_ci 9757db96d56Sopenharmony_ci # Restart at the "0" 9767db96d56Sopenharmony_ci handler.pos = 6 9777db96d56Sopenharmony_ci self.assertEqual(b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0") 9787db96d56Sopenharmony_ci 9797db96d56Sopenharmony_ci class D(dict): 9807db96d56Sopenharmony_ci def __getitem__(self, key): 9817db96d56Sopenharmony_ci raise ValueError 9827db96d56Sopenharmony_ci self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None}) 9837db96d56Sopenharmony_ci self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D()) 9847db96d56Sopenharmony_ci self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1}) 9857db96d56Sopenharmony_ci 9867db96d56Sopenharmony_ci def test_encodehelper(self): 9877db96d56Sopenharmony_ci # enhance coverage of: 9887db96d56Sopenharmony_ci # Objects/unicodeobject.c::unicode_encode_call_errorhandler() 9897db96d56Sopenharmony_ci # and callers 9907db96d56Sopenharmony_ci self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown") 9917db96d56Sopenharmony_ci 9927db96d56Sopenharmony_ci def badencodereturn1(exc): 9937db96d56Sopenharmony_ci return 42 9947db96d56Sopenharmony_ci codecs.register_error("test.badencodereturn1", badencodereturn1) 9957db96d56Sopenharmony_ci self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1") 9967db96d56Sopenharmony_ci 9977db96d56Sopenharmony_ci def badencodereturn2(exc): 9987db96d56Sopenharmony_ci return ("?", None) 9997db96d56Sopenharmony_ci codecs.register_error("test.badencodereturn2", badencodereturn2) 10007db96d56Sopenharmony_ci self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2") 10017db96d56Sopenharmony_ci 10027db96d56Sopenharmony_ci handler = PosReturn() 10037db96d56Sopenharmony_ci codecs.register_error("test.posreturn", handler.handle) 10047db96d56Sopenharmony_ci 10057db96d56Sopenharmony_ci # Valid negative 
position 10067db96d56Sopenharmony_ci handler.pos = -1 10077db96d56Sopenharmony_ci self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0") 10087db96d56Sopenharmony_ci 10097db96d56Sopenharmony_ci # Valid negative position 10107db96d56Sopenharmony_ci handler.pos = -2 10117db96d56Sopenharmony_ci self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?><?>") 10127db96d56Sopenharmony_ci 10137db96d56Sopenharmony_ci # Negative position out of bounds 10147db96d56Sopenharmony_ci handler.pos = -3 10157db96d56Sopenharmony_ci self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") 10167db96d56Sopenharmony_ci 10177db96d56Sopenharmony_ci # Valid positive position 10187db96d56Sopenharmony_ci handler.pos = 1 10197db96d56Sopenharmony_ci self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0") 10207db96d56Sopenharmony_ci 10217db96d56Sopenharmony_ci # Largest valid positive position (one beyond end of input 10227db96d56Sopenharmony_ci handler.pos = 2 10237db96d56Sopenharmony_ci self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>") 10247db96d56Sopenharmony_ci 10257db96d56Sopenharmony_ci # Invalid positive position 10267db96d56Sopenharmony_ci handler.pos = 3 10277db96d56Sopenharmony_ci self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") 10287db96d56Sopenharmony_ci 10297db96d56Sopenharmony_ci handler.pos = 0 10307db96d56Sopenharmony_ci 10317db96d56Sopenharmony_ci class D(dict): 10327db96d56Sopenharmony_ci def __getitem__(self, key): 10337db96d56Sopenharmony_ci raise ValueError 10347db96d56Sopenharmony_ci for err in ("strict", "replace", "xmlcharrefreplace", 10357db96d56Sopenharmony_ci "backslashreplace", "namereplace", "test.posreturn"): 10367db96d56Sopenharmony_ci self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) 10377db96d56Sopenharmony_ci self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) 10387db96d56Sopenharmony_ci self.assertRaises(TypeError, 
codecs.charmap_encode, "\xff", err, {0xff: 300}) 10397db96d56Sopenharmony_ci 10407db96d56Sopenharmony_ci def test_decodehelper_bug36819(self): 10417db96d56Sopenharmony_ci handler = RepeatedPosReturn("x") 10427db96d56Sopenharmony_ci codecs.register_error("test.bug36819", handler.handle) 10437db96d56Sopenharmony_ci 10447db96d56Sopenharmony_ci testcases = [ 10457db96d56Sopenharmony_ci ("ascii", b"\xff"), 10467db96d56Sopenharmony_ci ("utf-8", b"\xff"), 10477db96d56Sopenharmony_ci ("utf-16be", b'\xdc\x80'), 10487db96d56Sopenharmony_ci ("utf-32be", b'\x00\x00\xdc\x80'), 10497db96d56Sopenharmony_ci ("iso-8859-6", b"\xff"), 10507db96d56Sopenharmony_ci ] 10517db96d56Sopenharmony_ci for enc, bad in testcases: 10527db96d56Sopenharmony_ci input = "abcd".encode(enc) + bad 10537db96d56Sopenharmony_ci with self.subTest(encoding=enc): 10547db96d56Sopenharmony_ci handler.count = 50 10557db96d56Sopenharmony_ci decoded = input.decode(enc, "test.bug36819") 10567db96d56Sopenharmony_ci self.assertEqual(decoded, 'abcdx' * 51) 10577db96d56Sopenharmony_ci 10587db96d56Sopenharmony_ci def test_encodehelper_bug36819(self): 10597db96d56Sopenharmony_ci handler = RepeatedPosReturn() 10607db96d56Sopenharmony_ci codecs.register_error("test.bug36819", handler.handle) 10617db96d56Sopenharmony_ci 10627db96d56Sopenharmony_ci input = "abcd\udc80" 10637db96d56Sopenharmony_ci encodings = ["ascii", "latin1", "utf-8", "utf-16", "utf-32"] # built-in 10647db96d56Sopenharmony_ci encodings += ["iso-8859-15"] # charmap codec 10657db96d56Sopenharmony_ci if sys.platform == 'win32': 10667db96d56Sopenharmony_ci encodings = ["mbcs", "oem"] # code page codecs 10677db96d56Sopenharmony_ci 10687db96d56Sopenharmony_ci handler.repl = "\udcff" 10697db96d56Sopenharmony_ci for enc in encodings: 10707db96d56Sopenharmony_ci with self.subTest(encoding=enc): 10717db96d56Sopenharmony_ci handler.count = 50 10727db96d56Sopenharmony_ci with self.assertRaises(UnicodeEncodeError) as cm: 10737db96d56Sopenharmony_ci input.encode(enc, 
"test.bug36819") 10747db96d56Sopenharmony_ci exc = cm.exception 10757db96d56Sopenharmony_ci self.assertEqual(exc.start, 4) 10767db96d56Sopenharmony_ci self.assertEqual(exc.end, 5) 10777db96d56Sopenharmony_ci self.assertEqual(exc.object, input) 10787db96d56Sopenharmony_ci if sys.platform == "win32": 10797db96d56Sopenharmony_ci handler.count = 50 10807db96d56Sopenharmony_ci with self.assertRaises(UnicodeEncodeError) as cm: 10817db96d56Sopenharmony_ci codecs.code_page_encode(437, input, "test.bug36819") 10827db96d56Sopenharmony_ci exc = cm.exception 10837db96d56Sopenharmony_ci self.assertEqual(exc.start, 4) 10847db96d56Sopenharmony_ci self.assertEqual(exc.end, 5) 10857db96d56Sopenharmony_ci self.assertEqual(exc.object, input) 10867db96d56Sopenharmony_ci 10877db96d56Sopenharmony_ci handler.repl = "x" 10887db96d56Sopenharmony_ci for enc in encodings: 10897db96d56Sopenharmony_ci with self.subTest(encoding=enc): 10907db96d56Sopenharmony_ci # The interpreter should segfault after a handful of attempts. 10917db96d56Sopenharmony_ci # 50 was chosen to try to ensure a segfault without a fix, 10927db96d56Sopenharmony_ci # but not OOM a machine with one. 
10937db96d56Sopenharmony_ci handler.count = 50 10947db96d56Sopenharmony_ci encoded = input.encode(enc, "test.bug36819") 10957db96d56Sopenharmony_ci self.assertEqual(encoded.decode(enc), "abcdx" * 51) 10967db96d56Sopenharmony_ci if sys.platform == "win32": 10977db96d56Sopenharmony_ci handler.count = 50 10987db96d56Sopenharmony_ci encoded = codecs.code_page_encode(437, input, "test.bug36819") 10997db96d56Sopenharmony_ci self.assertEqual(encoded[0].decode(), "abcdx" * 51) 11007db96d56Sopenharmony_ci self.assertEqual(encoded[1], len(input)) 11017db96d56Sopenharmony_ci 11027db96d56Sopenharmony_ci def test_translatehelper(self): 11037db96d56Sopenharmony_ci # enhance coverage of: 11047db96d56Sopenharmony_ci # Objects/unicodeobject.c::unicode_encode_call_errorhandler() 11057db96d56Sopenharmony_ci # and callers 11067db96d56Sopenharmony_ci # (Unfortunately the errors argument is not directly accessible 11077db96d56Sopenharmony_ci # from Python, so we can't test that much) 11087db96d56Sopenharmony_ci class D(dict): 11097db96d56Sopenharmony_ci def __getitem__(self, key): 11107db96d56Sopenharmony_ci raise ValueError 11117db96d56Sopenharmony_ci #self.assertRaises(ValueError, "\xff".translate, D()) 11127db96d56Sopenharmony_ci self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1}) 11137db96d56Sopenharmony_ci self.assertRaises(TypeError, "\xff".translate, {0xff: ()}) 11147db96d56Sopenharmony_ci 11157db96d56Sopenharmony_ci def test_bug828737(self): 11167db96d56Sopenharmony_ci charmap = { 11177db96d56Sopenharmony_ci ord("&"): "&", 11187db96d56Sopenharmony_ci ord("<"): "<", 11197db96d56Sopenharmony_ci ord(">"): ">", 11207db96d56Sopenharmony_ci ord('"'): """, 11217db96d56Sopenharmony_ci } 11227db96d56Sopenharmony_ci 11237db96d56Sopenharmony_ci for n in (1, 10, 100, 1000): 11247db96d56Sopenharmony_ci text = 'abc<def>ghi'*n 11257db96d56Sopenharmony_ci text.translate(charmap) 11267db96d56Sopenharmony_ci 11277db96d56Sopenharmony_ci def test_mutatingdecodehandler(self): 
11287db96d56Sopenharmony_ci baddata = [ 11297db96d56Sopenharmony_ci ("ascii", b"\xff"), 11307db96d56Sopenharmony_ci ("utf-7", b"++"), 11317db96d56Sopenharmony_ci ("utf-8", b"\xff"), 11327db96d56Sopenharmony_ci ("utf-16", b"\xff"), 11337db96d56Sopenharmony_ci ("utf-32", b"\xff"), 11347db96d56Sopenharmony_ci ("unicode-escape", b"\\u123g"), 11357db96d56Sopenharmony_ci ("raw-unicode-escape", b"\\u123g"), 11367db96d56Sopenharmony_ci ] 11377db96d56Sopenharmony_ci 11387db96d56Sopenharmony_ci def replacing(exc): 11397db96d56Sopenharmony_ci if isinstance(exc, UnicodeDecodeError): 11407db96d56Sopenharmony_ci exc.object = 42 11417db96d56Sopenharmony_ci return ("\u4242", 0) 11427db96d56Sopenharmony_ci else: 11437db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 11447db96d56Sopenharmony_ci codecs.register_error("test.replacing", replacing) 11457db96d56Sopenharmony_ci 11467db96d56Sopenharmony_ci for (encoding, data) in baddata: 11477db96d56Sopenharmony_ci with self.assertRaises(TypeError): 11487db96d56Sopenharmony_ci data.decode(encoding, "test.replacing") 11497db96d56Sopenharmony_ci 11507db96d56Sopenharmony_ci def mutating(exc): 11517db96d56Sopenharmony_ci if isinstance(exc, UnicodeDecodeError): 11527db96d56Sopenharmony_ci exc.object = b"" 11537db96d56Sopenharmony_ci return ("\u4242", 0) 11547db96d56Sopenharmony_ci else: 11557db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 11567db96d56Sopenharmony_ci codecs.register_error("test.mutating", mutating) 11577db96d56Sopenharmony_ci # If the decoder doesn't pick up the modified input the following 11587db96d56Sopenharmony_ci # will lead to an endless loop 11597db96d56Sopenharmony_ci for (encoding, data) in baddata: 11607db96d56Sopenharmony_ci self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242") 11617db96d56Sopenharmony_ci 11627db96d56Sopenharmony_ci # issue32583 11637db96d56Sopenharmony_ci def test_crashing_decode_handler(self): 11647db96d56Sopenharmony_ci # better 
generating one more character to fill the extra space slot 11657db96d56Sopenharmony_ci # so in debug build it can steadily fail 11667db96d56Sopenharmony_ci def forward_shorter_than_end(exc): 11677db96d56Sopenharmony_ci if isinstance(exc, UnicodeDecodeError): 11687db96d56Sopenharmony_ci # size one character, 0 < forward < exc.end 11697db96d56Sopenharmony_ci return ('\ufffd', exc.start+1) 11707db96d56Sopenharmony_ci else: 11717db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 11727db96d56Sopenharmony_ci codecs.register_error( 11737db96d56Sopenharmony_ci "test.forward_shorter_than_end", forward_shorter_than_end) 11747db96d56Sopenharmony_ci 11757db96d56Sopenharmony_ci self.assertEqual( 11767db96d56Sopenharmony_ci b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode( 11777db96d56Sopenharmony_ci 'utf-16-le', 'test.forward_shorter_than_end'), 11787db96d56Sopenharmony_ci '\ufffd\ufffd\ufffd\ufffd\xd8\x00' 11797db96d56Sopenharmony_ci ) 11807db96d56Sopenharmony_ci self.assertEqual( 11817db96d56Sopenharmony_ci b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode( 11827db96d56Sopenharmony_ci 'utf-16-be', 'test.forward_shorter_than_end'), 11837db96d56Sopenharmony_ci '\ufffd\ufffd\ufffd\ufffd\xd8\x00' 11847db96d56Sopenharmony_ci ) 11857db96d56Sopenharmony_ci self.assertEqual( 11867db96d56Sopenharmony_ci b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode( 11877db96d56Sopenharmony_ci 'utf-32-le', 'test.forward_shorter_than_end'), 11887db96d56Sopenharmony_ci '\ufffd\ufffd\ufffd\u1111\x00' 11897db96d56Sopenharmony_ci ) 11907db96d56Sopenharmony_ci self.assertEqual( 11917db96d56Sopenharmony_ci b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode( 11927db96d56Sopenharmony_ci 'utf-32-be', 'test.forward_shorter_than_end'), 11937db96d56Sopenharmony_ci '\ufffd\ufffd\ufffd\u1111\x00' 11947db96d56Sopenharmony_ci ) 11957db96d56Sopenharmony_ci 11967db96d56Sopenharmony_ci def replace_with_long(exc): 11977db96d56Sopenharmony_ci if isinstance(exc, UnicodeDecodeError): 
11987db96d56Sopenharmony_ci exc.object = b"\x00" * 8 11997db96d56Sopenharmony_ci return ('\ufffd', exc.start) 12007db96d56Sopenharmony_ci else: 12017db96d56Sopenharmony_ci raise TypeError("don't know how to handle %r" % exc) 12027db96d56Sopenharmony_ci codecs.register_error("test.replace_with_long", replace_with_long) 12037db96d56Sopenharmony_ci 12047db96d56Sopenharmony_ci self.assertEqual( 12057db96d56Sopenharmony_ci b'\x00'.decode('utf-16', 'test.replace_with_long'), 12067db96d56Sopenharmony_ci '\ufffd\x00\x00\x00\x00' 12077db96d56Sopenharmony_ci ) 12087db96d56Sopenharmony_ci self.assertEqual( 12097db96d56Sopenharmony_ci b'\x00'.decode('utf-32', 'test.replace_with_long'), 12107db96d56Sopenharmony_ci '\ufffd\x00\x00' 12117db96d56Sopenharmony_ci ) 12127db96d56Sopenharmony_ci 12137db96d56Sopenharmony_ci 12147db96d56Sopenharmony_ci def test_fake_error_class(self): 12157db96d56Sopenharmony_ci handlers = [ 12167db96d56Sopenharmony_ci codecs.strict_errors, 12177db96d56Sopenharmony_ci codecs.ignore_errors, 12187db96d56Sopenharmony_ci codecs.replace_errors, 12197db96d56Sopenharmony_ci codecs.backslashreplace_errors, 12207db96d56Sopenharmony_ci codecs.namereplace_errors, 12217db96d56Sopenharmony_ci codecs.xmlcharrefreplace_errors, 12227db96d56Sopenharmony_ci codecs.lookup_error('surrogateescape'), 12237db96d56Sopenharmony_ci codecs.lookup_error('surrogatepass'), 12247db96d56Sopenharmony_ci ] 12257db96d56Sopenharmony_ci for cls in UnicodeEncodeError, UnicodeDecodeError, UnicodeTranslateError: 12267db96d56Sopenharmony_ci class FakeUnicodeError(str): 12277db96d56Sopenharmony_ci __class__ = cls 12287db96d56Sopenharmony_ci for handler in handlers: 12297db96d56Sopenharmony_ci with self.subTest(handler=handler, error_class=cls): 12307db96d56Sopenharmony_ci self.assertRaises(TypeError, handler, FakeUnicodeError()) 12317db96d56Sopenharmony_ci class FakeUnicodeError(Exception): 12327db96d56Sopenharmony_ci __class__ = cls 12337db96d56Sopenharmony_ci for handler in handlers: 
12347db96d56Sopenharmony_ci with self.subTest(handler=handler, error_class=cls): 12357db96d56Sopenharmony_ci with self.assertRaises((TypeError, FakeUnicodeError)): 12367db96d56Sopenharmony_ci handler(FakeUnicodeError()) 12377db96d56Sopenharmony_ci 12387db96d56Sopenharmony_ci 12397db96d56Sopenharmony_ciif __name__ == "__main__": 12407db96d56Sopenharmony_ci unittest.main() 1241