Lines Matching refs:self
31 def coding_checker(self, coder):
33 self.assertEqual(coder(input), (expect, len(input)))
61 def __init__(self, buffer):
62 self._buffer = buffer
64 def write(self, chars):
65 self._buffer += chars
67 def read(self, size=-1):
69 s = self._buffer
70 self._buffer = self._buffer[:0] # make empty
73 s = self._buffer[:size]
74 self._buffer = self._buffer[size:]
79 def check_state_handling_decode(self, encoding, u, s):
84 self.assertIsInstance(state[1], int)
91 self.assertTrue(not d.decode(state[0]))
93 self.assertEqual(state, d.getstate())
99 self.assertEqual(u, part1+part2)
101 def check_state_handling_encode(self, encoding, u, s):
109 self.assertEqual(s, part1+part2)
113 def check_partial(self, input, partialresults):
119 r = codecs.getreader(self.encoding)(q)
121 for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
124 self.assertEqual(result, partialresult)
126 self.assertEqual(r.read(), "")
127 self.assertEqual(r.bytebuffer, b"")
130 d = codecs.getincrementaldecoder(self.encoding)()
132 for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
134 self.assertEqual(result, partialresult)
136 self.assertEqual(d.decode(b"", True), "")
137 self.assertEqual(d.buffer, b"")
142 for (c, partialresult) in zip(input.encode(self.encoding), partialresults, strict=True):
144 self.assertEqual(result, partialresult)
146 self.assertEqual(d.decode(b"", True), "")
147 self.assertEqual(d.buffer, b"")
150 encoded = input.encode(self.encoding)
151 self.assertEqual(
153 "".join(codecs.iterdecode([bytes([c]) for c in encoded], self.encoding))
156 def test_readline(self):
158 stream = io.BytesIO(input.encode(self.encoding))
159 return codecs.getreader(self.encoding)(stream)
174 self.assertEqual(readalllines(s, True), sexpected)
175 self.assertEqual(readalllines(s, False), sexpectednoends)
176 self.assertEqual(readalllines(s, True, 10), sexpected)
177 self.assertEqual(readalllines(s, False, 10), sexpectednoends)
186 self.assertEqual(readalllines("".join(vw), True), "|".join(vw))
187 self.assertEqual(readalllines("".join(vw), False), "|".join(vwo))
196 self.assertEqual(
200 self.assertEqual(
206 self.assertEqual(
210 self.assertEqual(
215 def test_mixed_readline_and_read(self):
222 stream = io.BytesIO(data.encode(self.encoding))
223 return codecs.getreader(self.encoding)(stream)
227 self.assertEqual(f.readline(), lines[0])
228 self.assertEqual(f.read(), ''.join(lines[1:]))
229 self.assertEqual(f.read(), '')
233 self.assertEqual(f.readline(), lines[0])
234 self.assertEqual(f.read(1), lines[1][0])
235 self.assertEqual(f.read(0), '')
236 self.assertEqual(f.read(100), data[len(lines[0]) + 1:][:100])
240 self.assertEqual(f.readline(), lines[0])
241 self.assertEqual(f.readlines(), lines[1:])
242 self.assertEqual(f.read(), '')
246 self.assertEqual(f.read(size=40, chars=5), data[:5])
247 self.assertEqual(f.read(), data[5:])
248 self.assertEqual(f.read(), '')
252 self.assertEqual(f.read(size=40, chars=5), data[:5])
253 self.assertEqual(f.read(1), data[5])
254 self.assertEqual(f.read(0), '')
255 self.assertEqual(f.read(100), data[6:106])
259 self.assertEqual(f.read(size=40, chars=5), data[:5])
260 self.assertEqual(f.readlines(), [lines[0][5:]] + lines[1:])
261 self.assertEqual(f.read(), '')
263 def test_bug1175396(self):
280 'user=self.SessionCtx.user\r\n',
281 'storageEngine=self.SessionCtx.storageEngine\r\n',
293 ' self.abort("cannot load articles")\r\n',
297 'arg=self.Request.getArg()\r\n',
300 ' self.write("<h2>Today\'s articles</h2>")\r\n',
305 ' self.Yredirect("active.y")\r\n',
308 ' self.Yredirect("login.y")\r\n',
311 ' showdate = self.Request.getParameter("date")\r\n',
312 ' self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
316 ' self.write("<h2>Recent articles</h2>")\r\n',
327 stream = io.BytesIO("".join(s).encode(self.encoding))
328 reader = codecs.getreader(self.encoding)(stream)
330 self.assertEqual(line, s[i])
332 def test_readlinequeue(self):
334 writer = codecs.getwriter(self.encoding)(q)
335 reader = codecs.getreader(self.encoding)(q)
339 self.assertEqual(reader.readline(keepends=False), "foo")
341 self.assertEqual(reader.readline(keepends=False), "")
342 self.assertEqual(reader.readline(keepends=False), "bar")
344 self.assertEqual(reader.readline(keepends=False), "baz")
345 self.assertEqual(reader.readline(keepends=False), "")
349 self.assertEqual(reader.readline(keepends=True), "foo\r")
351 self.assertEqual(reader.readline(keepends=True), "\n")
352 self.assertEqual(reader.readline(keepends=True), "bar\r")
354 self.assertEqual(reader.readline(keepends=True), "baz")
355 self.assertEqual(reader.readline(keepends=True), "")
357 self.assertEqual(reader.readline(keepends=True), "foo\r\n")
359 def test_bug1098990_a(self):
364 s = (s1+s2+s3).encode(self.encoding)
366 reader = codecs.getreader(self.encoding)(stream)
367 self.assertEqual(reader.readline(), s1)
368 self.assertEqual(reader.readline(), s2)
369 self.assertEqual(reader.readline(), s3)
370 self.assertEqual(reader.readline(), "")
372 def test_bug1098990_b(self):
379 s = (s1+s2+s3+s4+s5).encode(self.encoding)
381 reader = codecs.getreader(self.encoding)(stream)
382 self.assertEqual(reader.readline(), s1)
383 self.assertEqual(reader.readline(), s2)
384 self.assertEqual(reader.readline(), s3)
385 self.assertEqual(reader.readline(), s4)
386 self.assertEqual(reader.readline(), s5)
387 self.assertEqual(reader.readline(), "")
391 def test_lone_surrogates(self):
392 self.assertRaises(UnicodeEncodeError, "\ud800".encode, self.encoding)
393 self.assertEqual("[\uDC80]".encode(self.encoding, "backslashreplace"),
394 "[\\udc80]".encode(self.encoding))
395 self.assertEqual("[\uDC80]".encode(self.encoding, "namereplace"),
396 "[\\udc80]".encode(self.encoding))
397 self.assertEqual("[\uDC80]".encode(self.encoding, "xmlcharrefreplace"),
398 "[&#56448;]".encode(self.encoding))
399 self.assertEqual("[\uDC80]".encode(self.encoding, "ignore"),
400 "[]".encode(self.encoding))
401 self.assertEqual("[\uDC80]".encode(self.encoding, "replace"),
402 "[?]".encode(self.encoding))
405 self.assertEqual("[\uD800\uDC80]".encode(self.encoding, "ignore"),
406 "[]".encode(self.encoding))
407 self.assertEqual("[\uD800\uDC80]".encode(self.encoding, "replace"),
408 "[??]".encode(self.encoding))
410 bom = "".encode(self.encoding)
413 before_sequence = before.encode(self.encoding)[len(bom):]
414 after_sequence = after.encode(self.encoding)[len(bom):]
417 self.ill_formed_sequence + after_sequence)
418 self.assertRaises(UnicodeDecodeError, test_sequence.decode,
419 self.encoding)
420 self.assertEqual(test_string.encode(self.encoding,
423 self.assertEqual(test_sequence.decode(self.encoding,
426 self.assertEqual(test_sequence.decode(self.encoding, "ignore"),
428 self.assertEqual(test_sequence.decode(self.encoding, "replace"),
429 before + self.ill_formed_sequence_replace + after)
431 for b in self.ill_formed_sequence)
432 self.assertEqual(test_sequence.decode(self.encoding, "backslashreplace"),
435 def test_incremental_surrogatepass(self):
439 data = '\uD901'.encode(self.encoding, 'surrogatepass')
441 dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
442 self.assertEqual(dec.decode(data[:i]), '')
443 self.assertEqual(dec.decode(data[i:], True), '\uD901')
445 data = '\uDC02'.encode(self.encoding, 'surrogatepass')
447 dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
448 self.assertEqual(dec.decode(data[:i]), '')
449 self.assertEqual(dec.decode(data[i:]), '\uDC02')
466 def test_only_one_bom(self):
467 _,_,reader,writer = codecs.lookup(self.encoding)
475 self.assertTrue(d == self.spamle or d == self.spambe)
479 self.assertEqual(f.read(), "spamspam")
481 def test_badbom(self):
483 f = codecs.getreader(self.encoding)(s)
484 self.assertRaises(UnicodeError, f.read)
487 f = codecs.getreader(self.encoding)(s)
488 self.assertRaises(UnicodeError, f.read)
490 def test_partial(self):
491 self.check_partial(
521 def test_handlers(self):
522 self.assertEqual(('\ufffd', 1),
524 self.assertEqual(('', 1),
527 def test_errors(self):
528 self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
531 def test_decoder_state(self):
532 self.check_state_handling_decode(self.encoding,
533 "spamspam", self.spamle)
534 self.check_state_handling_decode(self.encoding,
535 "spamspam", self.spambe)
537 def test_issue8941(self):
541 self.assertEqual('\U00010000' * 1024,
544 self.assertEqual('\U00010000' * 1024,
552 def test_partial(self):
553 self.check_partial(
579 def test_simple(self):
580 self.assertEqual("\U00010203".encode(self.encoding), b"\x03\x02\x01\x00")
582 def test_errors(self):
583 self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
586 def test_issue8941(self):
590 self.assertEqual('\U00010000' * 1024,
598 def test_partial(self):
599 self.check_partial(
625 def test_simple(self):
626 self.assertEqual("\U00010203".encode(self.encoding), b"\x00\x01\x02\x03")
628 def test_errors(self):
629 self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
632 def test_issue8941(self):
636 self.assertEqual('\U00010000' * 1024,
650 def test_only_one_bom(self):
651 _,_,reader,writer = codecs.lookup(self.encoding)
659 self.assertTrue(d == self.spamle or d == self.spambe)
663 self.assertEqual(f.read(), "spamspam")
665 def test_badbom(self):
667 f = codecs.getreader(self.encoding)(s)
668 self.assertRaises(UnicodeError, f.read)
671 f = codecs.getreader(self.encoding)(s)
672 self.assertRaises(UnicodeError, f.read)
674 def test_partial(self):
675 self.check_partial(
695 def test_handlers(self):
696 self.assertEqual(('\ufffd', 1),
698 self.assertEqual(('', 1),
701 def test_errors(self):
702 self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode,
705 def test_decoder_state(self):
706 self.check_state_handling_decode(self.encoding,
707 "spamspam", self.spamle)
708 self.check_state_handling_decode(self.encoding,
709 "spamspam", self.spambe)
711 def test_bug691291(self):
718 s = s1.encode(self.encoding)
719 self.addCleanup(os_helper.unlink, os_helper.TESTFN)
723 encoding=self.encoding) as reader:
724 self.assertEqual(reader.read(), s1)
726 def test_invalid_modes(self):
728 with self.assertRaises(ValueError) as cm:
729 codecs.open(os_helper.TESTFN, mode, encoding=self.encoding)
730 self.assertIn('invalid mode', str(cm.exception))
733 with self.assertRaises(ValueError) as cm:
734 codecs.open(os_helper.TESTFN, mode, encoding=self.encoding)
735 self.assertIn("can't have text and binary mode at once",
743 def test_partial(self):
744 self.check_partial(
762 def test_errors(self):
773 self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
775 self.assertEqual(raw.decode('utf-16le', 'replace'), expected)
777 def test_nonbmp(self):
778 self.assertEqual("\U00010203".encode(self.encoding),
780 self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
787 def test_partial(self):
788 self.check_partial(
806 def test_errors(self):
817 self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
819 self.assertEqual(raw.decode('utf-16be', 'replace'), expected)
821 def test_nonbmp(self):
822 self.assertEqual("\U00010203".encode(self.encoding),
824 self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
833 def test_partial(self):
834 self.check_partial(
855 def test_decoder_state(self):
857 self.check_state_handling_decode(self.encoding,
858 u, u.encode(self.encoding))
860 def test_decode_error(self):
867 with self.subTest(data=data, error_handler=error_handler,
869 self.assertEqual(data.decode(self.encoding, error_handler),
872 def test_lone_surrogates(self):
876 self.assertEqual("[\uDC80]".encode(self.encoding, "surrogateescape"),
877 self.BOM + b'[\x80]')
879 with self.assertRaises(UnicodeEncodeError) as cm:
880 "[\uDC80\uD800\uDFFF]".encode(self.encoding, "surrogateescape")
882 self.assertEqual(exc.object[exc.start:exc.end], '\uD800\uDFFF')
884 def test_surrogatepass_handler(self):
885 self.assertEqual("abc\ud800def".encode(self.encoding, "surrogatepass"),
886 self.BOM + b"abc\xed\xa0\x80def")
887 self.assertEqual("\U00010fff\uD800".encode(self.encoding, "surrogatepass"),
888 self.BOM + b"\xf0\x90\xbf\xbf\xed\xa0\x80")
889 self.assertEqual("[\uD800\uDC80]".encode(self.encoding, "surrogatepass"),
890 self.BOM + b'[\xed\xa0\x80\xed\xb2\x80]')
892 self.assertEqual(b"abc\xed\xa0\x80def".decode(self.encoding, "surrogatepass"),
894 self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode(self.encoding, "surrogatepass"),
897 self.assertTrue(codecs.lookup_error("surrogatepass"))
898 with self.assertRaises(UnicodeDecodeError):
899 b"abc\xed\xa0".decode(self.encoding, "surrogatepass")
900 with self.assertRaises(UnicodeDecodeError):
901 b"abc\xed\xa0z".decode(self.encoding, "surrogatepass")
903 def test_incremental_errors(self):
916 with self.subTest(data=data):
917 dec = codecs.getincrementaldecoder(self.encoding)()
918 self.assertRaises(UnicodeDecodeError, dec.decode, data)
924 def test_ascii(self):
930 self.assertEqual(set_d.encode(self.encoding), set_d.encode('ascii'))
931 self.assertEqual(set_d.encode('ascii').decode(self.encoding), set_d)
934 self.assertEqual(set_o.encode(self.encoding), set_o.encode('ascii'))
935 self.assertEqual(set_o.encode('ascii').decode(self.encoding), set_o)
937 self.assertEqual('a+b'.encode(self.encoding), b'a+-b')
938 self.assertEqual(b'a+-b'.decode(self.encoding), 'a+b')
941 self.assertEqual(ws.encode(self.encoding), ws.encode('ascii'))
942 self.assertEqual(ws.encode('ascii').decode(self.encoding), ws)
946 self.assertEqual(other_ascii.encode(self.encoding),
950 def test_partial(self):
951 self.check_partial(
989 def test_errors(self):
1014 with self.subTest(raw=raw):
1015 self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode,
1017 self.assertEqual(raw.decode('utf-7', 'replace'), expected)
1019 def test_nonbmp(self):
1020 self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-')
1021 self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-')
1022 self.assertEqual(b'+2AHcoA-'.decode(self.encoding), '\U000104A0')
1023 self.assertEqual(b'+2AHcoA'.decode(self.encoding), '\U000104A0')
1024 self.assertEqual('\u20ac\U000104A0'.encode(self.encoding), b'+IKzYAdyg-')
1025 self.assertEqual(b'+IKzYAdyg-'.decode(self.encoding), '\u20ac\U000104A0')
1026 self.assertEqual(b'+IKzYAdyg'.decode(self.encoding), '\u20ac\U000104A0')
1027 self.assertEqual('\u20ac\u20ac\U000104A0'.encode(self.encoding),
1029 self.assertEqual(b'+IKwgrNgB3KA-'.decode(self.encoding),
1031 self.assertEqual(b'+IKwgrNgB3KA'.decode(self.encoding),
1034 def test_lone_surrogates(self):
1051 with self.subTest(raw=raw):
1052 self.assertEqual(raw.decode('utf-7', 'replace'), expected)
1057 def test_errors(self):
1058 self.assertRaises(UnicodeDecodeError, codecs.utf_16_ex_decode, b"\xff", "strict", 0, True)
1060 def test_bad_args(self):
1061 self.assertRaises(TypeError, codecs.utf_16_ex_decode)
1065 def test_array(self):
1067 self.assertEqual(
1072 def test_empty(self):
1073 self.assertEqual(codecs.readbuffer_encode(""), (b"", 0))
1075 def test_bad_args(self):
1076 self.assertRaises(TypeError, codecs.readbuffer_encode)
1077 self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
1083 def test_partial(self):
1084 self.check_partial(
1111 def test_bug1601501(self):
1113 self.assertEqual(str(b"\xef\xbb\xbf", "utf-8-sig"), "")
1115 def test_bom(self):
1118 self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
1120 def test_stream_bom(self):
1140 self.assertEqual(got, unistring)
1142 def test_stream_bare(self):
1162 self.assertEqual(got, unistring)
1166 def test_empty(self):
1167 self.assertEqual(codecs.escape_decode(b""), (b"", 0))
1168 self.assertEqual(codecs.escape_decode(bytearray()), (b"", 0))
1170 def test_raw(self):
1175 self.assertEqual(decode(b + b'0'), (b + b'0', 2))
1177 def test_escape(self):
1179 check = coding_checker(self, decode)
1202 with self.assertWarns(DeprecationWarning):
1204 with self.assertWarns(DeprecationWarning):
1206 with self.assertWarns(DeprecationWarning):
1208 with self.assertWarns(DeprecationWarning):
1210 with self.assertWarns(DeprecationWarning):
1213 with self.assertWarns(DeprecationWarning):
1216 def test_errors(self):
1218 self.assertRaises(ValueError, decode, br"\x")
1219 self.assertRaises(ValueError, decode, br"[\x]")
1220 self.assertEqual(decode(br"[\x]\x", "ignore"), (b"[]", 6))
1221 self.assertEqual(decode(br"[\x]\x", "replace"), (b"[?]?", 6))
1222 self.assertRaises(ValueError, decode, br"\x0")
1223 self.assertRaises(ValueError, decode, br"[\x0]")
1224 self.assertEqual(decode(br"[\x0]\x0", "ignore"), (b"[]", 8))
1225 self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))
1338 def test_encode(self):
1345 self.assertEqual(
1350 def test_decode(self):
1352 self.assertEqual(uni, puny.decode("punycode"))
1354 self.assertEqual(uni, puny.decode("punycode"))
1356 def test_decode_invalid(self):
1362 with self.subTest(puny=puny, errors=errors):
1364 self.assertRaises(UnicodeError, puny.decode, "punycode", errors)
1366 self.assertEqual(puny.decode("punycode", errors), expected)
1524 def test_nameprep(self):
1534 self.assertRaises(UnicodeError, nameprep, orig)
1538 self.assertEqual(nameprep(orig), prepped)
1544 def test_builtin_decode(self):
1545 self.assertEqual(str(b"python.org", "idna"), "python.org")
1546 self.assertEqual(str(b"python.org.", "idna"), "python.org.")
1547 self.assertEqual(str(b"xn--pythn-mua.org", "idna"), "pyth\xf6n.org")
1548 self.assertEqual(str(b"xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.")
1550 def test_builtin_encode(self):
1551 self.assertEqual("python.org".encode("idna"), b"python.org")
1552 self.assertEqual("python.org.".encode("idna"), b"python.org.")
1553 self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
1554 self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
1556 def test_builtin_decode_length_limit(self):
1557 with self.assertRaisesRegex(UnicodeError, "too long"):
1559 with self.assertRaisesRegex(UnicodeError, "too long"):
1562 def test_stream(self):
1565 self.assertEqual(r.read(), "")
1567 def test_incremental_decode(self):
1568 self.assertEqual(
1572 self.assertEqual(
1576 self.assertEqual(
1580 self.assertEqual(
1586 self.assertEqual(decoder.decode(b"xn--xam", ), "")
1587 self.assertEqual(decoder.decode(b"ple-9ta.o", ), "\xe4xample.")
1588 self.assertEqual(decoder.decode(b"rg"), "")
1589 self.assertEqual(decoder.decode(b"", True), "org")
1592 self.assertEqual(decoder.decode(b"xn--xam", ), "")
1593 self.assertEqual(decoder.decode(b"ple-9ta.o", ), "\xe4xample.")
1594 self.assertEqual(decoder.decode(b"rg."), "org.")
1595 self.assertEqual(decoder.decode(b"", True), "")
1597 def test_incremental_encode(self):
1598 self.assertEqual(
1602 self.assertEqual(
1606 self.assertEqual(
1610 self.assertEqual(
1616 self.assertEqual(encoder.encode("\xe4x"), b"")
1617 self.assertEqual(encoder.encode("ample.org"), b"xn--xample-9ta.")
1618 self.assertEqual(encoder.encode("", True), b"org")
1621 self.assertEqual(encoder.encode("\xe4x"), b"")
1622 self.assertEqual(encoder.encode("ample.org."), b"xn--xample-9ta.org.")
1623 self.assertEqual(encoder.encode("", True), b"")
1625 def test_errors(self):
1631 self.assertRaises(Exception, "python.org".encode, "idna", errors)
1632 self.assertRaises(Exception,
1638 def test_decode(self):
1639 self.assertEqual(codecs.decode(b'\xe4\xf6\xfc', 'latin-1'),
1641 self.assertRaises(TypeError, codecs.decode)
1642 self.assertEqual(codecs.decode(b'abc'), 'abc')
1643 self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')
1646 self.assertEqual(codecs.decode(obj=b'\xe4\xf6\xfc', encoding='latin-1'),
1648 self.assertEqual(codecs.decode(b'[\xff]', 'ascii', errors='ignore'),
1651 def test_encode(self):
1652 self.assertEqual(codecs.encode('\xe4\xf6\xfc', 'latin-1'),
1654 self.assertRaises(TypeError, codecs.encode)
1655 self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
1656 self.assertEqual(codecs.encode('abc'), b'abc')
1657 self.assertRaises(UnicodeEncodeError, codecs.encode, '\xffff', 'ascii')
1660 self.assertEqual(codecs.encode(obj='\xe4\xf6\xfc', encoding='latin-1'),
1662 self.assertEqual(codecs.encode('[\xff]', 'ascii', errors='ignore'),
1665 def test_register(self):
1666 self.assertRaises(TypeError, codecs.register)
1667 self.assertRaises(TypeError, codecs.register, 42)
1669 def test_unregister(self):
1673 self.assertRaises(TypeError, codecs.lookup, name)
1678 self.assertRaises(LookupError, codecs.lookup, name)
1681 def test_lookup(self):
1682 self.assertRaises(TypeError, codecs.lookup)
1683 self.assertRaises(LookupError, codecs.lookup, "__spam__")
1684 self.assertRaises(LookupError, codecs.lookup, " ")
1686 def test_getencoder(self):
1687 self.assertRaises(TypeError, codecs.getencoder)
1688 self.assertRaises(LookupError, codecs.getencoder, "__spam__")
1690 def test_getdecoder(self):
1691 self.assertRaises(TypeError, codecs.getdecoder)
1692 self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
1694 def test_getreader(self):
1695 self.assertRaises(TypeError, codecs.getreader)
1696 self.assertRaises(LookupError, codecs.getreader, "__spam__")
1698 def test_getwriter(self):
1699 self.assertRaises(TypeError, codecs.getwriter)
1700 self.assertRaises(LookupError, codecs.getwriter, "__spam__")
1702 def test_lookup_issue1813(self):
1706 self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
1711 self.skipTest('test needs Turkish locale')
1713 self.assertEqual(c.name, 'ascii')
1715 def test_all(self):
1734 self.assertCountEqual(api, codecs.__all__)
1738 def test_open(self):
1739 self.addCleanup(os_helper.unlink, os_helper.TESTFN)
1741 with self.subTest(mode), \
1743 self.assertIsInstance(file, codecs.StreamReaderWriter)
1745 def test_undefined(self):
1746 self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined')
1747 self.assertRaises(UnicodeError, codecs.decode, b'abc', 'undefined')
1748 self.assertRaises(UnicodeError, codecs.encode, '', 'undefined')
1749 self.assertRaises(UnicodeError, codecs.decode, b'', 'undefined')
1751 self.assertRaises(UnicodeError,
1753 self.assertRaises(UnicodeError,
1756 def test_file_closes_if_lookup_error_raised(self):
1759 with self.assertRaises(LookupError):
1767 def setUp(self):
1768 self.reader = codecs.getreader('utf-8')
1769 self.stream = io.BytesIO(b'\xed\x95\x9c\n\xea\xb8\x80')
1771 def test_readlines(self):
1772 f = self.reader(self.stream)
1773 self.assertEqual(f.readlines(), ['\ud55c\n', '\uae00'])
1778 def test_basic(self):
1781 self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae')
1786 self.assertEqual(f.getvalue(), b'\xfc')
1908 def test_basics(self):
1918 self.assertEqual(encoding.replace("_", "-"),
1922 self.assertEqual(size, len(s), "encoding=%r" % encoding)
1924 self.assertEqual(chars, s, "encoding=%r" % encoding)
1934 self.assertTrue(type(chunk) is bytes, type(chunk))
1942 self.assertEqual(decodedresult, s, "encoding=%r" % encoding)
1961 self.assertEqual(decodedresult, s,
1967 self.assertEqual(result, s, "encoding=%r" % encoding)
1972 self.assertEqual(result, "")
1985 self.assertEqual(decodedresult, s,
1990 def test_basics_capi(self):
2010 self.assertEqual(decodedresult, s,
2024 self.assertEqual(decodedresult, s,
2027 def test_seek(self):
2040 self.assertEqual(s, data)
2042 def test_bad_decode_args(self):
2045 self.assertRaises(TypeError, decoder)
2047 self.assertRaises(TypeError, decoder, 42)
2049 def test_bad_encode_args(self):
2052 self.assertRaises(TypeError, encoder)
2054 def test_encoding_map_type_initialized(self):
2058 self.assertEqual(table_type, table_type)
2060 def test_decoder_state(self):
2065 self.check_state_handling_decode(encoding, u, u.encode(encoding))
2066 self.check_state_handling_encode(encoding, u, u.encode(encoding))
2070 def test_decode_with_string_map(self):
2071 self.assertEqual(
2076 self.assertEqual(
2081 self.assertRaises(UnicodeDecodeError,
2085 self.assertRaises(UnicodeDecodeError,
2089 self.assertEqual(
2094 self.assertEqual(
2099 self.assertEqual(
2104 self.assertEqual(
2109 self.assertEqual(
2114 self.assertEqual(
2120 self.assertEqual(
2125 def test_decode_with_int2str_map(self):
2126 self.assertEqual(
2132 self.assertEqual(
2138 self.assertEqual(
2144 self.assertEqual(
2150 self.assertRaises(UnicodeDecodeError,
2155 self.assertRaises(UnicodeDecodeError,
2161 self.assertRaises(UnicodeDecodeError,
2166 self.assertEqual(
2172 self.assertEqual(
2179 self.assertEqual(
2185 self.assertEqual(
2191 self.assertEqual(
2198 self.assertEqual(
2204 self.assertEqual(
2210 self.assertEqual(
2217 self.assertEqual(
2224 self.assertEqual(
2229 self.assertRaisesRegex(TypeError,
2235 self.assertRaisesRegex(TypeError,
2241 def test_decode_with_int2int_map(self):
2246 self.assertEqual(
2253 self.assertEqual(
2259 self.assertEqual(
2265 self.assertRaises(TypeError,
2270 self.assertRaises(UnicodeDecodeError,
2275 self.assertRaises(UnicodeDecodeError,
2280 self.assertEqual(
2286 self.assertEqual(
2292 self.assertEqual(
2298 self.assertEqual(
2304 self.assertEqual(
2310 self.assertEqual(
2318 def test_encodedfile(self):
2321 self.assertEqual(ef.read(), b"\xfc")
2322 self.assertTrue(f.closed)
2324 def test_streamreaderwriter(self):
2329 self.assertEqual(srw.read(), "\xfc")
2333 def test_decode_unicode(self):
2352 self.assertRaises(TypeError, decoder, "xxx")
2354 def test_unicode_escape(self):
2357 self.assertEqual(codecs.unicode_escape_decode(r"\u1234"), ("\u1234", 6))
2358 self.assertEqual(codecs.unicode_escape_decode(br"\u1234"), ("\u1234", 6))
2359 self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
2360 self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
2362 self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000")
2363 self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
2364 self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "backslashreplace"),
2367 self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000")
2368 self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
2369 self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "backslashreplace"),
2378 def test_empty(self):
2379 self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
2380 self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))
2382 def test_raw_encode(self):
2386 self.assertEqual(encode(chr(b)), (bytes([b]), 1))
2388 def test_raw_decode(self):
2392 self.assertEqual(decode(bytes([b]) + b'0'), (chr(b) + '0', 2))
2394 def test_escape_encode(self):
2396 check = coding_checker(self, encode)
2409 def test_escape_decode(self):
2411 check = coding_checker(self, decode)
2436 with self.assertWarns(DeprecationWarning):
2439 with self.assertWarns(DeprecationWarning):
2441 with self.assertWarns(DeprecationWarning):
2443 with self.assertWarns(DeprecationWarning):
2445 with self.assertWarns(DeprecationWarning):
2448 with self.assertWarns(DeprecationWarning):
2451 def test_decode_errors(self):
2455 self.assertRaises(UnicodeDecodeError, decode,
2457 self.assertRaises(UnicodeDecodeError, decode,
2460 self.assertEqual(decode(data, "ignore"), ("[]", len(data)))
2461 self.assertEqual(decode(data, "replace"),
2463 self.assertRaises(UnicodeDecodeError, decode, br"\U00110000")
2464 self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
2465 self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
2467 def test_partial(self):
2468 self.check_partial(
2511 def test_empty(self):
2512 self.assertEqual(codecs.raw_unicode_escape_encode(""), (b"", 0))
2513 self.assertEqual(codecs.raw_unicode_escape_decode(b""), ("", 0))
2515 def test_raw_encode(self):
2518 self.assertEqual(encode(chr(b)), (bytes([b]), 1))
2520 def test_raw_decode(self):
2523 self.assertEqual(decode(bytes([b]) + b'0'), (chr(b) + '0', 2))
2525 def test_escape_encode(self):
2527 check = coding_checker(self, encode)
2534 def test_escape_decode(self):
2536 check = coding_checker(self, decode)
2543 def test_decode_errors(self):
2547 self.assertRaises(UnicodeDecodeError, decode,
2549 self.assertRaises(UnicodeDecodeError, decode,
2552 self.assertEqual(decode(data, "ignore"), ("[]", len(data)))
2553 self.assertEqual(decode(data, "replace"),
2555 self.assertRaises(UnicodeDecodeError, decode, br"\U00110000")
2556 self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
2557 self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
2559 def test_partial(self):
2560 self.check_partial(
2591 def test_escape_encode(self):
2603 with self.subTest(data=data):
2604 self.assertEqual(codecs.escape_encode(data), output)
2605 self.assertRaises(TypeError, codecs.escape_encode, 'spam')
2606 self.assertRaises(TypeError, codecs.escape_encode, bytearray(b'spam'))
2611 def test_utf8(self):
2613 self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
2615 self.assertEqual("foo\udc80bar".encode("utf-8", "surrogateescape"),
2618 self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
2620 self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),
2623 def test_ascii(self):
2625 self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
2627 self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
2630 def test_charmap(self):
2632 self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
2634 self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
2637 def test_latin1(self):
2639 self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
2644 def test_seek0(self):
2652 self.addCleanup(os_helper.unlink, os_helper.TESTFN)
2659 self.assertEqual(f.read(), data * 2)
2661 self.assertEqual(f.read(), data * 2)
2666 self.assertNotEqual(f.tell(), 0)
2670 self.assertEqual(f.read(), data)
2675 self.assertNotEqual(f.writer.tell(), 0)
2679 self.assertEqual(f.read(), data)
2688 self.assertEqual(f.read(), data * 2)
2697 self.assertEqual(f.read(), data * 2)
2733 def test_basics(self):
2736 with self.subTest(encoding=encoding):
2739 self.assertEqual(size, len(binput))
2741 self.assertEqual(size, len(o))
2742 self.assertEqual(i, binput)
2744 def test_read(self):
2746 with self.subTest(encoding=encoding):
2750 self.assertEqual(sout, b"\x80")
2752 def test_readline(self):
2754 with self.subTest(encoding=encoding):
2758 self.assertEqual(sout, b"\x80")
2760 def test_buffer_api_usage(self):
2766 with self.subTest(encoding=encoding):
2771 self.assertEqual(view_encoded, data)
2774 self.assertEqual(data, original)
2776 self.assertEqual(view_decoded, data)
2778 def test_text_to_binary_denylists_binary_transforms(self):
2782 with self.subTest(encoding=encoding):
2786 with self.assertRaisesRegex(LookupError, msg) as failure:
2788 self.assertIsNone(failure.exception.__cause__)
2790 def test_text_to_binary_denylists_text_transforms(self):
2794 with self.assertRaisesRegex(LookupError, msg):
2797 def test_binary_to_text_denylists_binary_transforms(self):
2802 with self.subTest(encoding=encoding):
2807 with self.assertRaisesRegex(LookupError, msg):
2809 with self.assertRaisesRegex(LookupError, msg):
2812 def test_binary_to_text_denylists_text_transforms(self):
2815 with self.subTest(bad_input=bad_input):
2818 with self.assertRaisesRegex(LookupError, msg) as failure:
2820 self.assertIsNone(failure.exception.__cause__)
2823 def test_custom_zlib_error_is_wrapped(self):
2826 with self.assertRaisesRegex(Exception, msg) as failure:
2828 self.assertIsInstance(failure.exception.__cause__,
2831 def test_custom_hex_error_is_wrapped(self):
2834 with self.assertRaisesRegex(Exception, msg) as failure:
2836 self.assertIsInstance(failure.exception.__cause__,
2843 def test_aliases(self):
2847 with self.subTest(alias=alias):
2849 self.assertEqual(info.name, expected_name)
2851 def test_quopri_stateless(self):
2854 self.assertEqual(encoded, b"space=20tab=09eol=20\n")
2857 self.assertEqual(codecs.decode(unescaped, "quopri-codec"), unescaped)
2859 def test_uu_invalid(self):
2861 self.assertRaises(ValueError, codecs.decode, b"", "uu-codec")
2880 def setUp(self):
2881 self.codec_name = 'exception_chaining_test'
2883 self.addCleanup(codecs.unregister, _get_test_codec)
2892 self.obj_to_raise = RuntimeError
2894 def tearDown(self):
2895 _TEST_CODECS.pop(self.codec_name, None)
2897 encodings._cache.pop(self.codec_name, None)
2899 def set_codec(self, encode, decode):
2901 name=self.codec_name)
2902 _TEST_CODECS[self.codec_name] = codec_info
2905 def assertWrapped(self, operation, exc_type, msg):
2907 operation, self.codec_name, exc_type.__name__, msg)
2908 with self.assertRaisesRegex(exc_type, full_msg) as caught:
2910 self.assertIsInstance(caught.exception.__cause__, exc_type)
2911 self.assertIsNotNone(caught.exception.__cause__.__traceback__)
2913 def raise_obj(self, *args, **kwds):
2915 raise self.obj_to_raise
2917 def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
2918 self.obj_to_raise = obj_to_raise
2919 self.set_codec(self.raise_obj, self.raise_obj)
2920 with self.assertWrapped("encoding", exc_type, msg):
2921 "str_input".encode(self.codec_name)
2922 with self.assertWrapped("encoding", exc_type, msg):
2923 codecs.encode("str_input", self.codec_name)
2924 with self.assertWrapped("decoding", exc_type, msg):
2925 b"bytes input".decode(self.codec_name)
2926 with self.assertWrapped("decoding", exc_type, msg):
2927 codecs.decode(b"bytes input", self.codec_name)
2929 def test_raise_by_type(self):
2930 self.check_wrapped(RuntimeError, "")
2932 def test_raise_by_value(self):
2934 self.check_wrapped(RuntimeError(msg), msg)
2936 def test_raise_grandchild_subclass_exact_size(self):
2940 self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
2942 def test_raise_subclass_with_weakref_support(self):
2946 self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
2948 def check_not_wrapped(self, obj_to_raise, msg):
2951 self.set_codec(raise_obj, raise_obj)
2952 with self.assertRaisesRegex(RuntimeError, msg):
2953 "str input".encode(self.codec_name)
2954 with self.assertRaisesRegex(RuntimeError, msg):
2955 codecs.encode("str input", self.codec_name)
2956 with self.assertRaisesRegex(RuntimeError, msg):
2957 b"bytes input".decode(self.codec_name)
2958 with self.assertRaisesRegex(RuntimeError, msg):
2959 codecs.decode(b"bytes input", self.codec_name)
2961 def test_init_override_is_not_wrapped(self):
2963 def __init__(self):
2965 self.check_not_wrapped(CustomInit, "")
2967 def test_new_override_is_not_wrapped(self):
2971 self.check_not_wrapped(CustomNew, "")
2973 def test_instance_attribute_is_not_wrapped(self):
2977 self.check_not_wrapped(exc, "^{}$".format(msg))
2979 def test_non_str_arg_is_not_wrapped(self):
2980 self.check_not_wrapped(RuntimeError(1), "1")
2982 def test_multiple_args_is_not_wrapped(self):
2984 self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re)
2987 def test_codec_lookup_failure_not_wrapped(self):
2988 msg = "^unknown encoding: {}$".format(self.codec_name)
2990 with self.assertRaisesRegex(LookupError, msg):
2991 "str input".encode(self.codec_name)
2992 with self.assertRaisesRegex(LookupError, msg):
2993 codecs.encode("str input", self.codec_name)
2994 with self.assertRaisesRegex(LookupError, msg):
2995 b"bytes input".decode(self.codec_name)
2996 with self.assertRaisesRegex(LookupError, msg):
2997 codecs.decode(b"bytes input", self.codec_name)
2999 def test_unflagged_non_text_codec_handling(self):
3009 self.set_codec(encode_to_str, decode_to_bytes)
3011 encoded = codecs.encode(None, self.codec_name)
3012 self.assertEqual(encoded, "not bytes!")
3013 decoded = codecs.decode(None, self.codec_name)
3014 self.assertEqual(decoded, b"not str!")
3018 msg = fmt.format(self.codec_name)
3019 with self.assertRaisesRegex(TypeError, msg):
3020 "str_input".encode(self.codec_name)
3023 msg = fmt.format(self.codec_name)
3024 with self.assertRaisesRegex(TypeError, msg):
3025 b"bytes input".decode(self.codec_name)
3034 def test_invalid_code_page(self):
3035 self.assertRaises(ValueError, codecs.code_page_encode, -1, 'a')
3036 self.assertRaises(ValueError, codecs.code_page_decode, -1, b'a')
3037 self.assertRaises(OSError, codecs.code_page_encode, 123, 'a')
3038 self.assertRaises(OSError, codecs.code_page_decode, 123, b'a')
3040 def test_code_page_name(self):
3041 self.assertRaisesRegex(UnicodeEncodeError, 'cp932',
3043 self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
3045 self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8',
3046 codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)
3048 def check_decode(self, cp, tests):
3054 self.fail('Unable to decode %a from "cp%s" with '
3056 self.assertEqual(decoded[0], expected,
3060 self.assertGreaterEqual(decoded[1], 0)
3061 self.assertLessEqual(decoded[1], len(raw))
3063 self.assertRaises(UnicodeDecodeError,
3066 def check_encode(self, cp, tests):
3072 self.fail('Unable to encode %a to "cp%s" with '
3074 self.assertEqual(encoded[0], expected,
3077 self.assertEqual(encoded[1], len(text))
3079 self.assertRaises(UnicodeEncodeError,
3082 def test_cp932(self):
3083 self.check_encode(932, (
3099 self.check_decode(932, (
3115 def test_cp1252(self):
3116 self.check_encode(1252, (
3127 self.check_decode(1252, (
3133 def test_cp_utf7(self):
3135 self.check_encode(cp, (
3142 self.check_decode(cp, (
3153 def test_multibyte_encoding(self):
3154 self.check_decode(932, (
3158 self.check_decode(self.CP_UTF8, (
3162 self.check_encode(self.CP_UTF8, (
3167 def test_code_page_decode_flags(self):
3179 self.assertEqual(codecs.code_page_decode(cp, b'abc'), ('abc', 3), f'cp{cp}')
3183 self.assertEqual(codecs.code_page_decode(42, b'abc'),
3186 def test_incremental(self):
3188 self.assertEqual(decoded, ('', 0))
3193 self.assertEqual(decoded, ('\u9a3e', 2))
3198 self.assertEqual(decoded, ('\u9a3e\u9a3e', 4))
3203 self.assertEqual(decoded, ('abc', 3))
3205 def test_mbcs_alias(self):
3213 self.assertEqual(codec.name, 'mbcs')
3218 def test_large_input(self, size):
3224 self.assertEqual(len(encoded), size+2)
3226 self.assertEqual(decoded[1], len(encoded))
3228 self.assertEqual(len(decoded[0]), decoded[1])
3229 self.assertEqual(decoded[0][:10], '0123456701')
3230 self.assertEqual(decoded[0][-20:],
3236 def test_large_utf8_input(self, size):
3241 self.assertEqual(len(encoded), size // 8 * 10)
3243 self.assertEqual(decoded[1], len(encoded))
3245 self.assertEqual(len(decoded[0]), size)
3246 self.assertEqual(decoded[0][:10], '0123456\ud10001')
3247 self.assertEqual(decoded[0][-11:], '56\ud1000123456\ud100')
3251 def test_encode(self):
3252 self.assertEqual('abc123'.encode('ascii'), b'abc123')
3254 def test_encode_error(self):
3263 with self.subTest(data=data, error_handler=error_handler,
3265 self.assertEqual(data.encode('ascii', error_handler),
3268 def test_encode_surrogateescape_error(self):
3269 with self.assertRaises(UnicodeEncodeError):
3273 def test_decode(self):
3274 self.assertEqual(b'abc'.decode('ascii'), 'abc')
3276 def test_decode_error(self):
3283 with self.subTest(data=data, error_handler=error_handler,
3285 self.assertEqual(data.decode('ascii', error_handler),
3290 def test_encode(self):
3295 with self.subTest(data=data, expected=expected):
3296 self.assertEqual(data.encode('latin1'), expected)
3298 def test_encode_errors(self):
3307 with self.subTest(data=data, error_handler=error_handler,
3309 self.assertEqual(data.encode('latin1', error_handler),
3312 def test_encode_surrogateescape_error(self):
3313 with self.assertRaises(UnicodeEncodeError):
3317 def test_decode(self):
3322 with self.subTest(data=data, expected=expected):
3323 self.assertEqual(data.decode('latin1'), expected)
3327 def test_writelines(self):
3333 self.assertEqual(bio.getvalue(), b'ab')
3335 def test_write(self):
3344 self.assertEqual(bio.getvalue(), text.encode('utf-8'))
3346 def test_seeking_read(self):
3350 self.assertEqual(sr.readline(), b'line1\n')
3352 self.assertEqual(sr.readline(), b'line1\n')
3353 self.assertEqual(sr.readline(), b'line2\n')
3354 self.assertEqual(sr.readline(), b'line3\n')
3355 self.assertEqual(sr.readline(), b'')
3357 def test_seeking_write(self):
3365 self.assertEqual(sr.readline(), b'789\n')
3367 self.assertEqual(sr.readline(), b'1\n')
3368 self.assertEqual(sr.readline(), b'abc\n')
3369 self.assertEqual(sr.readline(), b'789\n')
3385 def encode(self, text, errors="strict"):
3388 def check_encode_strings(self, errors):
3389 for text in self.STRINGS:
3390 with self.subTest(text=text):
3392 expected = text.encode(self.ENCODING, errors)
3394 with self.assertRaises(RuntimeError) as cm:
3395 self.encode(text, errors)
3397 self.assertRegex(errmsg, r"encode error: pos=[0-9]+, reason=")
3399 encoded = self.encode(text, errors)
3400 self.assertEqual(encoded, expected)
3402 def test_encode_strict(self):
3403 self.check_encode_strings("strict")
3405 def test_encode_surrogateescape(self):
3406 self.check_encode_strings("surrogateescape")
3408 def test_encode_surrogatepass(self):
3410 self.encode('', 'surrogatepass')
3413 self.skipTest(f"{self.ENCODING!r} encoder doesn't support "
3418 self.check_encode_strings("surrogatepass")
3420 def test_encode_unsupported_error_handler(self):
3421 with self.assertRaises(ValueError) as cm:
3422 self.encode('', 'backslashreplace')
3423 self.assertEqual(str(cm.exception), 'unsupported error handler')
3425 def decode(self, encoded, errors="strict"):
3428 def check_decode_strings(self, errors):
3429 is_utf8 = (self.ENCODING == "utf-8")
3435 strings = list(self.BYTES_STRINGS)
3436 for text in self.STRINGS:
3438 encoded = text.encode(self.ENCODING, encode_errors)
3445 encoded2 = text.encode(self.ENCODING, 'surrogatepass')
3450 with self.subTest(encoded=encoded):
3452 expected = encoded.decode(self.ENCODING, errors)
3454 with self.assertRaises(RuntimeError) as cm:
3455 self.decode(encoded, errors)
3457 self.assertTrue(errmsg.startswith("decode error: "), errmsg)
3459 decoded = self.decode(encoded, errors)
3460 self.assertEqual(decoded, expected)
3462 def test_decode_strict(self):
3463 self.check_decode_strings("strict")
3465 def test_decode_surrogateescape(self):
3466 self.check_decode_strings("surrogateescape")
3468 def test_decode_surrogatepass(self):
3470 self.decode(b'', 'surrogatepass')
3473 self.skipTest(f"{self.ENCODING!r} decoder doesn't support "
3478 self.check_decode_strings("surrogatepass")
3480 def test_decode_unsupported_error_handler(self):
3481 with self.assertRaises(ValueError) as cm:
3482 self.decode(b'', 'backslashreplace')
3483 self.assertEqual(str(cm.exception), 'unsupported error handler')
3488 def test_encode(self):
3490 self.assertEqual(ciphertext, 'Pnrfne yvxrq pvcuref')
3492 def test_decode(self):
3494 self.assertEqual(plaintext, 'Et tu, Brute?')
3496 def test_incremental_encode(self):
3499 self.assertEqual(ciphertext, 'NOON ant Purely Onxre')
3501 def test_incremental_decode(self):
3504 self.assertEqual(plaintext, 'green Nerf rail gun')
3512 def test_rot13_func(self):
3518 self.assertEqual(
3525 def test_codecs_lookup(self):
3535 self.addCleanup(codecs.unregister, search_function)
3536 self.assertEqual(FOUND, codecs.lookup('aaa_8'))
3537 self.assertEqual(FOUND, codecs.lookup('AAA-8'))
3538 self.assertEqual(FOUND, codecs.lookup('AAA---8'))
3539 self.assertEqual(FOUND, codecs.lookup('AAA 8'))
3540 self.assertEqual(FOUND, codecs.lookup('aaa\xe9\u20ac-8'))
3541 self.assertEqual(NOT_FOUND, codecs.lookup('AAA.8'))
3542 self.assertEqual(NOT_FOUND, codecs.lookup('AAA...8'))
3543 self.assertEqual(NOT_FOUND, codecs.lookup('BBB-8'))
3544 self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
3545 self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
3547 def test_encodings_normalize_encoding(self):
3550 self.assertEqual(normalize('utf_8'), 'utf_8')
3551 self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
3552 self.assertEqual(normalize('utf 8'), 'utf_8')
3555 self.assertEqual(normalize('UTF 8'), 'UTF_8')
3556 self.assertEqual(normalize('utf.8'), 'utf.8')
3557 self.assertEqual(normalize('utf...8'), 'utf...8')