Lines Matching refs:codecs
1 import codecs
81 d = codecs.getincrementaldecoder(encoding)()
96 d = codecs.getincrementaldecoder(encoding)()
103 d = codecs.getincrementalencoder(encoding)()
106 d = codecs.getincrementalencoder(encoding)()
119 r = codecs.getreader(self.encoding)(q)
130 d = codecs.getincrementaldecoder(self.encoding)()
153 "".join(codecs.iterdecode([bytes([c]) for c in encoded], self.encoding))
159 return codecs.getreader(self.encoding)(stream)
223 return codecs.getreader(self.encoding)(stream)
328 reader = codecs.getreader(self.encoding)(stream)
334 writer = codecs.getwriter(self.encoding)(q)
335 reader = codecs.getreader(self.encoding)(q)
366 reader = codecs.getreader(self.encoding)(stream)
381 reader = codecs.getreader(self.encoding)(stream)
441 dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
447 dec = codecs.getincrementaldecoder(self.encoding)('surrogatepass')
467 _,_,reader,writer = codecs.lookup(self.encoding)
483 f = codecs.getreader(self.encoding)(s)
487 f = codecs.getreader(self.encoding)(s)
523 codecs.utf_32_decode(b'\x01', 'replace', True))
525 codecs.utf_32_decode(b'\x01', 'ignore', True))
528 self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
542 codecs.utf_32_decode(encoded_le)[0])
545 codecs.utf_32_decode(encoded_be)[0])
583 self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
591 codecs.utf_32_le_decode(encoded)[0])
629 self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
637 codecs.utf_32_be_decode(encoded)[0])
651 _,_,reader,writer = codecs.lookup(self.encoding)
667 f = codecs.getreader(self.encoding)(s)
671 f = codecs.getreader(self.encoding)(s)
697 codecs.utf_16_decode(b'\x01', 'replace', True))
699 codecs.utf_16_decode(b'\x01', 'ignore', True))
702 self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode,
722 with codecs.open(os_helper.TESTFN, 'r',
729 codecs.open(os_helper.TESTFN, mode, encoding=self.encoding)
734 codecs.open(os_helper.TESTFN, mode, encoding=self.encoding)
773 self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode,
817 self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode,
897 self.assertTrue(codecs.lookup_error("surrogatepass"))
917 dec = codecs.getincrementaldecoder(self.encoding)()
1015 self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode,
1058 self.assertRaises(UnicodeDecodeError, codecs.utf_16_ex_decode, b"\xff", "strict", 0, True)
1061 self.assertRaises(TypeError, codecs.utf_16_ex_decode)
1068 codecs.readbuffer_encode(array.array("b", b"spam")),
1073 self.assertEqual(codecs.readbuffer_encode(""), (b"", 0))
1076 self.assertRaises(TypeError, codecs.readbuffer_encode)
1077 self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
1081 BOM = codecs.BOM_UTF8
1116 d = codecs.getincrementaldecoder("utf-8-sig")()
1122 bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"
1124 reader = codecs.getreader("utf-8-sig")
1146 reader = codecs.getreader("utf-8-sig")
1167 self.assertEqual(codecs.escape_decode(b""), (b"", 0))
1168 self.assertEqual(codecs.escape_decode(bytearray()), (b"", 0))
1171 decode = codecs.escape_decode
1178 decode = codecs.escape_decode
1217 decode = codecs.escape_decode
1563 r = codecs.getreader("idna")(io.BytesIO(b"abc"))
1569 "".join(codecs.iterdecode((bytes([c]) for c in b"python.org"), "idna")),
1573 "".join(codecs.iterdecode((bytes([c]) for c in b"python.org."), "idna")),
1577 "".join(codecs.iterdecode((bytes([c]) for c in b"xn--pythn-mua.org."), "idna")),
1581 "".join(codecs.iterdecode((bytes([c]) for c in b"xn--pythn-mua.org."), "idna")),
1585 decoder = codecs.getincrementaldecoder("idna")()
1599 b"".join(codecs.iterencode("python.org", "idna")),
1603 b"".join(codecs.iterencode("python.org.", "idna")),
1607 b"".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
1611 b"".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
1615 encoder = codecs.getincrementalencoder("idna")()
1639 self.assertEqual(codecs.decode(b'\xe4\xf6\xfc', 'latin-1'),
1641 self.assertRaises(TypeError, codecs.decode)
1642 self.assertEqual(codecs.decode(b'abc'), 'abc')
1643 self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')
1646 self.assertEqual(codecs.decode(obj=b'\xe4\xf6\xfc', encoding='latin-1'),
1648 self.assertEqual(codecs.decode(b'[\xff]', 'ascii', errors='ignore'),
1652 self.assertEqual(codecs.encode('\xe4\xf6\xfc', 'latin-1'),
1654 self.assertRaises(TypeError, codecs.encode)
1655 self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
1656 self.assertEqual(codecs.encode('abc'), b'abc')
1657 self.assertRaises(UnicodeEncodeError, codecs.encode, '\xffff', 'ascii')
1660 self.assertEqual(codecs.encode(obj='\xe4\xf6\xfc', encoding='latin-1'),
1662 self.assertEqual(codecs.encode('[\xff]', 'ascii', errors='ignore'),
1666 self.assertRaises(TypeError, codecs.register)
1667 self.assertRaises(TypeError, codecs.register, 42)
1672 codecs.register(search_function)
1673 self.assertRaises(TypeError, codecs.lookup, name)
1677 codecs.unregister(search_function)
1678 self.assertRaises(LookupError, codecs.lookup, name)
1682 self.assertRaises(TypeError, codecs.lookup)
1683 self.assertRaises(LookupError, codecs.lookup, "__spam__")
1684 self.assertRaises(LookupError, codecs.lookup, " ")
1687 self.assertRaises(TypeError, codecs.getencoder)
1688 self.assertRaises(LookupError, codecs.getencoder, "__spam__")
1691 self.assertRaises(TypeError, codecs.getdecoder)
1692 self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
1695 self.assertRaises(TypeError, codecs.getreader)
1696 self.assertRaises(LookupError, codecs.getreader, "__spam__")
1699 self.assertRaises(TypeError, codecs.getwriter)
1700 self.assertRaises(LookupError, codecs.getwriter, "__spam__")
1703 # Issue #1813: under Turkish locales, lookup of some codecs failed
1712 c = codecs.lookup('ASCII')
1734 self.assertCountEqual(api, codecs.__all__)
1735 for api in codecs.__all__:
1736 getattr(codecs, api)
1742 codecs.open(os_helper.TESTFN, mode, 'ascii') as file:
1743 self.assertIsInstance(file, codecs.StreamReaderWriter)
1746 self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined')
1747 self.assertRaises(UnicodeError, codecs.decode, b'abc', 'undefined')
1748 self.assertRaises(UnicodeError, codecs.encode, '', 'undefined')
1749 self.assertRaises(UnicodeError, codecs.decode, b'', 'undefined')
1752 codecs.encode, 'abc', 'undefined', errors)
1754 codecs.decode, b'abc', 'undefined', errors)
1760 codecs.open(os_helper.TESTFN, 'wt', 'invalid-encoding')
1768 self.reader = codecs.getreader('utf-8')
1780 ef = codecs.EncodedFile(f, 'utf-16-le', 'utf-8')
1784 ef = codecs.EncodedFile(f, 'utf-8', 'latin-1')
1892 if hasattr(codecs, "mbcs_encode"):
1894 if hasattr(codecs, "oem_encode"):
1909 s = "abc123" # all codecs should be able to encode these
1911 name = codecs.lookup(encoding).name
1921 (b, size) = codecs.getencoder(encoding)(s)
1923 (chars, size) = codecs.getdecoder(encoding)(b)
1929 writer = codecs.getwriter(encoding)(q)
1937 reader = codecs.getreader(encoding)(q)
1947 encoder = codecs.getincrementalencoder(encoding)()
1956 decoder = codecs.getincrementaldecoder(encoding)()
1965 result = "".join(codecs.iterdecode(
1966 codecs.iterencode(s, encoding), encoding))
1970 result = "".join(codecs.iterdecode(
1971 codecs.iterencode("", encoding), encoding))
1977 encoder = codecs.getincrementalencoder(encoding)("ignore")
1982 decoder = codecs.getincrementaldecoder(encoding)("ignore")
1991 s = "abc123" # all codecs should be able to encode these
2028 # all codecs should be able to encode these
2035 reader = codecs.getreader(encoding)(io.BytesIO(s.encode(encoding)))
2044 decoder = codecs.getdecoder(encoding)
2051 encoder = codecs.getencoder(encoding)
2072 codecs.charmap_decode(b"\x00\x01\x02", "strict", "abc"),
2077 codecs.charmap_decode(b"\x00\x01\x02", "strict", "\U0010FFFFbc"),
2082 codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab"
2086 codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab\ufffe"
2090 codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"),
2095 codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab\ufffe"),
2100 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", "ab"),
2105 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", "ab\ufffe"),
2110 codecs.charmap_decode(b"\x00\x01\x02", "ignore", "ab"),
2115 codecs.charmap_decode(b"\x00\x01\x02", "ignore", "ab\ufffe"),
2121 codecs.charmap_decode(allbytes, "ignore", ""),
2127 codecs.charmap_decode(b"\x00\x01\x02", "strict",
2133 codecs.charmap_decode(b"\x00\x01\x02", "strict",
2139 codecs.charmap_decode(b"\x00\x01\x02", "strict",
2145 codecs.charmap_decode(b"\x00\x01\x02", "strict",
2151 codecs.charmap_decode, b"\x00\x01\x02", "strict",
2156 codecs.charmap_decode, b"\x00\x01\x02", "strict",
2162 codecs.charmap_decode, b"\x00\x01\x02", "strict",
2167 codecs.charmap_decode(b"\x00\x01\x02", "replace",
2173 codecs.charmap_decode(b"\x00\x01\x02", "replace",
2180 codecs.charmap_decode(b"\x00\x01\x02", "replace",
2186 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2192 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2199 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2205 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2211 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2218 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2225 codecs.charmap_decode(allbytes, "ignore", {}),
2231 codecs.charmap_decode,
2237 codecs.charmap_decode,
2247 codecs.charmap_decode(b"\x00\x01\x02", "strict",
2254 codecs.charmap_decode(b"\x00\x01\x02", "strict",
2260 codecs.charmap_decode(b"\x00\x01\x02", "strict",
2266 codecs.charmap_decode, b"\x00\x01\x02", "strict",
2271 codecs.charmap_decode, b"\x00\x01\x02", "strict",
2276 codecs.charmap_decode, b"\x00\x01\x02", "strict",
2281 codecs.charmap_decode(b"\x00\x01\x02", "replace",
2287 codecs.charmap_decode(b"\x00\x01\x02", "replace",
2293 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2299 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2305 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2311 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2320 with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
2326 info = codecs.lookup("utf-8")
2327 with codecs.StreamReaderWriter(f, info.streamreader,
2336 codecs.utf_7_decode,
2337 codecs.utf_8_decode,
2338 codecs.utf_16_le_decode,
2339 codecs.utf_16_be_decode,
2340 codecs.utf_16_ex_decode,
2341 codecs.utf_32_decode,
2342 codecs.utf_32_le_decode,
2343 codecs.utf_32_be_decode,
2344 codecs.utf_32_ex_decode,
2345 codecs.latin_1_decode,
2346 codecs.ascii_decode,
2347 codecs.charmap_decode,
2349 if hasattr(codecs, "mbcs_decode"):
2350 decoders.append(codecs.mbcs_decode)
2357 self.assertEqual(codecs.unicode_escape_decode(r"\u1234"), ("\u1234", 6))
2358 self.assertEqual(codecs.unicode_escape_decode(br"\u1234"), ("\u1234", 6))
2359 self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
2360 self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
2362 self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000")
2363 self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
2364 self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "backslashreplace"),
2367 self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000")
2368 self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
2369 self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "backslashreplace"),
2379 self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
2380 self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))
2383 encode = codecs.unicode_escape_encode
2389 decode = codecs.unicode_escape_decode
2395 encode = codecs.unicode_escape_encode
2410 decode = codecs.unicode_escape_decode
2452 decode = codecs.unicode_escape_decode
2512 self.assertEqual(codecs.raw_unicode_escape_encode(""), (b"", 0))
2513 self.assertEqual(codecs.raw_unicode_escape_decode(b""), ("", 0))
2516 encode = codecs.raw_unicode_escape_encode
2521 decode = codecs.raw_unicode_escape_decode
2526 encode = codecs.raw_unicode_escape_encode
2535 decode = codecs.raw_unicode_escape_decode
2544 decode = codecs.raw_unicode_escape_decode
2604 self.assertEqual(codecs.escape_encode(data), output)
2605 self.assertRaises(TypeError, codecs.escape_encode, 'spam')
2606 self.assertRaises(TypeError, codecs.escape_encode, bytearray(b'spam'))
2655 with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
2664 with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
2673 with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
2683 with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
2692 with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
2737 # generic codecs interface
2738 (o, size) = codecs.getencoder(encoding)(binput)
2740 (i, size) = codecs.getdecoder(encoding)(o)
2747 sin = codecs.encode(b"\x80", encoding)
2748 reader = codecs.getreader(encoding)(io.BytesIO(sin))
2755 sin = codecs.encode(b"\x80", encoding)
2756 reader = codecs.getreader(encoding)(io.BytesIO(sin))
2761 # We check all the transform codecs accept memoryview input
2769 data = codecs.encode(data, encoding)
2770 view_encoded = codecs.encode(view, encoding)
2773 data = codecs.decode(data, encoding)
2775 view_decoded = codecs.decode(view, encoding)
2779 # Check binary -> binary codecs give a good error for str input
2784 r"use codecs.encode\(\) to handle arbitrary codecs")
2791 # Check str.encode gives a good error message for str -> str codecs
2793 r"use codecs.encode\(\) to handle arbitrary codecs")
2799 # message for binary -> binary codecs
2803 encoded_data = codecs.encode(data, encoding)
2805 r"use codecs.decode\(\) to handle arbitrary codecs")
2817 r"use codecs.decode\(\) to handle arbitrary codecs")
2827 codecs.decode(b"hello", "zlib_codec")
2835 codecs.decode(b"hello", "hex_codec")
2845 expected_name = codecs.lookup(codec_name).name
2848 info = codecs.lookup(alias)
2853 encoded = codecs.encode(b"space tab\teol \n", "quopri-codec")
2857 self.assertEqual(codecs.decode(unescaped, "quopri-codec"), unescaped)
2861 self.assertRaises(ValueError, codecs.decode, b"", "uu-codec")
2882 codecs.register(_get_test_codec)
2883 self.addCleanup(codecs.unregister, _get_test_codec)
2889 # need to ensure the codecs call back in to the instance to find
2900 codec_info = codecs.CodecInfo(encode, decode,
2923 codecs.encode("str_input", self.codec_name)
2927 codecs.decode(b"bytes input", self.codec_name)
2955 codecs.encode("str input", self.codec_name)
2959 codecs.decode(b"bytes input", self.codec_name)
2993 codecs.encode("str input", self.codec_name)
2997 codecs.decode(b"bytes input", self.codec_name)
3000 # The stdlib non-text codecs are now marked so they're
3002 # However, third party codecs won't be flagged, so we still make
3010 # No input or output type checks on the codecs module functions
3011 encoded = codecs.encode(None, self.codec_name)
3013 decoded = codecs.decode(None, self.codec_name)
3017 r"use codecs.encode\(\) to encode to arbitrary types$")
3022 r"use codecs.decode\(\) to decode to arbitrary types$")
3035 self.assertRaises(ValueError, codecs.code_page_encode, -1, 'a')
3036 self.assertRaises(ValueError, codecs.code_page_decode, -1, b'a')
3037 self.assertRaises(OSError, codecs.code_page_encode, 123, 'a')
3038 self.assertRaises(OSError, codecs.code_page_decode, 123, b'a')
3042 codecs.code_page_encode, 932, '\xff')
3044 codecs.code_page_decode, 932, b'\x81\x00', 'strict', True)
3046 codecs.code_page_decode, self.CP_UTF8, b'\xff', 'strict', True)
3052 decoded = codecs.code_page_decode(cp, raw, errors, True)
3064 codecs.code_page_decode, cp, raw, errors, True)
3070 encoded = codecs.code_page_encode(cp, text, errors)
3080 codecs.code_page_encode, cp, text, errors)
3179 self.assertEqual(codecs.code_page_decode(cp, b'abc'), ('abc', 3), f'cp{cp}')
3183 self.assertEqual(codecs.code_page_decode(42, b'abc'),
3187 decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
3190 decoded = codecs.code_page_decode(932,
3195 decoded = codecs.code_page_decode(932,
3200 decoded = codecs.code_page_decode(932,
3212 codec = codecs.lookup(name)
3215 codecs.unregister(name)
3225 decoded = codecs.code_page_decode(932, encoded, 'surrogateescape', True)
3242 decoded = codecs.code_page_decode(65001, encoded, 'ignore', True)
3329 codec = codecs.lookup('ascii')
3330 sr = codecs.StreamRecoder(bio, codec.encode, codec.decode,
3337 codec = codecs.lookup('latin1')
3339 sr = codecs.StreamRecoder(bio, codec.encode, codec.decode,
3348 sr = codecs.EncodedFile(bio, 'utf-8', 'utf-16-le')
3359 sr = codecs.EncodedFile(bio, 'utf-8', 'utf-16-le')
3489 ciphertext = codecs.encode("Caesar liked ciphers", 'rot-13')
3493 plaintext = codecs.decode('Rg gh, Oehgr?', 'rot-13')
3497 encoder = codecs.getincrementalencoder('rot-13')()
3502 decoder = codecs.getincrementaldecoder('rot-13')()
3534 codecs.register(search_function)
3535 self.addCleanup(codecs.unregister, search_function)
3536 self.assertEqual(FOUND, codecs.lookup('aaa_8'))
3537 self.assertEqual(FOUND, codecs.lookup('AAA-8'))
3538 self.assertEqual(FOUND, codecs.lookup('AAA---8'))
3539 self.assertEqual(FOUND, codecs.lookup('AAA 8'))
3540 self.assertEqual(FOUND, codecs.lookup('aaa\xe9\u20ac-8'))
3541 self.assertEqual(NOT_FOUND, codecs.lookup('AAA.8'))
3542 self.assertEqual(NOT_FOUND, codecs.lookup('AAA...8'))
3543 self.assertEqual(NOT_FOUND, codecs.lookup('BBB-8'))
3544 self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
3545 self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))