#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#

from test import multibytecodec_support
import unittest


# Layout of every ``codectests`` entry in this file:
#     (input, error-handler name, expected result)
# A bytes input is paired with a str result and a str input with a bytes
# result.
# NOTE(review): an expected result of None presumably marks a case that must
# raise; the exact meaning is defined by multibytecodec_support.TestBase,
# which is not part of this file -- confirm there.


class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    # Codec name plus the sample text obtained from the shared helper.
    encoding = 'gb2312'
    tstring = multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
        (b"\xc1\x64", "strict", None),
    )


class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    # Codec name plus the sample text obtained from the shared helper.
    encoding = 'gbk'
    tstring = multibytecodec_support.load_teststring('gbk')
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        # str input: this entry runs in the text-to-bytes direction.
        ("\u30fb", "strict", None),
    )
class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    # Codec name plus the sample text obtained from the shared helper.
    encoding = 'gb18030'
    tstring = multibytecodec_support.load_teststring('gb18030')
    # Entries are (input, error-handler name, expected result).
    # NOTE(review): None as the expected result presumably means the case
    # must raise; that is decided by TestBase -- confirm there.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
        # str input: this entry runs in the text-to-bytes direction.
        ("\u30fb", "strict", b"\x819\xa79"),
        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
        # issue29990
        (b"\xff\x30\x81\x30", "strict", None),
        (b"\x81\x30\xff\x30", "strict", None),
        (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"),
        (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'),
        (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"),
    )
    # NOTE(review): flag presumably read by TestBase to enable extra
    # checks for this codec -- confirm against multibytecodec_support.
    has_iso10646 = True


class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    # Codec name plus the sample text obtained from the shared helper.
    encoding = 'hz'
    tstring = multibytecodec_support.load_teststring('hz')
    # Entries are (input, error-handler name, expected result); the
    # multi-line inputs below rely on implicit literal concatenation.
    # NOTE(review): None as the expected result presumably means the case
    # must raise; that is decided by TestBase -- confirm there.
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
        (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
        # issue 30003
        ('ab~cd', 'strict', b'ab~~cd'),  # escape ~
        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
        (b'~{Dc~\n:C~}', 'strict', None),  # ~\n only in ASCII mode
    )


if __name__ == "__main__":
    unittest.main()