"""Unit tests for the standard-library ``difflib`` module.

Covers SequenceMatcher (ratios, opcodes, junk handling), Differ hint
lines, HtmlDiff output, unified/context diff formatting, bytes support
and the IS_LINE_JUNK / IS_CHARACTER_JUNK helpers.
"""

import difflib
from test.support import findfile
import unittest
import doctest
import sys


class TestWithAscii(unittest.TestCase):
    """SequenceMatcher on short ASCII strings (below the autojunk size)."""

    def test_one_insert(self):
        # Insert at the very start.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('insert', 0, 0, 0, 1),
                          ('equal', 0, 100, 1, 101)])
        self.assertEqual(sm.bpopular, set())

        # Insert in the middle.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('equal', 0, 50, 0, 50),
                          ('insert', 50, 50, 50, 51),
                          ('equal', 50, 100, 51, 101)])
        self.assertEqual(sm.bpopular, set())

    def test_one_delete(self):
        sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
        self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('equal', 0, 40, 0, 40),
                          ('delete', 40, 41, 40, 40),
                          ('equal', 41, 81, 40, 80)])

    def test_bjunk(self):
        # The junk predicate matches nothing present in b.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                                     a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40)
        self.assertEqual(sm.bjunk, set())

        # Spaces are present in b and flagged as junk.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                                     a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' '})

        # Two distinct junk elements.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
                                     a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' ', 'b'})


class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        # By default autojunk=True and the heuristic kicks in for a sequence
        # of length 200+
        seq1 = 'b' * 200
        seq2 = 'a' + 'b' * 200

        sm = difflib.SequenceMatcher(None, seq1, seq2)
        self.assertAlmostEqual(sm.ratio(), 0, places=3)
        self.assertEqual(sm.bpopular, {'b'})

        # Now turn the heuristic off
        sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
        self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
        self.assertEqual(sm.bpopular, set())


class TestSFbugs(unittest.TestCase):
    """Regression tests for individually reported bugs."""

    def test_ratio_for_null_seqn(self):
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        # Check fix for bug #979794
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_matching_blocks_cache(self):
        # Issue #21635: the second (cached) call must still return objects
        # exposing ``.size`` and must agree with the first call.
        s = difflib.SequenceMatcher(None, "abxcd", "abcd")
        first = s.get_matching_blocks()
        second = s.get_matching_blocks()
        self.assertEqual(first, second)
        self.assertEqual(second[0].size, 2)
        self.assertEqual(second[1].size, 2)
        self.assertEqual(second[2].size, 0)

    def test_added_tab_hint(self):
        # Check fix for bug #1488943
        # NOTE(review): the run of spaces in the first "?" line was collapsed
        # in the mangled source; 10 is what Differ emits for these inputs
        # (one kept tab, ten blanks under "I am a bug", then "--").
        diff = list(difflib.Differ().compare(["\tI am a buggy"], ["\t\tI am a bug"]))
        self.assertEqual("- \tI am a buggy", diff[0])
        self.assertEqual("? \t          --\n", diff[1])
        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])

    def test_hint_indented_properly_with_tabs(self):
        diff = list(difflib.Differ().compare(["\t \t \t^"], ["\t \t \t^\n"]))
        self.assertEqual("- \t \t \t^", diff[0])
        self.assertEqual("+ \t \t \t^\n", diff[1])
        self.assertEqual("? \t \t \t +\n", diff[2])

    def test_mdiff_catch_stop_iteration(self):
        # Issue #33224
        self.assertEqual(
            list(difflib._mdiff(["2"], ["3"], 1)),
            [((1, '\x00-2\x01'), (1, '\x00+3\x01'), True)],
        )


# Fixtures for the HtmlDiff tests (SF patch 914575).
# NOTE(review): runs of spaces inside these fixtures were collapsed in the
# mangled source.  They are restored here to the upstream layout; confirm by
# running test_html_diff against test_difflib_expect.html.
patch914575_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than implicit.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_to1 = """
   1. Beautiful is better than ugly.
   3.   Simple is better than complex.
   4. Complicated is better than complex.
   5. Flat is better than nested.
"""

patch914575_nonascii_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than ımplıcıt.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_nonascii_to1 = """
   1. Beautiful is better than ügly.
   3.   Sımple is better than complex.
   4. Complicated is better than cömplex.
   5. Flat is better than nested.
"""

# Each line documents its own whitespace: in "[...]" an "s" is one space and
# a "t" is one tab, first for the "from" text and then for the "to" text.
patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
  \t\tLine 2: preceded by from:[sstt] to:[sssst]
  \t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

# The last line ends with two spaces, written as \x20 escapes so they
# survive editors that strip trailing whitespace.
patch914575_to2 = """
    Line 1: preceded by from:[tt] to:[ssss]
    \tLine 2: preceded by from:[sstt] to:[sssst]
      Line 3: preceded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\x20\x20
"""

patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4   changed
line 5   changed
line 6   changed
line 7
line 8  subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2    added
line 3
line 4   chanGEd
line 5a  chanGed
line 6a  changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""


class TestSFpatches(unittest.TestCase):
    """Tests for HtmlDiff and other functionality added via SF patches."""

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        f1a = ((patch914575_from1 + '123\n' * 10) * 3)
        t1a = (patch914575_to1 + '123\n' * 10) * 3
        f1b = '456\n' * 10 + f1a
        t1b = '456\n' * 10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3
        i = difflib.HtmlDiff()
        j = difflib.HtmlDiff(tabsize=2)
        k = difflib.HtmlDiff(wrapcolumn=14)

        full = i.make_file(f1a, t1a, 'from', 'to', context=False, numlines=5)
        tables = '\n'.join(
            [
             '<h2>Context (first diff within numlines=5(default))</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True),
             '<h2>Context (first diff after numlines=5(default))</h2>',
             i.make_table(f1b, t1b, 'from', 'to', context=True),
             '<h2>Context (numlines=6)</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=6),
             '<h2>Context (numlines=0)</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=0),
             '<h2>Same Context</h2>',
             i.make_table(f1a, f1a, 'from', 'to', context=True),
             '<h2>Same Full</h2>',
             i.make_table(f1a, f1a, 'from', 'to', context=False),
             '<h2>Empty Context</h2>',
             i.make_table([], [], 'from', 'to', context=True),
             '<h2>Empty Full</h2>',
             i.make_table([], [], 'from', 'to', context=False),
             '<h2>tabsize=2</h2>',
             j.make_table(f2, t2),
             '<h2>tabsize=default</h2>',
             i.make_table(f2, t2),
             '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             k.make_table(f3.splitlines(), t3.splitlines(), context=True, numlines=0),
             '<h2>wrapcolumn=14,splitlines()</h2>',
             k.make_table(f3.splitlines(), t3.splitlines()),
             '<h2>wrapcolumn=14,splitlines(True)</h2>',
             k.make_table(f3.splitlines(True), t3.splitlines(True)),
             ])
        actual = full.replace('</body>', '\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w') as fp:
        #    fp.write(actual)

        with open(findfile('test_difflib_expect.html'), encoding="utf-8") as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        limit = sys.getrecursionlimit()
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(limit * 2)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(limit * 2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()

    def test_make_file_default_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines())
        self.assertIn('content="text/html; charset=utf-8"', output)

    def test_make_file_iso88591_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines(),
                                     charset='iso-8859-1')
        self.assertIn('content="text/html; charset=iso-8859-1"', output)

    def test_make_file_usascii_charset_with_nonascii_input(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
                                     patch914575_nonascii_to1.splitlines(),
                                     charset='us-ascii')
        self.assertIn('content="text/html; charset=us-ascii"', output)
        self.assertIn('&#305;mpl&#305;c&#305;t', output)


class TestOutputFormat(unittest.TestCase):
    """Header and range formatting of unified and context diffs."""

    def test_tab_delimiter(self):
        args = ['one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                           "--- Original\t2005-01-26 23:30:50",
                           "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                           "*** Original\t2005-01-26 23:30:50",
                           "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        # ``spec`` is kept for reference only; it is not used by the asserts.
        spec = '''\
           Each <range> field shall be of the form:
             "%1d", <beginning line number>  if the range contains exactly one line,
           and:
            "%1d,%1d", <beginning line number>, <number of lines>  otherwise.
           If a range is empty, its beginning line number shall be the number of
           the line just before the range, or 0 if the empty range starts the file.
        '''
        fmt = difflib._format_range_unified
        self.assertEqual(fmt(3, 3), '3,0')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,2')
        self.assertEqual(fmt(3, 6), '4,3')
        self.assertEqual(fmt(0, 0), '0,0')

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        # ``spec`` is kept for reference only; it is not used by the asserts.
        spec = '''\
           The range of lines in file1 shall be written in the following format
           if the range contains two or more lines:
               "*** %d,%d ****\n", <beginning line number>, <ending line number>
           and the following format otherwise:
               "*** %d ****\n", <ending line number>
           The ending line number of an empty range shall be the number of the preceding line,
           or 0 if the range is at the start of the file.

           Next, the range of lines in file2 shall be written in the following format
           if the range contains two or more lines:
               "--- %d,%d ----\n", <beginning line number>, <ending line number>
           and the following format otherwise:
               "--- %d ----\n", <ending line number>
        '''
        fmt = difflib._format_range_context
        self.assertEqual(fmt(3, 3), '3')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,5')
        self.assertEqual(fmt(3, 6), '4,6')
        self.assertEqual(fmt(0, 0), '0')


class TestBytes(unittest.TestCase):
    # don't really care about the content of the output, just the fact
    # that it's bytes and we don't crash
    def check(self, diff):
        diff = list(diff)   # trigger exceptions first
        for line in diff:
            self.assertIsInstance(
                line, bytes,
                "all lines of diff should be bytes, but got: %r" % line)

    def test_byte_content(self):
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes

        unified = difflib.unified_diff
        context = difflib.context_diff

        check = self.check
        check(difflib.diff_bytes(unified, a, a))
        check(difflib.diff_bytes(unified, a, b))

        # now with filenames (content and filenames are all bytes!)
        check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b'))

        # and with filenames and dates
        check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))

        # same all over again, with context diff
        check(difflib.diff_bytes(context, a, a))
        check(difflib.diff_bytes(context, a, b))
        check(difflib.diff_bytes(context, a, a, b'a', b'a'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b'))
        check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))

    def test_byte_filenames(self):
        # somebody renamed a file from ISO-8859-2 to UTF-8
        fna = b'\xb3odz.txt'    # "łodz.txt"
        fnb = b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        a = [b'\xa3odz is a city in Poland.']
        b = [b'\xc5\x81odz is a city in Poland.']

        check = self.check
        unified = difflib.unified_diff
        context = difflib.context_diff
        check(difflib.diff_bytes(unified, a, b, fna, fnb))
        check(difflib.diff_bytes(context, a, b, fna, fnb))

        def assertDiff(expect, actual):
            # do not compare expect and actual as lists, because unittest
            # uses difflib to report difference between lists
            actual = list(actual)
            self.assertEqual(len(expect), len(actual))
            for e, a in zip(expect, actual):
                self.assertEqual(e, a)

        expect = [
            b'--- \xb3odz.txt',
            b'+++ \xc5\x82odz.txt',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
        assertDiff(expect, actual)

        # with dates (plain ASCII)
        datea = b'2005-03-18'
        dateb = b'2005-03-19'
        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))

        expect = [
            # note the mixed encodings here: this is deeply wrong by every
            # tenet of Unicode, but it doesn't crash, it's parseable by
            # patch, and it's how UNIX(tm) diff behaves
            b'--- \xb3odz.txt\t2005-03-18',
            b'+++ \xc5\x82odz.txt\t2005-03-19',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
                                    lineterm=b'')
        assertDiff(expect, actual)

    def test_mixed_types_content(self):
        # type of input content must be consistent: all str or all bytes
        a = [b'hello']
        b = ['hello']

        unified = difflib.unified_diff
        context = difflib.context_diff

        expect = "lines to compare must be str, not bytes (b'hello')"
        self._assert_type_error(expect, unified, a, b)
        self._assert_type_error(expect, unified, b, a)
        self._assert_type_error(expect, context, a, b)
        self._assert_type_error(expect, context, b, a)

        expect = "all arguments must be bytes, not str ('hello')"
        self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
        self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, context, b, a)

    def test_mixed_types_filenames(self):
        # cannot pass filenames as bytes if content is str (this may not be
        # the right behaviour, but at least the test demonstrates how
        # things work)
        a = ['hello\n']
        b = ['ohell\n']
        fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
        fnb = b'ol\xc3\xa9.txt' # to UTF-8
        self._assert_type_error(
            "all arguments must be str, not: b'ol\\xe9.txt'",
            difflib.unified_diff, a, b, fna, fnb)

    def test_mixed_types_dates(self):
        # type of dates must be consistent with type of contents
        a = [b'foo\n']
        b = [b'bar\n']
        datea = '1 fév'
        dateb = '3 fév'
        self._assert_type_error(
            "all arguments must be bytes, not str ('1 fév')",
            difflib.diff_bytes, difflib.unified_diff,
            a, b, b'a', b'b', datea, dateb)

        # if input is str, non-ASCII dates are fine
        a = ['foo\n']
        b = ['bar\n']
        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))

    def _assert_type_error(self, msg, generator, *args):
        # The diff functions are generators, so the TypeError only surfaces
        # once the result is consumed.
        with self.assertRaises(TypeError) as ctx:
            list(generator(*args))
        self.assertEqual(msg, str(ctx.exception))


class TestJunkAPIs(unittest.TestCase):
    """IS_LINE_JUNK and IS_CHARACTER_JUNK predicates."""

    def test_is_line_junk_true(self):
        for line in ['#', '  ', ' #', '# ', ' # ', '']:
            self.assertTrue(difflib.IS_LINE_JUNK(line), repr(line))

    def test_is_line_junk_false(self):
        for line in ['##', ' ##', '## ', 'abc ', 'abc #', 'Mr. Moose is up!']:
            self.assertFalse(difflib.IS_LINE_JUNK(line), repr(line))

    def test_is_line_junk_REDOS(self):
        # A long whitespace prefix must not trigger catastrophic regex
        # backtracking.
        evil_input = ('\t' * 1000000) + '##'
        self.assertFalse(difflib.IS_LINE_JUNK(evil_input))

    def test_is_character_junk_true(self):
        for char in [' ', '\t']:
            self.assertTrue(difflib.IS_CHARACTER_JUNK(char), repr(char))

    def test_is_character_junk_false(self):
        for char in ['a', '#', '\n', '\f', '\r', '\v']:
            self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))


class TestFindLongest(unittest.TestCase):
    """SequenceMatcher.find_longest_match with default and explicit bounds."""

    def longer_match_exists(self, a, b, n):
        """Return True if some slice of *b* of length ``n + 1`` occurs in *a*.

        Used to verify that a reported match of size *n* really is the
        longest one.  Every window start ``0 .. len(b) - n - 1`` is checked,
        including the final window (the previous bound skipped it).
        """
        return any(b[i:i + n + 1] in a for i in range(len(b) - n))

    def test_default_args(self):
        a = 'foo bar'
        b = 'foo baz bar'
        sm = difflib.SequenceMatcher(a=a, b=b)
        match = sm.find_longest_match()
        self.assertEqual(match.a, 0)
        self.assertEqual(match.b, 0)
        self.assertEqual(match.size, 6)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b, match.size))

        match = sm.find_longest_match(alo=2, blo=4)
        self.assertEqual(match.a, 3)
        self.assertEqual(match.b, 7)
        self.assertEqual(match.size, 4)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))

        match = sm.find_longest_match(bhi=5, blo=1)
        self.assertEqual(match.a, 1)
        self.assertEqual(match.b, 1)
        self.assertEqual(match.size, 4)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))

    def test_longest_match_with_popular_chars(self):
        a = 'dabcd'
        b = 'd'*100 + 'abc' + 'd'*100  # length over 200 so popular used
        sm = difflib.SequenceMatcher(a=a, b=b)
        match = sm.find_longest_match(0, len(a), 0, len(b))
        self.assertEqual(match.a, 0)
        self.assertEqual(match.b, 99)
        self.assertEqual(match.size, 5)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b, match.size))


def setUpModule():
    # Reset the class-level counter so test_html_diff compares against the
    # baseline file from a known starting value.
    difflib.HtmlDiff._default_prefix = 0


def load_tests(loader, tests, pattern):
    # Also run the doctests embedded in difflib itself.
    tests.addTest(doctest.DocTestSuite(difflib))
    return tests


if __name__ == '__main__':
    unittest.main()