xref: /third_party/python/Lib/test/test_difflib.py (revision 7db96d56)
17db96d56Sopenharmony_ciimport difflib
27db96d56Sopenharmony_cifrom test.support import findfile
37db96d56Sopenharmony_ciimport unittest
47db96d56Sopenharmony_ciimport doctest
57db96d56Sopenharmony_ciimport sys
67db96d56Sopenharmony_ci
77db96d56Sopenharmony_ci
class TestWithAscii(unittest.TestCase):
    """SequenceMatcher checks on short, plain-ASCII strings."""

    def test_one_insert(self):
        # One extra character at the very start of b.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('insert', 0, 0, 0, 1),
                          ('equal', 0, 100, 1, 101)])
        self.assertEqual(sm.bpopular, set())

        # The same single insertion, this time in the middle of b.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('equal', 0, 50, 0, 50),
                          ('insert', 50, 50, 50, 51),
                          ('equal', 50, 100, 51, 101)])
        self.assertEqual(sm.bpopular, set())

    def test_one_delete(self):
        # a carries one character ('c') that is absent from b.
        sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40,
                                     'a' * 40 + 'b' * 40)
        self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
        self.assertEqual(list(sm.get_opcodes()),
                         [('equal', 0, 40, 0, 40),
                          ('delete', 40, 41, 40, 40),
                          ('equal', 41, 81, 40, 80)])

    def test_bjunk(self):
        # bjunk must hold exactly the elements of b that isjunk() flags.

        # No junk character occurs in b at all.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40)
        self.assertEqual(sm.bjunk, set())

        # b now ends with a run of the junk character.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' '})

        # Two different junk characters, both present in b.
        sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
                                     a='a' * 40 + 'b' * 40,
                                     b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' ', 'b'})
447db96d56Sopenharmony_ci
457db96d56Sopenharmony_ci
class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        # By default autojunk=True and the heuristic kicks in for a sequence
        # of length 200+
        seq1 = 'b' * 200
        seq2 = 'a' + 'b' * 200

        # With the heuristic active 'b' is reported as popular and the
        # similarity ratio drops to (almost) zero.
        sm = difflib.SequenceMatcher(None, seq1, seq2)
        self.assertAlmostEqual(sm.ratio(), 0, places=3)
        self.assertEqual(sm.bpopular, {'b'})

        # Now turn the heuristic off: nothing is popular any more and the
        # two sequences compare as nearly identical.
        sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
        self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
        self.assertEqual(sm.bpopular, set())
627db96d56Sopenharmony_ci
637db96d56Sopenharmony_ci
class TestSFbugs(unittest.TestCase):
    """Regression tests for individually reported difflib bugs."""

    def test_ratio_for_null_seqn(self):
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        # Check fix for bug #979794
        # Both generators must simply be exhausted for empty input.
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_matching_blocks_cache(self):
        # Issue #21635
        s = difflib.SequenceMatcher(None, "abxcd", "abcd")
        # The first call computes the blocks; the second call is the one
        # under test and must still expose the .size attribute.
        first = s.get_matching_blocks()
        second = s.get_matching_blocks()
        self.assertEqual(second[0].size, 2)
        self.assertEqual(second[1].size, 2)
        self.assertEqual(second[2].size, 0)
        # Repeated calls have to agree with each other.
        self.assertEqual(first, second)

    def test_added_tab_hint(self):
        # Check fix for bug #1488943
        diff = list(difflib.Differ().compare(["\tI am a buggy"],
                                             ["\t\tI am a bug"]))
        self.assertEqual("- \tI am a buggy", diff[0])
        self.assertEqual("? \t          --\n", diff[1])
        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])

    def test_hint_indented_properly_with_tabs(self):
        # The '?' hint line has to repeat the tab/space prefix of the line
        # it annotates.
        diff = list(difflib.Differ().compare(["\t \t \t^"], ["\t \t \t^\n"]))
        self.assertEqual("- \t \t \t^", diff[0])
        self.assertEqual("+ \t \t \t^\n", diff[1])
        self.assertEqual("? \t \t \t +\n", diff[2])

    def test_mdiff_catch_stop_iteration(self):
        # Issue #33224
        self.assertEqual(
            list(difflib._mdiff(["2"], ["3"], 1)),
            [((1, '\x00-2\x01'), (1, '\x00+3\x01'), True)],
        )
1087db96d56Sopenharmony_ci
1097db96d56Sopenharmony_ci
# Fixture texts for the HtmlDiff tests (SF patch 914575).  The exact
# whitespace inside these literals is part of the test data.

# Pair 1: changed, removed, re-indented and added lines.
patch914575_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than implicit.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_to1 = """
   1. Beautiful is better than ugly.
   3.   Simple is better than complex.
   4. Complicated is better than complex.
   5. Flat is better than nested.
"""

# Same shape as pair 1, but containing non-ASCII characters.
patch914575_nonascii_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than ımplıcıt.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_nonascii_to1 = """
   1. Beautiful is better than ügly.
   3.   Sımple is better than complex.
   4. Complicated is better than cömplex.
   5. Flat is better than nested.
"""

# Pair 2: tabs versus spaces.  The bracketed notes in each line spell out
# the whitespace used on the "from" and "to" side (t = tab, s = space).
patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
  \t\tLine 2: preceded by from:[sstt] to:[sssst]
  \t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
    Line 1: preceded by from:[tt] to:[ssss]
    \tLine 2: preceded by from:[sstt] to:[sssst]
      Line 3: preceded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

# Pair 3: lines of varying length, used with wrapcolumn=14.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4   changed
line 5   changed
line 6   changed
line 7
line 8  subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2    added
line 3
line 4   chanGEd
line 5a  chanGed
line 6a  changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
1877db96d56Sopenharmony_ci
class TestSFpatches(unittest.TestCase):
    """Tests for HtmlDiff output and for diffing very long sequences."""

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        f1a = (patch914575_from1 + '123\n' * 10) * 3
        t1a = (patch914575_to1 + '123\n' * 10) * 3
        # The "b" variants push the first difference further down by
        # prepending ten identical lines.
        f1b = '456\n' * 10 + f1a
        t1b = '456\n' * 10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3
        i = difflib.HtmlDiff()
        j = difflib.HtmlDiff(tabsize=2)
        k = difflib.HtmlDiff(wrapcolumn=14)

        full = i.make_file(f1a, t1a, 'from', 'to', context=False, numlines=5)
        tables = '\n'.join(
            [
             '<h2>Context (first diff within numlines=5(default))</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True),
             '<h2>Context (first diff after numlines=5(default))</h2>',
             i.make_table(f1b, t1b, 'from', 'to', context=True),
             '<h2>Context (numlines=6)</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=6),
             '<h2>Context (numlines=0)</h2>',
             i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=0),
             '<h2>Same Context</h2>',
             i.make_table(f1a, f1a, 'from', 'to', context=True),
             '<h2>Same Full</h2>',
             i.make_table(f1a, f1a, 'from', 'to', context=False),
             '<h2>Empty Context</h2>',
             i.make_table([], [], 'from', 'to', context=True),
             '<h2>Empty Full</h2>',
             i.make_table([], [], 'from', 'to', context=False),
             '<h2>tabsize=2</h2>',
             j.make_table(f2, t2),
             '<h2>tabsize=default</h2>',
             i.make_table(f2, t2),
             '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             k.make_table(f3.splitlines(), t3.splitlines(),
                          context=True, numlines=0),
             '<h2>wrapcolumn=14,splitlines()</h2>',
             k.make_table(f3.splitlines(), t3.splitlines()),
             '<h2>wrapcolumn=14,splitlines(True)</h2>',
             k.make_table(f3.splitlines(True), t3.splitlines(True)),
             ])
        # Splice the extra tables into the full document just before </body>.
        actual = full.replace('</body>', '\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        # (written with the same encoding the comparison below reads with)
        #with open('test_difflib_expect.html', 'w', encoding="utf-8") as fp:
        #    fp.write(actual)

        with open(findfile('test_difflib_expect.html'), encoding="utf-8") as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        # The sequences are twice as long as the recursion limit; odd
        # positions are equal on both sides, even positions all differ.
        limit = sys.getrecursionlimit()
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(limit * 2)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(limit * 2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()

    def test_make_file_default_charset(self):
        # Without an explicit charset the generated page declares utf-8.
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines())
        self.assertIn('content="text/html; charset=utf-8"', output)

    def test_make_file_iso88591_charset(self):
        # An explicit charset is copied into the generated page.
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines(),
                                     charset='iso-8859-1')
        self.assertIn('content="text/html; charset=iso-8859-1"', output)

    def test_make_file_usascii_charset_with_nonascii_input(self):
        # Characters outside the requested charset must show up as numeric
        # character references.
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
                                     patch914575_nonascii_to1.splitlines(),
                                     charset='us-ascii')
        self.assertIn('content="text/html; charset=us-ascii"', output)
        self.assertIn('&#305;mpl&#305;c&#305;t', output)
2747db96d56Sopenharmony_ci
2757db96d56Sopenharmony_ci
class TestOutputFormat(unittest.TestCase):
    """Checks on the header and range formatting of unified/context diffs."""

    def test_tab_delimiter(self):
        # File name and date in the header lines are separated by a tab.
        # The inputs are lists of lines: a bare str would be diffed
        # character by character.
        args = [['one'], ['two'], 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
                           "--- Original\t2005-01-26 23:30:50",
                           "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
                           "*** Original\t2005-01-26 23:30:50",
                           "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        # Without dates the header lines must end right after the file name.
        args = [['one'], ['two'], 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number>  if the range contains exactly
        #     one line,
        #   and:
        #     "%1d,%1d", <beginning line number>, <number of lines> otherwise.
        #   If a range is empty, its beginning line number shall be the
        #   number of the line just before the range, or 0 if the empty
        #   range starts the file.
        fmt = difflib._format_range_unified
        self.assertEqual(fmt(3,3), '3,0')
        self.assertEqual(fmt(3,4), '4')
        self.assertEqual(fmt(3,5), '4,2')
        self.assertEqual(fmt(3,6), '4,3')
        self.assertEqual(fmt(0,0), '0,0')

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   The range of lines in file1 shall be written in the following
        #   format if the range contains two or more lines:
        #       "*** %d,%d ****\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #       "*** %d ****\n", <ending line number>
        #   The ending line number of an empty range shall be the number of
        #   the preceding line, or 0 if the range is at the start of the file.
        #
        #   Next, the range of lines in file2 shall be written in the
        #   following format if the range contains two or more lines:
        #       "--- %d,%d ----\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #       "--- %d ----\n", <ending line number>
        fmt = difflib._format_range_context
        self.assertEqual(fmt(3,3), '3')
        self.assertEqual(fmt(3,4), '4')
        self.assertEqual(fmt(3,5), '4,5')
        self.assertEqual(fmt(3,6), '4,6')
        self.assertEqual(fmt(0,0), '0')
3377db96d56Sopenharmony_ci
3387db96d56Sopenharmony_ci
class TestBytes(unittest.TestCase):
    """Tests for diff_bytes() and for str/bytes type checking."""

    # don't really care about the content of the output, just the fact
    # that it's bytes and we don't crash
    def check(self, diff):
        diff = list(diff)   # trigger exceptions first
        for line in diff:
            self.assertIsInstance(
                line, bytes,
                "all lines of diff should be bytes, but got: %r" % line)

    def test_byte_content(self):
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes

        unified = difflib.unified_diff
        context = difflib.context_diff

        check = self.check
        check(difflib.diff_bytes(unified, a, a))
        check(difflib.diff_bytes(unified, a, b))

        # now with filenames (content and filenames are all bytes!)
        check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b'))

        # and with filenames and dates
        check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))

        # same all over again, with context diff
        check(difflib.diff_bytes(context, a, a))
        check(difflib.diff_bytes(context, a, b))
        check(difflib.diff_bytes(context, a, a, b'a', b'a'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b'))
        check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))

    def test_byte_filenames(self):
        # somebody renamed a file from ISO-8859-2 to UTF-8
        fna = b'\xb3odz.txt'    # "łodz.txt"
        fnb = b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        a = [b'\xa3odz is a city in Poland.']
        b = [b'\xc5\x81odz is a city in Poland.']

        check = self.check
        unified = difflib.unified_diff
        context = difflib.context_diff
        check(difflib.diff_bytes(unified, a, b, fna, fnb))
        check(difflib.diff_bytes(context, a, b, fna, fnb))

        def assertDiff(expect, actual):
            # do not compare expect and actual as lists, because unittest
            # uses difflib to report difference between lists
            actual = list(actual)
            self.assertEqual(len(expect), len(actual))
            for expected_line, actual_line in zip(expect, actual):
                self.assertEqual(expected_line, actual_line)

        expect = [
            b'--- \xb3odz.txt',
            b'+++ \xc5\x82odz.txt',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
        assertDiff(expect, actual)

        # with dates (plain ASCII)
        datea = b'2005-03-18'
        dateb = b'2005-03-19'
        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))

        expect = [
            # note the mixed encodings here: this is deeply wrong by every
            # tenet of Unicode, but it doesn't crash, it's parseable by
            # patch, and it's how UNIX(tm) diff behaves
            b'--- \xb3odz.txt\t2005-03-18',
            b'+++ \xc5\x82odz.txt\t2005-03-19',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
                                    lineterm=b'')
        assertDiff(expect, actual)

    def test_mixed_types_content(self):
        # type of input content must be consistent: all str or all bytes
        a = [b'hello']
        b = ['hello']

        unified = difflib.unified_diff
        context = difflib.context_diff

        expect = "lines to compare must be str, not bytes (b'hello')"
        self._assert_type_error(expect, unified, a, b)
        self._assert_type_error(expect, unified, b, a)
        self._assert_type_error(expect, context, a, b)
        self._assert_type_error(expect, context, b, a)

        expect = "all arguments must be bytes, not str ('hello')"
        self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
        self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, context, b, a)

    def test_mixed_types_filenames(self):
        # cannot pass filenames as bytes if content is str (this may not be
        # the right behaviour, but at least the test demonstrates how
        # things work)
        a = ['hello\n']
        b = ['ohell\n']
        fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
        fnb = b'ol\xc3\xa9.txt' # to UTF-8
        self._assert_type_error(
            "all arguments must be str, not: b'ol\\xe9.txt'",
            difflib.unified_diff, a, b, fna, fnb)

    def test_mixed_types_dates(self):
        # type of dates must be consistent with type of contents
        a = [b'foo\n']
        b = [b'bar\n']
        datea = '1 fév'
        dateb = '3 fév'
        self._assert_type_error(
            "all arguments must be bytes, not str ('1 fév')",
            difflib.diff_bytes, difflib.unified_diff,
            a, b, b'a', b'b', datea, dateb)

        # if input is str, non-ASCII dates are fine
        a = ['foo\n']
        b = ['bar\n']
        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))

    def _assert_type_error(self, msg, generator, *args):
        # Exhaust generator(*args) and require a TypeError whose text is
        # exactly msg; the diff functions are lazy, hence the list() call.
        with self.assertRaises(TypeError) as ctx:
            list(generator(*args))
        self.assertEqual(msg, str(ctx.exception))
4827db96d56Sopenharmony_ci
class TestJunkAPIs(unittest.TestCase):
    """Tests for the IS_LINE_JUNK and IS_CHARACTER_JUNK predicates."""

    def test_is_line_junk_true(self):
        # subTest keeps checking the remaining inputs after a failure.
        for line in ['#', '  ', ' #', '# ', ' # ', '']:
            with self.subTest(line=line):
                self.assertTrue(difflib.IS_LINE_JUNK(line), repr(line))

    def test_is_line_junk_false(self):
        for line in ['##', ' ##', '## ', 'abc ', 'abc #', 'Mr. Moose is up!']:
            with self.subTest(line=line):
                self.assertFalse(difflib.IS_LINE_JUNK(line), repr(line))

    def test_is_line_junk_REDOS(self):
        # A very long run of whitespace followed by a non-junk tail.
        evil_input = ('\t' * 1000000) + '##'
        self.assertFalse(difflib.IS_LINE_JUNK(evil_input))

    def test_is_character_junk_true(self):
        for char in [' ', '\t']:
            with self.subTest(char=char):
                self.assertTrue(difflib.IS_CHARACTER_JUNK(char), repr(char))

    def test_is_character_junk_false(self):
        for char in ['a', '#', '\n', '\f', '\r', '\v']:
            with self.subTest(char=char):
                self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
5037db96d56Sopenharmony_ci
class TestFindLongest(unittest.TestCase):
    """Tests for SequenceMatcher.find_longest_match()."""

    def longer_match_exists(self, a, b, n):
        """Return True if any substring of b with length n + 1 occurs in a.

        Used to verify that a reported match of size n is really the longest
        one: if this returns True, a strictly longer common block exists.
        a and b are the (already sliced) strings that were compared.
        """
        window = n + 1
        # b holds len(b) - window + 1 == len(b) - n windows of that size, and
        # the last one must be checked too.  A non-positive count simply
        # yields an empty range (no window fits, so no longer match).
        return any(b[i:i + window] in a for i in range(len(b) - n))

    def test_default_args(self):
        a = 'foo bar'
        b = 'foo baz bar'
        sm = difflib.SequenceMatcher(a=a, b=b)

        # no bounds given: the whole of a and b is searched
        match = sm.find_longest_match()
        self.assertEqual(match.a, 0)
        self.assertEqual(match.b, 0)
        self.assertEqual(match.size, 6)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b, match.size))

        # only the lower bounds given: search a[2:] against b[4:]
        match = sm.find_longest_match(alo=2, blo=4)
        self.assertEqual(match.a, 3)
        self.assertEqual(match.b, 7)
        self.assertEqual(match.size, 4)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))

        # only b is bounded: search all of a against b[1:5]
        match = sm.find_longest_match(bhi=5, blo=1)
        self.assertEqual(match.a, 1)
        self.assertEqual(match.b, 1)
        self.assertEqual(match.size, 4)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))

    def test_longest_match_with_popular_chars(self):
        a = 'dabcd'
        b = 'd'*100 + 'abc' + 'd'*100  # length over 200 so popular used
        sm = difflib.SequenceMatcher(a=a, b=b)
        match = sm.find_longest_match(0, len(a), 0, len(b))
        self.assertEqual(match.a, 0)
        self.assertEqual(match.b, 99)
        self.assertEqual(match.size, 5)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b, match.size))
5487db96d56Sopenharmony_ci
5497db96d56Sopenharmony_ci
def setUpModule():
    """Set ``difflib.HtmlDiff._default_prefix`` to 0 for this test module.

    NOTE(review): presumably this keeps identifiers in generated HTML
    deterministic across runs -- confirm against difflib.HtmlDiff.
    """
    html_diff_cls = difflib.HtmlDiff
    html_diff_cls._default_prefix = 0
5527db96d56Sopenharmony_ci
5537db96d56Sopenharmony_ci
def load_tests(loader, tests, pattern):
    """Add difflib's doctests to the suite and return it.

    Follows the unittest ``load_tests`` protocol; loader and pattern are
    accepted for that signature but not used here.
    """
    difflib_doctests = doctest.DocTestSuite(difflib)
    tests.addTest(difflib_doctests)
    return tests
5577db96d56Sopenharmony_ci
5587db96d56Sopenharmony_ci
# Run the tests through unittest when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
561