17db96d56Sopenharmony_ci"""
27db96d56Sopenharmony_ciModule difflib -- helpers for computing deltas between objects.
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ciFunction get_close_matches(word, possibilities, n=3, cutoff=0.6):
57db96d56Sopenharmony_ci    Use SequenceMatcher to return list of the best "good enough" matches.
67db96d56Sopenharmony_ci
77db96d56Sopenharmony_ciFunction context_diff(a, b):
87db96d56Sopenharmony_ci    For two lists of strings, return a delta in context diff format.
97db96d56Sopenharmony_ci
107db96d56Sopenharmony_ciFunction ndiff(a, b):
117db96d56Sopenharmony_ci    Return a delta: the difference between `a` and `b` (lists of strings).
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ciFunction restore(delta, which):
147db96d56Sopenharmony_ci    Return one of the two sequences that generated an ndiff delta.
157db96d56Sopenharmony_ci
167db96d56Sopenharmony_ciFunction unified_diff(a, b):
177db96d56Sopenharmony_ci    For two lists of strings, return a delta in unified diff format.
187db96d56Sopenharmony_ci
197db96d56Sopenharmony_ciClass SequenceMatcher:
207db96d56Sopenharmony_ci    A flexible class for comparing pairs of sequences of any type.
217db96d56Sopenharmony_ci
227db96d56Sopenharmony_ciClass Differ:
237db96d56Sopenharmony_ci    For producing human-readable deltas from sequences of lines of text.
247db96d56Sopenharmony_ci
257db96d56Sopenharmony_ciClass HtmlDiff:
267db96d56Sopenharmony_ci    For producing HTML side by side comparison with change highlights.
277db96d56Sopenharmony_ci"""
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ci__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
307db96d56Sopenharmony_ci           'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
317db96d56Sopenharmony_ci           'unified_diff', 'diff_bytes', 'HtmlDiff', 'Match']
327db96d56Sopenharmony_ci
337db96d56Sopenharmony_cifrom heapq import nlargest as _nlargest
347db96d56Sopenharmony_cifrom collections import namedtuple as _namedtuple
357db96d56Sopenharmony_cifrom types import GenericAlias
367db96d56Sopenharmony_ci
377db96d56Sopenharmony_ciMatch = _namedtuple('Match', 'a b size')
387db96d56Sopenharmony_ci
397db96d56Sopenharmony_cidef _calculate_ratio(matches, length):
407db96d56Sopenharmony_ci    if length:
417db96d56Sopenharmony_ci        return 2.0 * matches / length
427db96d56Sopenharmony_ci    return 1.0
437db96d56Sopenharmony_ci
447db96d56Sopenharmony_ciclass SequenceMatcher:
457db96d56Sopenharmony_ci
467db96d56Sopenharmony_ci    """
477db96d56Sopenharmony_ci    SequenceMatcher is a flexible class for comparing pairs of sequences of
487db96d56Sopenharmony_ci    any type, so long as the sequence elements are hashable.  The basic
497db96d56Sopenharmony_ci    algorithm predates, and is a little fancier than, an algorithm
507db96d56Sopenharmony_ci    published in the late 1980's by Ratcliff and Obershelp under the
517db96d56Sopenharmony_ci    hyperbolic name "gestalt pattern matching".  The basic idea is to find
527db96d56Sopenharmony_ci    the longest contiguous matching subsequence that contains no "junk"
537db96d56Sopenharmony_ci    elements (R-O doesn't address junk).  The same idea is then applied
547db96d56Sopenharmony_ci    recursively to the pieces of the sequences to the left and to the right
557db96d56Sopenharmony_ci    of the matching subsequence.  This does not yield minimal edit
567db96d56Sopenharmony_ci    sequences, but does tend to yield matches that "look right" to people.
577db96d56Sopenharmony_ci
587db96d56Sopenharmony_ci    SequenceMatcher tries to compute a "human-friendly diff" between two
597db96d56Sopenharmony_ci    sequences.  Unlike e.g. UNIX(tm) diff, the fundamental notion is the
607db96d56Sopenharmony_ci    longest *contiguous* & junk-free matching subsequence.  That's what
617db96d56Sopenharmony_ci    catches peoples' eyes.  The Windows(tm) windiff has another interesting
627db96d56Sopenharmony_ci    notion, pairing up elements that appear uniquely in each sequence.
637db96d56Sopenharmony_ci    That, and the method here, appear to yield more intuitive difference
647db96d56Sopenharmony_ci    reports than does diff.  This method appears to be the least vulnerable
657db96d56Sopenharmony_ci    to syncing up on blocks of "junk lines", though (like blank lines in
667db96d56Sopenharmony_ci    ordinary text files, or maybe "<P>" lines in HTML files).  That may be
677db96d56Sopenharmony_ci    because this is the only method of the 3 that has a *concept* of
687db96d56Sopenharmony_ci    "junk" <wink>.
697db96d56Sopenharmony_ci
707db96d56Sopenharmony_ci    Example, comparing two strings, and considering blanks to be "junk":
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci    >>> s = SequenceMatcher(lambda x: x == " ",
737db96d56Sopenharmony_ci    ...                     "private Thread currentThread;",
747db96d56Sopenharmony_ci    ...                     "private volatile Thread currentThread;")
757db96d56Sopenharmony_ci    >>>
767db96d56Sopenharmony_ci
777db96d56Sopenharmony_ci    .ratio() returns a float in [0, 1], measuring the "similarity" of the
787db96d56Sopenharmony_ci    sequences.  As a rule of thumb, a .ratio() value over 0.6 means the
797db96d56Sopenharmony_ci    sequences are close matches:
807db96d56Sopenharmony_ci
817db96d56Sopenharmony_ci    >>> print(round(s.ratio(), 3))
827db96d56Sopenharmony_ci    0.866
837db96d56Sopenharmony_ci    >>>
847db96d56Sopenharmony_ci
857db96d56Sopenharmony_ci    If you're only interested in where the sequences match,
867db96d56Sopenharmony_ci    .get_matching_blocks() is handy:
877db96d56Sopenharmony_ci
887db96d56Sopenharmony_ci    >>> for block in s.get_matching_blocks():
897db96d56Sopenharmony_ci    ...     print("a[%d] and b[%d] match for %d elements" % block)
907db96d56Sopenharmony_ci    a[0] and b[0] match for 8 elements
917db96d56Sopenharmony_ci    a[8] and b[17] match for 21 elements
927db96d56Sopenharmony_ci    a[29] and b[38] match for 0 elements
937db96d56Sopenharmony_ci
947db96d56Sopenharmony_ci    Note that the last tuple returned by .get_matching_blocks() is always a
957db96d56Sopenharmony_ci    dummy, (len(a), len(b), 0), and this is the only case in which the last
967db96d56Sopenharmony_ci    tuple element (number of elements matched) is 0.
977db96d56Sopenharmony_ci
987db96d56Sopenharmony_ci    If you want to know how to change the first sequence into the second,
997db96d56Sopenharmony_ci    use .get_opcodes():
1007db96d56Sopenharmony_ci
1017db96d56Sopenharmony_ci    >>> for opcode in s.get_opcodes():
1027db96d56Sopenharmony_ci    ...     print("%6s a[%d:%d] b[%d:%d]" % opcode)
1037db96d56Sopenharmony_ci     equal a[0:8] b[0:8]
1047db96d56Sopenharmony_ci    insert a[8:8] b[8:17]
1057db96d56Sopenharmony_ci     equal a[8:29] b[17:38]
1067db96d56Sopenharmony_ci
1077db96d56Sopenharmony_ci    See the Differ class for a fancy human-friendly file differencer, which
1087db96d56Sopenharmony_ci    uses SequenceMatcher both to compare sequences of lines, and to compare
1097db96d56Sopenharmony_ci    sequences of characters within similar (near-matching) lines.
1107db96d56Sopenharmony_ci
1117db96d56Sopenharmony_ci    See also function get_close_matches() in this module, which shows how
1127db96d56Sopenharmony_ci    simple code building on SequenceMatcher can be used to do useful work.
1137db96d56Sopenharmony_ci
1147db96d56Sopenharmony_ci    Timing:  Basic R-O is cubic time worst case and quadratic time expected
1157db96d56Sopenharmony_ci    case.  SequenceMatcher is quadratic time for the worst case and has
1167db96d56Sopenharmony_ci    expected-case behavior dependent in a complicated way on how many
1177db96d56Sopenharmony_ci    elements the sequences have in common; best case time is linear.
1187db96d56Sopenharmony_ci    """
1197db96d56Sopenharmony_ci
1207db96d56Sopenharmony_ci    def __init__(self, isjunk=None, a='', b='', autojunk=True):
1217db96d56Sopenharmony_ci        """Construct a SequenceMatcher.
1227db96d56Sopenharmony_ci
1237db96d56Sopenharmony_ci        Optional arg isjunk is None (the default), or a one-argument
1247db96d56Sopenharmony_ci        function that takes a sequence element and returns true iff the
1257db96d56Sopenharmony_ci        element is junk.  None is equivalent to passing "lambda x: 0", i.e.
1267db96d56Sopenharmony_ci        no elements are considered to be junk.  For example, pass
1277db96d56Sopenharmony_ci            lambda x: x in " \\t"
1287db96d56Sopenharmony_ci        if you're comparing lines as sequences of characters, and don't
1297db96d56Sopenharmony_ci        want to synch up on blanks or hard tabs.
1307db96d56Sopenharmony_ci
1317db96d56Sopenharmony_ci        Optional arg a is the first of two sequences to be compared.  By
1327db96d56Sopenharmony_ci        default, an empty string.  The elements of a must be hashable.  See
1337db96d56Sopenharmony_ci        also .set_seqs() and .set_seq1().
1347db96d56Sopenharmony_ci
1357db96d56Sopenharmony_ci        Optional arg b is the second of two sequences to be compared.  By
1367db96d56Sopenharmony_ci        default, an empty string.  The elements of b must be hashable. See
1377db96d56Sopenharmony_ci        also .set_seqs() and .set_seq2().
1387db96d56Sopenharmony_ci
1397db96d56Sopenharmony_ci        Optional arg autojunk should be set to False to disable the
1407db96d56Sopenharmony_ci        "automatic junk heuristic" that treats popular elements as junk
1417db96d56Sopenharmony_ci        (see module documentation for more information).
1427db96d56Sopenharmony_ci        """
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_ci        # Members:
1457db96d56Sopenharmony_ci        # a
1467db96d56Sopenharmony_ci        #      first sequence
1477db96d56Sopenharmony_ci        # b
1487db96d56Sopenharmony_ci        #      second sequence; differences are computed as "what do
1497db96d56Sopenharmony_ci        #      we need to do to 'a' to change it into 'b'?"
1507db96d56Sopenharmony_ci        # b2j
1517db96d56Sopenharmony_ci        #      for x in b, b2j[x] is a list of the indices (into b)
1527db96d56Sopenharmony_ci        #      at which x appears; junk and popular elements do not appear
1537db96d56Sopenharmony_ci        # fullbcount
1547db96d56Sopenharmony_ci        #      for x in b, fullbcount[x] == the number of times x
1557db96d56Sopenharmony_ci        #      appears in b; only materialized if really needed (used
1567db96d56Sopenharmony_ci        #      only for computing quick_ratio())
1577db96d56Sopenharmony_ci        # matching_blocks
1587db96d56Sopenharmony_ci        #      a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
1597db96d56Sopenharmony_ci        #      ascending & non-overlapping in i and in j; terminated by
1607db96d56Sopenharmony_ci        #      a dummy (len(a), len(b), 0) sentinel
1617db96d56Sopenharmony_ci        # opcodes
1627db96d56Sopenharmony_ci        #      a list of (tag, i1, i2, j1, j2) tuples, where tag is
1637db96d56Sopenharmony_ci        #      one of
1647db96d56Sopenharmony_ci        #          'replace'   a[i1:i2] should be replaced by b[j1:j2]
1657db96d56Sopenharmony_ci        #          'delete'    a[i1:i2] should be deleted
1667db96d56Sopenharmony_ci        #          'insert'    b[j1:j2] should be inserted
1677db96d56Sopenharmony_ci        #          'equal'     a[i1:i2] == b[j1:j2]
1687db96d56Sopenharmony_ci        # isjunk
1697db96d56Sopenharmony_ci        #      a user-supplied function taking a sequence element and
1707db96d56Sopenharmony_ci        #      returning true iff the element is "junk" -- this has
1717db96d56Sopenharmony_ci        #      subtle but helpful effects on the algorithm, which I'll
1727db96d56Sopenharmony_ci        #      get around to writing up someday <0.9 wink>.
1737db96d56Sopenharmony_ci        #      DON'T USE!  Only __chain_b uses this.  Use "in self.bjunk".
1747db96d56Sopenharmony_ci        # bjunk
1757db96d56Sopenharmony_ci        #      the items in b for which isjunk is True.
1767db96d56Sopenharmony_ci        # bpopular
1777db96d56Sopenharmony_ci        #      nonjunk items in b treated as junk by the heuristic (if used).
1787db96d56Sopenharmony_ci
1797db96d56Sopenharmony_ci        self.isjunk = isjunk
1807db96d56Sopenharmony_ci        self.a = self.b = None
1817db96d56Sopenharmony_ci        self.autojunk = autojunk
1827db96d56Sopenharmony_ci        self.set_seqs(a, b)
1837db96d56Sopenharmony_ci
1847db96d56Sopenharmony_ci    def set_seqs(self, a, b):
1857db96d56Sopenharmony_ci        """Set the two sequences to be compared.
1867db96d56Sopenharmony_ci
1877db96d56Sopenharmony_ci        >>> s = SequenceMatcher()
1887db96d56Sopenharmony_ci        >>> s.set_seqs("abcd", "bcde")
1897db96d56Sopenharmony_ci        >>> s.ratio()
1907db96d56Sopenharmony_ci        0.75
1917db96d56Sopenharmony_ci        """
1927db96d56Sopenharmony_ci
1937db96d56Sopenharmony_ci        self.set_seq1(a)
1947db96d56Sopenharmony_ci        self.set_seq2(b)
1957db96d56Sopenharmony_ci
1967db96d56Sopenharmony_ci    def set_seq1(self, a):
1977db96d56Sopenharmony_ci        """Set the first sequence to be compared.
1987db96d56Sopenharmony_ci
1997db96d56Sopenharmony_ci        The second sequence to be compared is not changed.
2007db96d56Sopenharmony_ci
2017db96d56Sopenharmony_ci        >>> s = SequenceMatcher(None, "abcd", "bcde")
2027db96d56Sopenharmony_ci        >>> s.ratio()
2037db96d56Sopenharmony_ci        0.75
2047db96d56Sopenharmony_ci        >>> s.set_seq1("bcde")
2057db96d56Sopenharmony_ci        >>> s.ratio()
2067db96d56Sopenharmony_ci        1.0
2077db96d56Sopenharmony_ci        >>>
2087db96d56Sopenharmony_ci
2097db96d56Sopenharmony_ci        SequenceMatcher computes and caches detailed information about the
2107db96d56Sopenharmony_ci        second sequence, so if you want to compare one sequence S against
2117db96d56Sopenharmony_ci        many sequences, use .set_seq2(S) once and call .set_seq1(x)
2127db96d56Sopenharmony_ci        repeatedly for each of the other sequences.
2137db96d56Sopenharmony_ci
2147db96d56Sopenharmony_ci        See also set_seqs() and set_seq2().
2157db96d56Sopenharmony_ci        """
2167db96d56Sopenharmony_ci
2177db96d56Sopenharmony_ci        if a is self.a:
2187db96d56Sopenharmony_ci            return
2197db96d56Sopenharmony_ci        self.a = a
2207db96d56Sopenharmony_ci        self.matching_blocks = self.opcodes = None
2217db96d56Sopenharmony_ci
2227db96d56Sopenharmony_ci    def set_seq2(self, b):
2237db96d56Sopenharmony_ci        """Set the second sequence to be compared.
2247db96d56Sopenharmony_ci
2257db96d56Sopenharmony_ci        The first sequence to be compared is not changed.
2267db96d56Sopenharmony_ci
2277db96d56Sopenharmony_ci        >>> s = SequenceMatcher(None, "abcd", "bcde")
2287db96d56Sopenharmony_ci        >>> s.ratio()
2297db96d56Sopenharmony_ci        0.75
2307db96d56Sopenharmony_ci        >>> s.set_seq2("abcd")
2317db96d56Sopenharmony_ci        >>> s.ratio()
2327db96d56Sopenharmony_ci        1.0
2337db96d56Sopenharmony_ci        >>>
2347db96d56Sopenharmony_ci
2357db96d56Sopenharmony_ci        SequenceMatcher computes and caches detailed information about the
2367db96d56Sopenharmony_ci        second sequence, so if you want to compare one sequence S against
2377db96d56Sopenharmony_ci        many sequences, use .set_seq2(S) once and call .set_seq1(x)
2387db96d56Sopenharmony_ci        repeatedly for each of the other sequences.
2397db96d56Sopenharmony_ci
2407db96d56Sopenharmony_ci        See also set_seqs() and set_seq1().
2417db96d56Sopenharmony_ci        """
2427db96d56Sopenharmony_ci
2437db96d56Sopenharmony_ci        if b is self.b:
2447db96d56Sopenharmony_ci            return
2457db96d56Sopenharmony_ci        self.b = b
2467db96d56Sopenharmony_ci        self.matching_blocks = self.opcodes = None
2477db96d56Sopenharmony_ci        self.fullbcount = None
2487db96d56Sopenharmony_ci        self.__chain_b()
2497db96d56Sopenharmony_ci
2507db96d56Sopenharmony_ci    # For each element x in b, set b2j[x] to a list of the indices in
2517db96d56Sopenharmony_ci    # b where x appears; the indices are in increasing order; note that
2527db96d56Sopenharmony_ci    # the number of times x appears in b is len(b2j[x]) ...
2537db96d56Sopenharmony_ci    # when self.isjunk is defined, junk elements don't show up in this
2547db96d56Sopenharmony_ci    # map at all, which stops the central find_longest_match method
2557db96d56Sopenharmony_ci    # from starting any matching block at a junk element ...
2567db96d56Sopenharmony_ci    # b2j also does not contain entries for "popular" elements, meaning
2577db96d56Sopenharmony_ci    # elements that account for more than 1 + 1% of the total elements, and
2587db96d56Sopenharmony_ci    # when the sequence is reasonably large (>= 200 elements); this can
2597db96d56Sopenharmony_ci    # be viewed as an adaptive notion of semi-junk, and yields an enormous
2607db96d56Sopenharmony_ci    # speedup when, e.g., comparing program files with hundreds of
2617db96d56Sopenharmony_ci    # instances of "return NULL;" ...
2627db96d56Sopenharmony_ci    # note that this is only called when b changes; so for cross-product
2637db96d56Sopenharmony_ci    # kinds of matches, it's best to call set_seq2 once, then set_seq1
2647db96d56Sopenharmony_ci    # repeatedly
2657db96d56Sopenharmony_ci
2667db96d56Sopenharmony_ci    def __chain_b(self):
2677db96d56Sopenharmony_ci        # Because isjunk is a user-defined (not C) function, and we test
2687db96d56Sopenharmony_ci        # for junk a LOT, it's important to minimize the number of calls.
2697db96d56Sopenharmony_ci        # Before the tricks described here, __chain_b was by far the most
2707db96d56Sopenharmony_ci        # time-consuming routine in the whole module!  If anyone sees
2717db96d56Sopenharmony_ci        # Jim Roskind, thank him again for profile.py -- I never would
2727db96d56Sopenharmony_ci        # have guessed that.
2737db96d56Sopenharmony_ci        # The first trick is to build b2j ignoring the possibility
2747db96d56Sopenharmony_ci        # of junk.  I.e., we don't call isjunk at all yet.  Throwing
2757db96d56Sopenharmony_ci        # out the junk later is much cheaper than building b2j "right"
2767db96d56Sopenharmony_ci        # from the start.
2777db96d56Sopenharmony_ci        b = self.b
2787db96d56Sopenharmony_ci        self.b2j = b2j = {}
2797db96d56Sopenharmony_ci
2807db96d56Sopenharmony_ci        for i, elt in enumerate(b):
2817db96d56Sopenharmony_ci            indices = b2j.setdefault(elt, [])
2827db96d56Sopenharmony_ci            indices.append(i)
2837db96d56Sopenharmony_ci
2847db96d56Sopenharmony_ci        # Purge junk elements
2857db96d56Sopenharmony_ci        self.bjunk = junk = set()
2867db96d56Sopenharmony_ci        isjunk = self.isjunk
2877db96d56Sopenharmony_ci        if isjunk:
2887db96d56Sopenharmony_ci            for elt in b2j.keys():
2897db96d56Sopenharmony_ci                if isjunk(elt):
2907db96d56Sopenharmony_ci                    junk.add(elt)
2917db96d56Sopenharmony_ci            for elt in junk: # separate loop avoids separate list of keys
2927db96d56Sopenharmony_ci                del b2j[elt]
2937db96d56Sopenharmony_ci
2947db96d56Sopenharmony_ci        # Purge popular elements that are not junk
2957db96d56Sopenharmony_ci        self.bpopular = popular = set()
2967db96d56Sopenharmony_ci        n = len(b)
2977db96d56Sopenharmony_ci        if self.autojunk and n >= 200:
2987db96d56Sopenharmony_ci            ntest = n // 100 + 1
2997db96d56Sopenharmony_ci            for elt, idxs in b2j.items():
3007db96d56Sopenharmony_ci                if len(idxs) > ntest:
3017db96d56Sopenharmony_ci                    popular.add(elt)
3027db96d56Sopenharmony_ci            for elt in popular: # ditto; as fast for 1% deletion
3037db96d56Sopenharmony_ci                del b2j[elt]
3047db96d56Sopenharmony_ci
3057db96d56Sopenharmony_ci    def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
3067db96d56Sopenharmony_ci        """Find longest matching block in a[alo:ahi] and b[blo:bhi].
3077db96d56Sopenharmony_ci
3087db96d56Sopenharmony_ci        By default it will find the longest match in the entirety of a and b.
3097db96d56Sopenharmony_ci
3107db96d56Sopenharmony_ci        If isjunk is not defined:
3117db96d56Sopenharmony_ci
3127db96d56Sopenharmony_ci        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
3137db96d56Sopenharmony_ci            alo <= i <= i+k <= ahi
3147db96d56Sopenharmony_ci            blo <= j <= j+k <= bhi
3157db96d56Sopenharmony_ci        and for all (i',j',k') meeting those conditions,
3167db96d56Sopenharmony_ci            k >= k'
3177db96d56Sopenharmony_ci            i <= i'
3187db96d56Sopenharmony_ci            and if i == i', j <= j'
3197db96d56Sopenharmony_ci
3207db96d56Sopenharmony_ci        In other words, of all maximal matching blocks, return one that
3217db96d56Sopenharmony_ci        starts earliest in a, and of all those maximal matching blocks that
3227db96d56Sopenharmony_ci        start earliest in a, return the one that starts earliest in b.
3237db96d56Sopenharmony_ci
3247db96d56Sopenharmony_ci        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
3257db96d56Sopenharmony_ci        >>> s.find_longest_match(0, 5, 0, 9)
3267db96d56Sopenharmony_ci        Match(a=0, b=4, size=5)
3277db96d56Sopenharmony_ci
3287db96d56Sopenharmony_ci        If isjunk is defined, first the longest matching block is
3297db96d56Sopenharmony_ci        determined as above, but with the additional restriction that no
3307db96d56Sopenharmony_ci        junk element appears in the block.  Then that block is extended as
3317db96d56Sopenharmony_ci        far as possible by matching (only) junk elements on both sides.  So
3327db96d56Sopenharmony_ci        the resulting block never matches on junk except as identical junk
3337db96d56Sopenharmony_ci        happens to be adjacent to an "interesting" match.
3347db96d56Sopenharmony_ci
3357db96d56Sopenharmony_ci        Here's the same example as before, but considering blanks to be
3367db96d56Sopenharmony_ci        junk.  That prevents " abcd" from matching the " abcd" at the tail
3377db96d56Sopenharmony_ci        end of the second sequence directly.  Instead only the "abcd" can
3387db96d56Sopenharmony_ci        match, and matches the leftmost "abcd" in the second sequence:
3397db96d56Sopenharmony_ci
3407db96d56Sopenharmony_ci        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
3417db96d56Sopenharmony_ci        >>> s.find_longest_match(0, 5, 0, 9)
3427db96d56Sopenharmony_ci        Match(a=1, b=0, size=4)
3437db96d56Sopenharmony_ci
3447db96d56Sopenharmony_ci        If no blocks match, return (alo, blo, 0).
3457db96d56Sopenharmony_ci
3467db96d56Sopenharmony_ci        >>> s = SequenceMatcher(None, "ab", "c")
3477db96d56Sopenharmony_ci        >>> s.find_longest_match(0, 2, 0, 1)
3487db96d56Sopenharmony_ci        Match(a=0, b=0, size=0)
3497db96d56Sopenharmony_ci        """
3507db96d56Sopenharmony_ci
3517db96d56Sopenharmony_ci        # CAUTION:  stripping common prefix or suffix would be incorrect.
3527db96d56Sopenharmony_ci        # E.g.,
3537db96d56Sopenharmony_ci        #    ab
3547db96d56Sopenharmony_ci        #    acab
3557db96d56Sopenharmony_ci        # Longest matching block is "ab", but if common prefix is
3567db96d56Sopenharmony_ci        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
3577db96d56Sopenharmony_ci        # strip, so ends up claiming that ab is changed to acab by
3587db96d56Sopenharmony_ci        # inserting "ca" in the middle.  That's minimal but unintuitive:
3597db96d56Sopenharmony_ci        # "it's obvious" that someone inserted "ac" at the front.
3607db96d56Sopenharmony_ci        # Windiff ends up at the same place as diff, but by pairing up
3617db96d56Sopenharmony_ci        # the unique 'b's and then matching the first two 'a's.
3627db96d56Sopenharmony_ci
3637db96d56Sopenharmony_ci        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
3647db96d56Sopenharmony_ci        if ahi is None:
3657db96d56Sopenharmony_ci            ahi = len(a)
3667db96d56Sopenharmony_ci        if bhi is None:
3677db96d56Sopenharmony_ci            bhi = len(b)
3687db96d56Sopenharmony_ci        besti, bestj, bestsize = alo, blo, 0
3697db96d56Sopenharmony_ci        # find longest junk-free match
3707db96d56Sopenharmony_ci        # during an iteration of the loop, j2len[j] = length of longest
3717db96d56Sopenharmony_ci        # junk-free match ending with a[i-1] and b[j]
3727db96d56Sopenharmony_ci        j2len = {}
3737db96d56Sopenharmony_ci        nothing = []
3747db96d56Sopenharmony_ci        for i in range(alo, ahi):
3757db96d56Sopenharmony_ci            # look at all instances of a[i] in b; note that because
3767db96d56Sopenharmony_ci            # b2j has no junk keys, the loop is skipped if a[i] is junk
3777db96d56Sopenharmony_ci            j2lenget = j2len.get
3787db96d56Sopenharmony_ci            newj2len = {}
3797db96d56Sopenharmony_ci            for j in b2j.get(a[i], nothing):
3807db96d56Sopenharmony_ci                # a[i] matches b[j]
3817db96d56Sopenharmony_ci                if j < blo:
3827db96d56Sopenharmony_ci                    continue
3837db96d56Sopenharmony_ci                if j >= bhi:
3847db96d56Sopenharmony_ci                    break
3857db96d56Sopenharmony_ci                k = newj2len[j] = j2lenget(j-1, 0) + 1
3867db96d56Sopenharmony_ci                if k > bestsize:
3877db96d56Sopenharmony_ci                    besti, bestj, bestsize = i-k+1, j-k+1, k
3887db96d56Sopenharmony_ci            j2len = newj2len
3897db96d56Sopenharmony_ci
3907db96d56Sopenharmony_ci        # Extend the best by non-junk elements on each end.  In particular,
3917db96d56Sopenharmony_ci        # "popular" non-junk elements aren't in b2j, which greatly speeds
3927db96d56Sopenharmony_ci        # the inner loop above, but also means "the best" match so far
3937db96d56Sopenharmony_ci        # doesn't contain any junk *or* popular non-junk elements.
3947db96d56Sopenharmony_ci        while besti > alo and bestj > blo and \
3957db96d56Sopenharmony_ci              not isbjunk(b[bestj-1]) and \
3967db96d56Sopenharmony_ci              a[besti-1] == b[bestj-1]:
3977db96d56Sopenharmony_ci            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
3987db96d56Sopenharmony_ci        while besti+bestsize < ahi and bestj+bestsize < bhi and \
3997db96d56Sopenharmony_ci              not isbjunk(b[bestj+bestsize]) and \
4007db96d56Sopenharmony_ci              a[besti+bestsize] == b[bestj+bestsize]:
4017db96d56Sopenharmony_ci            bestsize += 1
4027db96d56Sopenharmony_ci
4037db96d56Sopenharmony_ci        # Now that we have a wholly interesting match (albeit possibly
4047db96d56Sopenharmony_ci        # empty!), we may as well suck up the matching junk on each
4057db96d56Sopenharmony_ci        # side of it too.  Can't think of a good reason not to, and it
4067db96d56Sopenharmony_ci        # saves post-processing the (possibly considerable) expense of
4077db96d56Sopenharmony_ci        # figuring out what to do with it.  In the case of an empty
4087db96d56Sopenharmony_ci        # interesting match, this is clearly the right thing to do,
4097db96d56Sopenharmony_ci        # because no other kind of match is possible in the regions.
4107db96d56Sopenharmony_ci        while besti > alo and bestj > blo and \
4117db96d56Sopenharmony_ci              isbjunk(b[bestj-1]) and \
4127db96d56Sopenharmony_ci              a[besti-1] == b[bestj-1]:
4137db96d56Sopenharmony_ci            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
4147db96d56Sopenharmony_ci        while besti+bestsize < ahi and bestj+bestsize < bhi and \
4157db96d56Sopenharmony_ci              isbjunk(b[bestj+bestsize]) and \
4167db96d56Sopenharmony_ci              a[besti+bestsize] == b[bestj+bestsize]:
4177db96d56Sopenharmony_ci            bestsize = bestsize + 1
4187db96d56Sopenharmony_ci
4197db96d56Sopenharmony_ci        return Match(besti, bestj, bestsize)
4207db96d56Sopenharmony_ci
4217db96d56Sopenharmony_ci    def get_matching_blocks(self):
4227db96d56Sopenharmony_ci        """Return list of triples describing matching subsequences.
4237db96d56Sopenharmony_ci
4247db96d56Sopenharmony_ci        Each triple is of the form (i, j, n), and means that
4257db96d56Sopenharmony_ci        a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
4267db96d56Sopenharmony_ci        i and in j.  New in Python 2.5, it's also guaranteed that if
4277db96d56Sopenharmony_ci        (i, j, n) and (i', j', n') are adjacent triples in the list, and
4287db96d56Sopenharmony_ci        the second is not the last triple in the list, then i+n != i' or
4297db96d56Sopenharmony_ci        j+n != j'.  IOW, adjacent triples never describe adjacent equal
4307db96d56Sopenharmony_ci        blocks.
4317db96d56Sopenharmony_ci
4327db96d56Sopenharmony_ci        The last triple is a dummy, (len(a), len(b), 0), and is the only
4337db96d56Sopenharmony_ci        triple with n==0.
4347db96d56Sopenharmony_ci
4357db96d56Sopenharmony_ci        >>> s = SequenceMatcher(None, "abxcd", "abcd")
4367db96d56Sopenharmony_ci        >>> list(s.get_matching_blocks())
4377db96d56Sopenharmony_ci        [Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)]
4387db96d56Sopenharmony_ci        """
4397db96d56Sopenharmony_ci
4407db96d56Sopenharmony_ci        if self.matching_blocks is not None:
4417db96d56Sopenharmony_ci            return self.matching_blocks
4427db96d56Sopenharmony_ci        la, lb = len(self.a), len(self.b)
4437db96d56Sopenharmony_ci
4447db96d56Sopenharmony_ci        # This is most naturally expressed as a recursive algorithm, but
4457db96d56Sopenharmony_ci        # at least one user bumped into extreme use cases that exceeded
4467db96d56Sopenharmony_ci        # the recursion limit on their box.  So, now we maintain a list
4477db96d56Sopenharmony_ci        # ('queue`) of blocks we still need to look at, and append partial
4487db96d56Sopenharmony_ci        # results to `matching_blocks` in a loop; the matches are sorted
4497db96d56Sopenharmony_ci        # at the end.
4507db96d56Sopenharmony_ci        queue = [(0, la, 0, lb)]
4517db96d56Sopenharmony_ci        matching_blocks = []
4527db96d56Sopenharmony_ci        while queue:
4537db96d56Sopenharmony_ci            alo, ahi, blo, bhi = queue.pop()
4547db96d56Sopenharmony_ci            i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
4557db96d56Sopenharmony_ci            # a[alo:i] vs b[blo:j] unknown
4567db96d56Sopenharmony_ci            # a[i:i+k] same as b[j:j+k]
4577db96d56Sopenharmony_ci            # a[i+k:ahi] vs b[j+k:bhi] unknown
4587db96d56Sopenharmony_ci            if k:   # if k is 0, there was no matching block
4597db96d56Sopenharmony_ci                matching_blocks.append(x)
4607db96d56Sopenharmony_ci                if alo < i and blo < j:
4617db96d56Sopenharmony_ci                    queue.append((alo, i, blo, j))
4627db96d56Sopenharmony_ci                if i+k < ahi and j+k < bhi:
4637db96d56Sopenharmony_ci                    queue.append((i+k, ahi, j+k, bhi))
4647db96d56Sopenharmony_ci        matching_blocks.sort()
4657db96d56Sopenharmony_ci
4667db96d56Sopenharmony_ci        # It's possible that we have adjacent equal blocks in the
4677db96d56Sopenharmony_ci        # matching_blocks list now.  Starting with 2.5, this code was added
4687db96d56Sopenharmony_ci        # to collapse them.
4697db96d56Sopenharmony_ci        i1 = j1 = k1 = 0
4707db96d56Sopenharmony_ci        non_adjacent = []
4717db96d56Sopenharmony_ci        for i2, j2, k2 in matching_blocks:
4727db96d56Sopenharmony_ci            # Is this block adjacent to i1, j1, k1?
4737db96d56Sopenharmony_ci            if i1 + k1 == i2 and j1 + k1 == j2:
4747db96d56Sopenharmony_ci                # Yes, so collapse them -- this just increases the length of
4757db96d56Sopenharmony_ci                # the first block by the length of the second, and the first
4767db96d56Sopenharmony_ci                # block so lengthened remains the block to compare against.
4777db96d56Sopenharmony_ci                k1 += k2
4787db96d56Sopenharmony_ci            else:
4797db96d56Sopenharmony_ci                # Not adjacent.  Remember the first block (k1==0 means it's
4807db96d56Sopenharmony_ci                # the dummy we started with), and make the second block the
4817db96d56Sopenharmony_ci                # new block to compare against.
4827db96d56Sopenharmony_ci                if k1:
4837db96d56Sopenharmony_ci                    non_adjacent.append((i1, j1, k1))
4847db96d56Sopenharmony_ci                i1, j1, k1 = i2, j2, k2
4857db96d56Sopenharmony_ci        if k1:
4867db96d56Sopenharmony_ci            non_adjacent.append((i1, j1, k1))
4877db96d56Sopenharmony_ci
4887db96d56Sopenharmony_ci        non_adjacent.append( (la, lb, 0) )
4897db96d56Sopenharmony_ci        self.matching_blocks = list(map(Match._make, non_adjacent))
4907db96d56Sopenharmony_ci        return self.matching_blocks
4917db96d56Sopenharmony_ci
4927db96d56Sopenharmony_ci    def get_opcodes(self):
4937db96d56Sopenharmony_ci        """Return list of 5-tuples describing how to turn a into b.
4947db96d56Sopenharmony_ci
4957db96d56Sopenharmony_ci        Each tuple is of the form (tag, i1, i2, j1, j2).  The first tuple
4967db96d56Sopenharmony_ci        has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
4977db96d56Sopenharmony_ci        tuple preceding it, and likewise for j1 == the previous j2.
4987db96d56Sopenharmony_ci
4997db96d56Sopenharmony_ci        The tags are strings, with these meanings:
5007db96d56Sopenharmony_ci
5017db96d56Sopenharmony_ci        'replace':  a[i1:i2] should be replaced by b[j1:j2]
5027db96d56Sopenharmony_ci        'delete':   a[i1:i2] should be deleted.
5037db96d56Sopenharmony_ci                    Note that j1==j2 in this case.
5047db96d56Sopenharmony_ci        'insert':   b[j1:j2] should be inserted at a[i1:i1].
5057db96d56Sopenharmony_ci                    Note that i1==i2 in this case.
5067db96d56Sopenharmony_ci        'equal':    a[i1:i2] == b[j1:j2]
5077db96d56Sopenharmony_ci
5087db96d56Sopenharmony_ci        >>> a = "qabxcd"
5097db96d56Sopenharmony_ci        >>> b = "abycdf"
5107db96d56Sopenharmony_ci        >>> s = SequenceMatcher(None, a, b)
5117db96d56Sopenharmony_ci        >>> for tag, i1, i2, j1, j2 in s.get_opcodes():
5127db96d56Sopenharmony_ci        ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
5137db96d56Sopenharmony_ci        ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
5147db96d56Sopenharmony_ci         delete a[0:1] (q) b[0:0] ()
5157db96d56Sopenharmony_ci          equal a[1:3] (ab) b[0:2] (ab)
5167db96d56Sopenharmony_ci        replace a[3:4] (x) b[2:3] (y)
5177db96d56Sopenharmony_ci          equal a[4:6] (cd) b[3:5] (cd)
5187db96d56Sopenharmony_ci         insert a[6:6] () b[5:6] (f)
5197db96d56Sopenharmony_ci        """
5207db96d56Sopenharmony_ci
5217db96d56Sopenharmony_ci        if self.opcodes is not None:
5227db96d56Sopenharmony_ci            return self.opcodes
5237db96d56Sopenharmony_ci        i = j = 0
5247db96d56Sopenharmony_ci        self.opcodes = answer = []
5257db96d56Sopenharmony_ci        for ai, bj, size in self.get_matching_blocks():
5267db96d56Sopenharmony_ci            # invariant:  we've pumped out correct diffs to change
5277db96d56Sopenharmony_ci            # a[:i] into b[:j], and the next matching block is
5287db96d56Sopenharmony_ci            # a[ai:ai+size] == b[bj:bj+size].  So we need to pump
5297db96d56Sopenharmony_ci            # out a diff to change a[i:ai] into b[j:bj], pump out
5307db96d56Sopenharmony_ci            # the matching block, and move (i,j) beyond the match
5317db96d56Sopenharmony_ci            tag = ''
5327db96d56Sopenharmony_ci            if i < ai and j < bj:
5337db96d56Sopenharmony_ci                tag = 'replace'
5347db96d56Sopenharmony_ci            elif i < ai:
5357db96d56Sopenharmony_ci                tag = 'delete'
5367db96d56Sopenharmony_ci            elif j < bj:
5377db96d56Sopenharmony_ci                tag = 'insert'
5387db96d56Sopenharmony_ci            if tag:
5397db96d56Sopenharmony_ci                answer.append( (tag, i, ai, j, bj) )
5407db96d56Sopenharmony_ci            i, j = ai+size, bj+size
5417db96d56Sopenharmony_ci            # the list of matching blocks is terminated by a
5427db96d56Sopenharmony_ci            # sentinel with size 0
5437db96d56Sopenharmony_ci            if size:
5447db96d56Sopenharmony_ci                answer.append( ('equal', ai, i, bj, j) )
5457db96d56Sopenharmony_ci        return answer
5467db96d56Sopenharmony_ci
5477db96d56Sopenharmony_ci    def get_grouped_opcodes(self, n=3):
5487db96d56Sopenharmony_ci        """ Isolate change clusters by eliminating ranges with no changes.
5497db96d56Sopenharmony_ci
5507db96d56Sopenharmony_ci        Return a generator of groups with up to n lines of context.
5517db96d56Sopenharmony_ci        Each group is in the same format as returned by get_opcodes().
5527db96d56Sopenharmony_ci
5537db96d56Sopenharmony_ci        >>> from pprint import pprint
5547db96d56Sopenharmony_ci        >>> a = list(map(str, range(1,40)))
5557db96d56Sopenharmony_ci        >>> b = a[:]
5567db96d56Sopenharmony_ci        >>> b[8:8] = ['i']     # Make an insertion
5577db96d56Sopenharmony_ci        >>> b[20] += 'x'       # Make a replacement
5587db96d56Sopenharmony_ci        >>> b[23:28] = []      # Make a deletion
5597db96d56Sopenharmony_ci        >>> b[30] += 'y'       # Make another replacement
5607db96d56Sopenharmony_ci        >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
5617db96d56Sopenharmony_ci        [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
5627db96d56Sopenharmony_ci         [('equal', 16, 19, 17, 20),
5637db96d56Sopenharmony_ci          ('replace', 19, 20, 20, 21),
5647db96d56Sopenharmony_ci          ('equal', 20, 22, 21, 23),
5657db96d56Sopenharmony_ci          ('delete', 22, 27, 23, 23),
5667db96d56Sopenharmony_ci          ('equal', 27, 30, 23, 26)],
5677db96d56Sopenharmony_ci         [('equal', 31, 34, 27, 30),
5687db96d56Sopenharmony_ci          ('replace', 34, 35, 30, 31),
5697db96d56Sopenharmony_ci          ('equal', 35, 38, 31, 34)]]
5707db96d56Sopenharmony_ci        """
5717db96d56Sopenharmony_ci
5727db96d56Sopenharmony_ci        codes = self.get_opcodes()
5737db96d56Sopenharmony_ci        if not codes:
5747db96d56Sopenharmony_ci            codes = [("equal", 0, 1, 0, 1)]
5757db96d56Sopenharmony_ci        # Fixup leading and trailing groups if they show no changes.
5767db96d56Sopenharmony_ci        if codes[0][0] == 'equal':
5777db96d56Sopenharmony_ci            tag, i1, i2, j1, j2 = codes[0]
5787db96d56Sopenharmony_ci            codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
5797db96d56Sopenharmony_ci        if codes[-1][0] == 'equal':
5807db96d56Sopenharmony_ci            tag, i1, i2, j1, j2 = codes[-1]
5817db96d56Sopenharmony_ci            codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)
5827db96d56Sopenharmony_ci
5837db96d56Sopenharmony_ci        nn = n + n
5847db96d56Sopenharmony_ci        group = []
5857db96d56Sopenharmony_ci        for tag, i1, i2, j1, j2 in codes:
5867db96d56Sopenharmony_ci            # End the current group and start a new one whenever
5877db96d56Sopenharmony_ci            # there is a large range with no changes.
5887db96d56Sopenharmony_ci            if tag == 'equal' and i2-i1 > nn:
5897db96d56Sopenharmony_ci                group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
5907db96d56Sopenharmony_ci                yield group
5917db96d56Sopenharmony_ci                group = []
5927db96d56Sopenharmony_ci                i1, j1 = max(i1, i2-n), max(j1, j2-n)
5937db96d56Sopenharmony_ci            group.append((tag, i1, i2, j1 ,j2))
5947db96d56Sopenharmony_ci        if group and not (len(group)==1 and group[0][0] == 'equal'):
5957db96d56Sopenharmony_ci            yield group
5967db96d56Sopenharmony_ci
5977db96d56Sopenharmony_ci    def ratio(self):
5987db96d56Sopenharmony_ci        """Return a measure of the sequences' similarity (float in [0,1]).
5997db96d56Sopenharmony_ci
6007db96d56Sopenharmony_ci        Where T is the total number of elements in both sequences, and
6017db96d56Sopenharmony_ci        M is the number of matches, this is 2.0*M / T.
6027db96d56Sopenharmony_ci        Note that this is 1 if the sequences are identical, and 0 if
6037db96d56Sopenharmony_ci        they have nothing in common.
6047db96d56Sopenharmony_ci
6057db96d56Sopenharmony_ci        .ratio() is expensive to compute if you haven't already computed
6067db96d56Sopenharmony_ci        .get_matching_blocks() or .get_opcodes(), in which case you may
6077db96d56Sopenharmony_ci        want to try .quick_ratio() or .real_quick_ratio() first to get an
6087db96d56Sopenharmony_ci        upper bound.
6097db96d56Sopenharmony_ci
6107db96d56Sopenharmony_ci        >>> s = SequenceMatcher(None, "abcd", "bcde")
6117db96d56Sopenharmony_ci        >>> s.ratio()
6127db96d56Sopenharmony_ci        0.75
6137db96d56Sopenharmony_ci        >>> s.quick_ratio()
6147db96d56Sopenharmony_ci        0.75
6157db96d56Sopenharmony_ci        >>> s.real_quick_ratio()
6167db96d56Sopenharmony_ci        1.0
6177db96d56Sopenharmony_ci        """
6187db96d56Sopenharmony_ci
6197db96d56Sopenharmony_ci        matches = sum(triple[-1] for triple in self.get_matching_blocks())
6207db96d56Sopenharmony_ci        return _calculate_ratio(matches, len(self.a) + len(self.b))
6217db96d56Sopenharmony_ci
6227db96d56Sopenharmony_ci    def quick_ratio(self):
6237db96d56Sopenharmony_ci        """Return an upper bound on ratio() relatively quickly.
6247db96d56Sopenharmony_ci
6257db96d56Sopenharmony_ci        This isn't defined beyond that it is an upper bound on .ratio(), and
6267db96d56Sopenharmony_ci        is faster to compute.
6277db96d56Sopenharmony_ci        """
6287db96d56Sopenharmony_ci
6297db96d56Sopenharmony_ci        # viewing a and b as multisets, set matches to the cardinality
6307db96d56Sopenharmony_ci        # of their intersection; this counts the number of matches
6317db96d56Sopenharmony_ci        # without regard to order, so is clearly an upper bound
6327db96d56Sopenharmony_ci        if self.fullbcount is None:
6337db96d56Sopenharmony_ci            self.fullbcount = fullbcount = {}
6347db96d56Sopenharmony_ci            for elt in self.b:
6357db96d56Sopenharmony_ci                fullbcount[elt] = fullbcount.get(elt, 0) + 1
6367db96d56Sopenharmony_ci        fullbcount = self.fullbcount
6377db96d56Sopenharmony_ci        # avail[x] is the number of times x appears in 'b' less the
6387db96d56Sopenharmony_ci        # number of times we've seen it in 'a' so far ... kinda
6397db96d56Sopenharmony_ci        avail = {}
6407db96d56Sopenharmony_ci        availhas, matches = avail.__contains__, 0
6417db96d56Sopenharmony_ci        for elt in self.a:
6427db96d56Sopenharmony_ci            if availhas(elt):
6437db96d56Sopenharmony_ci                numb = avail[elt]
6447db96d56Sopenharmony_ci            else:
6457db96d56Sopenharmony_ci                numb = fullbcount.get(elt, 0)
6467db96d56Sopenharmony_ci            avail[elt] = numb - 1
6477db96d56Sopenharmony_ci            if numb > 0:
6487db96d56Sopenharmony_ci                matches = matches + 1
6497db96d56Sopenharmony_ci        return _calculate_ratio(matches, len(self.a) + len(self.b))
6507db96d56Sopenharmony_ci
6517db96d56Sopenharmony_ci    def real_quick_ratio(self):
6527db96d56Sopenharmony_ci        """Return an upper bound on ratio() very quickly.
6537db96d56Sopenharmony_ci
6547db96d56Sopenharmony_ci        This isn't defined beyond that it is an upper bound on .ratio(), and
6557db96d56Sopenharmony_ci        is faster to compute than either .ratio() or .quick_ratio().
6567db96d56Sopenharmony_ci        """
6577db96d56Sopenharmony_ci
6587db96d56Sopenharmony_ci        la, lb = len(self.a), len(self.b)
6597db96d56Sopenharmony_ci        # can't have more matches than the number of elements in the
6607db96d56Sopenharmony_ci        # shorter sequence
6617db96d56Sopenharmony_ci        return _calculate_ratio(min(la, lb), la + lb)
6627db96d56Sopenharmony_ci
6637db96d56Sopenharmony_ci    __class_getitem__ = classmethod(GenericAlias)
6647db96d56Sopenharmony_ci
6657db96d56Sopenharmony_ci
6667db96d56Sopenharmony_cidef get_close_matches(word, possibilities, n=3, cutoff=0.6):
6677db96d56Sopenharmony_ci    """Use SequenceMatcher to return list of the best "good enough" matches.
6687db96d56Sopenharmony_ci
6697db96d56Sopenharmony_ci    word is a sequence for which close matches are desired (typically a
6707db96d56Sopenharmony_ci    string).
6717db96d56Sopenharmony_ci
6727db96d56Sopenharmony_ci    possibilities is a list of sequences against which to match word
6737db96d56Sopenharmony_ci    (typically a list of strings).
6747db96d56Sopenharmony_ci
6757db96d56Sopenharmony_ci    Optional arg n (default 3) is the maximum number of close matches to
6767db96d56Sopenharmony_ci    return.  n must be > 0.
6777db96d56Sopenharmony_ci
6787db96d56Sopenharmony_ci    Optional arg cutoff (default 0.6) is a float in [0, 1].  Possibilities
6797db96d56Sopenharmony_ci    that don't score at least that similar to word are ignored.
6807db96d56Sopenharmony_ci
6817db96d56Sopenharmony_ci    The best (no more than n) matches among the possibilities are returned
6827db96d56Sopenharmony_ci    in a list, sorted by similarity score, most similar first.
6837db96d56Sopenharmony_ci
6847db96d56Sopenharmony_ci    >>> get_close_matches("appel", ["ape", "apple", "peach", "puppy"])
6857db96d56Sopenharmony_ci    ['apple', 'ape']
6867db96d56Sopenharmony_ci    >>> import keyword as _keyword
6877db96d56Sopenharmony_ci    >>> get_close_matches("wheel", _keyword.kwlist)
6887db96d56Sopenharmony_ci    ['while']
6897db96d56Sopenharmony_ci    >>> get_close_matches("Apple", _keyword.kwlist)
6907db96d56Sopenharmony_ci    []
6917db96d56Sopenharmony_ci    >>> get_close_matches("accept", _keyword.kwlist)
6927db96d56Sopenharmony_ci    ['except']
6937db96d56Sopenharmony_ci    """
6947db96d56Sopenharmony_ci
6957db96d56Sopenharmony_ci    if not n >  0:
6967db96d56Sopenharmony_ci        raise ValueError("n must be > 0: %r" % (n,))
6977db96d56Sopenharmony_ci    if not 0.0 <= cutoff <= 1.0:
6987db96d56Sopenharmony_ci        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
6997db96d56Sopenharmony_ci    result = []
7007db96d56Sopenharmony_ci    s = SequenceMatcher()
7017db96d56Sopenharmony_ci    s.set_seq2(word)
7027db96d56Sopenharmony_ci    for x in possibilities:
7037db96d56Sopenharmony_ci        s.set_seq1(x)
7047db96d56Sopenharmony_ci        if s.real_quick_ratio() >= cutoff and \
7057db96d56Sopenharmony_ci           s.quick_ratio() >= cutoff and \
7067db96d56Sopenharmony_ci           s.ratio() >= cutoff:
7077db96d56Sopenharmony_ci            result.append((s.ratio(), x))
7087db96d56Sopenharmony_ci
7097db96d56Sopenharmony_ci    # Move the best scorers to head of list
7107db96d56Sopenharmony_ci    result = _nlargest(n, result)
7117db96d56Sopenharmony_ci    # Strip scores for the best n matches
7127db96d56Sopenharmony_ci    return [x for score, x in result]
7137db96d56Sopenharmony_ci
7147db96d56Sopenharmony_ci
7157db96d56Sopenharmony_cidef _keep_original_ws(s, tag_s):
7167db96d56Sopenharmony_ci    """Replace whitespace with the original whitespace characters in `s`"""
7177db96d56Sopenharmony_ci    return ''.join(
7187db96d56Sopenharmony_ci        c if tag_c == " " and c.isspace() else tag_c
7197db96d56Sopenharmony_ci        for c, tag_c in zip(s, tag_s)
7207db96d56Sopenharmony_ci    )
7217db96d56Sopenharmony_ci
7227db96d56Sopenharmony_ci
7237db96d56Sopenharmony_ci
7247db96d56Sopenharmony_ciclass Differ:
7257db96d56Sopenharmony_ci    r"""
7267db96d56Sopenharmony_ci    Differ is a class for comparing sequences of lines of text, and
7277db96d56Sopenharmony_ci    producing human-readable differences or deltas.  Differ uses
7287db96d56Sopenharmony_ci    SequenceMatcher both to compare sequences of lines, and to compare
7297db96d56Sopenharmony_ci    sequences of characters within similar (near-matching) lines.
7307db96d56Sopenharmony_ci
7317db96d56Sopenharmony_ci    Each line of a Differ delta begins with a two-letter code:
7327db96d56Sopenharmony_ci
7337db96d56Sopenharmony_ci        '- '    line unique to sequence 1
7347db96d56Sopenharmony_ci        '+ '    line unique to sequence 2
7357db96d56Sopenharmony_ci        '  '    line common to both sequences
7367db96d56Sopenharmony_ci        '? '    line not present in either input sequence
7377db96d56Sopenharmony_ci
7387db96d56Sopenharmony_ci    Lines beginning with '? ' attempt to guide the eye to intraline
7397db96d56Sopenharmony_ci    differences, and were not present in either input sequence.  These lines
7407db96d56Sopenharmony_ci    can be confusing if the sequences contain tab characters.
7417db96d56Sopenharmony_ci
7427db96d56Sopenharmony_ci    Note that Differ makes no claim to produce a *minimal* diff.  To the
7437db96d56Sopenharmony_ci    contrary, minimal diffs are often counter-intuitive, because they synch
7447db96d56Sopenharmony_ci    up anywhere possible, sometimes accidental matches 100 pages apart.
7457db96d56Sopenharmony_ci    Restricting synch points to contiguous matches preserves some notion of
7467db96d56Sopenharmony_ci    locality, at the occasional cost of producing a longer diff.
7477db96d56Sopenharmony_ci
7487db96d56Sopenharmony_ci    Example: Comparing two texts.
7497db96d56Sopenharmony_ci
7507db96d56Sopenharmony_ci    First we set up the texts, sequences of individual single-line strings
7517db96d56Sopenharmony_ci    ending with newlines (such sequences can also be obtained from the
7527db96d56Sopenharmony_ci    `readlines()` method of file-like objects):
7537db96d56Sopenharmony_ci
7547db96d56Sopenharmony_ci    >>> text1 = '''  1. Beautiful is better than ugly.
7557db96d56Sopenharmony_ci    ...   2. Explicit is better than implicit.
7567db96d56Sopenharmony_ci    ...   3. Simple is better than complex.
7577db96d56Sopenharmony_ci    ...   4. Complex is better than complicated.
7587db96d56Sopenharmony_ci    ... '''.splitlines(keepends=True)
7597db96d56Sopenharmony_ci    >>> len(text1)
7607db96d56Sopenharmony_ci    4
7617db96d56Sopenharmony_ci    >>> text1[0][-1]
7627db96d56Sopenharmony_ci    '\n'
7637db96d56Sopenharmony_ci    >>> text2 = '''  1. Beautiful is better than ugly.
7647db96d56Sopenharmony_ci    ...   3.   Simple is better than complex.
7657db96d56Sopenharmony_ci    ...   4. Complicated is better than complex.
7667db96d56Sopenharmony_ci    ...   5. Flat is better than nested.
7677db96d56Sopenharmony_ci    ... '''.splitlines(keepends=True)
7687db96d56Sopenharmony_ci
7697db96d56Sopenharmony_ci    Next we instantiate a Differ object:
7707db96d56Sopenharmony_ci
7717db96d56Sopenharmony_ci    >>> d = Differ()
7727db96d56Sopenharmony_ci
7737db96d56Sopenharmony_ci    Note that when instantiating a Differ object we may pass functions to
7747db96d56Sopenharmony_ci    filter out line and character 'junk'.  See Differ.__init__ for details.
7757db96d56Sopenharmony_ci
7767db96d56Sopenharmony_ci    Finally, we compare the two:
7777db96d56Sopenharmony_ci
7787db96d56Sopenharmony_ci    >>> result = list(d.compare(text1, text2))
7797db96d56Sopenharmony_ci
7807db96d56Sopenharmony_ci    'result' is a list of strings, so let's pretty-print it:
7817db96d56Sopenharmony_ci
7827db96d56Sopenharmony_ci    >>> from pprint import pprint as _pprint
7837db96d56Sopenharmony_ci    >>> _pprint(result)
7847db96d56Sopenharmony_ci    ['    1. Beautiful is better than ugly.\n',
7857db96d56Sopenharmony_ci     '-   2. Explicit is better than implicit.\n',
7867db96d56Sopenharmony_ci     '-   3. Simple is better than complex.\n',
7877db96d56Sopenharmony_ci     '+   3.   Simple is better than complex.\n',
7887db96d56Sopenharmony_ci     '?     ++\n',
7897db96d56Sopenharmony_ci     '-   4. Complex is better than complicated.\n',
7907db96d56Sopenharmony_ci     '?            ^                     ---- ^\n',
7917db96d56Sopenharmony_ci     '+   4. Complicated is better than complex.\n',
7927db96d56Sopenharmony_ci     '?           ++++ ^                      ^\n',
7937db96d56Sopenharmony_ci     '+   5. Flat is better than nested.\n']
7947db96d56Sopenharmony_ci
7957db96d56Sopenharmony_ci    As a single multi-line string it looks like this:
7967db96d56Sopenharmony_ci
7977db96d56Sopenharmony_ci    >>> print(''.join(result), end="")
7987db96d56Sopenharmony_ci        1. Beautiful is better than ugly.
7997db96d56Sopenharmony_ci    -   2. Explicit is better than implicit.
8007db96d56Sopenharmony_ci    -   3. Simple is better than complex.
8017db96d56Sopenharmony_ci    +   3.   Simple is better than complex.
8027db96d56Sopenharmony_ci    ?     ++
8037db96d56Sopenharmony_ci    -   4. Complex is better than complicated.
8047db96d56Sopenharmony_ci    ?            ^                     ---- ^
8057db96d56Sopenharmony_ci    +   4. Complicated is better than complex.
8067db96d56Sopenharmony_ci    ?           ++++ ^                      ^
8077db96d56Sopenharmony_ci    +   5. Flat is better than nested.
8087db96d56Sopenharmony_ci    """
8097db96d56Sopenharmony_ci
8107db96d56Sopenharmony_ci    def __init__(self, linejunk=None, charjunk=None):
8117db96d56Sopenharmony_ci        """
8127db96d56Sopenharmony_ci        Construct a text differencer, with optional filters.
8137db96d56Sopenharmony_ci
8147db96d56Sopenharmony_ci        The two optional keyword parameters are for filter functions:
8157db96d56Sopenharmony_ci
8167db96d56Sopenharmony_ci        - `linejunk`: A function that should accept a single string argument,
8177db96d56Sopenharmony_ci          and return true iff the string is junk. The module-level function
8187db96d56Sopenharmony_ci          `IS_LINE_JUNK` may be used to filter out lines without visible
8197db96d56Sopenharmony_ci          characters, except for at most one splat ('#').  It is recommended
8207db96d56Sopenharmony_ci          to leave linejunk None; the underlying SequenceMatcher class has
8217db96d56Sopenharmony_ci          an adaptive notion of "noise" lines that's better than any static
8227db96d56Sopenharmony_ci          definition the author has ever been able to craft.
8237db96d56Sopenharmony_ci
8247db96d56Sopenharmony_ci        - `charjunk`: A function that should accept a string of length 1. The
8257db96d56Sopenharmony_ci          module-level function `IS_CHARACTER_JUNK` may be used to filter out
8267db96d56Sopenharmony_ci          whitespace characters (a blank or tab; **note**: bad idea to include
8277db96d56Sopenharmony_ci          newline in this!).  Use of IS_CHARACTER_JUNK is recommended.
8287db96d56Sopenharmony_ci        """
8297db96d56Sopenharmony_ci
8307db96d56Sopenharmony_ci        self.linejunk = linejunk
8317db96d56Sopenharmony_ci        self.charjunk = charjunk
8327db96d56Sopenharmony_ci
8337db96d56Sopenharmony_ci    def compare(self, a, b):
8347db96d56Sopenharmony_ci        r"""
8357db96d56Sopenharmony_ci        Compare two sequences of lines; generate the resulting delta.
8367db96d56Sopenharmony_ci
8377db96d56Sopenharmony_ci        Each sequence must contain individual single-line strings ending with
8387db96d56Sopenharmony_ci        newlines. Such sequences can be obtained from the `readlines()` method
8397db96d56Sopenharmony_ci        of file-like objects.  The delta generated also consists of newline-
8407db96d56Sopenharmony_ci        terminated strings, ready to be printed as-is via the writelines()
8417db96d56Sopenharmony_ci        method of a file-like object.
8427db96d56Sopenharmony_ci
8437db96d56Sopenharmony_ci        Example:
8447db96d56Sopenharmony_ci
8457db96d56Sopenharmony_ci        >>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(True),
8467db96d56Sopenharmony_ci        ...                                'ore\ntree\nemu\n'.splitlines(True))),
8477db96d56Sopenharmony_ci        ...       end="")
8487db96d56Sopenharmony_ci        - one
8497db96d56Sopenharmony_ci        ?  ^
8507db96d56Sopenharmony_ci        + ore
8517db96d56Sopenharmony_ci        ?  ^
8527db96d56Sopenharmony_ci        - two
8537db96d56Sopenharmony_ci        - three
8547db96d56Sopenharmony_ci        ?  -
8557db96d56Sopenharmony_ci        + tree
8567db96d56Sopenharmony_ci        + emu
8577db96d56Sopenharmony_ci        """
8587db96d56Sopenharmony_ci
8597db96d56Sopenharmony_ci        cruncher = SequenceMatcher(self.linejunk, a, b)
8607db96d56Sopenharmony_ci        for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
8617db96d56Sopenharmony_ci            if tag == 'replace':
8627db96d56Sopenharmony_ci                g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
8637db96d56Sopenharmony_ci            elif tag == 'delete':
8647db96d56Sopenharmony_ci                g = self._dump('-', a, alo, ahi)
8657db96d56Sopenharmony_ci            elif tag == 'insert':
8667db96d56Sopenharmony_ci                g = self._dump('+', b, blo, bhi)
8677db96d56Sopenharmony_ci            elif tag == 'equal':
8687db96d56Sopenharmony_ci                g = self._dump(' ', a, alo, ahi)
8697db96d56Sopenharmony_ci            else:
8707db96d56Sopenharmony_ci                raise ValueError('unknown tag %r' % (tag,))
8717db96d56Sopenharmony_ci
8727db96d56Sopenharmony_ci            yield from g
8737db96d56Sopenharmony_ci
8747db96d56Sopenharmony_ci    def _dump(self, tag, x, lo, hi):
8757db96d56Sopenharmony_ci        """Generate comparison results for a same-tagged range."""
8767db96d56Sopenharmony_ci        for i in range(lo, hi):
8777db96d56Sopenharmony_ci            yield '%s %s' % (tag, x[i])
8787db96d56Sopenharmony_ci
8797db96d56Sopenharmony_ci    def _plain_replace(self, a, alo, ahi, b, blo, bhi):
8807db96d56Sopenharmony_ci        assert alo < ahi and blo < bhi
8817db96d56Sopenharmony_ci        # dump the shorter block first -- reduces the burden on short-term
8827db96d56Sopenharmony_ci        # memory if the blocks are of very different sizes
8837db96d56Sopenharmony_ci        if bhi - blo < ahi - alo:
8847db96d56Sopenharmony_ci            first  = self._dump('+', b, blo, bhi)
8857db96d56Sopenharmony_ci            second = self._dump('-', a, alo, ahi)
8867db96d56Sopenharmony_ci        else:
8877db96d56Sopenharmony_ci            first  = self._dump('-', a, alo, ahi)
8887db96d56Sopenharmony_ci            second = self._dump('+', b, blo, bhi)
8897db96d56Sopenharmony_ci
8907db96d56Sopenharmony_ci        for g in first, second:
8917db96d56Sopenharmony_ci            yield from g
8927db96d56Sopenharmony_ci
8937db96d56Sopenharmony_ci    def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
8947db96d56Sopenharmony_ci        r"""
8957db96d56Sopenharmony_ci        When replacing one block of lines with another, search the blocks
8967db96d56Sopenharmony_ci        for *similar* lines; the best-matching pair (if any) is used as a
8977db96d56Sopenharmony_ci        synch point, and intraline difference marking is done on the
8987db96d56Sopenharmony_ci        similar pair. Lots of work, but often worth it.
8997db96d56Sopenharmony_ci
9007db96d56Sopenharmony_ci        Example:
9017db96d56Sopenharmony_ci
9027db96d56Sopenharmony_ci        >>> d = Differ()
9037db96d56Sopenharmony_ci        >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
9047db96d56Sopenharmony_ci        ...                            ['abcdefGhijkl\n'], 0, 1)
9057db96d56Sopenharmony_ci        >>> print(''.join(results), end="")
9067db96d56Sopenharmony_ci        - abcDefghiJkl
9077db96d56Sopenharmony_ci        ?    ^  ^  ^
9087db96d56Sopenharmony_ci        + abcdefGhijkl
9097db96d56Sopenharmony_ci        ?    ^  ^  ^
9107db96d56Sopenharmony_ci        """
9117db96d56Sopenharmony_ci
9127db96d56Sopenharmony_ci        # don't synch up unless the lines have a similarity score of at
9137db96d56Sopenharmony_ci        # least cutoff; best_ratio tracks the best score seen so far
9147db96d56Sopenharmony_ci        best_ratio, cutoff = 0.74, 0.75
9157db96d56Sopenharmony_ci        cruncher = SequenceMatcher(self.charjunk)
9167db96d56Sopenharmony_ci        eqi, eqj = None, None   # 1st indices of equal lines (if any)
9177db96d56Sopenharmony_ci
9187db96d56Sopenharmony_ci        # search for the pair that matches best without being identical
9197db96d56Sopenharmony_ci        # (identical lines must be junk lines, & we don't want to synch up
9207db96d56Sopenharmony_ci        # on junk -- unless we have to)
9217db96d56Sopenharmony_ci        for j in range(blo, bhi):
9227db96d56Sopenharmony_ci            bj = b[j]
9237db96d56Sopenharmony_ci            cruncher.set_seq2(bj)
9247db96d56Sopenharmony_ci            for i in range(alo, ahi):
9257db96d56Sopenharmony_ci                ai = a[i]
9267db96d56Sopenharmony_ci                if ai == bj:
9277db96d56Sopenharmony_ci                    if eqi is None:
9287db96d56Sopenharmony_ci                        eqi, eqj = i, j
9297db96d56Sopenharmony_ci                    continue
9307db96d56Sopenharmony_ci                cruncher.set_seq1(ai)
9317db96d56Sopenharmony_ci                # computing similarity is expensive, so use the quick
9327db96d56Sopenharmony_ci                # upper bounds first -- have seen this speed up messy
9337db96d56Sopenharmony_ci                # compares by a factor of 3.
9347db96d56Sopenharmony_ci                # note that ratio() is only expensive to compute the first
9357db96d56Sopenharmony_ci                # time it's called on a sequence pair; the expensive part
9367db96d56Sopenharmony_ci                # of the computation is cached by cruncher
9377db96d56Sopenharmony_ci                if cruncher.real_quick_ratio() > best_ratio and \
9387db96d56Sopenharmony_ci                      cruncher.quick_ratio() > best_ratio and \
9397db96d56Sopenharmony_ci                      cruncher.ratio() > best_ratio:
9407db96d56Sopenharmony_ci                    best_ratio, best_i, best_j = cruncher.ratio(), i, j
9417db96d56Sopenharmony_ci        if best_ratio < cutoff:
9427db96d56Sopenharmony_ci            # no non-identical "pretty close" pair
9437db96d56Sopenharmony_ci            if eqi is None:
9447db96d56Sopenharmony_ci                # no identical pair either -- treat it as a straight replace
9457db96d56Sopenharmony_ci                yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
9467db96d56Sopenharmony_ci                return
9477db96d56Sopenharmony_ci            # no close pair, but an identical pair -- synch up on that
9487db96d56Sopenharmony_ci            best_i, best_j, best_ratio = eqi, eqj, 1.0
9497db96d56Sopenharmony_ci        else:
9507db96d56Sopenharmony_ci            # there's a close pair, so forget the identical pair (if any)
9517db96d56Sopenharmony_ci            eqi = None
9527db96d56Sopenharmony_ci
9537db96d56Sopenharmony_ci        # a[best_i] very similar to b[best_j]; eqi is None iff they're not
9547db96d56Sopenharmony_ci        # identical
9557db96d56Sopenharmony_ci
9567db96d56Sopenharmony_ci        # pump out diffs from before the synch point
9577db96d56Sopenharmony_ci        yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
9587db96d56Sopenharmony_ci
9597db96d56Sopenharmony_ci        # do intraline marking on the synch pair
9607db96d56Sopenharmony_ci        aelt, belt = a[best_i], b[best_j]
9617db96d56Sopenharmony_ci        if eqi is None:
9627db96d56Sopenharmony_ci            # pump out a '-', '?', '+', '?' quad for the synched lines
9637db96d56Sopenharmony_ci            atags = btags = ""
9647db96d56Sopenharmony_ci            cruncher.set_seqs(aelt, belt)
9657db96d56Sopenharmony_ci            for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
9667db96d56Sopenharmony_ci                la, lb = ai2 - ai1, bj2 - bj1
9677db96d56Sopenharmony_ci                if tag == 'replace':
9687db96d56Sopenharmony_ci                    atags += '^' * la
9697db96d56Sopenharmony_ci                    btags += '^' * lb
9707db96d56Sopenharmony_ci                elif tag == 'delete':
9717db96d56Sopenharmony_ci                    atags += '-' * la
9727db96d56Sopenharmony_ci                elif tag == 'insert':
9737db96d56Sopenharmony_ci                    btags += '+' * lb
9747db96d56Sopenharmony_ci                elif tag == 'equal':
9757db96d56Sopenharmony_ci                    atags += ' ' * la
9767db96d56Sopenharmony_ci                    btags += ' ' * lb
9777db96d56Sopenharmony_ci                else:
9787db96d56Sopenharmony_ci                    raise ValueError('unknown tag %r' % (tag,))
9797db96d56Sopenharmony_ci            yield from self._qformat(aelt, belt, atags, btags)
9807db96d56Sopenharmony_ci        else:
9817db96d56Sopenharmony_ci            # the synch pair is identical
9827db96d56Sopenharmony_ci            yield '  ' + aelt
9837db96d56Sopenharmony_ci
9847db96d56Sopenharmony_ci        # pump out diffs from after the synch point
9857db96d56Sopenharmony_ci        yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
9867db96d56Sopenharmony_ci
9877db96d56Sopenharmony_ci    def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
9887db96d56Sopenharmony_ci        g = []
9897db96d56Sopenharmony_ci        if alo < ahi:
9907db96d56Sopenharmony_ci            if blo < bhi:
9917db96d56Sopenharmony_ci                g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
9927db96d56Sopenharmony_ci            else:
9937db96d56Sopenharmony_ci                g = self._dump('-', a, alo, ahi)
9947db96d56Sopenharmony_ci        elif blo < bhi:
9957db96d56Sopenharmony_ci            g = self._dump('+', b, blo, bhi)
9967db96d56Sopenharmony_ci
9977db96d56Sopenharmony_ci        yield from g
9987db96d56Sopenharmony_ci
9997db96d56Sopenharmony_ci    def _qformat(self, aline, bline, atags, btags):
10007db96d56Sopenharmony_ci        r"""
10017db96d56Sopenharmony_ci        Format "?" output and deal with tabs.
10027db96d56Sopenharmony_ci
10037db96d56Sopenharmony_ci        Example:
10047db96d56Sopenharmony_ci
10057db96d56Sopenharmony_ci        >>> d = Differ()
10067db96d56Sopenharmony_ci        >>> results = d._qformat('\tabcDefghiJkl\n', '\tabcdefGhijkl\n',
10077db96d56Sopenharmony_ci        ...                      '  ^ ^  ^      ', '  ^ ^  ^      ')
10087db96d56Sopenharmony_ci        >>> for line in results: print(repr(line))
10097db96d56Sopenharmony_ci        ...
10107db96d56Sopenharmony_ci        '- \tabcDefghiJkl\n'
10117db96d56Sopenharmony_ci        '? \t ^ ^  ^\n'
10127db96d56Sopenharmony_ci        '+ \tabcdefGhijkl\n'
10137db96d56Sopenharmony_ci        '? \t ^ ^  ^\n'
10147db96d56Sopenharmony_ci        """
10157db96d56Sopenharmony_ci        atags = _keep_original_ws(aline, atags).rstrip()
10167db96d56Sopenharmony_ci        btags = _keep_original_ws(bline, btags).rstrip()
10177db96d56Sopenharmony_ci
10187db96d56Sopenharmony_ci        yield "- " + aline
10197db96d56Sopenharmony_ci        if atags:
10207db96d56Sopenharmony_ci            yield f"? {atags}\n"
10217db96d56Sopenharmony_ci
10227db96d56Sopenharmony_ci        yield "+ " + bline
10237db96d56Sopenharmony_ci        if btags:
10247db96d56Sopenharmony_ci            yield f"? {btags}\n"
10257db96d56Sopenharmony_ci
10267db96d56Sopenharmony_ci# With respect to junk, an earlier version of ndiff simply refused to
10277db96d56Sopenharmony_ci# *start* a match with a junk element.  The result was cases like this:
10287db96d56Sopenharmony_ci#     before: private Thread currentThread;
10297db96d56Sopenharmony_ci#     after:  private volatile Thread currentThread;
10307db96d56Sopenharmony_ci# If you consider whitespace to be junk, the longest contiguous match
10317db96d56Sopenharmony_ci# not starting with junk is "e Thread currentThread".  So ndiff reported
10327db96d56Sopenharmony_ci# that "e volatil" was inserted between the 't' and the 'e' in "private".
10337db96d56Sopenharmony_ci# While an accurate view, to people that's absurd.  The current version
10347db96d56Sopenharmony_ci# looks for matching blocks that are entirely junk-free, then extends the
10357db96d56Sopenharmony_ci# longest one of those as far as possible but only with matching junk.
10367db96d56Sopenharmony_ci# So now "currentThread" is matched, then extended to suck up the
10377db96d56Sopenharmony_ci# preceding blank; then "private" is matched, and extended to suck up the
10387db96d56Sopenharmony_ci# following blank; then "Thread" is matched; and finally ndiff reports
10397db96d56Sopenharmony_ci# that "volatile " was inserted before "Thread".  The only quibble
10407db96d56Sopenharmony_ci# remaining is that perhaps it was really the case that " volatile"
10417db96d56Sopenharmony_ci# was inserted after "private".  I can live with that <wink>.
10427db96d56Sopenharmony_ci
10437db96d56Sopenharmony_ciimport re
10447db96d56Sopenharmony_ci
10457db96d56Sopenharmony_cidef IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match):
10467db96d56Sopenharmony_ci    r"""
10477db96d56Sopenharmony_ci    Return True for ignorable line: iff `line` is blank or contains a single '#'.
10487db96d56Sopenharmony_ci
10497db96d56Sopenharmony_ci    Examples:
10507db96d56Sopenharmony_ci
10517db96d56Sopenharmony_ci    >>> IS_LINE_JUNK('\n')
10527db96d56Sopenharmony_ci    True
10537db96d56Sopenharmony_ci    >>> IS_LINE_JUNK('  #   \n')
10547db96d56Sopenharmony_ci    True
10557db96d56Sopenharmony_ci    >>> IS_LINE_JUNK('hello\n')
10567db96d56Sopenharmony_ci    False
10577db96d56Sopenharmony_ci    """
10587db96d56Sopenharmony_ci
10597db96d56Sopenharmony_ci    return pat(line) is not None
10607db96d56Sopenharmony_ci
10617db96d56Sopenharmony_cidef IS_CHARACTER_JUNK(ch, ws=" \t"):
10627db96d56Sopenharmony_ci    r"""
10637db96d56Sopenharmony_ci    Return True for ignorable character: iff `ch` is a space or tab.
10647db96d56Sopenharmony_ci
10657db96d56Sopenharmony_ci    Examples:
10667db96d56Sopenharmony_ci
10677db96d56Sopenharmony_ci    >>> IS_CHARACTER_JUNK(' ')
10687db96d56Sopenharmony_ci    True
10697db96d56Sopenharmony_ci    >>> IS_CHARACTER_JUNK('\t')
10707db96d56Sopenharmony_ci    True
10717db96d56Sopenharmony_ci    >>> IS_CHARACTER_JUNK('\n')
10727db96d56Sopenharmony_ci    False
10737db96d56Sopenharmony_ci    >>> IS_CHARACTER_JUNK('x')
10747db96d56Sopenharmony_ci    False
10757db96d56Sopenharmony_ci    """
10767db96d56Sopenharmony_ci
10777db96d56Sopenharmony_ci    return ch in ws
10787db96d56Sopenharmony_ci
10797db96d56Sopenharmony_ci
10807db96d56Sopenharmony_ci########################################################################
10817db96d56Sopenharmony_ci###  Unified Diff
10827db96d56Sopenharmony_ci########################################################################
10837db96d56Sopenharmony_ci
10847db96d56Sopenharmony_cidef _format_range_unified(start, stop):
10857db96d56Sopenharmony_ci    'Convert range to the "ed" format'
10867db96d56Sopenharmony_ci    # Per the diff spec at http://www.unix.org/single_unix_specification/
10877db96d56Sopenharmony_ci    beginning = start + 1     # lines start numbering with one
10887db96d56Sopenharmony_ci    length = stop - start
10897db96d56Sopenharmony_ci    if length == 1:
10907db96d56Sopenharmony_ci        return '{}'.format(beginning)
10917db96d56Sopenharmony_ci    if not length:
10927db96d56Sopenharmony_ci        beginning -= 1        # empty ranges begin at line just before the range
10937db96d56Sopenharmony_ci    return '{},{}'.format(beginning, length)
10947db96d56Sopenharmony_ci
10957db96d56Sopenharmony_cidef unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
10967db96d56Sopenharmony_ci                 tofiledate='', n=3, lineterm='\n'):
10977db96d56Sopenharmony_ci    r"""
10987db96d56Sopenharmony_ci    Compare two sequences of lines; generate the delta as a unified diff.
10997db96d56Sopenharmony_ci
11007db96d56Sopenharmony_ci    Unified diffs are a compact way of showing line changes and a few
11017db96d56Sopenharmony_ci    lines of context.  The number of context lines is set by 'n' which
11027db96d56Sopenharmony_ci    defaults to three.
11037db96d56Sopenharmony_ci
11047db96d56Sopenharmony_ci    By default, the diff control lines (those with ---, +++, or @@) are
11057db96d56Sopenharmony_ci    created with a trailing newline.  This is helpful so that inputs
11067db96d56Sopenharmony_ci    created from file.readlines() result in diffs that are suitable for
11077db96d56Sopenharmony_ci    file.writelines() since both the inputs and outputs have trailing
11087db96d56Sopenharmony_ci    newlines.
11097db96d56Sopenharmony_ci
11107db96d56Sopenharmony_ci    For inputs that do not have trailing newlines, set the lineterm
11117db96d56Sopenharmony_ci    argument to "" so that the output will be uniformly newline free.
11127db96d56Sopenharmony_ci
11137db96d56Sopenharmony_ci    The unidiff format normally has a header for filenames and modification
11147db96d56Sopenharmony_ci    times.  Any or all of these may be specified using strings for
11157db96d56Sopenharmony_ci    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
11167db96d56Sopenharmony_ci    The modification times are normally expressed in the ISO 8601 format.
11177db96d56Sopenharmony_ci
11187db96d56Sopenharmony_ci    Example:
11197db96d56Sopenharmony_ci
11207db96d56Sopenharmony_ci    >>> for line in unified_diff('one two three four'.split(),
11217db96d56Sopenharmony_ci    ...             'zero one tree four'.split(), 'Original', 'Current',
11227db96d56Sopenharmony_ci    ...             '2005-01-26 23:30:50', '2010-04-02 10:20:52',
11237db96d56Sopenharmony_ci    ...             lineterm=''):
11247db96d56Sopenharmony_ci    ...     print(line)                 # doctest: +NORMALIZE_WHITESPACE
11257db96d56Sopenharmony_ci    --- Original        2005-01-26 23:30:50
11267db96d56Sopenharmony_ci    +++ Current         2010-04-02 10:20:52
11277db96d56Sopenharmony_ci    @@ -1,4 +1,4 @@
11287db96d56Sopenharmony_ci    +zero
11297db96d56Sopenharmony_ci     one
11307db96d56Sopenharmony_ci    -two
11317db96d56Sopenharmony_ci    -three
11327db96d56Sopenharmony_ci    +tree
11337db96d56Sopenharmony_ci     four
11347db96d56Sopenharmony_ci    """
11357db96d56Sopenharmony_ci
11367db96d56Sopenharmony_ci    _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
11377db96d56Sopenharmony_ci    started = False
11387db96d56Sopenharmony_ci    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
11397db96d56Sopenharmony_ci        if not started:
11407db96d56Sopenharmony_ci            started = True
11417db96d56Sopenharmony_ci            fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
11427db96d56Sopenharmony_ci            todate = '\t{}'.format(tofiledate) if tofiledate else ''
11437db96d56Sopenharmony_ci            yield '--- {}{}{}'.format(fromfile, fromdate, lineterm)
11447db96d56Sopenharmony_ci            yield '+++ {}{}{}'.format(tofile, todate, lineterm)
11457db96d56Sopenharmony_ci
11467db96d56Sopenharmony_ci        first, last = group[0], group[-1]
11477db96d56Sopenharmony_ci        file1_range = _format_range_unified(first[1], last[2])
11487db96d56Sopenharmony_ci        file2_range = _format_range_unified(first[3], last[4])
11497db96d56Sopenharmony_ci        yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm)
11507db96d56Sopenharmony_ci
11517db96d56Sopenharmony_ci        for tag, i1, i2, j1, j2 in group:
11527db96d56Sopenharmony_ci            if tag == 'equal':
11537db96d56Sopenharmony_ci                for line in a[i1:i2]:
11547db96d56Sopenharmony_ci                    yield ' ' + line
11557db96d56Sopenharmony_ci                continue
11567db96d56Sopenharmony_ci            if tag in {'replace', 'delete'}:
11577db96d56Sopenharmony_ci                for line in a[i1:i2]:
11587db96d56Sopenharmony_ci                    yield '-' + line
11597db96d56Sopenharmony_ci            if tag in {'replace', 'insert'}:
11607db96d56Sopenharmony_ci                for line in b[j1:j2]:
11617db96d56Sopenharmony_ci                    yield '+' + line
11627db96d56Sopenharmony_ci
11637db96d56Sopenharmony_ci
11647db96d56Sopenharmony_ci########################################################################
11657db96d56Sopenharmony_ci###  Context Diff
11667db96d56Sopenharmony_ci########################################################################
11677db96d56Sopenharmony_ci
11687db96d56Sopenharmony_cidef _format_range_context(start, stop):
11697db96d56Sopenharmony_ci    'Convert range to the "ed" format'
11707db96d56Sopenharmony_ci    # Per the diff spec at http://www.unix.org/single_unix_specification/
11717db96d56Sopenharmony_ci    beginning = start + 1     # lines start numbering with one
11727db96d56Sopenharmony_ci    length = stop - start
11737db96d56Sopenharmony_ci    if not length:
11747db96d56Sopenharmony_ci        beginning -= 1        # empty ranges begin at line just before the range
11757db96d56Sopenharmony_ci    if length <= 1:
11767db96d56Sopenharmony_ci        return '{}'.format(beginning)
11777db96d56Sopenharmony_ci    return '{},{}'.format(beginning, beginning + length - 1)
11787db96d56Sopenharmony_ci
11797db96d56Sopenharmony_ci# See http://www.unix.org/single_unix_specification/
11807db96d56Sopenharmony_cidef context_diff(a, b, fromfile='', tofile='',
11817db96d56Sopenharmony_ci                 fromfiledate='', tofiledate='', n=3, lineterm='\n'):
11827db96d56Sopenharmony_ci    r"""
11837db96d56Sopenharmony_ci    Compare two sequences of lines; generate the delta as a context diff.
11847db96d56Sopenharmony_ci
11857db96d56Sopenharmony_ci    Context diffs are a compact way of showing line changes and a few
11867db96d56Sopenharmony_ci    lines of context.  The number of context lines is set by 'n' which
11877db96d56Sopenharmony_ci    defaults to three.
11887db96d56Sopenharmony_ci
11897db96d56Sopenharmony_ci    By default, the diff control lines (those with *** or ---) are
11907db96d56Sopenharmony_ci    created with a trailing newline.  This is helpful so that inputs
11917db96d56Sopenharmony_ci    created from file.readlines() result in diffs that are suitable for
11927db96d56Sopenharmony_ci    file.writelines() since both the inputs and outputs have trailing
11937db96d56Sopenharmony_ci    newlines.
11947db96d56Sopenharmony_ci
11957db96d56Sopenharmony_ci    For inputs that do not have trailing newlines, set the lineterm
11967db96d56Sopenharmony_ci    argument to "" so that the output will be uniformly newline free.
11977db96d56Sopenharmony_ci
11987db96d56Sopenharmony_ci    The context diff format normally has a header for filenames and
11997db96d56Sopenharmony_ci    modification times.  Any or all of these may be specified using
12007db96d56Sopenharmony_ci    strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
12017db96d56Sopenharmony_ci    The modification times are normally expressed in the ISO 8601 format.
12027db96d56Sopenharmony_ci    If not specified, the strings default to blanks.
12037db96d56Sopenharmony_ci
12047db96d56Sopenharmony_ci    Example:
12057db96d56Sopenharmony_ci
12067db96d56Sopenharmony_ci    >>> print(''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(True),
12077db96d56Sopenharmony_ci    ...       'zero\none\ntree\nfour\n'.splitlines(True), 'Original', 'Current')),
12087db96d56Sopenharmony_ci    ...       end="")
12097db96d56Sopenharmony_ci    *** Original
12107db96d56Sopenharmony_ci    --- Current
12117db96d56Sopenharmony_ci    ***************
12127db96d56Sopenharmony_ci    *** 1,4 ****
12137db96d56Sopenharmony_ci      one
12147db96d56Sopenharmony_ci    ! two
12157db96d56Sopenharmony_ci    ! three
12167db96d56Sopenharmony_ci      four
12177db96d56Sopenharmony_ci    --- 1,4 ----
12187db96d56Sopenharmony_ci    + zero
12197db96d56Sopenharmony_ci      one
12207db96d56Sopenharmony_ci    ! tree
12217db96d56Sopenharmony_ci      four
12227db96d56Sopenharmony_ci    """
12237db96d56Sopenharmony_ci
12247db96d56Sopenharmony_ci    _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
12257db96d56Sopenharmony_ci    prefix = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
12267db96d56Sopenharmony_ci    started = False
12277db96d56Sopenharmony_ci    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
12287db96d56Sopenharmony_ci        if not started:
12297db96d56Sopenharmony_ci            started = True
12307db96d56Sopenharmony_ci            fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
12317db96d56Sopenharmony_ci            todate = '\t{}'.format(tofiledate) if tofiledate else ''
12327db96d56Sopenharmony_ci            yield '*** {}{}{}'.format(fromfile, fromdate, lineterm)
12337db96d56Sopenharmony_ci            yield '--- {}{}{}'.format(tofile, todate, lineterm)
12347db96d56Sopenharmony_ci
12357db96d56Sopenharmony_ci        first, last = group[0], group[-1]
12367db96d56Sopenharmony_ci        yield '***************' + lineterm
12377db96d56Sopenharmony_ci
12387db96d56Sopenharmony_ci        file1_range = _format_range_context(first[1], last[2])
12397db96d56Sopenharmony_ci        yield '*** {} ****{}'.format(file1_range, lineterm)
12407db96d56Sopenharmony_ci
12417db96d56Sopenharmony_ci        if any(tag in {'replace', 'delete'} for tag, _, _, _, _ in group):
12427db96d56Sopenharmony_ci            for tag, i1, i2, _, _ in group:
12437db96d56Sopenharmony_ci                if tag != 'insert':
12447db96d56Sopenharmony_ci                    for line in a[i1:i2]:
12457db96d56Sopenharmony_ci                        yield prefix[tag] + line
12467db96d56Sopenharmony_ci
12477db96d56Sopenharmony_ci        file2_range = _format_range_context(first[3], last[4])
12487db96d56Sopenharmony_ci        yield '--- {} ----{}'.format(file2_range, lineterm)
12497db96d56Sopenharmony_ci
12507db96d56Sopenharmony_ci        if any(tag in {'replace', 'insert'} for tag, _, _, _, _ in group):
12517db96d56Sopenharmony_ci            for tag, _, _, j1, j2 in group:
12527db96d56Sopenharmony_ci                if tag != 'delete':
12537db96d56Sopenharmony_ci                    for line in b[j1:j2]:
12547db96d56Sopenharmony_ci                        yield prefix[tag] + line
12557db96d56Sopenharmony_ci
12567db96d56Sopenharmony_cidef _check_types(a, b, *args):
12577db96d56Sopenharmony_ci    # Checking types is weird, but the alternative is garbled output when
12587db96d56Sopenharmony_ci    # someone passes mixed bytes and str to {unified,context}_diff(). E.g.
12597db96d56Sopenharmony_ci    # without this check, passing filenames as bytes results in output like
12607db96d56Sopenharmony_ci    #   --- b'oldfile.txt'
12617db96d56Sopenharmony_ci    #   +++ b'newfile.txt'
12627db96d56Sopenharmony_ci    # because of how str.format() incorporates bytes objects.
12637db96d56Sopenharmony_ci    if a and not isinstance(a[0], str):
12647db96d56Sopenharmony_ci        raise TypeError('lines to compare must be str, not %s (%r)' %
12657db96d56Sopenharmony_ci                        (type(a[0]).__name__, a[0]))
12667db96d56Sopenharmony_ci    if b and not isinstance(b[0], str):
12677db96d56Sopenharmony_ci        raise TypeError('lines to compare must be str, not %s (%r)' %
12687db96d56Sopenharmony_ci                        (type(b[0]).__name__, b[0]))
12697db96d56Sopenharmony_ci    for arg in args:
12707db96d56Sopenharmony_ci        if not isinstance(arg, str):
12717db96d56Sopenharmony_ci            raise TypeError('all arguments must be str, not: %r' % (arg,))
12727db96d56Sopenharmony_ci
12737db96d56Sopenharmony_cidef diff_bytes(dfunc, a, b, fromfile=b'', tofile=b'',
12747db96d56Sopenharmony_ci               fromfiledate=b'', tofiledate=b'', n=3, lineterm=b'\n'):
12757db96d56Sopenharmony_ci    r"""
12767db96d56Sopenharmony_ci    Compare `a` and `b`, two sequences of lines represented as bytes rather
12777db96d56Sopenharmony_ci    than str. This is a wrapper for `dfunc`, which is typically either
12787db96d56Sopenharmony_ci    unified_diff() or context_diff(). Inputs are losslessly converted to
12797db96d56Sopenharmony_ci    strings so that `dfunc` only has to worry about strings, and encoded
12807db96d56Sopenharmony_ci    back to bytes on return. This is necessary to compare files with
12817db96d56Sopenharmony_ci    unknown or inconsistent encoding. All other inputs (except `n`) must be
12827db96d56Sopenharmony_ci    bytes rather than str.
12837db96d56Sopenharmony_ci    """
12847db96d56Sopenharmony_ci    def decode(s):
12857db96d56Sopenharmony_ci        try:
12867db96d56Sopenharmony_ci            return s.decode('ascii', 'surrogateescape')
12877db96d56Sopenharmony_ci        except AttributeError as err:
12887db96d56Sopenharmony_ci            msg = ('all arguments must be bytes, not %s (%r)' %
12897db96d56Sopenharmony_ci                   (type(s).__name__, s))
12907db96d56Sopenharmony_ci            raise TypeError(msg) from err
12917db96d56Sopenharmony_ci    a = list(map(decode, a))
12927db96d56Sopenharmony_ci    b = list(map(decode, b))
12937db96d56Sopenharmony_ci    fromfile = decode(fromfile)
12947db96d56Sopenharmony_ci    tofile = decode(tofile)
12957db96d56Sopenharmony_ci    fromfiledate = decode(fromfiledate)
12967db96d56Sopenharmony_ci    tofiledate = decode(tofiledate)
12977db96d56Sopenharmony_ci    lineterm = decode(lineterm)
12987db96d56Sopenharmony_ci
12997db96d56Sopenharmony_ci    lines = dfunc(a, b, fromfile, tofile, fromfiledate, tofiledate, n, lineterm)
13007db96d56Sopenharmony_ci    for line in lines:
13017db96d56Sopenharmony_ci        yield line.encode('ascii', 'surrogateescape')
13027db96d56Sopenharmony_ci
13037db96d56Sopenharmony_cidef ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
13047db96d56Sopenharmony_ci    r"""
13057db96d56Sopenharmony_ci    Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
13067db96d56Sopenharmony_ci
13077db96d56Sopenharmony_ci    Optional keyword parameters `linejunk` and `charjunk` are for filter
13087db96d56Sopenharmony_ci    functions, or can be None:
13097db96d56Sopenharmony_ci
13107db96d56Sopenharmony_ci    - linejunk: A function that should accept a single string argument and
13117db96d56Sopenharmony_ci      return true iff the string is junk.  The default is None, and is
13127db96d56Sopenharmony_ci      recommended; the underlying SequenceMatcher class has an adaptive
13137db96d56Sopenharmony_ci      notion of "noise" lines.
13147db96d56Sopenharmony_ci
13157db96d56Sopenharmony_ci    - charjunk: A function that accepts a character (string of length
13167db96d56Sopenharmony_ci      1), and returns true iff the character is junk. The default is
13177db96d56Sopenharmony_ci      the module-level function IS_CHARACTER_JUNK, which filters out
13187db96d56Sopenharmony_ci      whitespace characters (a blank or tab; note: it's a bad idea to
13197db96d56Sopenharmony_ci      include newline in this!).
13207db96d56Sopenharmony_ci
13217db96d56Sopenharmony_ci    Tools/scripts/ndiff.py is a command-line front-end to this function.
13227db96d56Sopenharmony_ci
13237db96d56Sopenharmony_ci    Example:
13247db96d56Sopenharmony_ci
13257db96d56Sopenharmony_ci    >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True),
13267db96d56Sopenharmony_ci    ...              'ore\ntree\nemu\n'.splitlines(keepends=True))
13277db96d56Sopenharmony_ci    >>> print(''.join(diff), end="")
13287db96d56Sopenharmony_ci    - one
13297db96d56Sopenharmony_ci    ?  ^
13307db96d56Sopenharmony_ci    + ore
13317db96d56Sopenharmony_ci    ?  ^
13327db96d56Sopenharmony_ci    - two
13337db96d56Sopenharmony_ci    - three
13347db96d56Sopenharmony_ci    ?  -
13357db96d56Sopenharmony_ci    + tree
13367db96d56Sopenharmony_ci    + emu
13377db96d56Sopenharmony_ci    """
13387db96d56Sopenharmony_ci    return Differ(linejunk, charjunk).compare(a, b)
13397db96d56Sopenharmony_ci
13407db96d56Sopenharmony_cidef _mdiff(fromlines, tolines, context=None, linejunk=None,
13417db96d56Sopenharmony_ci           charjunk=IS_CHARACTER_JUNK):
13427db96d56Sopenharmony_ci    r"""Returns generator yielding marked up from/to side by side differences.
13437db96d56Sopenharmony_ci
13447db96d56Sopenharmony_ci    Arguments:
13457db96d56Sopenharmony_ci    fromlines -- list of text lines to compared to tolines
13467db96d56Sopenharmony_ci    tolines -- list of text lines to be compared to fromlines
13477db96d56Sopenharmony_ci    context -- number of context lines to display on each side of difference,
13487db96d56Sopenharmony_ci               if None, all from/to text lines will be generated.
13497db96d56Sopenharmony_ci    linejunk -- passed on to ndiff (see ndiff documentation)
13507db96d56Sopenharmony_ci    charjunk -- passed on to ndiff (see ndiff documentation)
13517db96d56Sopenharmony_ci
13527db96d56Sopenharmony_ci    This function returns an iterator which returns a tuple:
13537db96d56Sopenharmony_ci    (from line tuple, to line tuple, boolean flag)
13547db96d56Sopenharmony_ci
13557db96d56Sopenharmony_ci    from/to line tuple -- (line num, line text)
13567db96d56Sopenharmony_ci        line num -- integer or None (to indicate a context separation)
13577db96d56Sopenharmony_ci        line text -- original line text with following markers inserted:
13587db96d56Sopenharmony_ci            '\0+' -- marks start of added text
13597db96d56Sopenharmony_ci            '\0-' -- marks start of deleted text
13607db96d56Sopenharmony_ci            '\0^' -- marks start of changed text
13617db96d56Sopenharmony_ci            '\1' -- marks end of added/deleted/changed text
13627db96d56Sopenharmony_ci
13637db96d56Sopenharmony_ci    boolean flag -- None indicates context separation, True indicates
13647db96d56Sopenharmony_ci        either "from" or "to" line contains a change, otherwise False.
13657db96d56Sopenharmony_ci
13667db96d56Sopenharmony_ci    This function/iterator was originally developed to generate side by side
13677db96d56Sopenharmony_ci    file difference for making HTML pages (see HtmlDiff class for example
13687db96d56Sopenharmony_ci    usage).
13697db96d56Sopenharmony_ci
13707db96d56Sopenharmony_ci    Note, this function utilizes the ndiff function to generate the side by
13717db96d56Sopenharmony_ci    side difference markup.  Optional ndiff arguments may be passed to this
13727db96d56Sopenharmony_ci    function and they in turn will be passed to ndiff.
13737db96d56Sopenharmony_ci    """
13747db96d56Sopenharmony_ci    import re
13757db96d56Sopenharmony_ci
13767db96d56Sopenharmony_ci    # regular expression for finding intraline change indices
13777db96d56Sopenharmony_ci    change_re = re.compile(r'(\++|\-+|\^+)')
13787db96d56Sopenharmony_ci
13797db96d56Sopenharmony_ci    # create the difference iterator to generate the differences
13807db96d56Sopenharmony_ci    diff_lines_iterator = ndiff(fromlines,tolines,linejunk,charjunk)
13817db96d56Sopenharmony_ci
13827db96d56Sopenharmony_ci    def _make_line(lines, format_key, side, num_lines=[0,0]):
13837db96d56Sopenharmony_ci        """Returns line of text with user's change markup and line formatting.
13847db96d56Sopenharmony_ci
13857db96d56Sopenharmony_ci        lines -- list of lines from the ndiff generator to produce a line of
13867db96d56Sopenharmony_ci                 text from.  When producing the line of text to return, the
13877db96d56Sopenharmony_ci                 lines used are removed from this list.
13887db96d56Sopenharmony_ci        format_key -- '+' return first line in list with "add" markup around
13897db96d56Sopenharmony_ci                          the entire line.
13907db96d56Sopenharmony_ci                      '-' return first line in list with "delete" markup around
13917db96d56Sopenharmony_ci                          the entire line.
13927db96d56Sopenharmony_ci                      '?' return first line in list with add/delete/change
13937db96d56Sopenharmony_ci                          intraline markup (indices obtained from second line)
13947db96d56Sopenharmony_ci                      None return first line in list with no markup
13957db96d56Sopenharmony_ci        side -- indice into the num_lines list (0=from,1=to)
13967db96d56Sopenharmony_ci        num_lines -- from/to current line number.  This is NOT intended to be a
13977db96d56Sopenharmony_ci                     passed parameter.  It is present as a keyword argument to
13987db96d56Sopenharmony_ci                     maintain memory of the current line numbers between calls
13997db96d56Sopenharmony_ci                     of this function.
14007db96d56Sopenharmony_ci
14017db96d56Sopenharmony_ci        Note, this function is purposefully not defined at the module scope so
14027db96d56Sopenharmony_ci        that data it needs from its parent function (within whose context it
14037db96d56Sopenharmony_ci        is defined) does not need to be of module scope.
14047db96d56Sopenharmony_ci        """
14057db96d56Sopenharmony_ci        num_lines[side] += 1
14067db96d56Sopenharmony_ci        # Handle case where no user markup is to be added, just return line of
14077db96d56Sopenharmony_ci        # text with user's line format to allow for usage of the line number.
14087db96d56Sopenharmony_ci        if format_key is None:
14097db96d56Sopenharmony_ci            return (num_lines[side],lines.pop(0)[2:])
14107db96d56Sopenharmony_ci        # Handle case of intraline changes
14117db96d56Sopenharmony_ci        if format_key == '?':
14127db96d56Sopenharmony_ci            text, markers = lines.pop(0), lines.pop(0)
14137db96d56Sopenharmony_ci            # find intraline changes (store change type and indices in tuples)
14147db96d56Sopenharmony_ci            sub_info = []
14157db96d56Sopenharmony_ci            def record_sub_info(match_object,sub_info=sub_info):
14167db96d56Sopenharmony_ci                sub_info.append([match_object.group(1)[0],match_object.span()])
14177db96d56Sopenharmony_ci                return match_object.group(1)
14187db96d56Sopenharmony_ci            change_re.sub(record_sub_info,markers)
14197db96d56Sopenharmony_ci            # process each tuple inserting our special marks that won't be
14207db96d56Sopenharmony_ci            # noticed by an xml/html escaper.
14217db96d56Sopenharmony_ci            for key,(begin,end) in reversed(sub_info):
14227db96d56Sopenharmony_ci                text = text[0:begin]+'\0'+key+text[begin:end]+'\1'+text[end:]
14237db96d56Sopenharmony_ci            text = text[2:]
14247db96d56Sopenharmony_ci        # Handle case of add/delete entire line
14257db96d56Sopenharmony_ci        else:
14267db96d56Sopenharmony_ci            text = lines.pop(0)[2:]
14277db96d56Sopenharmony_ci            # if line of text is just a newline, insert a space so there is
14287db96d56Sopenharmony_ci            # something for the user to highlight and see.
14297db96d56Sopenharmony_ci            if not text:
14307db96d56Sopenharmony_ci                text = ' '
14317db96d56Sopenharmony_ci            # insert marks that won't be noticed by an xml/html escaper.
14327db96d56Sopenharmony_ci            text = '\0' + format_key + text + '\1'
14337db96d56Sopenharmony_ci        # Return line of text, first allow user's line formatter to do its
14347db96d56Sopenharmony_ci        # thing (such as adding the line number) then replace the special
14357db96d56Sopenharmony_ci        # marks with what the user's change markup.
14367db96d56Sopenharmony_ci        return (num_lines[side],text)
14377db96d56Sopenharmony_ci
14387db96d56Sopenharmony_ci    def _line_iterator():
14397db96d56Sopenharmony_ci        """Yields from/to lines of text with a change indication.
14407db96d56Sopenharmony_ci
14417db96d56Sopenharmony_ci        This function is an iterator.  It itself pulls lines from a
14427db96d56Sopenharmony_ci        differencing iterator, processes them and yields them.  When it can
14437db96d56Sopenharmony_ci        it yields both a "from" and a "to" line, otherwise it will yield one
14447db96d56Sopenharmony_ci        or the other.  In addition to yielding the lines of from/to text, a
14457db96d56Sopenharmony_ci        boolean flag is yielded to indicate if the text line(s) have
14467db96d56Sopenharmony_ci        differences in them.
14477db96d56Sopenharmony_ci
14487db96d56Sopenharmony_ci        Note, this function is purposefully not defined at the module scope so
14497db96d56Sopenharmony_ci        that data it needs from its parent function (within whose context it
14507db96d56Sopenharmony_ci        is defined) does not need to be of module scope.
14517db96d56Sopenharmony_ci        """
14527db96d56Sopenharmony_ci        lines = []
14537db96d56Sopenharmony_ci        num_blanks_pending, num_blanks_to_yield = 0, 0
14547db96d56Sopenharmony_ci        while True:
14557db96d56Sopenharmony_ci            # Load up next 4 lines so we can look ahead, create strings which
14567db96d56Sopenharmony_ci            # are a concatenation of the first character of each of the 4 lines
14577db96d56Sopenharmony_ci            # so we can do some very readable comparisons.
14587db96d56Sopenharmony_ci            while len(lines) < 4:
14597db96d56Sopenharmony_ci                lines.append(next(diff_lines_iterator, 'X'))
14607db96d56Sopenharmony_ci            s = ''.join([line[0] for line in lines])
14617db96d56Sopenharmony_ci            if s.startswith('X'):
14627db96d56Sopenharmony_ci                # When no more lines, pump out any remaining blank lines so the
14637db96d56Sopenharmony_ci                # corresponding add/delete lines get a matching blank line so
14647db96d56Sopenharmony_ci                # all line pairs get yielded at the next level.
14657db96d56Sopenharmony_ci                num_blanks_to_yield = num_blanks_pending
14667db96d56Sopenharmony_ci            elif s.startswith('-?+?'):
14677db96d56Sopenharmony_ci                # simple intraline change
14687db96d56Sopenharmony_ci                yield _make_line(lines,'?',0), _make_line(lines,'?',1), True
14697db96d56Sopenharmony_ci                continue
14707db96d56Sopenharmony_ci            elif s.startswith('--++'):
14717db96d56Sopenharmony_ci                # in delete block, add block coming: we do NOT want to get
14727db96d56Sopenharmony_ci                # caught up on blank lines yet, just process the delete line
14737db96d56Sopenharmony_ci                num_blanks_pending -= 1
14747db96d56Sopenharmony_ci                yield _make_line(lines,'-',0), None, True
14757db96d56Sopenharmony_ci                continue
14767db96d56Sopenharmony_ci            elif s.startswith(('--?+', '--+', '- ')):
14777db96d56Sopenharmony_ci                # in delete block and see an intraline change or unchanged line
14787db96d56Sopenharmony_ci                # coming: yield the delete line and then blanks
14797db96d56Sopenharmony_ci                from_line,to_line = _make_line(lines,'-',0), None
14807db96d56Sopenharmony_ci                num_blanks_to_yield,num_blanks_pending = num_blanks_pending-1,0
14817db96d56Sopenharmony_ci            elif s.startswith('-+?'):
14827db96d56Sopenharmony_ci                # intraline change
14837db96d56Sopenharmony_ci                yield _make_line(lines,None,0), _make_line(lines,'?',1), True
14847db96d56Sopenharmony_ci                continue
14857db96d56Sopenharmony_ci            elif s.startswith('-?+'):
14867db96d56Sopenharmony_ci                # intraline change
14877db96d56Sopenharmony_ci                yield _make_line(lines,'?',0), _make_line(lines,None,1), True
14887db96d56Sopenharmony_ci                continue
14897db96d56Sopenharmony_ci            elif s.startswith('-'):
14907db96d56Sopenharmony_ci                # delete FROM line
14917db96d56Sopenharmony_ci                num_blanks_pending -= 1
14927db96d56Sopenharmony_ci                yield _make_line(lines,'-',0), None, True
14937db96d56Sopenharmony_ci                continue
14947db96d56Sopenharmony_ci            elif s.startswith('+--'):
14957db96d56Sopenharmony_ci                # in add block, delete block coming: we do NOT want to get
14967db96d56Sopenharmony_ci                # caught up on blank lines yet, just process the add line
14977db96d56Sopenharmony_ci                num_blanks_pending += 1
14987db96d56Sopenharmony_ci                yield None, _make_line(lines,'+',1), True
14997db96d56Sopenharmony_ci                continue
15007db96d56Sopenharmony_ci            elif s.startswith(('+ ', '+-')):
15017db96d56Sopenharmony_ci                # will be leaving an add block: yield blanks then add line
15027db96d56Sopenharmony_ci                from_line, to_line = None, _make_line(lines,'+',1)
15037db96d56Sopenharmony_ci                num_blanks_to_yield,num_blanks_pending = num_blanks_pending+1,0
15047db96d56Sopenharmony_ci            elif s.startswith('+'):
15057db96d56Sopenharmony_ci                # inside an add block, yield the add line
15067db96d56Sopenharmony_ci                num_blanks_pending += 1
15077db96d56Sopenharmony_ci                yield None, _make_line(lines,'+',1), True
15087db96d56Sopenharmony_ci                continue
15097db96d56Sopenharmony_ci            elif s.startswith(' '):
15107db96d56Sopenharmony_ci                # unchanged text, yield it to both sides
15117db96d56Sopenharmony_ci                yield _make_line(lines[:],None,0),_make_line(lines,None,1),False
15127db96d56Sopenharmony_ci                continue
15137db96d56Sopenharmony_ci            # Catch up on the blank lines so when we yield the next from/to
15147db96d56Sopenharmony_ci            # pair, they are lined up.
15157db96d56Sopenharmony_ci            while(num_blanks_to_yield < 0):
15167db96d56Sopenharmony_ci                num_blanks_to_yield += 1
15177db96d56Sopenharmony_ci                yield None,('','\n'),True
15187db96d56Sopenharmony_ci            while(num_blanks_to_yield > 0):
15197db96d56Sopenharmony_ci                num_blanks_to_yield -= 1
15207db96d56Sopenharmony_ci                yield ('','\n'),None,True
15217db96d56Sopenharmony_ci            if s.startswith('X'):
15227db96d56Sopenharmony_ci                return
15237db96d56Sopenharmony_ci            else:
15247db96d56Sopenharmony_ci                yield from_line,to_line,True
15257db96d56Sopenharmony_ci
15267db96d56Sopenharmony_ci    def _line_pair_iterator():
15277db96d56Sopenharmony_ci        """Yields from/to lines of text with a change indication.
15287db96d56Sopenharmony_ci
15297db96d56Sopenharmony_ci        This function is an iterator.  It itself pulls lines from the line
15307db96d56Sopenharmony_ci        iterator.  Its difference from that iterator is that this function
15317db96d56Sopenharmony_ci        always yields a pair of from/to text lines (with the change
15327db96d56Sopenharmony_ci        indication).  If necessary it will collect single from/to lines
15337db96d56Sopenharmony_ci        until it has a matching pair from/to pair to yield.
15347db96d56Sopenharmony_ci
15357db96d56Sopenharmony_ci        Note, this function is purposefully not defined at the module scope so
15367db96d56Sopenharmony_ci        that data it needs from its parent function (within whose context it
15377db96d56Sopenharmony_ci        is defined) does not need to be of module scope.
15387db96d56Sopenharmony_ci        """
15397db96d56Sopenharmony_ci        line_iterator = _line_iterator()
15407db96d56Sopenharmony_ci        fromlines,tolines=[],[]
15417db96d56Sopenharmony_ci        while True:
15427db96d56Sopenharmony_ci            # Collecting lines of text until we have a from/to pair
15437db96d56Sopenharmony_ci            while (len(fromlines)==0 or len(tolines)==0):
15447db96d56Sopenharmony_ci                try:
15457db96d56Sopenharmony_ci                    from_line, to_line, found_diff = next(line_iterator)
15467db96d56Sopenharmony_ci                except StopIteration:
15477db96d56Sopenharmony_ci                    return
15487db96d56Sopenharmony_ci                if from_line is not None:
15497db96d56Sopenharmony_ci                    fromlines.append((from_line,found_diff))
15507db96d56Sopenharmony_ci                if to_line is not None:
15517db96d56Sopenharmony_ci                    tolines.append((to_line,found_diff))
15527db96d56Sopenharmony_ci            # Once we have a pair, remove them from the collection and yield it
15537db96d56Sopenharmony_ci            from_line, fromDiff = fromlines.pop(0)
15547db96d56Sopenharmony_ci            to_line, to_diff = tolines.pop(0)
15557db96d56Sopenharmony_ci            yield (from_line,to_line,fromDiff or to_diff)
15567db96d56Sopenharmony_ci
15577db96d56Sopenharmony_ci    # Handle case where user does not want context differencing, just yield
15587db96d56Sopenharmony_ci    # them up without doing anything else with them.
15597db96d56Sopenharmony_ci    line_pair_iterator = _line_pair_iterator()
15607db96d56Sopenharmony_ci    if context is None:
15617db96d56Sopenharmony_ci        yield from line_pair_iterator
15627db96d56Sopenharmony_ci    # Handle case where user wants context differencing.  We must do some
15637db96d56Sopenharmony_ci    # storage of lines until we know for sure that they are to be yielded.
15647db96d56Sopenharmony_ci    else:
15657db96d56Sopenharmony_ci        context += 1
15667db96d56Sopenharmony_ci        lines_to_write = 0
15677db96d56Sopenharmony_ci        while True:
15687db96d56Sopenharmony_ci            # Store lines up until we find a difference, note use of a
15697db96d56Sopenharmony_ci            # circular queue because we only need to keep around what
15707db96d56Sopenharmony_ci            # we need for context.
15717db96d56Sopenharmony_ci            index, contextLines = 0, [None]*(context)
15727db96d56Sopenharmony_ci            found_diff = False
15737db96d56Sopenharmony_ci            while(found_diff is False):
15747db96d56Sopenharmony_ci                try:
15757db96d56Sopenharmony_ci                    from_line, to_line, found_diff = next(line_pair_iterator)
15767db96d56Sopenharmony_ci                except StopIteration:
15777db96d56Sopenharmony_ci                    return
15787db96d56Sopenharmony_ci                i = index % context
15797db96d56Sopenharmony_ci                contextLines[i] = (from_line, to_line, found_diff)
15807db96d56Sopenharmony_ci                index += 1
15817db96d56Sopenharmony_ci            # Yield lines that we have collected so far, but first yield
15827db96d56Sopenharmony_ci            # the user's separator.
15837db96d56Sopenharmony_ci            if index > context:
15847db96d56Sopenharmony_ci                yield None, None, None
15857db96d56Sopenharmony_ci                lines_to_write = context
15867db96d56Sopenharmony_ci            else:
15877db96d56Sopenharmony_ci                lines_to_write = index
15887db96d56Sopenharmony_ci                index = 0
15897db96d56Sopenharmony_ci            while(lines_to_write):
15907db96d56Sopenharmony_ci                i = index % context
15917db96d56Sopenharmony_ci                index += 1
15927db96d56Sopenharmony_ci                yield contextLines[i]
15937db96d56Sopenharmony_ci                lines_to_write -= 1
15947db96d56Sopenharmony_ci            # Now yield the context lines after the change
15957db96d56Sopenharmony_ci            lines_to_write = context-1
15967db96d56Sopenharmony_ci            try:
15977db96d56Sopenharmony_ci                while(lines_to_write):
15987db96d56Sopenharmony_ci                    from_line, to_line, found_diff = next(line_pair_iterator)
15997db96d56Sopenharmony_ci                    # If another change within the context, extend the context
16007db96d56Sopenharmony_ci                    if found_diff:
16017db96d56Sopenharmony_ci                        lines_to_write = context-1
16027db96d56Sopenharmony_ci                    else:
16037db96d56Sopenharmony_ci                        lines_to_write -= 1
16047db96d56Sopenharmony_ci                    yield from_line, to_line, found_diff
16057db96d56Sopenharmony_ci            except StopIteration:
16067db96d56Sopenharmony_ci                # Catch exception from next() and return normally
16077db96d56Sopenharmony_ci                return
16087db96d56Sopenharmony_ci
16097db96d56Sopenharmony_ci
16107db96d56Sopenharmony_ci_file_template = """
16117db96d56Sopenharmony_ci<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
16127db96d56Sopenharmony_ci          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
16137db96d56Sopenharmony_ci
16147db96d56Sopenharmony_ci<html>
16157db96d56Sopenharmony_ci
16167db96d56Sopenharmony_ci<head>
16177db96d56Sopenharmony_ci    <meta http-equiv="Content-Type"
16187db96d56Sopenharmony_ci          content="text/html; charset=%(charset)s" />
16197db96d56Sopenharmony_ci    <title></title>
16207db96d56Sopenharmony_ci    <style type="text/css">%(styles)s
16217db96d56Sopenharmony_ci    </style>
16227db96d56Sopenharmony_ci</head>
16237db96d56Sopenharmony_ci
16247db96d56Sopenharmony_ci<body>
16257db96d56Sopenharmony_ci    %(table)s%(legend)s
16267db96d56Sopenharmony_ci</body>
16277db96d56Sopenharmony_ci
16287db96d56Sopenharmony_ci</html>"""
16297db96d56Sopenharmony_ci
16307db96d56Sopenharmony_ci_styles = """
16317db96d56Sopenharmony_ci        table.diff {font-family:Courier; border:medium;}
16327db96d56Sopenharmony_ci        .diff_header {background-color:#e0e0e0}
16337db96d56Sopenharmony_ci        td.diff_header {text-align:right}
16347db96d56Sopenharmony_ci        .diff_next {background-color:#c0c0c0}
16357db96d56Sopenharmony_ci        .diff_add {background-color:#aaffaa}
16367db96d56Sopenharmony_ci        .diff_chg {background-color:#ffff77}
16377db96d56Sopenharmony_ci        .diff_sub {background-color:#ffaaaa}"""
16387db96d56Sopenharmony_ci
16397db96d56Sopenharmony_ci_table_template = """
16407db96d56Sopenharmony_ci    <table class="diff" id="difflib_chg_%(prefix)s_top"
16417db96d56Sopenharmony_ci           cellspacing="0" cellpadding="0" rules="groups" >
16427db96d56Sopenharmony_ci        <colgroup></colgroup> <colgroup></colgroup> <colgroup></colgroup>
16437db96d56Sopenharmony_ci        <colgroup></colgroup> <colgroup></colgroup> <colgroup></colgroup>
16447db96d56Sopenharmony_ci        %(header_row)s
16457db96d56Sopenharmony_ci        <tbody>
16467db96d56Sopenharmony_ci%(data_rows)s        </tbody>
16477db96d56Sopenharmony_ci    </table>"""
16487db96d56Sopenharmony_ci
16497db96d56Sopenharmony_ci_legend = """
16507db96d56Sopenharmony_ci    <table class="diff" summary="Legends">
16517db96d56Sopenharmony_ci        <tr> <th colspan="2"> Legends </th> </tr>
16527db96d56Sopenharmony_ci        <tr> <td> <table border="" summary="Colors">
16537db96d56Sopenharmony_ci                      <tr><th> Colors </th> </tr>
16547db96d56Sopenharmony_ci                      <tr><td class="diff_add">&nbsp;Added&nbsp;</td></tr>
16557db96d56Sopenharmony_ci                      <tr><td class="diff_chg">Changed</td> </tr>
16567db96d56Sopenharmony_ci                      <tr><td class="diff_sub">Deleted</td> </tr>
16577db96d56Sopenharmony_ci                  </table></td>
16587db96d56Sopenharmony_ci             <td> <table border="" summary="Links">
16597db96d56Sopenharmony_ci                      <tr><th colspan="2"> Links </th> </tr>
16607db96d56Sopenharmony_ci                      <tr><td>(f)irst change</td> </tr>
16617db96d56Sopenharmony_ci                      <tr><td>(n)ext change</td> </tr>
16627db96d56Sopenharmony_ci                      <tr><td>(t)op</td> </tr>
16637db96d56Sopenharmony_ci                  </table></td> </tr>
16647db96d56Sopenharmony_ci    </table>"""
16657db96d56Sopenharmony_ci
16667db96d56Sopenharmony_ciclass HtmlDiff(object):
16677db96d56Sopenharmony_ci    """For producing HTML side by side comparison with change highlights.
16687db96d56Sopenharmony_ci
16697db96d56Sopenharmony_ci    This class can be used to create an HTML table (or a complete HTML file
16707db96d56Sopenharmony_ci    containing the table) showing a side by side, line by line comparison
16717db96d56Sopenharmony_ci    of text with inter-line and intra-line change highlights.  The table can
16727db96d56Sopenharmony_ci    be generated in either full or contextual difference mode.
16737db96d56Sopenharmony_ci
16747db96d56Sopenharmony_ci    The following methods are provided for HTML generation:
16757db96d56Sopenharmony_ci
16767db96d56Sopenharmony_ci    make_table -- generates HTML for a single side by side table
16777db96d56Sopenharmony_ci    make_file -- generates complete HTML file with a single side by side table
16787db96d56Sopenharmony_ci
16797db96d56Sopenharmony_ci    See tools/scripts/diff.py for an example usage of this class.
16807db96d56Sopenharmony_ci    """
16817db96d56Sopenharmony_ci
16827db96d56Sopenharmony_ci    _file_template = _file_template
16837db96d56Sopenharmony_ci    _styles = _styles
16847db96d56Sopenharmony_ci    _table_template = _table_template
16857db96d56Sopenharmony_ci    _legend = _legend
16867db96d56Sopenharmony_ci    _default_prefix = 0
16877db96d56Sopenharmony_ci
16887db96d56Sopenharmony_ci    def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None,
16897db96d56Sopenharmony_ci                 charjunk=IS_CHARACTER_JUNK):
16907db96d56Sopenharmony_ci        """HtmlDiff instance initializer
16917db96d56Sopenharmony_ci
16927db96d56Sopenharmony_ci        Arguments:
16937db96d56Sopenharmony_ci        tabsize -- tab stop spacing, defaults to 8.
16947db96d56Sopenharmony_ci        wrapcolumn -- column number where lines are broken and wrapped,
16957db96d56Sopenharmony_ci            defaults to None where lines are not wrapped.
16967db96d56Sopenharmony_ci        linejunk,charjunk -- keyword arguments passed into ndiff() (used by
16977db96d56Sopenharmony_ci            HtmlDiff() to generate the side by side HTML differences).  See
16987db96d56Sopenharmony_ci            ndiff() documentation for argument default values and descriptions.
16997db96d56Sopenharmony_ci        """
17007db96d56Sopenharmony_ci        self._tabsize = tabsize
17017db96d56Sopenharmony_ci        self._wrapcolumn = wrapcolumn
17027db96d56Sopenharmony_ci        self._linejunk = linejunk
17037db96d56Sopenharmony_ci        self._charjunk = charjunk
17047db96d56Sopenharmony_ci
17057db96d56Sopenharmony_ci    def make_file(self, fromlines, tolines, fromdesc='', todesc='',
17067db96d56Sopenharmony_ci                  context=False, numlines=5, *, charset='utf-8'):
17077db96d56Sopenharmony_ci        """Returns HTML file of side by side comparison with change highlights
17087db96d56Sopenharmony_ci
17097db96d56Sopenharmony_ci        Arguments:
17107db96d56Sopenharmony_ci        fromlines -- list of "from" lines
17117db96d56Sopenharmony_ci        tolines -- list of "to" lines
17127db96d56Sopenharmony_ci        fromdesc -- "from" file column header string
17137db96d56Sopenharmony_ci        todesc -- "to" file column header string
17147db96d56Sopenharmony_ci        context -- set to True for contextual differences (defaults to False
17157db96d56Sopenharmony_ci            which shows full differences).
17167db96d56Sopenharmony_ci        numlines -- number of context lines.  When context is set True,
17177db96d56Sopenharmony_ci            controls number of lines displayed before and after the change.
17187db96d56Sopenharmony_ci            When context is False, controls the number of lines to place
17197db96d56Sopenharmony_ci            the "next" link anchors before the next change (so click of
17207db96d56Sopenharmony_ci            "next" link jumps to just before the change).
17217db96d56Sopenharmony_ci        charset -- charset of the HTML document
17227db96d56Sopenharmony_ci        """
17237db96d56Sopenharmony_ci
17247db96d56Sopenharmony_ci        return (self._file_template % dict(
17257db96d56Sopenharmony_ci            styles=self._styles,
17267db96d56Sopenharmony_ci            legend=self._legend,
17277db96d56Sopenharmony_ci            table=self.make_table(fromlines, tolines, fromdesc, todesc,
17287db96d56Sopenharmony_ci                                  context=context, numlines=numlines),
17297db96d56Sopenharmony_ci            charset=charset
17307db96d56Sopenharmony_ci        )).encode(charset, 'xmlcharrefreplace').decode(charset)
17317db96d56Sopenharmony_ci
17327db96d56Sopenharmony_ci    def _tab_newline_replace(self,fromlines,tolines):
17337db96d56Sopenharmony_ci        """Returns from/to line lists with tabs expanded and newlines removed.
17347db96d56Sopenharmony_ci
17357db96d56Sopenharmony_ci        Instead of tab characters being replaced by the number of spaces
17367db96d56Sopenharmony_ci        needed to fill in to the next tab stop, this function will fill
17377db96d56Sopenharmony_ci        the space with tab characters.  This is done so that the difference
17387db96d56Sopenharmony_ci        algorithms can identify changes in a file when tabs are replaced by
17397db96d56Sopenharmony_ci        spaces and vice versa.  At the end of the HTML generation, the tab
17407db96d56Sopenharmony_ci        characters will be replaced with a nonbreakable space.
17417db96d56Sopenharmony_ci        """
17427db96d56Sopenharmony_ci        def expand_tabs(line):
17437db96d56Sopenharmony_ci            # hide real spaces
17447db96d56Sopenharmony_ci            line = line.replace(' ','\0')
17457db96d56Sopenharmony_ci            # expand tabs into spaces
17467db96d56Sopenharmony_ci            line = line.expandtabs(self._tabsize)
17477db96d56Sopenharmony_ci            # replace spaces from expanded tabs back into tab characters
17487db96d56Sopenharmony_ci            # (we'll replace them with markup after we do differencing)
17497db96d56Sopenharmony_ci            line = line.replace(' ','\t')
17507db96d56Sopenharmony_ci            return line.replace('\0',' ').rstrip('\n')
17517db96d56Sopenharmony_ci        fromlines = [expand_tabs(line) for line in fromlines]
17527db96d56Sopenharmony_ci        tolines = [expand_tabs(line) for line in tolines]
17537db96d56Sopenharmony_ci        return fromlines,tolines
17547db96d56Sopenharmony_ci
17557db96d56Sopenharmony_ci    def _split_line(self,data_list,line_num,text):
17567db96d56Sopenharmony_ci        """Builds list of text lines by splitting text lines at wrap point
17577db96d56Sopenharmony_ci
17587db96d56Sopenharmony_ci        This function will determine if the input text line needs to be
17597db96d56Sopenharmony_ci        wrapped (split) into separate lines.  If so, the first wrap point
17607db96d56Sopenharmony_ci        will be determined and the first line appended to the output
17617db96d56Sopenharmony_ci        text line list.  This function is used recursively to handle
17627db96d56Sopenharmony_ci        the second part of the split line to further split it.
17637db96d56Sopenharmony_ci        """
17647db96d56Sopenharmony_ci        # if blank line or context separator, just add it to the output list
17657db96d56Sopenharmony_ci        if not line_num:
17667db96d56Sopenharmony_ci            data_list.append((line_num,text))
17677db96d56Sopenharmony_ci            return
17687db96d56Sopenharmony_ci
17697db96d56Sopenharmony_ci        # if line text doesn't need wrapping, just add it to the output list
17707db96d56Sopenharmony_ci        size = len(text)
17717db96d56Sopenharmony_ci        max = self._wrapcolumn
17727db96d56Sopenharmony_ci        if (size <= max) or ((size -(text.count('\0')*3)) <= max):
17737db96d56Sopenharmony_ci            data_list.append((line_num,text))
17747db96d56Sopenharmony_ci            return
17757db96d56Sopenharmony_ci
17767db96d56Sopenharmony_ci        # scan text looking for the wrap point, keeping track if the wrap
17777db96d56Sopenharmony_ci        # point is inside markers
17787db96d56Sopenharmony_ci        i = 0
17797db96d56Sopenharmony_ci        n = 0
17807db96d56Sopenharmony_ci        mark = ''
17817db96d56Sopenharmony_ci        while n < max and i < size:
17827db96d56Sopenharmony_ci            if text[i] == '\0':
17837db96d56Sopenharmony_ci                i += 1
17847db96d56Sopenharmony_ci                mark = text[i]
17857db96d56Sopenharmony_ci                i += 1
17867db96d56Sopenharmony_ci            elif text[i] == '\1':
17877db96d56Sopenharmony_ci                i += 1
17887db96d56Sopenharmony_ci                mark = ''
17897db96d56Sopenharmony_ci            else:
17907db96d56Sopenharmony_ci                i += 1
17917db96d56Sopenharmony_ci                n += 1
17927db96d56Sopenharmony_ci
17937db96d56Sopenharmony_ci        # wrap point is inside text, break it up into separate lines
17947db96d56Sopenharmony_ci        line1 = text[:i]
17957db96d56Sopenharmony_ci        line2 = text[i:]
17967db96d56Sopenharmony_ci
17977db96d56Sopenharmony_ci        # if wrap point is inside markers, place end marker at end of first
17987db96d56Sopenharmony_ci        # line and start marker at beginning of second line because each
17997db96d56Sopenharmony_ci        # line will have its own table tag markup around it.
18007db96d56Sopenharmony_ci        if mark:
18017db96d56Sopenharmony_ci            line1 = line1 + '\1'
18027db96d56Sopenharmony_ci            line2 = '\0' + mark + line2
18037db96d56Sopenharmony_ci
18047db96d56Sopenharmony_ci        # tack on first line onto the output list
18057db96d56Sopenharmony_ci        data_list.append((line_num,line1))
18067db96d56Sopenharmony_ci
18077db96d56Sopenharmony_ci        # use this routine again to wrap the remaining text
18087db96d56Sopenharmony_ci        self._split_line(data_list,'>',line2)
18097db96d56Sopenharmony_ci
18107db96d56Sopenharmony_ci    def _line_wrapper(self,diffs):
18117db96d56Sopenharmony_ci        """Returns iterator that splits (wraps) mdiff text lines"""
18127db96d56Sopenharmony_ci
18137db96d56Sopenharmony_ci        # pull from/to data and flags from mdiff iterator
18147db96d56Sopenharmony_ci        for fromdata,todata,flag in diffs:
18157db96d56Sopenharmony_ci            # check for context separators and pass them through
18167db96d56Sopenharmony_ci            if flag is None:
18177db96d56Sopenharmony_ci                yield fromdata,todata,flag
18187db96d56Sopenharmony_ci                continue
18197db96d56Sopenharmony_ci            (fromline,fromtext),(toline,totext) = fromdata,todata
18207db96d56Sopenharmony_ci            # for each from/to line split it at the wrap column to form
18217db96d56Sopenharmony_ci            # list of text lines.
18227db96d56Sopenharmony_ci            fromlist,tolist = [],[]
18237db96d56Sopenharmony_ci            self._split_line(fromlist,fromline,fromtext)
18247db96d56Sopenharmony_ci            self._split_line(tolist,toline,totext)
18257db96d56Sopenharmony_ci            # yield from/to line in pairs inserting blank lines as
18267db96d56Sopenharmony_ci            # necessary when one side has more wrapped lines
18277db96d56Sopenharmony_ci            while fromlist or tolist:
18287db96d56Sopenharmony_ci                if fromlist:
18297db96d56Sopenharmony_ci                    fromdata = fromlist.pop(0)
18307db96d56Sopenharmony_ci                else:
18317db96d56Sopenharmony_ci                    fromdata = ('',' ')
18327db96d56Sopenharmony_ci                if tolist:
18337db96d56Sopenharmony_ci                    todata = tolist.pop(0)
18347db96d56Sopenharmony_ci                else:
18357db96d56Sopenharmony_ci                    todata = ('',' ')
18367db96d56Sopenharmony_ci                yield fromdata,todata,flag
18377db96d56Sopenharmony_ci
18387db96d56Sopenharmony_ci    def _collect_lines(self,diffs):
18397db96d56Sopenharmony_ci        """Collects mdiff output into separate lists
18407db96d56Sopenharmony_ci
18417db96d56Sopenharmony_ci        Before storing the mdiff from/to data into a list, it is converted
18427db96d56Sopenharmony_ci        into a single line of text with HTML markup.
18437db96d56Sopenharmony_ci        """
18447db96d56Sopenharmony_ci
18457db96d56Sopenharmony_ci        fromlist,tolist,flaglist = [],[],[]
18467db96d56Sopenharmony_ci        # pull from/to data and flags from mdiff style iterator
18477db96d56Sopenharmony_ci        for fromdata,todata,flag in diffs:
18487db96d56Sopenharmony_ci            try:
18497db96d56Sopenharmony_ci                # store HTML markup of the lines into the lists
18507db96d56Sopenharmony_ci                fromlist.append(self._format_line(0,flag,*fromdata))
18517db96d56Sopenharmony_ci                tolist.append(self._format_line(1,flag,*todata))
18527db96d56Sopenharmony_ci            except TypeError:
18537db96d56Sopenharmony_ci                # exceptions occur for lines where context separators go
18547db96d56Sopenharmony_ci                fromlist.append(None)
18557db96d56Sopenharmony_ci                tolist.append(None)
18567db96d56Sopenharmony_ci            flaglist.append(flag)
18577db96d56Sopenharmony_ci        return fromlist,tolist,flaglist
18587db96d56Sopenharmony_ci
18597db96d56Sopenharmony_ci    def _format_line(self,side,flag,linenum,text):
18607db96d56Sopenharmony_ci        """Returns HTML markup of "from" / "to" text lines
18617db96d56Sopenharmony_ci
18627db96d56Sopenharmony_ci        side -- 0 or 1 indicating "from" or "to" text
18637db96d56Sopenharmony_ci        flag -- indicates if difference on line
18647db96d56Sopenharmony_ci        linenum -- line number (used for line number column)
18657db96d56Sopenharmony_ci        text -- line text to be marked up
18667db96d56Sopenharmony_ci        """
18677db96d56Sopenharmony_ci        try:
18687db96d56Sopenharmony_ci            linenum = '%d' % linenum
18697db96d56Sopenharmony_ci            id = ' id="%s%s"' % (self._prefix[side],linenum)
18707db96d56Sopenharmony_ci        except TypeError:
18717db96d56Sopenharmony_ci            # handle blank lines where linenum is '>' or ''
18727db96d56Sopenharmony_ci            id = ''
18737db96d56Sopenharmony_ci        # replace those things that would get confused with HTML symbols
18747db96d56Sopenharmony_ci        text=text.replace("&","&amp;").replace(">","&gt;").replace("<","&lt;")
18757db96d56Sopenharmony_ci
18767db96d56Sopenharmony_ci        # make space non-breakable so they don't get compressed or line wrapped
18777db96d56Sopenharmony_ci        text = text.replace(' ','&nbsp;').rstrip()
18787db96d56Sopenharmony_ci
18797db96d56Sopenharmony_ci        return '<td class="diff_header"%s>%s</td><td nowrap="nowrap">%s</td>' \
18807db96d56Sopenharmony_ci               % (id,linenum,text)
18817db96d56Sopenharmony_ci
18827db96d56Sopenharmony_ci    def _make_prefix(self):
18837db96d56Sopenharmony_ci        """Create unique anchor prefixes"""
18847db96d56Sopenharmony_ci
18857db96d56Sopenharmony_ci        # Generate a unique anchor prefix so multiple tables
18867db96d56Sopenharmony_ci        # can exist on the same HTML page without conflicts.
18877db96d56Sopenharmony_ci        fromprefix = "from%d_" % HtmlDiff._default_prefix
18887db96d56Sopenharmony_ci        toprefix = "to%d_" % HtmlDiff._default_prefix
18897db96d56Sopenharmony_ci        HtmlDiff._default_prefix += 1
18907db96d56Sopenharmony_ci        # store prefixes so line format method has access
18917db96d56Sopenharmony_ci        self._prefix = [fromprefix,toprefix]
18927db96d56Sopenharmony_ci
18937db96d56Sopenharmony_ci    def _convert_flags(self,fromlist,tolist,flaglist,context,numlines):
18947db96d56Sopenharmony_ci        """Makes list of "next" links"""
18957db96d56Sopenharmony_ci
18967db96d56Sopenharmony_ci        # all anchor names will be generated using the unique "to" prefix
18977db96d56Sopenharmony_ci        toprefix = self._prefix[1]
18987db96d56Sopenharmony_ci
18997db96d56Sopenharmony_ci        # process change flags, generating middle column of next anchors/links
19007db96d56Sopenharmony_ci        next_id = ['']*len(flaglist)
19017db96d56Sopenharmony_ci        next_href = ['']*len(flaglist)
19027db96d56Sopenharmony_ci        num_chg, in_change = 0, False
19037db96d56Sopenharmony_ci        last = 0
19047db96d56Sopenharmony_ci        for i,flag in enumerate(flaglist):
19057db96d56Sopenharmony_ci            if flag:
19067db96d56Sopenharmony_ci                if not in_change:
19077db96d56Sopenharmony_ci                    in_change = True
19087db96d56Sopenharmony_ci                    last = i
19097db96d56Sopenharmony_ci                    # at the beginning of a change, drop an anchor a few lines
19107db96d56Sopenharmony_ci                    # (the context lines) before the change for the previous
19117db96d56Sopenharmony_ci                    # link
19127db96d56Sopenharmony_ci                    i = max([0,i-numlines])
19137db96d56Sopenharmony_ci                    next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg)
19147db96d56Sopenharmony_ci                    # at the beginning of a change, drop a link to the next
19157db96d56Sopenharmony_ci                    # change
19167db96d56Sopenharmony_ci                    num_chg += 1
19177db96d56Sopenharmony_ci                    next_href[last] = '<a href="#difflib_chg_%s_%d">n</a>' % (
19187db96d56Sopenharmony_ci                         toprefix,num_chg)
19197db96d56Sopenharmony_ci            else:
19207db96d56Sopenharmony_ci                in_change = False
19217db96d56Sopenharmony_ci        # check for cases where there is no content to avoid exceptions
19227db96d56Sopenharmony_ci        if not flaglist:
19237db96d56Sopenharmony_ci            flaglist = [False]
19247db96d56Sopenharmony_ci            next_id = ['']
19257db96d56Sopenharmony_ci            next_href = ['']
19267db96d56Sopenharmony_ci            last = 0
19277db96d56Sopenharmony_ci            if context:
19287db96d56Sopenharmony_ci                fromlist = ['<td></td><td>&nbsp;No Differences Found&nbsp;</td>']
19297db96d56Sopenharmony_ci                tolist = fromlist
19307db96d56Sopenharmony_ci            else:
19317db96d56Sopenharmony_ci                fromlist = tolist = ['<td></td><td>&nbsp;Empty File&nbsp;</td>']
19327db96d56Sopenharmony_ci        # if not a change on first line, drop a link
19337db96d56Sopenharmony_ci        if not flaglist[0]:
19347db96d56Sopenharmony_ci            next_href[0] = '<a href="#difflib_chg_%s_0">f</a>' % toprefix
19357db96d56Sopenharmony_ci        # redo the last link to link to the top
19367db96d56Sopenharmony_ci        next_href[last] = '<a href="#difflib_chg_%s_top">t</a>' % (toprefix)
19377db96d56Sopenharmony_ci
19387db96d56Sopenharmony_ci        return fromlist,tolist,flaglist,next_href,next_id
19397db96d56Sopenharmony_ci
19407db96d56Sopenharmony_ci    def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False,
19417db96d56Sopenharmony_ci                   numlines=5):
19427db96d56Sopenharmony_ci        """Returns HTML table of side by side comparison with change highlights
19437db96d56Sopenharmony_ci
19447db96d56Sopenharmony_ci        Arguments:
19457db96d56Sopenharmony_ci        fromlines -- list of "from" lines
19467db96d56Sopenharmony_ci        tolines -- list of "to" lines
19477db96d56Sopenharmony_ci        fromdesc -- "from" file column header string
19487db96d56Sopenharmony_ci        todesc -- "to" file column header string
19497db96d56Sopenharmony_ci        context -- set to True for contextual differences (defaults to False
19507db96d56Sopenharmony_ci            which shows full differences).
19517db96d56Sopenharmony_ci        numlines -- number of context lines.  When context is set True,
19527db96d56Sopenharmony_ci            controls number of lines displayed before and after the change.
19537db96d56Sopenharmony_ci            When context is False, controls the number of lines to place
19547db96d56Sopenharmony_ci            the "next" link anchors before the next change (so click of
19557db96d56Sopenharmony_ci            "next" link jumps to just before the change).
19567db96d56Sopenharmony_ci        """
19577db96d56Sopenharmony_ci
19587db96d56Sopenharmony_ci        # make unique anchor prefixes so that multiple tables may exist
19597db96d56Sopenharmony_ci        # on the same page without conflict.
19607db96d56Sopenharmony_ci        self._make_prefix()
19617db96d56Sopenharmony_ci
19627db96d56Sopenharmony_ci        # change tabs to spaces before it gets more difficult after we insert
19637db96d56Sopenharmony_ci        # markup
19647db96d56Sopenharmony_ci        fromlines,tolines = self._tab_newline_replace(fromlines,tolines)
19657db96d56Sopenharmony_ci
19667db96d56Sopenharmony_ci        # create diffs iterator which generates side by side from/to data
19677db96d56Sopenharmony_ci        if context:
19687db96d56Sopenharmony_ci            context_lines = numlines
19697db96d56Sopenharmony_ci        else:
19707db96d56Sopenharmony_ci            context_lines = None
19717db96d56Sopenharmony_ci        diffs = _mdiff(fromlines,tolines,context_lines,linejunk=self._linejunk,
19727db96d56Sopenharmony_ci                      charjunk=self._charjunk)
19737db96d56Sopenharmony_ci
19747db96d56Sopenharmony_ci        # set up iterator to wrap lines that exceed desired width
19757db96d56Sopenharmony_ci        if self._wrapcolumn:
19767db96d56Sopenharmony_ci            diffs = self._line_wrapper(diffs)
19777db96d56Sopenharmony_ci
19787db96d56Sopenharmony_ci        # collect up from/to lines and flags into lists (also format the lines)
19797db96d56Sopenharmony_ci        fromlist,tolist,flaglist = self._collect_lines(diffs)
19807db96d56Sopenharmony_ci
19817db96d56Sopenharmony_ci        # process change flags, generating middle column of next anchors/links
19827db96d56Sopenharmony_ci        fromlist,tolist,flaglist,next_href,next_id = self._convert_flags(
19837db96d56Sopenharmony_ci            fromlist,tolist,flaglist,context,numlines)
19847db96d56Sopenharmony_ci
19857db96d56Sopenharmony_ci        s = []
19867db96d56Sopenharmony_ci        fmt = '            <tr><td class="diff_next"%s>%s</td>%s' + \
19877db96d56Sopenharmony_ci              '<td class="diff_next">%s</td>%s</tr>\n'
19887db96d56Sopenharmony_ci        for i in range(len(flaglist)):
19897db96d56Sopenharmony_ci            if flaglist[i] is None:
19907db96d56Sopenharmony_ci                # mdiff yields None on separator lines skip the bogus ones
19917db96d56Sopenharmony_ci                # generated for the first line
19927db96d56Sopenharmony_ci                if i > 0:
19937db96d56Sopenharmony_ci                    s.append('        </tbody>        \n        <tbody>\n')
19947db96d56Sopenharmony_ci            else:
19957db96d56Sopenharmony_ci                s.append( fmt % (next_id[i],next_href[i],fromlist[i],
19967db96d56Sopenharmony_ci                                           next_href[i],tolist[i]))
19977db96d56Sopenharmony_ci        if fromdesc or todesc:
19987db96d56Sopenharmony_ci            header_row = '<thead><tr>%s%s%s%s</tr></thead>' % (
19997db96d56Sopenharmony_ci                '<th class="diff_next"><br /></th>',
20007db96d56Sopenharmony_ci                '<th colspan="2" class="diff_header">%s</th>' % fromdesc,
20017db96d56Sopenharmony_ci                '<th class="diff_next"><br /></th>',
20027db96d56Sopenharmony_ci                '<th colspan="2" class="diff_header">%s</th>' % todesc)
20037db96d56Sopenharmony_ci        else:
20047db96d56Sopenharmony_ci            header_row = ''
20057db96d56Sopenharmony_ci
20067db96d56Sopenharmony_ci        table = self._table_template % dict(
20077db96d56Sopenharmony_ci            data_rows=''.join(s),
20087db96d56Sopenharmony_ci            header_row=header_row,
20097db96d56Sopenharmony_ci            prefix=self._prefix[1])
20107db96d56Sopenharmony_ci
20117db96d56Sopenharmony_ci        return table.replace('\0+','<span class="diff_add">'). \
20127db96d56Sopenharmony_ci                     replace('\0-','<span class="diff_sub">'). \
20137db96d56Sopenharmony_ci                     replace('\0^','<span class="diff_chg">'). \
20147db96d56Sopenharmony_ci                     replace('\1','</span>'). \
20157db96d56Sopenharmony_ci                     replace('\t','&nbsp;')
20167db96d56Sopenharmony_ci
20177db96d56Sopenharmony_cidel re
20187db96d56Sopenharmony_ci
20197db96d56Sopenharmony_cidef restore(delta, which):
20207db96d56Sopenharmony_ci    r"""
20217db96d56Sopenharmony_ci    Generate one of the two sequences that generated a delta.
20227db96d56Sopenharmony_ci
20237db96d56Sopenharmony_ci    Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
20247db96d56Sopenharmony_ci    lines originating from file 1 or 2 (parameter `which`), stripping off line
20257db96d56Sopenharmony_ci    prefixes.
20267db96d56Sopenharmony_ci
20277db96d56Sopenharmony_ci    Examples:
20287db96d56Sopenharmony_ci
20297db96d56Sopenharmony_ci    >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True),
20307db96d56Sopenharmony_ci    ...              'ore\ntree\nemu\n'.splitlines(keepends=True))
20317db96d56Sopenharmony_ci    >>> diff = list(diff)
20327db96d56Sopenharmony_ci    >>> print(''.join(restore(diff, 1)), end="")
20337db96d56Sopenharmony_ci    one
20347db96d56Sopenharmony_ci    two
20357db96d56Sopenharmony_ci    three
20367db96d56Sopenharmony_ci    >>> print(''.join(restore(diff, 2)), end="")
20377db96d56Sopenharmony_ci    ore
20387db96d56Sopenharmony_ci    tree
20397db96d56Sopenharmony_ci    emu
20407db96d56Sopenharmony_ci    """
20417db96d56Sopenharmony_ci    try:
20427db96d56Sopenharmony_ci        tag = {1: "- ", 2: "+ "}[int(which)]
20437db96d56Sopenharmony_ci    except KeyError:
20447db96d56Sopenharmony_ci        raise ValueError('unknown delta choice (must be 1 or 2): %r'
20457db96d56Sopenharmony_ci                           % which) from None
20467db96d56Sopenharmony_ci    prefixes = ("  ", tag)
20477db96d56Sopenharmony_ci    for line in delta:
20487db96d56Sopenharmony_ci        if line[:2] in prefixes:
20497db96d56Sopenharmony_ci            yield line[2:]
20507db96d56Sopenharmony_ci
20517db96d56Sopenharmony_cidef _test():
20527db96d56Sopenharmony_ci    import doctest, difflib
20537db96d56Sopenharmony_ci    return doctest.testmod(difflib)
20547db96d56Sopenharmony_ci
20557db96d56Sopenharmony_ciif __name__ == "__main__":
20567db96d56Sopenharmony_ci    _test()
2057