"""Utilities for comparing files and directories.

Classes:
    dircmp

Functions:
    cmp(f1, f2, shallow=True) -> bool
    cmpfiles(a, b, common, shallow=True) -> ([], [], [])
    clear_cache()

"""

import os
import stat
from itertools import filterfalse
from types import GenericAlias

__all__ = ['clear_cache', 'cmp', 'dircmp', 'cmpfiles', 'DEFAULT_IGNORES']

# Results of past content comparisons, keyed by (f1, f2, sig1, sig2).
_cache = {}
# Chunk size, in bytes, used when comparing file contents.
BUFSIZE = 8*1024

DEFAULT_IGNORES = [
    'RCS', 'CVS', 'tags', '.git', '.hg', '.bzr', '_darcs', '__pycache__']


def clear_cache():
    """Clear the filecmp cache."""
    _cache.clear()


def cmp(f1, f2, shallow=True):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- treat files as identical if their stat signatures (type, size,
               mtime) are identical. Otherwise, files are considered different
               if their sizes or contents differ.  [default: True]

    Return value:

    True if the files are the same, False otherwise.

    Raises OSError if either file cannot be stat'ed or opened.

    This function uses a cache for past comparisons and the results,
    with cache entries invalidated if their stat information
    changes.  The cache may be cleared by calling clear_cache().

    """

    s1 = _sig(os.stat(f1))
    s2 = _sig(os.stat(f2))
    # Only regular files can be compared; anything else is "different".
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return False
    if shallow and s1 == s2:
        return True
    # Different sizes can never have equal contents; skip reading.
    if s1[1] != s2[1]:
        return False

    # The signatures are part of the key, so a changed file misses the cache.
    key = (f1, f2, s1, s2)
    outcome = _cache.get(key)
    if outcome is None:
        outcome = _do_cmp(f1, f2)
        if len(_cache) > 100:      # limit the maximum size of the cache
            clear_cache()
        _cache[key] = outcome
    return outcome


def _sig(st):
    """Return the (file type, size, mtime) signature of a stat result."""
    return (stat.S_IFMT(st.st_mode),
            st.st_size,
            st.st_mtime)


def _do_cmp(f1, f2):
    """Return True if files f1 and f2 have byte-for-byte equal contents."""
    bufsize = BUFSIZE
    with open(f1, 'rb') as fp1, open(f2, 'rb') as fp2:
        while True:
            b1 = fp1.read(bufsize)
            b2 = fp2.read(bufsize)
            if b1 != b2:
                return False
            # Both chunks are equal here, so an empty b1 means both hit EOF.
            if not b1:
                return True


# Directory comparison class.
#
class dircmp:
    """A class that manages the comparison of 2 directories.

    dircmp(a, b, ignore=None, hide=None, *, shallow=True)
      A and B are directories.
      IGNORE is a list of names to ignore,
        defaults to DEFAULT_IGNORES.
      HIDE is a list of names to hide,
        defaults to [os.curdir, os.pardir].
      SHALLOW specifies whether to just check the stat signature (do not read
        the files).
        defaults to True.

    High level usage:
      x = dircmp(dir1, dir2)
      x.report() -> prints a report on the differences between dir1 and dir2
       or
      x.report_partial_closure() -> prints report on differences between dir1
            and dir2, and reports on common immediate subdirectories.
      x.report_full_closure() -> like report_partial_closure,
            but fully recursive.

    Attributes:
     left_list, right_list: The files in dir1 and dir2,
        filtered by hide and ignore.
     common: a list of names in both dir1 and dir2.
     left_only, right_only: names only in dir1, dir2.
     common_dirs: subdirectories in both dir1 and dir2.
     common_files: files in both dir1 and dir2.
     common_funny: names in both dir1 and dir2 where the type differs between
        dir1 and dir2, or the name is not stat-able.
     same_files: list of identical files.
     diff_files: list of filenames which differ.
     funny_files: list of files which could not be compared.
     subdirs: a dictionary of dircmp instances (or MyDirCmp instances if this
       object is of type MyDirCmp, a subclass of dircmp), keyed by names
       in common_dirs.
     """

    # Class-level default so that subclasses which never call
    # dircmp.__init__ keep the historical (shallow) comparison behaviour.
    shallow = True

    def __init__(self, a, b, ignore=None, hide=None, *, shallow=True):
        """Record the two directories and the comparison options.

        No file system access happens here; the result attributes are
        computed on first access (see __getattr__).
        """
        self.left = a
        self.right = b
        if hide is None:
            self.hide = [os.curdir, os.pardir]  # Names never to be shown
        else:
            self.hide = hide
        if ignore is None:
            self.ignore = DEFAULT_IGNORES
        else:
            self.ignore = ignore
        self.shallow = shallow

    def phase0(self):  # Compare everything except common subdirectories
        """List both directories, dropping hidden and ignored names."""
        skip = self.hide + self.ignore
        self.left_list = _filter(os.listdir(self.left), skip)
        self.right_list = _filter(os.listdir(self.right), skip)
        self.left_list.sort()
        self.right_list.sort()

    def phase1(self):  # Compute common names
        """Split the listings into common, left-only and right-only names."""
        # Names are matched on their normcase()d form; the values keep the
        # original spelling so results show names as they appear on disk.
        a = dict(zip(map(os.path.normcase, self.left_list), self.left_list))
        b = dict(zip(map(os.path.normcase, self.right_list), self.right_list))
        self.common = list(map(a.__getitem__, filter(b.__contains__, a)))
        self.left_only = list(map(a.__getitem__, filterfalse(b.__contains__, a)))
        self.right_only = list(map(b.__getitem__, filterfalse(a.__contains__, b)))

    def phase2(self):  # Distinguish files, directories, funnies
        """Classify each common name as a directory, regular file or funny."""
        self.common_dirs = []
        self.common_files = []
        self.common_funny = []

        for x in self.common:
            a_path = os.path.join(self.left, x)
            b_path = os.path.join(self.right, x)

            try:
                a_type = stat.S_IFMT(os.stat(a_path).st_mode)
                b_type = stat.S_IFMT(os.stat(b_path).st_mode)
            except OSError:
                # A name that cannot be stat'ed on either side is "funny".
                self.common_funny.append(x)
                continue

            if a_type != b_type:
                self.common_funny.append(x)
            elif stat.S_ISDIR(a_type):
                self.common_dirs.append(x)
            elif stat.S_ISREG(a_type):
                self.common_files.append(x)
            else:
                self.common_funny.append(x)

    def phase3(self):  # Find out differences between common files
        """Compare the common regular files, honouring self.shallow."""
        xx = cmpfiles(self.left, self.right, self.common_files, self.shallow)
        self.same_files, self.diff_files, self.funny_files = xx

    def phase4(self):  # Find out differences between common subdirectories
        """Create a comparison object for every common subdirectory."""
        # A new dircmp (or MyDirCmp if dircmp was subclassed) object is created
        # for each common subdirectory,
        # these are stored in a dictionary indexed by filename.
        # The hide, ignore and shallow properties are inherited from the parent
        self.subdirs = {}
        for x in self.common_dirs:
            a_x = os.path.join(self.left, x)
            b_x = os.path.join(self.right, x)
            # Construct with the original four positional arguments so that
            # subclasses overriding __init__ with the pre-"shallow" signature
            # keep working, then propagate the option as an attribute.
            sub = self.__class__(a_x, b_x, self.ignore, self.hide)
            sub.shallow = self.shallow
            self.subdirs[x] = sub

    def phase4_closure(self):  # Recursively call phase4() on subdirectories
        """Build the subdirs mapping for this object and all descendants."""
        self.phase4()
        for sd in self.subdirs.values():
            sd.phase4_closure()

    def report(self):  # Print a report on the differences between a and b
        """Print the comparison of the two directories to standard output.

        Note that the result lists are sorted in place before printing.
        """
        # Output format is purposely lousy
        print('diff', self.left, self.right)
        if self.left_only:
            self.left_only.sort()
            print('Only in', self.left, ':', self.left_only)
        if self.right_only:
            self.right_only.sort()
            print('Only in', self.right, ':', self.right_only)
        if self.same_files:
            self.same_files.sort()
            print('Identical files :', self.same_files)
        if self.diff_files:
            self.diff_files.sort()
            print('Differing files :', self.diff_files)
        if self.funny_files:
            self.funny_files.sort()
            print('Trouble with common files :', self.funny_files)
        if self.common_dirs:
            self.common_dirs.sort()
            print('Common subdirectories :', self.common_dirs)
        if self.common_funny:
            self.common_funny.sort()
            print('Common funny cases :', self.common_funny)

    def report_partial_closure(self):  # Print reports on self and on subdirs
        """Print a report for this comparison and its immediate subdirs."""
        self.report()
        for sd in self.subdirs.values():
            print()
            sd.report()

    def report_full_closure(self):  # Report on self and subdirs recursively
        """Print a report for this comparison and, recursively, all subdirs."""
        self.report()
        for sd in self.subdirs.values():
            print()
            sd.report_full_closure()

    # Maps each lazily computed attribute to the phase that produces it.
    methodmap = dict(subdirs=phase4,
                     same_files=phase3, diff_files=phase3, funny_files=phase3,
                     common_dirs=phase2, common_files=phase2, common_funny=phase2,
                     common=phase1, left_only=phase1, right_only=phase1,
                     left_list=phase0, right_list=phase0)

    def __getattr__(self, attr):
        """Compute a result attribute on first access.

        Only called when normal lookup fails: run the phase that sets the
        attribute, then fetch it again (now found as an instance attribute).
        """
        if attr not in self.methodmap:
            raise AttributeError(attr)
        self.methodmap[attr](self)
        return getattr(self, attr)

    __class_getitem__ = classmethod(GenericAlias)


def cmpfiles(a, b, common, shallow=True):
    """Compare common files in two directories.

    a, b -- directory names
    common -- list of file names found in both directories
    shallow -- if true, do comparison based solely on stat() information

    Returns a tuple of three lists:
      files that compare equal
      files that are different
      filenames that aren't regular files.

    """
    res = ([], [], [])
    for x in common:
        ax = os.path.join(a, x)
        bx = os.path.join(b, x)
        # _cmp() returns 0, 1 or 2, which selects the result list directly.
        res[_cmp(ax, bx, shallow)].append(x)
    return res


# Compare two files.
# Return:
#       0 for equal
#       1 for different
#       2 for funny cases (can't stat, etc.)
#
def _cmp(a, b, sh, abs=abs, cmp=cmp):
    try:
        # cmp() is True for equal files, so negate to get 0 (False) for equal
        # and 1 (True) for different.
        return not abs(cmp(a, b, sh))
    except OSError:
        return 2


# Return a copy with items that occur in skip removed.
#
def _filter(flist, skip):
    return list(filterfalse(skip.__contains__, flist))


# Demonstration and testing.
#
def demo():
    """Compare the two directories named on the command line.

    With -r the report is fully recursive.
    """
    import sys
    import getopt
    options, args = getopt.getopt(sys.argv[1:], 'r')
    if len(args) != 2:
        raise getopt.GetoptError('need exactly two args', None)
    dd = dircmp(args[0], args[1])
    if ('-r', '') in options:
        dd.report_full_closure()
    else:
        dd.report()


if __name__ == '__main__':
    demo()