"""distutils.filelist

Provides the FileList class, used for poking about the filesystem
and building lists of files.
"""

import os
import re
import fnmatch
import functools
from distutils.util import convert_path
from distutils.errors import DistutilsTemplateError, DistutilsInternalError
from distutils import log


class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until it is supplied through
        'set_allfiles()' or computed by 'findall()'

    Note that no 'dir' attribute is stored: the directory to scan is
    passed directly to 'findall()'.
    """

    def __init__(self, warn=None, debug_print=None):
        # ignore argument to FileList, but keep them for backwards
        # compatibility
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate files."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Set 'self.allfiles' to the list returned by the module-level
        'findall(dir)'.
        """
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Append 'item' to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Extend 'self.files' with every element of 'items'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, basename) pairs.

        'self.files' is rebound to a new list.
        """
        # Not a strict lexical sort!  Names are compared as the tuples
        # produced by os.path.split() and then joined back together.
        self.files = [os.path.join(*parts)
                      for parts in sorted(map(os.path.split, self.files))]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Remove adjacent duplicate entries from 'self.files' in place.

        Only neighbouring entries are compared, so the list must already
        be sorted for every duplicate to be removed.
        """
        # Assumes list has been sorted!
        # Walk backwards so deleting an entry never shifts an index that
        # has not been visited yet.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split one template line into its action and arguments.

        Return a tuple '(action, patterns, dir, dir_pattern)'; the
        elements that do not apply to 'action' are None.  Every path-like
        word is passed through 'convert_path()'.

        Raise DistutilsTemplateError if the line is empty, names an
        unknown action, or has the wrong number of words for its action.
        """
        words = line.split()
        if not words:
            # A blank line used to escape as a bare IndexError; report it
            # like every other malformed template line instead.
            raise DistutilsTemplateError(
                "empty template line: expected an action")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)
            patterns = [convert_path(w) for w in words[1:]]
        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
            dir = convert_path(words[1])
            patterns = [convert_path(w) for w in words[2:]]
        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                      "'%s' expects a single <dir_pattern>" % action)
            dir_pattern = convert_path(words[1])
        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Parse one template line and apply its action to 'self.files'.

        A warning is logged through 'distutils.log' whenever a pattern
        matches nothing.  Raise DistutilsTemplateError if the line is
        malformed (see '_parse_template_line()').
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' "
                              "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                log.warn(("no previously-included directories found "
                          "matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?' match any character except the platform's path separator
        ('os.sep'), see 'glob_to_re()'.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.  If 'self.allfiles'
        is still None it is first computed with 'self.findall()'.

        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = True
        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate backwards so deletions do not disturb pending indices.
        for i in range(len(self.files)-1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = True
        return files_found


# ----------------------------------------------------------------------
# Utility functions

def _find_all_simple(path):
    """
    Find all files under 'path'

    Return a lazy iterable of the joined paths for which
    'os.path.isfile()' is true; symbolic links to directories are
    followed during the walk.
    """
    results = (
        os.path.join(base, file)
        for base, dirs, files in os.walk(path, followlinks=True)
        for file in files
    )
    return filter(os.path.isfile, results)


def findall(dir=os.curdir):
    """
    Find all files under 'dir' and return the list of full filenames.
    Unless dir is '.', return full filenames with dir prepended.
    """
    files = _find_all_simple(dir)
    if dir == os.curdir:
        # Strip the leading './' so names are relative to the current dir.
        make_rel = functools.partial(os.path.relpath, start=dir)
        files = map(make_rel, files)
    return list(files)


def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' does not match "special characters" (which are
    platform-specific).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters (currently: just os.sep).
    sep = os.sep
    if os.sep == '\\':
        # we're using a regex to manipulate a regex, so we need
        # to escape the backslash twice
        sep = r'\\\\'
    escaped = r'\1[^%s]' % sep
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
    return pattern_re


def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        else:
            return pattern

    # ditch start and end characters: translate a dummy glob to learn what
    # wrapper fnmatch.translate() puts around every pattern
    start, _, end = glob_to_re('_').partition('_')

    if pattern:
        pattern_re = glob_to_re(pattern)
        assert pattern_re.startswith(start) and pattern_re.endswith(end)
    else:
        pattern_re = ''

    if prefix is not None:
        prefix_re = glob_to_re(prefix)
        assert prefix_re.startswith(start) and prefix_re.endswith(end)
        prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
        sep = os.sep
        if os.sep == '\\':
            sep = r'\\'
        pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
        pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
    else:                               # no prefix -- respect anchor flag
        if anchor:
            pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])

    return re.compile(pattern_re)