17db96d56Sopenharmony_ci"""distutils.filelist
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_ciProvides the FileList class, used for poking about the filesystem
47db96d56Sopenharmony_ciand building lists of files.
57db96d56Sopenharmony_ci"""
67db96d56Sopenharmony_ci
77db96d56Sopenharmony_ciimport os, re
87db96d56Sopenharmony_ciimport fnmatch
97db96d56Sopenharmony_ciimport functools
107db96d56Sopenharmony_cifrom distutils.util import convert_path
117db96d56Sopenharmony_cifrom distutils.errors import DistutilsTemplateError, DistutilsInternalError
127db96d56Sopenharmony_cifrom distutils import log
137db96d56Sopenharmony_ci
class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until 'set_allfiles()' or 'findall()'
        has been called ('include_pattern()' calls 'findall()' on demand)
    """

    def __init__(self, warn=None, debug_print=None):
        # ignore argument to FileList, but keep them for backwards
        # compatibility
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate files."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Set 'self.allfiles' to the result of the module-level
        'findall()' function applied to 'dir'.
        """
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        # Imported here rather than at module level, so the flag is looked
        # up each time a message is about to be printed.
        from distutils.debug import DEBUG
        if DEBUG:
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add a single filename to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files', ordering by (directory, basename) pairs."""
        # Not a strict lexical sort!  Names are compared as the tuples
        # returned by os.path.split(), then joined back together.
        self.files = [os.path.join(*parts)
                      for parts in sorted(map(os.path.split, self.files))]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Delete adjacent duplicate entries from 'self.files' in place."""
        # Assumes list has been sorted!
        # Walk backwards so deleting an entry never shifts the indices that
        # are still to be examined.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split one template line into its action and arguments.

        Return a tuple '(action, patterns, dir, dir_pattern)'.  'action'
        is the first word of the line; of the other three, only those
        relevant to the action are set and the rest are None.  Every
        pattern and directory word is passed through 'convert_path()'.

        Raise DistutilsTemplateError if the line is blank, names an
        unknown action, or has the wrong number of words for its action.
        """
        words = line.split()
        if not words:
            # A blank line has no action word; report it like any other
            # malformed line instead of failing with a bare IndexError.
            raise DistutilsTemplateError(
                  "empty template line: expected an action")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)
            patterns = [convert_path(w) for w in words[1:]]
        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
            dir = convert_path(words[1])
            patterns = [convert_path(w) for w in words[2:]]
        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                      "'%s' expects a single <dir_pattern>" % action)
            dir_pattern = convert_path(words[1])
        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Parse one template line and apply its action to 'self.files'.

        Include-style actions add matching names from 'self.allfiles';
        exclude-style actions remove matching names from 'self.files'.
        A warning is logged when an action matches nothing.

        Raise DistutilsTemplateError (from '_parse_template_line()') if
        the line is malformed.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' "
                                "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            # No file pattern: everything below the directory is selected.
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                log.warn(("no previously-included directories found "
                          "matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?' do not match the path separator ('os.sep').

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        If 'self.allfiles' is still None it is populated first by calling
        'self.findall()'.  Selected strings will be added to self.files.

        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = True
        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so that deleting an entry does not disturb
        # the indices that have not been visited yet.
        for i in range(len(self.files)-1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = True
        return files_found
2417db96d56Sopenharmony_ci
2427db96d56Sopenharmony_ci
2437db96d56Sopenharmony_ci# ----------------------------------------------------------------------
2447db96d56Sopenharmony_ci# Utility functions
2457db96d56Sopenharmony_ci
2467db96d56Sopenharmony_cidef _find_all_simple(path):
2477db96d56Sopenharmony_ci    """
2487db96d56Sopenharmony_ci    Find all files under 'path'
2497db96d56Sopenharmony_ci    """
2507db96d56Sopenharmony_ci    results = (
2517db96d56Sopenharmony_ci        os.path.join(base, file)
2527db96d56Sopenharmony_ci        for base, dirs, files in os.walk(path, followlinks=True)
2537db96d56Sopenharmony_ci        for file in files
2547db96d56Sopenharmony_ci    )
2557db96d56Sopenharmony_ci    return filter(os.path.isfile, results)
2567db96d56Sopenharmony_ci
2577db96d56Sopenharmony_ci
def findall(dir=os.curdir):
    """
    Return a list of the files found under 'dir' by '_find_all_simple()'.

    When 'dir' is the current-directory marker ('os.curdir'), each name is
    made relative to it; otherwise names are returned as found, with 'dir'
    prepended.
    """
    found = _find_all_simple(dir)
    if dir != os.curdir:
        return list(found)
    # Strip the leading current-directory component from every name.
    return [os.path.relpath(entry, start=dir) for entry in found]
2687db96d56Sopenharmony_ci
2697db96d56Sopenharmony_ci
def glob_to_re(pattern):
    """Convert a shell-style glob into regular-expression source text.

    The result is what 'fnmatch.translate()' produces, except that the
    wildcards are narrowed so they never match the path separator
    ('os.sep').  Return the regex as a string (not compiled).
    """
    translated = fnmatch.translate(pattern)

    # fnmatch turns '?' and '*' into '.' and '.*', which would also match
    # the path separator.  Rewrite every unescaped dot in the translated
    # regex into a character class that excludes the separator.
    if os.sep == '\\':
        # The separator ends up inside a replacement template for a regex,
        # so the backslash has to be escaped twice.
        sep_in_template = r'\\\\'
    else:
        sep_in_template = os.sep
    replacement = r'\1[^%s]' % sep_in_template

    # Group 1 captures any run of escaped backslashes before the dot (and
    # is kept); the lookbehind skips dots that are themselves escaped.
    return re.sub(r'((?<!\\)(\\\\)*)\.', replacement, translated)
2917db96d56Sopenharmony_ci
2927db96d56Sopenharmony_ci
def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.

    If 'is_regex' is true, 'anchor' and 'prefix' are ignored and
    'pattern' is directly compiled to a regex (if it's a string) or just
    returned as-is (assumes it's a regex object).

    Otherwise 'pattern' is translated with 'glob_to_re()'.  If 'prefix'
    (itself a glob) is given, the regex matches names that start with
    'prefix', followed by the path separator, anything, and finally
    'pattern'.  Without a prefix, a true 'anchor' pins the pattern to the
    start of the name.  An empty or None 'pattern' with no prefix compiles
    to the empty regex, which 'search()' finds in every string.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    # Translate a dummy one-character glob to learn what leading and
    # trailing text glob_to_re() puts around a pattern, so that text can
    # be stripped from the pieces below and re-applied around the whole.
    start, _, end = glob_to_re('_').partition('_')

    if pattern:
        pattern_re = glob_to_re(pattern)
        assert pattern_re.startswith(start) and pattern_re.endswith(end)
    else:
        pattern_re = ''

    if prefix is not None:
        prefix_re = glob_to_re(prefix)
        assert prefix_re.startswith(start) and prefix_re.endswith(end)
        prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
        sep = os.sep
        if os.sep == '\\':
            # a literal backslash must be escaped inside the regex
            sep = r'\\'
        pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
        pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
    elif anchor and pattern_re:         # no prefix -- respect anchor flag
        # Only anchor a real translation: with an empty pattern there is
        # no trailing 'end' text, so prepending 'start' would leave an
        # unbalanced regex that fails to compile.  The empty regex already
        # matches everything, which is what an anchored empty pattern means.
        pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])

    return re.compile(pattern_re)
328