17db96d56Sopenharmony_ci#!/usr/bin/env python3
27db96d56Sopenharmony_ci
"""\
List python source files.

There are three functions to check whether a file is a Python source, listed
here in order of increasing complexity:

- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
  the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().

For the last two checks the file must also be of appropriate size - not
bigger than a megabyte.

walk_python_files() recursively lists all Python files under the given directories.
"""
# Module authors.
__author__ = "Oleg Broytmann, Georg Brandl"

# Public API: the three "is this Python?" predicates plus the directory walker.
__all__ = [
    "has_python_ext",
    "looks_like_python",
    "can_be_compiled",
    "walk_python_files",
]
217db96d56Sopenharmony_ci
227db96d56Sopenharmony_ci
import os
import re
import stat
247db96d56Sopenharmony_ci
# Matches any byte treated as evidence of binary content: the control bytes
# 0x00-0x08 and 0x0E-0x1F plus DEL (0x7F).  The bytes 0x09-0x0D (TAB, LF, VT,
# FF, CR) are deliberately left out of the class, so they count as text.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')
267db96d56Sopenharmony_ci
# Set to True to enable the diagnostic messages emitted through print_debug().
debug = False


def print_debug(msg):
    """Print *msg* to standard output if the module-level ``debug`` flag is true.

    Does nothing when ``debug`` is false.
    """
    if debug:
        print(msg)
317db96d56Sopenharmony_ci
327db96d56Sopenharmony_ci
def _open(fullpath):
    """Open *fullpath* for binary reading if it is a plausible source file.

    Returns an open binary file object, or None when the path cannot be
    stat'ed, is not a regular file, is larger than one megabyte, or cannot
    be opened.  The reason for a rejection is reported via print_debug().

    The caller is responsible for closing the returned file.
    """
    try:
        info = os.stat(fullpath)
    except OSError as err: # e.g. permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    # FIFOs and device nodes can be opened without any error, but reading
    # them may block forever or never reach end-of-file, so only regular
    # files are accepted.
    if not stat.S_ISREG(info.st_mode):
        print_debug("%s: not a regular file" % fullpath)
        return None

    size = info.st_size
    if size > 1024*1024: # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        return open(fullpath, "rb")
    except OSError as err: # Access denied - ignore the file
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None
497db96d56Sopenharmony_ci
def has_python_ext(fullpath):
    """Return True if the name *fullpath* ends in '.py' or '.pyw'.

    Only the name is inspected; the file itself is never opened.  The
    comparison is case-sensitive.
    """
    return fullpath.endswith((".py", ".pyw"))
527db96d56Sopenharmony_ci
def looks_like_python(fullpath):
    """Return True if *fullpath* looks like a Python source file.

    The file must be accepted by _open() and its first line must contain no
    byte matched by ``binary_re``.  It then qualifies if its name has a
    Python extension (see has_python_ext()) or if the first line contains
    the word 'python'.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    # Only the first line is needed, both for the binary check and for
    # spotting an interpreter line.
    with infile:
        first_line = infile.readline()

    if binary_re.search(first_line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # Either a regular Python file name, or a disguised Python script
    # (e.g. CGI) whose first line mentions python.
    return has_python_ext(fullpath) or b"python" in first_line
737db96d56Sopenharmony_ci
def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* compile as a Python module.

    The file is read through _open(), so files it rejects yield False.  Any
    exception raised by compile() is reported via print_debug() and also
    yields False.  The code is only compiled, never executed.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    with infile:
        code = infile.read()

    try:
        # dont_inherit=True: the verdict must not depend on compiler flags
        # (e.g. __future__ imports) in effect in this module.
        compile(code, fullpath, "exec", dont_inherit=True)
    except Exception as err:
        # Deliberately broad: whatever the reason, the file does not qualify.
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False
    else:
        return True
897db96d56Sopenharmony_ci
907db96d56Sopenharmony_ci
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.  A single
           path (str, bytes or os.PathLike) is treated as a one-element
           list.
    is_python: a function that takes a file name and checks whether it is a
               Python source file
    exclude_dirs: an iterable of directory base names that should be
                  excluded in the search

    Paths that are neither a file nor a directory are skipped.
    """
    # A bare path would otherwise be iterated character by character.
    if isinstance(paths, (str, bytes, os.PathLike)):
        paths = [paths]

    # Materialize once so that a one-shot iterable still applies to every
    # directory visited, not just the first one.
    excluded = frozenset(exclude_dirs) if exclude_dirs is not None else frozenset()

    for path in paths:
        print_debug("testing: %s" % path)
        if os.path.isfile(path):
            if is_python(path):
                yield path
        elif os.path.isdir(path):
            print_debug("    it is a directory")
            for dirpath, dirnames, filenames in os.walk(path):
                if excluded:
                    # Prune in place: os.walk() only skips a subdirectory
                    # if it is removed from this very list object.
                    dirnames[:] = [d for d in dirnames if d not in excluded]
                for filename in filenames:
                    fullpath = os.path.join(dirpath, filename)
                    print_debug("testing: %s" % fullpath)
                    if is_python(fullpath):
                        yield fullpath
        else:
            print_debug("    unknown type")
1237db96d56Sopenharmony_ci
if __name__ == "__main__":
    # Two simple examples/tests: list the tree below the current directory,
    # first with the default looks_like_python() check, then with the
    # can_be_compiled() check, separated by a ruler line.
    for fullpath in walk_python_files(['.']):
        print(fullpath)
    print("----------")
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print(fullpath)
131