#!/usr/bin/env python3

"""\
List python source files.

There are three functions to check whether a file is a Python source, listed
here with increasing complexity:

- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
  the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().

For the two checks that read the file, it also must be of appropriate size -
not bigger than a megabyte.

walk_python_files() recursively lists all Python files under the given directories.
"""
__author__ = "Oleg Broytmann, Georg Brandl"

__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]


import os
import re

# Control bytes whose presence in the first line marks a file as binary.
# Tab, LF, VT, FF and CR (0x09-0x0D) are deliberately left out of the class.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')

# Files larger than this many bytes are never opened.
_MAX_SOURCE_SIZE = 1024 * 1024

# Set to True (e.g. ``pysource.debug = True``) to trace every decision.
debug = False


def print_debug(msg):
    """Print *msg* only when the module-level ``debug`` flag is true."""
    if debug:
        print(msg)


def _open(fullpath):
    """Open *fullpath* for binary reading, or return None to skip it.

    None is returned when the file cannot be stat'ed, is bigger than
    ``_MAX_SOURCE_SIZE`` bytes, or cannot be opened.  The caller owns the
    returned file object and is responsible for closing it.
    """
    try:
        size = os.stat(fullpath).st_size
    except OSError as err:  # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    if size > _MAX_SOURCE_SIZE:  # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        return open(fullpath, "rb")
    except OSError as err:  # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None


def has_python_ext(fullpath):
    """Return True if the name *fullpath* ends in '.py' or '.pyw'.

    *fullpath* may be a str, bytes or os.PathLike object; the file itself is
    never touched.
    """
    # os.fsdecode() returns str unchanged and converts bytes / path objects,
    # so every path type accepted by os.stat() and open() works here too.
    return os.fsdecode(fullpath).endswith((".py", ".pyw"))


def looks_like_python(fullpath):
    """Return True if *fullpath* is a text file that appears to be Python.

    The file must be openable by _open() and its first line must not contain
    binary control bytes.  It then qualifies if it has a Python extension or
    if its first line mentions 'python' (e.g. a shebang line).
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    with infile:
        line = infile.readline()

    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    if has_python_ext(fullpath):
        return True

    # disguised Python script (e.g. CGI)
    return b"python" in line


def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* compile as a Python module.

    The file must be openable by _open().  Any exception raised by compile()
    is reported through print_debug() and treated as "not Python".
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    with infile:
        code = infile.read()

    try:
        compile(code, fullpath, "exec")
    except Exception as err:
        # compile() can fail with more than SyntaxError (e.g. ValueError for
        # embedded null bytes), so the broad catch is intentional here.
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False

    return True


def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.  A single
           path (str, bytes or os.PathLike) is accepted as well and is
           treated as a one-element list.
    is_python: a function that takes a file name and checks whether it is a
               Python source file
    exclude_dirs: a list of directory base names that should be excluded in
                  the search
    """
    if isinstance(paths, (str, bytes, os.PathLike)):
        # Iterating a lone path would walk it character by character.
        paths = [paths]

    # Built once so the per-directory pruning below is a cheap set lookup and
    # a one-shot iterable of names is not exhausted after the first directory.
    excluded = frozenset(exclude_dirs) if exclude_dirs is not None else frozenset()

    for path in paths:
        print_debug("testing: %s" % path)
        if os.path.isfile(path):
            if is_python(path):
                yield path
        elif os.path.isdir(path):
            print_debug(" it is a directory")
            for dirpath, dirnames, filenames in os.walk(path):
                # os.walk() only skips a subtree when the name is removed
                # from this very list object, hence the slice assignment.
                dirnames[:] = [name for name in dirnames if name not in excluded]
                for filename in filenames:
                    fullpath = os.path.join(dirpath, filename)
                    print_debug("testing: %s" % fullpath)
                    if is_python(fullpath):
                        yield fullpath
        else:
            print_debug(" unknown type")


if __name__ == "__main__":
    # Two simple examples/tests
    for fullpath in walk_python_files(['.']):
        print(fullpath)
    print("----------")
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print(fullpath)