17db96d56Sopenharmony_ci#!/usr/bin/env python3
27db96d56Sopenharmony_ci
37db96d56Sopenharmony_ci"""List all those Python files that require a coding directive
47db96d56Sopenharmony_ci
Usage: findnocoding.py [-cd] path1 [path2...]
67db96d56Sopenharmony_ci"""
77db96d56Sopenharmony_ci
87db96d56Sopenharmony_ci__author__ = "Oleg Broytmann, Georg Brandl"
97db96d56Sopenharmony_ci
107db96d56Sopenharmony_ciimport sys, os, re, getopt
117db96d56Sopenharmony_ci
127db96d56Sopenharmony_ci# our pysource module finds Python source files
try:
    import pysource
except ImportError:
    # Emulate the module with a simple os.walk.
    class pysource:
        """Minimal stand-in used when the real ``pysource`` module is missing.

        Only the names this script reads are provided: the three predicate
        attributes (all ``None``) and ``walk_python_files``.
        """

        # No real predicates are available in the fallback; the script still
        # reads these attributes and hands one of them to walk_python_files(),
        # which ignores it.
        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of every ``.py`` file named by or found under *paths*.

            paths -- iterable of file and/or directory names.
            Extra positional and keyword arguments are accepted and ignored.

            A path that is a regular file is yielded only if its name ends
            in ".py"; a directory is walked recursively with os.walk.
            Paths that are neither are silently skipped.
            """
            for path in paths:
                if os.path.isfile(path):
                    # Yield the path itself, not the boolean result of the
                    # extension test, so callers always receive file names.
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)

    # Replace the class by an instance so that attribute access and method
    # calls look the same as with the real module.
    pysource = pysource()

    print("The pysource module is not available; "
                         "no sophisticated Python source file search will be done.", file=sys.stderr)
337db96d56Sopenharmony_ci
347db96d56Sopenharmony_ci
# A comment line carrying an encoding declaration ("coding:" or "coding="
# followed by the codec name); group 1 captures the codec name.
decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
# A line that is empty, whitespace only, or a comment.
blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')

def get_declaration(line):
    """Return the codec name declared on *line*, or b'' if there is none.

    *line* is a bytes object; the result is the bytes captured by decl_re.
    """
    found = decl_re.match(line)
    return found.group(1) if found else b''
437db96d56Sopenharmony_ci
def has_correct_encoding(text, codec):
    """Tell whether the bytes in *text* can be decoded with *codec*.

    Returns True when decoding succeeds and False when it raises
    UnicodeDecodeError; any other exception propagates to the caller.
    """
    decodable = True
    try:
        # Only the success or failure of the decoding matters; the decoded
        # string itself is discarded.
        str(text, codec)
    except UnicodeDecodeError:
        decodable = False
    return decodable
517db96d56Sopenharmony_ci
def needs_declaration(fullpath):
    """Decide whether the file at *fullpath* lacks a needed coding directive.

    Returns:
        None  -- the file could not be opened (e.g. it was removed meanwhile);
        False -- the file declares an encoding on its first line, or on its
                 second line when the first is blank or a comment, or its
                 whole content decodes as UTF-8;
        True  -- there is no declaration and the content is not valid UTF-8.
    """
    try:
        source = open(fullpath, 'rb')
    except IOError:
        # The file vanished or is unreadable - ignore it.
        return None

    with source:
        first = source.readline()
        second = source.readline()

        # A declaration on the second line only counts when the first line
        # is blank or a comment.
        declared = get_declaration(first)
        if not declared and blank_re.match(first):
            declared = get_declaration(second)
        if declared:
            # An explicit declaration is trusted without further checks.
            return False

        # No declaration: the remaining content is needed to test for UTF-8.
        remainder = source.read()

    return not has_correct_encoding(first + second + remainder, "utf-8")
747db96d56Sopenharmony_ci
757db96d56Sopenharmony_ci
# Help text printed to stderr on bad options or when no path is given.
usage = """Usage: %s [-cd] paths...
    -c: recognize Python source files trying to compile them
    -d: debug output""" % sys.argv[0]

if __name__ == '__main__':

    # Parse the command line; anything getopt rejects ends the run with the
    # error message followed by the usage text.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cd')
    except getopt.error as msg:
        for text in (msg, usage):
            print(text, file=sys.stderr)
        sys.exit(1)

    given = [flag for flag, _ in opts]

    # -d switches on progress output.
    debug = '-d' in given

    # -c selects the compile-based predicate instead of the default one.
    is_python = pysource.looks_like_python
    if '-c' in given:
        is_python = pysource.can_be_compiled

    # At least one path is mandatory.
    if not args:
        print(usage, file=sys.stderr)
        sys.exit(1)

    # Report every file for which needs_declaration() returns a true value.
    for fullpath in pysource.walk_python_files(args, is_python):
        if debug:
            print("Testing for coding: %s" % fullpath)
        if needs_declaration(fullpath):
            print(fullpath)
108