#!/usr/bin/env python3

# Copyright 2020 The Amber Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Script to check files for inclusive language. The script will scan all files
and flag non-inclusive terminology which is identified.

Usage: run the script from a folder and the script will scan down through that
folder.
"""

import fnmatch
import os
import re
import sys

# Patterns for terminology that should be flagged. Every pattern is
# case-insensitive and is searched against the full text of a file, so "^" and
# "$" only anchor at the very start/end of the file; a pronoun at the start or
# end of an inner line is still caught by the "\s" variants because a newline
# counts as whitespace.
REGEXES = [
    r"(?i)black[-_]?list",
    r"(?i)white[-_]?list",
    r"(?i)gr[ea]y[-_]?list",
    r"(?i)(first class citizen)",
    r"(?i)black[-_]?hat",
    r"(?i)white[-_]?hat",
    r"(?i)gr[ea]y[-_]?hat",
    r"(?i)master",
    r"(?i)slave",
    r"(?i)\bhim\b",
    r"(?i)\bhis\b",
    r"(?i)\bshe\b",
    r"(?i)\bher\b",
    r"(?i)\bhers\b",
    r"(?i)\bman\b",
    r"(?i)\bwoman\b",
    r"(?i)\she\s",
    r"(?i)\she$",
    r"(?i)^he\s",
    r"(?i)^he$",
    # The apostrophe may be ASCII (') or typographic (U+2019). A "|" inside a
    # character class is a literal pipe, not an alternation, so it is not
    # listed here.
    r"(?i)\she['\u2019]d\s",
    r"(?i)\she['\u2019]d$",
    r"(?i)^he['\u2019]d\s",
    r"(?i)^he['\u2019]d$",
    r"(?i)\she['\u2019]s\s",
    r"(?i)\she['\u2019]s$",
    r"(?i)^he['\u2019]s\s",
    r"(?i)^he['\u2019]s$",
    r"(?i)\she['\u2019]ll\s",
    r"(?i)\she['\u2019]ll$",
    r"(?i)^he['\u2019]ll\s",
    r"(?i)^he['\u2019]ll$",
    r"(?i)grandfather",
    r"(?i)\bmitm\b",
    r"(?i)\bcrazy\b",
    r"(?i)\binsane\b",
    r"(?i)\bblind\sto\b",
    r"(?i)\bflying\sblind\b",
    r"(?i)\bblind\seye\b",
    r"(?i)\bcripple\b",
    r"(?i)\bcrippled\b",
    r"(?i)\bdumb\b",
    r"(?i)\bdummy\b",
    r"(?i)\bparanoid\b",
    r"(?i)\bsane\b",
    r"(?i)\bsanity\b",
    r"(?i)red[-_]?line",
]

# Patterns for text that contains a flagged term but is acceptable.
SUPPRESSIONS = [
    r"(?i)MS_SLAVE",
    # The hyphen is placed last in the class so it is a literal "-"; written
    # as "[ -_]" it would be a range from space to underscore and would also
    # accept digits, letters and most punctuation as the separator.
    r"(?i)man[ _-]?page",
]


# Compiled once at import time; these are applied to every scanned file.
REGEX_LIST = [re.compile(reg) for reg in REGEXES]

SUPPRESSION_LIST = [re.compile(supp) for supp in SUPPRESSIONS]


def find(top, filename_glob, skip_glob_list):
    """Return files in the tree rooted at top that match filename_glob.

    Args:
        top: Directory at which the walk starts.
        filename_glob: fnmatch-style pattern a file's base name must match.
        skip_glob_list: fnmatch-style patterns; a directory whose name matches
            any of them is not descended into.

    Returns:
        A list of paths (top joined with the relative location) in os.walk
        order. Files named like this script are left out so that the script
        does not report its own pattern table.
    """
    own_name = os.path.basename(__file__)
    file_list = []
    for path, dirs, files in os.walk(top):
        # os.walk only honours pruning when the list it yielded is mutated in
        # place, hence the slice assignment instead of rebinding `dirs`.
        dirs[:] = [
            name for name in dirs
            if not any(fnmatch.fnmatch(name, glob) for glob in skip_glob_list)
        ]
        file_list.extend(
            os.path.join(path, filename)
            for filename in fnmatch.filter(files, filename_glob)
            if filename != own_name
        )
    return file_list
def filtered_descendants(glob):
    """Return glob-matching file paths under the current directory.

    The walk is delegated to find('.', ...) and does not descend into
    directories whose names match the skip patterns listed below.
    """
    skipped_dirs = ['third_party', 'external', 'build*', 'out*',
                    'CompilerIdCXX', '.git']
    return find('.', glob, skipped_dirs)


def _suppressed_spans(contents, suppression_list):
    """Return the (start, end) span of every suppression match in contents."""
    return [
        found.span()
        for supp in suppression_list
        for found in supp.finditer(contents)
    ]


def check_match(filename, contents, regex_list=None, suppression_list=None):
    """Report non-inclusive terms found in contents.

    For every pattern, the first occurrence that is not covered by a
    suppression is printed as "<filename>: found non-inclusive language:
    <text>". Later occurrences are examined too, so a suppressed first hit
    (for example "man page") no longer hides an unsuppressed hit of the same
    pattern further down the file.

    Args:
        filename: Name used in the printed report lines.
        contents: Full text to scan.
        regex_list: Compiled patterns to flag; defaults to REGEX_LIST.
        suppression_list: Compiled patterns marking acceptable text; defaults
            to SUPPRESSION_LIST.

    Returns:
        True if at least one unsuppressed occurrence was reported, else False.
    """
    if regex_list is None:
        regex_list = REGEX_LIST
    if suppression_list is None:
        suppression_list = SUPPRESSION_LIST

    # A hit is suppressed when it starts inside any suppression match. This
    # covers suppressions that extend the flagged term on the left (the
    # "MS_" in MS_SLAVE) without hard-coding the prefix length.
    spans = _suppressed_spans(contents, suppression_list)

    reported = False
    for reg in regex_list:
        for match in reg.finditer(contents):
            pos = match.start()
            if any(start <= pos < end for start, end in spans):
                continue
            print("{}: found non-inclusive language: {}".format(
                filename, match.group(0)))
            reported = True
            # One report line per pattern per file is enough.
            break

    return reported


def alert_if_lang_matches(glob, verbose=False):
    """Print a report for every file containing non-inclusive language.

    Finds all glob-matching files under the current directory and checks each
    one with check_match, which prints the offending terms.

    Args:
        glob: fnmatch-style pattern selecting the file names to scan.
        verbose: When true, also print the names of files that were skipped
            because they could not be read or reported on.

    Returns:
        The number of files for which at least one term was reported.
    """
    flagged_count = 0
    for path in filtered_descendants(glob):
        try:
            with open(path, 'r', encoding='utf8') as handle:
                if check_match(path, handle.read()):
                    flagged_count += 1
        except (OSError, UnicodeError):
            # Best effort: unreadable files, files that are not valid UTF-8
            # (typically binaries) and report lines the output stream cannot
            # encode are skipped. Anything else, including KeyboardInterrupt
            # and programming errors, propagates instead of silently turning
            # the run into a pass.
            if verbose:
                print("skipping {}".format(path))

    return flagged_count


def main():
    """Scan the current directory tree; exit with status 1 if anything is flagged."""
    globs = ['*']
    count = sum(alert_if_lang_matches(glob) for glob in globs)

    sys.exit(1 if count > 0 else 0)


if __name__ == '__main__':
    main()