#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) Google LLC, 2018
#
# Author: Tom Roeder <tmroeder@google.com>
#
"""A tool for generating compile_commands.json in the Linux kernel."""

import argparse
import json
import logging
import os
import re
import subprocess
import sys

_DEFAULT_OUTPUT = 'compile_commands.json'
_DEFAULT_LOG_LEVEL = 'WARNING'

_FILENAME_PATTERN = r'^\..*\.cmd$'
_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c)$'
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']


def parse_arguments():
    """Sets up and parses command-line arguments.

    Returns:
        log_level: A logging level to filter log output.
        directory: The work directory where the objects were built
            (made absolute).
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            Falls back to the directory exactly as given on the command
            line when no paths are specified.
    """
    usage = 'Creates a compile_commands.json database from kernel .cmd files'
    parser = argparse.ArgumentParser(description=usage)

    directory_help = ('specify the output directory used for the kernel build '
                      '(defaults to the working directory)')
    parser.add_argument('-d', '--directory', type=str, default='.',
                        help=directory_help)

    output_help = ('path to the output command database (defaults to ' +
                   _DEFAULT_OUTPUT + ')')
    parser.add_argument('-o', '--output', type=str, default=_DEFAULT_OUTPUT,
                        help=output_help)

    log_level_help = ('the level of log messages to produce (defaults to ' +
                      _DEFAULT_LOG_LEVEL + ')')
    parser.add_argument('--log_level', choices=_VALID_LOG_LEVELS,
                        default=_DEFAULT_LOG_LEVEL, help=log_level_help)

    ar_help = 'command used for parsing .a archives'
    parser.add_argument('-a', '--ar', type=str, default='llvm-ar', help=ar_help)

    paths_help = ('directories to search or files to parse '
                  '(files should be *.o, *.a, or modules.order). '
                  'If nothing is specified, the current directory is searched')
    parser.add_argument('paths', type=str, nargs='*', help=paths_help)

    args = parser.parse_args()

    return (args.log_level,
            os.path.abspath(args.directory),
            args.output,
            args.ar,
            args.paths or [args.directory])


def cmdfiles_in_dir(directory):
    """Generate the iterator of .cmd files found under the directory.

    Walk under the given directory, and yield every .cmd file found.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """

    filename_matcher = re.compile(_FILENAME_PATTERN)

    for dirpath, _, filenames in os.walk(directory):
        for filename in filenames:
            if filename_matcher.match(filename):
                yield os.path.join(dirpath, filename)


def to_cmdfile(path):
    """Return the path of .cmd file used for the given build artifact

    The .cmd file sits next to the artifact and is named '.<basename>.cmd'.

    Args:
        path: file path

    Returns:
        The path to .cmd file
    """
    # Avoid the name 'dir' here: it would shadow the builtin.
    dirname, base = os.path.split(path)
    return os.path.join(dirname, '.' + base + '.cmd')


def cmdfiles_for_o(obj):
    """Generate the iterator of .cmd files associated with the object

    Yield the .cmd file used to build the given object

    Args:
        obj: The object path

    Yields:
        The path to .cmd file
    """
    yield to_cmdfile(obj)


def cmdfiles_for_a(archive, ar):
    """Generate the iterator of .cmd files associated with the archive.

    Parse the given archive, and yield every .cmd file used to build it.

    Args:
        archive: The archive to parse
        ar: Command used to list the archive members (run as 'ar -t archive')

    Yields:
        The path to every .cmd file found
    """
    for obj in subprocess.check_output([ar, '-t', archive]).decode().split():
        yield to_cmdfile(obj)


def cmdfiles_for_modorder(modorder):
    """Generate the iterator of .cmd files associated with the modules.order.

    Parse the given modules.order, and yield every .cmd file used to build the
    contained modules.

    Args:
        modorder: The modules.order file to parse

    Yields:
        The path to every .cmd file found
    """
    with open(modorder) as f:
        for line in f:
            ko = line.rstrip()
            base, ext = os.path.splitext(ko)
            if ext != '.ko':
                sys.exit('{}: module path must end with .ko'.format(ko))
            mod = base + '.mod'
            # The first line of *.mod lists the objects that compose the module.
            with open(mod) as m:
                for obj in m.readline().split():
                    yield to_cmdfile(obj)


def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands.

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
    # kernel version). The compile_commands.json file is not interpreted
    # by Make, so this code replaces the escaped version with '#'.
    # A raw string is used because '\#' is not a valid escape sequence.
    prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#')

    # Use os.path.abspath() to normalize the path resolving '.' and '..' .
    abs_path = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }


def main():
    """Walks through the directory and finds and parses .cmd files."""
    log_level, directory, output, ar, paths = parse_arguments()

    level = getattr(logging, log_level)
    logging.basicConfig(format='%(levelname)s: %(message)s', level=level)

    line_matcher = re.compile(_LINE_PATTERN)

    compile_commands = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # Most of built-in objects are linked via archives (built-in.a or lib.a)
        # but some objects are linked to vmlinux directly.
        # Modules are listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.o'):
            cmdfiles = cmdfiles_for_o(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file is examined; files whose
            # first line is not a C compile command are skipped.
            with open(cmdfile, 'rt') as f:
                result = line_matcher.match(f.readline())
                if result:
                    try:
                        entry = process_line(directory, result.group(1),
                                             result.group(2))
                        compile_commands.append(entry)
                    except ValueError as err:
                        logging.info('Could not add line from %s: %s',
                                     cmdfile, err)

    with open(output, 'wt') as f:
        json.dump(compile_commands, f, indent=2, sort_keys=True)


if __name__ == '__main__':
    main()