18c2ecf20Sopenharmony_ci#!/usr/bin/env python3
28c2ecf20Sopenharmony_ci# SPDX-License-Identifier: GPL-2.0
38c2ecf20Sopenharmony_ci#
48c2ecf20Sopenharmony_ci# Copyright (C) Google LLC, 2018
58c2ecf20Sopenharmony_ci#
68c2ecf20Sopenharmony_ci# Author: Tom Roeder <tmroeder@google.com>
78c2ecf20Sopenharmony_ci#
88c2ecf20Sopenharmony_ci"""A tool for generating compile_commands.json in the Linux kernel."""
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ciimport argparse
118c2ecf20Sopenharmony_ciimport json
128c2ecf20Sopenharmony_ciimport logging
138c2ecf20Sopenharmony_ciimport os
148c2ecf20Sopenharmony_ciimport re
158c2ecf20Sopenharmony_ciimport subprocess
168c2ecf20Sopenharmony_ciimport sys
178c2ecf20Sopenharmony_ci
# Defaults for the -o/--output and --log_level command-line options.
_DEFAULT_OUTPUT = 'compile_commands.json'
_DEFAULT_LOG_LEVEL = 'WARNING'

# File names that start with '.' and end with '.cmd'.
_FILENAME_PATTERN = r'^\..*\.cmd$'
# A 'cmd_<target>.o := <command> <source>.c' line. Group 1 is the command up
# to (and including the space before) the last element; group 2 is the .c file.
_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c)$'
# Values accepted by --log_level; each one is looked up as an attribute of the
# logging module in main().
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci
def parse_arguments():
    """Sets up and parses command-line arguments.

    Returns:
        A 5-tuple, in this order:

        log_level: A logging level name to filter log output.
        directory: The absolute path of the work directory where the objects
            were built.
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            When none are given on the command line, this is a one-element
            list holding the directory argument exactly as it was passed
            (not made absolute).
    """
    usage = 'Creates a compile_commands.json database from kernel .cmd files'
    parser = argparse.ArgumentParser(description=usage)

    directory_help = ('specify the output directory used for the kernel build '
                      '(defaults to the working directory)')
    parser.add_argument('-d', '--directory', type=str, default='.',
                        help=directory_help)

    output_help = ('path to the output command database (defaults to ' +
                   _DEFAULT_OUTPUT + ')')
    parser.add_argument('-o', '--output', type=str, default=_DEFAULT_OUTPUT,
                        help=output_help)

    log_level_help = ('the level of log messages to produce (defaults to ' +
                      _DEFAULT_LOG_LEVEL + ')')
    parser.add_argument('--log_level', choices=_VALID_LOG_LEVELS,
                        default=_DEFAULT_LOG_LEVEL, help=log_level_help)

    ar_help = 'command used for parsing .a archives'
    parser.add_argument('-a', '--ar', type=str, default='llvm-ar', help=ar_help)

    paths_help = ('directories to search or files to parse '
                  '(files should be *.o, *.a, or modules.order). '
                  'If nothing is specified, the current directory is searched')
    parser.add_argument('paths', type=str, nargs='*', help=paths_help)

    args = parser.parse_args()

    # An empty positional list is falsy, so fall back to the build directory.
    paths = args.paths or [args.directory]

    return (args.log_level,
            os.path.abspath(args.directory),
            args.output,
            args.ar,
            paths)
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci
def cmdfiles_in_dir(directory):
    """Lazily find every .cmd file in a directory tree.

    The tree rooted at the given directory is walked with os.walk(), and each
    file whose name matches _FILENAME_PATTERN is reported.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file, formed by joining the directory it was found
        in with its file name.
    """
    is_cmdfile = re.compile(_FILENAME_PATTERN).match

    for parent, _subdirs, names in os.walk(directory):
        yield from (os.path.join(parent, name)
                    for name in names if is_cmdfile(name))
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci
def to_cmdfile(path):
    """Return the path of .cmd file used for the given build artifact

    The .cmd file lives next to the artifact and is named by prefixing the
    artifact's base name with '.' and appending '.cmd'
    (e.g. 'dir/foo.o' -> 'dir/.foo.o.cmd').

    Args:
        path: file path of the build artifact

    Returns:
        The path to .cmd file
    """
    # Named 'dirname' rather than 'dir' so the builtin is not shadowed.
    dirname, base = os.path.split(path)
    return os.path.join(dirname, '.' + base + '.cmd')
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci
def cmdfiles_for_o(obj):
    """Produce the .cmd file that belongs to a single object file.

    This is a generator, like the other cmdfiles_* helpers, but it yields
    exactly one item.

    Args:
        obj: The object path

    Yields:
        The path to the .cmd file for obj, as computed by to_cmdfile()
    """
    cmdfile = to_cmdfile(obj)
    yield cmdfile
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci
def cmdfiles_for_a(archive, ar):
    """Produce the .cmd files for every member listed in an archive.

    Runs '<ar> -t <archive>', splits its output on whitespace, and maps each
    resulting member name to its .cmd file path.

    Args:
        archive: The archive to parse
        ar: The command to run to list the archive members

    Yields:
        The .cmd file path for each listed member

    Raises:
        subprocess.CalledProcessError: The ar command exited with a non-zero
            status (raised by subprocess.check_output).
    """
    listing = subprocess.check_output([ar, '-t', archive])
    members = listing.decode().split()
    yield from map(to_cmdfile, members)
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci
def cmdfiles_for_modorder(modorder):
    """Produce the .cmd files for the objects of every listed module.

    Each line of the given modules.order names a .ko file. For each one, the
    sibling .mod file (same path with '.ko' replaced by '.mod') is opened and
    the whitespace-separated entries on its first line are mapped to .cmd
    file paths.

    Args:
        modorder: The modules.order file to parse

    Yields:
        The path to every .cmd file found

    Exits:
        Calls sys.exit() with a message if a line does not end with '.ko'.
    """
    with open(modorder) as order_file:
        for entry in order_file:
            module = entry.rstrip()
            stem, suffix = os.path.splitext(module)
            if suffix != '.ko':
                sys.exit('{}: module path must end with .ko'.format(module))
            # The first line of *.mod lists the objects that compose the
            # module.
            with open(stem + '.mod') as mod_file:
                objects = mod_file.readline().split()
                yield from map(to_cmdfile, objects)
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci
def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands: a dict with the keys
        'directory' (root_directory as given), 'file' (the normalized absolute
        path of the source file) and 'command' (the unescaped command prefix
        followed by file_path as given).

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
    # kernel version). The compile_commands.json file is not interpreted
    # by Make, so this code replaces the escaped version with '#'.
    # The backslash is written as '\\' because '\#' is not a valid string
    # escape sequence and triggers a warning on recent Python versions.
    prefix = command_prefix.replace('\\#', '#').replace('$(pound)', '#')

    # Use os.path.abspath() to normalize the path resolving '.' and '..' .
    # os.path.join() discards root_directory when file_path is absolute.
    abs_path = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci
def main():
    """Collects .cmd files for the requested paths and writes the database.

    For each path from the command line, the matching cmdfiles_* generator is
    selected. The first line of every .cmd file it yields is matched against
    _LINE_PATTERN; matching lines are turned into entries by process_line().
    The collected entries are finally dumped as JSON to the output file.
    """
    log_level, directory, output, ar, paths = parse_arguments()

    logging.basicConfig(format='%(levelname)s: %(message)s',
                        level=getattr(logging, log_level))

    match_line = re.compile(_LINE_PATTERN).match

    entries = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # Most of built-in objects are linked via archives (built-in.a or lib.a)
        # but some objects are linked to vmlinux directly.
        # Modules are listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.o'):
            cmdfiles = cmdfiles_for_o(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file is examined.
            with open(cmdfile, 'rt') as f:
                first_line = f.readline()

            matched = match_line(first_line)
            if not matched:
                continue

            command_prefix, source_file = matched.groups()
            try:
                entries.append(
                    process_line(directory, command_prefix, source_file))
            except ValueError as err:
                # A missing source file is not fatal; the entry is skipped.
                logging.info('Could not add line from %s: %s',
                             cmdfile, err)

    with open(output, 'wt') as f:
        json.dump(entries, f, indent=2, sort_keys=True)
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
238