162306a36Sopenharmony_ci#!/usr/bin/env python3
262306a36Sopenharmony_ci# SPDX-License-Identifier: GPL-2.0
362306a36Sopenharmony_ci#
462306a36Sopenharmony_ci# Copyright (C) Google LLC, 2018
562306a36Sopenharmony_ci#
662306a36Sopenharmony_ci# Author: Tom Roeder <tmroeder@google.com>
762306a36Sopenharmony_ci#
862306a36Sopenharmony_ci"""A tool for generating compile_commands.json in the Linux kernel."""
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ciimport argparse
1162306a36Sopenharmony_ciimport json
1262306a36Sopenharmony_ciimport logging
1362306a36Sopenharmony_ciimport os
1462306a36Sopenharmony_ciimport re
1562306a36Sopenharmony_ciimport subprocess
1662306a36Sopenharmony_ciimport sys
1762306a36Sopenharmony_ci
# Defaults for the --log_level and --output command-line options.
_DEFAULT_LOG_LEVEL = 'WARNING'
_DEFAULT_OUTPUT = 'compile_commands.json'

# Names accepted by --log_level. Each one is looked up as an attribute of the
# logging module to obtain the numeric level.
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']

# Base name of a command file: a dotfile ending in '.cmd'.
_FILENAME_PATTERN = r'^\..*\.cmd$'

# First line of a .cmd file that records how a .c or .S source was turned into
# an object. Group 1 is the command line up to and including the space before
# the source file; group 2 is the source file itself.
_LINE_PATTERN = r'^savedcmd_[^ ]*\.o := (.* )([^ ]*\.[cS]) *(;|$)'

# Directories directly under the search root that are never scanned.
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']
2762306a36Sopenharmony_ci
def parse_arguments():
    """Sets up and parses command-line arguments.

    Returns:
        A 5-tuple whose elements are, in this order:

        log_level: A logging level name to filter log output.
        directory: The work directory where the objects were built, as an
            absolute path.
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            When no positional path is given, this is a one-element list
            holding the --directory value exactly as it was passed (not
            converted to an absolute path).
    """
    usage = 'Creates a compile_commands.json database from kernel .cmd files'
    parser = argparse.ArgumentParser(description=usage)

    directory_help = ('specify the output directory used for the kernel build '
                      '(defaults to the working directory)')
    parser.add_argument('-d', '--directory', type=str, default='.',
                        help=directory_help)

    output_help = ('path to the output command database (defaults to ' +
                   _DEFAULT_OUTPUT + ')')
    parser.add_argument('-o', '--output', type=str, default=_DEFAULT_OUTPUT,
                        help=output_help)

    log_level_help = ('the level of log messages to produce (defaults to ' +
                      _DEFAULT_LOG_LEVEL + ')')
    parser.add_argument('--log_level', choices=_VALID_LOG_LEVELS,
                        default=_DEFAULT_LOG_LEVEL, help=log_level_help)

    ar_help = 'command used for parsing .a archives'
    parser.add_argument('-a', '--ar', type=str, default='llvm-ar', help=ar_help)

    paths_help = ('directories to search or files to parse '
                  '(files should be *.o, *.a, or modules.order). '
                  'If nothing is specified, the current directory is searched')
    parser.add_argument('paths', type=str, nargs='*', help=paths_help)

    args = parser.parse_args()

    # nargs='*' yields an empty list when no path is given; fall back to the
    # build directory in that case.
    return (args.log_level,
            os.path.abspath(args.directory),
            args.output,
            args.ar,
            args.paths or [args.directory])
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci
def cmdfiles_in_dir(directory):
    """Yield the path of every .cmd file found below a directory.

    The tree is walked top-down. The excluded directories sitting directly
    under 'directory' are skipped together with everything below them.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """
    is_cmdfile = re.compile(_FILENAME_PATTERN).match
    skipped = [os.path.join(directory, name) for name in _EXCLUDE_DIRS]

    for current, subdirs, files in os.walk(directory, topdown=True):
        if current in skipped:
            # Emptying the list in place keeps os.walk from descending any
            # further into this subtree.
            del subdirs[:]
            continue

        yield from (os.path.join(current, name)
                    for name in files if is_cmdfile(name))
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci
def to_cmdfile(path):
    """Return the path of the .cmd file used for the given build artifact.

    The .cmd file lives in the same directory as the artifact and carries the
    artifact's file name wrapped in a leading '.' and a trailing '.cmd'.

    Args:
        path: Path of the build artifact.

    Returns:
        The path to the .cmd file.
    """
    # Named 'dirname' rather than 'dir' so the builtin is not shadowed.
    dirname, basename = os.path.split(path)
    return os.path.join(dirname, '.' + basename + '.cmd')
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci
def cmdfiles_for_a(archive, ar):
    """Yield the .cmd file path for every member of an archive.

    The member list is obtained by running '<ar> -t <archive>'. Because this
    is a generator, the command is only run once iteration starts.

    Args:
        archive: The archive to parse
        ar: Command used for parsing .a archives

    Yields:
        The path to every .cmd file found
    """
    listing = subprocess.check_output([ar, '-t', archive]).decode()
    # Members are separated by splitting on any run of whitespace.
    yield from map(to_cmdfile, listing.split())
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci
def cmdfiles_for_modorder(modorder):
    """Yield the .cmd file path for every object of every listed module.

    Each line of modules.order names a module as a '.o' path. The sibling
    '.mod' file of that module is then read, and the .cmd path of every line
    found in it is yielded.

    Args:
        modorder: The modules.order file to parse

    Yields:
        The path to every .cmd file found
    """
    with open(modorder) as order_file:
        for entry in order_file:
            module_obj = entry.rstrip()
            stem, suffix = os.path.splitext(module_obj)
            if suffix != '.o':
                sys.exit('{}: module path must end with .o'.format(module_obj))
            # <stem>.mod holds the list of objects that compose the module.
            with open(stem + '.mod') as mod_file:
                yield from (to_cmdfile(member.rstrip()) for member in mod_file)
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci
def process_line(root_directory, command_prefix, file_path):
    """Build one compile_commands entry from the pieces of a .cmd line.

    Args:
        root_directory: The directory that was searched for .cmd files. It is
            used as the "directory" value of the entry.
        command_prefix: The extracted command line, up to the last element.
        file_path: The source file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        A dict with the keys 'directory', 'file' and 'command', ready to be
        appended to compile_commands.

    Raises:
        ValueError: The source file, resolved against root_directory, does
            not exist.
    """
    # Joining and then normalising resolves any '.' and '..' components.
    source = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(source):
        raise ValueError('File %s not found' % source)

    # .cmd files are meant to be included by Make, so a pound sign is stored
    # escaped, either as '\#' or as '$(pound)' depending on the kernel
    # version. compile_commands.json is not read by Make, so the plain '#'
    # is restored here.
    command = command_prefix.replace(r'\#', '#')
    command = command.replace('$(pound)', '#')

    entry = {}
    entry['directory'] = root_directory
    entry['file'] = source
    entry['command'] = command + file_path
    return entry
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci
def main():
    """Collect compile commands from .cmd files and write them out as JSON.

    Every path from the command line is expanded into a series of .cmd files.
    The first line of each file is matched against the compile-command
    pattern, and each match becomes one entry of the output database.
    """
    log_level, directory, output, ar, paths = parse_arguments()

    logging.basicConfig(format='%(levelname)s: %(message)s',
                        level=getattr(logging, log_level))

    match_line = re.compile(_LINE_PATTERN).match
    entries = []

    for path in paths:
        # A directory is searched for all .cmd files under it. Any other
        # path stands for the .cmd files associated with that file:
        # built-in objects are linked via vmlinux.a, and modules are listed
        # in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file is examined.
            with open(cmdfile, 'rt') as f:
                first_line = f.readline()

            found = match_line(first_line)
            if not found:
                continue

            command_prefix, source = found.group(1, 2)
            try:
                entries.append(process_line(directory, command_prefix, source))
            except ValueError as err:
                # A source file that no longer exists is logged and skipped.
                logging.info('Could not add line from %s: %s',
                             cmdfile, err)

    with open(output, 'wt') as f:
        json.dump(entries, f, indent=2, sort_keys=True)
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
229