#!/usr/bin/env python
# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


"""Parse an LLVM coverage report to generate useable results."""
8cb93a386Sopenharmony_ci
9cb93a386Sopenharmony_ci
10cb93a386Sopenharmony_ciimport argparse
11cb93a386Sopenharmony_ciimport json
12cb93a386Sopenharmony_ciimport os
13cb93a386Sopenharmony_ciimport re
14cb93a386Sopenharmony_ciimport subprocess
15cb93a386Sopenharmony_ciimport sys
16cb93a386Sopenharmony_ci
17cb93a386Sopenharmony_ci
def _fix_filename(filename):
  """Reduce a path printed by llvm-cov to a suffix usable for repo lookups.

  llvm-cov prints paths such as

      /path/to/repo/out/dir/../../src/filename.cpp

  which may additionally be shortened with leading dots:

      ...../../src/filename.cpp

  The text up to the final '..' is discarded and any '.' or '/' characters
  left at the front are stripped, so both examples above become
  'src/filename.cpp'. The result is only a candidate suffix of some
  repo-relative path; it is not guaranteed to identify a unique file.
  """
  after_parent_refs = filename.split('..')[-1]
  return after_parent_refs.lstrip('./')
36cb93a386Sopenharmony_ci
37cb93a386Sopenharmony_ci
def _file_in_repo(filename, all_files):
  """Return the name of the checked-in file matching the given filename.

  The filename is first normalized with _fix_filename, then compared against
  every entry of all_files using plain suffix matching.

  Args:
    filename: file path as printed in the coverage report.
    all_files: iterable of candidate file paths to match against.

  Returns:
    The single entry of all_files that ends with the normalized filename, or
    None when there is no match or more than one match. In the latter case a
    warning listing the candidates is written to stderr.
  """
  new_file = _fix_filename(filename)
  matched = [f for f in all_files if f.endswith(new_file)]
  if len(matched) == 1:
    return matched[0]
  if matched:
    # Write to stderr directly: the Python 2 `print >> sys.stderr` statement
    # raises TypeError when this script is run under Python 3.
    sys.stderr.write('WARNING: multiple matches for %s; skipping:\n\t%s\n'
                     % (new_file, '\n\t'.join(matched)))
  return None
55cb93a386Sopenharmony_ci
56cb93a386Sopenharmony_ci
def _get_per_file_per_line_coverage(report):
  """Return a dict whose keys are file names and values are coverage data.

  The current working directory is walked to build the list of candidate
  files (hidden files and directories, '.pyc' files and anything under a
  path containing 'third_party/externals' are ignored). Each file header in
  the report is then resolved against that list with _file_in_repo; sections
  whose header cannot be resolved to exactly one file are dropped.

  Args:
    report: the full text of the coverage report.

  Returns:
    A dict mapping file name to a list of (lineno, coverage, code) tuples.
    coverage is the integer taken from the first '|'-separated column, or
    None when that column is blank.
  """
  repo_root = os.getcwd()
  all_files = []
  for root, dirs, files in os.walk(repo_root):
    if 'third_party/externals' in root:
      # Every directory below this one would be skipped too, so prune the
      # walk here instead of visiting the whole subtree.
      dirs[:] = []
      continue
    dirs[:] = [d for d in dirs if not d.startswith('.')]
    rel_root = os.path.relpath(root, repo_root)
    if rel_root == os.curdir:
      rel_root = ''  # Top-level files are listed without a directory prefix.
    for name in files:
      if name.startswith('.') or name.endswith('.pyc'):
        continue
      all_files.append(os.path.join(rel_root, name))
  all_files.sort()

  header_re = re.compile(r'([a-zA-Z0-9\./_-]+):')
  # Separator rows ('  ----') and nested sub-view rows ('  |...').
  skip_re = re.compile(r'^\s{2}-+$|^\s{2}\|.+$')
  not_checked_in = '%' # Use this as the file name for not-checked-in files.

  files = {}
  current_file = None
  file_lines = []
  for line in report.splitlines():
    m = header_re.match(line)
    if m:
      # A new header ends the previous section; keep it if it was resolved.
      if current_file and current_file != not_checked_in:
        files[current_file] = file_lines
      current_file = _file_in_repo(m.group(1), all_files) or not_checked_in
      file_lines = []
      continue

    # Lines seen before any header, or inside an unresolved section, would be
    # discarded anyway, so do not try to parse them.
    if current_file is None or current_file == not_checked_in:
      continue
    if not line or skip_re.match(line):
      continue

    cov, linenum, code = line.split('|', 2)
    cov = cov.strip()
    if cov:
      cov = int(cov)
    else:
      cov = None # We don't care about coverage for this line.
    linenum = int(linenum.strip())
    assert linenum == len(file_lines) + 1, (
        'unexpected line number %d in %s' % (linenum, current_file))
    # Under Python 2 the report is a byte string and is decoded here; under
    # Python 3 it is already text and str has no decode().
    if isinstance(code, bytes):
      code = code.decode('utf-8', 'replace')
    file_lines.append((linenum, cov, code))

  # The final section has no following header to trigger the store above.
  if current_file and current_file != not_checked_in:
    files[current_file] = file_lines
  return files
99cb93a386Sopenharmony_ci
100cb93a386Sopenharmony_ci
101cb93a386Sopenharmony_ci
def _testname(filename):
  """Transform the file name into an ingestible test name.

  Every character other than an ASCII letter or digit becomes '_'.
  """
  non_alphanumeric = re.compile(r'[^a-zA-Z0-9]')
  return non_alphanumeric.sub('_', filename)
105cb93a386Sopenharmony_ci
106cb93a386Sopenharmony_ci
def _nanobench_json(results, properties, key):
  """Return the results in JSON format like that produced by nanobench.

  results is an iterable of (percent, lines_not_covered, filename) tuples.
  The returned dict holds every entry of properties plus a 'key' entry and a
  'results' entry; 'results' maps _testname(filename) to that file's
  coverage record.
  """
  # Start from the properties so that the explicit 'key' and 'results'
  # entries set below win over any same-named entries the user passed in.
  output = dict(properties)
  output['key'] = key

  per_test = {}
  for percent, not_covered_lines, f in results:
    options = {
      'fullname': f,
      'dir': os.path.dirname(f),
      'source_type': 'coverage',
    }
    coverage = {
      'percent': percent,
      'lines_not_covered': not_covered_lines,
      'options': options,
    }
    per_test[_testname(f)] = {'coverage': coverage}

  output['results'] = per_test
  return output
129cb93a386Sopenharmony_ci
130cb93a386Sopenharmony_ci
def _parse_key_value(kv_list):
  """Return a dict whose key/value pairs are derived from the given list.

  For example:

      ['k1', 'v1', 'k2', 'v2']
  becomes:

      {'k1': 'v1',
       'k2': 'v2'}

  If a key appears more than once, the last value given for it is kept.

  Args:
    kv_list: sequence of alternating keys and values.

  Returns:
    A dict built from the consecutive (key, value) pairs.

  Raises:
    ValueError: if kv_list has an odd number of elements.
  """
  if len(kv_list) % 2 != 0:
    raise ValueError('Invalid key/value pairs: %s' % (kv_list,))

  # Even positions are keys and odd positions are values. Slicing avoids
  # xrange and integer '/' division, neither of which works on Python 3.
  return dict(zip(kv_list[::2], kv_list[1::2]))
149cb93a386Sopenharmony_ci
150cb93a386Sopenharmony_ci
def _get_per_file_summaries(line_by_line):
  """Summarize the full line-by-line coverage report by file.

  Args:
    line_by_line: dict mapping file path to a list of
        (lineno, coverage, code) tuples, where coverage is a number or None.

  Returns:
    A list of (percent_covered, uncovered_line_count, filepath) tuples.
    Lines whose coverage is None are ignored, and files left with no counted
    lines are omitted.
  """
  per_file = []
  # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
  for filepath, lines in line_by_line.items():
    counts = [cov for _, cov, _ in lines if cov is not None]
    if not counts:
      continue
    total_lines = len(counts)
    covered_lines = sum(1 for cov in counts if cov > 0)
    per_file.append((float(covered_lines)/float(total_lines)*100.0,
                     total_lines - covered_lines,
                     filepath))
  return per_file
167cb93a386Sopenharmony_ci
168cb93a386Sopenharmony_ci
def main():
  """Read a coverage report and write the requested JSON outputs.

  --linebyline receives the per-file, per-line data; --nanobench receives
  per-file summaries in nanobench format and additionally requires --key and
  --properties.
  """
  # Command line.
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '--report', required=True,
      help='input file; an llvm coverage report.')
  arg_parser.add_argument(
      '--nanobench', help='output file for nanobench data.')
  arg_parser.add_argument(
      '--key', metavar='key_or_value', nargs='+',
      help='key/value pairs identifying this bot.')
  arg_parser.add_argument(
      '--properties', metavar='key_or_value', nargs='+',
      help='key/value pairs representing properties of this build.')
  arg_parser.add_argument(
      '--linebyline', help='output file for line-by-line JSON data.')
  opts = arg_parser.parse_args()

  if opts.nanobench and (not opts.key or not opts.properties):
    raise Exception('--key and --properties are required with --nanobench')

  # Input.
  with open(opts.report) as report_file:
    report_text = report_file.read()
  coverage_by_file = _get_per_file_per_line_coverage(report_text)

  # Line-by-line output.
  if opts.linebyline:
    with open(opts.linebyline, 'w') as out_file:
      json.dump(coverage_by_file, out_file)

  if not opts.nanobench:
    return

  # Nanobench output: bot key and build properties, then per-file summaries.
  bot_key = _parse_key_value(opts.key)
  build_properties = _parse_key_value(opts.properties)
  summaries = _get_per_file_summaries(coverage_by_file)
  nanobench_data = _nanobench_json(summaries, build_properties, bot_key)
  with open(opts.nanobench, 'w') as out_file:
    json.dump(nanobench_data, out_file)
210cb93a386Sopenharmony_ci
211cb93a386Sopenharmony_ci
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
214