#!/usr/bin/env python3

import argparse
import sys

have_scipy = True
try:
    import scipy.stats
except ImportError:
    have_scipy = False

SIGNIFICANCE_THRESHOLD = 0.0001

parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description='Compare performance of two runs from nanobench.')
parser.add_argument('--use_means', action='store_true', default=False,
                    help='Use means to calculate performance ratios.')
parser.add_argument('baseline', help='Baseline file.')
parser.add_argument('experiment', help='Experiment file.')
args = parser.parse_args()

# Parse each file's "Samples: <time> <time> ... <label>" lines into a
# {label: [sample times]} dictionary, skipping anything that doesn't match.
a, b = {}, {}
for (path, d) in [(args.baseline, a), (args.experiment, b)]:
    with open(path) as f:
        for line in f:
            try:
                tokens = line.split()
                if tokens[0] != "Samples:":
                    continue
                samples = tokens[1:-1]
                label = tokens[-1]
                d[label] = [float(s) for s in samples]
            except (IndexError, ValueError):
                pass

common = set(a.keys()).intersection(b.keys())
if not common:
    sys.exit('No benchmarks in common between %s and %s.'
             % (args.baseline, args.experiment))

def mean(xs):
    return sum(xs) / len(xs)

# For each benchmark present in both runs, record the performance ratio and,
# when scipy is available, a Mann-Whitney U p-value plus standard errors.
ps = []
for key in common:
    p, asem, bsem = 0, 0, 0
    m = mean if args.use_means else min
    am, bm = m(a[key]), m(b[key])
    if have_scipy:
        _, p = scipy.stats.mannwhitneyu(a[key], b[key])
        asem, bsem = scipy.stats.sem(a[key]), scipy.stats.sem(b[key])
    ps.append((bm/am, p, key, am, bm, asem, bsem))
ps.sort(reverse=True)

def humanize(ns):
    # Convert a time in nanoseconds to a short human-readable string.
    for threshold, suffix in [(1e9, 's'), (1e6, 'ms'), (1e3, 'us'), (1e0, 'ns')]:
        if ns > threshold:
            return "%.3g%s" % (ns/threshold, suffix)

maxlen = max(map(len, common))

# We print only significant changes in benchmark timing distribution.
bonferroni = SIGNIFICANCE_THRESHOLD / len(ps)  # Adjust for the fact we've run multiple tests.
for ratio, p, key, am, bm, asem, bsem in ps:
    if p < bonferroni:
        str_ratio = ('%.2gx' if ratio < 1 else '%.3gx') % ratio
        if args.use_means:
            print('%*s\t%6s(%6s) -> %6s(%6s)\t%s' % (maxlen, key, humanize(am), humanize(asem),
                                                     humanize(bm), humanize(bsem), str_ratio))
        else:
            print('%*s\t%6s -> %6s\t%s' % (maxlen, key, humanize(am), humanize(bm), str_ratio))
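
# A minimal usage sketch. The script name "compare.py" and the file names are
# hypothetical; each input file is captured nanobench output containing lines
# of the form "Samples: <time ns> <time ns> ... <benchmark label>":
#
#   python3 compare.py baseline.txt experiment.txt
#   python3 compare.py --use_means baseline.txt experiment.txt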