1"""report.py - Utilities for reporting statistics about benchmark results
2"""
3
4import unittest
5import os
6import re
7import copy
8import random
9
10from scipy.stats import mannwhitneyu, gmean
11from numpy import array
12
13
14class BenchmarkColor(object):
15    def __init__(self, name, code):
16        self.name = name
17        self.code = code
18
19    def __repr__(self):
20        return '%s%r' % (self.__class__.__name__,
21                         (self.name, self.code))
22
    def __format__(self, format_spec):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number. More is better.
UTEST_COL_NAME = "_pvalue"

_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
    "s": 1.0,
    "ms": 1e-3,
    "us": 1e-6,
    "ns": 1e-9,
}


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
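
    A doctest-style sketch (color arguments simply vanish when
    'use_color' is False):

    >>> color_format(False, '{}warn{}', BC_WARNING, BC_ENDC)
    'warn'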
59    """
60    assert use_color is True or use_color is False
61    if not use_color:
62        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
63                for arg in args]
64        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
65                  for key, arg in kwargs.items()}
66    return fmt_str.format(*args, **kwargs)
67
68
69def find_longest_name(benchmark_list):
70    """
71    Return the length of the longest benchmark name in a given list of
72    benchmark JSON objects
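
    A doctest-style sketch:

    >>> find_longest_name([{'name': 'BM_One'}, {'name': 'BM_Twenty'}])
    9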
73    """
74    longest_name = 1
75    for bc in benchmark_list:
76        if len(bc['name']) > longest_name:
77            longest_name = len(bc['name'])
78    return longest_name
79
80
81def calculate_change(old_val, new_val):
82    """
83    Return a float representing the decimal change between old_val and new_val.
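
    A zero baseline is handled specially: the change is computed relative to
    the midpoint of the two values so that it stays finite.

    >>> calculate_change(100, 110)
    0.1
    >>> calculate_change(0, 10)
    2.0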
84    """
85    if old_val == 0 and new_val == 0:
86        return 0.0
87    if old_val == 0:
88        return float(new_val - old_val) / (float(old_val + new_val) / 2)
89    return float(new_val - old_val) / abs(old_val)
90
91
92def filter_benchmark(json_orig, family, replacement=""):
93    """
    Apply a filter to the json, keeping only the given 'family' of benchmarks
    and replacing the matched family name with 'replacement'.
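
    A doctest-style sketch:

    >>> j = {'benchmarks': [{'name': 'BM_foo/8'}, {'name': 'BM_bar/8'}]}
    >>> [b['name'] for b in filter_benchmark(j, 'BM_foo', 'F')['benchmarks']]
    ['F/8']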
95    """
96    regex = re.compile(family)
97    filtered = {}
98    filtered['benchmarks'] = []
99    for be in json_orig['benchmarks']:
100        if not regex.search(be['name']):
101            continue
102        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
103        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
104        filtered['benchmarks'].append(filteredbench)
105    return filtered
106
107
108def get_unique_benchmark_names(json):
109    """
    While *keeping* the order, return all the unique benchmark 'names'.
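
    A doctest-style sketch (the first occurrence wins):

    >>> get_unique_benchmark_names(
    ...     {'benchmarks': [{'name': 'b'}, {'name': 'a'}, {'name': 'b'}]})
    ['b', 'a']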
111    """
112    seen = set()
113    uniqued = [x['name'] for x in json['benchmarks']
114               if x['name'] not in seen and
115               (seen.add(x['name']) or True)]
116    return uniqued
117
118
119def intersect(list1, list2):
120    """
121    Given two lists, get a new list consisting of the elements only contained
122    in *both of the input lists*, while preserving the ordering.
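
    A doctest-style sketch (the result follows list1's ordering):

    >>> intersect([1, 2, 3, 4], [4, 2, 5])
    [2, 4]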
123    """
124    return [x for x in list1 if x in list2]
125
126
127def is_potentially_comparable_benchmark(x):
128    return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)
129
130
131def partition_benchmarks(json1, json2):
132    """
133    While preserving the ordering, find benchmarks with the same names in
134    both of the inputs, and group them.
135    (i.e. partition/filter into groups with common name)
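
    A minimal doctest-style sketch with a single comparable benchmark:

    >>> j1 = {'benchmarks': [{'name': 'BM_x', 'time_unit': 'ns',
    ...                       'real_time': 1, 'cpu_time': 2}]}
    >>> j2 = {'benchmarks': [{'name': 'BM_x', 'time_unit': 'ns',
    ...                       'real_time': 3, 'cpu_time': 4}]}
    >>> [[len(lhs), len(rhs)] for lhs, rhs in partition_benchmarks(j1, j2)]
    [[1, 1]]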
136    """
137    json1_unique_names = get_unique_benchmark_names(json1)
138    json2_unique_names = get_unique_benchmark_names(json2)
139    names = intersect(json1_unique_names, json2_unique_names)
140    partitions = []
141    for name in names:
142        time_unit = None
143        # Pick the time unit from the first entry of the lhs benchmark.
144        # We should be careful not to crash with unexpected input.
145        for x in json1['benchmarks']:
146            if (x['name'] == name and is_potentially_comparable_benchmark(x)):
147                time_unit = x['time_unit']
148                break
149        if time_unit is None:
150            continue
151        # Filter by name and time unit.
152        # All the repetitions are assumed to be comparable.
153        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
154               x['time_unit'] == time_unit]
155        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
156               x['time_unit'] == time_unit]
157        partitions.append([lhs, rhs])
158    return partitions
159
160
161def get_timedelta_field_as_seconds(benchmark, field_name):
162    """
    Get the value of benchmark's field_name field, which is a time with time
    unit time_unit, converted to seconds.
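
    A doctest-style sketch:

    >>> get_timedelta_field_as_seconds(
    ...     {'real_time': 500.0, 'time_unit': 'ms'}, 'real_time')
    0.5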
165    """
166    timedelta = benchmark[field_name]
167    time_unit = benchmark.get('time_unit', 's')
168    return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit)
169
170
171def calculate_geomean(json):
172    """
173    Extract all real/cpu times from all the benchmarks as seconds,
174    and calculate their geomean.
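
    A doctest-style sketch; gmean reduces along axis 0, i.e. per column,
    yielding one geomean for the real times and one for the cpu times
    (rounded here to sidestep floating-point noise):

    >>> [float(round(t, 12)) for t in calculate_geomean({'benchmarks': [
    ...     {'real_time': 2.0, 'cpu_time': 8.0, 'time_unit': 's'},
    ...     {'real_time': 8.0, 'cpu_time': 2.0, 'time_unit': 's'}]})]
    [4.0, 4.0]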
175    """
176    times = []
177    for benchmark in json['benchmarks']:
178        if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate':
179            continue
180        times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'),
181                      get_timedelta_field_as_seconds(benchmark, 'cpu_time')])
182    return gmean(times) if times else array([])
183
184
185def extract_field(partition, field_name):
186    # The count of elements may be different. We want *all* of them.
187    lhs = [x[field_name] for x in partition[0]]
188    rhs = [x[field_name] for x in partition[1]]
189    return [lhs, rhs]
190
191
192def calc_utest(timings_cpu, timings_time):
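    """
    Run a two-sided Mann-Whitney U test on the CPU-time and wall-time
    samples.

    Returns a tuple (have_optimal_repetitions, cpu_pvalue, time_pvalue).
    If any sample has fewer than UTEST_MIN_REPETITIONS entries, the test
    is skipped and (False, None, None) is returned.
    """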
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None:
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]


def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
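
    Each per-benchmark entry has the shape:

        {'name': ..., 'label': ..., 'measurements': [...], 'time_unit': ...,
         'run_type': ..., 'aggregate_name': ..., 'utest': {...}}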
251    """
252    assert utest is True or utest is False
253
254    diff_report = []
255    partitions = partition_benchmarks(json1, json2)
256    for partition in partitions:
257        benchmark_name = partition[0][0]['name']
        label = partition[0][0].get('label', '')
        time_unit = partition[0][0]['time_unit']
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append({
                'real_time': bn['real_time'],
                'cpu_time': bn['cpu_time'],
                'real_time_other': other_bench['real_time'],
                'cpu_time_other': other_bench['cpu_time'],
                'time': calculate_change(bn['real_time'], other_bench['real_time']),
                'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            })

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time)
            # Check for None, not truthiness: a p-value of 0.0 is still valid.
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = partition[0][0].get('run_type', '')
            aggregate_name = partition[0][0].get(
                'aggregate_name', '') if run_type == 'aggregate' else ''
            diff_report.append({
                'name': benchmark_name,
                'label': label,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append({
            'name': 'OVERALL_GEOMEAN',
            'label': '',
            'measurements': [{
                'real_time': lhs_gmean[0],
                'cpu_time': lhs_gmean[1],
                'real_time_other': rhs_gmean[0],
                'cpu_time_other': rhs_gmean[1],
                'time': calculate_change(lhs_gmean[0], rhs_gmean[0]),
                'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1])
            }],
            'time_unit': 's',
            'run_type': 'aggregate',
            'aggregate_name': 'geomean',
            'utest': {}
        })

    return diff_report


def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
336    """
337    Calculate and report the difference between each test of two benchmarks
338    runs specified as 'json1' and 'json2'.
339    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if (not include_aggregates_only or
                'run_type' not in benchmark or
                benchmark['run_type'] == 'aggregate'):
            for measurement in benchmark['measurements']:
                output_strs += [color_format(use_color,
                                             fmt_str,
                                             BC_HEADER,
                                             benchmark['name'],
                                             first_col_width,
                                             get_color(measurement['time']),
                                             measurement['time'],
                                             get_color(measurement['cpu']),
                                             measurement['cpu'],
                                             measurement['real_time'],
                                             measurement['real_time_other'],
                                             measurement['cpu_time'],
                                             measurement['cpu_time_other'],
                                             endc=BC_ENDC)]

        # After processing the measurements, if requested and
        # if applicable (e.g. u-test exists for given benchmark),
        # print the U test.
        if utest and benchmark['utest']:
            output_strs += print_utest(benchmark['name'],
                                       benchmark['utest'],
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs


###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
                '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
                '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
                '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
            ['BM_hasLabel', '+0.0000', '+0.0000', '1', '1', '1', '1'],
            ['OVERALL_GEOMEAN', '-0.8113', '-0.7779', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'label': '',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000,
                                  'real_time': 10, 'real_time_other': 10,
                                  'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'label': '',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000,
                                  'real_time': 50, 'real_time_other': 25,
                                  'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'label': '',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000,
                                  'real_time': 50, 'real_time_other': 100,
                                  'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'label': '',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100,
                                  'real_time': 100, 'real_time_other': 98.9999999,
                                  'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'label': '',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100,
                                  'real_time': 100, 'real_time_other': 101,
                                  'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'label': '',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000,
                                  'real_time': 100, 'real_time_other': 90,
                                  'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'label': '',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000,
                                  'real_time': 100, 'real_time_other': 110,
                                  'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'label': '',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000,
                                  'real_time': 100, 'real_time_other': 10000,
                                  'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'label': '',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900,
                                  'real_time': 10000, 'real_time_other': 100,
                                  'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'label': '',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000,
                                  'real_time': 100, 'real_time_other': 110,
                                  'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'label': '',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334,
                                  'real_time': 100, 'real_time_other': 67,
                                  'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'label': '',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000,
                                  'real_time': 0.4, 'real_time_other': 0.04,
                                  'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
            {
                'name': 'BM_hasLabel',
                'label': 'a label',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000,
                                  'real_time': 1, 'real_time_other': 1,
                                  'cpu_time': 1, 'cpu_time_other': 1}],
                'time_unit': 's',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'label': '',
                'measurements': [{'real_time': 3.1622776601683826e-06, 'cpu_time': 3.2130844755623912e-06,
                                  'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07,
                                  'time': -0.8112976497120911, 'cpu': -0.7778551721181174}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean', 'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['label'], expected['label'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
            ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08,
                                  'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08,
                                  'time': -0.5000000000000009, 'cpu': -0.5000000000000009}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
                                  'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
                                  'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'real_time_other': 5,
                     'cpu_time': 80,
                     'time': -0.375,
                     'real_time': 8,
                     'cpu_time_other': 53,
                     'cpu': -0.3375
                     }
                ],
                'utest': {},
                'time_unit': u'ns',
                'aggregate_name': ''
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
                                  'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
                                  'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test4_run0.json')
            testOutput2 = os.path.join(testInputs, 'test4_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'whocares',
                'measurements': [
                    {'time': -0.5,
                     'cpu': 0.5,
                     'real_time': 0.01,
                     'real_time_other': 0.005,
                     'cpu_time': 0.10,
                     'cpu_time_other': 0.15}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test4_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate"
        ]

        for n in range(len(self.json['benchmarks']) ** 2):
            random.shuffle(self.json['benchmarks'])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                'benchmarks']
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out['name'], expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs['utest']:
        unittest_instance.assertAlmostEqual(
            lhs['utest']['cpu_pvalue'],
            rhs['utest']['cpu_pvalue'])
        unittest_instance.assertAlmostEqual(
            lhs['utest']['time_pvalue'],
            rhs['utest']['time_pvalue'])
        unittest_instance.assertEqual(
            lhs['utest']['have_optimal_repetitions'],
            rhs['utest']['have_optimal_repetitions'])
    else:
        # lhs is empty; check that rhs is empty too.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;