1"""report.py - Utilities for reporting statistics about benchmark results 2""" 3 4import unittest 5import os 6import re 7import copy 8import random 9 10from scipy.stats import mannwhitneyu, gmean 11from numpy import array 12 13 14class BenchmarkColor(object): 15 def __init__(self, name, code): 16 self.name = name 17 self.code = code 18 19 def __repr__(self): 20 return '%s%r' % (self.__class__.__name__, 21 (self.name, self.code)) 22 23 def __format__(self, format): 24 return self.code 25 26 27# Benchmark Colors Enumeration 28BC_NONE = BenchmarkColor('NONE', '') 29BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') 30BC_CYAN = BenchmarkColor('CYAN', '\033[96m') 31BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') 32BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') 33BC_HEADER = BenchmarkColor('HEADER', '\033[92m') 34BC_WARNING = BenchmarkColor('WARNING', '\033[93m') 35BC_WHITE = BenchmarkColor('WHITE', '\033[97m') 36BC_FAIL = BenchmarkColor('FAIL', '\033[91m') 37BC_ENDC = BenchmarkColor('ENDC', '\033[0m') 38BC_BOLD = BenchmarkColor('BOLD', '\033[1m') 39BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') 40 41UTEST_MIN_REPETITIONS = 2 42UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. 43UTEST_COL_NAME = "_pvalue" 44 45_TIME_UNIT_TO_SECONDS_MULTIPLIER = { 46 "s": 1.0, 47 "ms": 1e-3, 48 "us": 1e-6, 49 "ns": 1e-9, 50} 51 52 53def color_format(use_color, fmt_str, *args, **kwargs): 54 """ 55 Return the result of 'fmt_str.format(*args, **kwargs)' after transforming 56 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color' 57 is False then all color codes in 'args' and 'kwargs' are replaced with 58 the empty string. 59 """ 60 assert use_color is True or use_color is False 61 if not use_color: 62 args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE 63 for arg in args] 64 kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE 65 for key, arg in kwargs.items()} 66 return fmt_str.format(*args, **kwargs) 67 68 69def find_longest_name(benchmark_list): 70 """ 71 Return the length of the longest benchmark name in a given list of 72 benchmark JSON objects 73 """ 74 longest_name = 1 75 for bc in benchmark_list: 76 if len(bc['name']) > longest_name: 77 longest_name = len(bc['name']) 78 return longest_name 79 80 81def calculate_change(old_val, new_val): 82 """ 83 Return a float representing the decimal change between old_val and new_val. 84 """ 85 if old_val == 0 and new_val == 0: 86 return 0.0 87 if old_val == 0: 88 return float(new_val - old_val) / (float(old_val + new_val) / 2) 89 return float(new_val - old_val) / abs(old_val) 90 91 92def filter_benchmark(json_orig, family, replacement=""): 93 """ 94 Apply a filter to the json, and only leave the 'family' of benchmarks. 95 """ 96 regex = re.compile(family) 97 filtered = {} 98 filtered['benchmarks'] = [] 99 for be in json_orig['benchmarks']: 100 if not regex.search(be['name']): 101 continue 102 filteredbench = copy.deepcopy(be) # Do NOT modify the old name! 103 filteredbench['name'] = regex.sub(replacement, filteredbench['name']) 104 filtered['benchmarks'].append(filteredbench) 105 return filtered 106 107 108def get_unique_benchmark_names(json): 109 """ 110 While *keeping* the order, give all the unique 'names' used for benchmarks. 
111 """ 112 seen = set() 113 uniqued = [x['name'] for x in json['benchmarks'] 114 if x['name'] not in seen and 115 (seen.add(x['name']) or True)] 116 return uniqued 117 118 119def intersect(list1, list2): 120 """ 121 Given two lists, get a new list consisting of the elements only contained 122 in *both of the input lists*, while preserving the ordering. 123 """ 124 return [x for x in list1 if x in list2] 125 126 127def is_potentially_comparable_benchmark(x): 128 return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) 129 130 131def partition_benchmarks(json1, json2): 132 """ 133 While preserving the ordering, find benchmarks with the same names in 134 both of the inputs, and group them. 135 (i.e. partition/filter into groups with common name) 136 """ 137 json1_unique_names = get_unique_benchmark_names(json1) 138 json2_unique_names = get_unique_benchmark_names(json2) 139 names = intersect(json1_unique_names, json2_unique_names) 140 partitions = [] 141 for name in names: 142 time_unit = None 143 # Pick the time unit from the first entry of the lhs benchmark. 144 # We should be careful not to crash with unexpected input. 145 for x in json1['benchmarks']: 146 if (x['name'] == name and is_potentially_comparable_benchmark(x)): 147 time_unit = x['time_unit'] 148 break 149 if time_unit is None: 150 continue 151 # Filter by name and time unit. 152 # All the repetitions are assumed to be comparable. 153 lhs = [x for x in json1['benchmarks'] if x['name'] == name and 154 x['time_unit'] == time_unit] 155 rhs = [x for x in json2['benchmarks'] if x['name'] == name and 156 x['time_unit'] == time_unit] 157 partitions.append([lhs, rhs]) 158 return partitions 159 160 161def get_timedelta_field_as_seconds(benchmark, field_name): 162 """ 163 Get value of field_name field of benchmark, which is time with time unit 164 time_unit, as time in seconds. 165 """ 166 timedelta = benchmark[field_name] 167 time_unit = benchmark.get('time_unit', 's') 168 return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit) 169 170 171def calculate_geomean(json): 172 """ 173 Extract all real/cpu times from all the benchmarks as seconds, 174 and calculate their geomean. 175 """ 176 times = [] 177 for benchmark in json['benchmarks']: 178 if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate': 179 continue 180 times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'), 181 get_timedelta_field_as_seconds(benchmark, 'cpu_time')]) 182 return gmean(times) if times else array([]) 183 184 185def extract_field(partition, field_name): 186 # The count of elements may be different. We want *all* of them. 187 lhs = [x[field_name] for x in partition[0]] 188 rhs = [x[field_name] for x in partition[1]] 189 return [lhs, rhs] 190 191 192def calc_utest(timings_cpu, timings_time): 193 min_rep_cnt = min(len(timings_time[0]), 194 len(timings_time[1]), 195 len(timings_cpu[0]), 196 len(timings_cpu[1])) 197 198 # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? 

def calculate_geomean(json):
    """
    Extract all real/cpu times from all the benchmarks as seconds,
    and calculate their geomean.
    """
    times = []
    for benchmark in json['benchmarks']:
        if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate':
            continue
        times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'),
                      get_timedelta_field_as_seconds(benchmark, 'cpu_time')])
    return gmean(times) if times else array([])


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None:
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]


def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]['name']
        label = partition[0][0]['label'] if 'label' in partition[0][0] else ''
        time_unit = partition[0][0]['time_unit']
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition counts.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append({
                'real_time': bn['real_time'],
                'cpu_time': bn['cpu_time'],
                'real_time_other': other_bench['real_time'],
                'cpu_time_other': other_bench['cpu_time'],
                'time': calculate_change(bn['real_time'], other_bench['real_time']),
                'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            })

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time)
            if cpu_pvalue and time_pvalue:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else ''
            aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else ''
            diff_report.append({
                'name': benchmark_name,
                'label': label,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append({
            'name': 'OVERALL_GEOMEAN',
            'label': '',
            'measurements': [{
                'real_time': lhs_gmean[0],
                'cpu_time': lhs_gmean[1],
                'real_time_other': rhs_gmean[0],
                'cpu_time_other': rhs_gmean[1],
                'time': calculate_change(lhs_gmean[0], rhs_gmean[0]),
                'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1])
            }],
            'time_unit': 's',
            'run_type': 'aggregate',
            'aggregate_name': 'geomean',
            'utest': {}
        })

    return diff_report
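
# Illustrative shape of one diff_report entry (not part of the original
# module); the values mirror the 'BM_2xFaster' case exercised by the unit
# tests below:
#
#   {'name': 'BM_2xFaster', 'label': '', 'time_unit': 'ns',
#    'run_type': '', 'aggregate_name': '', 'utest': {},
#    'measurements': [{'time': -0.5, 'cpu': -0.5,
#                      'real_time': 50, 'real_time_other': 25,
#                      'cpu_time': 50, 'cpu_time_other': 25}]}
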

def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Pretty-print the difference report 'json_diff_report' (as produced by
    get_difference_report), one output line per measurement.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if not include_aggregates_only or 'run_type' not in benchmark or benchmark['run_type'] == 'aggregate':
            for measurement in benchmark['measurements']:
                output_strs += [color_format(use_color,
                                             fmt_str,
                                             BC_HEADER,
                                             benchmark['name'],
                                             first_col_width,
                                             get_color(measurement['time']),
                                             measurement['time'],
                                             get_color(measurement['cpu']),
                                             measurement['cpu'],
                                             measurement['real_time'],
                                             measurement['real_time_other'],
                                             measurement['cpu_time'],
                                             measurement['cpu_time_other'],
                                             endc=BC_ENDC)]

            # After processing the measurements, if requested and
            # if applicable (e.g. u-test exists for given benchmark),
            # print the U test.
            if utest and benchmark['utest']:
                output_strs += print_utest(benchmark['name'],
                                           benchmark['utest'],
                                           utest_alpha=utest_alpha,
                                           first_col_width=first_col_width,
                                           use_color=use_color)

    return output_strs

###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
                '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
                '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
                '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
            ['BM_hasLabel', '+0.0000', '+0.0000', '1', '1', '1', '1'],
            ['OVERALL_GEOMEAN', '-0.8113', '-0.7779', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'label': '',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000,
                                  'real_time': 10, 'real_time_other': 10,
                                  'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'label': '',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000,
                                  'real_time': 50, 'real_time_other': 25,
                                  'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'label': '',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000,
                                  'real_time': 50, 'real_time_other': 100,
                                  'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'label': '',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100,
                                  'real_time': 100, 'real_time_other': 98.9999999,
                                  'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'label': '',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100,
                                  'real_time': 100, 'real_time_other': 101,
                                  'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'label': '',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000,
                                  'real_time': 100, 'real_time_other': 90,
                                  'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'label': '',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000,
                                  'real_time': 100, 'real_time_other': 110,
                                  'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'label': '',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000,
                                  'real_time': 100, 'real_time_other': 10000,
                                  'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'label': '',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900,
                                  'real_time': 10000, 'real_time_other': 100,
                                  'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'label': '',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000,
                                  'real_time': 100, 'real_time_other': 110,
                                  'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'label': '',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334,
                                  'real_time': 100, 'real_time_other': 67,
                                  'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'label': '',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000,
                                  'real_time': 0.4, 'real_time_other': 0.04,
                                  'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
            {
                'name': 'BM_hasLabel',
                'label': 'a label',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000,
                                  'real_time': 1, 'real_time_other': 1,
                                  'cpu_time': 1, 'cpu_time_other': 1}],
                'time_unit': 's',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'label': '',
                'measurements': [{'real_time': 3.1622776601683826e-06, 'cpu_time': 3.2130844755623912e-06,
                                  'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07,
                                  'time': -0.8112976497120911, 'cpu': -0.7778551721181174}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean', 'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['label'], expected['label'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
            ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08,
                                  'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08,
                                  'time': -0.5000000000000009, 'cpu': -0.5000000000000009}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                     'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
                                  'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
                                  'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
            ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                     'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'real_time_other': 5,
                     'cpu_time': 80,
                     'time': -0.375,
                     'real_time': 8,
                     'cpu_time_other': 53,
                     'cpu': -0.3375
                     }
                ],
                'utest': {},
                'time_unit': u'ns',
                'aggregate_name': ''
            },
            {
                'name': 'OVERALL_GEOMEAN',
                'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
                                  'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
                                  'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
                'time_unit': 's',
                'run_type': 'aggregate',
                'aggregate_name': 'geomean',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test4_run0.json')
            testOutput2 = os.path.join(testInputs, 'test4_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'whocares',
                'measurements': [
                    {'time': -0.5,
                     'cpu': 0.5,
                     'real_time': 0.01,
                     'real_time_other': 0.005,
                     'cpu_time': 0.10,
                     'cpu_time_other': 0.15}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test4_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate"
        ]

        for n in range(len(self.json['benchmarks']) ** 2):
            random.shuffle(self.json['benchmarks'])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                'benchmarks']
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out['name'], expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs['utest']:
        unittest_instance.assertAlmostEqual(
            lhs['utest']['cpu_pvalue'],
            rhs['utest']['cpu_pvalue'])
        unittest_instance.assertAlmostEqual(
            lhs['utest']['time_pvalue'],
            rhs['utest']['time_pvalue'])
        unittest_instance.assertEqual(
            lhs['utest']['have_optimal_repetitions'],
            rhs['utest']['have_optimal_repetitions'])
    else:
        # lhs is empty. Assert that rhs is empty too.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold calculated values, so compare them
        # with the almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;