#!/usr/bin/env Rscript
library(ggplot2);
library(plyr);

# Get the directory containing this script (the equivalent of Node's
# __dirname) and load ./_cli.R from it.
# Rscript exposes the script path as a `--file=<path>` argument. The pattern is
# anchored so that other arguments merely containing "--file" cannot match, and
# only the first match is used.
args = commandArgs(trailingOnly = FALSE);
script.arg = grep("^--file=", args, value = TRUE);
if (length(script.arg) == 0) {
  stop("unable to locate the script path; run this file with Rscript");
}
dirname = dirname(sub("^--file=", "", script.arg[1]));
source(paste0(dirname, '/_cli.R'), chdir=TRUE);

# args.options is expected to be defined by the sourced _cli.R (not visible
# here). Print the usage text when --help is present, or when --plot compares
# equal to TRUE (presumably: the flag was given without a filename).
if (!is.null(args.options$help) ||
   (!is.null(args.options$plot) && args.options$plot == TRUE)) {
  stop("usage: cat file.csv | Rscript compare.R
  --help           show this message
  --plot filename  save plot to filename");
}

# NULL when --plot was not given; otherwise the output path handed to ggsave.
plot.filename = args.options$plot;

# Read the benchmark results from stdin. Five columns are expected: three
# character columns followed by two numeric ones. The code below uses the
# columns named filename, configuration, binary and rate.
dat = read.csv(
  file('stdin'),
  colClasses=c('character', 'character', 'character', 'numeric', 'numeric')
);
dat = data.frame(dat);

# Benchmark labels: a two-line variant for the plot axis and a single-line
# variant used as the grouping key of the statistics table.
dat$nameTwoLines = paste0(dat$filename, '\n', dat$configuration);
dat$name = paste0(dat$filename, ' ', dat$configuration);

# Create a box plot
if (!is.null(plot.filename)) {
  p = ggplot(data=dat);
  p = p + geom_boxplot(aes(x=nameTwoLines, y=rate, fill=binary));
  p = p + ylab("rate of operations (higher is better)");
  p = p + xlab("benchmark");
  # Rotate the x-axis labels so long benchmark names do not overlap.
  p = p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5));
  ggsave(plot.filename, p);
}
# Computes the shared standard error, as used in Welch's t-test.
#
# old.rate, new.rate: numeric vectors of measurements for the two groups.
# Returns sqrt(var(old.rate) / n_old + var(new.rate) / n_new). var() yields NA
# for a vector with fewer than two elements, so callers should check the
# sample sizes first.
welch.sd = function (old.rate, new.rate) {
  old.se.squared = var(old.rate) / length(old.rate)
  new.se.squared = var(new.rate) / length(new.rate)
  return(sqrt(old.se.squared + new.se.squared))
}

# Calculate the improvement confidence interval. The improvement is calculated
# by dividing by old.mu and not new.mu, because old.mu is what the mean
# improvement is calculated relative to.
#
# shared.se: standard error of the difference, as returned by welch.sd.
# old.mu:    mean of the "old" measurements.
# w:         object whose $parameter holds the degrees of freedom (for
#            example the result of t.test).
# risk:      two-sided risk level, e.g. 0.05 for a 95% interval.
# Returns the half-width of the interval as a percentage of old.mu, formatted
# like "±1.23%".
confidence.interval = function (shared.se, old.mu, w, risk) {
  interval = qt(1 - (risk / 2), w$parameter) * shared.se;
  return(sprintf("±%.2f%%", (interval / old.mu) * 100))
}

# Calculate the statistics table.
# Build one row per benchmark name, comparing the rates measured with the
# "old" binary against those measured with the "new" binary.
#
# Columns of the result:
#   confidence    significance stars derived from the t-test p-value
#   improvement   relative change of the mean rate, in percent of the old mean
#   accuracy (*), (**), (***)
#                 half-width of the confidence interval at 5%, 1% and 0.1%
#                 risk, in percent of the old mean
statistics = ddply(dat, "name", function(subdat) {
  old.rate = subset(subdat, binary == "old")$rate;
  new.rate = subset(subdat, binary == "new")$rate;

  # Calculate improvement for the "new" binary compared with the "old" binary
  old.mu = mean(old.rate);
  new.mu = mean(new.rate);
  improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));

  # Default row, used when there are too few samples for a t-test.
  r = list(
    confidence = "NA",
    improvement = improvement,
    "accuracy (*)" = "NA",
    "(**)" = "NA",
    "(***)" = "NA"
  );

  # Check if there is enough data to calculate the p-value.
  if (length(old.rate) > 1 && length(new.rate) > 1) {
    # Perform a statistical test to see if there actually is a difference in
    # performance.
    w = t.test(rate ~ binary, data=subdat);
    shared.se = welch.sd(old.rate, new.rate)

    # Add user-friendly stars to the table. There should be at least one star
    # before you can say that there is an improvement.
    confidence = '';
    if (w$p.value < 0.001) {
      confidence = '***';
    } else if (w$p.value < 0.01) {
      confidence = '**';
    } else if (w$p.value < 0.05) {
      confidence = '*';
    }

    r = list(
      confidence = confidence,
      improvement = improvement,
      "accuracy (*)" = confidence.interval(shared.se, old.mu, w, 0.05),
      "(**)" = confidence.interval(shared.se, old.mu, w, 0.01),
      "(***)" = confidence.interval(shared.se, old.mu, w, 0.001)
    );
  }

  # check.names=FALSE keeps column names such as "accuracy (*)" verbatim.
  return(data.frame(r, check.names=FALSE));
});


# Set the benchmark names as the row.names to left align them in the print.
row.names(statistics) = statistics$name;
statistics$name = NULL;

# Widen the console so the table is not wrapped when printed.
options(width = 200);
print(statistics);
cat("\n")
# Each row is one comparison; the expected number of false positives at a
# given risk level is the number of comparisons multiplied by that risk.
cat(sprintf(
"Be aware that when doing many comparisons the risk of a false-positive
result increases. In this case, there are %d comparisons, you can thus
expect the following amount of false-positive results:
  %.2f false positives, when considering a 5%% risk acceptance (*, **, ***),
  %.2f false positives, when considering a 1%% risk acceptance (**, ***),
  %.2f false positives, when considering a 0.1%% risk acceptance (***)
",
nrow(statistics),
nrow(statistics) * 0.05,
nrow(statistics) * 0.01,
nrow(statistics) * 0.001))