11cb0ef41Sopenharmony_ci#!/usr/bin/env Rscript
21cb0ef41Sopenharmony_cilibrary(ggplot2);
31cb0ef41Sopenharmony_cilibrary(plyr);
41cb0ef41Sopenharmony_ci
# Locate the directory containing this script (the equivalent of Node's
# __dirname) and load ./_cli.R from it.
# Rscript exposes the script path as a `--file=<path>` argument placed ahead
# of the user supplied arguments, so only the first argument that starts with
# `--file=` is used. A user argument that merely contains "--file" must not be
# mistaken for the script path.
args = commandArgs(trailingOnly = FALSE);
dirname = dirname(
  sub("^--file=", "", grep("^--file=", args, value = TRUE)[1])
);
source(paste0(dirname, '/_cli.R'), chdir = TRUE);
91cb0ef41Sopenharmony_ci
# Remember the requested plot destination (NULL when the option is absent).
plot.filename <- args.options$plot

# Abort with the usage text when --help is present, or when --plot compares
# equal to TRUE instead of carrying a filename.
if (!is.null(args.options$help) ||
    (!is.null(plot.filename) && plot.filename == TRUE)) {
  stop("usage: cat file.csv | Rscript compare.R
  --help           show this message
  --plot filename  save plot to filename")
}
181cb0ef41Sopenharmony_ci
# Read the benchmark results as CSV from standard input. The first three
# columns are read as strings and the last two as numbers.
dat <- data.frame(read.csv(
  file('stdin'),
  colClasses = rep(c('character', 'numeric'), times = c(3, 2))
))

# Build two labels per row from the file name and its configuration: one split
# over two lines (used on the plot axis) and one on a single line (used to
# group the statistics table).
dat$nameTwoLines <- paste0(dat$filename, '\n', dat$configuration)
dat$name <- paste0(dat$filename, ' ', dat$configuration)
271cb0ef41Sopenharmony_ci
# Draw a box plot of the rate per benchmark, filled by binary, and save it to
# the requested file. Nothing is drawn when no plot filename was given.
if (!is.null(plot.filename)) {
  p <- ggplot(data = dat) +
    geom_boxplot(aes(x = nameTwoLines, y = rate, fill = binary)) +
    ylab("rate of operations (higher is better)") +
    xlab("benchmark") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
  ggsave(plot.filename, p)
}
371cb0ef41Sopenharmony_ci
# Standard error of the difference between the two sample means, as used in
# Welch's t-test: the square root of the summed variance-over-sample-size
# terms of both samples.
welch.sd <- function (old.rate, new.rate) {
  squared.se <- function (x) var(x) / length(x)
  sqrt(squared.se(old.rate) + squared.se(new.rate))
}
441cb0ef41Sopenharmony_ci
# Format the half-width of the improvement's confidence interval as a
# percentage string. The margin is expressed relative to old.mu (not new.mu)
# because the mean improvement itself is measured relative to old.mu.
#   shared.se  standard error shared by both samples
#   old.mu     mean the margin is divided by
#   w          object whose $parameter holds the degrees of freedom
#   risk       two-sided risk level, e.g. 0.05
confidence.interval <- function (shared.se, old.mu, w, risk) {
  t.quantile <- qt(1 - (risk / 2), w$parameter)
  relative.margin <- (t.quantile * shared.se) / old.mu
  sprintf("±%.2f%%", relative.margin * 100)
}
521cb0ef41Sopenharmony_ci
# Calculate the statistics table: one row per benchmark name holding the mean
# improvement of the "new" binary over the "old" binary, significance stars,
# and the improvement's confidence interval half-width at the 5%, 1% and 0.1%
# risk levels.
statistics = ddply(dat, "name", function(subdat) {
  old.rate = subset(subdat, binary == "old")$rate;
  new.rate = subset(subdat, binary == "new")$rate;

  # Calculate improvement for the "new" binary compared with the "old" binary
  old.mu = mean(old.rate);
  new.mu = mean(new.rate);
  improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));

  # Row reported when there is too little data to run the test; the entries
  # are overwritten below once the test has been performed.
  r = list(
    confidence = "NA",
    improvement = improvement,
    "accuracy (*)" = "NA",
    "(**)" = "NA",
    "(***)" = "NA"
  );

  # Check if there is enough data to calculate the p-value.
  if (length(old.rate) > 1 && length(new.rate) > 1) {
    # Perform a statistical test to see if there actually is a difference in
    # performance. The test runs on exactly the two samples extracted above
    # (the same ones the shared standard error is computed from), so rows
    # with any other binary label cannot break or skew it.
    w = t.test(new.rate, old.rate);
    shared.se = welch.sd(old.rate, new.rate);

    # Add user-friendly stars to the table. There should be at least one star
    # before you can say that there is an improvement.
    confidence = '';
    if (w$p.value < 0.001) {
      confidence = '***';
    } else if (w$p.value < 0.01) {
      confidence = '**';
    } else if (w$p.value < 0.05) {
      confidence = '*';
    }

    r[["confidence"]] = confidence;
    r[["accuracy (*)"]] = confidence.interval(shared.se, old.mu, w, 0.05);
    r[["(**)"]] = confidence.interval(shared.se, old.mu, w, 0.01);
    r[["(***)"]] = confidence.interval(shared.se, old.mu, w, 0.001);
  }

  # check.names=FALSE keeps the column headers with spaces and parentheses.
  return(data.frame(r, check.names=FALSE));
});
1001cb0ef41Sopenharmony_ci
1011cb0ef41Sopenharmony_ci
# Use the benchmark names as row names so they are left aligned in the
# printed table, then drop the name column itself.
rownames(statistics) <- statistics$name
statistics[["name"]] <- NULL

# Allow output lines of up to 200 characters, then print the table.
options(width = 200)
print(statistics)

# Follow the table with a blank line and a note on how many false positives
# to expect for this number of comparisons at each risk level.
cat("\n", sprintf(
"Be aware that when doing many comparisons the risk of a false-positive
result increases. In this case, there are %d comparisons, you can thus
expect the following amount of false-positive results:
  %.2f false positives, when considering a   5%% risk acceptance (*, **, ***),
  %.2f false positives, when considering a   1%% risk acceptance (**, ***),
  %.2f false positives, when considering a 0.1%% risk acceptance (***)
",
  nrow(statistics),
  nrow(statistics) * 0.05,
  nrow(statistics) * 0.01,
  nrow(statistics) * 0.001
), sep = "")
121