11cb0ef41Sopenharmony_ci#!/usr/bin/env Rscript
21cb0ef41Sopenharmony_cilibrary(ggplot2);
31cb0ef41Sopenharmony_cilibrary(plyr);
41cb0ef41Sopenharmony_ci
# Locate the directory containing this script and load the shared CLI option
# parser (./_cli.R) from it. Rscript hands the script path to R as a
# `--file=<path>` argument, which is only visible when trailingOnly is FALSE.
args = commandArgs(trailingOnly = FALSE);
# Anchor the pattern so that user-supplied arguments which merely contain
# "--file" (for example `--filename`) are not mistaken for the script path;
# an unanchored match would yield several paths and break source().
script.arg = grep("^--file=", args, value = TRUE);
if (length(script.arg) == 0) {
  stop("unable to determine the script location; run this file with Rscript");
}
dirname = dirname(sub("^--file=", "", script.arg[1]));
# chdir = TRUE makes source() evaluate _cli.R with its own directory as the
# working directory.
source(file.path(dirname, '_cli.R'), chdir = TRUE);
91cb0ef41Sopenharmony_ci
# Validate the command line before doing any work. The usage text is raised
# as an error in two situations, checked in this order:
#   1. --xaxis or --category is absent;
#   2. --plot is present but its value compares equal to TRUE (presumably how
#      the option parser records a --plot given without a filename; confirm
#      against _cli.R).
usage.text = "usage: cat file.csv | Rscript scatter.R [variable=value ...]
  --xaxis    variable   variable name to use as xaxis (required)
  --category variable   variable name to use as colored category (required)
  --plot     filename   save plot to filename
  --log                 use a log-2 scale for xaxis in the plot";

if (is.null(args.options$xaxis) || is.null(args.options$category)) {
  stop(usage.text);
}
if (!is.null(args.options$plot) && args.options$plot == TRUE) {
  stop(usage.text);
}
181cb0ef41Sopenharmony_ci
# Output file for the plot; NULL when --plot was not given, in which case only
# the statistics table is printed.
plot.filename = args.options$plot;

# parse options
x.axis.name = args.options$xaxis;
category.name = args.options$category;
# --log acts as a flag: its mere presence enables the log-2 x axis.
use.log2 = !is.null(args.options$log);

# parse data: the results arrive as CSV on stdin. read.csv already returns a
# data.frame, so no further conversion is needed.
dat = read.csv(file('stdin'), strip.white=TRUE);

# Fail early with a readable message when a column named on the command line
# does not exist in the input, rather than failing later in the grouping step.
missing.columns = setdiff(c(x.axis.name, category.name), names(dat));
if (length(missing.columns) > 0) {
  stop(sprintf(
    'input has no column named: %s',
    paste(missing.columns, collapse=', ')
  ));
}
291cb0ef41Sopenharmony_ci
# Work out which columns are being averaged over: every column that is neither
# a measurement/bookkeeping column ('rate', 'time', 'filename') nor one of the
# two grouping variables chosen on the command line.
reserved.columns = c('rate', 'time', 'filename', x.axis.name, category.name);
aggregate = names(dat);
aggregate = aggregate[!(aggregate %in% reserved.columns)];

# A column holding a single distinct value does not vary across runs, so it is
# not reported as aggregated.
is.varying = vapply(
  aggregate,
  function(column) length(unique(dat[[column]])) != 1,
  logical(1)
);
aggregate = aggregate[is.varying];

# Report the aggregated variables, one per line, followed by a blank separator
# line when at least one was printed.
cat(sprintf('aggregating variable: %s\n', aggregate), sep='');
if (length(aggregate) > 0) {
  cat('\n');
}
491cb0ef41Sopenharmony_ci
# Calculate statistics: one row per (x-axis value, category) combination,
# holding the mean rate and the half-width of its 95 % confidence interval.
stats = ddply(dat, c(x.axis.name, category.name), function(group) {
  samples = group$rate;
  n = length(samples);

  # Half-width = standard error of the mean scaled by the two-sided 95 %
  # Student-t quantile with n - 1 degrees of freedom. With fewer than two
  # samples the variance is undefined, so the interval is reported as NA.
  half.width = if (n > 1) {
    sqrt(var(samples) / n) * qt(0.975, n - 1)
  } else {
    NA
  };

  data.frame(rate = mean(samples), confidence.interval = half.width);
});

# Show the summary table without the row index column.
print(stats, row.names=FALSE);
711cb0ef41Sopenharmony_ci
# Render the plot only when an output filename was requested.
if (!is.null(plot.filename)) {
  # Column names arrive as strings from the command line, hence aes_string.
  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));

  # Optional log-2 x axis.
  if (use.log2) {
    p = p + scale_x_continuous(trans='log2');
  }

  # Error bars span mean +/- confidence interval; groups whose interval is NA
  # are dropped silently (na.rm=TRUE). Points and connecting lines are drawn
  # per category. The title is taken from the first cell of the input data
  # (presumably the benchmark filename; confirm against the CSV layout).
  p = p +
    geom_errorbar(
      aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
      width=.1, na.rm=TRUE
    ) +
    geom_point() +
    geom_line() +
    ylab("rate of operations (higher is better)") +
    ggtitle(dat[1, 1]);

  ggsave(plot.filename, p);
}
87