summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- benchmark/compare.R 41
-rw-r--r-- benchmark/scatter.R 17
2 files changed, 37 insertions, 21 deletions
diff --git a/benchmark/compare.R b/benchmark/compare.R
index 01beb38046..1200340f32 100644
--- a/benchmark/compare.R
+++ b/benchmark/compare.R
@@ -33,30 +33,39 @@ if (!is.null(plot.filename)) {
# Print a table with results
statistics = ddply(dat, "name", function(subdat) {
- # Perform a statistics test to see of there actually is a difference in
- # performace.
- w = t.test(rate ~ binary, data=subdat);
+ old.rate = subset(subdat, binary == "old")$rate;
+ new.rate = subset(subdat, binary == "new")$rate;
# Calculate improvement for the "new" binary compared with the "old" binary
- new_mu = mean(subset(subdat, binary == "new")$rate);
- old_mu = mean(subset(subdat, binary == "old")$rate);
- improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
+ old.mu = mean(old.rate);
+ new.mu = mean(new.rate);
+ improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));
- # Add user friendly stars to the table. There should be at least one star
- # before you can say that there is an improvement.
- significant = '';
- if (w$p.value < 0.001) {
- significant = '***';
- } else if (w$p.value < 0.01) {
- significant = '**';
- } else if (w$p.value < 0.05) {
- significant = '*';
+ p.value = NA;
+ significant = 'NA';
+ # Check if there is enough data to calculate the p-value
+ if (length(old.rate) > 1 && length(new.rate) > 1) {
+ # Perform a statistical test to see if there actually is a difference in
+ # performance.
+ w = t.test(rate ~ binary, data=subdat);
+ p.value = w$p.value;
+
+ # Add user friendly stars to the table. There should be at least one star
+ # before you can say that there is an improvement.
+ significant = '';
+ if (p.value < 0.001) {
+ significant = '***';
+ } else if (p.value < 0.01) {
+ significant = '**';
+ } else if (p.value < 0.05) {
+ significant = '*';
+ }
}
r = list(
improvement = improvement,
significant = significant,
- p.value = w$p.value
+ p.value = p.value
);
return(data.frame(r));
});
diff --git a/benchmark/scatter.R b/benchmark/scatter.R
index 7b98611482..10e099e430 100644
--- a/benchmark/scatter.R
+++ b/benchmark/scatter.R
@@ -51,13 +51,17 @@ if (length(aggregate) > 0) {
stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
rate = subdat$rate;
- # calculate standard error of the mean
- se = sqrt(var(rate)/length(rate));
+ # calculate confidence interval of the mean
+ ci = NA;
+ if (length(rate) > 1) {
+ se = sqrt(var(rate)/length(rate));
+ ci = se * qt(0.975, length(rate) - 1)
+ }
# calculate mean and 95 % confidence interval
r = list(
rate = mean(rate),
- confidence.interval = se * qt(0.975, length(rate) - 1)
+ confidence.interval = ci
);
return(data.frame(r));
@@ -66,11 +70,14 @@ stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
print(stats, row.names=F);
if (!is.null(plot.filename)) {
- p = ggplot(stats, aes_string(x=x.axis.name, y='mean', colour=category.name));
+ p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
if (use.log2) {
p = p + scale_x_continuous(trans='log2');
}
- p = p + geom_errorbar(aes(ymin=mean-confidence.interval, ymax=mean+confidence.interval), width=.1);
+ p = p + geom_errorbar(
+ aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
+ width=.1, na.rm=TRUE
+ );
p = p + geom_point();
p = p + ylab("rate of operations (higher is better)");
p = p + ggtitle(dat[1, 1]);