#!/usr/bin/env python
# Copyright 2017 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
python %prog

Compare perf trybot JSON files and output the results into a pleasing HTML page.
Examples:
  %prog -t "ia32 results" Result,../result.json Master,/path-to/master.json -o results.html
  %prog -t "x64 results" ../result.json master.json -o results.html
'''

from collections import OrderedDict
import json
import math
from argparse import ArgumentParser
import os
import shutil
import sys
import tempfile

try:
    # The `commands` module only exists on Python 2 and nothing in this file
    # uses it; the import is kept (guarded) so the script also loads on
    # Python 3.
    import commands
except ImportError:
    commands = None

# Minimum absolute percentage difference for a result to be colored.
PERCENT_CONSIDERED_SIGNIFICANT = 0.5
# Differences whose probability value is below this are rendered in bold.
PROBABILITY_CONSIDERED_SIGNIFICANT = 0.02
PROBABILITY_CONSIDERED_MEANINGLESS = 0.05


def _EscapeHtml(text):
    """Return `text` with the characters special to HTML replaced by entities.

    Suite, benchmark and run names come straight from the JSON input and the
    command line, so they are escaped before being embedded in markup.
    """
    return (text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace('"', "&quot;"))


class Statistics(object):
    """Small collection of statistics helpers used to compare two runs."""

    # (z threshold, value returned when z exceeds it), checked top to bottom.
    # Values from http://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html
    _Z_TABLE = (
        (2.575829, 0),     # p 0.005: two sided < 0.01
        (2.326348, 0.01),  # p 0.010
        (2.170091, 0.02),  # p 0.015
        (2.053749, 0.03),  # p 0.020
        (1.959964, 0.04),  # p 0.025: two sided < 0.05
        (1.880793, 0.05),  # p 0.030
        (1.811910, 0.06),  # p 0.035
        (1.750686, 0.07),  # p 0.040
        (1.695397, 0.08),  # p 0.045
        (1.644853, 0.09),  # p 0.050: two sided < 0.10
        (1.281551, 0.10),  # p 0.100: two sided < 0.20
    )

    @staticmethod
    def Mean(values):
        """Return the arithmetic mean of `values` as a float.

        Raises ZeroDivisionError when `values` is empty.
        """
        return float(sum(values)) / len(values)

    @staticmethod
    def Variance(values, average):
        """Return the list of squared deviations of `values` from `average`.

        A real list is returned (not a lazy `map` object) because callers
        take both its sum and its length.
        """
        return [(x - average) ** 2 for x in values]

    @staticmethod
    def StandardDeviation(values, average):
        """Return the square root of the mean squared deviation."""
        return math.sqrt(
            Statistics.Mean(Statistics.Variance(values, average)))

    @staticmethod
    def ComputeZ(baseline_avg, baseline_sigma, mean, n):
        """Return the absolute z value of `mean` against the baseline.

        A baseline without any spread yields the fixed value 1000.0 instead
        of dividing by zero.
        """
        if baseline_sigma == 0:
            return 1000.0
        return abs((mean - baseline_avg) / (baseline_sigma / math.sqrt(n)))

    @staticmethod
    def ComputeProbability(z):
        """Map a z value to a coarse probability using a fixed lookup table."""
        for threshold, probability in Statistics._Z_TABLE:
            if z > threshold:
                return probability
        return 0.20  # two sided p >= 0.20


class ResultsDiff(object):
    """Outcome of comparing one benchmark result against another."""

    def __init__(self, significant, notable, percentage_string):
        self.significant_ = significant
        self.notable_ = notable
        self.percentage_string_ = percentage_string

    def percentage_string(self):
        return self.percentage_string_

    def isSignificant(self):
        return self.significant_

    def isNotablyPositive(self):
        return self.notable_ > 0

    def isNotablyNegative(self):
        return self.notable_ < 0


class BenchmarkResult(object):
    """Aggregated measurements of one benchmark in one run."""

    def __init__(self, units, count, result, sigma):
        self.units_ = units
        self.count_ = float(count)
        self.result_ = float(result)
        self.sigma_ = float(sigma)

    def Compare(self, other):
        """Compare this result against `other` and return a ResultsDiff.

        For the unit "score" the percentage is self relative to other; for
        every other unit the ratio is inverted. Prints a message and exits
        the process with status 1 when the two results have different units.
        """
        if self.units_ != other.units_:
            print("Incompatible units: %s and %s" %
                  (self.units_, other.units_))
            sys.exit(1)

        significant = False
        notable = 0
        percentage_string = ""
        # compute notability and significance.
        if self.units_ == "score":
            numerator, denominator = self.result_, other.result_
        else:
            numerator, denominator = other.result_, self.result_
        if denominator == 0:
            # No meaningful percentage exists; report "no difference" rather
            # than aborting the whole report with a ZeroDivisionError.
            return ResultsDiff(significant, notable, percentage_string)
        compare_num = 100 * numerator / denominator - 100

        if abs(compare_num) > 0.1:
            percentage_string = "%3.1f" % (compare_num)
            z = Statistics.ComputeZ(other.result_, other.sigma_,
                                    self.result_, self.count_)
            p = Statistics.ComputeProbability(z)
            if p < PROBABILITY_CONSIDERED_SIGNIFICANT:
                significant = True

            if compare_num >= PERCENT_CONSIDERED_SIGNIFICANT:
                notable = 1
            elif compare_num <= -PERCENT_CONSIDERED_SIGNIFICANT:
                notable = -1

        return ResultsDiff(significant, notable, percentage_string)

    def result(self):
        return self.result_

    def sigma(self):
        return self.sigma_


class Benchmark(object):
    """A named benchmark with one BenchmarkResult per run name."""

    def __init__(self, name):
        self.name_ = name
        self.runs_ = {}

    def name(self):
        return self.name_

    def getResult(self, run_name):
        """Return the result recorded for `run_name`, or None."""
        return self.runs_.get(run_name)

    def appendResult(self, run_name, trace):
        """Record the measurements of `trace` under `run_name`.

        Reads the 'results' and 'units' entries of `trace`. A truthy
        'stddev' entry is used as-is; otherwise the deviation is computed
        from the individual results.
        """
        # Materialize the values: they are measured with len() and iterated
        # more than once.
        values = [float(value) for value in trace['results']]
        count = len(values)
        mean = Statistics.Mean(values)
        stddev = float(trace.get('stddev') or
                       Statistics.StandardDeviation(values, mean))
        units = trace["units"]
        self.runs_[run_name] = BenchmarkResult(units, count, mean, stddev)


class BenchmarkSuite(object):
    """A named group of benchmarks."""

    def __init__(self, name):
        self.name_ = name
        self.benchmarks_ = {}

    def SortedTestKeys(self):
        """Return the benchmark names sorted, with "Total" moved to the end."""
        keys = sorted(self.benchmarks_)
        t = "Total"
        if t in keys:
            keys.remove(t)
            keys.append(t)
        return keys

    def name(self):
        return self.name_

    def getBenchmark(self, benchmark_name):
        """Return the benchmark called `benchmark_name`, creating it if new."""
        benchmark_object = self.benchmarks_.get(benchmark_name)
        if benchmark_object is None:
            benchmark_object = Benchmark(benchmark_name)
            self.benchmarks_[benchmark_name] = benchmark_object
        return benchmark_object


class ResultTableRenderer(object):
    """Buffers the HTML report and writes it to a file or to stdout.

    NOTE(review): the copy of this file under review had the HTML tags
    stripped from its string literals (leaving broken format strings), so the
    markup below - and any stylesheet the header may have carried - was
    reconstructed. Confirm it against version control.
    """

    def __init__(self, output_file):
        self.benchmarks_ = []
        self.print_output_ = []
        self.output_file_ = output_file

    def Print(self, str_data):
        """Append one line of output to the buffer."""
        self.print_output_.append(str_data)

    def FlushOutput(self):
        """Write the buffered lines to the output file (or stdout) and clear
        the buffer, so a second flush does not repeat earlier output."""
        string_data = "\n".join(self.print_output_)
        self.print_output_ = []
        if self.output_file_:
            # create a file
            with open(self.output_file_, "w") as text_file:
                text_file.write(string_data)
        else:
            print(string_data)

    def bold(self, data):
        return "<b>%s</b>" % data

    def red(self, data):
        return "<font color=\"red\">%s</font>" % data

    def green(self, data):
        return "<font color=\"green\">%s</font>" % data

    def PrintHeader(self):
        data = """<html>
<head>
<title>Output</title>
</head>
<body>
"""
        self.Print(data)

    def StartSuite(self, suite_name, run_names):
        """Emit the heading and table header of one suite.

        The first run gets a single value column; every further run gets a
        value column followed by a percentage column.
        """
        escaped_name = _EscapeHtml(suite_name)
        self.Print("<h2>")
        self.Print("<a name=\"%s\">%s</a> <a href=\"#top\">(top)</a>" %
                   (escaped_name, escaped_name))
        self.Print("</h2>")
        self.Print("<table>")
        self.Print("<thead>")
        self.Print("  <th>Test</th>")
        main_run = None
        for run_name in run_names:
            self.Print("  <th>%s</th>" % _EscapeHtml(run_name))
            if main_run is None:
                main_run = run_name
            else:
                self.Print("  <th>%</th>")
        self.Print("</thead>")
        self.Print("<tbody>")

    def FinishSuite(self):
        self.Print("</tbody>")
        self.Print("</table>")

    def StartBenchmark(self, benchmark_name):
        self.Print("  <tr>")
        self.Print("    <td>%s</td>" % _EscapeHtml(benchmark_name))

    def FinishBenchmark(self):
        self.Print("  </tr>")

    def PrintResult(self, run):
        """Emit the value cell for `run`, or an empty cell when it is None."""
        if run is None:
            self.PrintEmptyCell()
            return
        self.Print("    <td>%3.1f</td>" % run.result())

    def PrintComparison(self, run, main_run):
        """Emit the percentage cell comparing `run` against `main_run`.

        Significant differences are bold; notable ones are colored green
        (positive) or red (negative).
        """
        if run is None or main_run is None:
            self.PrintEmptyCell()
            return
        diff = run.Compare(main_run)
        res = diff.percentage_string()
        if diff.isSignificant():
            res = self.bold(res)
        if diff.isNotablyPositive():
            res = self.green(res)
        elif diff.isNotablyNegative():
            res = self.red(res)
        self.Print("    <td>%s</td>" % res)

    def PrintEmptyCell(self):
        self.Print("    <td></td>")

    def StartTOC(self, title):
        self.Print("<h1>%s</h1>" % _EscapeHtml(title))
        self.Print("<ul>")

    def FinishTOC(self):
        self.Print("</ul>")

    def PrintBenchmarkLink(self, benchmark):
        escaped_name = _EscapeHtml(benchmark)
        self.Print("<li><a href=\"#" + escaped_name + "\">" + escaped_name +
                   "</a></li>")

    def PrintFooter(self):
        data = """</body>
</html>
"""
        self.Print(data)


def _LoadBenchmarkSuites(json_file_lists):
    """Parse all result files; return (suites by name, ordered run names)."""
    benchmark_suites = {}
    run_names = OrderedDict()  # used as an insertion-ordered set

    for json_file_list in json_file_lists:
        run_name = json_file_list[0]
        if run_name.endswith(".json"):
            # The first item in the list is also a file name
            run_name = os.path.splitext(run_name)[0]
            filenames = json_file_list
        else:
            filenames = json_file_list[1:]

        for filename in filenames:
            print("Processing result set \"%s\", file: %s" %
                  (run_name, filename))
            with open(filename) as json_data:
                data = json.load(json_data)

            run_names[run_name] = 0

            for error in data.get("errors", []):
                print("Error: %s" % (error,))

            for trace in data["traces"]:
                # The first graph name is the suite; the rest name the test.
                suite_name = trace["graphs"][0]
                benchmark_name = "/".join(trace["graphs"][1:])

                benchmark_suite_object = benchmark_suites.get(suite_name)
                if benchmark_suite_object is None:
                    benchmark_suite_object = BenchmarkSuite(suite_name)
                    benchmark_suites[suite_name] = benchmark_suite_object

                benchmark_object = benchmark_suite_object.getBenchmark(
                    benchmark_name)
                benchmark_object.appendResult(run_name, trace)

    return benchmark_suites, run_names


def Render(args):
    """Load the JSON files named in `args` and write the HTML report.

    Reads `args.json_file_list`, `args.title` and `args.output`. The first
    run is the reference every other run is compared against.
    """
    benchmark_suites, run_names = _LoadBenchmarkSuites(args.json_file_list)
    suite_names = sorted(benchmark_suites)

    renderer = ResultTableRenderer(args.output)
    renderer.PrintHeader()

    title = args.title or "Benchmark results"
    renderer.StartTOC(title)
    for suite_name in suite_names:
        renderer.PrintBenchmarkLink(suite_name)
    renderer.FinishTOC()

    for suite_name in suite_names:
        benchmark_suite_object = benchmark_suites[suite_name]
        renderer.StartSuite(suite_name, run_names)
        for benchmark_name in benchmark_suite_object.SortedTestKeys():
            benchmark_object = benchmark_suite_object.getBenchmark(
                benchmark_name)

            renderer.StartBenchmark(benchmark_name)
            main_run = None
            main_result = None
            for run_name in run_names:
                result = benchmark_object.getResult(run_name)
                renderer.PrintResult(result)
                if main_run is None:
                    main_run = run_name
                    main_result = result
                else:
                    renderer.PrintComparison(result, main_result)
            renderer.FinishBenchmark()
        renderer.FinishSuite()

    renderer.PrintFooter()
    renderer.FlushOutput()


def CommaSeparatedList(arg):
    """Split a command line argument on commas and return the list of parts."""
    return arg.split(',')


def Main():
    """Parse the command line and render the comparison page."""
    parser = ArgumentParser(description="Compare perf trybot JSON files and " +
                            "output the results into a pleasing HTML page.")
    parser.add_argument("-t", "--title", dest="title",
                        help="Optional title of the web page")
    parser.add_argument("-o", "--output", dest="output",
                        help="Write html output to this file rather than stdout")
    parser.add_argument("json_file_list", nargs="+", type=CommaSeparatedList,
                        help="[column name,]./path-to/result.json - a comma-separated" +
                        " list of optional column name and paths to json files")

    args = parser.parse_args()
    Render(args)


if __name__ == '__main__':
    Main()