deps/v8/test/benchmarks/csuite/compare-baseline.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264

#!/usr/bin/python
# Copyright 2018 the V8 project authors. All rights reserved.

'''
python %prog [options] [baseline_files]

Compare benchmark results from the benchmark runner against one or
more baselines. You can either pipe the result of the benchmark
runner directly into this script or specify the results file with
the -f option.
'''

import csv
import math
from optparse import OptionParser
import os
import sys

# Thresholds used when highlighting results.
# PercentColor only colors changes of at least this many percent.
PERCENT_CONSIDERED_SIGNIFICANT = 0.5
# Below this value PercentColor adds $BOLD; NormalizedSigmaToString also
# compares the normalized sigma against it.
PROBABILITY_CONSIDERED_SIGNIFICANT = 0.02
# Above this value PercentColor drops the color altogether.
PROBABILITY_CONSIDERED_MEANINGLESS = 0.05

# ANSI escape sequences substituted for the $... markers by ColorText.
RESET_SEQ = "\033[0m"
RED_SEQ = "\033[31m"
GREEN_SEQ = "\033[32m"
BLUE_SEQ = "\033[34m"
BOLD_SEQ = "\033[1m"

# Test names for which ProcessOneResultLine computes the percentage as
# result/baseline instead of baseline/result.
v8_benchmarks = [
    "V8",
    "Octane",
    "Richards",
    "DeltaBlue",
    "Crypto",
    "EarleyBoyer",
    "RayTrace",
    "RegExp",
    "Splay",
    "SplayLatency",
    "NavierStokes",
    "PdfJS",
    "Mandreel",
    "MandreelLatency",
    "Gameboy",
    "CodeLoad",
    "Box2D",
    "zlib",
    "Typescript",
]

# Row names that close a suite in the parsed input; ProcessResults also
# walks the suites in this order.
suite_names = [
    "V8",
    "Octane",
    "Kraken-Orig",
    "Kraken-Once",
    "Kraken",
    "SunSpider",
    "SunSpider-Once",
    "SunSpider-Orig",
]

def ColorText(opts, text):
  """Expand the $RESET/$BLUE/$RED/$GREEN/$BOLD markers found in text.

  Args:
    opts: options object; no_color and html are read.
    text: string that may contain the markers.

  Returns:
    text with every marker removed (opts.no_color), replaced by HTML tags
    (opts.html) or replaced by ANSI escape sequences (neither set).
    opts.no_color wins over opts.html.
  """
  if opts.no_color:
    substitutions = (
        ("$RESET", ""),
        ("$BLUE", ""),
        ("$RED", ""),
        ("$GREEN", ""),
        ("$BOLD", ""),
    )
  elif opts.html:
    substitutions = (
        ("$RESET", "</font></b>"),
        ("$BLUE", "<font COLOR=\"0000DD\">"),
        ("$RED", "<font COLOR=\"DD0000\">"),
        ("$GREEN", "<font COLOR=\"00DD00\">"),
        ("$BOLD", "<b>"),
    )
  else:
    substitutions = (
        ("$RESET", RESET_SEQ),
        ("$BLUE", BLUE_SEQ),
        ("$RED", RED_SEQ),
        ("$GREEN", GREEN_SEQ),
        ("$BOLD", BOLD_SEQ),
    )
  result = text
  for marker, replacement in substitutions:
    result = result.replace(marker, replacement)
  return result

def NormalizedSigmaToString(normalized_sigma):
  """Return the one-character marker for a normalized sigma.

  The marker is "|" while normalized_sigma stays below
  PROBABILITY_CONSIDERED_SIGNIFICANT and "S" otherwise. A negative value
  trips the assertion.
  """
  assert normalized_sigma >= 0
  return "|" if normalized_sigma < PROBABILITY_CONSIDERED_SIGNIFICANT else "S"

def ComputeZ(baseline_avg, baseline_sigma, mean, n):
  """Return the absolute z-score of mean against the baseline.

  Args:
    baseline_avg: average of the baseline.
    baseline_sigma: sigma of the baseline.
    mean: value being compared.
    n: sample count; baseline_sigma is divided by sqrt(n).

  Returns:
    abs((mean - baseline_avg) / (baseline_sigma / sqrt(n))), or the fixed
    value 1000.0 when baseline_sigma is 0.
  """
  if baseline_sigma == 0:
    return 1000.0
  standard_error = baseline_sigma / math.sqrt(n)
  return abs((mean - baseline_avg) / standard_error)

# Values from http://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html
def ComputeProbability(z):
  """Map a z-score onto one of a fixed set of probability values.

  The table is scanned from the largest bound down and the value of the
  first bound that z strictly exceeds is returned. A z that exceeds no
  bound yields 0.20.
  """
  # (exclusive lower bound for z, value returned)
  buckets = (
      (2.575829, 0),     # p 0.005: two sided < 0.01
      (2.326348, 0.01),  # p 0.010
      (2.170091, 0.02),  # p 0.015
      (2.053749, 0.03),  # p 0.020
      (1.959964, 0.04),  # p 0.025: two sided < 0.05
      (1.880793, 0.05),  # p 0.030
      (1.811910, 0.06),  # p 0.035
      (1.750686, 0.07),  # p 0.040
      (1.695397, 0.08),  # p 0.045
      (1.644853, 0.09),  # p 0.050: two sided < 0.10
      (1.281551, 0.10),  # p 0.100: two sided < 0.20
  )
  for lower_bound, probability in buckets:
    if z > lower_bound:
      return probability
  return 0.20  # two sided p >= 0.20

def PercentColor(change_percent, flakyness):
  """Pick the color markers for a percentage change.

  Args:
    change_percent: change relative to the baseline, in percent.
    flakyness: value compared against the PROBABILITY_* thresholds.

  Returns:
    "" when the change is smaller than PERCENT_CONSIDERED_SIGNIFICANT in
    both directions or flakyness is above
    PROBABILITY_CONSIDERED_MEANINGLESS; otherwise "$GREEN" (increase) or
    "$RED" (decrease), followed by "$BOLD" when flakyness is below
    PROBABILITY_CONSIDERED_SIGNIFICANT.
  """
  if change_percent >= PERCENT_CONSIDERED_SIGNIFICANT:
    color = "$GREEN"
  elif change_percent <= -PERCENT_CONSIDERED_SIGNIFICANT:
    color = "$RED"
  else:
    return ""
  if flakyness < PROBABILITY_CONSIDERED_SIGNIFICANT:
    return color + "$BOLD"
  if flakyness > PROBABILITY_CONSIDERED_MEANINGLESS:
    return ""
  return color

def ProcessOneResultLine(opts, suite, testname, time, sigma, num, baselines):
  """Print one result row followed by a pair of columns per baseline.

  Args:
    opts: options object; html is read here and the finished line is passed
      through ColorText together with opts.
    suite: suite name, used as key into each baseline's results.
    testname: name of the test; rows named like a suite are printed bold.
    time: measured value of this run (anything float() accepts).
    sigma: sigma of this run (anything float() accepts).
    num: sample count (anything int() accepts), handed to ComputeZ as n.
    baselines: iterable of (name, results) pairs where results maps a suite
      name to rows indexed as [testname, score, sigma, ...].

  The text output carries a sigma marker next to every score; the HTML
  output has no such column.
  """
  time = float(time)
  sigma = float(sigma)
  num = int(num)
  base_color = "$BOLD" if testname in suite_names else ""
  if opts.html:
    line_out = ("<tr><td>%s%s$RESET</td><td>%s%8.1f$RESET</td>" %
                (base_color, testname, base_color, time))
  else:
    sigma_string = NormalizedSigmaToString(sigma / time)
    line_out = ("%s%40s$RESET: %s%8.1f$RESET %s" %
                (base_color, testname, base_color, time, sigma_string))
  for baseline in baselines:
    found = False
    for item in baseline[1].get(suite, []):
      if testname != item[0]:
        continue
      found = True
      raw_score_num = float(item[1])
      raw_sigma_num = float(item[2])
      raw_score = "%7.1f" % raw_score_num
      compare_score = ""
      percent_color = ""
      # The ratio is flipped for names in v8_benchmarks -- presumably
      # because those report scores (higher is better) rather than times.
      if testname in v8_benchmarks:
        compare_num = 100*time/raw_score_num - 100
      else:
        compare_num = 100*raw_score_num/time - 100
      # Changes of at most 0.1% are shown as an empty cell.
      if abs(compare_num) > 0.1:
        compare_score = "%3.1f" % (compare_num)
        z = ComputeZ(raw_score_num, raw_sigma_num, time, num)
        p = ComputeProbability(z)
        percent_color = PercentColor(compare_num, p)
      if opts.html:
        # The HTML cells have four placeholders and no sigma column, so the
        # sigma marker must not be part of the argument tuple; passing it
        # raises "not all arguments converted during string formatting".
        line_out += ("<td>%s%8s$RESET</td><td>%s%6s$RESET</td>" %
                     (base_color, raw_score, percent_color, compare_score))
      else:
        sigma_string = NormalizedSigmaToString(raw_sigma_num / raw_score_num)
        line_out += (" %s%8s$RESET %s %s%6s$RESET |" %
                     (base_color, raw_score, sigma_string,
                      percent_color, compare_score))
    if not found:
      # Keep the table rectangular when a baseline lacks this test.
      if opts.html:
        line_out += "<td></td><td></td>"
      else:
        line_out += "|          |        "
  if opts.html:
    line_out += "</tr>"
  print(ColorText(opts, line_out))

def PrintSeparator(opts, baselines, big):
  """Print a horizontal rule matching the text table layout.

  Nothing is printed in HTML mode. Otherwise the rule consists of a fixed
  leading part, one "+...+..." segment per entry in baselines and a
  closing "+"; big selects "=" instead of "-" as the line character.
  """
  if opts.html:
    return
  if big:
    leading = "==================================================="
    per_baseline = "+==========+========"
  else:
    leading = "---------------------------------------------------"
    per_baseline = "+----------+--------"
  columns = "".join(per_baseline for _ in baselines)
  print(leading + columns + "+")

def ProcessResults(opts, results, baselines):
  """Print every result row, suite by suite, in suite_names order.

  Args:
    opts: options object forwarded to the printing helpers.
    results: dict mapping suite name to rows; the first four fields of a
      row are forwarded to ProcessOneResultLine.
    baselines: baseline data forwarded unchanged.

  Suites missing from results are skipped; every printed suite is followed
  by a small separator.
  """
  for suite in suite_names:
    if suite not in results:
      continue
    for row in results[suite]:
      ProcessOneResultLine(opts, suite, row[0], row[1], row[2], row[3],
                           baselines)
    PrintSeparator(opts, baselines, False)

def ProcessFile(file_path):
  """Parse a comma separated results file.

  Args:
    file_path: path of the file to read.

  Returns:
    A dict mapping a suite name to the list of rows collected up to and
    including the row whose first field is that suite name. Rows with
    fewer than two fields are ignored, and rows after the last suite row
    are not part of the result.
  """
  benchmark_results = {}
  current_rows = []
  # Text mode is required: on Python 3 csv.reader rejects the bytes that a
  # file opened with 'rb' yields. The with block also closes the handle.
  with open(file_path, 'r') as csv_file:
    for row in csv.reader(csv_file, delimiter=','):
      if len(row) <= 1:
        continue
      current_rows.append(row)
      # A row named after a suite terminates that suite's group of rows.
      if row[0] in suite_names:
        benchmark_results[row[0]] = current_rows
        current_rows = []
  return benchmark_results

def ProcessStdIn():
  """Parse comma separated results from standard input.

  Each line is stripped of trailing whitespace and split on ",". Lines
  that yield fewer than two fields are ignored.

  Returns:
    A dict mapping a suite name to the list of rows collected up to and
    including the row whose first field is that suite name.
  """
  parsed = {}
  pending = []
  for raw_line in sys.stdin:
    fields = raw_line.rstrip().split(",")
    if len(fields) <= 1:
      continue
    pending.append(fields)
    # A row named after a suite terminates that suite's group of rows.
    if fields[0] in suite_names:
      parsed[fields[0]] = pending
      pending = []
  return parsed

def CompareFiles(opts, args):
  """Print the comparison table for the results against all baselines.

  Args:
    opts: options object; html and filename are read here, and opts is
      forwarded to the printing helpers.
    args: iterable of baseline file paths; one pair of columns is printed
      per path, in iteration order.

  The results come from opts.filename when it is set and from standard
  input otherwise.
  """
  baselines = [(os.path.basename(file_path), ProcessFile(file_path))
               for file_path in args]
  if opts.html:
    header = "<tr><th>benchmark</th><th>score</th>"
  else:
    header = "%40s: %8s " % ("benchmark", "score")
  for baseline_name, _ in baselines:
    # Only the first seven characters of the file name fit the column.
    if opts.html:
      header += ("<th>%s</th><th>%s</th>") % (baseline_name[0:7], "%")
    else:
      header += "| %8s | %6s " % (baseline_name[0:7], "%")
  if opts.html:
    header += "</tr>\n"
  else:
    header += "|"
  print(header)
  PrintSeparator(opts, baselines, True)
  # ProcessFile opens and reads the file itself, so no separate reader is
  # created here (one would only leave an extra handle open).
  if opts.filename:
    results = ProcessFile(opts.filename)
  else:
    results = ProcessStdIn()
  ProcessResults(opts, results, baselines)

if __name__ == '__main__':
  # Command line entry point: parse the options, collect the baseline
  # files and print the comparison.
  parser = OptionParser(usage=__doc__)
  parser.add_option(
      "-f", "--filename", dest="filename",
      help=("Specifies the filename for the results to "
            "compare to the baselines rather than reading from stdin."))
  parser.add_option(
      "-b", "--baselines", dest="baselines",
      help=("Specifies a directory of baseline files to "
            "compare against."))
  parser.add_option(
      "-n", "--no-color", action="store_true", dest="no_color",
      default=False,
      help=("Generates output without escape codes that "
            "add color highlights."))
  parser.add_option(
      "--html", action="store_true", dest="html", default=False,
      help="Generates output as a HTML table ")
  opts, args = parser.parse_args()
  if opts.baselines:
    # Every entry of the baselines directory becomes one more baseline.
    for entry in os.listdir(opts.baselines):
      args.append(opts.baselines + "/" + entry)
  # Baselines are compared in descending path order.
  args = reversed(sorted(args))
  CompareFiles(opts, args)