1 files changed, 145 insertions, 0 deletions
diff --git a/deps/v8/build/config/merge_for_jumbo.py b/deps/v8/build/config/merge_for_jumbo.py
new file mode 100755
index 0000000000..6d037a80eb
--- /dev/null
+++ b/deps/v8/build/config/merge_for_jumbo.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This script creates a "jumbo" file which merges all incoming files
+for compiling.
+
+"""
+
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import hashlib
+import io
+import os
+
+def cut_ranges(boundaries):
+  # Generator: given an increasing sequence of boundary indices, yields one
+  # half-open range(start, stop) per adjacent pair. The ranges do not overlap
+  # and together cover [first, last). Equal neighbours yield an empty range.
+  for start, stop in zip(boundaries, boundaries[1:]):  # adjacent pairs
+    yield range(start, stop)
+
+
+def generate_chunk_stops(inputs, output_count, smart_merge=True):
+  # Note: In the comments below, unique numeric labels are assigned to files.
+  #       Consider them as the sorted rank of the hash of each file path.
+  # Simple jumbo chunking generates uniformly sized chunks with the ceiling of:
+  # (output_index + 1) * input_count / output_count
+  input_count = len(inputs)
+  stops = [((i + 1) * input_count + output_count - 1) // output_count
+           for i in range(output_count)]
+  # This is disruptive at times because file insertions and removals can
+  # invalidate many chunks as all files are offset by one.
+  # For example, say we have 12 files in 4 uniformly sized chunks:
+  # 9, 4, 0; 7,  1, 11;  5, 10, 2; 6, 3, 8
+  # If we delete the first file we get:
+  # 4, 0, 7; 1, 11,  5; 10,  2, 6; 3, 8
+  # All of the chunks have new sets of inputs.
+
+  # With path-aware chunking, we start with the uniformly sized chunks:
+  # 9, 4, 0; 7,  1, 11;  5, 10, 2; 6, 3, 8
+  # First we find the smallest rank in each of the chunks. Their indices are
+  # stored in the |centers| list and in this example the ranks would be:
+  # 0, 1, 2, 3
+  # Then we find the largest rank between the centers. Their indices are stored
+  # in the |stops| list and in this example the ranks would be:
+  # 7, 11, 6
+  # These files mark the boundaries between chunks and these boundary files are
+  # often maintained even as files are added or deleted.
+  # In this example, 7, 11, and 6 are the first files in each chunk:
+  # 9, 4, 0; 7,  1; 11,  5, 10, 2; 6, 3, 8
+  # If we delete the first file and repeat the process we get:
+  # 4, 0; 7, 1; 11,  5, 10,  2; 6, 3, 8
+  # Only the first chunk has a new set of inputs.
+  if smart_merge:
+    # Starting with the simple chunks, every file is assigned a rank.
+    # This requires a hash function that is stable across runs.
+    hasher = lambda n: hashlib.md5(inputs[n].encode()).hexdigest()
+    # Mark each chunk's lowest-ranked (key) file; these will not easily change.
+    # NOTE: min() raises ValueError on an empty chunk (inputs < outputs).
+    centers = [min(indices, key=hasher) for indices in cut_ranges([0] + stops)]
+    # Between each pair of key files there is a file with highest rank.
+    # Mark these to be used as border files. They also will not easily change.
+    # Forget the initial chunks and create new chunks by splitting the list at
+    # every border file.
+    stops = [max(indices, key=hasher) for indices in cut_ranges(centers)]
+    stops.append(input_count)  # the last chunk runs to the end of inputs
+  return stops  # one exclusive end index into |inputs| per output chunk
+
+
+def write_jumbo_files(inputs, outputs, written_input_set, written_output_set):
+  chunk_stops = generate_chunk_stops(inputs, len(outputs))  # per-output ends
+
+  written_inputs = 0  # index of the next input to emit; carries across outputs
+  for output_index, output_file in enumerate(outputs):
+    written_output_set.add(output_file)
+    if os.path.isfile(output_file):
+      with open(output_file, "r") as current:
+        current_jumbo_file = current.read()
+    else:
+      current_jumbo_file = None  # never equals the new text, so it gets written
+
+    out = io.StringIO()  # build the new contents in memory first
+    out.write("/* This is a Jumbo file. Don't edit. */\n\n")
+    out.write("/* Generated with merge_for_jumbo.py. */\n\n")
+    input_limit = chunk_stops[output_index]
+    while written_inputs < input_limit:
+      filename = inputs[written_inputs]
+      written_inputs += 1
+      out.write("#include \"%s\"\n" % filename)
+      written_input_set.add(filename)
+    new_jumbo_file = out.getvalue()
+    out.close()
+
+    if new_jumbo_file != current_jumbo_file:  # only rewrite on a real change
+      with open(output_file, "w") as out:
+        out.write(new_jumbo_file)
+
+
+def main():  # Parse args, read the file list, write jumbo files per language
+  parser = argparse.ArgumentParser()
+  parser.add_argument("--outputs", nargs="+", required=True,
+                      help='List of output files to split input into')
+  parser.add_argument("--file-list", required=True)
+  parser.add_argument("--verbose", action="store_true")
+  args = parser.parse_args()
+
+  lines = []
+  # If written with gn |write_file| each file is on its own line.
+  with open(args.file_list) as file_list_file:
+    lines = [line.strip() for line in file_list_file if line.strip()]
+  # If written with gn |response_file_contents| the files are space separated.
+  all_inputs = []
+  for line in lines:
+    all_inputs.extend(line.split())
+
+  written_output_set = set()  # Just for double checking
+  written_input_set = set()  # Just for double checking
+  for language_ext in (".cc", ".c", ".mm",):  # one pass per language
+    if language_ext == ".cc":
+      ext_pattern = (".cc", ".cpp")  # .cpp files share the .cc outputs
+    else:
+      ext_pattern = tuple([language_ext])  # endswith() takes a tuple of str
+
+    outputs = [x for x in args.outputs if x.endswith(ext_pattern)]
+    inputs = [x for x in all_inputs if x.endswith(ext_pattern)]
+
+    if not outputs:
+      assert not inputs  # such inputs would have no output to go into
+      continue
+
+    write_jumbo_files(inputs, outputs, written_input_set, written_output_set)
+
+  assert set(args.outputs) == written_output_set, "Did not fill all outputs"
+  assert set(all_inputs) == written_input_set, "Did not use all inputs"
+  if args.verbose:
+    print("Generated %s (%d files) based on %s" % (
+      str(args.outputs), len(written_input_set), args.file_list))
+
+if __name__ == "__main__":  # run only when executed as a script
+  main()