diff options
Diffstat (limited to 'deps/v8/build/config/merge_for_jumbo.py')
-rwxr-xr-x | deps/v8/build/config/merge_for_jumbo.py | 145 |
1 files changed, 145 insertions, 0 deletions
#!/usr/bin/env python
#
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This script creates a "jumbo" file which merges all incoming files
for compiling.

"""

from __future__ import print_function
from __future__ import unicode_literals

import argparse
import hashlib
import io
import os


def cut_ranges(boundaries):
  """Yield the index ranges lying between consecutive boundaries.

  Given an increasing sequence of boundary indices, generate a sequence of
  non-overlapping ranges. The total range is inclusive of the first index
  and exclusive of the last index from the given sequence.

  Args:
    boundaries: A list of increasing integer indices.

  Yields:
    range(boundaries[i], boundaries[i + 1]) for each adjacent pair.
  """
  for start, stop in zip(boundaries, boundaries[1:]):
    yield range(start, stop)


def generate_chunk_stops(inputs, output_count, smart_merge=True):
  """Compute, for every output chunk, the exclusive end index into |inputs|.

  Args:
    inputs: List of input file paths, in the order they will be written.
    output_count: Number of jumbo files the inputs are distributed over.
    smart_merge: If true, use path-aware chunking (described below) so that
        adding or removing a file disturbs as few chunks as possible.
        Otherwise, use uniformly sized chunks.

  Returns:
    A list of stop indices. Chunk |i| consists of the inputs from the
    previous stop (or 0 for the first chunk) up to, but not including,
    stops[i]. The last stop is always len(inputs).
  """
  # Note: In the comments below, unique numeric labels are assigned to files.
  # Consider them as the sorted rank of the hash of each file path.
  # Simple jumbo chunking generates uniformly sized chunks with the ceiling of:
  # (output_index + 1) * input_count / output_count
  input_count = len(inputs)
  stops = [((i + 1) * input_count + output_count - 1) // output_count
           for i in range(output_count)]
  # This is disruptive at times because file insertions and removals can
  # invalidate many chunks as all files are offset by one.
  # For example, say we have 12 files in 4 uniformly sized chunks:
  # 9, 4, 0; 7, 1, 11; 5, 10, 2; 6, 3, 8
  # If we delete the first file we get:
  # 4, 0, 7; 1, 11, 5; 10, 2, 6; 3, 8
  # All of the chunks have new sets of inputs.

  # With path-aware chunking, we start with the uniformly sized chunks:
  # 9, 4, 0; 7, 1, 11; 5, 10, 2; 6, 3, 8
  # First we find the smallest rank in each of the chunks. Their indices are
  # stored in the |centers| list and in this example the ranks would be:
  # 0, 1, 2, 3
  # Then we find the largest rank between the centers. Their indices are stored
  # in the |stops| list and in this example the ranks would be:
  # 7, 11, 6
  # These files mark the boundaries between chunks and these boundary files are
  # often maintained even as files are added or deleted.
  # In this example, 7, 11, and 6 are the first files in each chunk:
  # 9, 4, 0; 7, 1; 11, 5, 10, 2; 6, 3, 8
  # If we delete the first file and repeat the process we get:
  # 4, 0; 7, 1; 11, 5, 10, 2; 6, 3, 8
  # Only the first chunk has a new set of inputs.
  #
  # Path-aware chunking needs at least one file in every uniform chunk to pick
  # a center from. With fewer inputs than outputs some uniform chunk is empty
  # (and min() of an empty range raises ValueError), so keep the simple
  # chunking in that case.
  if smart_merge and input_count >= output_count:
    # Starting with the simple chunks, every file is assigned a rank.
    # This requires a hash function that is stable across runs.
    hasher = lambda n: hashlib.md5(inputs[n].encode()).hexdigest()
    # In each chunk there is a key file with lowest rank; mark them.
    # Note that they will not easily change.
    centers = [min(indices, key=hasher) for indices in cut_ranges([0] + stops)]
    # Between each pair of key files there is a file with highest rank.
    # Mark these to be used as border files. They also will not easily change.
    # Forget the initial chunks and create new chunks by splitting the list at
    # every border file.
    stops = [max(indices, key=hasher) for indices in cut_ranges(centers)]
    stops.append(input_count)
  return stops


def write_jumbo_files(inputs, outputs, written_input_set, written_output_set):
  """Write one jumbo file per output, each including a chunk of the inputs.

  An output file is only rewritten when its generated content differs from
  what is already on disk.

  Args:
    inputs: List of input file paths to distribute over the outputs.
    outputs: List of jumbo file paths to generate.
    written_input_set: Set that every included input path is added to.
    written_output_set: Set that every processed output path is added to.
  """
  chunk_stops = generate_chunk_stops(inputs, len(outputs))

  written_inputs = 0
  for output_index, output_file in enumerate(outputs):
    written_output_set.add(output_file)
    if os.path.isfile(output_file):
      with open(output_file, "r") as current:
        current_jumbo_file = current.read()
    else:
      current_jumbo_file = None

    out = io.StringIO()
    out.write("/* This is a Jumbo file. Don't edit. */\n\n")
    out.write("/* Generated with merge_for_jumbo.py. */\n\n")
    input_limit = chunk_stops[output_index]
    while written_inputs < input_limit:
      filename = inputs[written_inputs]
      written_inputs += 1
      out.write("#include \"%s\"\n" % filename)
      written_input_set.add(filename)
    new_jumbo_file = out.getvalue()
    out.close()

    # Leave unchanged files alone; only differing content is written out.
    if new_jumbo_file != current_jumbo_file:
      with open(output_file, "w") as out:
        out.write(new_jumbo_file)


def main():
  """Parse the command line and generate the requested jumbo files."""
  parser = argparse.ArgumentParser()
  parser.add_argument("--outputs", nargs="+", required=True,
                      help='List of output files to split input into')
  parser.add_argument("--file-list", required=True)
  parser.add_argument("--verbose", action="store_true")
  args = parser.parse_args()

  # If written with gn |write_file| each file is on its own line.
  # If written with gn |response_file_contents| the files are space separated.
  # Splitting on any whitespace handles both layouts.
  with open(args.file_list) as file_list_file:
    all_inputs = file_list_file.read().split()

  written_output_set = set()  # Just for double checking
  written_input_set = set()  # Just for double checking
  for language_ext in (".cc", ".c", ".mm",):
    if language_ext == ".cc":
      ext_pattern = (".cc", ".cpp")
    else:
      ext_pattern = (language_ext,)

    outputs = [x for x in args.outputs if x.endswith(ext_pattern)]
    inputs = [x for x in all_inputs if x.endswith(ext_pattern)]

    if not outputs:
      assert not inputs
      continue

    write_jumbo_files(inputs, outputs, written_input_set, written_output_set)

  assert set(args.outputs) == written_output_set, "Did not fill all outputs"
  assert set(all_inputs) == written_input_set, "Did not use all inputs"
  if args.verbose:
    print("Generated %s (%d files) based on %s" % (
        str(args.outputs), len(written_input_set), args.file_list))


if __name__ == "__main__":
  main()