summaryrefslogtreecommitdiff
path: root/deps/v8/build/toolchain/win/ml.py
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/build/toolchain/win/ml.py')
-rwxr-xr-xdeps/v8/build/toolchain/win/ml.py287
1 files changed, 287 insertions, 0 deletions
diff --git a/deps/v8/build/toolchain/win/ml.py b/deps/v8/build/toolchain/win/ml.py
new file mode 100755
index 0000000000..877c584c57
--- /dev/null
+++ b/deps/v8/build/toolchain/win/ml.py
@@ -0,0 +1,287 @@
+#!/usr/bin/env python
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Wraps ml.exe or ml64.exe and postprocesses the output to be deterministic.
+Sets timestamp in .obj file to 0, hence incompatible with link.exe /incremental.
+
+Use by prefixing the ml(64).exe invocation with this script:
+ python ml.py ml.exe [args...]"""
+
+import array
+import collections
+import struct
+import subprocess
+import sys
+
+
+class Struct(object):
+ """A thin wrapper around the struct module that returns a namedtuple"""
+ def __init__(self, name, *args):
+ """Pass the name of the return type, and then an interleaved list of
+ format strings as used by the struct module and of field names."""
+ self.fmt = '<' + ''.join(args[0::2])
+ self.type = collections.namedtuple(name, args[1::2])
+
+ def pack_into(self, buffer, offset, data):
+ return struct.pack_into(self.fmt, buffer, offset, *data)
+
+ def unpack_from(self, buffer, offset=0):
+ return self.type(*struct.unpack_from(self.fmt, buffer, offset))
+
+ def size(self):
+ return struct.calcsize(self.fmt)
+
+
+def Subtract(nt, **kwargs):
+ """Subtract(nt, f=2) returns a new namedtuple with 2 subtracted from nt.f"""
+ return nt._replace(**{k: getattr(nt, k) - v for k, v in kwargs.iteritems()})
+
+
+def MakeDeterministic(objdata):
+ # Takes data produced by ml(64).exe (without any special flags) and
+ # 1. Sets the timestamp to 0
+ # 2. Strips the .debug$S section (which contains an unwanted absolute path)
+
+ # This makes several assumptions about ml's output:
+ # - Section data is in the same order as the corresponding section headers:
+ # section headers preceeding the .debug$S section header have their data
+ # preceeding the .debug$S section data; likewise for section headers
+ # following the .debug$S section.
+ # - The .debug$S section contains only the absolute path to the obj file and
+ # nothing else, in particular there's only a single entry in the symbol
+ # table refering to the .debug$S section.
+ # - There are no COFF line number entries.
+ # - There's no IMAGE_SYM_CLASS_CLR_TOKEN symbol.
+ # These seem to hold in practice; if they stop holding this script needs to
+ # become smarter.
+
+ objdata = array.array('c', objdata) # Writable, e.g. via struct.pack_into.
+
+ # Read coff header.
+ COFFHEADER = Struct('COFFHEADER',
+ 'H', 'Machine',
+ 'H', 'NumberOfSections',
+ 'I', 'TimeDateStamp',
+ 'I', 'PointerToSymbolTable',
+ 'I', 'NumberOfSymbols',
+
+ 'H', 'SizeOfOptionalHeader',
+ 'H', 'Characteristics')
+ coff_header = COFFHEADER.unpack_from(objdata)
+ assert coff_header.SizeOfOptionalHeader == 0 # Only set for binaries.
+
+ # Read section headers following coff header.
+ SECTIONHEADER = Struct('SECTIONHEADER',
+ '8s', 'Name',
+ 'I', 'VirtualSize',
+ 'I', 'VirtualAddress',
+
+ 'I', 'SizeOfRawData',
+ 'I', 'PointerToRawData',
+ 'I', 'PointerToRelocations',
+ 'I', 'PointerToLineNumbers',
+
+ 'H', 'NumberOfRelocations',
+ 'H', 'NumberOfLineNumbers',
+ 'I', 'Characteristics')
+ section_headers = []
+ debug_section_index = -1
+ for i in range(0, coff_header.NumberOfSections):
+ section_header = SECTIONHEADER.unpack_from(
+ objdata, offset=COFFHEADER.size() + i * SECTIONHEADER.size())
+ assert not section_header[0].startswith('/') # Support short names only.
+ section_headers.append(section_header)
+
+ if section_header.Name == '.debug$S':
+ assert debug_section_index == -1
+ debug_section_index = i
+ assert debug_section_index != -1
+
+ data_start = COFFHEADER.size() + len(section_headers) * SECTIONHEADER.size()
+
+ # Verify the .debug$S section looks like we expect.
+ assert section_headers[debug_section_index].Name == '.debug$S'
+ assert section_headers[debug_section_index].VirtualSize == 0
+ assert section_headers[debug_section_index].VirtualAddress == 0
+ debug_size = section_headers[debug_section_index].SizeOfRawData
+ debug_offset = section_headers[debug_section_index].PointerToRawData
+ assert section_headers[debug_section_index].PointerToRelocations == 0
+ assert section_headers[debug_section_index].PointerToLineNumbers == 0
+ assert section_headers[debug_section_index].NumberOfRelocations == 0
+ assert section_headers[debug_section_index].NumberOfLineNumbers == 0
+
+ # Make sure sections in front of .debug$S have their data preceeding it.
+ for header in section_headers[:debug_section_index]:
+ assert header.PointerToRawData < debug_offset
+ assert header.PointerToRelocations < debug_offset
+ assert header.PointerToLineNumbers < debug_offset
+
+ # Make sure sections after of .debug$S have their data following it.
+ for header in section_headers[debug_section_index + 1:]:
+ # Make sure the .debug$S data is at the very end of section data:
+ assert header.PointerToRawData > debug_offset
+ assert header.PointerToRelocations == 0
+ assert header.PointerToLineNumbers == 0
+
+ # Make sure the first non-empty section's data starts right after the section
+ # headers.
+ for section_header in section_headers:
+ if section_header.PointerToRawData == 0:
+ assert section_header.PointerToRelocations == 0
+ assert section_header.PointerToLineNumbers == 0
+ continue
+ assert section_header.PointerToRawData == data_start
+ break
+
+ # Make sure the symbol table (and hence, string table) appear after the last
+ # section:
+ assert (coff_header.PointerToSymbolTable >=
+ section_headers[-1].PointerToRawData + section_headers[-1].SizeOfRawData)
+
+ # The symbol table contains a symbol for the no-longer-present .debug$S
+ # section. If we leave it there, lld-link will complain:
+ #
+ # lld-link: error: .debug$S should not refer to non-existent section 5
+ #
+ # so we need to remove that symbol table entry as well. This shifts symbol
+ # entries around and we need to update symbol table indices in:
+ # - relocations
+ # - line number records (never present)
+ # - one aux symbol entries (never present in ml output)
+ SYM = Struct('SYM',
+ '8s', 'Name',
+ 'I', 'Value',
+ 'h', 'SectionNumber', # Note: Signed!
+ 'H', 'Type',
+
+ 'B', 'StorageClass',
+ 'B', 'NumberOfAuxSymbols')
+ i = 0
+ debug_sym = -1
+ while i < coff_header.NumberOfSymbols:
+ sym_offset = coff_header.PointerToSymbolTable + i * SYM.size()
+ sym = SYM.unpack_from(objdata, sym_offset)
+
+ # 107 is IMAGE_SYM_CLASS_CLR_TOKEN, which has aux entry "CLR Token
+ # Definition", which contains a symbol index. Check it's never present.
+ assert sym.StorageClass != 107
+
+ # Note: sym.SectionNumber is 1-based, debug_section_index is 0-based.
+ if sym.SectionNumber - 1 == debug_section_index:
+ assert debug_sym == -1, 'more than one .debug$S symbol found'
+ debug_sym = i
+ # Make sure the .debug$S symbol looks like we expect.
+ # In particular, it should have exactly one aux symbol.
+ assert sym.Name == '.debug$S'
+ assert sym.Value == 0
+ assert sym.Type == 0
+ assert sym.StorageClass == 3
+ assert sym.NumberOfAuxSymbols == 1
+ elif sym.SectionNumber > debug_section_index:
+ sym = Subtract(sym, SectionNumber=1)
+ SYM.pack_into(objdata, sym_offset, sym)
+ i += 1 + sym.NumberOfAuxSymbols
+ assert debug_sym != -1, '.debug$S symbol not found'
+
+ # Note: Usually the .debug$S section is the last, but for files saying
+ # `includelib foo.lib`, like safe_terminate_process.asm in 32-bit builds,
+ # this isn't true: .drectve is after .debug$S.
+
+ # Update symbol table indices in relocations.
+ # There are a few processor types that have one or two relocation types
+ # where SymbolTableIndex has a different meaning, but not for x86.
+ REL = Struct('REL',
+ 'I', 'VirtualAddress',
+ 'I', 'SymbolTableIndex',
+ 'H', 'Type')
+ for header in section_headers[0:debug_section_index]:
+ for j in range(0, header.NumberOfRelocations):
+ rel_offset = header.PointerToRelocations + j * REL.size()
+ rel = REL.unpack_from(objdata, rel_offset)
+ assert rel.SymbolTableIndex != debug_sym
+ if rel.SymbolTableIndex > debug_sym:
+ rel = Subtract(rel, SymbolTableIndex=2)
+ REL.pack_into(objdata, rel_offset, rel)
+
+ # Update symbol table indices in line numbers -- just check they don't exist.
+ for header in section_headers:
+ assert header.NumberOfLineNumbers == 0
+
+ # Now that all indices are updated, remove the symbol table entry refering to
+ # .debug$S and its aux entry.
+ del objdata[coff_header.PointerToSymbolTable + debug_sym * SYM.size():
+ coff_header.PointerToSymbolTable + (debug_sym + 2) * SYM.size()]
+
+ # Now we know that it's safe to write out the input data, with just the
+ # timestamp overwritten to 0, the last section header cut out (and the
+ # offsets of all other section headers decremented by the size of that
+ # one section header), and the last section's data cut out. The symbol
+ # table offset needs to be reduced by one section header and the size of
+ # the missing section.
+ # (The COFF spec only requires on-disk sections to be aligned in image files,
+ # for obj files it's not required. If that wasn't the case, deleting slices
+ # if data would not generally be safe.)
+
+ # Update section offsets and remove .debug$S section data.
+ for i in range(0, debug_section_index):
+ header = section_headers[i]
+ if header.SizeOfRawData:
+ header = Subtract(header, PointerToRawData=SECTIONHEADER.size())
+ if header.NumberOfRelocations:
+ header = Subtract(header, PointerToRelocations=SECTIONHEADER.size())
+ if header.NumberOfLineNumbers:
+ header = Subtract(header, PointerToLineNumbers=SECTIONHEADER.size())
+ SECTIONHEADER.pack_into(
+ objdata, COFFHEADER.size() + i * SECTIONHEADER.size(), header)
+ for i in range(debug_section_index + 1, len(section_headers)):
+ header = section_headers[i]
+ shift = SECTIONHEADER.size() + debug_size
+ if header.SizeOfRawData:
+ header = Subtract(header, PointerToRawData=shift)
+ if header.NumberOfRelocations:
+ header = Subtract(header, PointerToRelocations=shift)
+ if header.NumberOfLineNumbers:
+ header = Subtract(header, PointerToLineNumbers=shift)
+ SECTIONHEADER.pack_into(
+ objdata, COFFHEADER.size() + i * SECTIONHEADER.size(), header)
+
+ del objdata[debug_offset:debug_offset + debug_size]
+
+ # Finally, remove .debug$S section header and update coff header.
+ coff_header = coff_header._replace(TimeDateStamp=0)
+ coff_header = Subtract(coff_header,
+ NumberOfSections=1,
+ PointerToSymbolTable=SECTIONHEADER.size() + debug_size,
+ NumberOfSymbols=2)
+ COFFHEADER.pack_into(objdata, 0, coff_header)
+
+ del objdata[
+ COFFHEADER.size() + debug_section_index * SECTIONHEADER.size():
+ COFFHEADER.size() + (debug_section_index + 1) * SECTIONHEADER.size()]
+
+ # All done!
+ return objdata.tostring()
+
+
+def main():
+ ml_result = subprocess.call(sys.argv[1:])
+ if ml_result != 0:
+ return ml_result
+
+ objfile = None
+ for i in range(1, len(sys.argv)):
+ if sys.argv[i].startswith('/Fo'):
+ objfile = sys.argv[i][len('/Fo'):]
+ assert objfile, 'failed to find ml output'
+
+ with open(objfile, 'rb') as f:
+ objdata = f.read()
+ objdata = MakeDeterministic(objdata)
+ with open(objfile, 'wb') as f:
+ f.write(objdata)
+
+
+if __name__ == '__main__':
+ sys.exit(main())