1 files changed, 288 insertions, 0 deletions
diff --git a/deps/v8/build/android/gyp/extract_unwind_tables.py b/deps/v8/build/android/gyp/extract_unwind_tables.py
new file mode 100755
index 0000000000..37a8421449
--- /dev/null
+++ b/deps/v8/build/android/gyp/extract_unwind_tables.py
@@ -0,0 +1,288 @@
+#!/usr/bin/env python
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Extracts the unwind tables from breakpad symbol files.
+
+Runs dump_syms on the given binary file and extracts the CFI data into the
+given output file.
+The output file is a binary file containing CFI rows ordered based on function
+address. The output file only contains rows that match the most popular rule
+type in CFI table, to reduce the output size and specify data in compact format.
+See doc https://github.com/google/breakpad/blob/master/docs/symbol_files.md.
+1. The CFA rules should be of postfix form "SP <val> +".
+2. The RA rules should be of postfix form "CFA -<val> + ^".
+Note: breakpad represents dereferencing address with '^' operator.
+
+The output file has 2 tables UNW_INDEX and UNW_DATA, inspired from ARM EHABI
+format. The first table contains function addresses and an index into the
+UNW_DATA table. The second table contains one or more rows for the function
+unwind information.
+
+The output file starts with 4 bytes counting the size of UNW_INDEX in bytes.
+Then UNW_INDEX table and UNW_DATA table.
+
+UNW_INDEX contains two columns of N rows each, where N is the number of
+functions.
+  1. First column 4 byte rows of all the function start address as offset from
+     start of the binary, in sorted order.
+  2. For each function addr, the second column contains 2 byte indices in order.
+     The indices are offsets (in count of 2 bytes) of the CFI data from start of
+     UNW_DATA.
+The last entry in the table always contains CANT_UNWIND index to specify the
+end address of the last function.
+
+UNW_DATA contains data of all the functions. Each function data contains N rows.
+The data found at the address pointed from UNW_INDEX will be:
+  2 bytes: N - number of rows that belong to current function.
+  N * 4 bytes: N rows of data. 16 bits : Address offset from function start.
+                               14 bits : CFA offset / 4.
+                                2 bits : RA offset / 4.
+
+The function is not added to the unwind table in following conditions:
+C1. If length of the function code (number of instructions) is greater than
+    0xFFFF (2 byte address span). This is because we use 16 bits to refer to
+    offset of instruction from start of the address.
+C2. If the function moves the SP by more than 0xFFFF bytes. This is because we
+    use 14 bits to denote CFA offset (last 2 bits are 0).
+C3. If the Return Address is stored at an offset >= 16 from the CFA. Some
+    functions which have variable arguments can have offset up to 16.
+    TODO(ssid): We can actually store offset 16 by subtracting 1 from RA/4 since
+    we never have 0.
+C4. Some functions do not have unwind information defined in dwarf info. These
+    functions have index value CANT_UNWIND(0xFFFF) in UNW_INDEX table.
+
+
+Usage:
+  extract_unwind_tables.py --input_path [root path to unstripped chrome.so]
+      --output_path [output path] --dump_syms_path [path to dump_syms binary]
+"""
+
+import argparse
+import re
+import struct
+import subprocess
+import sys
+import tempfile
+
+
+_CFA_REG = '.cfa'  # CFA register name in CFI rows; key of a row's CFA offset.
+_RA_REG = '.ra'  # RA register name in CFI rows; key of a row's RA offset.
+
+_ADDR_ENTRY = 0  # Key of the address in a parsed row.
+_LENGTH_ENTRY = 1  # Key of the function length in a function's first row.
+
+_CANT_UNWIND = 0xFFFF  # UNW_INDEX index value for code without unwind data.
+
+
+def _Write4Bytes(output_file, val):
+  """Writes |val| to |output_file| as a little-endian 32 bit unsigned int."""
+  output_file.write(struct.Struct('<L').pack(val))
+
+
+def _Write2Bytes(output_file, val):
+  """Writes |val| to |output_file| as a little-endian 16 bit unsigned int."""
+  output_file.write(struct.Struct('<H').pack(val))
+
+
+def _FindRuleForRegister(cfi_row, reg):
+  """Returns the postfix rule of |reg| in |cfi_row| as a string.
+
+  |cfi_row| is a list of tokens of a breakpad CFI row, in which every rule is a
+  register name followed by a colon and then its postfix expression tokens, eg:
+  "CFI R1: <rule> R2: <rule>". Returns '' if |reg| has no rule in the row.
+  """
+  marker = reg + ':'
+  if marker not in cfi_row:
+    return ''
+  rule_tokens = []
+  # The rule spans from the token after the marker up to the next "<reg>:".
+  for token in cfi_row[cfi_row.index(marker) + 1:]:
+    if token[-1] == ':':
+      break
+    rule_tokens.append(token)
+  return ' '.join(rule_tokens)
+
+
+def _GetCfaAndRaOffset(cfi_row):
+  """Returns a tuple with 2 numbers (cfa_offset, ra_offset) for |cfi_row|.
+
+  cfa_offset is <val> when the CFA rule is exactly "sp <val> +", else 0.
+  ra_offset is <val> when the RA rule is exactly ".cfa -<val> + ^" and 0 when
+  the row has no RA rule. Any other RA rule makes the result (0, 0).
+  """
+  cfa_offset = ra_offset = 0
+  cfa_rule = _FindRuleForRegister(cfi_row, _CFA_REG)
+  ra_rule = _FindRuleForRegister(cfi_row, _RA_REG)
+  # Patterns are anchored and '.' is escaped so only the exact forms match.
+  if re.match(r'sp [0-9]+ \+$', cfa_rule):
+    cfa_offset = int(cfa_rule.split()[1], 10)
+  if ra_rule:
+    if not re.match(r'\.cfa -[0-9]+ \+ \^$', ra_rule):
+      return (0, 0)
+    ra_offset = -1 * int(ra_rule.split()[1], 10)
+  return (cfa_offset, ra_offset)
+
+
+def _GetAllCfiRows(symbol_file):
+  """Returns the CFI data parsed from the lines of |symbol_file|.
+
+  The result maps each function start address to the list of rows of that
+  function: first the row of the "STACK CFI INIT" record, holding the function
+  address and length, then one or more rows holding the address and the CFA
+  and RA offsets of the following "STACK CFI" records. Functions that hit one
+  of the skip conditions below, or that have a row with address 0, are left
+  out.
+  """
+  functions = {}
+  rows = []  # Rows of the function being parsed; empty while it is skipped.
+  for line in symbol_file:
+    if 'STACK CFI' not in line:
+      continue
+
+    tokens = line.split()
+    if tokens[2] == 'INIT':
+      # A new function starts here, so store the rows of the previous one.
+      if len(rows) > 1:
+        functions[rows[0][_ADDR_ENTRY]] = rows
+      rows = []
+
+      # The record is of format "STACK CFI INIT <addr> <length> ..."
+      row = {_ADDR_ENTRY: int(tokens[3], 16),
+             _LENGTH_ENTRY: int(tokens[4], 16)}
+      # Condition C1: Skip the function if its length is zero or too large.
+      if not row[_LENGTH_ENTRY] or row[_LENGTH_ENTRY] > 0xffff:
+        continue
+    elif not rows:
+      continue  # The current function is skipped.
+    else:
+      # The record is of format "STACK CFI <addr> .cfa: <expr> .ra: <expr> ..."
+      row = {_ADDR_ENTRY: int(tokens[2], 16)}
+      cfa_offset, ra_offset = _GetCfaAndRaOffset(tokens)
+      row[_CFA_REG], row[_RA_REG] = cfa_offset, ra_offset
+
+      # Condition C2 and C3: Skip based on limits on offsets.
+      if not cfa_offset or cfa_offset > 0xffff or ra_offset >= 16:
+        rows = []
+        continue
+      assert cfa_offset % 4 == 0
+      # Functions with code size larger than 0xffff were skipped, so there
+      # should be no row offset from the function start above that value.
+      assert row[_ADDR_ENTRY] - rows[0][_ADDR_ENTRY] < 0xffff
+
+    # A row with address 0 drops the function, with all its previous rows.
+    if not row[_ADDR_ENTRY]:
+      rows = []
+      continue
+    assert row[_ADDR_ENTRY] % 2 == 0
+    rows.append(row)
+
+  # Condition C4: Skip function without CFI rows.
+  if len(rows) > 1:
+    functions[rows[0][_ADDR_ENTRY]] = rows
+  return functions
+
+
+def _WriteCfiData(cfi_data, out_file):
+  """Writes the CFI data in defined format to out_file.
+
+  The format is described in the docstring of this file.
+
+  Args:
+    cfi_data: Dict from function start address to the rows of the function,
+        where the first row has the function length and the other rows have
+        the CFI data, as built by _GetAllCfiRows().
+    out_file: File object that the size of UNW_INDEX in bytes, the UNW_INDEX
+        table and the UNW_DATA table are written to, in this order.
+  """
+  # Stores the final data that will be written to UNW_DATA table, in order
+  # with 2 byte items.
+  unw_data = []
+
+  # Maps the CFI data of a function, as a tuple of numbers, to its index in
+  # |unw_data|. Functions with equal data share one entry of |unw_data|.
+  data_to_index = {}
+  # Store mapping between the functions to the index. This index is later
+  # written to the UNW_INDEX table for each function.
+  func_addr_to_index = {}
+  previous_func_end = 0
+  # items() and '//' are used since they work on both Python 2 and Python 3.
+  for addr, function in sorted(cfi_data.items()):
+    # Add an empty function entry when functions CFIs are missing between 2
+    # functions.
+    if previous_func_end != 0 and addr - previous_func_end > 4:
+      func_addr_to_index[previous_func_end + 2] = _CANT_UNWIND
+    previous_func_end = addr + function[0][_LENGTH_ENTRY]
+
+    assert len(function) > 1
+    # The first row contains the function address and length. The rest of the
+    # rows have CFI data. Each of them adds two 2 byte items to the function
+    # data: the address offset from the function start, and the CFA offset
+    # or-ed with RA offset / 4.
+    func_data_arr = []
+    for row in function[1:]:
+      func_data_arr.append(row[_ADDR_ENTRY] - addr)
+      func_data_arr.append(row[_CFA_REG] | (row[_RA_REG] // 4))
+
+    # A tuple is hashable, so it can be the key of |data_to_index|.
+    func_data = tuple(func_data_arr)
+    if func_data not in data_to_index:
+      # When data is not found, create a new index = len(unw_data), and write
+      # the number of rows followed by the data to |unw_data|.
+      data_to_index[func_data] = len(unw_data)
+      unw_data.append(len(func_data_arr) // 2)
+      unw_data.extend(func_data_arr)
+    # If the data was found, then the same index is used for the function.
+    func_addr_to_index[addr] = data_to_index[func_data]
+
+  # Mark the end of last function entry.
+  func_addr_to_index[previous_func_end + 2] = _CANT_UNWIND
+
+  # Write the size of UNW_INDEX in bytes: 4 + 2 bytes for each entry.
+  _Write4Bytes(out_file, len(func_addr_to_index) * 6)
+
+  # Write the UNW_INDEX table. First list of addresses and then indices.
+  sorted_unw_index = sorted(func_addr_to_index.items())
+  for addr, _ in sorted_unw_index:
+    _Write4Bytes(out_file, addr)
+  for _, index in sorted_unw_index:
+    _Write2Bytes(out_file, index)
+
+  # Write the UNW_DATA table.
+  for data in unw_data:
+    _Write2Bytes(out_file, data)
+
+
+def _ParseCfiData(sym_file, output_path):
+  """Converts the CFI data of |sym_file| to the file at |output_path|."""
+  with open(sym_file, 'r') as symbols:
+    parsed_rows = _GetAllCfiRows(symbols)
+  with open(output_path, 'wb') as unwind_tables:
+    _WriteCfiData(parsed_rows, unwind_tables)
+
+
+def main():
+  """Runs dump_syms on --input_path and writes the tables to --output_path."""
+  import os  # Imported here since it is only needed for the path below.
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--input_path', required=True,
+                      help='The input path of the unstripped binary')
+  parser.add_argument('--output_path', required=True,
+                      help='The path of the output file')
+  parser.add_argument('--dump_syms_path', required=True,
+                      help='The path of the dump_syms binary')
+  args = parser.parse_args()
+
+  # join() keeps an absolute --dump_syms_path as is, and anchors a relative
+  # one to the current directory so that it is never looked up in PATH.
+  dump_syms = os.path.join(os.getcwd(), args.dump_syms_path)
+  with tempfile.NamedTemporaryFile() as sym_file:
+    out = subprocess.call([dump_syms, args.input_path], stdout=sym_file)
+    assert not out  # dump_syms has to exit with status 0.
+    sym_file.flush()
+    _ParseCfiData(sym_file.name, args.output_path)
+  return 0
+
+if __name__ == '__main__':
+  sys.exit(main())  # The value returned by main() becomes the exit status.