quickjs-tart

quickjs-based runtime for wallet-core logic
Log | Files | Refs | README | LICENSE

c_parsing_helper.py (6374B)


      1 """Helper functions to parse C code in heavily constrained scenarios.
      2 
      3 Currently supported functionality:
      4 
      5 * read_function_declarations: read function declarations from a header file.
      6 """
      7 
      8 # Copyright The Mbed TLS Contributors
      9 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
     10 
     11 ### WARNING: the code in this file has not been extensively reviewed yet.
     12 ### We do not think it is harmful, but it may be below our normal standards
     13 ### for robustness and maintainability.
     14 
     15 import re
     16 from typing import Dict, Iterable, Iterator, List, Optional, Tuple
     17 
     18 
     19 class ArgumentInfo:
     20     """Information about an argument to an API function."""
     21     #pylint: disable=too-few-public-methods
     22 
     23     _KEYWORDS = [
     24         'const', 'register', 'restrict',
     25         'int', 'long', 'short', 'signed', 'unsigned',
     26     ]
     27     _DECLARATION_RE = re.compile(
     28         r'(?P<type>\w[\w\s*]*?)\s*' +
     29         r'(?!(?:' + r'|'.join(_KEYWORDS) + r'))(?P<name>\b\w+\b)?' +
     30         r'\s*(?P<suffix>\[[^][]*\])?\Z',
     31         re.A | re.S)
     32 
     33     @classmethod
     34     def normalize_type(cls, typ: str) -> str:
     35         """Normalize whitespace in a type."""
     36         typ = re.sub(r'\s+', r' ', typ)
     37         typ = re.sub(r'\s*\*', r' *', typ)
     38         return typ
     39 
     40     def __init__(self, decl: str) -> None:
     41         self.decl = decl.strip()
     42         m = self._DECLARATION_RE.match(self.decl)
     43         if not m:
     44             raise ValueError(self.decl)
     45         self.type = self.normalize_type(m.group('type')) #type: str
     46         self.name = m.group('name') #type: Optional[str]
     47         self.suffix = m.group('suffix') if m.group('suffix') else '' #type: str
     48 
     49     def __str__(self) -> str:
     50         return self.decl
     51 
     52 class FunctionInfo:
     53     """Information about an API function."""
     54     #pylint: disable=too-few-public-methods
     55 
     56     # Regex matching the declaration of a function that returns void.
     57     VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A)
     58 
     59     def __init__(self, #pylint: disable=too-many-arguments
     60                  filename: str,
     61                  line_number: int,
     62                  qualifiers: Iterable[str],
     63                  return_type: str,
     64                  name: str,
     65                  arguments: List[str],
     66                  doc: str = "") -> None:
     67 
     68         self.filename = filename
     69         self.line_number = line_number
     70         self.qualifiers = frozenset(qualifiers)
     71         self.return_type = return_type
     72         self.name = name
     73         self.arguments = [ArgumentInfo(arg) for arg in arguments]
     74         self.doc = doc
     75 
     76     def returns_void(self) -> bool:
     77         """Whether the function returns void."""
     78         return bool(self.VOID_RE.search(self.return_type))
     79 
     80     def __str__(self) -> str:
     81         str_args = [str(a) for a in self.arguments]
     82         str_text = "{} {} {}({})".format(" ".join(self.qualifiers),
     83                                          self.return_type, self.name,
     84                                          ", ".join(str_args)).strip()
     85         str_text = self._c_wrap_(str_text)
     86         return self.doc + "\n" + str_text
     87 
     88     @staticmethod
     89     def _c_wrap_(in_str: str, line_len: int = 80) -> str:
     90         """Auto-idents function declaration args using opening parenthesis."""
     91         if len(in_str) >= line_len:
     92             p_idx = in_str.index("(")
     93             ident = " "  * p_idx
     94             padded_comma = ",\n" + ident
     95             in_str = in_str.replace(",", padded_comma)
     96         return in_str
     97 
     98 # Match one C comment.
     99 # Note that we match both comment types, so things like // in a /*...*/
    100 # comment are handled correctly.
    101 _C_COMMENT_RE = re.compile(r'//(?:[^\n]|\\\n)*|/\*.*?\*/', re.S)
    102 _NOT_NEWLINES_RE = re.compile(r'[^\n]+')
    103 
    104 def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
    105     """Read logical lines from a file.
    106 
    107     Logical lines are one or more physical line, with balanced parentheses.
    108     """
    109     with open(filename, encoding='utf-8') as inp:
    110         content = inp.read()
    111     # Strip comments, but keep newlines for line numbering
    112     content = re.sub(_C_COMMENT_RE,
    113                      lambda m: re.sub(_NOT_NEWLINES_RE, "", m.group(0)),
    114                      content)
    115     lines = enumerate(content.splitlines(), 1)
    116     for line_number, line in lines:
    117         # Read a logical line, containing balanced parentheses.
    118         # We assume that parentheses are balanced (this should be ok
    119         # since comments have been stripped), otherwise there will be
    120         # a gigantic logical line at the end.
    121         paren_level = line.count('(') - line.count(')')
    122         while paren_level > 0:
    123             _, more = next(lines) #pylint: disable=stop-iteration-return
    124             paren_level += more.count('(') - more.count(')')
    125             line += '\n' + more
    126         yield line_number, line
    127 
    128 _C_FUNCTION_DECLARATION_RE = re.compile(
    129     r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)'
    130     r'(?P<return_type>\w[\w\s*]*?)\s*' +
    131     r'\b(?P<name>\w+)' +
    132     r'\s*\((?P<arguments>.*)\)\s*;',
    133     re.A | re.S)
    134 
    135 def read_function_declarations(functions: Dict[str, FunctionInfo],
    136                                filename: str) -> None:
    137 
    138     """Collect function declarations from a C header file."""
    139     for line_number, line in read_logical_lines(filename):
    140         m = _C_FUNCTION_DECLARATION_RE.match(line)
    141         if not m:
    142             continue
    143         qualifiers = m.group('qualifiers').split()
    144         return_type = m.group('return_type')
    145         name = m.group('name')
    146         arguments = m.group('arguments').split(',')
    147         if len(arguments) == 1 and re.match(FunctionInfo.VOID_RE, arguments[0]):
    148             arguments = []
    149         # Note: we replace any existing declaration for the same name.
    150         functions[name] = FunctionInfo(filename, line_number,
    151                                        qualifiers,
    152                                        return_type,
    153                                        name,
    154                                        arguments)
    155 
    156 _C_TYPEDEF_DECLARATION_RE = re.compile(r'typedef (?:struct )?(?P<type>\w+) (?P<name>\w+)')
    157 
    158 def read_typedefs(filename: str) -> Dict[str, str]:
    159     """ Extract type definitions in a {typedef aliased name: original type} dictionary.
    160     Multi-line typedef struct are not captured. """
    161 
    162     type_decl = {}
    163 
    164     for _, line in read_logical_lines(filename):
    165         m = _C_TYPEDEF_DECLARATION_RE.match(line)
    166         if m:
    167             type_decl[m.group("name")] = m.group("type")
    168     return type_decl