c_parsing_helper.py (6374B)
"""Helper functions to parse C code in heavily constrained scenarios.

Currently supported functionality:

* read_function_declarations: read function declarations from a header file.
* read_typedefs: read simple single-line typedefs from a header file.
"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

### WARNING: the code in this file has not been extensively reviewed yet.
### We do not think it is harmful, but it may be below our normal standards
### for robustness and maintainability.

import re
from typing import Dict, Iterable, Iterator, List, Optional, Tuple


class ArgumentInfo:
    """Information about an argument to an API function.

    Attributes:
    * decl: the declaration as passed to the constructor, stripped of
      leading and trailing whitespace.
    * type: the type part of the declaration, with normalized whitespace.
    * name: the parameter name, or None if the declaration is unnamed.
    * suffix: the array suffix (e.g. ``[16]``), or an empty string.
    """
    #pylint: disable=too-few-public-methods

    # Reserved words that may end a type but can never be a parameter name.
    _KEYWORDS = [
        'const', 'register', 'restrict', 'volatile',
        'char', 'double', 'float', 'int', 'long', 'short',
        'signed', 'unsigned', 'void',
    ]
    # The \b after the keyword alternation is essential: without it, any
    # identifier that merely starts with a keyword (e.g. "interval") would
    # be rejected as a parameter name.
    _DECLARATION_RE = re.compile(
        r'(?P<type>\w[\w\s*]*?)\s*' +
        r'(?!(?:' + r'|'.join(_KEYWORDS) + r')\b)(?P<name>\b\w+\b)?' +
        r'\s*(?P<suffix>\[[^][]*\])?\Z',
        re.A | re.S)

    @classmethod
    def normalize_type(cls, typ: str) -> str:
        """Normalize whitespace in a type.

        Runs of whitespace become a single space, and every ``*`` is
        preceded by exactly one space.
        """
        typ = re.sub(r'\s+', r' ', typ)
        typ = re.sub(r'\s*\*', r' *', typ)
        return typ

    def __init__(self, decl: str) -> None:
        """Parse one parameter declaration.

        Raise ValueError (with the stripped declaration as its argument)
        if the declaration does not look like ``TYPE [NAME] [[SIZE]]``.
        """
        self.decl = decl.strip()
        m = self._DECLARATION_RE.match(self.decl)
        if not m:
            raise ValueError(self.decl)
        self.type = self.normalize_type(m.group('type')) #type: str
        self.name = m.group('name') #type: Optional[str]
        self.suffix = m.group('suffix') if m.group('suffix') else '' #type: str

    def __str__(self) -> str:
        return self.decl


class FunctionInfo:
    """Information about an API function.

    Attributes:
    * filename, line_number: where the declaration was found.
    * qualifiers: frozenset of storage/inline qualifiers.
    * return_type: the return type as written in the declaration.
    * name: the function name.
    * arguments: list of ArgumentInfo, one per parameter.
    * doc: documentation text emitted before the declaration by __str__.
    """
    #pylint: disable=too-few-public-methods

    # Regex matching the declaration of a function that returns void.
    VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A)

    def __init__(self, #pylint: disable=too-many-arguments
                 filename: str,
                 line_number: int,
                 qualifiers: Iterable[str],
                 return_type: str,
                 name: str,
                 arguments: List[str],
                 doc: str = "") -> None:
        """Record a function declaration.

        Each element of arguments is parsed with ArgumentInfo, which
        raises ValueError on a declaration it cannot parse.
        """
        self.filename = filename
        self.line_number = line_number
        self.qualifiers = frozenset(qualifiers)
        self.return_type = return_type
        self.name = name
        self.arguments = [ArgumentInfo(arg) for arg in arguments]
        self.doc = doc

    def returns_void(self) -> bool:
        """Whether the function returns void."""
        return bool(self.VOID_RE.search(self.return_type))

    def __str__(self) -> str:
        """Return the documentation followed by the wrapped declaration."""
        str_args = [str(a) for a in self.arguments]
        # self.qualifiers is a frozenset, whose iteration order can change
        # from one run to the next: sort for reproducible output.
        str_text = "{} {} {}({})".format(" ".join(sorted(self.qualifiers)),
                                         self.return_type, self.name,
                                         ", ".join(str_args)).strip()
        str_text = self._c_wrap_(str_text)
        return self.doc + "\n" + str_text

    @staticmethod
    def _c_wrap_(in_str: str, line_len: int = 80) -> str:
        """Auto-indent function declaration args using opening parenthesis.

        If in_str is at least line_len characters long, break the line
        after each comma and indent the continuation lines so that the
        arguments line up after the first opening parenthesis. A string
        without an opening parenthesis is returned unchanged.
        """
        if len(in_str) >= line_len:
            p_idx = in_str.find("(")
            if p_idx < 0:
                return in_str
            # The space that already follows each comma supplies the last
            # column of the alignment, hence p_idx and not p_idx + 1.
            ident = " " * p_idx
            padded_comma = ",\n" + ident
            in_str = in_str.replace(",", padded_comma)
        return in_str


# Match one C comment.
# Note that we match both comment types, so things like // in a /*...*/
# comment are handled correctly.
# In a // comment, the backslash-newline alternative must be tried first:
# otherwise [^\n] consumes the backslash and the comment wrongly ends at
# the end of the physical line.
_C_COMMENT_RE = re.compile(r'//(?:\\\n|[^\n])*|/\*.*?\*/', re.S)
_NOT_NEWLINES_RE = re.compile(r'[^\n]+')


def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
    """Read logical lines from a file.

    Logical lines are one or more physical lines, with balanced parentheses.
    Comments are removed. Each item is a pair (line_number, text) where
    line_number is the 1-based number of the first physical line and the
    physical lines are joined with newlines.

    If the parentheses are still unbalanced at the end of the file, the
    remainder of the file is yielded as one last logical line.
    """
    with open(filename, encoding='utf-8') as inp:
        content = inp.read()
    # Strip comments, but keep newlines for line numbering
    content = _C_COMMENT_RE.sub(
        lambda m: _NOT_NEWLINES_RE.sub("", m.group(0)),
        content)
    lines = enumerate(content.splitlines(), 1)
    for line_number, line in lines:
        # Read a logical line, containing balanced parentheses.
        # We assume that parentheses are balanced (this should be ok
        # since comments have been stripped), otherwise there will be
        # a gigantic logical line at the end.
        paren_level = line.count('(') - line.count(')')
        while paren_level > 0:
            # A bare next() would raise StopIteration at end of file, which
            # Python turns into RuntimeError inside a generator (PEP 479).
            following = next(lines, None)
            if following is None:
                break
            more = following[1]
            paren_level += more.count('(') - more.count(')')
            line += '\n' + more
        yield line_number, line


_C_FUNCTION_DECLARATION_RE = re.compile(
    r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)'
    r'(?P<return_type>\w[\w\s*]*?)\s*' +
    r'\b(?P<name>\w+)' +
    r'\s*\((?P<arguments>.*)\)\s*;',
    re.A | re.S)


def read_function_declarations(functions: Dict[str, FunctionInfo],
                               filename: str) -> None:
    """Collect function declarations from a C header file.

    Add one entry to functions, keyed by function name, for each logical
    line of filename that looks like a function declaration. Parameters
    are split on commas, so parameters that themselves contain a comma
    (such as function pointers taking several arguments) are not supported.
    """
    for line_number, line in read_logical_lines(filename):
        m = _C_FUNCTION_DECLARATION_RE.match(line)
        if not m:
            continue
        qualifiers = m.group('qualifiers').split()
        return_type = m.group('return_type')
        name = m.group('name')
        raw_arguments = m.group('arguments')
        if not raw_arguments.strip() or \
           FunctionInfo.VOID_RE.match(raw_arguments):
            # "f(void)" and "f()" both mean that there are no parameters
            # to describe.
            arguments = [] #type: List[str]
        else:
            arguments = raw_arguments.split(',')
        # Note: we replace any existing declaration for the same name.
        functions[name] = FunctionInfo(filename, line_number,
                                       qualifiers,
                                       return_type,
                                       name,
                                       arguments)


_C_TYPEDEF_DECLARATION_RE = re.compile(r'typedef (?:struct )?(?P<type>\w+) (?P<name>\w+)')


def read_typedefs(filename: str) -> Dict[str, str]:
    """Extract type definitions from a C header file.

    Return a {typedef aliased name: original type} dictionary.
    Multi-line typedef struct are not captured.
    """
    type_decl = {} #type: Dict[str, str]

    for _, line in read_logical_lines(filename):
        m = _C_TYPEDEF_DECLARATION_RE.match(line)
        if m:
            type_decl[m.group("name")] = m.group("type")
    return type_decl