check_names.py (46730B)
1 #!/usr/bin/env python3 2 # 3 # Copyright The Mbed TLS Contributors 4 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 5 6 """ 7 This script confirms that the naming of all symbols and identifiers in Mbed TLS 8 are consistent with the house style and are also self-consistent. It only runs 9 on Linux and macOS since it depends on nm. 10 11 It contains three major Python classes, TFPSACryptoCodeParser, 12 MBEDTLSCodeParser and NameChecker. They all have a comprehensive "run-all" 13 function (comprehensive_parse() and perform_checks()) but the individual 14 functions can also be used for specific needs. 15 16 CodeParser(a inherent base class for TFPSACryptoCodeParser and MBEDTLSCodeParser) 17 makes heavy use of regular expressions to parse the code, and is dependent on 18 the current code formatting. Many Python C parser libraries require 19 preprocessed C code, which means no macro parsing. Compiler tools are also not 20 very helpful when we want the exact location in the original source (which 21 becomes impossible when e.g. comments are stripped). 22 23 NameChecker performs the following checks: 24 25 - All exported and available symbols in the library object files, are explicitly 26 declared in the header files. This uses the nm command. 27 - All macros, constants, and identifiers (function names, struct names, etc) 28 follow the required regex pattern. 29 - Typo checking: All words that begin with MBED|PSA exist as macros or constants. 30 31 The script returns 0 on success, 1 on test failure, and 2 if there is a script 32 error. It must be run from Mbed TLS root. 33 """ 34 35 import abc 36 import argparse 37 import fnmatch 38 import glob 39 import textwrap 40 import os 41 import sys 42 import traceback 43 import re 44 import enum 45 import shutil 46 import subprocess 47 import logging 48 import tempfile 49 50 import project_scripts # pylint: disable=unused-import 51 from mbedtls_framework import build_tree 52 53 54 # Naming patterns to check against. 
# These are defined outside the NameCheck class for ease of modification.
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA|TF_PSA)_[0-9A-Z_]*[0-9A-Z]$"
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"

class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.
        """
        gutter = format(self.line_no, "4d")
        # Carets underline exactly the matched span within the line.
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        return (
            " {0} |\n".format(" " * len(gutter)) +
            " {0} | {1}".format(gutter, self.line) +
            " {0} | {1}\n".format(" " * len(gutter), underline)
        )

class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    An abstract parent class representing a form of static analysis error.
    It extends an Abstract Base Class, which means it is not instantiable, and
    it also mandates certain abstract methods to be implemented in subclasses.
    """
    # Class variable to control the quietness of all problems
    quiet = False
    def __init__(self):
        self.textwrapper = textwrap.TextWrapper()
        self.textwrapper.width = 80
        self.textwrapper.initial_indent = "    > "
        self.textwrapper.subsequent_indent = "      "

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """
        pass

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
        pass
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        Problem.__init__(self)

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        return self.textwrapper.fill(
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))

class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            )
        ) + "\n" + str(self.match)
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word using MBED or PSA doesn't
    appear to be defined as constants nor enum values. Created with
    NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        ) + "\n" + str(self.match)

class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        if not build_tree.looks_like_root(os.getcwd()):
            raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root")

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def _parse(self, all_macros, enum_consts, identifiers,
               excluded_identifiers, mbed_psa_words, symbols):
        # pylint: disable=too-many-arguments
        """
        Parse macros, enums, identifiers, excluded identifiers, Mbed PSA word and Symbols.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """

        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files".format \
            (len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile a library, and parse the object files using nm to retrieve the
        list of referenced symbols. Exceptions thrown here are rethrown because
        they would be critical errors that void several tests, and thus needs
        to halt the program. This is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        raise NotImplementedError("parse_symbols must be implemented by a code parser")

    def comprehensive_parse(self):
        """
        (Must be defined as a class method)
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        # Fix: the message previously said "comprehension_parse", which is not
        # the name of this method.
        raise NotImplementedError("comprehensive_parse must be implemented by a code parser")

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")
        symbols = []
        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)
        return symbols
class TFPSACryptoCodeParser(CodeParser):
    """
    Class for retrieving files and parsing TF-PSA-Crypto code. This can be used
    independently of the checks that NameChecker performs.
    """

    def __init__(self, log):
        super().__init__(log)
        if not build_tree.looks_like_tf_psa_crypto_root(os.getcwd()):
            raise Exception("This script must be run from TF-PSA-Crypto root.")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        all_macros = {"public": [], "internal": [], "private":[]}
        all_macros["public"] = self.parse_macros([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "include/mbedtls/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h",
            "drivers/everest/include/tf-psa-crypto/private/everest/everest.h",
            "drivers/everest/include/tf-psa-crypto/private/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "core/*.h",
            "drivers/builtin/src/*.h",
            "framework/tests/include/test/drivers/*.h",
        ])
        all_macros["private"] = self.parse_macros([
            "core/*.c",
            "drivers/builtin/src/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "include/mbedtls/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "core/*.c",
            "drivers/builtin/src/*.c",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h",
            "drivers/everest/include/tf-psa-crypto/private/everest/everest.h",
            "drivers/everest/include/tf-psa-crypto/private/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "include/mbedtls/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h",
            "drivers/everest/include/tf-psa-crypto/private/everest/everest.h",
            "drivers/everest/include/tf-psa-crypto/private/everest/x25519.h"
        ], ["drivers/p256-m/p256-m/p256-m.h"])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/psa/*.h",
            "include/tf-psa-crypto/*.h",
            "include/mbedtls/*.h",
            "drivers/builtin/include/mbedtls/*.h",
            "core/*.h",
            "drivers/builtin/src/*.h",
            "drivers/everest/include/everest/everest.h",
            "drivers/everest/include/everest/x25519.h",
            "drivers/everest/include/tf-psa-crypto/private/everest/everest.h",
            "drivers/everest/include/tf-psa-crypto/private/everest/x25519.h",
            "core/*.c",
            "drivers/builtin/src/*.c",
            "drivers/everest/library/everest.c",
            "drivers/everest/library/x25519.c"
        ], ["core/psa_crypto_driver_wrappers.h"])
        symbols = self.parse_symbols()

        return self._parse(all_macros, enum_consts, identifiers,
                           excluded_identifiers, mbed_psa_words, symbols)

    def parse_symbols(self):
        """
        Compile the TF-PSA-Crypto libraries, and parse the
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/psa/crypto_config.h",
            "include/psa/crypto_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"

            source_dir = os.getcwd()
            build_dir = tempfile.mkdtemp()
            os.chdir(build_dir)
            subprocess.run(
                ["cmake", "-DGEN_FILES=ON", source_dir],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                build_dir + "/drivers/builtin/libbuiltin.a",
                build_dir + "/drivers/p256-m/libp256m.a",
                build_dir + "/drivers/everest/libeverest.a",
                build_dir + "/core/libtfpsacrypto.a"
            ])

            os.chdir(source_dir)
            shutil.rmtree(build_dir)
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/psa/crypto_config.h.bak",
                "include/psa/crypto_config.h"
            )

        return symbols
class MBEDTLSCodeParser(CodeParser):
    """
    Class for retrieving files and parsing Mbed TLS code. This can be used
    independently of the checks that NameChecker performs.
    """

    def __init__(self, log):
        super().__init__(log)
        if not build_tree.looks_like_mbedtls_root(os.getcwd()):
            raise Exception("This script must be run from Mbed TLS root.")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        all_macros = {"public": [], "internal": [], "private":[]}
        # TF-PSA-Crypto is in the same repo in 3.6 so initialise variable here.
        tf_psa_crypto_parse_result = {}

        if build_tree.is_mbedtls_3_6():
            all_macros["public"] = self.parse_macros([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ])
            all_macros["internal"] = self.parse_macros([
                "library/*.h",
                "framework/tests/include/test/drivers/*.h",
            ])
            all_macros["private"] = self.parse_macros([
                "library/*.c",
            ])
            enum_consts = self.parse_enum_consts([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "library/*.c",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ])
            identifiers, excluded_identifiers = self.parse_identifiers([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h"
            ], ["3rdparty/p256-m/p256-m/p256-m.h"])
            mbed_psa_words = self.parse_mbed_psa_words([
                "include/mbedtls/*.h",
                "include/psa/*.h",
                "library/*.h",
                "3rdparty/everest/include/everest/everest.h",
                "3rdparty/everest/include/everest/x25519.h",
                "library/*.c",
                "3rdparty/everest/library/everest.c",
                "3rdparty/everest/library/x25519.c"
            ], ["library/psa_crypto_driver_wrappers.h"])
        else:
            all_macros = {"public": [], "internal": [], "private":[]}
            all_macros["public"] = self.parse_macros([
                "include/mbedtls/*.h",
            ])
            all_macros["internal"] = self.parse_macros([
                "library/*.h",
                "framework/tests/include/test/drivers/*.h",
            ])
            all_macros["private"] = self.parse_macros([
                "library/*.c",
            ])
            enum_consts = self.parse_enum_consts([
                "include/mbedtls/*.h",
                "library/*.h",
                "library/*.c",
            ])
            identifiers, excluded_identifiers = self.parse_identifiers([
                "include/mbedtls/*.h",
                "library/*.h",
            ])
            mbed_psa_words = self.parse_mbed_psa_words([
                "include/mbedtls/*.h",
                "library/*.h",
                "library/*.c",
            ])
            os.chdir("./tf-psa-crypto")
            tf_psa_crypto_code_parser = TFPSACryptoCodeParser(self.log)
            tf_psa_crypto_parse_result = tf_psa_crypto_code_parser.comprehensive_parse()
            os.chdir("../")

        symbols = self.parse_symbols()
        mbedtls_parse_result = self._parse(all_macros, enum_consts,
                                           identifiers, excluded_identifiers,
                                           mbed_psa_words, symbols)
        # Combine results for Mbed TLS and TF-PSA-Crypto
        for key in tf_psa_crypto_parse_result:
            mbedtls_parse_result[key] += tf_psa_crypto_parse_result[key]
        return mbedtls_parse_result

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/mbedtls_config.h",
            "include/mbedtls/mbedtls_config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/mbedtls_config.h.bak",
                "include/mbedtls/mbedtls_config.h"
            )

        return symbols
967 subprocess.run( 968 ["make", "clean"], 969 universal_newlines=True, 970 check=True 971 ) 972 subprocess.run( 973 ["make", "lib"], 974 env=my_environment, 975 universal_newlines=True, 976 stdout=subprocess.PIPE, 977 stderr=subprocess.STDOUT, 978 check=True 979 ) 980 981 # Perform object file analysis using nm 982 symbols = self.parse_symbols_from_nm([ 983 "library/libmbedcrypto.a", 984 "library/libmbedtls.a", 985 "library/libmbedx509.a" 986 ]) 987 988 subprocess.run( 989 ["make", "clean"], 990 universal_newlines=True, 991 check=True 992 ) 993 except subprocess.CalledProcessError as error: 994 self.log.debug(error.output) 995 raise error 996 finally: 997 # Put back the original config regardless of there being errors. 998 # Works also for keyboard interrupts. 999 shutil.move( 1000 "include/mbedtls/mbedtls_config.h.bak", 1001 "include/mbedtls/mbedtls_config.h" 1002 ) 1003 1004 return symbols 1005 1006 class NameChecker(): 1007 """ 1008 Representation of the core name checking operation performed by this script. 1009 """ 1010 def __init__(self, parse_result, log): 1011 self.parse_result = parse_result 1012 self.log = log 1013 1014 def perform_checks(self, quiet=False): 1015 """ 1016 A comprehensive checker that performs each check in order, and outputs 1017 a final verdict. 1018 1019 Args: 1020 * quiet: whether to hide detailed problem explanation. 
1021 """ 1022 self.log.info("=============") 1023 Problem.quiet = quiet 1024 problems = 0 1025 problems += self.check_symbols_declared_in_header() 1026 1027 pattern_checks = [ 1028 ("public_macros", PUBLIC_MACRO_PATTERN), 1029 ("internal_macros", INTERNAL_MACRO_PATTERN), 1030 ("enum_consts", CONSTANTS_PATTERN), 1031 ("identifiers", IDENTIFIER_PATTERN) 1032 ] 1033 for group, check_pattern in pattern_checks: 1034 problems += self.check_match_pattern(group, check_pattern) 1035 1036 problems += self.check_for_typos() 1037 1038 self.log.info("=============") 1039 if problems > 0: 1040 self.log.info("FAIL: {0} problem(s) to fix".format(str(problems))) 1041 if quiet: 1042 self.log.info("Remove --quiet to see explanations.") 1043 else: 1044 self.log.info("Use --quiet for minimal output.") 1045 return 1 1046 else: 1047 self.log.info("PASS") 1048 return 0 1049 1050 def check_symbols_declared_in_header(self): 1051 """ 1052 Perform a check that all detected symbols in the library object files 1053 are properly declared in headers. 1054 Assumes parse_names_in_source() was called before this. 1055 1056 Returns the number of problems that need fixing. 1057 """ 1058 problems = [] 1059 all_identifiers = self.parse_result["identifiers"] + \ 1060 self.parse_result["excluded_identifiers"] 1061 1062 for symbol in self.parse_result["symbols"]: 1063 found_symbol_declared = False 1064 for identifier_match in all_identifiers: 1065 if symbol == identifier_match.name: 1066 found_symbol_declared = True 1067 break 1068 1069 if not found_symbol_declared: 1070 problems.append(SymbolNotInHeader(symbol)) 1071 1072 self.output_check_result("All symbols in header", problems) 1073 return len(problems) 1074 1075 def check_match_pattern(self, group_to_check, check_pattern): 1076 """ 1077 Perform a check that all items of a group conform to a regex pattern. 1078 Assumes parse_names_in_source() was called before this. 1079 1080 Args: 1081 * group_to_check: string key to index into self.parse_result. 
1082 * check_pattern: the regex to check against. 1083 1084 Returns the number of problems that need fixing. 1085 """ 1086 problems = [] 1087 1088 for item_match in self.parse_result[group_to_check]: 1089 if not re.search(check_pattern, item_match.name): 1090 problems.append(PatternMismatch(check_pattern, item_match)) 1091 # Double underscore should not be used for names 1092 if re.search(r".*__.*", item_match.name): 1093 problems.append( 1094 PatternMismatch("no double underscore allowed", item_match)) 1095 1096 self.output_check_result( 1097 "Naming patterns of {}".format(group_to_check), 1098 problems) 1099 return len(problems) 1100 1101 def check_for_typos(self): 1102 """ 1103 Perform a check that all words in the source code beginning with MBED are 1104 either defined as macros, or as enum constants. 1105 Assumes parse_names_in_source() was called before this. 1106 1107 Returns the number of problems that need fixing. 1108 """ 1109 problems = [] 1110 1111 # Set comprehension, equivalent to a list comprehension wrapped by set() 1112 all_caps_names = { 1113 match.name 1114 for match 1115 in self.parse_result["public_macros"] + 1116 self.parse_result["internal_macros"] + 1117 self.parse_result["private_macros"] + 1118 self.parse_result["enum_consts"] 1119 } 1120 typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|" 1121 r"MBEDTLS_TEST_LIBTESTDRIVER*|" 1122 r"PSA_CRYPTO_DRIVER_TEST") 1123 1124 for name_match in self.parse_result["mbed_psa_words"]: 1125 found = name_match.name in all_caps_names 1126 1127 # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the 1128 # PSA driver, they will not exist as macros. However, they 1129 # should still be checked for typos using the equivalent 1130 # BUILTINs that exist. 
1131 if "MBEDTLS_PSA_ACCEL_" in name_match.name: 1132 found = name_match.name.replace( 1133 "MBEDTLS_PSA_ACCEL_", 1134 "MBEDTLS_PSA_BUILTIN_") in all_caps_names 1135 1136 if not found and not typo_exclusion.search(name_match.name): 1137 problems.append(Typo(name_match)) 1138 1139 self.output_check_result("Likely typos", problems) 1140 return len(problems) 1141 1142 def output_check_result(self, name, problems): 1143 """ 1144 Write out the PASS/FAIL status of a performed check depending on whether 1145 there were problems. 1146 1147 Args: 1148 * name: the name of the test 1149 * problems: a List of encountered Problems 1150 """ 1151 if problems: 1152 self.log.info("{}: FAIL\n".format(name)) 1153 for problem in problems: 1154 self.log.warning(str(problem)) 1155 else: 1156 self.log.info("{}: PASS".format(name)) 1157 1158 def main(): 1159 """ 1160 Perform argument parsing, and create an instance of CodeParser and 1161 NameChecker to begin the core operation. 1162 """ 1163 parser = argparse.ArgumentParser( 1164 formatter_class=argparse.RawDescriptionHelpFormatter, 1165 description=( 1166 "This script confirms that the naming of all symbols and identifiers " 1167 "in Mbed TLS are consistent with the house style and are also " 1168 "self-consistent.\n\n" 1169 "Expected to be run from the Mbed TLS root directory.") 1170 ) 1171 parser.add_argument( 1172 "-v", "--verbose", 1173 action="store_true", 1174 help="show parse results" 1175 ) 1176 parser.add_argument( 1177 "-q", "--quiet", 1178 action="store_true", 1179 help="hide unnecessary text, explanations, and highlights" 1180 ) 1181 1182 args = parser.parse_args() 1183 1184 # Configure the global logger, which is then passed to the classes below 1185 log = logging.getLogger() 1186 log.setLevel(logging.DEBUG if args.verbose else logging.INFO) 1187 log.addHandler(logging.StreamHandler()) 1188 1189 try: 1190 if build_tree.looks_like_tf_psa_crypto_root(os.getcwd()): 1191 tf_psa_crypto_code_parser = TFPSACryptoCodeParser(log) 
1192 parse_result = tf_psa_crypto_code_parser.comprehensive_parse() 1193 elif build_tree.looks_like_mbedtls_root(os.getcwd()): 1194 # Mbed TLS uses TF-PSA-Crypto, so we need to parse TF-PSA-Crypto too 1195 mbedtls_code_parser = MBEDTLSCodeParser(log) 1196 parse_result = mbedtls_code_parser.comprehensive_parse() 1197 else: 1198 raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root") 1199 except Exception: # pylint: disable=broad-except 1200 traceback.print_exc() 1201 sys.exit(2) 1202 1203 name_checker = NameChecker(parse_result, log) 1204 return_code = name_checker.perform_checks(quiet=args.quiet) 1205 1206 sys.exit(return_code) 1207 1208 if __name__ == "__main__": 1209 main()