quickjs-tart

quickjs-based runtime for wallet-core logic
Log | Files | Refs | README | LICENSE

check_names.py (46730B)


      1 #!/usr/bin/env python3
      2 #
      3 # Copyright The Mbed TLS Contributors
      4 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
      5 
      6 """
      7 This script confirms that the naming of all symbols and identifiers in Mbed TLS
      8 are consistent with the house style and are also self-consistent. It only runs
      9 on Linux and macOS since it depends on nm.
     10 
     11 It contains three major Python classes, TFPSACryptoCodeParser,
     12 MBEDTLSCodeParser and NameChecker. They all have a comprehensive "run-all"
     13 function (comprehensive_parse() and perform_checks()) but the individual
     14 functions can also be used for specific needs.
     15 
CodeParser (a base class inherited by TFPSACryptoCodeParser and MBEDTLSCodeParser)
     17 makes heavy use of regular expressions to parse the code, and is dependent on
     18 the current code formatting. Many Python C parser libraries require
     19 preprocessed C code, which means no macro parsing. Compiler tools are also not
     20 very helpful when we want the exact location in the original source (which
     21 becomes impossible when e.g. comments are stripped).
     22 
     23 NameChecker performs the following checks:
     24 
     25 - All exported and available symbols in the library object files, are explicitly
     26   declared in the header files. This uses the nm command.
     27 - All macros, constants, and identifiers (function names, struct names, etc)
     28   follow the required regex pattern.
     29 - Typo checking: All words that begin with MBED|PSA exist as macros or constants.
     30 
     31 The script returns 0 on success, 1 on test failure, and 2 if there is a script
     32 error. It must be run from Mbed TLS root.
     33 """
     34 
     35 import abc
     36 import argparse
     37 import fnmatch
     38 import glob
     39 import textwrap
     40 import os
     41 import sys
     42 import traceback
     43 import re
     44 import enum
     45 import shutil
     46 import subprocess
     47 import logging
     48 import tempfile
     49 
     50 import project_scripts # pylint: disable=unused-import
     51 from mbedtls_framework import build_tree
     52 
     53 
# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
# Public macros: ALL_CAPS with an MBEDTLS_/PSA_/TF_PSA_ prefix; the final
# character must be a digit or uppercase letter (no trailing underscore).
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA|TF_PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Internal macros: any mix of word characters, but the final character must
# be a digit or uppercase letter (no trailing underscore or lowercase).
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
# Enum constants follow the same naming rules as public macros.
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
# Identifiers (functions, types, variables): lowercase with an mbedtls_/psa_
# prefix; the final character must be a digit or lowercase letter.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
     60 
     61 class Match(): # pylint: disable=too-few-public-methods
     62     """
     63     A class representing a match, together with its found position.
     64 
     65     Fields:
     66     * filename: the file that the match was in.
     67     * line: the full line containing the match.
     68     * line_no: the line number.
     69     * pos: a tuple of (start, end) positions on the line where the match is.
     70     * name: the match itself.
     71     """
     72     def __init__(self, filename, line, line_no, pos, name):
     73         # pylint: disable=too-many-arguments
     74         self.filename = filename
     75         self.line = line
     76         self.line_no = line_no
     77         self.pos = pos
     78         self.name = name
     79 
     80     def __str__(self):
     81         """
     82         Return a formatted code listing representation of the erroneous line.
     83         """
     84         gutter = format(self.line_no, "4d")
     85         underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"
     86 
     87         return (
     88             " {0} |\n".format(" " * len(gutter)) +
     89             " {0} | {1}".format(gutter, self.line) +
     90             " {0} | {1}\n".format(" " * len(gutter), underline)
     91         )
     92 
     93 class Problem(abc.ABC): # pylint: disable=too-few-public-methods
     94     """
     95     An abstract parent class representing a form of static analysis error.
     96     It extends an Abstract Base Class, which means it is not instantiable, and
     97     it also mandates certain abstract methods to be implemented in subclasses.
     98     """
     99     # Class variable to control the quietness of all problems
    100     quiet = False
    101     def __init__(self):
    102         self.textwrapper = textwrap.TextWrapper()
    103         self.textwrapper.width = 80
    104         self.textwrapper.initial_indent = "    > "
    105         self.textwrapper.subsequent_indent = "      "
    106 
    107     def __str__(self):
    108         """
    109         Unified string representation method for all Problems.
    110         """
    111         if self.__class__.quiet:
    112             return self.quiet_output()
    113         return self.verbose_output()
    114 
    115     @abc.abstractmethod
    116     def quiet_output(self):
    117         """
    118         The output when --quiet is enabled.
    119         """
    120         pass
    121 
    122     @abc.abstractmethod
    123     def verbose_output(self):
    124         """
    125         The default output with explanation and code snippet if appropriate.
    126         """
    127         pass
    128 
    129 class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    130     """
    131     A problem that occurs when an exported/available symbol in the object file
    132     is not explicitly declared in header files. Created with
    133     NameCheck.check_symbols_declared_in_header()
    134 
    135     Fields:
    136     * symbol_name: the name of the symbol.
    137     """
    138     def __init__(self, symbol_name):
    139         self.symbol_name = symbol_name
    140         Problem.__init__(self)
    141 
    142     def quiet_output(self):
    143         return "{0}".format(self.symbol_name)
    144 
    145     def verbose_output(self):
    146         return self.textwrapper.fill(
    147             "'{0}' was found as an available symbol in the output of nm, "
    148             "however it was not declared in any header files."
    149             .format(self.symbol_name))
    150 
    151 class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    152     """
    153     A problem that occurs when something doesn't match the expected pattern.
    154     Created with NameCheck.check_match_pattern()
    155 
    156     Fields:
    157     * pattern: the expected regex pattern
    158     * match: the Match object in question
    159     """
    160     def __init__(self, pattern, match):
    161         self.pattern = pattern
    162         self.match = match
    163         Problem.__init__(self)
    164 
    165 
    166     def quiet_output(self):
    167         return (
    168             "{0}:{1}:{2}"
    169             .format(self.match.filename, self.match.line_no, self.match.name)
    170         )
    171 
    172     def verbose_output(self):
    173         return self.textwrapper.fill(
    174             "{0}:{1}: '{2}' does not match the required pattern '{3}'."
    175             .format(
    176                 self.match.filename,
    177                 self.match.line_no,
    178                 self.match.name,
    179                 self.pattern
    180             )
    181         ) + "\n" + str(self.match)
    182 
    183 class Typo(Problem): # pylint: disable=too-few-public-methods
    184     """
    185     A problem that occurs when a word using MBED or PSA doesn't
    186     appear to be defined as constants nor enum values. Created with
    187     NameCheck.check_for_typos()
    188 
    189     Fields:
    190     * match: the Match object of the MBED|PSA name in question.
    191     """
    192     def __init__(self, match):
    193         self.match = match
    194         Problem.__init__(self)
    195 
    196     def quiet_output(self):
    197         return (
    198             "{0}:{1}:{2}"
    199             .format(self.match.filename, self.match.line_no, self.match.name)
    200         )
    201 
    202     def verbose_output(self):
    203         return self.textwrapper.fill(
    204             "{0}:{1}: '{2}' looks like a typo. It was not found in any "
    205             "macros or any enums. If this is not a typo, put "
    206             "//no-check-names after it."
    207             .format(self.match.filename, self.match.line_no, self.match.name)
    208         ) + "\n" + str(self.match)
    209 
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.

    Parsing is regex- and heuristics-based and therefore highly dependent
    on the project's current code formatting conventions.
    """
    def __init__(self, log):
        self.log = log
        # Parsing must start from the project root so that the relative glob
        # patterns used by the parse_* methods resolve correctly.
        if not build_tree.looks_like_root(os.getcwd()):
            raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root")

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def _parse(self, all_macros, enum_consts, identifiers,
               excluded_identifiers, mbed_psa_words, symbols):
        # pylint: disable=too-many-arguments
        """
        Parse macros, enums, identifiers, excluded identifiers, Mbed PSA word and Symbols.

        Args:
        * all_macros: dict with "public", "internal" and "private" keys, each
          a List of macro Matches.
        * enum_consts / identifiers / excluded_identifiers / mbed_psa_words /
          symbols: Lists of previously parsed Matches (symbols are names).

        Returns a dict of parsed item key to the corresponding List of Matches.
        """

        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc:
        # a macro whose name is also a declared identifier is an alias, not
        # a macro to be pattern-checked.
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            for macro in all_macros[scope]:
                if macro.name not in identifiers_justname:
                    actual_macros[scope].append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug("  {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug("  {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            # Private macros are passed through unfiltered.
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        accumulator = set()
        # Globbing both include and exclude patterns means excluded files are
        # reported in exc_files even if no include pattern matched them.
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator = accumulator.union(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro name prefixes to ignore (str.startswith accepts a tuple and
        # matches if the name starts with any of these).
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # NOTE(review): enumerate() is 0-based, so Match.line_no is
                # one less than the editor line number -- confirm intended.
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        # Skip lines opted out with //no-check-names and #error lines.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            # State resets per file; an enum cannot span files.
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum " + \
                                    optional_c_identifier + \
                                    r" *{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        # Inside the braces: the first word on the line is an
                        # enum constant; preprocessor lines were skipped above.
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    # Chunks that must be stripped before identifier parsing. The named group
    # "string" distinguishes string literals (replaced by '""') from comments
    # (replaced by a space) in strip_comments_and_literals().
    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                # Entire line is inside the comment; discard it.
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    # Alternatives for recognising C declarations; exactly one capture group
    # per alternative holds the identifier name.
    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                # Lines that can never start a declaration reset any pending
                # multi-line concatenation.
                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style. Identifiers in excluded files are still
        parsed

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns: a Tuple of two Lists of Match objects with identifiers.
        * included_identifiers: A List of Match objects with identifiers from
          included files.
        * excluded_identifiers: A List of Match objects with identifiers from
          excluded files.
        """

        included_files, excluded_files = \
            self.get_all_files(include, exclude)

        self.log.debug("Looking for included identifiers in {} files".format \
            (len(included_files)))

        included_identifiers = []
        excluded_identifiers = []
        for header_file in included_files:
            self.parse_identifiers_in_file(header_file, included_identifiers)
        for header_file in excluded_files:
            self.parse_identifiers_in_file(header_file, excluded_identifiers)

        return (included_identifiers, excluded_identifiers)

    def parse_symbols(self):
        """
        Compile a library, and parse the object files using nm to retrieve the
        list of referenced symbols. Exceptions thrown here are rethrown because
        they would be critical errors that void several tests, and thus needs
        to halt the program. This is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.

        Subclasses must override this with a build-system-specific
        implementation.
        """
        raise NotImplementedError("parse_symbols must be implemented by a code parser")

    def comprehensive_parse(self):
        """
        (Must be defined as a class method)
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.

        Subclasses must override this with their project-specific file lists.
        """
        raise NotImplementedError("comprehension_parse must be implemented by a code parser")

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.

        Raises subprocess.CalledProcessError if nm exits with failure
        (check=True below).
        """
        # Lines to ignore: undefined ("U") symbols, blank lines, and
        # "filename:" section headers in nm -o output.
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        # Capture the symbol name, dropping any leading underscores
        # (e.g. the underscore prefix used by macOS object files).
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        # Third-party (Everest) symbol prefixes that are not subject to the
        # naming rules.
        exclusions = ("FStar", "Hacl")
        symbols = []
        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    # Unparseable nm line: log it rather than failing.
                    self.log.error(line)
        return symbols
    681 
    682 class TFPSACryptoCodeParser(CodeParser):
    683     """
    684     Class for retrieving files and parsing TF-PSA-Crypto code. This can be used
    685     independently of the checks that NameChecker performs.
    686     """
    687 
    def __init__(self, log):
        """
        Initialise the base CodeParser and additionally require that the
        current working directory is the TF-PSA-Crypto project root, since
        the glob patterns used by comprehensive_parse() are relative to it.
        """
        super().__init__(log)
        if not build_tree.looks_like_tf_psa_crypto_root(os.getcwd()):
            raise Exception("This script must be run from TF-PSA-Crypto root.")
    692 
    693     def comprehensive_parse(self):
    694         """
    695         Comprehensive ("default") function to call each parsing function and
    696         retrieve various elements of the code, together with the source location.
    697 
    698         Returns a dict of parsed item key to the corresponding List of Matches.
    699         """
    700         all_macros = {"public": [], "internal": [], "private":[]}
    701         all_macros["public"] = self.parse_macros([
    702             "include/psa/*.h",
    703             "include/tf-psa-crypto/*.h",
    704             "include/mbedtls/*.h",
    705             "drivers/builtin/include/mbedtls/*.h",
    706             "drivers/everest/include/everest/everest.h",
    707             "drivers/everest/include/everest/x25519.h",
    708             "drivers/everest/include/tf-psa-crypto/private/everest/everest.h",
    709             "drivers/everest/include/tf-psa-crypto/private/everest/x25519.h"
    710         ])
    711         all_macros["internal"] = self.parse_macros([
    712             "core/*.h",
    713             "drivers/builtin/src/*.h",
    714             "framework/tests/include/test/drivers/*.h",
    715         ])
    716         all_macros["private"] = self.parse_macros([
    717             "core/*.c",
    718             "drivers/builtin/src/*.c",
    719         ])
    720         enum_consts = self.parse_enum_consts([
    721             "include/psa/*.h",
    722             "include/tf-psa-crypto/*.h",
    723             "include/mbedtls/*.h",
    724             "drivers/builtin/include/mbedtls/*.h",
    725             "core/*.h",
    726             "drivers/builtin/src/*.h",
    727             "core/*.c",
    728             "drivers/builtin/src/*.c",
    729             "drivers/everest/include/everest/everest.h",
    730             "drivers/everest/include/everest/x25519.h",
    731             "drivers/everest/include/tf-psa-crypto/private/everest/everest.h",
    732             "drivers/everest/include/tf-psa-crypto/private/everest/x25519.h"
    733         ])
    734         identifiers, excluded_identifiers = self.parse_identifiers([
    735             "include/psa/*.h",
    736             "include/tf-psa-crypto/*.h",
    737             "include/mbedtls/*.h",
    738             "drivers/builtin/include/mbedtls/*.h",
    739             "core/*.h",
    740             "drivers/builtin/src/*.h",
    741             "drivers/everest/include/everest/everest.h",
    742             "drivers/everest/include/everest/x25519.h",
    743             "drivers/everest/include/tf-psa-crypto/private/everest/everest.h",
    744             "drivers/everest/include/tf-psa-crypto/private/everest/x25519.h"
    745         ], ["drivers/p256-m/p256-m/p256-m.h"])
    746         mbed_psa_words = self.parse_mbed_psa_words([
    747             "include/psa/*.h",
    748             "include/tf-psa-crypto/*.h",
    749             "include/mbedtls/*.h",
    750             "drivers/builtin/include/mbedtls/*.h",
    751             "core/*.h",
    752             "drivers/builtin/src/*.h",
    753             "drivers/everest/include/everest/everest.h",
    754             "drivers/everest/include/everest/x25519.h",
    755             "drivers/everest/include/tf-psa-crypto/private/everest/everest.h",
    756             "drivers/everest/include/tf-psa-crypto/private/everest/x25519.h",
    757             "core/*.c",
    758             "drivers/builtin/src/*.c",
    759             "drivers/everest/library/everest.c",
    760             "drivers/everest/library/x25519.c"
    761         ], ["core/psa_crypto_driver_wrappers.h"])
    762         symbols = self.parse_symbols()
    763 
    764         return self._parse(all_macros, enum_consts, identifiers,
    765                            excluded_identifiers, mbed_psa_words, symbols)
    766 
    767     def parse_symbols(self):
    768         """
    769         Compile the TF-PSA-Crypto libraries, and parse the
    770         object files using nm to retrieve the list of referenced symbols.
    771         Exceptions thrown here are rethrown because they would be critical
    772         errors that void several tests, and thus needs to halt the program. This
    773         is explicitly done for clarity.
    774 
    775         Returns a List of unique symbols defined and used in the libraries.
    776         """
    777         self.log.info("Compiling...")
    778         symbols = []
    779 
    780         # Back up the config and atomically compile with the full configuration.
    781         shutil.copy(
    782             "include/psa/crypto_config.h",
    783             "include/psa/crypto_config.h.bak"
    784         )
    785         try:
    786             # Use check=True in all subprocess calls so that failures are raised
    787             # as exceptions and logged.
    788             subprocess.run(
    789                 ["python3", "scripts/config.py", "full"],
    790                 universal_newlines=True,
    791                 check=True
    792             )
    793             my_environment = os.environ.copy()
    794             my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
    795 
    796             source_dir = os.getcwd()
    797             build_dir = tempfile.mkdtemp()
    798             os.chdir(build_dir)
    799             subprocess.run(
    800                 ["cmake", "-DGEN_FILES=ON", source_dir],
    801                 universal_newlines=True,
    802                 check=True
    803             )
    804             subprocess.run(
    805                 ["make"],
    806                 env=my_environment,
    807                 universal_newlines=True,
    808                 stdout=subprocess.PIPE,
    809                 stderr=subprocess.STDOUT,
    810                 check=True
    811             )
    812 
    813             # Perform object file analysis using nm
    814             symbols = self.parse_symbols_from_nm([
    815                 build_dir + "/drivers/builtin/libbuiltin.a",
    816                 build_dir + "/drivers/p256-m/libp256m.a",
    817                 build_dir + "/drivers/everest/libeverest.a",
    818                 build_dir + "/core/libtfpsacrypto.a"
    819             ])
    820 
    821             os.chdir(source_dir)
    822             shutil.rmtree(build_dir)
    823         except subprocess.CalledProcessError as error:
    824             self.log.debug(error.output)
    825             raise error
    826         finally:
    827             # Put back the original config regardless of there being errors.
    828             # Works also for keyboard interrupts.
    829             shutil.move(
    830                 "include/psa/crypto_config.h.bak",
    831                 "include/psa/crypto_config.h"
    832             )
    833 
    834         return symbols
    835 
    836 class MBEDTLSCodeParser(CodeParser):
    837     """
    838     Class for retrieving files and parsing Mbed TLS code. This can be used
    839     independently of the checks that NameChecker performs.
    840     """
    841 
    842     def __init__(self, log):
    843         super().__init__(log)
    844         if not build_tree.looks_like_mbedtls_root(os.getcwd()):
    845             raise Exception("This script must be run from Mbed TLS root.")
    846 
    847     def comprehensive_parse(self):
    848         """
    849         Comprehensive ("default") function to call each parsing function and
    850         retrieve various elements of the code, together with the source location.
    851 
    852         Returns a dict of parsed item key to the corresponding List of Matches.
    853         """
    854         all_macros = {"public": [], "internal": [], "private":[]}
    855         # TF-PSA-Crypto is in the same repo in 3.6 so initalise variable here.
    856         tf_psa_crypto_parse_result = {}
    857 
    858         if build_tree.is_mbedtls_3_6():
    859             all_macros["public"] = self.parse_macros([
    860                 "include/mbedtls/*.h",
    861                 "include/psa/*.h",
    862                 "3rdparty/everest/include/everest/everest.h",
    863                 "3rdparty/everest/include/everest/x25519.h"
    864             ])
    865             all_macros["internal"] = self.parse_macros([
    866                 "library/*.h",
    867                 "framework/tests/include/test/drivers/*.h",
    868             ])
    869             all_macros["private"] = self.parse_macros([
    870                 "library/*.c",
    871             ])
    872             enum_consts = self.parse_enum_consts([
    873                 "include/mbedtls/*.h",
    874                 "include/psa/*.h",
    875                 "library/*.h",
    876                 "library/*.c",
    877                 "3rdparty/everest/include/everest/everest.h",
    878                 "3rdparty/everest/include/everest/x25519.h"
    879             ])
    880             identifiers, excluded_identifiers = self.parse_identifiers([
    881                 "include/mbedtls/*.h",
    882                 "include/psa/*.h",
    883                 "library/*.h",
    884                 "3rdparty/everest/include/everest/everest.h",
    885                 "3rdparty/everest/include/everest/x25519.h"
    886             ], ["3rdparty/p256-m/p256-m/p256-m.h"])
    887             mbed_psa_words = self.parse_mbed_psa_words([
    888                 "include/mbedtls/*.h",
    889                 "include/psa/*.h",
    890                 "library/*.h",
    891                 "3rdparty/everest/include/everest/everest.h",
    892                 "3rdparty/everest/include/everest/x25519.h",
    893                 "library/*.c",
    894                 "3rdparty/everest/library/everest.c",
    895                 "3rdparty/everest/library/x25519.c"
    896             ], ["library/psa_crypto_driver_wrappers.h"])
    897         else:
    898             all_macros = {"public": [], "internal": [], "private":[]}
    899             all_macros["public"] = self.parse_macros([
    900                 "include/mbedtls/*.h",
    901             ])
    902             all_macros["internal"] = self.parse_macros([
    903                 "library/*.h",
    904                 "framework/tests/include/test/drivers/*.h",
    905             ])
    906             all_macros["private"] = self.parse_macros([
    907                 "library/*.c",
    908             ])
    909             enum_consts = self.parse_enum_consts([
    910                 "include/mbedtls/*.h",
    911                 "library/*.h",
    912                 "library/*.c",
    913             ])
    914             identifiers, excluded_identifiers = self.parse_identifiers([
    915                 "include/mbedtls/*.h",
    916                 "library/*.h",
    917             ])
    918             mbed_psa_words = self.parse_mbed_psa_words([
    919                 "include/mbedtls/*.h",
    920                 "library/*.h",
    921                 "library/*.c",
    922             ])
    923             os.chdir("./tf-psa-crypto")
    924             tf_psa_crypto_code_parser = TFPSACryptoCodeParser(self.log)
    925             tf_psa_crypto_parse_result = tf_psa_crypto_code_parser.comprehensive_parse()
    926             os.chdir("../")
    927 
    928         symbols = self.parse_symbols()
    929         mbedtls_parse_result = self._parse(all_macros, enum_consts,
    930                                            identifiers, excluded_identifiers,
    931                                            mbed_psa_words, symbols)
    932         # Combile results for Mbed TLS and TF-PSA-Crypto
    933         for key in tf_psa_crypto_parse_result:
    934             mbedtls_parse_result[key] += tf_psa_crypto_parse_result[key]
    935         return mbedtls_parse_result
    936 
    937     def parse_symbols(self):
    938         """
    939         Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
    940         object files using nm to retrieve the list of referenced symbols.
    941         Exceptions thrown here are rethrown because they would be critical
    942         errors that void several tests, and thus needs to halt the program. This
    943         is explicitly done for clarity.
    944 
    945         Returns a List of unique symbols defined and used in the libraries.
    946         """
    947         self.log.info("Compiling...")
    948         symbols = []
    949 
    950         # Back up the config and atomically compile with the full configuration.
    951         shutil.copy(
    952             "include/mbedtls/mbedtls_config.h",
    953             "include/mbedtls/mbedtls_config.h.bak"
    954         )
    955         try:
    956             # Use check=True in all subprocess calls so that failures are raised
    957             # as exceptions and logged.
    958             subprocess.run(
    959                 ["python3", "scripts/config.py", "full"],
    960                 universal_newlines=True,
    961                 check=True
    962             )
    963             my_environment = os.environ.copy()
    964             my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
    965             # Run make clean separately to lib to prevent unwanted behavior when
    966             # make is invoked with parallelism.
    967             subprocess.run(
    968                 ["make", "clean"],
    969                 universal_newlines=True,
    970                 check=True
    971             )
    972             subprocess.run(
    973                 ["make", "lib"],
    974                 env=my_environment,
    975                 universal_newlines=True,
    976                 stdout=subprocess.PIPE,
    977                 stderr=subprocess.STDOUT,
    978                 check=True
    979             )
    980 
    981             # Perform object file analysis using nm
    982             symbols = self.parse_symbols_from_nm([
    983                 "library/libmbedcrypto.a",
    984                 "library/libmbedtls.a",
    985                 "library/libmbedx509.a"
    986             ])
    987 
    988             subprocess.run(
    989                 ["make", "clean"],
    990                 universal_newlines=True,
    991                 check=True
    992             )
    993         except subprocess.CalledProcessError as error:
    994             self.log.debug(error.output)
    995             raise error
    996         finally:
    997             # Put back the original config regardless of there being errors.
    998             # Works also for keyboard interrupts.
    999             shutil.move(
   1000                 "include/mbedtls/mbedtls_config.h.bak",
   1001                 "include/mbedtls/mbedtls_config.h"
   1002             )
   1003 
   1004         return symbols
   1005 
   1006 class NameChecker():
   1007     """
   1008     Representation of the core name checking operation performed by this script.
   1009     """
   1010     def __init__(self, parse_result, log):
   1011         self.parse_result = parse_result
   1012         self.log = log
   1013 
   1014     def perform_checks(self, quiet=False):
   1015         """
   1016         A comprehensive checker that performs each check in order, and outputs
   1017         a final verdict.
   1018 
   1019         Args:
   1020         * quiet: whether to hide detailed problem explanation.
   1021         """
   1022         self.log.info("=============")
   1023         Problem.quiet = quiet
   1024         problems = 0
   1025         problems += self.check_symbols_declared_in_header()
   1026 
   1027         pattern_checks = [
   1028             ("public_macros", PUBLIC_MACRO_PATTERN),
   1029             ("internal_macros", INTERNAL_MACRO_PATTERN),
   1030             ("enum_consts", CONSTANTS_PATTERN),
   1031             ("identifiers", IDENTIFIER_PATTERN)
   1032         ]
   1033         for group, check_pattern in pattern_checks:
   1034             problems += self.check_match_pattern(group, check_pattern)
   1035 
   1036         problems += self.check_for_typos()
   1037 
   1038         self.log.info("=============")
   1039         if problems > 0:
   1040             self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
   1041             if quiet:
   1042                 self.log.info("Remove --quiet to see explanations.")
   1043             else:
   1044                 self.log.info("Use --quiet for minimal output.")
   1045             return 1
   1046         else:
   1047             self.log.info("PASS")
   1048             return 0
   1049 
   1050     def check_symbols_declared_in_header(self):
   1051         """
   1052         Perform a check that all detected symbols in the library object files
   1053         are properly declared in headers.
   1054         Assumes parse_names_in_source() was called before this.
   1055 
   1056         Returns the number of problems that need fixing.
   1057         """
   1058         problems = []
   1059         all_identifiers = self.parse_result["identifiers"] +  \
   1060             self.parse_result["excluded_identifiers"]
   1061 
   1062         for symbol in self.parse_result["symbols"]:
   1063             found_symbol_declared = False
   1064             for identifier_match in all_identifiers:
   1065                 if symbol == identifier_match.name:
   1066                     found_symbol_declared = True
   1067                     break
   1068 
   1069             if not found_symbol_declared:
   1070                 problems.append(SymbolNotInHeader(symbol))
   1071 
   1072         self.output_check_result("All symbols in header", problems)
   1073         return len(problems)
   1074 
   1075     def check_match_pattern(self, group_to_check, check_pattern):
   1076         """
   1077         Perform a check that all items of a group conform to a regex pattern.
   1078         Assumes parse_names_in_source() was called before this.
   1079 
   1080         Args:
   1081         * group_to_check: string key to index into self.parse_result.
   1082         * check_pattern: the regex to check against.
   1083 
   1084         Returns the number of problems that need fixing.
   1085         """
   1086         problems = []
   1087 
   1088         for item_match in self.parse_result[group_to_check]:
   1089             if not re.search(check_pattern, item_match.name):
   1090                 problems.append(PatternMismatch(check_pattern, item_match))
   1091             # Double underscore should not be used for names
   1092             if re.search(r".*__.*", item_match.name):
   1093                 problems.append(
   1094                     PatternMismatch("no double underscore allowed", item_match))
   1095 
   1096         self.output_check_result(
   1097             "Naming patterns of {}".format(group_to_check),
   1098             problems)
   1099         return len(problems)
   1100 
   1101     def check_for_typos(self):
   1102         """
   1103         Perform a check that all words in the source code beginning with MBED are
   1104         either defined as macros, or as enum constants.
   1105         Assumes parse_names_in_source() was called before this.
   1106 
   1107         Returns the number of problems that need fixing.
   1108         """
   1109         problems = []
   1110 
   1111         # Set comprehension, equivalent to a list comprehension wrapped by set()
   1112         all_caps_names = {
   1113             match.name
   1114             for match
   1115             in self.parse_result["public_macros"] +
   1116             self.parse_result["internal_macros"] +
   1117             self.parse_result["private_macros"] +
   1118             self.parse_result["enum_consts"]
   1119             }
   1120         typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
   1121                                     r"MBEDTLS_TEST_LIBTESTDRIVER*|"
   1122                                     r"PSA_CRYPTO_DRIVER_TEST")
   1123 
   1124         for name_match in self.parse_result["mbed_psa_words"]:
   1125             found = name_match.name in all_caps_names
   1126 
   1127             # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
   1128             # PSA driver, they will not exist as macros. However, they
   1129             # should still be checked for typos using the equivalent
   1130             # BUILTINs that exist.
   1131             if "MBEDTLS_PSA_ACCEL_" in name_match.name:
   1132                 found = name_match.name.replace(
   1133                     "MBEDTLS_PSA_ACCEL_",
   1134                     "MBEDTLS_PSA_BUILTIN_") in all_caps_names
   1135 
   1136             if not found and not typo_exclusion.search(name_match.name):
   1137                 problems.append(Typo(name_match))
   1138 
   1139         self.output_check_result("Likely typos", problems)
   1140         return len(problems)
   1141 
   1142     def output_check_result(self, name, problems):
   1143         """
   1144         Write out the PASS/FAIL status of a performed check depending on whether
   1145         there were problems.
   1146 
   1147         Args:
   1148         * name: the name of the test
   1149         * problems: a List of encountered Problems
   1150         """
   1151         if problems:
   1152             self.log.info("{}: FAIL\n".format(name))
   1153             for problem in problems:
   1154                 self.log.warning(str(problem))
   1155         else:
   1156             self.log.info("{}: PASS".format(name))
   1157 
   1158 def main():
   1159     """
   1160     Perform argument parsing, and create an instance of CodeParser and
   1161     NameChecker to begin the core operation.
   1162     """
   1163     parser = argparse.ArgumentParser(
   1164         formatter_class=argparse.RawDescriptionHelpFormatter,
   1165         description=(
   1166             "This script confirms that the naming of all symbols and identifiers "
   1167             "in Mbed TLS are consistent with the house style and are also "
   1168             "self-consistent.\n\n"
   1169             "Expected to be run from the Mbed TLS root directory.")
   1170     )
   1171     parser.add_argument(
   1172         "-v", "--verbose",
   1173         action="store_true",
   1174         help="show parse results"
   1175     )
   1176     parser.add_argument(
   1177         "-q", "--quiet",
   1178         action="store_true",
   1179         help="hide unnecessary text, explanations, and highlights"
   1180     )
   1181 
   1182     args = parser.parse_args()
   1183 
   1184     # Configure the global logger, which is then passed to the classes below
   1185     log = logging.getLogger()
   1186     log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
   1187     log.addHandler(logging.StreamHandler())
   1188 
   1189     try:
   1190         if build_tree.looks_like_tf_psa_crypto_root(os.getcwd()):
   1191             tf_psa_crypto_code_parser = TFPSACryptoCodeParser(log)
   1192             parse_result = tf_psa_crypto_code_parser.comprehensive_parse()
   1193         elif build_tree.looks_like_mbedtls_root(os.getcwd()):
   1194             # Mbed TLS uses TF-PSA-Crypto, so we need to parse TF-PSA-Crypto too
   1195             mbedtls_code_parser = MBEDTLSCodeParser(log)
   1196             parse_result = mbedtls_code_parser.comprehensive_parse()
   1197         else:
   1198             raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root")
   1199     except Exception: # pylint: disable=broad-except
   1200         traceback.print_exc()
   1201         sys.exit(2)
   1202 
   1203     name_checker = NameChecker(parse_result, log)
   1204     return_code = name_checker.perform_checks(quiet=args.quiet)
   1205 
   1206     sys.exit(return_code)
   1207 
   1208 if __name__ == "__main__":
   1209     main()