quickjs-tart

quickjs-based runtime for wallet-core logic
Log | Files | Refs | README | LICENSE

check_files.py (21376B)


      1 #!/usr/bin/env python3
      2 
      3 # Copyright The Mbed TLS Contributors
      4 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
      5 
      6 """
      7 This script checks the current state of the source code for minor issues,
      8 including incorrect file permissions, presence of tabs, non-Unix line endings,
      9 trailing whitespace, and presence of UTF-8 BOM.
     10 Note: requires python 3, must be run from Mbed TLS root.
     11 """
     12 
     13 import argparse
     14 import codecs
     15 import inspect
     16 import logging
     17 import os
     18 import re
     19 import subprocess
     20 import sys
     21 try:
     22     from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
     23 except ImportError:
     24     pass
     25 
     26 from mbedtls_framework import build_tree
     27 
     28 
     29 class FileIssueTracker:
     30     """Base class for file-wide issue tracking.
     31 
     32     To implement a checker that processes a file as a whole, inherit from
     33     this class and implement `check_file_for_issue` and define ``heading``.
     34 
     35     ``suffix_exemptions``: files whose name ends with a string in this set
     36      will not be checked.
     37 
     38     ``path_exemptions``: files whose path (relative to the root of the source
     39     tree) matches this regular expression will not be checked. This can be
     40     ``None`` to match no path. Paths are normalized and converted to ``/``
     41     separators before matching.
     42 
     43     ``heading``: human-readable description of the issue
     44     """
     45 
     46     suffix_exemptions = frozenset() #type: FrozenSet[str]
     47     path_exemptions = None #type: Optional[Pattern[str]]
     48     # heading must be defined in derived classes.
     49     # pylint: disable=no-member
     50 
     51     def __init__(self):
     52         self.files_with_issues = {}
     53 
     54     @staticmethod
     55     def normalize_path(filepath):
     56         """Normalize ``filepath`` with / as the directory separator."""
     57         filepath = os.path.normpath(filepath)
     58         # On Windows, we may have backslashes to separate directories.
     59         # We need slashes to match exemption lists.
     60         seps = os.path.sep
     61         if os.path.altsep is not None:
     62             seps += os.path.altsep
     63         return '/'.join(filepath.split(seps))
     64 
     65     def should_check_file(self, filepath):
     66         """Whether the given file name should be checked.
     67 
     68         Files whose name ends with a string listed in ``self.suffix_exemptions``
     69         or whose path matches ``self.path_exemptions`` will not be checked.
     70         """
     71         for files_exemption in self.suffix_exemptions:
     72             if filepath.endswith(files_exemption):
     73                 return False
     74         if self.path_exemptions and \
     75            re.match(self.path_exemptions, self.normalize_path(filepath)):
     76             return False
     77         return True
     78 
     79     def check_file_for_issue(self, filepath):
     80         """Check the specified file for the issue that this class is for.
     81 
     82         Subclasses must implement this method.
     83         """
     84         raise NotImplementedError
     85 
     86     def record_issue(self, filepath, line_number):
     87         """Record that an issue was found at the specified location."""
     88         if filepath not in self.files_with_issues.keys():
     89             self.files_with_issues[filepath] = []
     90         self.files_with_issues[filepath].append(line_number)
     91 
     92     def output_file_issues(self, logger):
     93         """Log all the locations where the issue was found."""
     94         if self.files_with_issues.values():
     95             logger.info(self.heading)
     96             for filename, lines in sorted(self.files_with_issues.items()):
     97                 if lines:
     98                     logger.info("{}: {}".format(
     99                         filename, ", ".join(str(x) for x in lines)
    100                     ))
    101                 else:
    102                     logger.info(filename)
    103             logger.info("")
    104 
    105 BINARY_FILE_PATH_RE_LIST = [
    106     r'docs/.*\.pdf\Z',
    107     r'docs/.*\.png\Z',
    108     r'tf-psa-crypto/docs/.*\.pdf\Z',
    109     r'tf-psa-crypto/docs/.*\.png\Z',
    110     r'programs/fuzz/corpuses/[^.]+\Z',
    111     r'framework/data_files/[^.]+\Z',
    112     r'framework/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    113     r'framework/data_files/.*\.req\.[^/]+\Z',
    114     r'framework/data_files/.*malformed[^/]+\Z',
    115     r'framework/data_files/format_pkcs12\.fmt\Z',
    116     r'framework/data_files/.*\.bin\Z',
    117 ]
    118 BINARY_FILE_PATH_RE = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST))
    119 
    120 class LineIssueTracker(FileIssueTracker):
    121     """Base class for line-by-line issue tracking.
    122 
    123     To implement a checker that processes files line by line, inherit from
    124     this class and implement `line_with_issue`.
    125     """
    126 
    127     # Exclude binary files.
    128     path_exemptions = BINARY_FILE_PATH_RE
    129 
    130     def issue_with_line(self, line, filepath, line_number):
    131         """Check the specified line for the issue that this class is for.
    132 
    133         Subclasses must implement this method.
    134         """
    135         raise NotImplementedError
    136 
    137     def check_file_line(self, filepath, line, line_number):
    138         if self.issue_with_line(line, filepath, line_number):
    139             self.record_issue(filepath, line_number)
    140 
    141     def check_file_for_issue(self, filepath):
    142         """Check the lines of the specified file.
    143 
    144         Subclasses must implement the ``issue_with_line`` method.
    145         """
    146         with open(filepath, "rb") as f:
    147             for i, line in enumerate(iter(f.readline, b"")):
    148                 self.check_file_line(filepath, line, i + 1)
    149 
    150 
    151 def is_windows_file(filepath):
    152     _root, ext = os.path.splitext(filepath)
    153     return ext in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')
    154 
    155 
    156 class ShebangIssueTracker(FileIssueTracker):
    157     """Track files with a bad, missing or extraneous shebang line.
    158 
    159     Executable scripts must start with a valid shebang (#!) line.
    160     """
    161 
    162     heading = "Invalid shebang line:"
    163 
    164     # Allow either /bin/sh, /bin/bash, or /usr/bin/env.
    165     # Allow at most one argument (this is a Linux limitation).
    166     # For sh and bash, the argument if present must be options.
    167     # For env, the argument must be the base name of the interpreter.
    168     _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
    169                              rb'|/usr/bin/env ([^\n /]+))$')
    170     _extensions = {
    171         b'bash': 'sh',
    172         b'perl': 'pl',
    173         b'python3': 'py',
    174         b'sh': 'sh',
    175     }
    176 
    177     path_exemptions = re.compile(r'framework/scripts/quiet/.*')
    178 
    179     def is_valid_shebang(self, first_line, filepath):
    180         m = re.match(self._shebang_re, first_line)
    181         if not m:
    182             return False
    183         interpreter = m.group(1) or m.group(2)
    184         if interpreter not in self._extensions:
    185             return False
    186         if not filepath.endswith('.' + self._extensions[interpreter]):
    187             return False
    188         return True
    189 
    190     def check_file_for_issue(self, filepath):
    191         is_executable = os.access(filepath, os.X_OK)
    192         with open(filepath, "rb") as f:
    193             first_line = f.readline()
    194         if first_line.startswith(b'#!'):
    195             if not is_executable:
    196                 # Shebang on a non-executable file
    197                 self.files_with_issues[filepath] = None
    198             elif not self.is_valid_shebang(first_line, filepath):
    199                 self.files_with_issues[filepath] = [1]
    200         elif is_executable:
    201             # Executable without a shebang
    202             self.files_with_issues[filepath] = None
    203 
    204 
    205 class EndOfFileNewlineIssueTracker(FileIssueTracker):
    206     """Track files that end with an incomplete line
    207     (no newline character at the end of the last line)."""
    208 
    209     heading = "Missing newline at end of file:"
    210 
    211     path_exemptions = BINARY_FILE_PATH_RE
    212 
    213     def check_file_for_issue(self, filepath):
    214         with open(filepath, "rb") as f:
    215             try:
    216                 f.seek(-1, 2)
    217             except OSError:
    218                 # This script only works on regular files. If we can't seek
    219                 # 1 before the end, it means that this position is before
    220                 # the beginning of the file, i.e. that the file is empty.
    221                 return
    222             if f.read(1) != b"\n":
    223                 self.files_with_issues[filepath] = None
    224 
    225 
    226 class Utf8BomIssueTracker(FileIssueTracker):
    227     """Track files that start with a UTF-8 BOM.
    228     Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""
    229 
    230     heading = "UTF-8 BOM present:"
    231 
    232     suffix_exemptions = frozenset([".vcxproj", ".sln"])
    233     path_exemptions = BINARY_FILE_PATH_RE
    234 
    235     def check_file_for_issue(self, filepath):
    236         with open(filepath, "rb") as f:
    237             if f.read().startswith(codecs.BOM_UTF8):
    238                 self.files_with_issues[filepath] = None
    239 
    240 
    241 class UnicodeIssueTracker(LineIssueTracker):
    242     """Track lines with invalid characters or invalid text encoding."""
    243 
    244     heading = "Invalid UTF-8 or forbidden character:"
    245 
    246     # Only allow valid UTF-8, and only other explicitly allowed characters.
    247     # We deliberately exclude all characters that aren't a simple non-blank,
    248     # non-zero-width glyph, apart from a very small set (tab, ordinary space,
    249     # line breaks, "basic" no-break space and soft hyphen). In particular,
    250     # non-ASCII control characters, combinig characters, and Unicode state
    251     # changes (e.g. right-to-left text) are forbidden.
    252     # Note that we do allow some characters with a risk of visual confusion,
    253     # for example '-' (U+002D HYPHEN-MINUS) vs '­' (U+00AD SOFT HYPHEN) vs
    254     # '‐' (U+2010 HYPHEN), or 'A' (U+0041 LATIN CAPITAL LETTER A) vs
    255     # 'Α' (U+0391 GREEK CAPITAL LETTER ALPHA).
    256     GOOD_CHARACTERS = ''.join([
    257         '\t\n\r -~', # ASCII (tabs and line endings are checked separately)
    258         '\u00A0-\u00FF', # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
    259         '\u2010-\u2027\u2030-\u205E', # General Punctuation (printable)
    260         '\u2070\u2071\u2074-\u208E\u2090-\u209C', # Superscripts and Subscripts
    261         '\u2190-\u21FF', # Arrows
    262         '\u2200-\u22FF', # Mathematical Symbols
    263         '\u2500-\u257F' # Box Drawings characters used in markdown trees
    264     ])
    265     # Allow any of the characters and ranges above, and anything classified
    266     # as a word constituent.
    267     GOOD_CHARACTERS_RE = re.compile(r'[\w{}]+\Z'.format(GOOD_CHARACTERS))
    268 
    269     def issue_with_line(self, line, _filepath, line_number):
    270         try:
    271             text = line.decode('utf-8')
    272         except UnicodeDecodeError:
    273             return True
    274         if line_number == 1 and text.startswith('\uFEFF'):
    275             # Strip BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the beginning.
    276             # Which files are allowed to have a BOM is handled in
    277             # Utf8BomIssueTracker.
    278             text = text[1:]
    279         return not self.GOOD_CHARACTERS_RE.match(text)
    280 
    281 class UnixLineEndingIssueTracker(LineIssueTracker):
    282     """Track files with non-Unix line endings (i.e. files with CR)."""
    283 
    284     heading = "Non-Unix line endings:"
    285 
    286     def should_check_file(self, filepath):
    287         if not super().should_check_file(filepath):
    288             return False
    289         return not is_windows_file(filepath)
    290 
    291     def issue_with_line(self, line, _filepath, _line_number):
    292         return b"\r" in line
    293 
    294 
    295 class WindowsLineEndingIssueTracker(LineIssueTracker):
    296     """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""
    297 
    298     heading = "Non-Windows line endings:"
    299 
    300     def should_check_file(self, filepath):
    301         if not super().should_check_file(filepath):
    302             return False
    303         return is_windows_file(filepath)
    304 
    305     def issue_with_line(self, line, _filepath, _line_number):
    306         return not line.endswith(b"\r\n") or b"\r" in line[:-2]
    307 
    308 
    309 class TrailingWhitespaceIssueTracker(LineIssueTracker):
    310     """Track lines with trailing whitespace."""
    311 
    312     heading = "Trailing whitespace:"
    313     suffix_exemptions = frozenset([".dsp", ".md"])
    314 
    315     def issue_with_line(self, line, _filepath, _line_number):
    316         return line.rstrip(b"\r\n") != line.rstrip()
    317 
    318 
    319 class TabIssueTracker(LineIssueTracker):
    320     """Track lines with tabs."""
    321 
    322     heading = "Tabs present:"
    323     suffix_exemptions = frozenset([
    324         ".make",
    325         ".pem", # some openssl dumps have tabs
    326         ".sln",
    327         "/.gitmodules",
    328         "/Makefile",
    329         "/Makefile.inc",
    330         "/generate_visualc_files.pl",
    331     ])
    332 
    333     def issue_with_line(self, line, _filepath, _line_number):
    334         return b"\t" in line
    335 
    336 
    337 class MergeArtifactIssueTracker(LineIssueTracker):
    338     """Track lines with merge artifacts.
    339     These are leftovers from a ``git merge`` that wasn't fully edited."""
    340 
    341     heading = "Merge artifact:"
    342 
    343     def issue_with_line(self, line, _filepath, _line_number):
    344         # Detect leftover git conflict markers.
    345         if line.startswith(b'<<<<<<< ') or line.startswith(b'>>>>>>> '):
    346             return True
    347         if line.startswith(b'||||||| '): # from merge.conflictStyle=diff3
    348             return True
    349         if line.rstrip(b'\r\n') == b'=======' and \
    350            not _filepath.endswith('.md'):
    351             return True
    352         return False
    353 
    354 
    355 def this_location():
    356     frame = inspect.currentframe()
    357     assert frame is not None
    358     info = inspect.getframeinfo(frame)
    359     return os.path.basename(info.filename), info.lineno
    360 THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
    361 
    362 class LicenseIssueTracker(LineIssueTracker):
    363     """Check copyright statements and license indications.
    364 
    365     This class only checks that statements are correct if present. It does
    366     not enforce the presence of statements in each file.
    367     """
    368 
    369     heading = "License issue:"
    370 
    371     LICENSE_EXEMPTION_RE_LIST = []
    372 
    373     # Exempt third-party drivers which may be under a different license
    374     if build_tree.looks_like_tf_psa_crypto_root(os.getcwd()):
    375         LICENSE_EXEMPTION_RE_LIST.append(r'drivers/(?=(everest)/.*)')
    376     elif build_tree.is_mbedtls_3_6():
    377         LICENSE_EXEMPTION_RE_LIST.append(r'3rdparty/(?!(p256-m)/.*)')
    378 
    379     LICENSE_EXEMPTION_RE_LIST += [
    380         # Documentation explaining the license may have accidental
    381         # false positives.
    382         r'(ChangeLog|LICENSE|framework\/LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
    383         # Files imported from TF-M, and not used except in test builds,
    384         # may be under a different license.
    385         r'configs/ext/crypto_config_profile_medium\.h\Z',
    386         r'configs/ext/tfm_mbedcrypto_config_profile_medium\.h\Z',
    387         r'configs/ext/README\.md\Z',
    388         # Third-party file.
    389         r'dco\.txt\Z',
    390         r'framework\/dco\.txt\Z',
    391     ]
    392     path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
    393                                           LICENSE_EXEMPTION_RE_LIST))
    394 
    395     COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    396     # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
    397     COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)
    398 
    399     SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    400     LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    401     SPDX_RE = re.compile(br'.*?(' +
    402                          re.escape(SPDX_HEADER_KEY) +
    403                          br')(:\s*(.*?)\W*\Z|.*)', re.I)
    404 
    405     LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([
    406         rb'Apache License',
    407         rb'General Public License',
    408     ]) + rb')', re.I)
    409 
    410     def __init__(self):
    411         super().__init__()
    412         # Record what problem was caused. We can't easily report it due to
    413         # the structure of the script. To be fixed after
    414         # https://github.com/Mbed-TLS/mbedtls/pull/2506
    415         self.problem = None
    416 
    417     def issue_with_line(self, line, filepath, line_number):
    418         #pylint: disable=too-many-return-statements
    419 
    420         # Use endswith() rather than the more correct os.path.basename()
    421         # because experimentally, it makes a significant difference to
    422         # the running time.
    423         if filepath.endswith(THIS_FILE_BASE_NAME) and \
    424            line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
    425             # Avoid false positives from the code in this class.
    426             # Also skip the rest of this file, which is highly unlikely to
    427             # contain any problematic statements since we put those near the
    428             # top of files.
    429             return False
    430 
    431         m = self.COPYRIGHT_RE.match(line)
    432         if m and m.group(1) != self.COPYRIGHT_HOLDER:
    433             self.problem = 'Invalid copyright line'
    434             return True
    435 
    436         m = self.SPDX_RE.match(line)
    437         if m:
    438             if m.group(1) != self.SPDX_HEADER_KEY:
    439                 self.problem = 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
    440                 return True
    441             if not m.group(3):
    442                 self.problem = 'Improperly formatted SPDX license identifier'
    443                 return True
    444             if m.group(3) != self.LICENSE_IDENTIFIER:
    445                 self.problem = 'Wrong SPDX license identifier'
    446                 return True
    447 
    448         m = self.LICENSE_MENTION_RE.match(line)
    449         if m:
    450             self.problem = 'Suspicious license mention'
    451             return True
    452 
    453         return False
    454 
    455 
    456 class ErrorAddIssueTracker(LineIssueTracker):
    457     """Signal direct additions of error codes.
    458 
    459     Adding a low-level error code with a high-level error code is deprecated
    460     and should use MBEDTLS_ERROR_ADD.
    461     """
    462 
    463     heading = "Direct addition of error codes"
    464 
    465     _ERR_PLUS_RE = re.compile(br'MBEDTLS_ERR_\w+ *\+|'
    466                               br'\+ *MBEDTLS_ERR_')
    467     _EXCLUDE_RE = re.compile(br' *case ')
    468 
    469     def issue_with_line(self, line, filepath, line_number):
    470         if self._ERR_PLUS_RE.search(line) and not self._EXCLUDE_RE.match(line):
    471             return True
    472         return False
    473 
    474 
    475 class IntegrityChecker:
    476     """Sanity-check files under the current directory."""
    477 
    478     def __init__(self, log_file):
    479         """Instantiate the sanity checker.
    480         Check files under the current directory.
    481         Write a report of issues to log_file."""
    482         if not build_tree.looks_like_root(os.getcwd()):
    483             raise Exception("This script must be run from Mbed TLS or TF-PSA-Crypto root")
    484         self.logger = None
    485         self.setup_logger(log_file)
    486         self.issues_to_check = [
    487             ShebangIssueTracker(),
    488             EndOfFileNewlineIssueTracker(),
    489             Utf8BomIssueTracker(),
    490             UnicodeIssueTracker(),
    491             UnixLineEndingIssueTracker(),
    492             WindowsLineEndingIssueTracker(),
    493             TrailingWhitespaceIssueTracker(),
    494             TabIssueTracker(),
    495             MergeArtifactIssueTracker(),
    496             LicenseIssueTracker(),
    497         ]
    498 
    499         if not build_tree.is_mbedtls_3_6():
    500             self.issues_to_check.append(ErrorAddIssueTracker())
    501 
    502     def setup_logger(self, log_file, level=logging.INFO):
    503         """Log to log_file if provided, or to stderr if None."""
    504         self.logger = logging.getLogger()
    505         self.logger.setLevel(level)
    506         if log_file:
    507             handler = logging.FileHandler(log_file)
    508             self.logger.addHandler(handler)
    509         else:
    510             console = logging.StreamHandler()
    511             self.logger.addHandler(console)
    512 
    513     @staticmethod
    514     def collect_files():
    515         """Return the list of files to check.
    516 
    517         These are the regular files commited into Git.
    518         """
    519         bytes_output = subprocess.check_output(['git', '-C', 'framework',
    520                                                 'ls-files', '-z'])
    521         bytes_framework_filepaths = bytes_output.split(b'\0')[:-1]
    522         bytes_framework_filepaths = ["framework/".encode() + filepath
    523                                      for filepath in bytes_framework_filepaths]
    524 
    525         bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
    526         bytes_filepaths = bytes_output.split(b'\0')[:-1] + \
    527                           bytes_framework_filepaths
    528         ascii_filepaths = map(lambda fp: fp.decode('ascii'), bytes_filepaths)
    529 
    530         # Filter out directories. Normally Git doesn't list directories
    531         # (it only knows about the files inside them), but there is
    532         # at least one case where 'git ls-files' includes a directory:
    533         # submodules. Just skip submodules (and any other directories).
    534         ascii_filepaths = [fp for fp in ascii_filepaths
    535                            if os.path.isfile(fp)]
    536         # Prepend './' to files in the top-level directory so that
    537         # something like `'/Makefile' in fp` matches in the top-level
    538         # directory as well as in subdirectories.
    539         return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
    540                 for fp in ascii_filepaths]
    541 
    542     def check_files(self):
    543         """Check all files for all issues."""
    544         for issue_to_check in self.issues_to_check:
    545             for filepath in self.collect_files():
    546                 if issue_to_check.should_check_file(filepath):
    547                     issue_to_check.check_file_for_issue(filepath)
    548 
    549     def output_issues(self):
    550         """Log the issues found and their locations.
    551 
    552         Return 1 if there were issues, 0 otherwise.
    553         """
    554         integrity_return_code = 0
    555         for issue_to_check in self.issues_to_check:
    556             if issue_to_check.files_with_issues:
    557                 integrity_return_code = 1
    558             issue_to_check.output_file_issues(self.logger)
    559         return integrity_return_code
    560 
    561 
    562 def run_main():
    563     parser = argparse.ArgumentParser(description=__doc__)
    564     parser.add_argument(
    565         "-l", "--log_file", type=str, help="path to optional output log",
    566     )
    567     check_args = parser.parse_args()
    568     integrity_check = IntegrityChecker(check_args.log_file)
    569     integrity_check.check_files()
    570     return_code = integrity_check.output_issues()
    571     sys.exit(return_code)
    572 
    573 
    574 if __name__ == "__main__":
    575     run_main()