quickjs-tart

quickjs-based runtime for wallet-core logic
Log | Files | Refs | README | LICENSE

assemble_changelog.py (21686B)


      1 #!/usr/bin/env python3
      2 
      3 """Assemble Mbed TLS change log entries into the change log file.
      4 
      5 Add changelog entries to the first level-2 section.
      6 Create a new level-2 section for unreleased changes if needed.
      7 Remove the input files unless --keep-entries is specified.
      8 
      9 In each level-3 section, entries are sorted in chronological order
     10 (oldest first). From oldest to newest:
     11 * Merged entry files are sorted according to their merge date (date of
     12   the merge commit that brought the commit that created the file into
     13   the target branch).
     14 * Committed but unmerged entry files are sorted according to the date
     15   of the commit that adds them.
     16 * Uncommitted entry files are sorted according to their modification time.
     17 
     18 You must run this program from within a git working directory.
     19 """
     20 
     21 # Copyright The Mbed TLS Contributors
     22 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
     23 
     24 import argparse
     25 from collections import OrderedDict, namedtuple
     26 import datetime
     27 import functools
     28 import glob
     29 import os
     30 import re
     31 import subprocess
     32 import sys
     33 
     34 class InputFormatError(Exception):
     35     def __init__(self, filename, line_number, message, *args, **kwargs):
     36         message = '{}:{}: {}'.format(filename, line_number,
     37                                      message.format(*args, **kwargs))
     38         super().__init__(message)
     39 
     40 class CategoryParseError(Exception):
     41     def __init__(self, line_offset, error_message):
     42         self.line_offset = line_offset
     43         self.error_message = error_message
     44         super().__init__('{}: {}'.format(line_offset, error_message))
     45 
     46 class LostContent(Exception):
     47     def __init__(self, filename, line):
     48         message = ('Lost content from {}: "{}"'.format(filename, line))
     49         super().__init__(message)
     50 
     51 class FilePathError(Exception):
     52     def __init__(self, filenames):
     53         message = ('Changelog filenames do not end with .txt: {}'.format(", ".join(filenames)))
     54         super().__init__(message)
     55 
     56 # The category names we use in the changelog.
     57 # If you edit this, update ChangeLog.d/README.md.
     58 STANDARD_CATEGORIES = (
     59     'API changes',
     60     'Default behavior changes',
     61     'Requirement changes',
     62     'New deprecations',
     63     'Removals',
     64     'Features',
     65     'Security',
     66     'Bugfix',
     67     'Changes',
     68 )
     69 
     70 # The maximum line length for an entry
     71 MAX_LINE_LENGTH = 80
     72 
     73 CategoryContent = namedtuple('CategoryContent', [
     74     'name', 'title_line', # Title text and line number of the title
     75     'body', 'body_line', # Body text and starting line number of the body
     76 ])
     77 
     78 class ChangelogFormat:
     79     """Virtual class documenting how to write a changelog format class."""
     80 
     81     @classmethod
     82     def extract_top_version(cls, changelog_file_content):
     83         """Split out the top version section.
     84 
     85         If the top version is already released, create a new top
     86         version section for an unreleased version.
     87 
     88         Return ``(header, top_version_title, top_version_body, trailer)``
     89         where the "top version" is the existing top version section if it's
     90         for unreleased changes, and a newly created section otherwise.
     91         To assemble the changelog after modifying top_version_body,
     92         concatenate the four pieces.
     93         """
     94         raise NotImplementedError
     95 
     96     @classmethod
     97     def version_title_text(cls, version_title):
     98         """Return the text of a formatted version section title."""
     99         raise NotImplementedError
    100 
    101     @classmethod
    102     def split_categories(cls, version_body):
    103         """Split a changelog version section body into categories.
    104 
    105         Return a list of `CategoryContent` the name is category title
    106         without any formatting.
    107         """
    108         raise NotImplementedError
    109 
    110     @classmethod
    111     def format_category(cls, title, body):
    112         """Construct the text of a category section from its title and body."""
    113         raise NotImplementedError
    114 
    115 class TextChangelogFormat(ChangelogFormat):
    116     """The traditional Mbed TLS changelog format."""
    117 
    118     _unreleased_version_text = '= {} x.x.x branch released xxxx-xx-xx'
    119     @classmethod
    120     def is_released_version(cls, title):
    121         # Look for an incomplete release date
    122         return not re.search(r'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)
    123 
    124     _top_version_re = re.compile(r'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
    125                                  re.DOTALL)
    126     _name_re = re.compile(r'=\s(.*)\s[0-9x]+\.', re.DOTALL)
    127     @classmethod
    128     def extract_top_version(cls, changelog_file_content):
    129         """A version section starts with a line starting with '='."""
    130         m = re.search(cls._top_version_re, changelog_file_content)
    131         top_version_start = m.start(1)
    132         top_version_end = m.end(2)
    133         top_version_title = m.group(1)
    134         top_version_body = m.group(2)
    135         name = re.match(cls._name_re, top_version_title).group(1)
    136         if cls.is_released_version(top_version_title):
    137             top_version_end = top_version_start
    138             top_version_title = cls._unreleased_version_text.format(name) + '\n\n'
    139             top_version_body = ''
    140         return (changelog_file_content[:top_version_start],
    141                 top_version_title, top_version_body,
    142                 changelog_file_content[top_version_end:])
    143 
    144     @classmethod
    145     def version_title_text(cls, version_title):
    146         return re.sub(r'\n.*', version_title, re.DOTALL)
    147 
    148     _category_title_re = re.compile(r'(^\w.*)\n+', re.MULTILINE)
    149     @classmethod
    150     def split_categories(cls, version_body):
    151         """A category title is a line with the title in column 0."""
    152         if not version_body:
    153             return []
    154         title_matches = list(re.finditer(cls._category_title_re, version_body))
    155         if not title_matches or title_matches[0].start() != 0:
    156             # There is junk before the first category.
    157             raise CategoryParseError(0, 'Junk found where category expected')
    158         title_starts = [m.start(1) for m in title_matches]
    159         body_starts = [m.end(0) for m in title_matches]
    160         body_ends = title_starts[1:] + [len(version_body)]
    161         bodies = [version_body[body_start:body_end].rstrip('\n') + '\n'
    162                   for (body_start, body_end) in zip(body_starts, body_ends)]
    163         title_lines = [version_body[:pos].count('\n') for pos in title_starts]
    164         body_lines = [version_body[:pos].count('\n') for pos in body_starts]
    165         return [CategoryContent(title_match.group(1), title_line,
    166                                 body, body_line)
    167                 for title_match, title_line, body, body_line
    168                 in zip(title_matches, title_lines, bodies, body_lines)]
    169 
    170     @classmethod
    171     def format_category(cls, title, body):
    172         # `split_categories` ensures that each body ends with a newline.
    173         # Make sure that there is additionally a blank line between categories.
    174         if not body.endswith('\n\n'):
    175             body += '\n'
    176         return title + '\n' + body
    177 
    178 class ChangeLog:
    179     """An Mbed TLS changelog.
    180 
    181     A changelog file consists of some header text followed by one or
    182     more version sections. The version sections are in reverse
    183     chronological order. Each version section consists of a title and a body.
    184 
    185     The body of a version section consists of zero or more category
    186     subsections. Each category subsection consists of a title and a body.
    187 
    188     A changelog entry file has the same format as the body of a version section.
    189 
    190     A `ChangelogFormat` object defines the concrete syntax of the changelog.
    191     Entry files must have the same format as the changelog file.
    192     """
    193 
    194     # Only accept dotted version numbers (e.g. "3.1", not "3").
    195     # Refuse ".x" in a version number where x is a letter: this indicates
    196     # a version that is not yet released. Something like "3.1a" is accepted.
    197     _version_number_re = re.compile(r'[0-9]+\.[0-9A-Za-z.]+')
    198     _incomplete_version_number_re = re.compile(r'.*\.[A-Za-z]')
    199     _only_url_re = re.compile(r'^\s*\w+://\S+\s*$')
    200     _has_url_re = re.compile(r'.*://.*')
    201 
    202     def add_categories_from_text(self, filename, line_offset,
    203                                  text, allow_unknown_category):
    204         """Parse a version section or entry file."""
    205         try:
    206             categories = self.format.split_categories(text)
    207         except CategoryParseError as e:
    208             raise InputFormatError(filename, line_offset + e.line_offset,
    209                                    e.error_message)
    210         for category in categories:
    211             if not allow_unknown_category and \
    212                category.name not in self.categories:
    213                 raise InputFormatError(filename,
    214                                        line_offset + category.title_line,
    215                                        'Unknown category: "{}"',
    216                                        category.name)
    217 
    218             body_split = category.body.splitlines()
    219 
    220             for line_number, line in enumerate(body_split, 1):
    221                 if not self._only_url_re.match(line) and \
    222                    len(line) > MAX_LINE_LENGTH:
    223                     long_url_msg = '. URL exceeding length limit must be alone in its line.' \
    224                         if self._has_url_re.match(line) else ""
    225                     raise InputFormatError(filename,
    226                                            category.body_line + line_number,
    227                                            'Line is longer than allowed: '
    228                                            'Length {} (Max {}){}',
    229                                            len(line), MAX_LINE_LENGTH,
    230                                            long_url_msg)
    231 
    232             self.categories[category.name] += category.body
    233 
    234     def __init__(self, input_stream, changelog_format):
    235         """Create a changelog object.
    236 
    237         Populate the changelog object from the content of the file
    238         input_stream.
    239         """
    240         self.format = changelog_format
    241         whole_file = input_stream.read()
    242         (self.header,
    243          self.top_version_title, top_version_body,
    244          self.trailer) = self.format.extract_top_version(whole_file)
    245         # Split the top version section into categories.
    246         self.categories = OrderedDict()
    247         for category in STANDARD_CATEGORIES:
    248             self.categories[category] = ''
    249         offset = (self.header + self.top_version_title).count('\n') + 1
    250 
    251         self.add_categories_from_text(input_stream.name, offset,
    252                                       top_version_body, True)
    253 
    254     def add_file(self, input_stream):
    255         """Add changelog entries from a file.
    256         """
    257         self.add_categories_from_text(input_stream.name, 1,
    258                                       input_stream.read(), False)
    259 
    260     def write(self, filename):
    261         """Write the changelog to the specified file.
    262         """
    263         with open(filename, 'w', encoding='utf-8') as out:
    264             out.write(self.header)
    265             out.write(self.top_version_title)
    266             for title, body in self.categories.items():
    267                 if not body:
    268                     continue
    269                 out.write(self.format.format_category(title, body))
    270             out.write(self.trailer)
    271 
    272 
    273 @functools.total_ordering
    274 class EntryFileSortKey:
    275     """This classes defines an ordering on changelog entry files: older < newer.
    276 
    277     * Merged entry files are sorted according to their merge date (date of
    278       the merge commit that brought the commit that created the file into
    279       the target branch).
    280     * Committed but unmerged entry files are sorted according to the date
    281       of the commit that adds them.
    282     * Uncommitted entry files are sorted according to their modification time.
    283 
    284     This class assumes that the file is in a git working directory with
    285     the target branch checked out.
    286     """
    287 
    288     # Categories of files. A lower number is considered older.
    289     MERGED = 0
    290     COMMITTED = 1
    291     LOCAL = 2
    292 
    293     @staticmethod
    294     def creation_hash(filename):
    295         """Return the git commit id at which the given file was created.
    296 
    297         Return None if the file was never checked into git.
    298         """
    299         hashes = subprocess.check_output(['git', 'log', '--format=%H',
    300                                           '--follow',
    301                                           '--', filename])
    302         m = re.search('(.+)$', hashes.decode('ascii'))
    303         if not m:
    304             # The git output is empty. This means that the file was
    305             # never checked in.
    306             return None
    307         # The last commit in the log is the oldest one, which is when the
    308         # file was created.
    309         return m.group(0)
    310 
    311     @staticmethod
    312     def list_merges(some_hash, target, *options):
    313         """List merge commits from some_hash to target.
    314 
    315         Pass options to git to select which commits are included.
    316         """
    317         text = subprocess.check_output(['git', 'rev-list',
    318                                         '--merges', *options,
    319                                         '..'.join([some_hash, target])])
    320         return text.decode('ascii').rstrip('\n').split('\n')
    321 
    322     @classmethod
    323     def merge_hash(cls, some_hash):
    324         """Return the git commit id at which the given commit was merged.
    325 
    326         Return None if the given commit was never merged.
    327         """
    328         target = 'HEAD'
    329         # List the merges from some_hash to the target in two ways.
    330         # The ancestry list is the ones that are both descendants of
    331         # some_hash and ancestors of the target.
    332         ancestry = frozenset(cls.list_merges(some_hash, target,
    333                                              '--ancestry-path'))
    334         # The first_parents list only contains merges that are directly
    335         # on the target branch. We want it in reverse order (oldest first).
    336         first_parents = cls.list_merges(some_hash, target,
    337                                         '--first-parent', '--reverse')
    338         # Look for the oldest merge commit that's both on the direct path
    339         # and directly on the target branch. That's the place where some_hash
    340         # was merged on the target branch. See
    341         # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
    342         for commit in first_parents:
    343             if commit in ancestry:
    344                 return commit
    345         return None
    346 
    347     @staticmethod
    348     def commit_timestamp(commit_id):
    349         """Return the timestamp of the given commit."""
    350         text = subprocess.check_output(['git', 'show', '-s',
    351                                         '--format=%ct',
    352                                         commit_id])
    353         return datetime.datetime.utcfromtimestamp(int(text))
    354 
    355     @staticmethod
    356     def file_timestamp(filename):
    357         """Return the modification timestamp of the given file."""
    358         mtime = os.stat(filename).st_mtime
    359         return datetime.datetime.fromtimestamp(mtime)
    360 
    361     def __init__(self, filename):
    362         """Determine position of the file in the changelog entry order.
    363 
    364         This constructor returns an object that can be used with comparison
    365         operators, with `sort` and `sorted`, etc. Older entries are sorted
    366         before newer entries.
    367         """
    368         self.filename = filename
    369         creation_hash = self.creation_hash(filename)
    370         if not creation_hash:
    371             self.category = self.LOCAL
    372             self.datetime = self.file_timestamp(filename)
    373             return
    374         merge_hash = self.merge_hash(creation_hash)
    375         if not merge_hash:
    376             self.category = self.COMMITTED
    377             self.datetime = self.commit_timestamp(creation_hash)
    378             return
    379         self.category = self.MERGED
    380         self.datetime = self.commit_timestamp(merge_hash)
    381 
    382     def sort_key(self):
    383         """"Return a concrete sort key for this entry file sort key object.
    384 
    385         ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
    386         """
    387         return (self.category, self.datetime, self.filename)
    388 
    389     def __eq__(self, other):
    390         return self.sort_key() == other.sort_key()
    391 
    392     def __lt__(self, other):
    393         return self.sort_key() < other.sort_key()
    394 
    395 
    396 def check_output(generated_output_file, main_input_file, merged_files):
    397     """Make sanity checks on the generated output.
    398 
    399     The intent of these sanity checks is to have reasonable confidence
    400     that no content has been lost.
    401 
    402     The sanity check is that every line that is present in an input file
    403     is also present in an output file. This is not perfect but good enough
    404     for now.
    405     """
    406     with open(generated_output_file, 'r', encoding='utf-8') as fd:
    407         generated_output = set(fd)
    408         for line in open(main_input_file, 'r', encoding='utf-8'):
    409             if line not in generated_output:
    410                 raise LostContent('original file', line)
    411         for merged_file in merged_files:
    412             for line in open(merged_file, 'r', encoding='utf-8'):
    413                 if line not in generated_output:
    414                     raise LostContent(merged_file, line)
    415 
    416 def finish_output(changelog, output_file, input_file, merged_files):
    417     """Write the changelog to the output file.
    418 
    419     The input file and the list of merged files are used only for sanity
    420     checks on the output.
    421     """
    422     if os.path.exists(output_file) and not os.path.isfile(output_file):
    423         # The output is a non-regular file (e.g. pipe). Write to it directly.
    424         output_temp = output_file
    425     else:
    426         # The output is a regular file. Write to a temporary file,
    427         # then move it into place atomically.
    428         output_temp = output_file + '.tmp'
    429     changelog.write(output_temp)
    430     check_output(output_temp, input_file, merged_files)
    431     if output_temp != output_file:
    432         os.rename(output_temp, output_file)
    433 
    434 def remove_merged_entries(files_to_remove):
    435     for filename in files_to_remove:
    436         os.remove(filename)
    437 
    438 def list_files_to_merge(options):
    439     """List the entry files to merge, oldest first.
    440 
    441     "Oldest" is defined by `EntryFileSortKey`.
    442 
    443     Also check for required .txt extension
    444     """
    445     files_to_merge = glob.glob(os.path.join(options.dir, '*'))
    446 
    447     # Ignore 00README.md
    448     readme = os.path.join(options.dir, "00README.md")
    449     if readme in files_to_merge:
    450         files_to_merge.remove(readme)
    451 
    452     # Identify files without the required .txt extension
    453     bad_files = [x for x in files_to_merge if not x.endswith(".txt")]
    454     if bad_files:
    455         raise FilePathError(bad_files)
    456 
    457     files_to_merge.sort(key=EntryFileSortKey)
    458     return files_to_merge
    459 
    460 def merge_entries(options):
    461     """Merge changelog entries into the changelog file.
    462 
    463     Read the changelog file from options.input.
    464     Check that all entries have a .txt extension
    465     Read entries to merge from the directory options.dir.
    466     Write the new changelog to options.output.
    467     Remove the merged entries if options.keep_entries is false.
    468     """
    469     with open(options.input, 'r', encoding='utf-8') as input_file:
    470         changelog = ChangeLog(input_file, TextChangelogFormat)
    471     files_to_merge = list_files_to_merge(options)
    472     if not files_to_merge:
    473         sys.stderr.write('There are no pending changelog entries.\n')
    474         return
    475     for filename in files_to_merge:
    476         with open(filename, 'r', encoding='utf-8') as input_file:
    477             changelog.add_file(input_file)
    478     finish_output(changelog, options.output, options.input, files_to_merge)
    479     if not options.keep_entries:
    480         remove_merged_entries(files_to_merge)
    481 
    482 def show_file_timestamps(options):
    483     """List the files to merge and their timestamp.
    484 
    485     This is only intended for debugging purposes.
    486     """
    487     files = list_files_to_merge(options)
    488     for filename in files:
    489         ts = EntryFileSortKey(filename)
    490         print(ts.category, ts.datetime, filename)
    491 
    492 def set_defaults(options):
    493     """Add default values for missing options."""
    494     output_file = getattr(options, 'output', None)
    495     if output_file is None:
    496         options.output = options.input
    497     if getattr(options, 'keep_entries', None) is None:
    498         options.keep_entries = (output_file is not None)
    499 
    500 def main():
    501     """Command line entry point."""
    502     parser = argparse.ArgumentParser(description=__doc__)
    503     parser.add_argument('--dir', '-d', metavar='DIR',
    504                         default='ChangeLog.d',
    505                         help='Directory to read entries from'
    506                              ' (default: ChangeLog.d)')
    507     parser.add_argument('--input', '-i', metavar='FILE',
    508                         default='ChangeLog',
    509                         help='Existing changelog file to read from and augment'
    510                              ' (default: ChangeLog)')
    511     parser.add_argument('--keep-entries',
    512                         action='store_true', dest='keep_entries', default=None,
    513                         help='Keep the files containing entries'
    514                              ' (default: remove them if --output/-o is not specified)')
    515     parser.add_argument('--no-keep-entries',
    516                         action='store_false', dest='keep_entries',
    517                         help='Remove the files containing entries after they are merged'
    518                              ' (default: remove them if --output/-o is not specified)')
    519     parser.add_argument('--output', '-o', metavar='FILE',
    520                         help='Output changelog file'
    521                              ' (default: overwrite the input)')
    522     parser.add_argument('--list-files-only',
    523                         action='store_true',
    524                         help=('Only list the files that would be processed '
    525                               '(with some debugging information)'))
    526     options = parser.parse_args()
    527     set_defaults(options)
    528     if options.list_files_only:
    529         show_file_timestamps(options)
    530         return
    531     merge_entries(options)
    532 
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()