blob: 9afe3db08c7633d6d3a44b64c8593d6130bc8df4 [file] [log] [blame]
#!/usr/bin/env python3

"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""
20
# Copyright (C) 2019, Arm Limited, All Rights Reserved
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is part of Mbed TLS (https://tls.mbed.org)
Gilles Peskine40b3f412019-10-13 21:44:25 +020037
38import argparse
Gilles Peskine6e97c432020-03-27 19:05:18 +010039from collections import OrderedDict, namedtuple
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010040import datetime
41import functools
Gilles Peskine40b3f412019-10-13 21:44:25 +020042import glob
43import os
44import re
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010045import subprocess
Gilles Peskine40b3f412019-10-13 21:44:25 +020046import sys
47
class InputFormatError(Exception):
    """Error in an input file, reported together with its location."""

    def __init__(self, filename, line_number, message, *args, **kwargs):
        """Build the error text ``filename:line_number: message``.

        ``message`` is a ``str.format`` template. It is expanded with the
        remaining positional and keyword arguments before the location
        prefix is added.
        """
        detail = message.format(*args, **kwargs)
        super().__init__('{}:{}: {}'.format(filename, line_number, detail))
Gilles Peskine40b3f412019-10-13 21:44:25 +020053
class LostContent(Exception):
    """Raised when a line of an input file is missing from the output."""

    def __init__(self, filename, line):
        """Record which input (``filename``) lost which ``line``."""
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
58
# Category titles that `ChangeLog` knows about. `ChangeLog` creates one
# (initially empty) category per title, in this order, and writes the
# non-empty ones out in the same order. Entry files may only use these
# titles. The titles are bytes because the changelog and the entry files
# are read in binary mode.
STANDARD_CATEGORIES = (
    b'API changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'Features',
    b'Security',
    b'Bugfix',
    b'Changes',
)
70
# One category subsection of a version section or of an entry file:
# * name, title_line: title text and line number of the title;
# * body, body_line: body text and starting line number of the body.
CategoryContent = namedtuple(
    'CategoryContent',
    'name title_line body body_line',
)
75
class ChangelogFormat:
    """Virtual class documenting how to write a changelog format class."""

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        Return ``(header, top_version_title, top_version_body, trailer)``.

        If the top version is not released yet,
        ``changelog_file_content == header + top_version_title +
        top_version_body + trailer``.

        If the top version is already released, create a new top
        version section for an unreleased version: the returned title is
        the title of the new section, the returned body is empty and
        ``changelog_file_content == header + trailer``.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Split a changelog version section body into categories.

        Return a list of `CategoryContent`. The name of each element is
        the category title without any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Construct the text of a category section from its title and body."""
        raise NotImplementedError


class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format."""

    # Title of the section that is created when the top version of the
    # changelog has already been released.
    _unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'

    @classmethod
    def is_released_version(cls, title):
        """Tell whether a version title carries a complete release date.

        A title is considered unreleased when its date still contains
        placeholder ``x`` characters (e.g. ``xxxx-xx-xx``).
        """
        # Look for an incomplete release date
        return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    # Group 1: the first line starting with '=' plus the blank lines that
    # follow it (the version title).
    # Group 2: everything up to the next line starting with '=' or up to
    # the end of the content (the version body).
    _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """A version section starts with a line starting with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``
        as documented in `ChangelogFormat.extract_top_version`.

        Raise ValueError if the content has no version section.
        """
        m = cls._top_version_re.search(changelog_file_content)
        if m is None:
            # Without this check the failure would be an obscure
            # AttributeError on None.
            raise ValueError('Could not find a version section (a line '
                             'starting with "=" followed by a body) '
                             'in the changelog')
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # Leave the released section untouched in the trailer and put
            # a fresh, empty unreleased section in front of it.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + b'\n\n'
            top_version_body = b''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of a version title, without the newlines."""
        # Drop everything from the first newline on. The arguments of
        # re.sub are (pattern, replacement, string); the flags must be
        # passed by keyword because the fourth positional one is a count.
        return re.sub(br'\n.*', b'', version_title, flags=re.DOTALL)

    _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """A category title is a line with the title in column 0.

        Return a list of `CategoryContent`. ``title_line`` and
        ``body_line`` are the number of newlines that precede the title
        and the body in ``version_body`` (so the first line is line 0).
        Each body ends with exactly one newline.
        """
        title_matches = list(cls._category_title_re.finditer(version_body))
        if not title_matches:
            return []
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # A body runs up to the next title, or up to the end for the last one.
        body_ends = title_starts[1:] + [len(version_body)]
        bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
        body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Return the title line followed by the body and a blank line."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith(b'\n\n'):
            body += b'\n'
        return title + b'\n' + body
168
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file.

        Append the body of each category found in ``text`` to the
        category of the same name in this changelog.

        ``filename`` and ``line_offset`` are only used in error messages.
        ``line_offset`` is the line number, in ``filename``, of the first
        line of ``text`` (1 if ``text`` starts at the top of the file).

        If ``allow_unknown_category`` is false, raise InputFormatError on
        a category that this changelog does not know about. If it is true,
        unknown categories are added after the existing ones.
        """
        for category in self.format.split_categories(text):
            if not allow_unknown_category and \
               category.name not in self.categories:
                # category.title_line counts the lines before the title,
                # so adding the number of the first line of the text
                # gives the line number of the title in the file.
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name.decode('utf8',
                                                            'replace'))
            # Use get(): an unknown category that is allowed has no entry
            # yet, and `+=` on a missing key would raise KeyError.
            self.categories[category.name] = \
                self.categories.get(category.name, b'') + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. input_stream must be opened in binary mode and
        have a ``name`` attribute.

        changelog_format is the `ChangelogFormat` class used to parse
        and to write the changelog.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = b''
        # Line number of the first line of the top version body.
        offset = (self.header + self.top_version_title).count(b'\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        input_stream must be opened in binary mode and have a ``name``
        attribute. Raise InputFormatError if the file contains a category
        that this changelog does not know about.
        """
        # The text starts at the top of the file, i.e. on line 1.
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        The output is the header, the top version title, the non-empty
        categories of the top version and then the trailer.
        """
        with open(filename, 'wb') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
Gilles Peskine40b3f412019-10-13 21:44:25 +0200240
Gilles Peskine8f46bbf2020-03-25 16:34:43 +0100241
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def _parse_rev_list(text):
        """Split the output of ``git rev-list`` into a list of commit ids.

        Empty output gives an empty list (not a list containing an empty
        commit id).
        """
        return text.split()

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids (bytes), empty if there is no such
        merge commit.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        return EntryFileSortKey._parse_rev_list(text)

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive `datetime.datetime` expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp().
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive `datetime.datetime` in local time. It is
        only ever compared with the timestamp of other uncommitted files
        because the category comes first in the sort key.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            # Let Python try the reflected operation or fall back to its
            # default instead of failing on a missing sort_key method.
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()

    def __hash__(self):
        # Defining __eq__ disables the inherited __hash__. Provide one
        # that is consistent with __eq__.
        return hash(self.sort_key())
363
364
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    generated_output_file is the name of the file that was written,
    main_input_file the name of the original changelog file and
    merged_files the names of the entry files that were merged.

    Raise LostContent on the first input line that is missing from the
    output.
    """
    # Open every file in a `with` statement so that it is closed as soon
    # as it has been read, even if a check fails.
    with open(generated_output_file, 'rb') as output_stream:
        generated_output = set(output_stream)
    with open(main_input_file, 'rb') as main_stream:
        for line in main_stream:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as merged_stream:
            for line in merged_stream:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
383
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If the sanity checks fail, the exception from `check_output` is
    propagated and a regular output file is left untouched.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform.
        # os.rename fails on Windows when the destination exists, which is
        # the normal case since the output defaults to the input file.
        os.replace(output_temp, output_file)
401
def remove_merged_entries(files_to_remove):
    """Delete each of the entry files named in ``files_to_remove``."""
    for entry_path in files_to_remove:
        os.remove(entry_path)
405
def list_files_to_merge(options):
    """Return the names of the entry files to merge, oldest first.

    The entry files are the ``*.txt`` files in the directory
    ``options.dir``. "Oldest" is defined by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.txt')
    return sorted(glob.glob(pattern), key=EntryFileSortKey)
414
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    * The existing changelog is read from options.input.
    * The entries to merge are read from the directory options.dir.
    * The new changelog is written to options.output.
    * The merged entry files are removed if options.keep_entries is false.

    If there is no entry file, print a message on the standard error and
    do not write anything.
    """
    with open(options.input, 'rb') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    entry_files = list_files_to_merge(options)
    if not entry_files:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_file in entry_files:
        with open(entry_file, 'rb') as entry_stream:
            changelog.add_file(entry_stream)
    finish_output(changelog, options.output, options.input, entry_files)
    if options.keep_entries:
        return
    remove_merged_entries(entry_files)
Gilles Peskine40b3f412019-10-13 21:44:25 +0200435
def show_file_timestamps(options):
    """Print the category, timestamp and name of each file to merge.

    The files are listed in the order in which they would be merged.
    This is only intended for debugging purposes.
    """
    for filename in list_files_to_merge(options):
        key = EntryFileSortKey(filename)
        print(key.category, key.datetime, filename)
445
def set_defaults(options):
    """Fill in the options that were not given on the command line.

    * Without an explicit output file, overwrite the input file.
    * Unless the user chose explicitly, keep the entry files exactly when
      an explicit output file was given.
    """
    explicit_output = getattr(options, 'output', None) is not None
    if not explicit_output:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = explicit_output
453
def main():
    """Command line entry point.

    Parse the command line, fill in the defaults, then either list the
    entry files (--list-files-only) or merge them into the changelog.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # Where to read from.
    parser.add_argument(
        '--dir', '-d', metavar='DIR',
        default='ChangeLog.d',
        help='Directory to read entries from'
             ' (default: ChangeLog.d)')
    parser.add_argument(
        '--input', '-i', metavar='FILE',
        default='ChangeLog',
        help='Existing changelog file to read from and augment'
             ' (default: ChangeLog)')
    # What to do with the entry files once they are merged. Both options
    # store into keep_entries; None means that the user did not choose.
    parser.add_argument(
        '--keep-entries',
        action='store_true', dest='keep_entries', default=None,
        help='Keep the files containing entries'
             ' (default: remove them if --output/-o is not specified)')
    parser.add_argument(
        '--no-keep-entries',
        action='store_false', dest='keep_entries',
        help='Remove the files containing entries after they are merged'
             ' (default: remove them if --output/-o is not specified)')
    # Where to write to.
    parser.add_argument(
        '--output', '-o', metavar='FILE',
        help='Output changelog file'
             ' (default: overwrite the input)')
    # Debugging aid.
    parser.add_argument(
        '--list-files-only',
        action='store_true',
        help=('Only list the files that would be processed '
              '(with some debugging information)'))
    options = parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)


if __name__ == '__main__':
    main()