blob: f85392c530fc1c51d07ab65e4012696c4d9a6f6b [file] [log] [blame]
Gilles Peskinecff94e32020-04-21 18:33:12 +02001#!/usr/bin/env python3
2
3"""Assemble Mbed TLS change log entries into the change log file.
4
5Add changelog entries to the first level-2 section.
6Create a new level-2 section for unreleased changes if needed.
7Remove the input files unless --keep-entries is specified.
8
9In each level-3 section, entries are sorted in chronological order
10(oldest first). From oldest to newest:
11* Merged entry files are sorted according to their merge date (date of
12 the merge commit that brought the commit that created the file into
13 the target branch).
14* Committed but unmerged entry files are sorted according to the date
15 of the commit that adds them.
16* Uncommitted entry files are sorted according to their modification time.
17
18You must run this program from within a git working directory.
19"""
20
Bence Szépkútia2947ac2020-08-19 16:37:36 +020021# Copyright The Mbed TLS Contributors
Bence Szépkútif744bd72020-06-05 13:02:18 +020022# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
23#
24# This file is provided under the Apache License 2.0, or the
25# GNU General Public License v2.0 or later.
26#
27# **********
28# Apache License 2.0:
Gilles Peskinecff94e32020-04-21 18:33:12 +020029#
30# Licensed under the Apache License, Version 2.0 (the "License"); you may
31# not use this file except in compliance with the License.
32# You may obtain a copy of the License at
33#
34# http://www.apache.org/licenses/LICENSE-2.0
35#
36# Unless required by applicable law or agreed to in writing, software
37# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
38# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
39# See the License for the specific language governing permissions and
40# limitations under the License.
41#
Bence Szépkútif744bd72020-06-05 13:02:18 +020042# **********
43#
44# **********
45# GNU General Public License v2.0 or later:
46#
47# This program is free software; you can redistribute it and/or modify
48# it under the terms of the GNU General Public License as published by
49# the Free Software Foundation; either version 2 of the License, or
50# (at your option) any later version.
51#
52# This program is distributed in the hope that it will be useful,
53# but WITHOUT ANY WARRANTY; without even the implied warranty of
54# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
55# GNU General Public License for more details.
56#
57# You should have received a copy of the GNU General Public License along
58# with this program; if not, write to the Free Software Foundation, Inc.,
59# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
60#
61# **********
Gilles Peskinecff94e32020-04-21 18:33:12 +020062
63import argparse
64from collections import OrderedDict, namedtuple
65import datetime
66import functools
67import glob
68import os
69import re
70import subprocess
71import sys
72
class InputFormatError(Exception):
    """Report a format problem at a given line of an input file.

    The exception text is ``FILENAME:LINE: MESSAGE`` where MESSAGE is
    ``message`` expanded through ``str.format`` with the remaining
    positional and keyword arguments.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        detail = message.format(*args, **kwargs)
        location = '{}:{}'.format(filename, line_number)
        super().__init__('{}: {}'.format(location, detail))
78
class CategoryParseError(Exception):
    """Signal that a category could not be parsed.

    ``line_offset`` is the line offset of the problem within the parsed
    text and ``error_message`` describes it. Both are kept as attributes
    so that a caller can rebuild an error relative to a file.
    """

    def __init__(self, line_offset, error_message):
        super().__init__('{}: {}'.format(line_offset, error_message))
        self.line_offset = line_offset
        self.error_message = error_message
84
class LostContent(Exception):
    """Signal that a line of an input file is missing from the output."""

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
89
# Category titles used in the changelog, in the order in which they are
# written out.
# If you edit this, update ChangeLog.d/README.md.
STANDARD_CATEGORIES = (
    b'API changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'Features',
    b'Security',
    b'Bugfix',
    b'Changes',
)

# Longest line (in bytes) accepted in the body of an entry.
MAX_LINE_LENGTH = 80

# One parsed category section.
CategoryContent = namedtuple('CategoryContent', [
    'name',        # Title text
    'title_line',  # Line number of the title
    'body',        # Body text
    'body_line',   # Starting line number of the body
])
111
class ChangelogFormat:
    """Virtual class documenting how to write a changelog format class.

    Every method here raises ``NotImplementedError``; a concrete format
    class must override all of them.
    """

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        If the top version is already released, create a new top
        version section for an unreleased version.

        Return ``(header, top_version_title, top_version_body, trailer)``
        where the "top version" is the existing top version section if it's
        for unreleased changes, and a newly created section otherwise.
        Concatenating the four pieces (after modifying top_version_body
        as desired) yields the assembled changelog.
        """
        raise NotImplementedError

    @classmethod
    def version_title_text(cls, version_title):
        """Return the text of a formatted version section title."""
        raise NotImplementedError

    @classmethod
    def split_categories(cls, version_body):
        """Split a changelog version section body into categories.

        Return a list of `CategoryContent`. In each element, the name is
        the category title without any formatting.
        """
        raise NotImplementedError

    @classmethod
    def format_category(cls, title, body):
        """Construct the text of a category section from its title and body."""
        raise NotImplementedError
148
class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format."""

    # Title given to a newly created section for unreleased changes.
    _unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'

    @classmethod
    def is_released_version(cls, title):
        """Return whether ``title`` is the title of a released version.

        A title whose release date still ends with a placeholder ``x``
        (for example ``xxxx-xx-xx``) is considered not yet released.
        """
        # Look for an incomplete release date
        return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    # Group 1: the title line, including the newlines that follow it.
    # Group 2: the body, up to the next line starting with '=' or the end.
    _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        A version section starts with a line starting with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``.
        If the top version is already released, the returned title is a
        new title for unreleased changes, the body is empty, and the
        existing top section is left in the trailer.

        Raise ValueError if the content has no version section.
        """
        m = re.search(cls._top_version_re, changelog_file_content)
        if m is None:
            raise ValueError('No version section (line starting with "=") '
                             'found in the changelog')
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # Insert a fresh section in front of the released one.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + b'\n\n'
            top_version_body = b''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of ``version_title`` without its newline."""
        # Drop everything from the first newline onwards. The subject
        # string is the third argument of re.sub and the flags must be
        # passed by keyword (the fourth positional argument is `count`).
        return re.sub(br'\n.*', b'', version_title, flags=re.DOTALL)

    _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """Split a version section body into a list of `CategoryContent`.

        A category title is a line with the title in column 0.
        Line numbers in the result are counted from 0 at the start of
        ``version_body``.

        Raise CategoryParseError if the body does not start with a title.
        """
        if not version_body:
            return []
        title_matches = list(re.finditer(cls._category_title_re, version_body))
        if not title_matches or title_matches[0].start() != 0:
            # There is junk before the first category.
            raise CategoryParseError(0, 'Junk found where category expected')
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body runs up to the next title, or to the end of the text.
        body_ends = title_starts[1:] + [len(version_body)]
        # Normalize each body to end with exactly one newline.
        bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
        body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Construct the text of a category section from its title and body."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith(b'\n\n'):
            body += b'\n'
        return title + b'\n' + body
209
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file.

        Append the body of each category found in ``text`` to the
        content accumulated for that category.

        ``filename`` and ``line_offset`` (the line number of the first
        line of ``text`` in that file) are only used in error messages.

        If ``allow_unknown_category`` is false, raise InputFormatError
        for a category that is not already known. Otherwise an unknown
        category is added after the categories known so far.

        Raise InputFormatError if the text cannot be split into
        categories or if a body line is longer than MAX_LINE_LENGTH.
        """
        try:
            categories = self.format.split_categories(text)
        except CategoryParseError as e:
            raise InputFormatError(filename, line_offset + e.line_offset,
                                   e.error_message) from e
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name.decode('utf8'))

            # Report the line that is actually too long, not the title.
            body_first_line = line_offset + category.body_line
            for index, line in enumerate(category.body.splitlines()):
                if len(line) > MAX_LINE_LENGTH:
                    raise InputFormatError(
                        filename, body_first_line + index,
                        'Category body line too long: "{}" '
                        '(length {}, maximum {})',
                        category.name.decode('utf8'),
                        len(line), MAX_LINE_LENGTH)

            # An unknown category (only possible when
            # allow_unknown_category is true) starts out empty.
            self.categories[category.name] = (
                self.categories.get(category.name, b'') + category.body)

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. ``input_stream`` must have a ``name`` attribute,
        which is used in error messages.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = b''
        # Line number of the first line of the top version body.
        offset = (self.header + self.top_version_title).count(b'\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Unlike the existing changelog content, an entry file may only
        use categories that are already known.
        """
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        Categories without content are omitted.
        """
        with open(filename, 'wb') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
294
295
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # Without re.MULTILINE, '$' only matches at the very end of the
        # output (or just before a final newline), so this finds the
        # last line.
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids as bytes, one per line of git output.
        The list is empty if git prints nothing.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # splitlines() yields [] for empty output, whereas
        # rstrip/split would yield [b''], a bogus empty commit id.
        return text.splitlines()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit as an aware UTC datetime."""
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        return datetime.datetime.fromtimestamp(int(text),
                                               datetime.timezone.utc)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification time of the given file as an aware UTC
        datetime.

        UTC is used rather than local time so that the ordering does not
        depend on the local time zone or on daylight saving transitions,
        and so that it is consistent with `commit_timestamp`.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime, datetime.timezone.utc)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        The file name is the last element so that files with the same
        category and timestamp are still ordered deterministically.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()
417
418
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    Raise LostContent for the first input line that is missing from
    the output. Return None if nothing is missing.
    """
    # Every file is opened in a `with` block so that it is closed
    # promptly, including when LostContent is raised.
    with open(generated_output_file, 'rb') as output:
        generated_output = set(output)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as entry:
            for line in entry:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
437
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If the sanity checks fail, the exception from `check_output`
    propagates and an existing regular output file is left untouched.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every
        # platform. os.rename fails on Windows if the destination exists,
        # which is the normal case when the changelog is updated in place.
        os.replace(output_temp, output_file)
455
def remove_merged_entries(files_to_remove):
    """Delete each of the given files."""
    for entry_path in files_to_remove:
        os.unlink(entry_path)
459
def list_files_to_merge(options):
    """Return the entry files to merge, sorted from oldest to newest.

    The entry files are the ``*.txt`` files in the directory options.dir.
    "Oldest" is defined by `EntryFileSortKey`.
    """
    pattern = os.path.join(options.dir, '*.txt')
    return sorted(glob.glob(pattern), key=EntryFileSortKey)
468
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.

    If there is no entry to merge, print a message on standard error
    and write nothing.
    """
    with open(options.input, 'rb') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    entry_files = list_files_to_merge(options)
    if not entry_files:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_path in entry_files:
        with open(entry_path, 'rb') as entry_stream:
            changelog.add_file(entry_stream)
    finish_output(changelog, options.output, options.input, entry_files)
    if options.keep_entries:
        return
    remove_merged_entries(entry_files)
489
def show_file_timestamps(options):
    """Print the category, timestamp and name of each file to merge.

    The files are listed in merge order, one per line.
    This is only intended for debugging purposes.
    """
    for filename in list_files_to_merge(options):
        key = EntryFileSortKey(filename)
        print(key.category, key.datetime, filename)
499
def set_defaults(options):
    """Fill in the options that were not given explicitly.

    * ``output`` defaults to ``input`` (overwrite the changelog in place).
    * ``keep_entries`` defaults to true if an output file was given
      explicitly, and to false otherwise.
    """
    output_given = getattr(options, 'output', None) is not None
    if not output_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_given
507
def main():
    """Command line entry point.

    Parse the command line, fill in the defaults, then either list the
    entry files (--list-files-only) or merge them into the changelog.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument(
        '--dir', '-d', metavar='DIR',
        default='ChangeLog.d',
        help='Directory to read entries from'
             ' (default: ChangeLog.d)')
    arg_parser.add_argument(
        '--input', '-i', metavar='FILE',
        default='ChangeLog',
        help='Existing changelog file to read from and augment'
             ' (default: ChangeLog)')
    # --keep-entries and --no-keep-entries share one destination whose
    # default is None, so that set_defaults can tell "not specified"
    # apart from an explicit choice.
    arg_parser.add_argument(
        '--keep-entries',
        action='store_true', dest='keep_entries', default=None,
        help='Keep the files containing entries'
             ' (default: remove them if --output/-o is not specified)')
    arg_parser.add_argument(
        '--no-keep-entries',
        action='store_false', dest='keep_entries',
        help='Remove the files containing entries after they are merged'
             ' (default: remove them if --output/-o is not specified)')
    arg_parser.add_argument(
        '--output', '-o', metavar='FILE',
        help='Output changelog file'
             ' (default: overwrite the input)')
    arg_parser.add_argument(
        '--list-files-only',
        action='store_true',
        help=('Only list the files that would be processed '
              '(with some debugging information)'))
    options = arg_parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)


if __name__ == '__main__':
    main()