blob: 96d2217a92b43764fe34387a7099b32c27c5e653 [file] [log] [blame]
#!/usr/bin/env python3

"""Assemble Mbed TLS change log entries into the change log file.

Add changelog entries to the first level-2 section.
Create a new level-2 section for unreleased changes if needed.
Remove the input files unless --keep-entries is specified.

In each level-3 section, entries are sorted in chronological order
(oldest first). From oldest to newest:
* Merged entry files are sorted according to their merge date (date of
  the merge commit that brought the commit that created the file into
  the target branch).
* Committed but unmerged entry files are sorted according to the date
  of the commit that adds them.
* Uncommitted entry files are sorted according to their modification time.

You must run this program from within a git working directory.
"""
20
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
#
# This file is provided under the Apache License 2.0, or the
# GNU General Public License v2.0 or later.
#
# **********
# Apache License 2.0:
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# **********
#
# **********
# GNU General Public License v2.0 or later:
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# **********
Gilles Peskinecff94e32020-04-21 18:33:12 +020062
63import argparse
64from collections import OrderedDict, namedtuple
65import datetime
66import functools
67import glob
68import os
69import re
70import subprocess
71import sys
72
class InputFormatError(Exception):
    """Malformed input, reported as ``filename:line_number: message``.

    ``message`` is a ``str.format`` template; the remaining positional and
    keyword arguments are substituted into it.
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        details = message.format(*args, **kwargs)
        location = '{}:{}'.format(filename, line_number)
        super().__init__('{}: {}'.format(location, details))
78
class CategoryParseError(Exception):
    """Failure to split a text into categories.

    The line offset (relative to the start of the parsed text) and the
    error message are kept as attributes so that a caller can report the
    problem with an absolute position.
    """

    def __init__(self, line_offset, error_message):
        description = '{}: {}'.format(line_offset, error_message)
        super().__init__(description)
        self.line_offset = line_offset
        self.error_message = error_message
84
class LostContent(Exception):
    """A line of an input file is missing from the generated output."""

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
89
# Titles of the categories that may appear in a changelog version section.
# If you edit this, update ChangeLog.d/README.md.
STANDARD_CATEGORIES = (
    b'API changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'Features',
    b'Security',
    b'Bugfix',
    b'Changes',
)

# Longest line (in bytes) accepted in the body of an entry.
MAX_LINE_LENGTH = 80

# One parsed category: its title and body, each with the number of the
# line on which it starts.
CategoryContent = namedtuple(
    'CategoryContent',
    [
        'name', 'title_line',  # Title text and line number of the title
        'body', 'body_line',  # Body text and starting line number of the body
    ],
)
111
class ChangelogFormat:
    """Interface that a changelog format class has to implement.

    Every method here is a placeholder that raises ``NotImplementedError``;
    a concrete format overrides all of them.
    """

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Isolate the top version section of a changelog.

        When the top version has already been released, a new top version
        section for an unreleased version is created instead.

        The result is ``(header, top_version_title, top_version_body,
        trailer)``. The "top version" is the existing top section if it
        holds unreleased changes, and a freshly created section otherwise.
        Concatenating the four pieces (after modifying top_version_body)
        yields the assembled changelog.
        """
        raise NotImplementedError()

    @classmethod
    def version_title_text(cls, version_title):
        """Give the text of a formatted version section title."""
        raise NotImplementedError()

    @classmethod
    def split_categories(cls, version_body):
        """Break the body of a version section into its categories.

        The result is a list of `CategoryContent`, in which ``name`` is the
        category title without any formatting.
        """
        raise NotImplementedError()

    @classmethod
    def format_category(cls, title, body):
        """Build the text of a category section out of its title and body."""
        raise NotImplementedError()
148
class TextChangelogFormat(ChangelogFormat):
    """The traditional Mbed TLS changelog format.

    A version section starts with a line beginning with ``=``. A category
    starts with a title line whose first character, in column 0, is a word
    character. All methods work on ``bytes``.
    """

    _unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'

    @classmethod
    def is_released_version(cls, title):
        """Return True unless ``title`` contains an incomplete release date.

        A date is incomplete when its day part still ends with an ``x``
        placeholder, as in ``xxxx-xx-xx``.
        """
        # Look for an incomplete release date
        return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)

    # Group 1: the first line starting with '=' plus the newlines after it.
    # Group 2: the text that follows, up to a newline that is followed by
    # another '=' or by the end of the content.
    _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)',
                                 re.DOTALL)

    @classmethod
    def extract_top_version(cls, changelog_file_content):
        """Split out the top version section.

        A version section starts with a line starting with '='.

        Return ``(header, top_version_title, top_version_body, trailer)``.
        If the top version is already released, the title is a new
        "unreleased" title, the body is empty and the trailer starts at the
        existing top version.

        Raise AttributeError if the content does not match the version
        section pattern (``re.search`` returns None in that case).
        """
        m = re.search(cls._top_version_re, changelog_file_content)
        top_version_start = m.start(1)
        top_version_end = m.end(2)
        top_version_title = m.group(1)
        top_version_body = m.group(2)
        if cls.is_released_version(top_version_title):
            # Insert a brand new section in front of the released one.
            top_version_end = top_version_start
            top_version_title = cls._unreleased_version_text + b'\n\n'
            top_version_body = b''
        return (changelog_file_content[:top_version_start],
                top_version_title, top_version_body,
                changelog_file_content[top_version_end:])

    @classmethod
    def version_title_text(cls, version_title):
        """Return the first line of ``version_title``, without any newline."""
        # The arguments of re.sub are (pattern, repl, string); the flags
        # must be passed by keyword or they are taken for ``count``.
        return re.sub(br'\n.*', b'', version_title, flags=re.DOTALL)

    _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)

    @classmethod
    def split_categories(cls, version_body):
        """Split a version section body into a list of `CategoryContent`.

        A category title is a line with the title in column 0.

        Return an empty list for an empty body. Raise `CategoryParseError`
        if the body does not start with a category title.
        """
        if not version_body:
            return []
        title_matches = list(re.finditer(cls._category_title_re, version_body))
        if not title_matches or title_matches[0].start() != 0:
            # There is junk before the first category.
            raise CategoryParseError(0, 'Junk found where category expected')
        title_starts = [m.start(1) for m in title_matches]
        body_starts = [m.end(0) for m in title_matches]
        # Each body extends to the next title, the last one to the end.
        body_ends = title_starts[1:] + [len(version_body)]
        # Normalize each body so that it ends with exactly one newline.
        bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
                  for (body_start, body_end) in zip(body_starts, body_ends)]
        # Line numbers are 0-based offsets from the start of version_body.
        title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
        body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
        return [CategoryContent(title_match.group(1), title_line,
                                body, body_line)
                for title_match, title_line, body, body_line
                in zip(title_matches, title_lines, bodies, body_lines)]

    @classmethod
    def format_category(cls, title, body):
        """Construct the text of a category section from its title and body."""
        # `split_categories` ensures that each body ends with a newline.
        # Make sure that there is additionally a blank line between categories.
        if not body.endswith(b'\n\n'):
            body += b'\n'
        return title + b'\n' + body
209
class ChangeLog:
    """An Mbed TLS changelog.

    A changelog file consists of some header text followed by one or
    more version sections. The version sections are in reverse
    chronological order. Each version section consists of a title and a body.

    The body of a version section consists of zero or more category
    subsections. Each category subsection consists of a title and a body.

    A changelog entry file has the same format as the body of a version section.

    A `ChangelogFormat` object defines the concrete syntax of the changelog.
    Entry files must have the same format as the changelog file.
    """

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def add_categories_from_text(self, filename, line_offset,
                                 text, allow_unknown_category):
        """Parse a version section or entry file and record its categories.

        ``filename`` is only used in error messages. ``line_offset`` is the
        line number, in that file, of the first line of ``text``.

        The body of each category found in ``text`` is appended to the
        content already stored for that category. A category that is not
        known yet is rejected unless ``allow_unknown_category`` is true, in
        which case it is added after the existing categories.

        Raise `InputFormatError` if the text cannot be split into
        categories, if a category is unknown and not allowed, or if a body
        line is longer than `MAX_LINE_LENGTH`.
        """
        try:
            categories = self.format.split_categories(text)
        except CategoryParseError as e:
            raise InputFormatError(filename, line_offset + e.line_offset,
                                   e.error_message) from e
        for category in categories:
            if not allow_unknown_category and \
               category.name not in self.categories:
                raise InputFormatError(filename,
                                       line_offset + category.title_line,
                                       'Unknown category: "{}"',
                                       category.name.decode('utf8'))

            # body_line is where the body really starts: it accounts for
            # blank lines between the title and the body.
            first_body_line = line_offset + category.body_line
            for index, line in enumerate(category.body.splitlines()):
                if len(line) > MAX_LINE_LENGTH:
                    raise InputFormatError(filename,
                                           first_body_line + index,
                                           'Line is longer than allowed: Length {} (Max {})',
                                           len(line), MAX_LINE_LENGTH)

            # Use get() so that an allowed unknown category starts a new
            # entry instead of raising KeyError.
            self.categories[category.name] = \
                self.categories.get(category.name, b'') + category.body

    def __init__(self, input_stream, changelog_format):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. ``input_stream`` must have a ``name`` attribute, which
        is used in error messages. ``changelog_format`` provides the
        `ChangelogFormat` methods used to parse and format the content.
        """
        self.format = changelog_format
        whole_file = input_stream.read()
        (self.header,
         self.top_version_title, top_version_body,
         self.trailer) = self.format.extract_top_version(whole_file)
        # Split the top version section into categories.
        self.categories = OrderedDict()
        for category in STANDARD_CATEGORIES:
            self.categories[category] = b''
        # Line number of the first line of the top version body.
        offset = (self.header + self.top_version_title).count(b'\n') + 1
        self.add_categories_from_text(input_stream.name, offset,
                                      top_version_body, True)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Unlike the changelog itself, an entry file may only use categories
        that are already known.
        """
        self.add_categories_from_text(input_stream.name, 1,
                                      input_stream.read(), False)

    def write(self, filename):
        """Write the changelog to the specified file.

        Categories without content are omitted.
        """
        with open(filename, 'wb') as out:
            out.write(self.header)
            out.write(self.top_version_title)
            for title, body in self.categories.items():
                if not body:
                    continue
                out.write(self.format.format_category(title, body))
            out.write(self.trailer)
296
297
@functools.total_ordering
class EntryFileSortKey:
    """This class defines an ordering on changelog entry files: older < newer.

    * Merged entry files are sorted according to their merge date (date of
      the merge commit that brought the commit that created the file into
      the target branch).
    * Committed but unmerged entry files are sorted according to the date
      of the commit that adds them.
    * Uncommitted entry files are sorted according to their modification time.

    This class assumes that the file is in a git working directory with
    the target branch checked out.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        hashes = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--follow',
                                          '--', filename])
        # Without re.MULTILINE, '$' only matches at the end of the output,
        # so this captures the last line.
        m = re.search(b'(.+)$', hashes)
        if not m:
            # The git output is empty. This means that the file was
            # never checked in.
            return None
        # The last commit in the log is the oldest one, which is when the
        # file was created.
        return m.group(0)

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.

        Return a list of commit ids (``bytes``), empty if git lists no
        merge commit.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # Drop empty lines so that empty output yields [] rather than [b''].
        return [line for line in text.splitlines() if line]

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit as a naive UTC datetime."""
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp().
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive datetime in local time.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine position of the file in the changelog entry order.

        This constructor returns an object that can be used with comparison
        operators, with `sort` and `sorted`, etc. Older entries are sorted
        before newer entries.

        This runs git commands, so it must be called from within a git
        working directory.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a concrete sort key for this entry file sort key object.

        ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
        """
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, EntryFileSortKey):
            return NotImplemented
        return self.sort_key() < other.sort_key()

    def __hash__(self):
        # Defining __eq__ disables the default hash; keep it consistent
        # with equality.
        return hash(self.sort_key())
419
420
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    ``generated_output_file`` and ``main_input_file`` are file names;
    ``merged_files`` is an iterable of file names. Raise `LostContent` for
    the first input line that is missing from the generated output.
    """
    # Each file is opened in a ``with`` block so that it is closed promptly,
    # including when LostContent is raised.
    with open(generated_output_file, 'rb') as generated:
        generated_output = set(generated)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as merged:
            for line in merged:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
439
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If the sanity checks fail, the exception propagates and, when a
    temporary file was used, ``output_file`` is left untouched.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises FileExistsError on Windows in that case.
        os.replace(output_temp, output_file)
457
def remove_merged_entries(files_to_remove):
    """Delete each of the given entry files."""
    for entry_path in files_to_remove:
        os.remove(entry_path)
461
def list_files_to_merge(options):
    """Return the entry files (``*.txt`` in ``options.dir``), oldest first.

    The age of a file is determined by `EntryFileSortKey`.
    """
    entry_pattern = os.path.join(options.dir, '*.txt')
    return sorted(glob.glob(entry_pattern), key=EntryFileSortKey)
470
def merge_entries(options):
    """Merge the pending changelog entries into the changelog file.

    The existing changelog is read from options.input and the entries from
    the directory options.dir. The result is written to options.output.
    The entry files are deleted afterwards unless options.keep_entries is
    true. If there is no entry file, print a message on standard error and
    leave everything untouched.
    """
    with open(options.input, 'rb') as changelog_stream:
        changelog = ChangeLog(changelog_stream, TextChangelogFormat)
    entry_files = list_files_to_merge(options)
    if not entry_files:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for entry_file in entry_files:
        with open(entry_file, 'rb') as entry_stream:
            changelog.add_file(entry_stream)
    finish_output(changelog, options.output, options.input, entry_files)
    if options.keep_entries:
        return
    remove_merged_entries(entry_files)
491
def show_file_timestamps(options):
    """Print each file to merge with its category and timestamp.

    This is only intended for debugging purposes.
    """
    for entry_file in list_files_to_merge(options):
        sort_key = EntryFileSortKey(entry_file)
        print(sort_key.category, sort_key.datetime, entry_file)
501
def set_defaults(options):
    """Fill in the options that were not given explicitly.

    The output defaults to the input file. Entries are kept by default
    only when an output file was given explicitly.
    """
    output_given = getattr(options, 'output', None) is not None
    if not output_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_given
509
def main():
    """Parse the command line and run the requested action."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--dir', '-d',
        metavar='DIR',
        default='ChangeLog.d',
        help='Directory to read entries from'
             ' (default: ChangeLog.d)',
    )
    parser.add_argument(
        '--input', '-i',
        metavar='FILE',
        default='ChangeLog',
        help='Existing changelog file to read from and augment'
             ' (default: ChangeLog)',
    )
    # --keep-entries and --no-keep-entries share one destination. The
    # default of None lets set_defaults tell "not specified" apart.
    parser.add_argument(
        '--keep-entries',
        action='store_true',
        dest='keep_entries',
        default=None,
        help='Keep the files containing entries'
             ' (default: remove them if --output/-o is not specified)',
    )
    parser.add_argument(
        '--no-keep-entries',
        action='store_false',
        dest='keep_entries',
        help='Remove the files containing entries after they are merged'
             ' (default: remove them if --output/-o is not specified)',
    )
    parser.add_argument(
        '--output', '-o',
        metavar='FILE',
        help='Output changelog file'
             ' (default: overwrite the input)',
    )
    parser.add_argument(
        '--list-files-only',
        action='store_true',
        help=('Only list the files that would be processed '
              '(with some debugging information)'),
    )
    options = parser.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)


if __name__ == '__main__':
    main()