blob: 928f73bf7f77a65370f4b0800efb7aacc522fe03 [file] [log] [blame]
Gilles Peskine40b3f412019-10-13 21:44:25 +02001#!/usr/bin/env python3
2
3"""Assemble Mbed Crypto change log entries into the change log file.
Gilles Peskinea2607962020-01-28 19:58:17 +01004
5Add changelog entries to the first level-2 section.
6Create a new level-2 section for unreleased changes if needed.
7Remove the input files unless --keep-entries is specified.
Gilles Peskine40b3f412019-10-13 21:44:25 +02008"""
9
10# Copyright (C) 2019, Arm Limited, All Rights Reserved
11# SPDX-License-Identifier: Apache-2.0
12#
13# Licensed under the Apache License, Version 2.0 (the "License"); you may
14# not use this file except in compliance with the License.
15# You may obtain a copy of the License at
16#
17# http://www.apache.org/licenses/LICENSE-2.0
18#
19# Unless required by applicable law or agreed to in writing, software
20# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
21# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22# See the License for the specific language governing permissions and
23# limitations under the License.
24#
25# This file is part of Mbed Crypto (https://tls.mbed.org)
26
27import argparse
Gilles Peskined8b6c772020-01-28 18:57:47 +010028from collections import OrderedDict
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010029import datetime
30import functools
Gilles Peskine40b3f412019-10-13 21:44:25 +020031import glob
32import os
33import re
Gilles Peskine8f46bbf2020-03-25 16:34:43 +010034import subprocess
Gilles Peskine40b3f412019-10-13 21:44:25 +020035import sys
36
class InputFormatError(Exception):
    """Error in the content of an input file, reported with its location.

    The exception text has the form 'FILENAME:LINE_NUMBER: DETAIL', where
    DETAIL is `message` formatted with the remaining positional and
    keyword arguments (str.format syntax).
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        # Expand the caller's placeholders first, then prefix the location.
        detail = message.format(*args, **kwargs)
        located = '{}:{}: {}'.format(filename, line_number, detail)
        super().__init__(located)
Gilles Peskine40b3f412019-10-13 21:44:25 +020042
class LostContent(Exception):
    """A line of an input file is missing from the generated output.

    `filename` names the input the line came from and `line` is the
    missing line itself; both are embedded in the exception text.
    """

    def __init__(self, filename, line):
        super().__init__('Lost content from {}: "{}"'.format(filename, line))
47
# Level-3 section titles that are always accepted in changelog entries.
# ChangeLog pre-populates its sections from this tuple, in this order, and
# writes non-empty sections out in that same order. Titles are bytes because
# the changelog and entry files are read and written in binary mode.
STANDARD_SECTIONS = (
    b'Interface changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'New features',
    b'Security',
    b'Bug fixes',
    b'Performance improvements',
    b'Other changes',
)
60
class ChangeLog:
    """Model of an Mbed Crypto changelog file.

    The changelog is a Markdown document. Every level-2 title (a line
    starting with '##') opens a version, and versions appear in reverse
    chronological order.

    A version is divided into level-3 sections, one per kind of change
    (bug fix, new feature, ...). Section titles should match the entries
    of STANDARD_SECTIONS exactly.

    Inside a section, every change is on a line starting with a '*' bullet.
    Blank lines may surround titles, but there should be none inside a
    section.
    """

    _title_re = re.compile(br'#*')

    def title_level(self, line):
        """Classify a line as a title or ordinary content.

        Return (level, content). level is the number of leading '#'
        characters (1 for '#', 2 for '##', ...), or 0 when the line is not
        a title. content is the rest of the line with surrounding
        whitespace removed.
        """
        hashes = self._title_re.match(line).end()
        return hashes, line[hashes:].strip()

    # Only accept dotted version numbers (e.g. "3.1", not "3").
    # Refuse ".x" in a version number where x is a letter: this indicates
    # a version that is not yet released. Something like "3.1a" is accepted.
    _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
    _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')

    def section_is_released_version(self, title):
        """Tell whether a level-2 title designates a released version.

        Return True if the title contains a dotted version number with no
        '.<letter>' component, otherwise False.
        """
        # The notion of "released" rests purely on the shape of the version
        # number; these criteria may be revised in future versions of this
        # script.
        found = self._version_number_re.search(title)
        if found is None:
            return False
        incomplete = self._incomplete_version_number_re.search(found.group(0))
        return incomplete is None

    def unreleased_version_title(self):
        """Title given to a newly created section for unreleased changes."""
        # pylint: disable=no-self-use; this method may be overridden
        return b'Unreleased changes'

    def __init__(self, input_stream):
        """Build a changelog object from existing changelog content.

        input_stream yields the lines of the changelog. It is typically a
        file opened for reading in binary mode, but any iterable of lines
        works.
        """
        # Lines preceding the level-3 sections that receive new entries.
        self.header = []
        # Level-3 section title -> lines of that section, in output order.
        self.section_content = OrderedDict(
            (title, []) for title in STANDARD_SECTIONS)
        # Lines of the level-2 sections for already-released versions.
        self.trailer = []
        self.read_main_file(input_stream)

    def read_main_file(self, input_stream):
        """Split the changelog lines into header, sections and trailer.

        Meant to be called from the constructor only; calling it on an
        object that is already partially populated may not act sensibly.
        """
        # Two situations, decided by the first level-2 title:
        # * It is an unreleased version: that section is expanded. The
        #   header runs up to its first level-3 title, its level-3
        #   sections are split out, and the trailer starts at the second
        #   level-2 title.
        # * It is a released version: a title for a new unreleased
        #   version is appended to the header, and the trailer starts at
        #   that first level-2 title.
        versions_seen = 0
        active_section = None
        for raw in input_stream:
            depth, title = self.title_level(raw)
            if depth == 2:
                versions_seen += 1
                if (versions_seen == 1 and
                        self.section_is_released_version(title)):
                    self.header.append(b'## ' +
                                       self.unreleased_version_title() +
                                       b'\n\n')
                    # Behave as if the new section had just been closed.
                    versions_seen = 2
            elif depth == 3 and versions_seen == 1:
                active_section = title
                self.section_content.setdefault(title, [])

            if versions_seen >= 2:
                self.trailer.append(raw)
            elif versions_seen == 0 or active_section is None:
                self.header.append(raw)
            elif depth != 3 and raw.strip():
                # Level-3 titles and blank lines are regenerated by write().
                self.section_content[active_section].append(raw)

    def add_file(self, input_stream):
        """Merge the entries of one changelog entry file.

        input_stream is typically a file opened for reading in binary
        mode; its `name` attribute is used in error messages. The content
        must be a series of level-3 Markdown sections whose titles are
        either listed in STANDARD_SECTIONS or already present in the
        changelog. Titles must match byte-for-byte, except that leading
        and trailing whitespace is ignored. Each non-blank content line is
        appended to the section of the same title.

        Raise InputFormatError if a title is not recognized, if content
        precedes the first title, or if a title of another level is found.
        """
        source_name = input_stream.name
        active_section = None
        for number, raw in enumerate(input_stream, 1):
            if not raw.strip():
                continue
            depth, title = self.title_level(raw)
            if depth == 3:
                active_section = title
                if title not in self.section_content:
                    raise InputFormatError(source_name, number,
                                           'Section {} is not recognized',
                                           str(title)[1:])
                continue
            if depth != 0:
                raise InputFormatError(source_name, number,
                                       'Only level 3 headers (###) are permitted')
            if active_section is None:
                raise InputFormatError(source_name, number,
                                       'Missing section title at the beginning of the file')
            self.section_content[active_section].append(raw)

    def write(self, filename):
        """Write the changelog to the file named filename.

        The header comes first, then every non-empty section preceded by
        its level-3 title and followed by a blank line, then the trailer.
        """
        with open(filename, 'wb') as out:
            out.writelines(self.header)
            for title, entries in self.section_content.items():
                if entries:
                    out.write(b'### ' + title + b'\n\n')
                    out.writelines(entries)
                    out.write(b'\n')
            out.writelines(self.trailer)
224
Gilles Peskine8f46bbf2020-03-25 16:34:43 +0100225
@functools.total_ordering
class FileMergeTimestamp:
    """A timestamp indicating when a file was merged.

    If file1 was merged before file2, then
    FileMergeTimestamp(file1) <= FileMergeTimestamp(file2).

    Instances are ordered by their sort_key(): first by category, then by
    date, then by file name.
    """

    # Categories of files. A lower number is considered older.
    MERGED = 0
    COMMITTED = 1
    LOCAL = 2

    @staticmethod
    def creation_hash(filename):
        """Return the git commit id at which the given file was created.

        Return None if the file was never checked into git.
        """
        output = subprocess.check_output(['git', 'log', '--format=%H',
                                          '--', filename])
        # One commit id per line; the last one listed is taken as the
        # commit that created the file.
        hashes = output.split()
        if not hashes:
            # The file was never checked in.
            return None
        return hashes[-1]

    @staticmethod
    def list_merges(some_hash, target, *options):
        """List merge commits from some_hash to target.

        Pass options to git to select which commits are included.
        Return a list of commit ids (bytes), empty if git lists nothing.
        """
        text = subprocess.check_output(['git', 'rev-list',
                                        '--merges', *options,
                                        b'..'.join([some_hash, target])])
        # split() without an argument yields [] for empty output; splitting
        # on b'\n' would yield [b''], i.e. a bogus empty commit id.
        return text.split()

    @classmethod
    def merge_hash(cls, some_hash):
        """Return the git commit id at which the given commit was merged.

        Return None if the given commit was never merged.
        """
        target = b'HEAD'
        # List the merges from some_hash to the target in two ways.
        # The ancestry list is the ones that are both descendants of
        # some_hash and ancestors of the target.
        ancestry = frozenset(cls.list_merges(some_hash, target,
                                             '--ancestry-path'))
        # The first_parents list only contains merges that are directly
        # on the target branch. We want it in reverse order (oldest first).
        first_parents = cls.list_merges(some_hash, target,
                                        '--first-parent', '--reverse')
        # Look for the oldest merge commit that's both on the direct path
        # and directly on the target branch. That's the place where some_hash
        # was merged on the target branch. See
        # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
        for commit in first_parents:
            if commit in ancestry:
                return commit
        return None

    @staticmethod
    def commit_timestamp(commit_id):
        """Return the timestamp of the given commit.

        The result is a naive datetime object expressed in UTC.
        """
        text = subprocess.check_output(['git', 'show', '-s',
                                        '--format=%ct',
                                        commit_id])
        # Same value as the deprecated datetime.utcfromtimestamp(): convert
        # in UTC, then drop the tzinfo so the result stays naive.
        aware = datetime.datetime.fromtimestamp(int(text),
                                                datetime.timezone.utc)
        return aware.replace(tzinfo=None)

    @staticmethod
    def file_timestamp(filename):
        """Return the modification timestamp of the given file.

        The result is a naive datetime object expressed in local time.
        """
        mtime = os.stat(filename).st_mtime
        return datetime.datetime.fromtimestamp(mtime)

    def __init__(self, filename):
        """Determine the timestamp at which the file was merged.

        Runs git to classify the file:
        * never checked in: category LOCAL, dated by the file's mtime;
        * checked in but no merge found: category COMMITTED, dated by the
          commit that created the file;
        * otherwise: category MERGED, dated by the merge commit.
        """
        self.filename = filename
        creation_hash = self.creation_hash(filename)
        if not creation_hash:
            self.category = self.LOCAL
            self.datetime = self.file_timestamp(filename)
            return
        merge_hash = self.merge_hash(creation_hash)
        if not merge_hash:
            self.category = self.COMMITTED
            self.datetime = self.commit_timestamp(creation_hash)
            return
        self.category = self.MERGED
        self.datetime = self.commit_timestamp(merge_hash)

    def sort_key(self):
        """Return a sort key for this merge timestamp object.

        ts1.sort_key() < ts2.sort_key() if and only if ts1 is
        considered to be older than ts2.
        """
        # The category comes first, so local-time dates (LOCAL) are only
        # ever compared with each other, never with UTC commit dates.
        return (self.category, self.datetime, self.filename)

    def __eq__(self, other):
        if not isinstance(other, FileMergeTimestamp):
            # Let Python try the reflected operation or fall back to
            # identity instead of failing with AttributeError.
            return NotImplemented
        return self.sort_key() == other.sort_key()

    def __lt__(self, other):
        if not isinstance(other, FileMergeTimestamp):
            return NotImplemented
        return self.sort_key() < other.sort_key()
332
333
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    generated_output_file and main_input_file are file names, and
    merged_files is an iterable of file names. All files are read in
    binary mode and closed before returning.

    Raise LostContent for the first input line that is missing from the
    output.
    """
    with open(generated_output_file, 'rb') as output:
        # A set makes each membership test below O(1).
        generated_output = set(output)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as entries:
            for line in entries:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
352
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    changelog is the object whose write() method produces the output.
    The input file and the list of merged files are used only for sanity
    checks on the output; if a check fails, the exception from
    check_output propagates and output_file is not replaced.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows when output_file already
        # exists (the normal case when updating the changelog in place).
        os.replace(output_temp, output_file)
370
def remove_merged_entries(files_to_remove):
    """Delete every file named in files_to_remove."""
    for path in files_to_remove:
        os.remove(path)
374
def list_files_to_merge(options):
    """Return the entry files to merge, oldest first.

    The candidates are the '*.md' files directly inside options.dir.
    A file counts as older when it was merged earlier; see
    `FileMergeTimestamp` for details.
    """
    pattern = os.path.join(options.dir, '*.md')
    return sorted(glob.glob(pattern),
                  key=lambda path: FileMergeTimestamp(path).sort_key())
384
def merge_entries(options):
    """Merge pending changelog entries into the changelog file.

    The existing changelog is read from options.input and the entries
    from the directory options.dir. The result is written to
    options.output. Unless options.keep_entries is true, the merged entry
    files are then removed. When there is no entry to merge, a message is
    printed on stderr and nothing is written or removed.
    """
    with open(options.input, 'rb') as main_file:
        changelog = ChangeLog(main_file)
    pending = list_files_to_merge(options)
    if not pending:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for path in pending:
        with open(path, 'rb') as entry_file:
            changelog.add_file(entry_file)
    finish_output(changelog, options.output, options.input, pending)
    if options.keep_entries:
        return
    remove_merged_entries(pending)
Gilles Peskine40b3f412019-10-13 21:44:25 +0200405
def show_file_timestamps(options):
    """Print each file to merge together with its merge timestamp.

    One line per file, in merge order: category, date, file name.
    This is only intended for debugging purposes.
    """
    for path in list_files_to_merge(options):
        stamp = FileMergeTimestamp(path)
        print(stamp.category, stamp.datetime, path)
415
def set_defaults(options):
    """Fill in the options that were left unset.

    * output defaults to the input file (update in place).
    * keep_entries defaults to true only if an output file was given
      explicitly.
    """
    output_given = getattr(options, 'output', None) is not None
    if not output_given:
        options.output = options.input
    if getattr(options, 'keep_entries', None) is None:
        options.keep_entries = output_given
423
def main():
    """Command line entry point.

    Parse the command line, fill in default option values, then either
    list the files that would be merged (--list-files-only) or merge them.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        '--dir', '-d',
        metavar='DIR',
        default='ChangeLog.d',
        help=('Directory to read entries from'
              ' (default: ChangeLog.d)'))
    cli.add_argument(
        '--input', '-i',
        metavar='FILE',
        default='ChangeLog.md',
        help=('Existing changelog file to read from and augment'
              ' (default: ChangeLog.md)'))
    # --keep-entries and --no-keep-entries share one destination; None
    # means that neither was given, which set_defaults() resolves.
    cli.add_argument(
        '--keep-entries',
        dest='keep_entries',
        action='store_true',
        default=None,
        help=('Keep the files containing entries'
              ' (default: remove them if --output/-o is not specified)'))
    cli.add_argument(
        '--no-keep-entries',
        dest='keep_entries',
        action='store_false',
        help=('Remove the files containing entries after they are merged'
              ' (default: remove them if --output/-o is not specified)'))
    cli.add_argument(
        '--output', '-o',
        metavar='FILE',
        help=('Output changelog file'
              ' (default: overwrite the input)'))
    cli.add_argument(
        '--list-files-only',
        action='store_true',
        help='Only list the files that would be processed (with some debugging information)')
    options = cli.parse_args()
    set_defaults(options)
    if options.list_files_only:
        show_file_timestamps(options)
    else:
        merge_entries(options)
455
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()