Blame - scripts/assemble_changelog.py - mirror/mbed-tls

blob: 96d2217a92b43764fe34387a7099b32c27c5e653 [file] [log] [blame]

Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2
				3	"""Assemble Mbed TLS change log entries into the change log file.
				4
				5	Add changelog entries to the first level-2 section.
				6	Create a new level-2 section for unreleased changes if needed.
				7	Remove the input files unless --keep-entries is specified.
				8
				9	In each level-3 section, entries are sorted in chronological order
				10	(oldest first). From oldest to newest:
				11	* Merged entry files are sorted according to their merge date (date of
				12	the merge commit that brought the commit that created the file into
				13	the target branch).
				14	* Committed but unmerged entry files are sorted according to the date
				15	of the commit that adds them.
				16	* Uncommitted entry files are sorted according to their modification time.
				17
				18	You must run this program from within a git working directory.
				19	"""
				20
Bence Szépkúti	a2947ac	2020-08-19 16:37:36 +0200	[diff] [blame]	21	# Copyright The Mbed TLS Contributors
Bence Szépkúti	f744bd7	2020-06-05 13:02:18 +0200	[diff] [blame]	22	# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
				23	#
				24	# This file is provided under the Apache License 2.0, or the
				25	# GNU General Public License v2.0 or later.
				26	#
				27	# **********
				28	# Apache License 2.0:
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	29	#
				30	# Licensed under the Apache License, Version 2.0 (the "License"); you may
				31	# not use this file except in compliance with the License.
				32	# You may obtain a copy of the License at
				33	#
				34	# http://www.apache.org/licenses/LICENSE-2.0
				35	#
				36	# Unless required by applicable law or agreed to in writing, software
				37	# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				38	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				39	# See the License for the specific language governing permissions and
				40	# limitations under the License.
				41	#
Bence Szépkúti	f744bd7	2020-06-05 13:02:18 +0200	[diff] [blame]	42	# **********
				43	#
				44	# **********
				45	# GNU General Public License v2.0 or later:
				46	#
				47	# This program is free software; you can redistribute it and/or modify
				48	# it under the terms of the GNU General Public License as published by
				49	# the Free Software Foundation; either version 2 of the License, or
				50	# (at your option) any later version.
				51	#
				52	# This program is distributed in the hope that it will be useful,
				53	# but WITHOUT ANY WARRANTY; without even the implied warranty of
				54	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				55	# GNU General Public License for more details.
				56	#
				57	# You should have received a copy of the GNU General Public License along
				58	# with this program; if not, write to the Free Software Foundation, Inc.,
				59	# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
				60	#
				61	# **********
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	62
				63	import argparse
				64	from collections import OrderedDict, namedtuple
				65	import datetime
				66	import functools
				67	import glob
				68	import os
				69	import re
				70	import subprocess
				71	import sys
				72
				73	class InputFormatError(Exception):
				74	def __init__(self, filename, line_number, message, args, *kwargs):
				75	message = '{}:{}: {}'.format(filename, line_number,
				76	message.format(args, *kwargs))
				77	super().__init__(message)
				78
				79	class CategoryParseError(Exception):
				80	def __init__(self, line_offset, error_message):
				81	self.line_offset = line_offset
				82	self.error_message = error_message
				83	super().__init__('{}: {}'.format(line_offset, error_message))
				84
				85	class LostContent(Exception):
				86	def __init__(self, filename, line):
				87	message = ('Lost content from {}: "{}"'.format(filename, line))
				88	super().__init__(message)
				89
# The category names we use in the changelog.
# If you edit this, update ChangeLog.d/README.md.
# The names are byte strings because the changelog is processed as bytes.
# ChangeLog.__init__ seeds its ordered category table from this tuple, so
# this is also the order in which categories are written out.
STANDARD_CATEGORIES = (
    b'API changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'Features',
    b'Security',
    b'Bugfix',
    b'Changes',
)

# The maximum line length for an entry.
# Compared against len() of each body line, which is a byte string, so
# the limit is counted in bytes.
MAX_LINE_LENGTH = 80

# One category subsection as produced by a format's split_categories().
# In TextChangelogFormat, the two *_line fields are the number of newlines
# preceding the title / the body within the text that was split.
CategoryContent = namedtuple('CategoryContent', [
    'name', 'title_line', # Title text and line number of the title
    'body', 'body_line', # Body text and starting line number of the body
])
				111
				112	class ChangelogFormat:
				113	"""Virtual class documenting how to write a changelog format class."""
				114
				115	@classmethod
				116	def extract_top_version(cls, changelog_file_content):
				117	"""Split out the top version section.
				118
				119	If the top version is already released, create a new top
				120	version section for an unreleased version.
				121
				122	Return ``(header, top_version_title, top_version_body, trailer)``
				123	where the "top version" is the existing top version section if it's
				124	for unreleased changes, and a newly created section otherwise.
				125	To assemble the changelog after modifying top_version_body,
				126	concatenate the four pieces.
				127	"""
				128	raise NotImplementedError
				129
				130	@classmethod
				131	def version_title_text(cls, version_title):
				132	"""Return the text of a formatted version section title."""
				133	raise NotImplementedError
				134
				135	@classmethod
				136	def split_categories(cls, version_body):
				137	"""Split a changelog version section body into categories.
				138
				139	Return a list of `CategoryContent` the name is category title
				140	without any formatting.
				141	"""
				142	raise NotImplementedError
				143
				144	@classmethod
				145	def format_category(cls, title, body):
				146	"""Construct the text of a category section from its title and body."""
				147	raise NotImplementedError
				148
				149	class TextChangelogFormat(ChangelogFormat):
				150	"""The traditional Mbed TLS changelog format."""
				151
				152	_unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'
				153	@classmethod
				154	def is_released_version(cls, title):
				155	# Look for an incomplete release date
				156	return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)
				157
				158	_top_version_re = re.compile(br'(?:\A\|\n)(=[^\n]\n+)(.?\n)(?:=\|$)',
				159	re.DOTALL)
				160	@classmethod
				161	def extract_top_version(cls, changelog_file_content):
				162	"""A version section starts with a line starting with '='."""
				163	m = re.search(cls._top_version_re, changelog_file_content)
				164	top_version_start = m.start(1)
				165	top_version_end = m.end(2)
				166	top_version_title = m.group(1)
				167	top_version_body = m.group(2)
				168	if cls.is_released_version(top_version_title):
				169	top_version_end = top_version_start
				170	top_version_title = cls._unreleased_version_text + b'\n\n'
				171	top_version_body = b''
				172	return (changelog_file_content[:top_version_start],
				173	top_version_title, top_version_body,
				174	changelog_file_content[top_version_end:])
				175
				176	@classmethod
				177	def version_title_text(cls, version_title):
				178	return re.sub(br'\n.*', version_title, re.DOTALL)
				179
				180	_category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)
				181	@classmethod
				182	def split_categories(cls, version_body):
				183	"""A category title is a line with the title in column 0."""
				184	if not version_body:
				185	return []
				186	title_matches = list(re.finditer(cls._category_title_re, version_body))
				187	if not title_matches or title_matches[0].start() != 0:
				188	# There is junk before the first category.
				189	raise CategoryParseError(0, 'Junk found where category expected')
				190	title_starts = [m.start(1) for m in title_matches]
				191	body_starts = [m.end(0) for m in title_matches]
				192	body_ends = title_starts[1:] + [len(version_body)]
				193	bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
				194	for (body_start, body_end) in zip(body_starts, body_ends)]
				195	title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
				196	body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
				197	return [CategoryContent(title_match.group(1), title_line,
				198	body, body_line)
				199	for title_match, title_line, body, body_line
				200	in zip(title_matches, title_lines, bodies, body_lines)]
				201
				202	@classmethod
				203	def format_category(cls, title, body):
				204	# `split_categories` ensures that each body ends with a newline.
				205	# Make sure that there is additionally a blank line between categories.
				206	if not body.endswith(b'\n\n'):
				207	body += b'\n'
				208	return title + b'\n' + body
				209
				210	class ChangeLog:
				211	"""An Mbed TLS changelog.
				212
				213	A changelog file consists of some header text followed by one or
				214	more version sections. The version sections are in reverse
				215	chronological order. Each version section consists of a title and a body.
				216
				217	The body of a version section consists of zero or more category
				218	subsections. Each category subsection consists of a title and a body.
				219
				220	A changelog entry file has the same format as the body of a version section.
				221
				222	A `ChangelogFormat` object defines the concrete syntax of the changelog.
				223	Entry files must have the same format as the changelog file.
				224	"""
				225
				226	# Only accept dotted version numbers (e.g. "3.1", not "3").
				227	# Refuse ".x" in a version number where x is a letter: this indicates
				228	# a version that is not yet released. Something like "3.1a" is accepted.
				229	_version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
				230	_incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')
				231
				232	def add_categories_from_text(self, filename, line_offset,
				233	text, allow_unknown_category):
				234	"""Parse a version section or entry file."""
				235	try:
				236	categories = self.format.split_categories(text)
				237	except CategoryParseError as e:
				238	raise InputFormatError(filename, line_offset + e.line_offset,
				239	e.error_message)
				240	for category in categories:
				241	if not allow_unknown_category and \
				242	category.name not in self.categories:
				243	raise InputFormatError(filename,
				244	line_offset + category.title_line,
				245	'Unknown category: "{}"',
				246	category.name.decode('utf8'))
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame]	247
				248	body_split = category.body.splitlines()
Paul Elliott	217565e	2021-03-09 10:24:55 +0000	[diff] [blame^]	249	line_number = 1
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame]	250	for line in body_split:
				251	if len(line) > MAX_LINE_LENGTH:
				252	raise InputFormatError(filename,
Paul Elliott	217565e	2021-03-09 10:24:55 +0000	[diff] [blame^]	253	line_offset + category.title_line + line_number,
				254	'Line is longer than allowed: Length {} (Max {})',
				255	len(line), MAX_LINE_LENGTH)
				256	line_number += 1
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame]	257
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	258	self.categories[category.name] += category.body
				259
				260	def __init__(self, input_stream, changelog_format):
				261	"""Create a changelog object.
				262
				263	Populate the changelog object from the content of the file
				264	input_stream.
				265	"""
				266	self.format = changelog_format
				267	whole_file = input_stream.read()
				268	(self.header,
				269	self.top_version_title, top_version_body,
				270	self.trailer) = self.format.extract_top_version(whole_file)
				271	# Split the top version section into categories.
				272	self.categories = OrderedDict()
				273	for category in STANDARD_CATEGORIES:
				274	self.categories[category] = b''
				275	offset = (self.header + self.top_version_title).count(b'\n') + 1
				276	self.add_categories_from_text(input_stream.name, offset,
				277	top_version_body, True)
				278
				279	def add_file(self, input_stream):
				280	"""Add changelog entries from a file.
				281	"""
				282	self.add_categories_from_text(input_stream.name, 1,
				283	input_stream.read(), False)
				284
				285	def write(self, filename):
				286	"""Write the changelog to the specified file.
				287	"""
				288	with open(filename, 'wb') as out:
				289	out.write(self.header)
				290	out.write(self.top_version_title)
				291	for title, body in self.categories.items():
				292	if not body:
				293	continue
				294	out.write(self.format.format_category(title, body))
				295	out.write(self.trailer)
				296
				297
				298	@functools.total_ordering
				299	class EntryFileSortKey:
				300	"""This classes defines an ordering on changelog entry files: older < newer.
				301
				302	* Merged entry files are sorted according to their merge date (date of
				303	the merge commit that brought the commit that created the file into
				304	the target branch).
				305	* Committed but unmerged entry files are sorted according to the date
				306	of the commit that adds them.
				307	* Uncommitted entry files are sorted according to their modification time.
				308
				309	This class assumes that the file is in a git working directory with
				310	the target branch checked out.
				311	"""
				312
				313	# Categories of files. A lower number is considered older.
				314	MERGED = 0
				315	COMMITTED = 1
				316	LOCAL = 2
				317
				318	@staticmethod
				319	def creation_hash(filename):
				320	"""Return the git commit id at which the given file was created.
				321
				322	Return None if the file was never checked into git.
				323	"""
				324	hashes = subprocess.check_output(['git', 'log', '--format=%H',
				325	'--follow',
				326	'--', filename])
				327	m = re.search(b'(.+)$', hashes)
				328	if not m:
				329	# The git output is empty. This means that the file was
				330	# never checked in.
				331	return None
				332	# The last commit in the log is the oldest one, which is when the
				333	# file was created.
				334	return m.group(0)
				335
				336	@staticmethod
				337	def list_merges(some_hash, target, *options):
				338	"""List merge commits from some_hash to target.
				339
				340	Pass options to git to select which commits are included.
				341	"""
				342	text = subprocess.check_output(['git', 'rev-list',
				343	'--merges', *options,
				344	b'..'.join([some_hash, target])])
				345	return text.rstrip(b'\n').split(b'\n')
				346
				347	@classmethod
				348	def merge_hash(cls, some_hash):
				349	"""Return the git commit id at which the given commit was merged.
				350
				351	Return None if the given commit was never merged.
				352	"""
				353	target = b'HEAD'
				354	# List the merges from some_hash to the target in two ways.
				355	# The ancestry list is the ones that are both descendants of
				356	# some_hash and ancestors of the target.
				357	ancestry = frozenset(cls.list_merges(some_hash, target,
				358	'--ancestry-path'))
				359	# The first_parents list only contains merges that are directly
				360	# on the target branch. We want it in reverse order (oldest first).
				361	first_parents = cls.list_merges(some_hash, target,
				362	'--first-parent', '--reverse')
				363	# Look for the oldest merge commit that's both on the direct path
				364	# and directly on the target branch. That's the place where some_hash
				365	# was merged on the target branch. See
				366	# https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
				367	for commit in first_parents:
				368	if commit in ancestry:
				369	return commit
				370	return None
				371
				372	@staticmethod
				373	def commit_timestamp(commit_id):
				374	"""Return the timestamp of the given commit."""
				375	text = subprocess.check_output(['git', 'show', '-s',
				376	'--format=%ct',
				377	commit_id])
				378	return datetime.datetime.utcfromtimestamp(int(text))
				379
				380	@staticmethod
				381	def file_timestamp(filename):
				382	"""Return the modification timestamp of the given file."""
				383	mtime = os.stat(filename).st_mtime
				384	return datetime.datetime.fromtimestamp(mtime)
				385
				386	def __init__(self, filename):
				387	"""Determine position of the file in the changelog entry order.
				388
				389	This constructor returns an object that can be used with comparison
				390	operators, with `sort` and `sorted`, etc. Older entries are sorted
				391	before newer entries.
				392	"""
				393	self.filename = filename
				394	creation_hash = self.creation_hash(filename)
				395	if not creation_hash:
				396	self.category = self.LOCAL
				397	self.datetime = self.file_timestamp(filename)
				398	return
				399	merge_hash = self.merge_hash(creation_hash)
				400	if not merge_hash:
				401	self.category = self.COMMITTED
				402	self.datetime = self.commit_timestamp(creation_hash)
				403	return
				404	self.category = self.MERGED
				405	self.datetime = self.commit_timestamp(merge_hash)
				406
				407	def sort_key(self):
				408	""""Return a concrete sort key for this entry file sort key object.
				409
				410	``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
				411	"""
				412	return (self.category, self.datetime, self.filename)
				413
				414	def __eq__(self, other):
				415	return self.sort_key() == other.sort_key()
				416
				417	def __lt__(self, other):
				418	return self.sort_key() < other.sort_key()
				419
				420
				421	def check_output(generated_output_file, main_input_file, merged_files):
				422	"""Make sanity checks on the generated output.
				423
				424	The intent of these sanity checks is to have reasonable confidence
				425	that no content has been lost.
				426
				427	The sanity check is that every line that is present in an input file
				428	is also present in an output file. This is not perfect but good enough
				429	for now.
				430	"""
				431	generated_output = set(open(generated_output_file, 'rb'))
				432	for line in open(main_input_file, 'rb'):
				433	if line not in generated_output:
				434	raise LostContent('original file', line)
				435	for merged_file in merged_files:
				436	for line in open(merged_file, 'rb'):
				437	if line not in generated_output:
				438	raise LostContent(merged_file, line)
				439
				440	def finish_output(changelog, output_file, input_file, merged_files):
				441	"""Write the changelog to the output file.
				442
				443	The input file and the list of merged files are used only for sanity
				444	checks on the output.
				445	"""
				446	if os.path.exists(output_file) and not os.path.isfile(output_file):
				447	# The output is a non-regular file (e.g. pipe). Write to it directly.
				448	output_temp = output_file
				449	else:
				450	# The output is a regular file. Write to a temporary file,
				451	# then move it into place atomically.
				452	output_temp = output_file + '.tmp'
				453	changelog.write(output_temp)
				454	check_output(output_temp, input_file, merged_files)
				455	if output_temp != output_file:
				456	os.rename(output_temp, output_file)
				457
				458	def remove_merged_entries(files_to_remove):
				459	for filename in files_to_remove:
				460	os.remove(filename)
				461
				462	def list_files_to_merge(options):
				463	"""List the entry files to merge, oldest first.
				464
				465	"Oldest" is defined by `EntryFileSortKey`.
				466	"""
				467	files_to_merge = glob.glob(os.path.join(options.dir, '*.txt'))
				468	files_to_merge.sort(key=EntryFileSortKey)
				469	return files_to_merge
				470
				471	def merge_entries(options):
				472	"""Merge changelog entries into the changelog file.
				473
				474	Read the changelog file from options.input.
				475	Read entries to merge from the directory options.dir.
				476	Write the new changelog to options.output.
				477	Remove the merged entries if options.keep_entries is false.
				478	"""
				479	with open(options.input, 'rb') as input_file:
				480	changelog = ChangeLog(input_file, TextChangelogFormat)
				481	files_to_merge = list_files_to_merge(options)
				482	if not files_to_merge:
				483	sys.stderr.write('There are no pending changelog entries.\n')
				484	return
				485	for filename in files_to_merge:
				486	with open(filename, 'rb') as input_file:
				487	changelog.add_file(input_file)
				488	finish_output(changelog, options.output, options.input, files_to_merge)
				489	if not options.keep_entries:
				490	remove_merged_entries(files_to_merge)
				491
				492	def show_file_timestamps(options):
				493	"""List the files to merge and their timestamp.
				494
				495	This is only intended for debugging purposes.
				496	"""
				497	files = list_files_to_merge(options)
				498	for filename in files:
				499	ts = EntryFileSortKey(filename)
				500	print(ts.category, ts.datetime, filename)
				501
				502	def set_defaults(options):
				503	"""Add default values for missing options."""
				504	output_file = getattr(options, 'output', None)
				505	if output_file is None:
				506	options.output = options.input
				507	if getattr(options, 'keep_entries', None) is None:
				508	options.keep_entries = (output_file is not None)
				509
				510	def main():
				511	"""Command line entry point."""
				512	parser = argparse.ArgumentParser(description=__doc__)
				513	parser.add_argument('--dir', '-d', metavar='DIR',
				514	default='ChangeLog.d',
				515	help='Directory to read entries from'
				516	' (default: ChangeLog.d)')
				517	parser.add_argument('--input', '-i', metavar='FILE',
				518	default='ChangeLog',
				519	help='Existing changelog file to read from and augment'
				520	' (default: ChangeLog)')
				521	parser.add_argument('--keep-entries',
				522	action='store_true', dest='keep_entries', default=None,
				523	help='Keep the files containing entries'
				524	' (default: remove them if --output/-o is not specified)')
				525	parser.add_argument('--no-keep-entries',
				526	action='store_false', dest='keep_entries',
				527	help='Remove the files containing entries after they are merged'
				528	' (default: remove them if --output/-o is not specified)')
				529	parser.add_argument('--output', '-o', metavar='FILE',
				530	help='Output changelog file'
				531	' (default: overwrite the input)')
				532	parser.add_argument('--list-files-only',
				533	action='store_true',
				534	help=('Only list the files that would be processed '
				535	'(with some debugging information)'))
				536	options = parser.parse_args()
				537	set_defaults(options)
				538	if options.list_files_only:
				539	show_file_timestamps(options)
				540	return
				541	merge_entries(options)
				542
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()