Blame - scripts/assemble_changelog.py - mirror/mbed-tls

blob: f85392c530fc1c51d07ab65e4012696c4d9a6f6b [file] [log] [blame]

Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2
				3	"""Assemble Mbed TLS change log entries into the change log file.
				4
				5	Add changelog entries to the first level-2 section.
				6	Create a new level-2 section for unreleased changes if needed.
				7	Remove the input files unless --keep-entries is specified.
				8
				9	In each level-3 section, entries are sorted in chronological order
				10	(oldest first). From oldest to newest:
				11	* Merged entry files are sorted according to their merge date (date of
				12	the merge commit that brought the commit that created the file into
				13	the target branch).
				14	* Committed but unmerged entry files are sorted according to the date
				15	of the commit that adds them.
				16	* Uncommitted entry files are sorted according to their modification time.
				17
				18	You must run this program from within a git working directory.
				19	"""
				20
Bence Szépkúti	a2947ac	2020-08-19 16:37:36 +0200	[diff] [blame]	21	# Copyright The Mbed TLS Contributors
Bence Szépkúti	f744bd7	2020-06-05 13:02:18 +0200	[diff] [blame]	22	# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
				23	#
				24	# This file is provided under the Apache License 2.0, or the
				25	# GNU General Public License v2.0 or later.
				26	#
				27	# **********
				28	# Apache License 2.0:
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	29	#
				30	# Licensed under the Apache License, Version 2.0 (the "License"); you may
				31	# not use this file except in compliance with the License.
				32	# You may obtain a copy of the License at
				33	#
				34	# http://www.apache.org/licenses/LICENSE-2.0
				35	#
				36	# Unless required by applicable law or agreed to in writing, software
				37	# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				38	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				39	# See the License for the specific language governing permissions and
				40	# limitations under the License.
				41	#
Bence Szépkúti	f744bd7	2020-06-05 13:02:18 +0200	[diff] [blame]	42	# **********
				43	#
				44	# **********
				45	# GNU General Public License v2.0 or later:
				46	#
				47	# This program is free software; you can redistribute it and/or modify
				48	# it under the terms of the GNU General Public License as published by
				49	# the Free Software Foundation; either version 2 of the License, or
				50	# (at your option) any later version.
				51	#
				52	# This program is distributed in the hope that it will be useful,
				53	# but WITHOUT ANY WARRANTY; without even the implied warranty of
				54	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				55	# GNU General Public License for more details.
				56	#
				57	# You should have received a copy of the GNU General Public License along
				58	# with this program; if not, write to the Free Software Foundation, Inc.,
				59	# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
				60	#
				61	# **********
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	62
				63	import argparse
				64	from collections import OrderedDict, namedtuple
				65	import datetime
				66	import functools
				67	import glob
				68	import os
				69	import re
				70	import subprocess
				71	import sys
				72
				73	class InputFormatError(Exception):
				74	def __init__(self, filename, line_number, message, args, *kwargs):
				75	message = '{}:{}: {}'.format(filename, line_number,
				76	message.format(args, *kwargs))
				77	super().__init__(message)
				78
				79	class CategoryParseError(Exception):
				80	def __init__(self, line_offset, error_message):
				81	self.line_offset = line_offset
				82	self.error_message = error_message
				83	super().__init__('{}: {}'.format(line_offset, error_message))
				84
				85	class LostContent(Exception):
				86	def __init__(self, filename, line):
				87	message = ('Lost content from {}: "{}"'.format(filename, line))
				88	super().__init__(message)
				89
				90	# The category names we use in the changelog.
				91	# If you edit this, update ChangeLog.d/README.md.
				92	STANDARD_CATEGORIES = (
				93	b'API changes',
				94	b'Default behavior changes',
				95	b'Requirement changes',
				96	b'New deprecations',
				97	b'Removals',
				98	b'Features',
				99	b'Security',
				100	b'Bugfix',
				101	b'Changes',
				102	)
				103
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame^]	104	# The maximum line length for an entry
				105	MAX_LINE_LENGTH = 80
				106
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	107	CategoryContent = namedtuple('CategoryContent', [
				108	'name', 'title_line', # Title text and line number of the title
				109	'body', 'body_line', # Body text and starting line number of the body
				110	])
				111
				112	class ChangelogFormat:
				113	"""Virtual class documenting how to write a changelog format class."""
				114
				115	@classmethod
				116	def extract_top_version(cls, changelog_file_content):
				117	"""Split out the top version section.
				118
				119	If the top version is already released, create a new top
				120	version section for an unreleased version.
				121
				122	Return ``(header, top_version_title, top_version_body, trailer)``
				123	where the "top version" is the existing top version section if it's
				124	for unreleased changes, and a newly created section otherwise.
				125	To assemble the changelog after modifying top_version_body,
				126	concatenate the four pieces.
				127	"""
				128	raise NotImplementedError
				129
				130	@classmethod
				131	def version_title_text(cls, version_title):
				132	"""Return the text of a formatted version section title."""
				133	raise NotImplementedError
				134
				135	@classmethod
				136	def split_categories(cls, version_body):
				137	"""Split a changelog version section body into categories.
				138
				139	Return a list of `CategoryContent` the name is category title
				140	without any formatting.
				141	"""
				142	raise NotImplementedError
				143
				144	@classmethod
				145	def format_category(cls, title, body):
				146	"""Construct the text of a category section from its title and body."""
				147	raise NotImplementedError
				148
				149	class TextChangelogFormat(ChangelogFormat):
				150	"""The traditional Mbed TLS changelog format."""
				151
				152	_unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'
				153	@classmethod
				154	def is_released_version(cls, title):
				155	# Look for an incomplete release date
				156	return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)
				157
				158	_top_version_re = re.compile(br'(?:\A\|\n)(=[^\n]\n+)(.?\n)(?:=\|$)',
				159	re.DOTALL)
				160	@classmethod
				161	def extract_top_version(cls, changelog_file_content):
				162	"""A version section starts with a line starting with '='."""
				163	m = re.search(cls._top_version_re, changelog_file_content)
				164	top_version_start = m.start(1)
				165	top_version_end = m.end(2)
				166	top_version_title = m.group(1)
				167	top_version_body = m.group(2)
				168	if cls.is_released_version(top_version_title):
				169	top_version_end = top_version_start
				170	top_version_title = cls._unreleased_version_text + b'\n\n'
				171	top_version_body = b''
				172	return (changelog_file_content[:top_version_start],
				173	top_version_title, top_version_body,
				174	changelog_file_content[top_version_end:])
				175
				176	@classmethod
				177	def version_title_text(cls, version_title):
				178	return re.sub(br'\n.*', version_title, re.DOTALL)
				179
				180	_category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)
				181	@classmethod
				182	def split_categories(cls, version_body):
				183	"""A category title is a line with the title in column 0."""
				184	if not version_body:
				185	return []
				186	title_matches = list(re.finditer(cls._category_title_re, version_body))
				187	if not title_matches or title_matches[0].start() != 0:
				188	# There is junk before the first category.
				189	raise CategoryParseError(0, 'Junk found where category expected')
				190	title_starts = [m.start(1) for m in title_matches]
				191	body_starts = [m.end(0) for m in title_matches]
				192	body_ends = title_starts[1:] + [len(version_body)]
				193	bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
				194	for (body_start, body_end) in zip(body_starts, body_ends)]
				195	title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
				196	body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
				197	return [CategoryContent(title_match.group(1), title_line,
				198	body, body_line)
				199	for title_match, title_line, body, body_line
				200	in zip(title_matches, title_lines, bodies, body_lines)]
				201
				202	@classmethod
				203	def format_category(cls, title, body):
				204	# `split_categories` ensures that each body ends with a newline.
				205	# Make sure that there is additionally a blank line between categories.
				206	if not body.endswith(b'\n\n'):
				207	body += b'\n'
				208	return title + b'\n' + body
				209
				210	class ChangeLog:
				211	"""An Mbed TLS changelog.
				212
				213	A changelog file consists of some header text followed by one or
				214	more version sections. The version sections are in reverse
				215	chronological order. Each version section consists of a title and a body.
				216
				217	The body of a version section consists of zero or more category
				218	subsections. Each category subsection consists of a title and a body.
				219
				220	A changelog entry file has the same format as the body of a version section.
				221
				222	A `ChangelogFormat` object defines the concrete syntax of the changelog.
				223	Entry files must have the same format as the changelog file.
				224	"""
				225
				226	# Only accept dotted version numbers (e.g. "3.1", not "3").
				227	# Refuse ".x" in a version number where x is a letter: this indicates
				228	# a version that is not yet released. Something like "3.1a" is accepted.
				229	_version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
				230	_incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')
				231
				232	def add_categories_from_text(self, filename, line_offset,
				233	text, allow_unknown_category):
				234	"""Parse a version section or entry file."""
				235	try:
				236	categories = self.format.split_categories(text)
				237	except CategoryParseError as e:
				238	raise InputFormatError(filename, line_offset + e.line_offset,
				239	e.error_message)
				240	for category in categories:
				241	if not allow_unknown_category and \
				242	category.name not in self.categories:
				243	raise InputFormatError(filename,
				244	line_offset + category.title_line,
				245	'Unknown category: "{}"',
				246	category.name.decode('utf8'))
Paul Elliott	c24a1e8	2021-03-05 12:22:51 +0000	[diff] [blame^]	247
				248	body_split = category.body.splitlines()
				249	for line in body_split:
				250	if len(line) > MAX_LINE_LENGTH:
				251	raise InputFormatError(filename,
				252	line_offset + category.title_line,
				253	'Category body line too long: "{} ({})"',
				254	category.name.decode('utf8'), len(line))
				255
Gilles Peskine	cff94e3	2020-04-21 18:33:12 +0200	[diff] [blame]	256	self.categories[category.name] += category.body
				257
				258	def __init__(self, input_stream, changelog_format):
				259	"""Create a changelog object.
				260
				261	Populate the changelog object from the content of the file
				262	input_stream.
				263	"""
				264	self.format = changelog_format
				265	whole_file = input_stream.read()
				266	(self.header,
				267	self.top_version_title, top_version_body,
				268	self.trailer) = self.format.extract_top_version(whole_file)
				269	# Split the top version section into categories.
				270	self.categories = OrderedDict()
				271	for category in STANDARD_CATEGORIES:
				272	self.categories[category] = b''
				273	offset = (self.header + self.top_version_title).count(b'\n') + 1
				274	self.add_categories_from_text(input_stream.name, offset,
				275	top_version_body, True)
				276
				277	def add_file(self, input_stream):
				278	"""Add changelog entries from a file.
				279	"""
				280	self.add_categories_from_text(input_stream.name, 1,
				281	input_stream.read(), False)
				282
				283	def write(self, filename):
				284	"""Write the changelog to the specified file.
				285	"""
				286	with open(filename, 'wb') as out:
				287	out.write(self.header)
				288	out.write(self.top_version_title)
				289	for title, body in self.categories.items():
				290	if not body:
				291	continue
				292	out.write(self.format.format_category(title, body))
				293	out.write(self.trailer)
				294
				295
				296	@functools.total_ordering
				297	class EntryFileSortKey:
				298	"""This classes defines an ordering on changelog entry files: older < newer.
				299
				300	* Merged entry files are sorted according to their merge date (date of
				301	the merge commit that brought the commit that created the file into
				302	the target branch).
				303	* Committed but unmerged entry files are sorted according to the date
				304	of the commit that adds them.
				305	* Uncommitted entry files are sorted according to their modification time.
				306
				307	This class assumes that the file is in a git working directory with
				308	the target branch checked out.
				309	"""
				310
				311	# Categories of files. A lower number is considered older.
				312	MERGED = 0
				313	COMMITTED = 1
				314	LOCAL = 2
				315
				316	@staticmethod
				317	def creation_hash(filename):
				318	"""Return the git commit id at which the given file was created.
				319
				320	Return None if the file was never checked into git.
				321	"""
				322	hashes = subprocess.check_output(['git', 'log', '--format=%H',
				323	'--follow',
				324	'--', filename])
				325	m = re.search(b'(.+)$', hashes)
				326	if not m:
				327	# The git output is empty. This means that the file was
				328	# never checked in.
				329	return None
				330	# The last commit in the log is the oldest one, which is when the
				331	# file was created.
				332	return m.group(0)
				333
				334	@staticmethod
				335	def list_merges(some_hash, target, *options):
				336	"""List merge commits from some_hash to target.
				337
				338	Pass options to git to select which commits are included.
				339	"""
				340	text = subprocess.check_output(['git', 'rev-list',
				341	'--merges', *options,
				342	b'..'.join([some_hash, target])])
				343	return text.rstrip(b'\n').split(b'\n')
				344
				345	@classmethod
				346	def merge_hash(cls, some_hash):
				347	"""Return the git commit id at which the given commit was merged.
				348
				349	Return None if the given commit was never merged.
				350	"""
				351	target = b'HEAD'
				352	# List the merges from some_hash to the target in two ways.
				353	# The ancestry list is the ones that are both descendants of
				354	# some_hash and ancestors of the target.
				355	ancestry = frozenset(cls.list_merges(some_hash, target,
				356	'--ancestry-path'))
				357	# The first_parents list only contains merges that are directly
				358	# on the target branch. We want it in reverse order (oldest first).
				359	first_parents = cls.list_merges(some_hash, target,
				360	'--first-parent', '--reverse')
				361	# Look for the oldest merge commit that's both on the direct path
				362	# and directly on the target branch. That's the place where some_hash
				363	# was merged on the target branch. See
				364	# https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
				365	for commit in first_parents:
				366	if commit in ancestry:
				367	return commit
				368	return None
				369
				370	@staticmethod
				371	def commit_timestamp(commit_id):
				372	"""Return the timestamp of the given commit."""
				373	text = subprocess.check_output(['git', 'show', '-s',
				374	'--format=%ct',
				375	commit_id])
				376	return datetime.datetime.utcfromtimestamp(int(text))
				377
				378	@staticmethod
				379	def file_timestamp(filename):
				380	"""Return the modification timestamp of the given file."""
				381	mtime = os.stat(filename).st_mtime
				382	return datetime.datetime.fromtimestamp(mtime)
				383
				384	def __init__(self, filename):
				385	"""Determine position of the file in the changelog entry order.
				386
				387	This constructor returns an object that can be used with comparison
				388	operators, with `sort` and `sorted`, etc. Older entries are sorted
				389	before newer entries.
				390	"""
				391	self.filename = filename
				392	creation_hash = self.creation_hash(filename)
				393	if not creation_hash:
				394	self.category = self.LOCAL
				395	self.datetime = self.file_timestamp(filename)
				396	return
				397	merge_hash = self.merge_hash(creation_hash)
				398	if not merge_hash:
				399	self.category = self.COMMITTED
				400	self.datetime = self.commit_timestamp(creation_hash)
				401	return
				402	self.category = self.MERGED
				403	self.datetime = self.commit_timestamp(merge_hash)
				404
				405	def sort_key(self):
				406	""""Return a concrete sort key for this entry file sort key object.
				407
				408	``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
				409	"""
				410	return (self.category, self.datetime, self.filename)
				411
				412	def __eq__(self, other):
				413	return self.sort_key() == other.sort_key()
				414
				415	def __lt__(self, other):
				416	return self.sort_key() < other.sort_key()
				417
				418
				419	def check_output(generated_output_file, main_input_file, merged_files):
				420	"""Make sanity checks on the generated output.
				421
				422	The intent of these sanity checks is to have reasonable confidence
				423	that no content has been lost.
				424
				425	The sanity check is that every line that is present in an input file
				426	is also present in an output file. This is not perfect but good enough
				427	for now.
				428	"""
				429	generated_output = set(open(generated_output_file, 'rb'))
				430	for line in open(main_input_file, 'rb'):
				431	if line not in generated_output:
				432	raise LostContent('original file', line)
				433	for merged_file in merged_files:
				434	for line in open(merged_file, 'rb'):
				435	if line not in generated_output:
				436	raise LostContent(merged_file, line)
				437
				438	def finish_output(changelog, output_file, input_file, merged_files):
				439	"""Write the changelog to the output file.
				440
				441	The input file and the list of merged files are used only for sanity
				442	checks on the output.
				443	"""
				444	if os.path.exists(output_file) and not os.path.isfile(output_file):
				445	# The output is a non-regular file (e.g. pipe). Write to it directly.
				446	output_temp = output_file
				447	else:
				448	# The output is a regular file. Write to a temporary file,
				449	# then move it into place atomically.
				450	output_temp = output_file + '.tmp'
				451	changelog.write(output_temp)
				452	check_output(output_temp, input_file, merged_files)
				453	if output_temp != output_file:
				454	os.rename(output_temp, output_file)
				455
				456	def remove_merged_entries(files_to_remove):
				457	for filename in files_to_remove:
				458	os.remove(filename)
				459
				460	def list_files_to_merge(options):
				461	"""List the entry files to merge, oldest first.
				462
				463	"Oldest" is defined by `EntryFileSortKey`.
				464	"""
				465	files_to_merge = glob.glob(os.path.join(options.dir, '*.txt'))
				466	files_to_merge.sort(key=EntryFileSortKey)
				467	return files_to_merge
				468
				469	def merge_entries(options):
				470	"""Merge changelog entries into the changelog file.
				471
				472	Read the changelog file from options.input.
				473	Read entries to merge from the directory options.dir.
				474	Write the new changelog to options.output.
				475	Remove the merged entries if options.keep_entries is false.
				476	"""
				477	with open(options.input, 'rb') as input_file:
				478	changelog = ChangeLog(input_file, TextChangelogFormat)
				479	files_to_merge = list_files_to_merge(options)
				480	if not files_to_merge:
				481	sys.stderr.write('There are no pending changelog entries.\n')
				482	return
				483	for filename in files_to_merge:
				484	with open(filename, 'rb') as input_file:
				485	changelog.add_file(input_file)
				486	finish_output(changelog, options.output, options.input, files_to_merge)
				487	if not options.keep_entries:
				488	remove_merged_entries(files_to_merge)
				489
				490	def show_file_timestamps(options):
				491	"""List the files to merge and their timestamp.
				492
				493	This is only intended for debugging purposes.
				494	"""
				495	files = list_files_to_merge(options)
				496	for filename in files:
				497	ts = EntryFileSortKey(filename)
				498	print(ts.category, ts.datetime, filename)
				499
				500	def set_defaults(options):
				501	"""Add default values for missing options."""
				502	output_file = getattr(options, 'output', None)
				503	if output_file is None:
				504	options.output = options.input
				505	if getattr(options, 'keep_entries', None) is None:
				506	options.keep_entries = (output_file is not None)
				507
				508	def main():
				509	"""Command line entry point."""
				510	parser = argparse.ArgumentParser(description=__doc__)
				511	parser.add_argument('--dir', '-d', metavar='DIR',
				512	default='ChangeLog.d',
				513	help='Directory to read entries from'
				514	' (default: ChangeLog.d)')
				515	parser.add_argument('--input', '-i', metavar='FILE',
				516	default='ChangeLog',
				517	help='Existing changelog file to read from and augment'
				518	' (default: ChangeLog)')
				519	parser.add_argument('--keep-entries',
				520	action='store_true', dest='keep_entries', default=None,
				521	help='Keep the files containing entries'
				522	' (default: remove them if --output/-o is not specified)')
				523	parser.add_argument('--no-keep-entries',
				524	action='store_false', dest='keep_entries',
				525	help='Remove the files containing entries after they are merged'
				526	' (default: remove them if --output/-o is not specified)')
				527	parser.add_argument('--output', '-o', metavar='FILE',
				528	help='Output changelog file'
				529	' (default: overwrite the input)')
				530	parser.add_argument('--list-files-only',
				531	action='store_true',
				532	help=('Only list the files that would be processed '
				533	'(with some debugging information)'))
				534	options = parser.parse_args()
				535	set_defaults(options)
				536	if options.list_files_only:
				537	show_file_timestamps(options)
				538	return
				539	merge_entries(options)
				540
				541	if __name__ == '__main__':
				542	main()