Blame - scripts/assemble_changelog.py - mirror/mbed-tls

blob: 02bae25b70163d67b1385a4eda7a9b09f5869d35 [file] [log] [blame]

Gilles Peskine	96377d3	2020-04-21 18:36:17 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2
				3	"""Assemble Mbed TLS change log entries into the change log file.
				4
				5	Add changelog entries to the first level-2 section.
				6	Create a new level-2 section for unreleased changes if needed.
				7	Remove the input files unless --keep-entries is specified.
				8
				9	In each level-3 section, entries are sorted in chronological order
				10	(oldest first). From oldest to newest:
				11	* Merged entry files are sorted according to their merge date (date of
				12	the merge commit that brought the commit that created the file into
				13	the target branch).
				14	* Committed but unmerged entry files are sorted according to the date
				15	of the commit that adds them.
				16	* Uncommitted entry files are sorted according to their modification time.
				17
				18	You must run this program from within a git working directory.
				19	"""
				20
Bence Szépkúti	44bfbe3	2020-08-19 16:54:51 +0200	[diff] [blame^]	21	# Copyright The Mbed TLS Contributors
Bence Szépkúti	4e9f712	2020-06-05 13:02:18 +0200	[diff] [blame]	22	# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
				23	#
				24	# This file is provided under the Apache License 2.0, or the
				25	# GNU General Public License v2.0 or later.
				26	#
				27	# **********
				28	# Apache License 2.0:
Gilles Peskine	96377d3	2020-04-21 18:36:17 +0200	[diff] [blame]	29	#
				30	# Licensed under the Apache License, Version 2.0 (the "License"); you may
				31	# not use this file except in compliance with the License.
				32	# You may obtain a copy of the License at
				33	#
				34	# http://www.apache.org/licenses/LICENSE-2.0
				35	#
				36	# Unless required by applicable law or agreed to in writing, software
				37	# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				38	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				39	# See the License for the specific language governing permissions and
				40	# limitations under the License.
				41	#
Bence Szépkúti	4e9f712	2020-06-05 13:02:18 +0200	[diff] [blame]	42	# **********
				43	#
				44	# **********
				45	# GNU General Public License v2.0 or later:
				46	#
				47	# This program is free software; you can redistribute it and/or modify
				48	# it under the terms of the GNU General Public License as published by
				49	# the Free Software Foundation; either version 2 of the License, or
				50	# (at your option) any later version.
				51	#
				52	# This program is distributed in the hope that it will be useful,
				53	# but WITHOUT ANY WARRANTY; without even the implied warranty of
				54	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				55	# GNU General Public License for more details.
				56	#
				57	# You should have received a copy of the GNU General Public License along
				58	# with this program; if not, write to the Free Software Foundation, Inc.,
				59	# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
				60	#
				61	# **********
Gilles Peskine	96377d3	2020-04-21 18:36:17 +0200	[diff] [blame]	62
				63	import argparse
				64	from collections import OrderedDict, namedtuple
				65	import datetime
				66	import functools
				67	import glob
				68	import os
				69	import re
				70	import subprocess
				71	import sys
				72
				73	class InputFormatError(Exception):
				74	def __init__(self, filename, line_number, message, args, *kwargs):
				75	message = '{}:{}: {}'.format(filename, line_number,
				76	message.format(args, *kwargs))
				77	super().__init__(message)
				78
				79	class CategoryParseError(Exception):
				80	def __init__(self, line_offset, error_message):
				81	self.line_offset = line_offset
				82	self.error_message = error_message
				83	super().__init__('{}: {}'.format(line_offset, error_message))
				84
				85	class LostContent(Exception):
				86	def __init__(self, filename, line):
				87	message = ('Lost content from {}: "{}"'.format(filename, line))
				88	super().__init__(message)
				89
				90	# The category names we use in the changelog.
				91	# If you edit this, update ChangeLog.d/README.md.
				92	STANDARD_CATEGORIES = (
				93	b'API changes',
				94	b'Default behavior changes',
				95	b'Requirement changes',
				96	b'New deprecations',
				97	b'Removals',
				98	b'Features',
				99	b'Security',
				100	b'Bugfix',
				101	b'Changes',
				102	)
				103
				104	CategoryContent = namedtuple('CategoryContent', [
				105	'name', 'title_line', # Title text and line number of the title
				106	'body', 'body_line', # Body text and starting line number of the body
				107	])
				108
				109	class ChangelogFormat:
				110	"""Virtual class documenting how to write a changelog format class."""
				111
				112	@classmethod
				113	def extract_top_version(cls, changelog_file_content):
				114	"""Split out the top version section.
				115
				116	If the top version is already released, create a new top
				117	version section for an unreleased version.
				118
				119	Return ``(header, top_version_title, top_version_body, trailer)``
				120	where the "top version" is the existing top version section if it's
				121	for unreleased changes, and a newly created section otherwise.
				122	To assemble the changelog after modifying top_version_body,
				123	concatenate the four pieces.
				124	"""
				125	raise NotImplementedError
				126
				127	@classmethod
				128	def version_title_text(cls, version_title):
				129	"""Return the text of a formatted version section title."""
				130	raise NotImplementedError
				131
				132	@classmethod
				133	def split_categories(cls, version_body):
				134	"""Split a changelog version section body into categories.
				135
				136	Return a list of `CategoryContent` the name is category title
				137	without any formatting.
				138	"""
				139	raise NotImplementedError
				140
				141	@classmethod
				142	def format_category(cls, title, body):
				143	"""Construct the text of a category section from its title and body."""
				144	raise NotImplementedError
				145
				146	class TextChangelogFormat(ChangelogFormat):
				147	"""The traditional Mbed TLS changelog format."""
				148
				149	_unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'
				150	@classmethod
				151	def is_released_version(cls, title):
				152	# Look for an incomplete release date
				153	return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)
				154
				155	_top_version_re = re.compile(br'(?:\A\|\n)(=[^\n]\n+)(.?\n)(?:=\|$)',
				156	re.DOTALL)
				157	@classmethod
				158	def extract_top_version(cls, changelog_file_content):
				159	"""A version section starts with a line starting with '='."""
				160	m = re.search(cls._top_version_re, changelog_file_content)
				161	top_version_start = m.start(1)
				162	top_version_end = m.end(2)
				163	top_version_title = m.group(1)
				164	top_version_body = m.group(2)
				165	if cls.is_released_version(top_version_title):
				166	top_version_end = top_version_start
				167	top_version_title = cls._unreleased_version_text + b'\n\n'
				168	top_version_body = b''
				169	return (changelog_file_content[:top_version_start],
				170	top_version_title, top_version_body,
				171	changelog_file_content[top_version_end:])
				172
				173	@classmethod
				174	def version_title_text(cls, version_title):
				175	return re.sub(br'\n.*', version_title, re.DOTALL)
				176
				177	_category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)
				178	@classmethod
				179	def split_categories(cls, version_body):
				180	"""A category title is a line with the title in column 0."""
				181	if not version_body:
				182	return []
				183	title_matches = list(re.finditer(cls._category_title_re, version_body))
				184	if not title_matches or title_matches[0].start() != 0:
				185	# There is junk before the first category.
				186	raise CategoryParseError(0, 'Junk found where category expected')
				187	title_starts = [m.start(1) for m in title_matches]
				188	body_starts = [m.end(0) for m in title_matches]
				189	body_ends = title_starts[1:] + [len(version_body)]
				190	bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
				191	for (body_start, body_end) in zip(body_starts, body_ends)]
				192	title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
				193	body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
				194	return [CategoryContent(title_match.group(1), title_line,
				195	body, body_line)
				196	for title_match, title_line, body, body_line
				197	in zip(title_matches, title_lines, bodies, body_lines)]
				198
				199	@classmethod
				200	def format_category(cls, title, body):
				201	# `split_categories` ensures that each body ends with a newline.
				202	# Make sure that there is additionally a blank line between categories.
				203	if not body.endswith(b'\n\n'):
				204	body += b'\n'
				205	return title + b'\n' + body
				206
				207	class ChangeLog:
				208	"""An Mbed TLS changelog.
				209
				210	A changelog file consists of some header text followed by one or
				211	more version sections. The version sections are in reverse
				212	chronological order. Each version section consists of a title and a body.
				213
				214	The body of a version section consists of zero or more category
				215	subsections. Each category subsection consists of a title and a body.
				216
				217	A changelog entry file has the same format as the body of a version section.
				218
				219	A `ChangelogFormat` object defines the concrete syntax of the changelog.
				220	Entry files must have the same format as the changelog file.
				221	"""
				222
				223	# Only accept dotted version numbers (e.g. "3.1", not "3").
				224	# Refuse ".x" in a version number where x is a letter: this indicates
				225	# a version that is not yet released. Something like "3.1a" is accepted.
				226	_version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
				227	_incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')
				228
				229	def add_categories_from_text(self, filename, line_offset,
				230	text, allow_unknown_category):
				231	"""Parse a version section or entry file."""
				232	try:
				233	categories = self.format.split_categories(text)
				234	except CategoryParseError as e:
				235	raise InputFormatError(filename, line_offset + e.line_offset,
				236	e.error_message)
				237	for category in categories:
				238	if not allow_unknown_category and \
				239	category.name not in self.categories:
				240	raise InputFormatError(filename,
				241	line_offset + category.title_line,
				242	'Unknown category: "{}"',
				243	category.name.decode('utf8'))
				244	self.categories[category.name] += category.body
				245
				246	def __init__(self, input_stream, changelog_format):
				247	"""Create a changelog object.
				248
				249	Populate the changelog object from the content of the file
				250	input_stream.
				251	"""
				252	self.format = changelog_format
				253	whole_file = input_stream.read()
				254	(self.header,
				255	self.top_version_title, top_version_body,
				256	self.trailer) = self.format.extract_top_version(whole_file)
				257	# Split the top version section into categories.
				258	self.categories = OrderedDict()
				259	for category in STANDARD_CATEGORIES:
				260	self.categories[category] = b''
				261	offset = (self.header + self.top_version_title).count(b'\n') + 1
				262	self.add_categories_from_text(input_stream.name, offset,
				263	top_version_body, True)
				264
				265	def add_file(self, input_stream):
				266	"""Add changelog entries from a file.
				267	"""
				268	self.add_categories_from_text(input_stream.name, 1,
				269	input_stream.read(), False)
				270
				271	def write(self, filename):
				272	"""Write the changelog to the specified file.
				273	"""
				274	with open(filename, 'wb') as out:
				275	out.write(self.header)
				276	out.write(self.top_version_title)
				277	for title, body in self.categories.items():
				278	if not body:
				279	continue
				280	out.write(self.format.format_category(title, body))
				281	out.write(self.trailer)
				282
				283
				284	@functools.total_ordering
				285	class EntryFileSortKey:
				286	"""This classes defines an ordering on changelog entry files: older < newer.
				287
				288	* Merged entry files are sorted according to their merge date (date of
				289	the merge commit that brought the commit that created the file into
				290	the target branch).
				291	* Committed but unmerged entry files are sorted according to the date
				292	of the commit that adds them.
				293	* Uncommitted entry files are sorted according to their modification time.
				294
				295	This class assumes that the file is in a git working directory with
				296	the target branch checked out.
				297	"""
				298
				299	# Categories of files. A lower number is considered older.
				300	MERGED = 0
				301	COMMITTED = 1
				302	LOCAL = 2
				303
				304	@staticmethod
				305	def creation_hash(filename):
				306	"""Return the git commit id at which the given file was created.
				307
				308	Return None if the file was never checked into git.
				309	"""
				310	hashes = subprocess.check_output(['git', 'log', '--format=%H',
				311	'--follow',
				312	'--', filename])
				313	m = re.search(b'(.+)$', hashes)
				314	if not m:
				315	# The git output is empty. This means that the file was
				316	# never checked in.
				317	return None
				318	# The last commit in the log is the oldest one, which is when the
				319	# file was created.
				320	return m.group(0)
				321
				322	@staticmethod
				323	def list_merges(some_hash, target, *options):
				324	"""List merge commits from some_hash to target.
				325
				326	Pass options to git to select which commits are included.
				327	"""
				328	text = subprocess.check_output(['git', 'rev-list',
				329	'--merges', *options,
				330	b'..'.join([some_hash, target])])
				331	return text.rstrip(b'\n').split(b'\n')
				332
				333	@classmethod
				334	def merge_hash(cls, some_hash):
				335	"""Return the git commit id at which the given commit was merged.
				336
				337	Return None if the given commit was never merged.
				338	"""
				339	target = b'HEAD'
				340	# List the merges from some_hash to the target in two ways.
				341	# The ancestry list is the ones that are both descendants of
				342	# some_hash and ancestors of the target.
				343	ancestry = frozenset(cls.list_merges(some_hash, target,
				344	'--ancestry-path'))
				345	# The first_parents list only contains merges that are directly
				346	# on the target branch. We want it in reverse order (oldest first).
				347	first_parents = cls.list_merges(some_hash, target,
				348	'--first-parent', '--reverse')
				349	# Look for the oldest merge commit that's both on the direct path
				350	# and directly on the target branch. That's the place where some_hash
				351	# was merged on the target branch. See
				352	# https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
				353	for commit in first_parents:
				354	if commit in ancestry:
				355	return commit
				356	return None
				357
				358	@staticmethod
				359	def commit_timestamp(commit_id):
				360	"""Return the timestamp of the given commit."""
				361	text = subprocess.check_output(['git', 'show', '-s',
				362	'--format=%ct',
				363	commit_id])
				364	return datetime.datetime.utcfromtimestamp(int(text))
				365
				366	@staticmethod
				367	def file_timestamp(filename):
				368	"""Return the modification timestamp of the given file."""
				369	mtime = os.stat(filename).st_mtime
				370	return datetime.datetime.fromtimestamp(mtime)
				371
				372	def __init__(self, filename):
				373	"""Determine position of the file in the changelog entry order.
				374
				375	This constructor returns an object that can be used with comparison
				376	operators, with `sort` and `sorted`, etc. Older entries are sorted
				377	before newer entries.
				378	"""
				379	self.filename = filename
				380	creation_hash = self.creation_hash(filename)
				381	if not creation_hash:
				382	self.category = self.LOCAL
				383	self.datetime = self.file_timestamp(filename)
				384	return
				385	merge_hash = self.merge_hash(creation_hash)
				386	if not merge_hash:
				387	self.category = self.COMMITTED
				388	self.datetime = self.commit_timestamp(creation_hash)
				389	return
				390	self.category = self.MERGED
				391	self.datetime = self.commit_timestamp(merge_hash)
				392
				393	def sort_key(self):
				394	""""Return a concrete sort key for this entry file sort key object.
				395
				396	``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
				397	"""
				398	return (self.category, self.datetime, self.filename)
				399
				400	def __eq__(self, other):
				401	return self.sort_key() == other.sort_key()
				402
				403	def __lt__(self, other):
				404	return self.sort_key() < other.sort_key()
				405
				406
				407	def check_output(generated_output_file, main_input_file, merged_files):
				408	"""Make sanity checks on the generated output.
				409
				410	The intent of these sanity checks is to have reasonable confidence
				411	that no content has been lost.
				412
				413	The sanity check is that every line that is present in an input file
				414	is also present in an output file. This is not perfect but good enough
				415	for now.
				416	"""
				417	generated_output = set(open(generated_output_file, 'rb'))
				418	for line in open(main_input_file, 'rb'):
				419	if line not in generated_output:
				420	raise LostContent('original file', line)
				421	for merged_file in merged_files:
				422	for line in open(merged_file, 'rb'):
				423	if line not in generated_output:
				424	raise LostContent(merged_file, line)
				425
				426	def finish_output(changelog, output_file, input_file, merged_files):
				427	"""Write the changelog to the output file.
				428
				429	The input file and the list of merged files are used only for sanity
				430	checks on the output.
				431	"""
				432	if os.path.exists(output_file) and not os.path.isfile(output_file):
				433	# The output is a non-regular file (e.g. pipe). Write to it directly.
				434	output_temp = output_file
				435	else:
				436	# The output is a regular file. Write to a temporary file,
				437	# then move it into place atomically.
				438	output_temp = output_file + '.tmp'
				439	changelog.write(output_temp)
				440	check_output(output_temp, input_file, merged_files)
				441	if output_temp != output_file:
				442	os.rename(output_temp, output_file)
				443
				444	def remove_merged_entries(files_to_remove):
				445	for filename in files_to_remove:
				446	os.remove(filename)
				447
				448	def list_files_to_merge(options):
				449	"""List the entry files to merge, oldest first.
				450
				451	"Oldest" is defined by `EntryFileSortKey`.
				452	"""
				453	files_to_merge = glob.glob(os.path.join(options.dir, '*.txt'))
				454	files_to_merge.sort(key=EntryFileSortKey)
				455	return files_to_merge
				456
				457	def merge_entries(options):
				458	"""Merge changelog entries into the changelog file.
				459
				460	Read the changelog file from options.input.
				461	Read entries to merge from the directory options.dir.
				462	Write the new changelog to options.output.
				463	Remove the merged entries if options.keep_entries is false.
				464	"""
				465	with open(options.input, 'rb') as input_file:
				466	changelog = ChangeLog(input_file, TextChangelogFormat)
				467	files_to_merge = list_files_to_merge(options)
				468	if not files_to_merge:
				469	sys.stderr.write('There are no pending changelog entries.\n')
				470	return
				471	for filename in files_to_merge:
				472	with open(filename, 'rb') as input_file:
				473	changelog.add_file(input_file)
				474	finish_output(changelog, options.output, options.input, files_to_merge)
				475	if not options.keep_entries:
				476	remove_merged_entries(files_to_merge)
				477
				478	def show_file_timestamps(options):
				479	"""List the files to merge and their timestamp.
				480
				481	This is only intended for debugging purposes.
				482	"""
				483	files = list_files_to_merge(options)
				484	for filename in files:
				485	ts = EntryFileSortKey(filename)
				486	print(ts.category, ts.datetime, filename)
				487
				488	def set_defaults(options):
				489	"""Add default values for missing options."""
				490	output_file = getattr(options, 'output', None)
				491	if output_file is None:
				492	options.output = options.input
				493	if getattr(options, 'keep_entries', None) is None:
				494	options.keep_entries = (output_file is not None)
				495
				496	def main():
				497	"""Command line entry point."""
				498	parser = argparse.ArgumentParser(description=__doc__)
				499	parser.add_argument('--dir', '-d', metavar='DIR',
				500	default='ChangeLog.d',
				501	help='Directory to read entries from'
				502	' (default: ChangeLog.d)')
				503	parser.add_argument('--input', '-i', metavar='FILE',
				504	default='ChangeLog',
				505	help='Existing changelog file to read from and augment'
				506	' (default: ChangeLog)')
				507	parser.add_argument('--keep-entries',
				508	action='store_true', dest='keep_entries', default=None,
				509	help='Keep the files containing entries'
				510	' (default: remove them if --output/-o is not specified)')
				511	parser.add_argument('--no-keep-entries',
				512	action='store_false', dest='keep_entries',
				513	help='Remove the files containing entries after they are merged'
				514	' (default: remove them if --output/-o is not specified)')
				515	parser.add_argument('--output', '-o', metavar='FILE',
				516	help='Output changelog file'
				517	' (default: overwrite the input)')
				518	parser.add_argument('--list-files-only',
				519	action='store_true',
				520	help=('Only list the files that would be processed '
				521	'(with some debugging information)'))
				522	options = parser.parse_args()
				523	set_defaults(options)
				524	if options.list_files_only:
				525	show_file_timestamps(options)
				526	return
				527	merge_entries(options)
				528
				529	if __name__ == '__main__':
				530	main()