Blame - scripts/assemble_changelog.py - mirror/mbed-tls

blob: 8bf6c7e1d5898c43fe8448079eba6bf89df7d58b [file] [log] [blame]

Gilles Peskine	96377d3	2020-04-21 18:36:17 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2
				3	"""Assemble Mbed TLS change log entries into the change log file.
				4
				5	Add changelog entries to the first level-2 section.
				6	Create a new level-2 section for unreleased changes if needed.
				7	Remove the input files unless --keep-entries is specified.
				8
				9	In each level-3 section, entries are sorted in chronological order
				10	(oldest first). From oldest to newest:
				11	* Merged entry files are sorted according to their merge date (date of
				12	the merge commit that brought the commit that created the file into
				13	the target branch).
				14	* Committed but unmerged entry files are sorted according to the date
				15	of the commit that adds them.
				16	* Uncommitted entry files are sorted according to their modification time.
				17
				18	You must run this program from within a git working directory.
				19	"""
				20
				21	# Copyright (C) 2019, Arm Limited, All Rights Reserved
Bence Szépkúti	4e9f712	2020-06-05 13:02:18 +0200	[diff] [blame]	22	# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
				23	#
				24	# This file is provided under the Apache License 2.0, or the
				25	# GNU General Public License v2.0 or later.
				26	#
				27	# **********
				28	# Apache License 2.0:
Gilles Peskine	96377d3	2020-04-21 18:36:17 +0200	[diff] [blame]	29	#
				30	# Licensed under the Apache License, Version 2.0 (the "License"); you may
				31	# not use this file except in compliance with the License.
				32	# You may obtain a copy of the License at
				33	#
				34	# http://www.apache.org/licenses/LICENSE-2.0
				35	#
				36	# Unless required by applicable law or agreed to in writing, software
				37	# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				38	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				39	# See the License for the specific language governing permissions and
				40	# limitations under the License.
				41	#
Bence Szépkúti	4e9f712	2020-06-05 13:02:18 +0200	[diff] [blame]	42	# **********
				43	#
				44	# **********
				45	# GNU General Public License v2.0 or later:
				46	#
				47	# This program is free software; you can redistribute it and/or modify
				48	# it under the terms of the GNU General Public License as published by
				49	# the Free Software Foundation; either version 2 of the License, or
				50	# (at your option) any later version.
				51	#
				52	# This program is distributed in the hope that it will be useful,
				53	# but WITHOUT ANY WARRANTY; without even the implied warranty of
				54	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				55	# GNU General Public License for more details.
				56	#
				57	# You should have received a copy of the GNU General Public License along
				58	# with this program; if not, write to the Free Software Foundation, Inc.,
				59	# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
				60	#
				61	# **********
				62	#
Gilles Peskine	96377d3	2020-04-21 18:36:17 +0200	[diff] [blame]	63	# This file is part of Mbed TLS (https://tls.mbed.org)
				64
				65	import argparse
				66	from collections import OrderedDict, namedtuple
				67	import datetime
				68	import functools
				69	import glob
				70	import os
				71	import re
				72	import subprocess
				73	import sys
				74
				75	class InputFormatError(Exception):
				76	def __init__(self, filename, line_number, message, args, *kwargs):
				77	message = '{}:{}: {}'.format(filename, line_number,
				78	message.format(args, *kwargs))
				79	super().__init__(message)
				80
				81	class CategoryParseError(Exception):
				82	def __init__(self, line_offset, error_message):
				83	self.line_offset = line_offset
				84	self.error_message = error_message
				85	super().__init__('{}: {}'.format(line_offset, error_message))
				86
				87	class LostContent(Exception):
				88	def __init__(self, filename, line):
				89	message = ('Lost content from {}: "{}"'.format(filename, line))
				90	super().__init__(message)
				91
				92	# The category names we use in the changelog.
				93	# If you edit this, update ChangeLog.d/README.md.
				94	STANDARD_CATEGORIES = (
				95	b'API changes',
				96	b'Default behavior changes',
				97	b'Requirement changes',
				98	b'New deprecations',
				99	b'Removals',
				100	b'Features',
				101	b'Security',
				102	b'Bugfix',
				103	b'Changes',
				104	)
				105
				106	CategoryContent = namedtuple('CategoryContent', [
				107	'name', 'title_line', # Title text and line number of the title
				108	'body', 'body_line', # Body text and starting line number of the body
				109	])
				110
				111	class ChangelogFormat:
				112	"""Virtual class documenting how to write a changelog format class."""
				113
				114	@classmethod
				115	def extract_top_version(cls, changelog_file_content):
				116	"""Split out the top version section.
				117
				118	If the top version is already released, create a new top
				119	version section for an unreleased version.
				120
				121	Return ``(header, top_version_title, top_version_body, trailer)``
				122	where the "top version" is the existing top version section if it's
				123	for unreleased changes, and a newly created section otherwise.
				124	To assemble the changelog after modifying top_version_body,
				125	concatenate the four pieces.
				126	"""
				127	raise NotImplementedError
				128
				129	@classmethod
				130	def version_title_text(cls, version_title):
				131	"""Return the text of a formatted version section title."""
				132	raise NotImplementedError
				133
				134	@classmethod
				135	def split_categories(cls, version_body):
				136	"""Split a changelog version section body into categories.
				137
				138	Return a list of `CategoryContent` the name is category title
				139	without any formatting.
				140	"""
				141	raise NotImplementedError
				142
				143	@classmethod
				144	def format_category(cls, title, body):
				145	"""Construct the text of a category section from its title and body."""
				146	raise NotImplementedError
				147
				148	class TextChangelogFormat(ChangelogFormat):
				149	"""The traditional Mbed TLS changelog format."""
				150
				151	_unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx'
				152	@classmethod
				153	def is_released_version(cls, title):
				154	# Look for an incomplete release date
				155	return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title)
				156
				157	_top_version_re = re.compile(br'(?:\A\|\n)(=[^\n]\n+)(.?\n)(?:=\|$)',
				158	re.DOTALL)
				159	@classmethod
				160	def extract_top_version(cls, changelog_file_content):
				161	"""A version section starts with a line starting with '='."""
				162	m = re.search(cls._top_version_re, changelog_file_content)
				163	top_version_start = m.start(1)
				164	top_version_end = m.end(2)
				165	top_version_title = m.group(1)
				166	top_version_body = m.group(2)
				167	if cls.is_released_version(top_version_title):
				168	top_version_end = top_version_start
				169	top_version_title = cls._unreleased_version_text + b'\n\n'
				170	top_version_body = b''
				171	return (changelog_file_content[:top_version_start],
				172	top_version_title, top_version_body,
				173	changelog_file_content[top_version_end:])
				174
				175	@classmethod
				176	def version_title_text(cls, version_title):
				177	return re.sub(br'\n.*', version_title, re.DOTALL)
				178
				179	_category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE)
				180	@classmethod
				181	def split_categories(cls, version_body):
				182	"""A category title is a line with the title in column 0."""
				183	if not version_body:
				184	return []
				185	title_matches = list(re.finditer(cls._category_title_re, version_body))
				186	if not title_matches or title_matches[0].start() != 0:
				187	# There is junk before the first category.
				188	raise CategoryParseError(0, 'Junk found where category expected')
				189	title_starts = [m.start(1) for m in title_matches]
				190	body_starts = [m.end(0) for m in title_matches]
				191	body_ends = title_starts[1:] + [len(version_body)]
				192	bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n'
				193	for (body_start, body_end) in zip(body_starts, body_ends)]
				194	title_lines = [version_body[:pos].count(b'\n') for pos in title_starts]
				195	body_lines = [version_body[:pos].count(b'\n') for pos in body_starts]
				196	return [CategoryContent(title_match.group(1), title_line,
				197	body, body_line)
				198	for title_match, title_line, body, body_line
				199	in zip(title_matches, title_lines, bodies, body_lines)]
				200
				201	@classmethod
				202	def format_category(cls, title, body):
				203	# `split_categories` ensures that each body ends with a newline.
				204	# Make sure that there is additionally a blank line between categories.
				205	if not body.endswith(b'\n\n'):
				206	body += b'\n'
				207	return title + b'\n' + body
				208
				209	class ChangeLog:
				210	"""An Mbed TLS changelog.
				211
				212	A changelog file consists of some header text followed by one or
				213	more version sections. The version sections are in reverse
				214	chronological order. Each version section consists of a title and a body.
				215
				216	The body of a version section consists of zero or more category
				217	subsections. Each category subsection consists of a title and a body.
				218
				219	A changelog entry file has the same format as the body of a version section.
				220
				221	A `ChangelogFormat` object defines the concrete syntax of the changelog.
				222	Entry files must have the same format as the changelog file.
				223	"""
				224
				225	# Only accept dotted version numbers (e.g. "3.1", not "3").
				226	# Refuse ".x" in a version number where x is a letter: this indicates
				227	# a version that is not yet released. Something like "3.1a" is accepted.
				228	_version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
				229	_incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')
				230
				231	def add_categories_from_text(self, filename, line_offset,
				232	text, allow_unknown_category):
				233	"""Parse a version section or entry file."""
				234	try:
				235	categories = self.format.split_categories(text)
				236	except CategoryParseError as e:
				237	raise InputFormatError(filename, line_offset + e.line_offset,
				238	e.error_message)
				239	for category in categories:
				240	if not allow_unknown_category and \
				241	category.name not in self.categories:
				242	raise InputFormatError(filename,
				243	line_offset + category.title_line,
				244	'Unknown category: "{}"',
				245	category.name.decode('utf8'))
				246	self.categories[category.name] += category.body
				247
				248	def __init__(self, input_stream, changelog_format):
				249	"""Create a changelog object.
				250
				251	Populate the changelog object from the content of the file
				252	input_stream.
				253	"""
				254	self.format = changelog_format
				255	whole_file = input_stream.read()
				256	(self.header,
				257	self.top_version_title, top_version_body,
				258	self.trailer) = self.format.extract_top_version(whole_file)
				259	# Split the top version section into categories.
				260	self.categories = OrderedDict()
				261	for category in STANDARD_CATEGORIES:
				262	self.categories[category] = b''
				263	offset = (self.header + self.top_version_title).count(b'\n') + 1
				264	self.add_categories_from_text(input_stream.name, offset,
				265	top_version_body, True)
				266
				267	def add_file(self, input_stream):
				268	"""Add changelog entries from a file.
				269	"""
				270	self.add_categories_from_text(input_stream.name, 1,
				271	input_stream.read(), False)
				272
				273	def write(self, filename):
				274	"""Write the changelog to the specified file.
				275	"""
				276	with open(filename, 'wb') as out:
				277	out.write(self.header)
				278	out.write(self.top_version_title)
				279	for title, body in self.categories.items():
				280	if not body:
				281	continue
				282	out.write(self.format.format_category(title, body))
				283	out.write(self.trailer)
				284
				285
				286	@functools.total_ordering
				287	class EntryFileSortKey:
				288	"""This classes defines an ordering on changelog entry files: older < newer.
				289
				290	* Merged entry files are sorted according to their merge date (date of
				291	the merge commit that brought the commit that created the file into
				292	the target branch).
				293	* Committed but unmerged entry files are sorted according to the date
				294	of the commit that adds them.
				295	* Uncommitted entry files are sorted according to their modification time.
				296
				297	This class assumes that the file is in a git working directory with
				298	the target branch checked out.
				299	"""
				300
				301	# Categories of files. A lower number is considered older.
				302	MERGED = 0
				303	COMMITTED = 1
				304	LOCAL = 2
				305
				306	@staticmethod
				307	def creation_hash(filename):
				308	"""Return the git commit id at which the given file was created.
				309
				310	Return None if the file was never checked into git.
				311	"""
				312	hashes = subprocess.check_output(['git', 'log', '--format=%H',
				313	'--follow',
				314	'--', filename])
				315	m = re.search(b'(.+)$', hashes)
				316	if not m:
				317	# The git output is empty. This means that the file was
				318	# never checked in.
				319	return None
				320	# The last commit in the log is the oldest one, which is when the
				321	# file was created.
				322	return m.group(0)
				323
				324	@staticmethod
				325	def list_merges(some_hash, target, *options):
				326	"""List merge commits from some_hash to target.
				327
				328	Pass options to git to select which commits are included.
				329	"""
				330	text = subprocess.check_output(['git', 'rev-list',
				331	'--merges', *options,
				332	b'..'.join([some_hash, target])])
				333	return text.rstrip(b'\n').split(b'\n')
				334
				335	@classmethod
				336	def merge_hash(cls, some_hash):
				337	"""Return the git commit id at which the given commit was merged.
				338
				339	Return None if the given commit was never merged.
				340	"""
				341	target = b'HEAD'
				342	# List the merges from some_hash to the target in two ways.
				343	# The ancestry list is the ones that are both descendants of
				344	# some_hash and ancestors of the target.
				345	ancestry = frozenset(cls.list_merges(some_hash, target,
				346	'--ancestry-path'))
				347	# The first_parents list only contains merges that are directly
				348	# on the target branch. We want it in reverse order (oldest first).
				349	first_parents = cls.list_merges(some_hash, target,
				350	'--first-parent', '--reverse')
				351	# Look for the oldest merge commit that's both on the direct path
				352	# and directly on the target branch. That's the place where some_hash
				353	# was merged on the target branch. See
				354	# https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
				355	for commit in first_parents:
				356	if commit in ancestry:
				357	return commit
				358	return None
				359
				360	@staticmethod
				361	def commit_timestamp(commit_id):
				362	"""Return the timestamp of the given commit."""
				363	text = subprocess.check_output(['git', 'show', '-s',
				364	'--format=%ct',
				365	commit_id])
				366	return datetime.datetime.utcfromtimestamp(int(text))
				367
				368	@staticmethod
				369	def file_timestamp(filename):
				370	"""Return the modification timestamp of the given file."""
				371	mtime = os.stat(filename).st_mtime
				372	return datetime.datetime.fromtimestamp(mtime)
				373
				374	def __init__(self, filename):
				375	"""Determine position of the file in the changelog entry order.
				376
				377	This constructor returns an object that can be used with comparison
				378	operators, with `sort` and `sorted`, etc. Older entries are sorted
				379	before newer entries.
				380	"""
				381	self.filename = filename
				382	creation_hash = self.creation_hash(filename)
				383	if not creation_hash:
				384	self.category = self.LOCAL
				385	self.datetime = self.file_timestamp(filename)
				386	return
				387	merge_hash = self.merge_hash(creation_hash)
				388	if not merge_hash:
				389	self.category = self.COMMITTED
				390	self.datetime = self.commit_timestamp(creation_hash)
				391	return
				392	self.category = self.MERGED
				393	self.datetime = self.commit_timestamp(merge_hash)
				394
				395	def sort_key(self):
				396	""""Return a concrete sort key for this entry file sort key object.
				397
				398	``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``.
				399	"""
				400	return (self.category, self.datetime, self.filename)
				401
				402	def __eq__(self, other):
				403	return self.sort_key() == other.sort_key()
				404
				405	def __lt__(self, other):
				406	return self.sort_key() < other.sort_key()
				407
				408
				409	def check_output(generated_output_file, main_input_file, merged_files):
				410	"""Make sanity checks on the generated output.
				411
				412	The intent of these sanity checks is to have reasonable confidence
				413	that no content has been lost.
				414
				415	The sanity check is that every line that is present in an input file
				416	is also present in an output file. This is not perfect but good enough
				417	for now.
				418	"""
				419	generated_output = set(open(generated_output_file, 'rb'))
				420	for line in open(main_input_file, 'rb'):
				421	if line not in generated_output:
				422	raise LostContent('original file', line)
				423	for merged_file in merged_files:
				424	for line in open(merged_file, 'rb'):
				425	if line not in generated_output:
				426	raise LostContent(merged_file, line)
				427
				428	def finish_output(changelog, output_file, input_file, merged_files):
				429	"""Write the changelog to the output file.
				430
				431	The input file and the list of merged files are used only for sanity
				432	checks on the output.
				433	"""
				434	if os.path.exists(output_file) and not os.path.isfile(output_file):
				435	# The output is a non-regular file (e.g. pipe). Write to it directly.
				436	output_temp = output_file
				437	else:
				438	# The output is a regular file. Write to a temporary file,
				439	# then move it into place atomically.
				440	output_temp = output_file + '.tmp'
				441	changelog.write(output_temp)
				442	check_output(output_temp, input_file, merged_files)
				443	if output_temp != output_file:
				444	os.rename(output_temp, output_file)
				445
				446	def remove_merged_entries(files_to_remove):
				447	for filename in files_to_remove:
				448	os.remove(filename)
				449
				450	def list_files_to_merge(options):
				451	"""List the entry files to merge, oldest first.
				452
				453	"Oldest" is defined by `EntryFileSortKey`.
				454	"""
				455	files_to_merge = glob.glob(os.path.join(options.dir, '*.txt'))
				456	files_to_merge.sort(key=EntryFileSortKey)
				457	return files_to_merge
				458
				459	def merge_entries(options):
				460	"""Merge changelog entries into the changelog file.
				461
				462	Read the changelog file from options.input.
				463	Read entries to merge from the directory options.dir.
				464	Write the new changelog to options.output.
				465	Remove the merged entries if options.keep_entries is false.
				466	"""
				467	with open(options.input, 'rb') as input_file:
				468	changelog = ChangeLog(input_file, TextChangelogFormat)
				469	files_to_merge = list_files_to_merge(options)
				470	if not files_to_merge:
				471	sys.stderr.write('There are no pending changelog entries.\n')
				472	return
				473	for filename in files_to_merge:
				474	with open(filename, 'rb') as input_file:
				475	changelog.add_file(input_file)
				476	finish_output(changelog, options.output, options.input, files_to_merge)
				477	if not options.keep_entries:
				478	remove_merged_entries(files_to_merge)
				479
				480	def show_file_timestamps(options):
				481	"""List the files to merge and their timestamp.
				482
				483	This is only intended for debugging purposes.
				484	"""
				485	files = list_files_to_merge(options)
				486	for filename in files:
				487	ts = EntryFileSortKey(filename)
				488	print(ts.category, ts.datetime, filename)
				489
				490	def set_defaults(options):
				491	"""Add default values for missing options."""
				492	output_file = getattr(options, 'output', None)
				493	if output_file is None:
				494	options.output = options.input
				495	if getattr(options, 'keep_entries', None) is None:
				496	options.keep_entries = (output_file is not None)
				497
				498	def main():
				499	"""Command line entry point."""
				500	parser = argparse.ArgumentParser(description=__doc__)
				501	parser.add_argument('--dir', '-d', metavar='DIR',
				502	default='ChangeLog.d',
				503	help='Directory to read entries from'
				504	' (default: ChangeLog.d)')
				505	parser.add_argument('--input', '-i', metavar='FILE',
				506	default='ChangeLog',
				507	help='Existing changelog file to read from and augment'
				508	' (default: ChangeLog)')
				509	parser.add_argument('--keep-entries',
				510	action='store_true', dest='keep_entries', default=None,
				511	help='Keep the files containing entries'
				512	' (default: remove them if --output/-o is not specified)')
				513	parser.add_argument('--no-keep-entries',
				514	action='store_false', dest='keep_entries',
				515	help='Remove the files containing entries after they are merged'
				516	' (default: remove them if --output/-o is not specified)')
				517	parser.add_argument('--output', '-o', metavar='FILE',
				518	help='Output changelog file'
				519	' (default: overwrite the input)')
				520	parser.add_argument('--list-files-only',
				521	action='store_true',
				522	help=('Only list the files that would be processed '
				523	'(with some debugging information)'))
				524	options = parser.parse_args()
				525	set_defaults(options)
				526	if options.list_files_only:
				527	show_file_timestamps(options)
				528	return
				529	merge_entries(options)
				530
				531	if __name__ == '__main__':
				532	main()