Sort changelog entries by their merge date
Changelog entry files used to be listed in reverse alphabetical order of
the file name, by happenstance. Now, changelog entry files are listed in
the order in which the changes were merged. More precisely: find the git
commit in which the entry file was created, then find the merge commit
that brought this commit into the current branch. List older merges first.
List never-merged commits in date order after all the merged ones.
List never-committed files in file timestamp order after all the
committed ones.
Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py
index adea8bd..928f73b 100755
--- a/scripts/assemble_changelog.py
+++ b/scripts/assemble_changelog.py
@@ -26,9 +26,12 @@
import argparse
from collections import OrderedDict
+import datetime
+import functools
import glob
import os
import re
+import subprocess
import sys
class InputFormatError(Exception):
@@ -219,6 +222,115 @@
for line in self.trailer:
out.write(line)
+
+@functools.total_ordering
+class FileMergeTimestamp:
+ """A timestamp indicating when a file was merged.
+
+ If file1 was merged before file2, then
+ FileMergeTimestamp(file1) <= FileMergeTimestamp(file2).
+ """
+
+ # Categories of files. A lower number is considered older.
+ MERGED = 0
+ COMMITTED = 1
+ LOCAL = 2
+
+ @staticmethod
+ def creation_hash(filename):
+ """Return the git commit id (bytes) at which the given file was created.
+
+ Return None if the file was never checked into git.
+ """
+ hashes = subprocess.check_output(['git', 'log', '--format=%H', '--', filename])
+ if not hashes:
+ # The file was never checked in.
+ return None
+ hashes = hashes.rstrip(b'\n')
+ last_hash = hashes[hashes.rfind(b'\n')+1:]  # last line of the output
+ return last_hash
+
+ @staticmethod
+ def list_merges(some_hash, target, *options):
+ """List merge commit ids (bytes) in the range some_hash..target.
+
+ Pass options to git to select which commits are included.
+ """
+ text = subprocess.check_output(['git', 'rev-list',
+ '--merges', *options,
+ b'..'.join([some_hash, target])])
+ return text.rstrip(b'\n').split(b'\n')  # [b''] if git printed nothing
+
+ @classmethod
+ def merge_hash(cls, some_hash):
+ """Return the git commit id at which the given commit was merged.
+
+ Return a false value (None or b'') if the commit was never merged.
+ """
+ target = b'HEAD'
+ # List the merges from some_hash to the target in two ways.
+ # The ancestry list is the ones that are both descendants of
+ # some_hash and ancestors of the target.
+ ancestry = frozenset(cls.list_merges(some_hash, target,
+ '--ancestry-path'))
+ # The first_parents list only contains merges that are directly
+ # on the target branch. We want it in reverse order (oldest first).
+ first_parents = cls.list_merges(some_hash, target,
+ '--first-parent', '--reverse')
+ # Look for the oldest merge commit that's both on the direct path
+ # and directly on the target branch. That's the place where some_hash
+ # was merged on the target branch. See
+ # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
+ for commit in first_parents:
+ if commit in ancestry:
+ return commit
+ return None
+
+ @staticmethod
+ def commit_timestamp(commit_id):
+ """Return the commit's committer date as a naive UTC datetime."""
+ text = subprocess.check_output(['git', 'show', '-s',
+ '--format=%ct',
+ commit_id])
+ return datetime.datetime.utcfromtimestamp(int(text))
+
+ @staticmethod
+ def file_timestamp(filename):
+ """Return the file's modification time as a naive local datetime."""
+ mtime = os.stat(filename).st_mtime
+ return datetime.datetime.fromtimestamp(mtime)
+
+ def __init__(self, filename):
+ """Classify the file and determine the timestamp to sort it by."""
+ self.filename = filename
+ creation_hash = self.creation_hash(filename)
+ if not creation_hash:
+ self.category = self.LOCAL
+ self.datetime = self.file_timestamp(filename)
+ return
+ merge_hash = self.merge_hash(creation_hash)
+ if not merge_hash:
+ self.category = self.COMMITTED
+ self.datetime = self.commit_timestamp(creation_hash)
+ return
+ self.category = self.MERGED
+ self.datetime = self.commit_timestamp(merge_hash)
+
+ def sort_key(self):
+ """Return a sort key for this merge timestamp object.
+
+ ts1.sort_key() < ts2.sort_key() if and only if ts1 is
+ considered to be older than ts2.
+ """
+ return (self.category, self.datetime, self.filename)
+
+ def __eq__(self, other):
+ return self.sort_key() == other.sort_key()
+
+ def __lt__(self, other):
+ return self.sort_key() < other.sort_key()
+
+
def check_output(generated_output_file, main_input_file, merged_files):
"""Make sanity checks on the generated output.
@@ -267,6 +379,7 @@
`FileMergeTimestamp` for details.
"""
files_to_merge = glob.glob(os.path.join(options.dir, '*.md'))
+ files_to_merge.sort(key=lambda f: FileMergeTimestamp(f).sort_key())
return files_to_merge
def merge_entries(options):
@@ -290,6 +403,16 @@
if not options.keep_entries:
remove_merged_entries(files_to_merge)
+def show_file_timestamps(options):
+ """Print the category, timestamp and name of each file to merge.
+
+ This is only intended for debugging purposes.
+ """
+ files = list_files_to_merge(options)
+ for filename in files:
+ ts = FileMergeTimestamp(filename)
+ print(ts.category, ts.datetime, filename)
+
def set_defaults(options):
"""Add default values for missing options."""
output_file = getattr(options, 'output', None)
@@ -320,8 +443,14 @@
parser.add_argument('--output', '-o', metavar='FILE',
help='Output changelog file'
' (default: overwrite the input)')
+ parser.add_argument('--list-files-only',
+ action='store_true',
+ help='Only list the files that would be processed (with some debugging information)')
options = parser.parse_args()
set_defaults(options)
+ if options.list_files_only:
+ show_file_timestamps(options)
+ return
merge_entries(options)
if __name__ == '__main__':