Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | |
| 3 | """Assemble Mbed Crypto change log entries into the change log file. |
| 4 | """ |
| 5 | |
| 6 | # Copyright (C) 2019, Arm Limited, All Rights Reserved |
| 7 | # SPDX-License-Identifier: Apache-2.0 |
| 8 | # |
| 9 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 10 | # not use this file except in compliance with the License. |
| 11 | # You may obtain a copy of the License at |
| 12 | # |
| 13 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 14 | # |
| 15 | # Unless required by applicable law or agreed to in writing, software |
| 16 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 17 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 18 | # See the License for the specific language governing permissions and |
| 19 | # limitations under the License. |
| 20 | # |
| 21 | # This file is part of Mbed Crypto (https://tls.mbed.org) |
| 22 | |
| 23 | import argparse |
Gilles Peskine | d8b6c77 | 2020-01-28 18:57:47 +0100 | [diff] [blame^] | 24 | from collections import OrderedDict |
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 25 | import glob |
| 26 | import os |
| 27 | import re |
| 28 | import sys |
| 29 | |
class InputFormatError(Exception):
    """Error raised when a changelog entry file is malformed.

    The exception text has the form ``FILENAME:LINE: DETAIL`` where DETAIL
    is ``message`` formatted with the extra positional and keyword
    arguments (``str.format`` syntax).
    """

    def __init__(self, filename, line_number, message, *args, **kwargs):
        detail = message.format(*args, **kwargs)
        location = '{}:{}'.format(filename, line_number)
        super().__init__(location + ': ' + detail)
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 35 | |
class LostContent(Exception):
    """Error raised when a line of an input file is absent from the output.

    ``filename`` identifies the input the line came from and ``line`` is
    the missing line itself; both are embedded in the exception text.
    """

    def __init__(self, filename, line):
        template = 'Lost content from {}: "{}"'
        super().__init__(template.format(filename, line))
| 40 | |
# Section titles that are always recognized in changelog entry files, in
# the order in which the sections are written to the assembled changelog.
# They are bytes because the changelog files are processed in binary mode.
STANDARD_SECTIONS = (
    b'Interface changes',
    b'Default behavior changes',
    b'Requirement changes',
    b'New deprecations',
    b'Removals',
    b'New features',
    b'Security',
    b'Bug fixes',
    b'Performance improvements',
    b'Other changes',
)
| 53 | |
class ChangeLog:
    """An Mbed Crypto changelog.

    A changelog is a file in Markdown format. Each level 2 section title
    starts a version, and versions are sorted in reverse chronological
    order. Lines with a level 2 section title must start with '##'.

    Within a version, there are multiple sections, each devoted to a kind
    of change: bug fix, feature request, etc. Section titles should match
    entries in STANDARD_SECTIONS exactly.

    Within each section, each separate change should be on a line starting
    with a '*' bullet. There may be blank lines surrounding titles, but
    there should not be any blank line inside a section.

    All lines are handled as bytes: the files are read and written in
    binary mode.
    """

    # Matches the (possibly empty) run of '#' characters that starts a line.
    # Since '#*' also matches the empty string, match() never returns None.
    _title_re = re.compile(br'#*')

    def title_level(self, line):
        """Determine whether the line is a title.

        Return (level, content) where level is the Markdown section level
        (1 for '#', 2 for '##', etc.) and content is the section title
        without leading or trailing whitespace. For a non-title line,
        the level is 0.
        """
        level = self._title_re.match(line).end()
        return level, line[level:].strip()

    def __init__(self, input_stream):
        """Create a changelog object.

        Populate the changelog object from the content of the file
        input_stream. This is typically a file opened for reading, but
        can be any generator returning the lines to read.
        """
        # Lines that precede the first level 3 section of the first version.
        self.header = []
        # Maps each section title to the list of its content lines. The
        # standard sections come first, in their canonical order.
        self.section_content = OrderedDict()
        for section in STANDARD_SECTIONS:
            self.section_content[section] = []
        # Lines from the second level 2 title (previous version) onwards.
        self.trailer = []
        self.read_main_file(input_stream)

    def read_main_file(self, input_stream):
        """Populate the changelog object from the content of the file.

        This method is only intended to be called as part of the constructor
        of the class and may not act sensibly on an object that is already
        partially populated.
        """
        level_2_seen = 0
        current_section = None
        for line in input_stream:
            level, content = self.title_level(line)
            if level == 2:
                level_2_seen += 1
                if level_2_seen <= 1:
                    self.header.append(line)
                else:
                    self.trailer.append(line)
            elif level == 3 and level_2_seen == 1:
                # The title line itself is not stored: write() regenerates
                # it from the section name.
                current_section = content
                self.section_content.setdefault(content, [])
            elif level_2_seen == 1 and current_section is not None:
                # Inside a section of the first version: keep only the
                # non-blank lines.
                if line.strip():
                    self.section_content[current_section].append(line)
            elif level_2_seen <= 1:
                self.header.append(line)
            else:
                self.trailer.append(line)

    def add_file(self, input_stream):
        """Add changelog entries from a file.

        Read lines from input_stream, which is typically a file opened
        for reading. These lines must contain a series of level 3
        Markdown sections with recognized titles. The corresponding
        content is injected into the respective sections in the changelog.
        The section titles must be either one of the hard-coded values
        in STANDARD_SECTIONS in assemble_changelog.py or already present
        in ChangeLog.md. Section titles must match byte-for-byte except that
        leading or trailing whitespace is ignored.

        If input_stream has a ``name`` attribute, it is used as the file
        name in error messages.

        Raise InputFormatError if a section title is not recognized, if
        content appears before the first section title, or if a title of
        a level other than 3 is encountered.
        """
        # Accept any iterable of lines, like the constructor does; only
        # real file objects carry a name.
        filename = getattr(input_stream, 'name', '<unknown>')
        current_section = None
        for line_number, line in enumerate(input_stream, 1):
            if not line.strip():
                continue
            level, content = self.title_level(line)
            if level == 3:
                current_section = content
                if current_section not in self.section_content:
                    # Decode the title for display rather than slicing the
                    # repr of a bytes object.
                    raise InputFormatError(
                        filename, line_number,
                        'Section {!r} is not recognized',
                        current_section.decode('utf-8', 'replace'))
            elif level == 0:
                if current_section is None:
                    raise InputFormatError(
                        filename, line_number,
                        'Missing section title at the beginning of the file')
                self.section_content[current_section].append(line)
            else:
                raise InputFormatError(
                    filename, line_number,
                    'Only level 3 headers (###) are permitted')

    def write(self, filename):
        """Write the changelog to the specified file.

        The header is written first, then every non-empty section (title,
        blank line, content, blank line), then the trailer.
        """
        with open(filename, 'wb') as out:
            for line in self.header:
                out.write(line)
            for section, lines in self.section_content.items():
                # Drop blank lines at either end of the section content.
                while lines and not lines[0].strip():
                    del lines[0]
                while lines and not lines[-1].strip():
                    del lines[-1]
                if not lines:
                    continue
                out.write(b'### ' + section + b'\n\n')
                for line in lines:
                    out.write(line)
                out.write(b'\n')
            for line in self.trailer:
                out.write(line)
| 176 | |
def check_output(generated_output_file, main_input_file, merged_files):
    """Make sanity checks on the generated output.

    The intent of these sanity checks is to have reasonable confidence
    that no content has been lost.

    The sanity check is that every line that is present in an input file
    is also present in an output file. This is not perfect but good enough
    for now.

    generated_output_file and main_input_file are file names; merged_files
    is an iterable of file names. All files are read in binary mode.

    Raise LostContent for the first input line that is missing from the
    generated output.
    """
    # Every file is opened in a `with` block so that it is closed promptly,
    # including when LostContent is raised.
    with open(generated_output_file, 'rb') as generated:
        generated_output = set(generated)
    with open(main_input_file, 'rb') as main_input:
        for line in main_input:
            if line not in generated_output:
                raise LostContent('original file', line)
    for merged_file in merged_files:
        with open(merged_file, 'rb') as merged:
            for line in merged:
                if line not in generated_output:
                    raise LostContent(merged_file, line)
| 195 | |
def finish_output(changelog, output_file, input_file, merged_files):
    """Write the changelog to the output file.

    The input file and the list of merged files are used only for sanity
    checks on the output.

    If output_file is a regular file or does not exist yet, the content is
    first written to ``output_file + '.tmp'``, checked, and only then moved
    into place. Otherwise (e.g. a pipe) it is written directly.

    Any exception raised by check_output() propagates to the caller, in
    which case the output file is not replaced.
    """
    if os.path.exists(output_file) and not os.path.isfile(output_file):
        # The output is a non-regular file (e.g. pipe). Write to it directly.
        output_temp = output_file
    else:
        # The output is a regular file. Write to a temporary file,
        # then move it into place atomically.
        output_temp = output_file + '.tmp'
    changelog.write(output_temp)
    check_output(output_temp, input_file, merged_files)
    if output_temp != output_file:
        # os.replace() overwrites an existing destination on every
        # platform; os.rename() fails on Windows when the destination
        # exists, which is the normal case when updating the changelog
        # in place.
        os.replace(output_temp, output_file)
| 213 | |
def remove_merged_entries(files_to_remove):
    """Delete each of the files named in files_to_remove."""
    for entry_path in files_to_remove:
        os.remove(entry_path)
| 217 | |
def merge_entries(options):
    """Merge changelog entries into the changelog file.

    Read the changelog file from options.input.
    Read entries to merge from the directory options.dir.
    Write the new changelog to options.output.
    Remove the merged entries if options.keep_entries is false.

    If there are no ``*.md`` files in options.dir, print a message on
    standard error and return without writing anything.
    """
    with open(options.input, 'rb') as input_file:
        changelog = ChangeLog(input_file)
    # glob.glob() returns names in arbitrary, filesystem-dependent order.
    # Sort them so that the order of the merged entries is reproducible.
    files_to_merge = sorted(glob.glob(os.path.join(options.dir, '*.md')))
    if not files_to_merge:
        sys.stderr.write('There are no pending changelog entries.\n')
        return
    for filename in files_to_merge:
        with open(filename, 'rb') as input_file:
            changelog.add_file(input_file)
    finish_output(changelog, options.output, options.input, files_to_merge)
    if not options.keep_entries:
        remove_merged_entries(files_to_merge)
Gilles Peskine | 40b3f41 | 2019-10-13 21:44:25 +0200 | [diff] [blame] | 238 | |
def set_defaults(options):
    """Fill in the options that were not given explicitly.

    When no output file was requested, the input file is overwritten.
    When keeping the entry files was neither requested nor declined, they
    are kept only if an explicit output file was requested.
    """
    output_was_given = getattr(options, 'output', None) is not None
    if not output_was_given:
        options.output = options.input
    keep_entries = getattr(options, 'keep_entries', None)
    if keep_entries is None:
        options.keep_entries = output_was_given
| 246 | |
def main():
    """Command line entry point.

    Parse the command line, fill in the defaults that depend on other
    options, then merge the pending changelog entries.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        '--dir', '-d',
        metavar='DIR',
        default='ChangeLog.d',
        help=('Directory to read entries from'
              ' (default: ChangeLog.d)'),
    )
    cli.add_argument(
        '--input', '-i',
        metavar='FILE',
        default='ChangeLog.md',
        help=('Existing changelog file to read from and augment'
              ' (default: ChangeLog.md)'),
    )
    # --keep-entries and --no-keep-entries share one destination whose
    # default is None, so that set_defaults() can tell "not specified"
    # apart from an explicit choice.
    cli.add_argument(
        '--keep-entries',
        action='store_true',
        dest='keep_entries',
        default=None,
        help=('Keep the files containing entries'
              ' (default: remove them if --output/-o is not specified)'),
    )
    cli.add_argument(
        '--no-keep-entries',
        action='store_false',
        dest='keep_entries',
        help=('Remove the files containing entries after they are merged'
              ' (default: remove them if --output/-o is not specified)'),
    )
    cli.add_argument(
        '--output', '-o',
        metavar='FILE',
        help=('Output changelog file'
              ' (default: overwrite the input)'),
    )
    parsed = cli.parse_args()
    set_defaults(parsed)
    merge_entries(parsed)


if __name__ == '__main__':
    main()