blob: 4483f557371aef7a99f91b32ffcae8bf276b54da [file] [log] [blame]
Darryl Green10d9ce32018-02-28 10:02:55 +00001#!/usr/bin/env python3
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02002
Bence Szépkúti1e148272020-08-07 13:07:28 +02003# Copyright The Mbed TLS Contributors
Dave Rodgman16799db2023-11-02 19:47:20 +00004# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02005
Darryl Green10d9ce32018-02-28 10:02:55 +00006"""
Darryl Green10d9ce32018-02-28 10:02:55 +00007This script checks the current state of the source code for minor issues,
8including incorrect file permissions, presence of tabs, non-Unix line endings,
Gilles Peskine55b49ee2019-07-04 19:31:33 +02009trailing whitespace, and presence of UTF-8 BOM.
Darryl Green10d9ce32018-02-28 10:02:55 +000010Note: requires python 3, must be run from Mbed TLS root.
11"""
12
Darryl Green10d9ce32018-02-28 10:02:55 +000013import argparse
Darryl Green10d9ce32018-02-28 10:02:55 +000014import codecs
Gilles Peskinef2fb9f62023-11-03 14:13:55 +010015import inspect
Gilles Peskine990030b2023-11-03 13:55:00 +010016import logging
17import os
Gilles Peskine0598db82020-05-10 16:57:16 +020018import re
Gilles Peskine3e2ee3c2020-05-10 17:18:06 +020019import subprocess
Darryl Green10d9ce32018-02-28 10:02:55 +000020import sys
Gilles Peskineac9e7c02020-08-11 15:11:50 +020021try:
22 from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
23except ImportError:
24 pass
Darryl Green10d9ce32018-02-28 10:02:55 +000025
Gilles Peskined9071e72022-09-18 21:17:09 +020026import scripts_path # pylint: disable=unused-import
27from mbedtls_dev import build_tree
28
Darryl Green10d9ce32018-02-28 10:02:55 +000029
class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class, implement ``check_file_for_issue`` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
     will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
     tree) matches this regular expression will not be checked. This can be
     ``None`` to match no path. Paths are normalized and converted to ``/``
     separators before matching.

    ``heading``: human-readable description of the issue
    """

    suffix_exemptions = frozenset() #type: FrozenSet[str]
    path_exemptions = None #type: Optional[Pattern[str]]
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        # Maps a file path to the list of line numbers where the issue was
        # found. Subclasses may store None instead of a list when the issue
        # concerns the file as a whole.
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists. Replace each separator
        # character individually: splitting on the concatenation of sep and
        # altsep would only match the two characters in sequence.
        for separator in (os.path.sep, os.path.altsep):
            if separator and separator != '/':
                filepath = filepath.replace(separator, '/')
        return filepath

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        if any(filepath.endswith(suffix) for suffix in self.suffix_exemptions):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged when no issue was recorded. Otherwise the heading
        is logged, then one entry per file in sorted order (with the line
        numbers when there are any), then an empty line.
        """
        if self.files_with_issues:
            logger.info(self.heading)
            for filename, lines in sorted(self.files_with_issues.items()):
                if lines:
                    logger.info("{}: {}".format(
                        filename, ", ".join(str(x) for x in lines)
                    ))
                else:
                    logger.info(filename)
            logger.info("")
105
# Regular expressions for the paths of files that are treated as binary.
# Each pattern is anchored at the end with \Z. The trackers in this file
# that use the combined pattern as ``path_exemptions`` apply it with
# re.match, i.e. anchored at the start of the normalized path.
BINARY_FILE_PATH_RE_LIST = [
    r'docs/.*\.pdf\Z',
    r'docs/.*\.png\Z',
    # Paths under these directories that contain no dot at all.
    r'programs/fuzz/corpuses/[^.]+\Z',
    r'tests/data_files/[^.]+\Z',
    r'tests/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'tests/data_files/.*\.req\.[^/]+\Z',
    r'tests/data_files/.*malformed[^/]+\Z',
    r'tests/data_files/format_pkcs12\.fmt\Z',
    r'tests/data_files/.*\.bin\Z',
]
# A single pattern that matches if any of the patterns above matches.
BINARY_FILE_PATH_RE = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST))
118
class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement ``issue_with_line``.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath, line_number):
        """Tell whether the given line exhibits the issue tracked by this class.

        ``line`` is the raw content of the line as bytes, including its
        line terminator if it has one. ``line_number`` starts at 1.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record an issue at this location if ``issue_with_line`` reports one."""
        found = self.issue_with_line(line, filepath, line_number)
        if not found:
            return
        self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check every line of the specified file.

        The file is read in binary mode, so lines are ``bytes`` objects.
        Subclasses must implement the ``issue_with_line`` method.
        """
        with open(filepath, "rb") as f:
            for line_number, line in enumerate(f, start=1):
                self.check_file_line(filepath, line, line_number)
148
Gilles Peskine2c618732020-03-24 22:26:01 +0100149
def is_windows_file(filepath):
    """Return whether ``filepath`` has one of the Windows-specific extensions.

    The comparison is case-sensitive and uses the extension as determined
    by ``os.path.splitext``.
    """
    extension = os.path.splitext(filepath)[1]
    return extension in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')
Gilles Peskine2c618732020-03-24 22:26:01 +0100153
154
class ShebangIssueTracker(FileIssueTracker):
    """Track files with a bad, missing or extraneous shebang line.

    Executable scripts must start with a valid shebang (#!) line.
    """

    heading = "Invalid shebang line:"

    # Allow either /bin/sh, /bin/bash, or /usr/bin/env.
    # Allow at most one argument (this is a Linux limitation).
    # For sh and bash, the argument if present must be options.
    # For env, the argument must be the base name of the interpreter.
    _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
                             rb'|/usr/bin/env ([^\n /]+))$')
    # Expected file name extension for each accepted interpreter.
    _extensions = {
        b'bash': 'sh',
        b'perl': 'pl',
        b'python3': 'py',
        b'sh': 'sh',
    }

    path_exemptions = re.compile(r'tests/scripts/quiet/.*')

    def is_valid_shebang(self, first_line, filepath):
        """Whether ``first_line`` is an accepted shebang line for ``filepath``.

        The line must have the accepted syntax, name a known interpreter,
        and the file name must end with the extension associated with
        that interpreter.
        """
        found = self._shebang_re.match(first_line)
        if found is None:
            return False
        interpreter = found.group(1) or found.group(2)
        expected_extension = self._extensions.get(interpreter)
        if expected_extension is None:
            return False
        return filepath.endswith('.' + expected_extension)

    def check_file_for_issue(self, filepath):
        """Check that the file has a shebang line if and only if it is executable.

        A mismatch is recorded for the file as a whole. An executable file
        with an unacceptable shebang line is recorded at line 1.
        """
        executable = os.access(filepath, os.X_OK)
        with open(filepath, "rb") as f:
            first_line = f.readline()
        has_shebang = first_line.startswith(b'#!')
        if has_shebang and executable:
            if not self.is_valid_shebang(first_line, filepath):
                self.files_with_issues[filepath] = [1]
        elif has_shebang or executable:
            # Either a shebang on a non-executable file,
            # or an executable without a shebang.
            self.files_with_issues[filepath] = None
202
203
class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files whose last line is incomplete
    (i.e. the file does not end with a newline character)."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Record the file if its last byte is not a newline.

        Empty files are not reported.
        """
        with open(filepath, "rb") as f:
            try:
                f.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            last_byte = f.read(1)
        if last_byte != b"\n":
            self.files_with_issues[filepath] = None
223
224
class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Record the file if its first bytes are the UTF-8 byte order mark."""
        bom = codecs.BOM_UTF8
        with open(filepath, "rb") as f:
            # Only the first few bytes matter: don't load the whole file.
            if f.read(len(bom)) == bom:
                self.files_with_issues[filepath] = None
238
239
class UnicodeIssueTracker(LineIssueTracker):
    """Track lines with invalid characters or invalid text encoding."""

    heading = "Invalid UTF-8 or forbidden character:"

    # Only allow valid UTF-8, and only other explicitly allowed characters.
    # We deliberately exclude all characters that aren't a simple non-blank,
    # non-zero-width glyph, apart from a very small set (tab, ordinary space,
    # line breaks, "basic" no-break space and soft hyphen). In particular,
    # non-ASCII control characters, combining characters, and Unicode state
    # changes (e.g. right-to-left text) are forbidden.
    # Note that we do allow some characters with a risk of visual confusion,
    # for example U+002D HYPHEN-MINUS vs U+00AD SOFT HYPHEN vs U+2010 HYPHEN,
    # or U+0041 LATIN CAPITAL LETTER A vs U+0391 GREEK CAPITAL LETTER ALPHA.
    GOOD_CHARACTERS = (
        '\t\n\r -~' # ASCII (tabs and line endings are checked separately)
        '\u00A0-\u00FF' # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
        '\u2010-\u2027\u2030-\u205E' # General Punctuation (printable)
        '\u2070\u2071\u2074-\u208E\u2090-\u209C' # Superscripts and Subscripts
        '\u2190-\u21FF' # Arrows
        '\u2200-\u22FF' # Mathematical Symbols
        '\u2500-\u257F' # Box Drawings characters used in markdown trees
    )
    # Allow any of the characters and ranges above, and anything classified
    # as a word constituent.
    GOOD_CHARACTERS_RE = re.compile(r'[\w{}]+\Z'.format(GOOD_CHARACTERS))

    def issue_with_line(self, line, _filepath, line_number):
        """Whether ``line`` is not valid UTF-8 or has a forbidden character."""
        try:
            text = line.decode('utf-8')
        except UnicodeDecodeError:
            return True
        starts_with_bom = line_number == 1 and text.startswith('\uFEFF')
        if starts_with_bom:
            # Strip BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the beginning.
            # Which files are allowed to have a BOM is handled in
            # Utf8BomIssueTracker.
            text = text[1:]
        return self.GOOD_CHARACTERS_RE.match(text) is None
279
class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check every non-exempt file except Windows-specific files."""
        if is_windows_file(filepath):
            return False
        return super().should_check_file(filepath)

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line contains a carriage return anywhere."""
        carriage_return = b"\r"
        return carriage_return in line
292
293
class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only Windows-specific files that are not otherwise exempt."""
        if not is_windows_file(filepath):
            return False
        return super().should_check_file(filepath)

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line lacks a final CRLF or has a CR before it."""
        if not line.endswith(b"\r\n"):
            return True
        body = line[:-2]
        return b"\r" in body
Gilles Peskine545e13f2020-03-24 22:29:11 +0100306
307
class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether whitespace other than CR/LF ends the line."""
        without_line_ending = line.rstrip(b"\r\n")
        without_any_whitespace = line.rstrip()
        return without_line_ending != without_any_whitespace
316
317
class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    suffix_exemptions = frozenset((
        ".make",
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/Makefile",
        "/Makefile.inc",
        "/generate_visualc_files.pl",
    ))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line contains a tab character anywhere."""
        tab = b"\t"
        return tab in line
333
334
class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether the line looks like a leftover git conflict marker."""
        # Markers that are followed by a space and a label.
        # '||||||| ' comes from merge.conflictStyle=diff3.
        labelled_markers = (b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')
        if line.startswith(labelled_markers):
            return True
        # A separator line is not reported in .md files.
        # NOTE(review): presumably because '=======' can legitimately
        # underline a heading in Markdown -- confirm.
        is_separator = line.rstrip(b'\r\n') == b'======='
        return is_separator and not _filepath.endswith('.md')
351
Darryl Green10d9ce32018-02-28 10:02:55 +0000352
def this_location():
    """Return the base name of this file and a line number in this function.

    The line number is the one at which the frame is inspected, so it
    depends on where this function is located in the file.
    """
    current = inspect.currentframe()
    assert current is not None
    location = inspect.getframeinfo(current)
    base_name = os.path.basename(location.filename)
    return base_name, location.lineno
# This must stay above the definition of LicenseIssueTracker, which uses
# the line number to skip its own code.
THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
359
class LicenseIssueTracker(LineIssueTracker):
    """Check copyright statements and license indications.

    This class only checks that statements are correct if present. It does
    not enforce the presence of statements in each file.
    """

    heading = "License issue:"

    LICENSE_EXEMPTION_RE_LIST = [
        # Third-party code, other than whitelisted third-party modules,
        # may be under a different license.
        r'3rdparty/(?!(p256-m)/.*)',
        # Documentation explaining the license may have accidental
        # false positives.
        r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
        # Files imported from TF-M, and not used except in test builds,
        # may be under a different license.
        r'configs/ext/crypto_config_profile_medium\.h\Z',
        r'configs/ext/tfm_mbedcrypto_config_profile_medium\.h\Z',
        r'configs/ext/README\.md\Z',
        # Third-party file.
        r'dco\.txt\Z',
    ]
    # Skip binary files as well as the files listed above.
    path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
                                          LICENSE_EXEMPTION_RE_LIST))

    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
    COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)

    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    # Group 1 is the header key as spelled in the file (matched
    # case-insensitively). Group 3 is the identifier that follows the
    # colon, if there is a colon.
    SPDX_RE = re.compile(br'.*?(' +
                         re.escape(SPDX_HEADER_KEY) +
                         br')(:\s*(.*?)\W*\Z|.*)', re.I)

    LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([
        rb'Apache License',
        rb'General Public License',
    ]) + rb')', re.I)

    def __init__(self):
        super().__init__()
        # Record what problem was caused. We can't easily report it due to
        # the structure of the script. To be fixed after
        # https://github.com/Mbed-TLS/mbedtls/pull/2506
        self.problem = None

    def _flag(self, description):
        """Remember ``description`` as the latest problem and return True."""
        self.problem = description
        return True

    def issue_with_line(self, line, filepath, line_number):
        """Whether ``line`` has a wrong copyright or license statement.

        When an issue is found, a description is stored in ``self.problem``.
        """
        #pylint: disable=too-many-return-statements

        # Use endswith() rather than the more correct os.path.basename()
        # because experimentally, it makes a significant difference to
        # the running time.
        in_this_file = filepath.endswith(THIS_FILE_BASE_NAME)
        if in_this_file and \
           line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
            # Avoid false positives from the code in this class.
            # Also skip the rest of this file, which is highly unlikely to
            # contain any problematic statements since we put those near the
            # top of files.
            return False

        copyright_match = self.COPYRIGHT_RE.match(line)
        if copyright_match and \
           copyright_match.group(1) != self.COPYRIGHT_HOLDER:
            return self._flag('Invalid copyright line')

        spdx_match = self.SPDX_RE.match(line)
        if spdx_match:
            key = spdx_match.group(1)
            identifier = spdx_match.group(3)
            if key != self.SPDX_HEADER_KEY:
                return self._flag('Misspelled ' + self.SPDX_HEADER_KEY.decode())
            if not identifier:
                return self._flag('Improperly formatted SPDX license identifier')
            if identifier != self.LICENSE_IDENTIFIER:
                return self._flag('Wrong SPDX license identifier')

        # A correct SPDX line is still subject to this last check.
        if self.LICENSE_MENTION_RE.match(line):
            return self._flag('Suspicious license mention')

        return False
446
447
class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        # NOTE(review): presumably verifies that the current directory is
        # the root of the source tree -- confirm against build_tree.
        build_tree.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        self.issues_to_check = [
            ShebangIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnicodeIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
            LicenseIssueTracker(),
        ]

    def setup_logger(self, log_file, level=logging.INFO):
        """Set up ``self.logger`` as the root logger at the given level.

        Log to ``log_file`` if it is a non-empty path, otherwise to the
        default stream of ``logging.StreamHandler``.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
        else:
            handler = logging.StreamHandler()
        self.logger.addHandler(handler)

    @staticmethod
    def collect_files():
        """Return the paths listed by ``git ls-files``.

        File names must be ASCII, otherwise decoding raises an exception.
        """
        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        # With -z, every name is followed by a null byte, so the last
        # element of the split is an empty string which we drop.
        bytes_filepaths = bytes_output.split(b'\0')[:-1]
        ascii_filepaths = [fp.decode('ascii') for fp in bytes_filepaths]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Run every issue tracker on every file that it does not exempt."""
        # List the files only once: this runs a git subprocess and the
        # result does not depend on the issue tracker.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues found by each tracker.

        Return 1 if any tracker recorded an issue, 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code
505
506
def run_main():
    """Parse the command line, run all the checks and exit.

    The exit status is the value returned by
    ``IntegrityChecker.output_issues``.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    options = arg_parser.parse_args()
    checker = IntegrityChecker(options.log_file)
    checker.check_files()
    sys.exit(checker.output_issues())
517
518
# Run the checks only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_main()