blob: 2a5d64b79fbb2d0fecd4c8a74992198a5faeccea [file] [log] [blame]
Darryl Green10d9ce32018-02-28 10:02:55 +00001#!/usr/bin/env python3
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02002
Bence Szépkúti1e148272020-08-07 13:07:28 +02003# Copyright The Mbed TLS Contributors
Dave Rodgman16799db2023-11-02 19:47:20 +00004# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02005
Darryl Green10d9ce32018-02-28 10:02:55 +00006"""
Darryl Green10d9ce32018-02-28 10:02:55 +00007This script checks the current state of the source code for minor issues,
8including incorrect file permissions, presence of tabs, non-Unix line endings,
Gilles Peskine55b49ee2019-07-04 19:31:33 +02009trailing whitespace, and presence of UTF-8 BOM.
Darryl Green10d9ce32018-02-28 10:02:55 +000010Note: requires python 3, must be run from Mbed TLS root.
11"""
12
Darryl Green10d9ce32018-02-28 10:02:55 +000013import argparse
Darryl Green10d9ce32018-02-28 10:02:55 +000014import codecs
Gilles Peskinef2fb9f62023-11-03 14:13:55 +010015import inspect
Gilles Peskine990030b2023-11-03 13:55:00 +010016import logging
17import os
Gilles Peskine0598db82020-05-10 16:57:16 +020018import re
Gilles Peskine3e2ee3c2020-05-10 17:18:06 +020019import subprocess
Darryl Green10d9ce32018-02-28 10:02:55 +000020import sys
Gilles Peskineac9e7c02020-08-11 15:11:50 +020021try:
22 from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
23except ImportError:
24 pass
Darryl Green10d9ce32018-02-28 10:02:55 +000025
Gilles Peskined9071e72022-09-18 21:17:09 +020026import scripts_path # pylint: disable=unused-import
David Horstmann9638ca32024-05-03 14:36:12 +010027from mbedtls_framework import build_tree
Gilles Peskined9071e72022-09-18 21:17:09 +020028
Darryl Green10d9ce32018-02-28 10:02:55 +000029
class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class and implement `check_file_for_issue` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
    will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
    tree) matches this regular expression will not be checked. This can be
    ``None`` to match no path. Paths are normalized and converted to ``/``
    separators before matching.

    ``heading``: human-readable description of the issue

    ``files_with_issues``: dictionary filled in while checking. Keys are
    file paths; a value is either a list of line numbers or ``None`` when
    the issue concerns the file as a whole.
    """

    suffix_exemptions = frozenset() #type: FrozenSet[str]
    path_exemptions = None #type: Optional[Pattern[str]]
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists.
        # Replace each separator character individually: passing the
        # concatenation of both separators to str.split() would look for
        # that two-character sequence and never convert anything.
        for separator in (os.path.sep, os.path.altsep):
            if separator is not None and separator != '/':
                filepath = filepath.replace(separator, '/')
        return filepath

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        # str.endswith() accepts a tuple of candidates; an empty tuple
        # never matches, which is what we want when nothing is exempt.
        if filepath.endswith(tuple(self.suffix_exemptions)):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged when no issue was recorded. Otherwise the heading
        is logged first, then one line per file (with the line numbers when
        there are any), then an empty line.
        """
        if not self.files_with_issues:
            return
        logger.info(self.heading)
        for filename, lines in sorted(self.files_with_issues.items()):
            if lines:
                logger.info("{}: {}".format(
                    filename, ", ".join(str(x) for x in lines)
                ))
            else:
                # File-wide issue: there is no line number to report.
                logger.info(filename)
        logger.info("")
105
# Regular expressions for the paths of files that are treated as binary.
# Each pattern is anchored at its end with \Z and is meant to be applied
# with re.match() to a path that uses / as the directory separator.
# This stays a list because LicenseIssueTracker concatenates it with its
# own list of exemptions.
BINARY_FILE_PATH_RE_LIST = [
    r'docs/.*\.pdf\Z',
    r'docs/.*\.png\Z',
    r'programs/fuzz/corpuses/[^.]+\Z',
    r'framework/data_files/[^.]+\Z',
    r'framework/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'framework/data_files/.*\.req\.[^/]+\Z',
    r'framework/data_files/.*malformed[^/]+\Z',
    r'framework/data_files/format_pkcs12\.fmt\Z',
    r'framework/data_files/.*\.bin\Z',
]
# A single compiled alternation of all the patterns above, used as
# ``path_exemptions`` by the trackers that must skip binary files.
BINARY_FILE_PATH_RE = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST))
118
class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement `issue_with_line`.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath, line_number):
        """Tell whether ``line`` exhibits the issue that this class is for.

        ``line`` is the raw line as bytes, including its line terminator
        if it has one. ``line_number`` starts at 1.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Check one line and record its location if it has the issue."""
        if not self.issue_with_line(line, filepath, line_number):
            return
        self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check the lines of the specified file.

        The file is read in binary mode, so lines are only split at
        line feed bytes.

        Subclasses must implement the ``issue_with_line`` method.
        """
        with open(filepath, "rb") as f:
            for line_number, line in enumerate(f, start=1):
                self.check_file_line(filepath, line, line_number)
148
Gilles Peskine2c618732020-03-24 22:26:01 +0100149
def is_windows_file(filepath):
    """Whether ``filepath`` has one of the Windows-specific file extensions.

    Only the extension as determined by ``os.path.splitext`` is considered,
    and the comparison is case-sensitive.
    """
    extension = os.path.splitext(filepath)[1]
    return extension in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')
Gilles Peskine2c618732020-03-24 22:26:01 +0100153
154
class ShebangIssueTracker(FileIssueTracker):
    """Track files with a bad, missing or extraneous shebang line.

    Executable scripts must start with a valid shebang (#!) line.
    Conversely, files that are not executable must not have one.
    """

    heading = "Invalid shebang line:"

    # Allow either /bin/sh, /bin/bash, or /usr/bin/env.
    # Allow at most one argument (this is a Linux limitation).
    # For sh and bash, the argument if present must be options.
    # For env, the argument must be the base name of the interpreter.
    _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
                             rb'|/usr/bin/env ([^\n /]+))$')
    # Accepted interpreters and the file extension each one requires.
    _extensions = {
        b'bash': 'sh',
        b'perl': 'pl',
        b'python3': 'py',
        b'sh': 'sh',
    }

    path_exemptions = re.compile(r'tests/scripts/quiet/.*')

    def is_valid_shebang(self, first_line, filepath):
        """Whether ``first_line`` is an acceptable shebang for ``filepath``.

        The line must have one of the allowed forms, name a known
        interpreter, and the file extension must match that interpreter.
        """
        match = self._shebang_re.match(first_line)
        if match is None:
            return False
        # Exactly one of the two groups is set, depending on which
        # alternative of the regular expression matched.
        interpreter = match.group(1) or match.group(2)
        expected_extension = self._extensions.get(interpreter)
        if expected_extension is None:
            return False
        return filepath.endswith('.' + expected_extension)

    def check_file_for_issue(self, filepath):
        """Check that the shebang line and the executable bit agree.

        A shebang on a non-executable file and an executable file without
        a shebang are recorded as file-wide issues. An executable file
        with an invalid shebang is recorded as an issue on line 1.
        """
        is_executable = os.access(filepath, os.X_OK)
        with open(filepath, "rb") as f:
            first_line = f.readline()
        has_shebang = first_line.startswith(b'#!')
        if has_shebang != is_executable:
            # Either a shebang on a non-executable file,
            # or an executable without a shebang.
            self.files_with_issues[filepath] = None
        elif has_shebang and not self.is_valid_shebang(first_line, filepath):
            self.files_with_issues[filepath] = [1]
202
203
class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files that end with an incomplete line
    (no newline character at the end of the last line)."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Record ``filepath`` if its last byte is not a line feed.

        Empty files are not reported.
        """
        with open(filepath, "rb") as f:
            try:
                f.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            last_byte = f.read(1)
        if last_byte != b"\n":
            self.files_with_issues[filepath] = None
223
224
class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Record ``filepath`` if it starts with the UTF-8 byte order mark."""
        with open(filepath, "rb") as f:
            # Only the first few bytes matter: don't load the whole file
            # into memory just to look at its beginning.
            prefix = f.read(len(codecs.BOM_UTF8))
        if prefix == codecs.BOM_UTF8:
            self.files_with_issues[filepath] = None
238
239
class UnicodeIssueTracker(LineIssueTracker):
    """Track lines with invalid characters or invalid text encoding."""

    heading = "Invalid UTF-8 or forbidden character:"

    # Only allow valid UTF-8, and only other explicitly allowed characters.
    # We deliberately exclude all characters that aren't a simple non-blank,
    # non-zero-width glyph, apart from a very small set (tab, ordinary space,
    # line breaks, "basic" no-break space and soft hyphen). In particular,
    # non-ASCII control characters, combining characters, and Unicode state
    # changes (e.g. right-to-left text) are forbidden.
    # Note that we do allow some characters with a risk of visual confusion,
    # for example U+002D HYPHEN-MINUS vs U+00AD SOFT HYPHEN vs U+2010 HYPHEN,
    # or U+0041 LATIN CAPITAL LETTER A vs U+0391 GREEK CAPITAL LETTER ALPHA.
    GOOD_CHARACTERS = ''.join([
        '\t\n\r -~', # ASCII (tabs and line endings are checked separately)
        '\u00A0-\u00FF', # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
        '\u2010-\u2027\u2030-\u205E', # General Punctuation (printable)
        '\u2070\u2071\u2074-\u208E\u2090-\u209C', # Superscripts and Subscripts
        '\u2190-\u21FF', # Arrows
        '\u2200-\u22FF', # Mathematical Symbols
        '\u2500-\u257F' # Box Drawings characters used in markdown trees
    ])
    # Allow any of the characters and ranges above, and anything classified
    # as a word constituent.
    GOOD_CHARACTERS_RE = re.compile(r'[\w{}]+\Z'.format(GOOD_CHARACTERS))

    def issue_with_line(self, line, _filepath, line_number):
        """Whether ``line`` is not valid UTF-8 or has a forbidden character."""
        try:
            decoded = line.decode('utf-8')
        except UnicodeDecodeError:
            return True
        if line_number == 1:
            if decoded.startswith('\uFEFF'):
                # Strip BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the
                # beginning. Which files are allowed to have a BOM is
                # handled in Utf8BomIssueTracker.
                decoded = decoded[1:]
        return self.GOOD_CHARACTERS_RE.match(decoded) is None
279
class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check every non-exempt file except Windows-specific files."""
        return (super().should_check_file(filepath) and
                not is_windows_file(filepath))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` contains a carriage return anywhere."""
        carriage_return = b"\r"
        return carriage_return in line
292
293
class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only the non-exempt files that are Windows-specific."""
        return (super().should_check_file(filepath) and
                is_windows_file(filepath))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` has anything other than a single final CRLF."""
        if not line.endswith(b"\r\n"):
            return True
        # The line ends with CRLF: any other CR is a stray one.
        body = line[:-2]
        return b"\r" in body
Gilles Peskine545e13f2020-03-24 22:29:11 +0100306
307
class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` has whitespace just before its line terminator."""
        without_terminator = line.rstrip(b"\r\n")
        # Stripping whitespace changes something only if there was
        # whitespace left after removing the line terminator.
        return without_terminator != without_terminator.rstrip()
316
317
class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    # Files that are allowed to contain tab characters. Entries starting
    # with a slash match a complete file name in any directory.
    suffix_exemptions = frozenset([
        ".make",
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/.gitmodules",
        "/Makefile",
        "/Makefile.inc",
        "/generate_visualc_files.pl",
    ])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` contains a tab character anywhere."""
        tab = b"\t"
        return tab in line
334
335
class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` looks like a leftover git conflict marker."""
        conflict_marker_prefixes = (
            b'<<<<<<< ',
            b'>>>>>>> ',
            b'||||||| ', # from merge.conflictStyle=diff3
        )
        if line.startswith(conflict_marker_prefixes):
            return True
        if _filepath.endswith('.md'):
            # A line of equal signs is not reported in .md files
            # (presumably because it is legitimate Markdown there).
            return False
        return line.rstrip(b'\r\n') == b'======='
352
Darryl Green10d9ce32018-02-28 10:02:55 +0000353
def this_location():
    """Return the base name of this source file and a line number in it.

    The line number is the one at which this function queries its own
    frame, so it identifies where this function is located in the file.
    """
    frame = inspect.currentframe()
    assert frame is not None
    frame_info = inspect.getframeinfo(frame)
    file_base_name = os.path.basename(frame_info.filename)
    return file_base_name, frame_info.lineno
THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
360
class LicenseIssueTracker(LineIssueTracker):
    """Check copyright statements and license indications.

    This class only checks that statements are correct if present. It does
    not enforce the presence of statements in each file.
    """

    heading = "License issue:"

    LICENSE_EXEMPTION_RE_LIST = [
        # Third-party code, other than whitelisted third-party modules,
        # may be under a different license.
        r'3rdparty/(?!(p256-m)/.*)',
        # Documentation explaining the license may have accidental
        # false positives.
        r'(ChangeLog|LICENSE|framework\/LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
        # Files imported from TF-M, and not used except in test builds,
        # may be under a different license.
        r'configs/ext/crypto_config_profile_medium\.h\Z',
        r'configs/ext/tfm_mbedcrypto_config_profile_medium\.h\Z',
        r'configs/ext/README\.md\Z',
        # Third-party file.
        r'dco\.txt\Z',
        r'framework\/dco\.txt\Z',
    ]
    # Skip binary files as well as the license-specific exemptions above.
    path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
                                          LICENSE_EXEMPTION_RE_LIST))

    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
    COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)

    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    # Group 1 is the header key as spelled in the file (the match is
    # case-insensitive). Group 3 is the license identifier, and is only
    # set when the key is followed by a colon.
    SPDX_RE = re.compile(br'.*?(' +
                         re.escape(SPDX_HEADER_KEY) +
                         br')(:\s*(.*?)\W*\Z|.*)', re.I)

    LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([
        rb'Apache License',
        rb'General Public License',
    ]) + rb')', re.I)

    def __init__(self):
        super().__init__()
        # Record what problem was caused. We can't easily report it due to
        # the structure of the script. To be fixed after
        # https://github.com/Mbed-TLS/mbedtls/pull/2506
        self.problem = None

    def _find_problem(self, line):
        """Return a description of the license problem in ``line``, or None."""
        #pylint: disable=too-many-return-statements
        copyright_match = self.COPYRIGHT_RE.match(line)
        if copyright_match and \
           copyright_match.group(1) != self.COPYRIGHT_HOLDER:
            return 'Invalid copyright line'

        spdx_match = self.SPDX_RE.match(line)
        if spdx_match:
            key, identifier = spdx_match.group(1), spdx_match.group(3)
            if key != self.SPDX_HEADER_KEY:
                return 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
            if not identifier:
                return 'Improperly formatted SPDX license identifier'
            if identifier != self.LICENSE_IDENTIFIER:
                return 'Wrong SPDX license identifier'

        if self.LICENSE_MENTION_RE.match(line):
            return 'Suspicious license mention'

        return None

    def issue_with_line(self, line, filepath, line_number):
        """Whether ``line`` has a wrong copyright or license statement.

        When it does, a description of the problem is stored in
        ``self.problem``.
        """
        # Use endswith() rather than the more correct os.path.basename()
        # because experimentally, it makes a significant difference to
        # the running time.
        in_this_file = filepath.endswith(THIS_FILE_BASE_NAME)
        if in_this_file and \
           line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
            # Avoid false positives from the code in this class.
            # Also skip the rest of this file, which is highly unlikely to
            # contain any problematic statements since we put those near the
            # top of files.
            return False

        problem = self._find_problem(line)
        if problem is None:
            return False
        self.problem = problem
        return True
448
449
class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        build_tree.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        # One instance of each check to run. Each one accumulates its own
        # findings in its ``files_with_issues`` attribute.
        self.issues_to_check = [
            ShebangIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnicodeIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
            LicenseIssueTracker(),
        ]

    def setup_logger(self, log_file, level=logging.INFO):
        """Log to log_file if provided, or to stderr if None."""
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
            self.logger.addHandler(handler)
        else:
            console = logging.StreamHandler()
            self.logger.addHandler(console)

    @staticmethod
    def collect_files():
        """Return the list of files to check.

        These are the regular files committed into Git, both in the
        ``framework`` directory (listed by running Git there) and in the
        current directory.

        Raise ``subprocess.CalledProcessError`` if a Git command fails,
        and ``UnicodeDecodeError`` if a file name is not ASCII.
        """
        bytes_output = subprocess.check_output(['git', '-C', 'framework',
                                                'ls-files', '-z'])
        # The output is a sequence of NUL-terminated names, so the last
        # element of the split is always empty: drop it.
        bytes_framework_filepaths = [
            b"framework/" + filepath
            for filepath in bytes_output.split(b'\0')[:-1]
        ]

        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        bytes_filepaths = bytes_output.split(b'\0')[:-1] + \
                          bytes_framework_filepaths
        ascii_filepaths = (fp.decode('ascii') for fp in bytes_filepaths)

        # Filter out directories. Normally Git doesn't list directories
        # (it only knows about the files inside them), but there is
        # at least one case where 'git ls-files' includes a directory:
        # submodules. Just skip submodules (and any other directories).
        regular_filepaths = [fp for fp in ascii_filepaths
                             if os.path.isfile(fp)]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in regular_filepaths]

    def check_files(self):
        """Check all files for all issues."""
        # Listing the files runs Git and queries the file system for every
        # path, so do it once rather than once per issue tracker.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues found and their locations.

        Return 1 if there were issues, 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code
531
532
def run_main():
    """Parse the command line, run all the checks and exit.

    The exit status is 1 if any issue was found and 0 otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    options = parser.parse_args()
    checker = IntegrityChecker(options.log_file)
    checker.check_files()
    sys.exit(checker.output_issues())


if __name__ == "__main__":
    run_main()