blob: f6f6d6c713ff01b4152b51b821a83285beb0fc2e [file] [log] [blame]
Darryl Green10d9ce32018-02-28 10:02:55 +00001#!/usr/bin/env python3
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02002
Bence Szépkúti1e148272020-08-07 13:07:28 +02003# Copyright The Mbed TLS Contributors
Dave Rodgman16799db2023-11-02 19:47:20 +00004# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02005
Darryl Green10d9ce32018-02-28 10:02:55 +00006"""
Darryl Green10d9ce32018-02-28 10:02:55 +00007This script checks the current state of the source code for minor issues,
8including incorrect file permissions, presence of tabs, non-Unix line endings,
Gilles Peskine55b49ee2019-07-04 19:31:33 +02009trailing whitespace, and presence of UTF-8 BOM.
Darryl Green10d9ce32018-02-28 10:02:55 +000010Note: requires python 3, must be run from Mbed TLS root.
11"""
12
Darryl Green10d9ce32018-02-28 10:02:55 +000013import argparse
Darryl Green10d9ce32018-02-28 10:02:55 +000014import codecs
Gilles Peskinef2fb9f62023-11-03 14:13:55 +010015import inspect
Gilles Peskine990030b2023-11-03 13:55:00 +010016import logging
17import os
Gilles Peskine0598db82020-05-10 16:57:16 +020018import re
Gilles Peskine3e2ee3c2020-05-10 17:18:06 +020019import subprocess
Darryl Green10d9ce32018-02-28 10:02:55 +000020import sys
Gilles Peskineac9e7c02020-08-11 15:11:50 +020021try:
22 from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
23except ImportError:
24 pass
Darryl Green10d9ce32018-02-28 10:02:55 +000025
Gilles Peskined9071e72022-09-18 21:17:09 +020026import scripts_path # pylint: disable=unused-import
27from mbedtls_dev import build_tree
28
Darryl Green10d9ce32018-02-28 10:02:55 +000029
class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class and implement `check_file_for_issue` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
    will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
    tree) matches this regular expression will not be checked. This can be
    ``None`` to match no path. Paths are normalized and converted to ``/``
    separators before matching.

    ``heading``: human-readable description of the issue

    ``files_with_issues`` (instance attribute): maps each offending file
    path to a list of line numbers, or to ``None`` when the issue concerns
    the file as a whole.
    """

    suffix_exemptions = frozenset() #type: FrozenSet[str]
    path_exemptions = None #type: Optional[Pattern[str]]
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists.
        # Replace each separator individually: str.split() treats its
        # argument as one delimiter string, not as a set of characters, so
        # splitting on sep + altsep would never match anything on Windows.
        for separator in (os.path.sep, os.path.altsep):
            if separator is not None and separator != '/':
                filepath = filepath.replace(separator, '/')
        return filepath

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        # str.endswith() accepts a tuple of candidate suffixes; an empty
        # tuple never matches.
        if filepath.endswith(tuple(self.suffix_exemptions)):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged if no issue was recorded. Otherwise the heading
        is logged first, then one line per file in sorted order (with the
        line numbers, if any), then an empty line.
        """
        if not self.files_with_issues:
            return
        logger.info(self.heading)
        for filename, lines in sorted(self.files_with_issues.items()):
            if lines:
                logger.info("%s: %s",
                            filename, ", ".join(str(x) for x in lines))
            else:
                # File-wide issue: there is no line number to report.
                logger.info(filename)
        logger.info("")
105
# Regular expressions for the paths of files that are treated as binary.
# Each pattern is matched from the start of a path that uses ``/`` as the
# directory separator.
BINARY_FILE_PATH_RE_LIST = [
    # Documentation assets.
    r'docs/.*\.pdf\Z',
    r'docs/.*\.png\Z',
    # Fuzzing corpus entries whose path contains no dot.
    r'programs/fuzz/corpuses/[^.]+\Z',
    # Test data: paths without any dot after the directory name, ...
    r'tests/data_files/[^.]+\Z',
    # ... a fixed set of extensions, ...
    r'tests/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    # ... anything with ".req." in its name, ...
    r'tests/data_files/.*\.req\.[^/]+\Z',
    # ... anything whose base name contains "malformed", ...
    r'tests/data_files/.*malformed[^/]+\Z',
    # ... and a couple of individually listed formats.
    r'tests/data_files/format_pkcs12\.fmt\Z',
    r'tests/data_files/.*\.bin\Z',
]
# A single pattern matching any of the alternatives above.
_BINARY_FILE_PATH_ALTERNATION = '|'.join(BINARY_FILE_PATH_RE_LIST)
BINARY_FILE_PATH_RE = re.compile(_BINARY_FILE_PATH_ALTERNATION)
118
class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement `issue_with_line`.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath, line_number):
        """Check the specified line for the issue that this class is for.

        ``line`` is the raw content of the line as a ``bytes`` object,
        including its line terminator if there is one. ``line_number``
        starts at 1. Return a true value if the line has the issue.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record an issue at the given location if the line has one."""
        if self.issue_with_line(line, filepath, line_number):
            self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check the lines of the specified file.

        Subclasses must implement the ``issue_with_line`` method.
        """
        # Read in binary mode so that line endings and invalid encodings
        # reach the checkers unmodified.
        with open(filepath, "rb") as f:
            for line_number, line in enumerate(f, 1):
                self.check_file_line(filepath, line, line_number)
148
Gilles Peskine2c618732020-03-24 22:26:01 +0100149
def is_windows_file(filepath):
    """Return whether ``filepath`` has one of the Windows-specific extensions.

    The comparison is an exact, case-sensitive match on the extension
    returned by ``os.path.splitext``.
    """
    extension = os.path.splitext(filepath)[1]
    return extension in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')
Gilles Peskine2c618732020-03-24 22:26:01 +0100153
154
class ShebangIssueTracker(FileIssueTracker):
    """Track files with a bad, missing or extraneous shebang line.

    Executable scripts must start with a valid shebang (#!) line, and
    files that are not executable must not have one.
    """

    heading = "Invalid shebang line:"

    # Accepted forms: /bin/sh, /bin/bash, or /usr/bin/env.
    # At most one argument is accepted (this is a Linux limitation).
    # With sh and bash, the argument (if any) must be options.
    # With env, the argument must be the base name of the interpreter.
    _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
                             rb'|/usr/bin/env ([^\n /]+))$')
    # Interpreter name -> file name extension expected for its scripts.
    _extensions = {
        b'bash': 'sh',
        b'perl': 'pl',
        b'python3': 'py',
        b'sh': 'sh',
    }

    def is_valid_shebang(self, first_line, filepath):
        """Whether ``first_line`` is an acceptable shebang line for ``filepath``.

        The line must have one of the accepted forms, name a known
        interpreter, and the file name must have the extension associated
        with that interpreter.
        """
        match = re.match(self._shebang_re, first_line)
        if match is None:
            return False
        # Exactly one of the two groups is set, depending on which
        # alternative of the regular expression matched.
        interpreter = match.group(1) or match.group(2)
        expected_extension = self._extensions.get(interpreter)
        if expected_extension is None:
            return False
        return filepath.endswith('.' + expected_extension)

    def check_file_for_issue(self, filepath):
        """Check that the shebang line is consistent with executability."""
        executable = os.access(filepath, os.X_OK)
        with open(filepath, "rb") as f:
            first_line = f.readline()
        has_shebang = first_line.startswith(b'#!')
        if has_shebang and executable:
            if not self.is_valid_shebang(first_line, filepath):
                self.files_with_issues[filepath] = [1]
        elif has_shebang or executable:
            # Either a shebang on a non-executable file, or an executable
            # without a shebang: report the file as a whole.
            self.files_with_issues[filepath] = None
200
201
class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files whose last line is incomplete, i.e. files that do not
    end with a newline character."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Record ``filepath`` if its last byte is not a newline."""
        with open(filepath, "rb") as f:
            try:
                f.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. Failing to seek
                # to 1 before the end means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            last_byte = f.read(1)
        if last_byte != b"\n":
            self.files_with_issues[filepath] = None
221
222
class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Record ``filepath`` if it starts with the UTF-8 byte order mark."""
        with open(filepath, "rb") as f:
            # Only the first few bytes matter: don't load the whole file.
            prefix = f.read(len(codecs.BOM_UTF8))
        if prefix == codecs.BOM_UTF8:
            self.files_with_issues[filepath] = None
236
237
class UnicodeIssueTracker(LineIssueTracker):
    """Track lines that are not valid UTF-8 or contain forbidden characters."""

    heading = "Invalid UTF-8 or forbidden character:"

    # Lines must be valid UTF-8 and consist only of explicitly allowed
    # characters. Everything that isn't a simple non-blank, non-zero-width
    # glyph is deliberately excluded, apart from a very small set (tab,
    # ordinary space, line breaks, "basic" no-break space and soft hyphen).
    # In particular, non-ASCII control characters, combining characters, and
    # Unicode state changes (e.g. right-to-left text) are forbidden.
    # Some characters with a risk of visual confusion are nonetheless
    # allowed, for example U+002D HYPHEN-MINUS vs U+00AD SOFT HYPHEN vs
    # U+2010 HYPHEN, or U+0041 LATIN CAPITAL LETTER A vs
    # U+0391 GREEK CAPITAL LETTER ALPHA.
    GOOD_CHARACTERS = (
        '\t\n\r -~' # ASCII (tabs and line endings are checked separately)
        '\u00A0-\u00FF' # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
        '\u2010-\u2027\u2030-\u205E' # General Punctuation (printable)
        '\u2070\u2071\u2074-\u208E\u2090-\u209C' # Superscripts and Subscripts
        '\u2190-\u21FF' # Arrows
        '\u2200-\u22FF' # Mathematical Symbols
        '\u2500-\u257F' # Box Drawings characters used in markdown trees
    )
    # A line is good if it consists solely of the characters and ranges
    # above and of anything classified as a word constituent.
    GOOD_CHARACTERS_RE = re.compile(r'[\w{}]+\Z'.format(GOOD_CHARACTERS))

    def issue_with_line(self, line, _filepath, line_number):
        """Whether ``line`` fails to decode or has a forbidden character."""
        try:
            decoded = line.decode('utf-8')
        except UnicodeDecodeError:
            return True
        at_start_of_file = (line_number == 1)
        if at_start_of_file and decoded.startswith('\uFEFF'):
            # Ignore a BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the very
            # beginning. Whether a given file may have a BOM is the
            # business of Utf8BomIssueTracker.
            decoded = decoded[1:]
        return self.GOOD_CHARACTERS_RE.match(decoded) is None
277
class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track lines containing a CR in files that should have Unix line
    endings (every checked file that is not a Windows file)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Apply the usual exemptions, and also skip Windows files."""
        return (super().should_check_file(filepath) and
                not is_windows_file(filepath))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` contains a carriage return anywhere."""
        return line.find(b"\r") != -1
290
291
class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track lines with non-Windows line endings (i.e. CR or LF not in CRLF)
    in Windows files."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Apply the usual exemptions, and only keep Windows files."""
        return (super().should_check_file(filepath) and
                is_windows_file(filepath))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` lacks a CRLF terminator or has a stray CR."""
        if not line.endswith(b"\r\n"):
            return True
        # The line ends with CRLF: look for a CR before the terminator.
        return b"\r" in line[:-2]
Gilles Peskine545e13f2020-03-24 22:29:11 +0100304
305
class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines that have whitespace before the line terminator."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` has whitespace left once CR/LF are removed."""
        without_eol = line.rstrip(b"\r\n")
        return without_eol != without_eol.rstrip()
314
315
class TabIssueTracker(LineIssueTracker):
    """Track lines that contain a tab character."""

    heading = "Tabs present:"
    suffix_exemptions = frozenset((
        ".make",
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/Makefile",
        "/Makefile.inc",
        "/generate_visualc_files.pl",
    ))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` contains a tab anywhere."""
        return line.find(b"\t") != -1
331
332
class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` looks like a leftover git conflict marker."""
        # Start/end markers, plus the common-ancestor marker that appears
        # with merge.conflictStyle=diff3.
        if line.startswith((b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')):
            return True
        # A line consisting solely of '=======' is not reported in .md files.
        if _filepath.endswith('.md'):
            return False
        return line.rstrip(b'\r\n') == b'======='
349
Darryl Green10d9ce32018-02-28 10:02:55 +0000350
def this_location():
    """Return the base name of this source file and a line number in it.

    The line number is the one that ``inspect.getframeinfo`` reports for
    this function's own frame, so it depends on where this function is
    physically located in the file.
    """
    frame = inspect.currentframe()
    # inspect.currentframe() may return None on Python implementations
    # without stack frame support.
    assert frame is not None
    info = inspect.getframeinfo(frame)
    return os.path.basename(info.filename), info.lineno
# Evaluated at import time. This function is defined immediately before
# LicenseIssueTracker, so the line number marks a position just above
# that class.
THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
357
class LicenseIssueTracker(LineIssueTracker):
    """Check copyright statements and license indications.

    Only the correctness of statements that are present is checked. The
    presence of statements in each file is not enforced.
    """

    heading = "License issue:"

    LICENSE_EXEMPTION_RE_LIST = [
        # Third-party code may be under a different license, except for
        # the whitelisted third-party modules.
        r'3rdparty/(?!(p256-m)/.*)',
        # Documentation that explains the license may trigger accidental
        # false positives.
        r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
        # Files imported from TF-M that are only used in test builds
        # may be under a different license.
        r'configs/crypto_config_profile_medium\.h\Z',
        r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z',
        # Third-party file.
        r'dco\.txt\Z',
    ]
    # Skip binary files as well as the license-specific exemptions.
    path_exemptions = re.compile(
        '|'.join(BINARY_FILE_PATH_RE_LIST + LICENSE_EXEMPTION_RE_LIST)
    )

    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    # Catch "Copyright foo", "Copyright (C) foo", the same with a
    # copyright sign instead of "(C)", etc.
    COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)

    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    # Group 1 is the header key as spelled in the file (matched
    # case-insensitively); group 3 is the identifier, if well-formed.
    SPDX_RE = re.compile(rb'.*?(' +
                         re.escape(SPDX_HEADER_KEY) +
                         rb')(:\s*(.*?)\W*\Z|.*)', re.I)

    LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join((
        rb'Apache License',
        rb'General Public License',
    )) + rb')', re.I)

    def __init__(self):
        super().__init__()
        # Record what problem was caused. We can't easily report it due to
        # the structure of the script. To be fixed after
        # https://github.com/Mbed-TLS/mbedtls/pull/2506
        self.problem = None

    def issue_with_line(self, line, filepath, line_number):
        """Whether ``line`` has a wrong copyright or license statement.

        When an issue is found, a short description of it is stored in
        ``self.problem``.
        """
        #pylint: disable=too-many-return-statements

        # endswith() is used here rather than the more correct
        # os.path.basename() because experimentally, it makes a significant
        # difference to the running time.
        in_this_file = filepath.endswith(THIS_FILE_BASE_NAME)
        if in_this_file and \
           line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
            # The code in this class would cause false positives.
            # The rest of this file is skipped too: it is highly unlikely
            # to contain any problematic statements since those are put
            # near the top of files.
            return False

        copyright_match = self.COPYRIGHT_RE.match(line)
        if copyright_match and \
           copyright_match.group(1) != self.COPYRIGHT_HOLDER:
            self.problem = 'Invalid copyright line'
            return True

        spdx_match = self.SPDX_RE.match(line)
        if spdx_match is not None:
            key, identifier = spdx_match.group(1, 3)
            spdx_problem = None
            if key != self.SPDX_HEADER_KEY:
                spdx_problem = 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
            elif not identifier:
                spdx_problem = 'Improperly formatted SPDX license identifier'
            elif identifier != self.LICENSE_IDENTIFIER:
                spdx_problem = 'Wrong SPDX license identifier'
            if spdx_problem is not None:
                self.problem = spdx_problem
                return True

        if self.LICENSE_MENTION_RE.match(line):
            self.problem = 'Suspicious license mention'
            return True

        return False
443
444
class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        # NOTE(review): presumably this verifies that the current directory
        # is the root of the source tree -- confirm in mbedtls_dev.build_tree.
        build_tree.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        self.issues_to_check = [
            ShebangIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnicodeIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
            LicenseIssueTracker(),
        ]

    def setup_logger(self, log_file, level=logging.INFO):
        """Configure the root logger to report issues.

        Messages go to ``log_file`` if it is set, and to the default
        stream of ``logging.StreamHandler`` otherwise.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
            self.logger.addHandler(handler)
        else:
            console = logging.StreamHandler()
            self.logger.addHandler(console)

    @staticmethod
    def collect_files():
        """Return the list of files tracked by git.

        File names must be ASCII: any other name makes the decoding raise
        ``UnicodeDecodeError``.
        """
        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        # The output is NUL-terminated, so the last element of the split
        # is an empty string that must be dropped.
        bytes_filepaths = bytes_output.split(b'\0')[:-1]
        ascii_filepaths = [fp.decode('ascii') for fp in bytes_filepaths]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Run every issue tracker on every file that it applies to."""
        # List the files once rather than spawning git for each tracker.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues that were found.

        Return 1 if any tracker recorded an issue and 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code
502
503
def run_main():
    """Parse the command line, run all the checks and exit.

    The exit status of the process is the value returned by
    ``IntegrityChecker.output_issues``.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    options = arg_parser.parse_args()
    checker = IntegrityChecker(options.log_file)
    checker.check_files()
    sys.exit(checker.output_issues())
514
515
# Run the checks only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run_main()