blob: a2a9dfa8d018f683cabeecd625edad9e406d3c9d [file] [log] [blame]
Darryl Green10d9ce32018-02-28 10:02:55 +00001#!/usr/bin/env python3
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02002
Bence Szépkúti1e148272020-08-07 13:07:28 +02003# Copyright The Mbed TLS Contributors
Dave Rodgman16799db2023-11-02 19:47:20 +00004# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
Gilles Peskine7dfcfce2019-07-04 19:31:02 +02005
Darryl Green10d9ce32018-02-28 10:02:55 +00006"""
Darryl Green10d9ce32018-02-28 10:02:55 +00007This script checks the current state of the source code for minor issues,
8including incorrect file permissions, presence of tabs, non-Unix line endings,
Gilles Peskine55b49ee2019-07-04 19:31:33 +02009trailing whitespace, and presence of UTF-8 BOM.
Darryl Green10d9ce32018-02-28 10:02:55 +000010Note: requires python 3, must be run from Mbed TLS root.
11"""
12
Darryl Green10d9ce32018-02-28 10:02:55 +000013import argparse
Darryl Green10d9ce32018-02-28 10:02:55 +000014import codecs
Gilles Peskinef2fb9f62023-11-03 14:13:55 +010015import inspect
Gilles Peskine990030b2023-11-03 13:55:00 +010016import logging
17import os
Gilles Peskine0598db82020-05-10 16:57:16 +020018import re
Gilles Peskine3e2ee3c2020-05-10 17:18:06 +020019import subprocess
Darryl Green10d9ce32018-02-28 10:02:55 +000020import sys
Gilles Peskineac9e7c02020-08-11 15:11:50 +020021try:
22 from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
23except ImportError:
24 pass
Darryl Green10d9ce32018-02-28 10:02:55 +000025
Gilles Peskined9071e72022-09-18 21:17:09 +020026import scripts_path # pylint: disable=unused-import
27from mbedtls_dev import build_tree
28
Darryl Green10d9ce32018-02-28 10:02:55 +000029
class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class and implement `check_file_for_issue` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
     will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
    tree) matches this regular expression will not be checked. This can be
    ``None`` to match no path. Paths are normalized and converted to ``/``
    separators before matching.

    ``heading``: human-readable description of the issue
    """

    suffix_exemptions = frozenset() #type: FrozenSet[str]
    path_exemptions = None #type: Optional[Pattern[str]]
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        # Maps a file path to the list of line numbers where the issue was
        # found. Subclasses that flag a file as a whole store None instead
        # of a list.
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists.
        # Replace each separator individually: str.split() with a
        # multi-character argument would look for that exact sequence of
        # characters rather than for any one of them.
        for sep in (os.path.sep, os.path.altsep):
            if sep and sep != '/':
                filepath = filepath.replace(sep, '/')
        return filepath

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        # str.endswith() accepts a tuple of candidates; an empty tuple
        # never matches.
        if filepath.endswith(tuple(self.suffix_exemptions)):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged if no issue was recorded. Otherwise log the
        heading, then one entry per file in sorted order (with the line
        numbers if any were recorded), then an empty line.
        """
        if not self.files_with_issues:
            return
        logger.info(self.heading)
        for filename, lines in sorted(self.files_with_issues.items()):
            if lines:
                logger.info("{}: {}".format(
                    filename, ", ".join(str(x) for x in lines)
                ))
            else:
                # The issue applies to the file as a whole.
                logger.info(filename)
        logger.info("")
105
# Patterns for the paths of files that the line-oriented checks skip
# because they are binary (or otherwise not plain text). The list is kept
# separately from the compiled expression so that other exemption lists
# can be built on top of it.
BINARY_FILE_PATH_RE_LIST = [
    r'docs/.*\.pdf\Z',
    r'programs/fuzz/corpuses/[^.]+\Z',
    r'tests/data_files/[^.]+\Z',
    r'tests/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'tests/data_files/.*\.req\.[^/]+\Z',
    r'tests/data_files/.*malformed[^/]+\Z',
    r'tests/data_files/format_pkcs12\.fmt\Z',
    r'tests/data_files/.*\.bin\Z',
]
# Single alternation of all the patterns above.
_BINARY_FILE_ALTERNATION = '|'.join(BINARY_FILE_PATH_RE_LIST)
BINARY_FILE_PATH_RE = re.compile(_BINARY_FILE_ALTERNATION)
117
class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement `issue_with_line`.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath, line_number):
        """Check the specified line for the issue that this class is for.

        ``line`` is the raw content of the line as bytes, including its
        line terminator if it has one. ``line_number`` starts at 1.
        Return a true value if the line has the issue.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record an issue at ``line_number`` if ``line`` has the issue."""
        if self.issue_with_line(line, filepath, line_number):
            self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check the lines of the specified file.

        Subclasses must implement the ``issue_with_line`` method.
        """
        # Read in binary mode: checkers work on bytes and do their own
        # decoding where they need it.
        with open(filepath, "rb") as f:
            for line_number, line in enumerate(f, start=1):
                self.check_file_line(filepath, line, line_number)
147
Gilles Peskine2c618732020-03-24 22:26:01 +0100148
def is_windows_file(filepath):
    """Whether ``filepath`` has one of the extensions treated as Windows files.

    The comparison is on the exact extension as returned by
    ``os.path.splitext`` (case-sensitive, including the leading dot).
    """
    extension = os.path.splitext(filepath)[1]
    return extension in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')
Gilles Peskine2c618732020-03-24 22:26:01 +0100152
153
class ShebangIssueTracker(FileIssueTracker):
    """Track files with a bad, missing or extraneous shebang line.

    Executable scripts must start with a valid shebang (#!) line.
    """

    heading = "Invalid shebang line:"

    # Allow either /bin/sh, /bin/bash, or /usr/bin/env.
    # Allow at most one argument (this is a Linux limitation).
    # For sh and bash, the argument if present must be options.
    # For env, the argument must be the base name of the interpreter.
    _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
                             rb'|/usr/bin/env ([^\n /]+))$')
    # Expected file name extension for each accepted interpreter.
    _extensions = {
        b'bash': 'sh',
        b'perl': 'pl',
        b'python3': 'py',
        b'sh': 'sh',
    }

    def is_valid_shebang(self, first_line, filepath):
        """Whether ``first_line`` is an accepted shebang for ``filepath``.

        The line must have one of the accepted forms, name a known
        interpreter, and the file name must carry the extension that goes
        with that interpreter.
        """
        match = self._shebang_re.match(first_line)
        if match is None:
            return False
        # Exactly one of the two groups is set, depending on which
        # alternative of the expression matched.
        interpreter = match.group(1) or match.group(2)
        expected_extension = self._extensions.get(interpreter)
        if expected_extension is None:
            return False
        return filepath.endswith('.' + expected_extension)

    def check_file_for_issue(self, filepath):
        """Check that ``filepath`` is executable iff it has a valid shebang."""
        is_executable = os.access(filepath, os.X_OK)
        with open(filepath, "rb") as f:
            first_line = f.readline()
        has_shebang = first_line.startswith(b'#!')
        if has_shebang != is_executable:
            # Either a shebang on a non-executable file, or an executable
            # without a shebang: the file as a whole is at fault.
            self.files_with_issues[filepath] = None
        elif has_shebang and not self.is_valid_shebang(first_line, filepath):
            self.files_with_issues[filepath] = [1]
199
200
class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files that end with an incomplete line
    (no newline character at the end of the last line)."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its last byte is not a line feed."""
        with open(filepath, "rb") as f:
            try:
                f.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            last_byte = f.read(1)
        if last_byte != b"\n":
            self.files_with_issues[filepath] = None
220
221
class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if it starts with the UTF-8 byte order mark."""
        with open(filepath, "rb") as f:
            # Only the first few bytes matter: don't read the whole file.
            prefix = f.read(len(codecs.BOM_UTF8))
        if prefix == codecs.BOM_UTF8:
            self.files_with_issues[filepath] = None
235
236
class UnicodeIssueTracker(LineIssueTracker):
    """Track lines with invalid characters or invalid text encoding."""

    heading = "Invalid UTF-8 or forbidden character:"

    # Only allow valid UTF-8, and only other explicitly allowed characters.
    # We deliberately exclude all characters that aren't a simple non-blank,
    # non-zero-width glyph, apart from a very small set (tab, ordinary space,
    # line breaks, "basic" no-break space and soft hyphen). In particular,
    # non-ASCII control characters, combining characters, and Unicode state
    # changes (e.g. right-to-left text) are forbidden.
    # Note that we do allow some characters with a risk of visual confusion,
    # for example U+002D HYPHEN-MINUS vs U+00AD SOFT HYPHEN vs U+2010 HYPHEN,
    # or U+0041 LATIN CAPITAL LETTER A vs U+0391 GREEK CAPITAL LETTER ALPHA.
    GOOD_CHARACTERS = (
        '\t\n\r -~' # ASCII (tabs and line endings are checked separately)
        '\u00A0-\u00FF' # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
        '\u2010-\u2027\u2030-\u205E' # General Punctuation (printable)
        '\u2070\u2071\u2074-\u208E\u2090-\u209C' # Superscripts and Subscripts
        '\u2190-\u21FF' # Arrows
        '\u2200-\u22FF' # Mathematical Symbols
        '\u2500-\u257F' # Box Drawings characters used in markdown trees
    )
    # Allow any of the characters and ranges above, and anything classified
    # as a word constituent.
    GOOD_CHARACTERS_RE = re.compile(r'[\w' + GOOD_CHARACTERS + r']+\Z')

    def issue_with_line(self, line, _filepath, line_number):
        """Whether ``line`` is not valid UTF-8 or has a forbidden character."""
        try:
            text = line.decode('utf-8')
        except UnicodeDecodeError:
            return True
        starts_with_bom = line_number == 1 and text.startswith('\uFEFF')
        if starts_with_bom:
            # Strip BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the beginning.
            # Which files are allowed to have a BOM is handled in
            # Utf8BomIssueTracker.
            text = text[1:]
        return self.GOOD_CHARACTERS_RE.match(text) is None
276
class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check every non-exempt file except the Windows ones."""
        return (super().should_check_file(filepath) and
                not is_windows_file(filepath))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` contains a carriage return anywhere."""
        return b"\r" in line
289
290
class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only the non-exempt files that are Windows files."""
        return (super().should_check_file(filepath) and
                is_windows_file(filepath))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` lacks a final CRLF or has a CR elsewhere."""
        if not line.endswith(b"\r\n"):
            return True
        # The line ends with CRLF: look for a stray CR before it.
        return b"\r" in line[:-2]
Gilles Peskine545e13f2020-03-24 22:29:11 +0100303
304
class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` has whitespace just before its line terminator."""
        # Drop the line terminator first, then see whether stripping the
        # remaining trailing whitespace removes anything more.
        content = line.rstrip(b"\r\n")
        return content != content.rstrip()
313
314
class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    # File name endings for which tabs are accepted.
    suffix_exemptions = frozenset((
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/Makefile",
        "/Makefile.inc",
        "/generate_visualc_files.pl",
    ))

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` contains a tab character anywhere."""
        has_tab = b"\t" in line
        return has_tab
329
330
class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath, _line_number):
        """Whether ``line`` looks like a leftover git conflict marker."""
        # Start-of-conflict and end-of-conflict markers, plus the
        # common-ancestor marker from merge.conflictStyle=diff3.
        if line.startswith((b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')):
            return True
        # A line of exactly seven equal signs separates the two sides of a
        # conflict. It is not reported in .md files.
        is_separator = line.rstrip(b'\r\n') == b'======='
        return is_separator and not _filepath.endswith('.md')
347
Darryl Green10d9ce32018-02-28 10:02:55 +0000348
def this_location():
    """Return the base name of this file and a line number inside this function."""
    current_frame = inspect.currentframe()
    assert current_frame is not None
    frame_info = inspect.getframeinfo(current_frame)
    base_name = os.path.basename(frame_info.filename)
    return base_name, frame_info.lineno
# The line number is only used as a position in this file that precedes
# the definition of LicenseIssueTracker.
THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
355
class LicenseIssueTracker(LineIssueTracker):
    """Check copyright statements and license indications.

    This class only checks that statements are correct if present. It does
    not enforce the presence of statements in each file.
    """

    heading = "License issue:"

    LICENSE_EXEMPTION_RE_LIST = [
        # Third-party code, other than whitelisted third-party modules,
        # may be under a different license.
        r'3rdparty/(?!(p256-m)/.*)',
        # Documentation explaining the license may have accidental
        # false positives.
        r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
        # Files imported from TF-M, and not used except in test builds,
        # may be under a different license.
        r'configs/crypto_config_profile_medium\.h\Z',
        r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z',
        # Third-party file.
        r'dco\.txt\Z',
    ]
    # Skip binary files as well as the license-specific exemptions.
    path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
                                          LICENSE_EXEMPTION_RE_LIST))

    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
    COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I)

    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    SPDX_RE = re.compile(br'.*?(' +
                         re.escape(SPDX_HEADER_KEY) +
                         br')(:\s*(.*?)\W*\Z|.*)', re.I)

    LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([
        rb'Apache License',
        rb'General Public License',
    ]) + rb')', re.I)

    def __init__(self):
        super().__init__()
        # Record what problem was caused. We can't easily report it due to
        # the structure of the script. To be fixed after
        # https://github.com/Mbed-TLS/mbedtls/pull/2506
        self.problem = None

    def _diagnose_line(self, line):
        """Return a description of the license problem in ``line``, or None."""
        #pylint: disable=too-many-return-statements

        copyright_match = self.COPYRIGHT_RE.match(line)
        if copyright_match and \
           copyright_match.group(1) != self.COPYRIGHT_HOLDER:
            return 'Invalid copyright line'

        spdx_match = self.SPDX_RE.match(line)
        if spdx_match:
            # The expression is case-insensitive, so a key that matched
            # but differs from the reference spelling is misspelled.
            if spdx_match.group(1) != self.SPDX_HEADER_KEY:
                return 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
            identifier = spdx_match.group(3)
            if not identifier:
                return 'Improperly formatted SPDX license identifier'
            if identifier != self.LICENSE_IDENTIFIER:
                return 'Wrong SPDX license identifier'

        if self.LICENSE_MENTION_RE.match(line):
            return 'Suspicious license mention'

        return None

    def issue_with_line(self, line, filepath, line_number):
        """Whether ``line`` has a bad copyright or license statement.

        When it does, a description of the problem is stored in
        ``self.problem``.
        """
        # Use endswith() rather than the more correct os.path.basename()
        # because experimentally, it makes a significant difference to
        # the running time.
        if filepath.endswith(THIS_FILE_BASE_NAME) and \
           line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
            # Avoid false positives from the code in this class.
            # Also skip the rest of this file, which is highly unlikely to
            # contain any problematic statements since we put those near the
            # top of files.
            return False

        problem = self._diagnose_line(line)
        if problem is None:
            return False
        self.problem = problem
        return True
441
442
class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        build_tree.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        # One tracker instance per kind of issue. Issues are reported in
        # this order.
        self.issues_to_check = [
            ShebangIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnicodeIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
            LicenseIssueTracker(),
        ]

    def setup_logger(self, log_file, level=logging.INFO):
        """Set up the root logger to write to ``log_file``.

        If ``log_file`` is empty or ``None``, log to the default stream of
        ``logging.StreamHandler`` instead.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
            self.logger.addHandler(handler)
        else:
            console = logging.StreamHandler()
            self.logger.addHandler(console)

    @staticmethod
    def collect_files():
        """Return the list of files tracked by git, as strings.

        Paths are decoded as ASCII, so a tracked file with a non-ASCII
        name causes a ``UnicodeDecodeError``.
        """
        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        # The output is NUL-terminated, so the last element is empty.
        bytes_filepaths = bytes_output.split(b'\0')[:-1]
        ascii_filepaths = [fp.decode('ascii') for fp in bytes_filepaths]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Run every issue tracker over every file it applies to."""
        # List the files once rather than once per tracker: each listing
        # runs a git subprocess.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues that were found.

        Return 1 if any tracker recorded an issue, 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code
500
501
def run_main():
    """Parse the command line, run all the checks and exit.

    The exit status is the value returned by
    ``IntegrityChecker.output_issues``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    options = parser.parse_args()
    checker = IntegrityChecker(options.log_file)
    checker.check_files()
    sys.exit(checker.output_issues())


if __name__ == "__main__":
    run_main()