blob: f494f7666ca0f8f4b7f68b4b664b59667fc0fb86 [file] [log] [blame]
Yuto Takano39639672021-08-05 19:47:48 +01001#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent.
"""
Yuto Takano39639672021-08-05 19:47:48 +010022
23import argparse
24import textwrap
Darryl Greend5802922018-05-08 15:30:59 +010025import os
26import sys
27import traceback
28import re
29import shutil
30import subprocess
31import logging
32
# House-style naming patterns that parsed names are validated against.
# MACRO_PATTERN: upper-case names with an MBEDTLS_ or PSA_ prefix that do not
# end in an underscore (applied to macros and enum constants).
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# IDENTIFIER_PATTERN: lower-case names with an mbedtls_ or psa_ prefix that do
# not end in an underscore (applied to identifiers declared in headers).
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
Yuto Takano39639672021-08-05 19:47:48 +010036
class Match:
    """
    A single name found while parsing, together with where it was found.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * pos: the position of the match on that line, as supplied by the parser.
    * name: the matched name itself.
    """
    def __init__(self, filename, line, pos, name):
        self.filename, self.line = filename, line
        self.pos, self.name = pos, name

    def __str__(self):
        # A match prints as the bare name it captured.
        return self.name
46
class Problem:
    """
    Base class for every reported problem. It owns the text wrapper that
    subclasses use to format their message as an indented bullet point.
    """
    def __init__(self):
        # The first line of a message carries the bullet; continuation lines
        # get the subsequent indent.
        self.textwrapper = textwrap.TextWrapper(
            initial_indent=" * ",
            subsequent_indent=" ")
52
class SymbolNotInHeader(Problem):
    """
    Problem raised for a symbol that appears in the nm output but is not
    declared in any header file.

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def __str__(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
63
class PatternMismatch(Problem):
    """
    Problem raised for a name that does not follow the required pattern.

    Fields:
    * pattern: the expected pattern (or a description of the rule broken).
    * match: the Match object of the offending name.
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def __str__(self):
        message = (
            "{0}: '{1}' does not match the required pattern '{2}'."
        ).format(self.match.filename, self.match.name, self.pattern)
        return self.textwrapper.fill(message)
74
class Typo(Problem):
    """
    Problem raised for a word beginning with MBED that was not found among
    the known macros or enum constants, and so is likely a typo.

    Fields:
    * match: the Match object of the suspicious name.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def __str__(self):
        message = (
            "{0}: '{1}' looks like a typo. It was not found in any macros or "
            "any enums. If this is not a typo, put //no-check-names after it."
        ).format(self.match.filename, self.match.name)
        return self.textwrapper.fill(message)
Darryl Greend5802922018-05-08 15:30:59 +010085
class NameCheck(object):
    """
    Parses names out of the Mbed TLS sources and built libraries, then checks
    them against the house naming rules.

    Typical use: construct (must be done from the project root), call
    setup_logger(), parse_names_in_source(), perform_checks(), and finally read
    return_code (0 = pass, 1 = naming problems, 2 = build/tooling failure).
    """
    def __init__(self):
        self.log = None
        self.check_repo_path()
        self.return_code = 0
        self.excluded_files = ["compat-1.3.h"]
        # Names matching this pattern are never reported as typos.
        self.typo_check_pattern = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$"
        # Filled in by parse_names_in_source().
        self.parse_result = {}

    def set_return_code(self, return_code):
        """Raise the stored return code; it is never lowered."""
        if return_code > self.return_code:
            self.return_code = return_code

    def setup_logger(self, verbose=False):
        """
        Set up the root logger with a stream handler and change its level from
        the default WARNING to INFO (or DEBUG when verbose). Loggers are better
        than print statements since their verbosity can be controlled.
        """
        self.log = logging.getLogger()
        if verbose:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)
        self.log.addHandler(logging.StreamHandler())

    def check_repo_path(self):
        """
        Check that the current working directory is the project root (three
        directory levels above this script), and raise an exception if not.
        """
        current_dir = os.path.realpath('.')
        root_dir = os.path.dirname(os.path.dirname(
            os.path.dirname(os.path.realpath(__file__))))
        if current_dir != root_dir:
            raise Exception("Must be run from Mbed TLS root")

    def get_files(self, extension, directory):
        """
        Collect the files below a directory that have the given extension.

        Args:
            extension: The file extension to look for, without the dot.
            directory: The directory to search recursively.

        Returns:
            A sorted list of file paths, without any file whose base name is
            listed in self.excluded_files.
        """
        filenames = []
        for root, _, files in sorted(os.walk(directory)):
            for filename in sorted(files):
                if (filename not in self.excluded_files and
                        filename.endswith("." + extension)):
                    filenames.append(os.path.join(root, filename))
        return filenames

    def parse_macros(self, header_files):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the macros.
        """
        MACRO_REGEX = r"#define (?P<macro>\w+)"
        # Prefixes of #define'd names that are not subject to the naming rules.
        NON_MACROS = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        macros = []

        for header_file in header_files:
            # Decode explicitly so results do not depend on the locale.
            with open(header_file, "r", encoding="utf-8") as header:
                for line in header:
                    macro = re.search(MACRO_REGEX, line)
                    if (macro and
                            not macro.group("macro").startswith(NON_MACROS)):
                        macros.append(Match(
                            header_file,
                            line,
                            (macro.start(), macro.end()),
                            macro.group("macro")))

        return macros

    def parse_MBED_names(self, files):
        """
        Parse all words in the file that begin with MBED. Includes macros.

        Args:
            files: A list of filepaths to look through.

        Returns:
            A list of Match objects for words beginning with MBED.
        """
        MBED_names = []

        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line in fp:
                    for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line):
                        MBED_names.append(Match(
                            filename,
                            line,
                            (name.start(), name.end()),
                            name.group(0)
                        ))

        return MBED_names

    def parse_enum_consts(self, header_files):
        """
        Parse all enum value constants that are declared.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the enum constants.
        """
        enum_consts = []

        for header_file in header_files:
            # Emulate a finite state machine to parse enum declarations.
            # 0 = outside an enum
            # 1 = inside the braces of an enum
            # 2 = "enum" seen, opening brace expected on a following line
            state = 0
            with open(header_file, "r", encoding="utf-8") as header:
                for line in header:
                    if state == 0 and re.match(r"^(typedef )?enum {", line):
                        state = 1
                    elif state == 0 and re.match(r"^(typedef )?enum", line):
                        state = 2
                    elif state == 2 and re.match(r"^{", line):
                        state = 1
                    elif state == 1 and re.match(r"^}", line):
                        state = 0
                    elif state == 1:
                        enum_const = re.match(r"^\s*(?P<enum_const>\w+)", line)
                        if enum_const:
                            enum_consts.append(Match(
                                header_file,
                                line,
                                (enum_const.start(), enum_const.end()),
                                enum_const.group("enum_const")))

        return enum_consts

    @staticmethod
    def _block_comment_state(line, in_block_comment):
        """
        Work out how a line interacts with C block comments.

        Args:
            line: The line being examined.
            in_block_comment: Whether a block comment was open before the line.

        Returns:
            A tuple (skip_line, in_block_comment): whether the line begins,
            ends or lies inside a block comment (and so must not be parsed),
            and whether a block comment is still open after the line.
        """
        opener = line.rfind("/*")
        closer = line.rfind("*/")

        if opener != -1:
            # The comment stays open unless a terminator follows the last
            # opener. The "+ 2" stops the "*" of "/*" being reused in "/*/".
            return True, closer < opener + 2
        if in_block_comment:
            return True, closer == -1
        return False, False

    @staticmethod
    def _find_identifier(line):
        """
        Heuristically find the identifier declared on a line.

        Args:
            line: A header line that is neither a comment nor a directive.

        Returns:
            A tuple (name, (start, end)) for the identifier, where the span is
            that of the name alone, or None if nothing was found.
        """
        found = re.search(
            # Matches: "mbedtls_aes_init("
            r"([a-zA-Z_][a-zA-Z0-9_]*)\(|"
            # Matches: "(*f_rng)("
            r"\(\*(.+)\)\(|"
            # Matches the last word on the line, e.g. "len" in "size_t len;"
            r"(\w+)\W*$",
            line
        )
        if not found:
            return None

        # Only one alternative can match, so at most one group is set.
        for index, name in enumerate(found.groups(), start=1):
            if name:
                return name, found.span(index)
        return None

    @staticmethod
    def _without_identifier_macros(all_macros, identifiers):
        """
        Drop the macros whose name is also a declared identifier, such as
        mbedtls_printf or mbedtls_calloc. Comparison is by name, because two
        Match objects for the same name never compare equal.
        """
        identifier_names = {identifier.name for identifier in identifiers}
        return [macro for macro in all_macros
                if macro.name not in identifier_names]

    def parse_identifiers(self, header_files):
        """
        Parse all lines of a header where a function identifier is declared,
        based on some heuristics. Assumes every line that is not a comment or a
        preprocessor directive contains some identifier.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the identifiers.
        """
        EXCLUDED_DECLARATIONS = (
            r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)"
        )

        identifiers = []

        for header_file in header_files:
            with open(header_file, "r", encoding="utf-8") as header:
                in_block_comment = False

                for line in header:
                    # Skip any line that begins, ends or lies inside a block
                    # comment. A comment opened and closed on the same line
                    # must not leave the state stuck as "in comment".
                    skip_line, in_block_comment = self._block_comment_state(
                        line, in_block_comment)
                    if skip_line:
                        continue

                    # Skip line comments and preprocessor directives.
                    if re.match(r"(//|#)", line):
                        continue

                    if re.match(EXCLUDED_DECLARATIONS, line):
                        continue

                    found = self._find_identifier(line)
                    if found:
                        name, span = found
                        # Store the captured name, not the whole regex match,
                        # which would include "(" or trailing punctuation.
                        identifiers.append(Match(
                            header_file,
                            line,
                            span,
                            name))

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.

        The configuration header is backed up first and always restored. A
        failing build step is logged and sets the return code to 2.

        Returns:
            A sorted list of symbols found in the libraries (empty if the
            build or the nm run failed).
        """
        symbols = []

        # Back up the config and compile with the full configuration.
        shutil.copy("include/mbedtls/mbedtls_config.h",
                    "include/mbedtls/mbedtls_config.h.bak")
        try:
            subprocess.run(
                ["perl", "scripts/config.pl", "full"],
                encoding=sys.stdout.encoding,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            subprocess.run(
                ["make", "clean", "lib"],
                env=my_environment,
                encoding=sys.stdout.encoding,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm(
                ["library/libmbedcrypto.a",
                 "library/libmbedtls.a",
                 "library/libmbedx509.a"])

            symbols.sort()

            subprocess.run(
                ["make", "clean"],
                encoding=sys.stdout.encoding,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.error(error)
            # Captured output (if any) would otherwise be lost.
            if error.output:
                self.log.debug(error.output)
            self.set_return_code(2)
        finally:
            shutil.move("include/mbedtls/mbedtls_config.h.bak",
                        "include/mbedtls/mbedtls_config.h")

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
            object_files: A list of paths to the libraries to inspect.

        Returns:
            A list of the symbols defined in any of the object files. Lines
            of nm output that cannot be parsed are logged as errors.
        """
        UNDEFINED_SYMBOL = r"^\S+: +U |^$|^\S+:$"
        VALID_SYMBOL = r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)"

        symbols = []

        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                encoding=sys.stdout.encoding,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
        for line in nm_output.splitlines():
            if not re.match(UNDEFINED_SYMBOL, line):
                symbol = re.match(VALID_SYMBOL, line)
                if symbol:
                    symbols.append(symbol.group('symbol'))
                else:
                    self.log.error(line)

        return symbols

    def parse_names_in_source(self):
        """
        Calls each parsing function to retrieve various elements of the code,
        together with their source location. Puts the parsed values in the
        internal variable self.parse_result.
        """
        self.log.info("Parsing source code...")

        m_headers = self.get_files("h", os.path.join("include", "mbedtls"))
        p_headers = self.get_files("h", os.path.join("include", "psa"))
        t_headers = ["3rdparty/everest/include/everest/everest.h",
                     "3rdparty/everest/include/everest/x25519.h"]
        l_headers = self.get_files("h", "library")
        libraries = self.get_files("c", "library") + [
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"]

        all_macros = self.parse_macros(
            m_headers + p_headers + t_headers + l_headers)
        enum_consts = self.parse_enum_consts(m_headers + t_headers)
        identifiers = self.parse_identifiers(m_headers + p_headers + t_headers)
        symbols = self.parse_symbols()
        mbed_names = self.parse_MBED_names(
            m_headers + p_headers + t_headers + l_headers + libraries)

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        macros = self._without_identifier_macros(all_macros, identifiers)

        self.log.info("Found:")
        self.log.info(" {} Macros".format(len(all_macros)))
        self.log.info(" {} Enum Constants".format(len(enum_consts)))
        self.log.info(" {} Identifiers".format(len(identifiers)))
        self.log.info(" {} Exported Symbols".format(len(symbols)))
        self.log.info("Analysing...")

        self.parse_result = {
            "macros": macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_names": mbed_names
        }

    def perform_checks(self):
        """
        Perform each check in order, output its PASS/FAIL status. Maintain an
        overall test status, and output that at the end.
        """
        problems = 0

        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", MACRO_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
        else:
            self.log.info("PASS")

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.

        Outputs to the logger the PASS/FAIL status, followed by the location of
        problems.

        Returns the number of problems that need fixing.
        """
        # Build the lookup once instead of scanning every identifier for
        # every symbol.
        declared_names = {
            identifier_match.name
            for identifier_match in self.parse_result["identifiers"]}

        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names]

        if problems:
            self.set_return_code(1)
            self.log.info("All symbols in header: FAIL")
            for problem in problems:
                self.log.info(str(problem) + "\n")
        else:
            self.log.info("All symbols in header: PASS")

        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern
        and contain no double underscore.

        Args:
            group_to_check: The key of self.parse_result to check.
            check_pattern: The regex pattern every name must match.

        Outputs to the logger the PASS/FAIL status, followed by the location of
        problems.

        Returns the number of problems that need fixing.
        """
        problems = []
        for item_match in self.parse_result[group_to_check]:
            if not re.match(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            if "__" in item_match.name:
                problems.append(PatternMismatch("double underscore", item_match))

        if problems:
            self.set_return_code(1)
            self.log.info("Naming patterns of {}: FAIL".format(group_to_check))
            for problem in problems:
                self.log.info(str(problem) + "\n")
        else:
            self.log.info("Naming patterns of {}: PASS".format(group_to_check))

        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED
        are either defined as macros or as enum constants, unless they match
        self.typo_check_pattern.

        Outputs to the logger the PASS/FAIL status, followed by the location of
        problems.

        Returns the number of problems that need fixing.
        """
        all_caps_names = {
            match.name for match
            in self.parse_result["macros"] + self.parse_result["enum_consts"]}

        problems = [
            Typo(name_match)
            for name_match in self.parse_result["mbed_names"]
            if name_match.name not in all_caps_names
            and not re.search(self.typo_check_pattern, name_match.name)]

        if problems:
            self.set_return_code(1)
            self.log.info("Likely typos: FAIL")
            for problem in problems:
                self.log.info(str(problem) + "\n")
        else:
            self.log.info("Likely typos: PASS")

        return len(problems)
Darryl Greend5802922018-05-08 15:30:59 +0100509
def main():
    """
    Parse the command-line arguments, run the name checks and exit with the
    resulting status code. Any unexpected exception is printed as a traceback
    and turned into exit status 2.
    """
    description = (
        "This script confirms that the naming of all symbols and identifiers "
        "in Mbed TLS are consistent with the house style and are also "
        "self-consistent.\n\n"
        "Expected to be run from the MbedTLS root directory.")

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="enable script debug outputs")
    options = parser.parse_args()

    try:
        checker = NameCheck()
        checker.setup_logger(verbose=options.verbose)
        checker.parse_names_in_source()
        checker.perform_checks()
        # SystemExit is not an Exception subclass, so it passes through the
        # handler below untouched.
        sys.exit(checker.return_code)
    except Exception:
        traceback.print_exc()
        sys.exit(2)


if __name__ == "__main__":
    main()