blob: 5b8159681cc178ff45b0f05ce0d97577bcafcbee [file] [log] [blame]
Yuto Takano39639672021-08-05 19:47:48 +01001#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
Darryl Greend5802922018-05-08 15:30:59 +010018"""
Yuto Takano39639672021-08-05 19:47:48 +010019This script confirms that the naming of all symbols and identifiers in Mbed TLS
20are consistent with the house style and are also self-consistent.
Darryl Greend5802922018-05-08 15:30:59 +010021"""
Yuto Takano39639672021-08-05 19:47:48 +010022
23import argparse
24import textwrap
Darryl Greend5802922018-05-08 15:30:59 +010025import os
26import sys
27import traceback
28import re
29import shutil
30import subprocess
31import logging
32
# Naming patterns to check against.
# Macro-style names must start with MBEDTLS_ or PSA_, contain only upper-case
# letters, digits and underscores, and must not end with an underscore.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Identifiers follow the same shape in lower case, prefixed with mbedtls_ or
# psa_.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
Yuto Takano39639672021-08-05 19:47:48 +010036
class Match:
    """
    A name found while parsing, together with the place it was found.

    Attributes:
        filename: Path of the file the name was found in.
        line: Text of the line the name was found on.
        pos: Position information supplied by the parser (the parsers in this
             file pass a (start, end) tuple of offsets within the line).
        name: The name that was found.
    """
    def __init__(self, filename, line, pos, name):
        self.filename, self.line = filename, line
        self.pos, self.name = pos, name

    def __str__(self):
        """Return the matched name."""
        return self.name
46
class Problem:
    """
    Base class for reportable problems.

    Provides self.textwrapper, a text wrapper that subclasses use to lay out
    their message as an indented bullet point.
    """
    def __init__(self):
        self.textwrapper = textwrap.TextWrapper(
            initial_indent=" * ",
            subsequent_indent=" ")
52
class SymbolNotInHeader(Problem):
    """
    Problem reported for a symbol that appears in the output of nm but is not
    declared in any header file.

    Attributes:
        symbol_name: The name of the offending symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def __str__(self):
        """Return the wrapped, human-readable description of the problem."""
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
63
class PatternMismatch(Problem):
    """
    Problem reported for a name that does not follow the required pattern.

    Attributes:
        pattern: The expected pattern, as shown in the report.
        match: The Match object describing the offending name.
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def __str__(self):
        """Return the wrapped, human-readable description of the problem."""
        message = "{0}: '{1}' does not match the required pattern '{2}'.".format(
            self.match.filename, self.match.name, self.pattern)
        return self.textwrapper.fill(message)
74
class Typo(Problem):
    """
    Problem reported for a name that looks like a misspelt macro or enum
    constant.

    Attributes:
        match: The Match object describing the suspicious name.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def __str__(self):
        """Return the wrapped, human-readable description of the problem."""
        message = (
            "{0}: '{1}' looks like a typo. It was not found in any macros or "
            "any enums. If this is not a typo, put //no-check-names after it."
        ).format(self.match.filename, self.match.name)
        return self.textwrapper.fill(message)
Darryl Greend5802922018-05-08 15:30:59 +010085
86class NameCheck(object):
87 def __init__(self):
88 self.log = None
Darryl Greend5802922018-05-08 15:30:59 +010089 self.check_repo_path()
90 self.return_code = 0
Yuto Takanoe503d612021-08-05 20:14:05 +010091 self.excluded_files = ["bn_mul"]
Darryl Greend5802922018-05-08 15:30:59 +010092
93 def set_return_code(self, return_code):
94 if return_code > self.return_code:
95 self.return_code = return_code
96
Yuto Takano39639672021-08-05 19:47:48 +010097 def setup_logger(self, verbose=False):
98 """
99 Set up a logger and set the change the default logging level from
100 WARNING to INFO. Loggers are better than print statements since their
101 verbosity can be controlled.
102 """
Darryl Greend5802922018-05-08 15:30:59 +0100103 self.log = logging.getLogger()
Yuto Takano39639672021-08-05 19:47:48 +0100104 if verbose:
105 self.log.setLevel(logging.DEBUG)
106 else:
107 self.log.setLevel(logging.INFO)
Darryl Greend5802922018-05-08 15:30:59 +0100108 self.log.addHandler(logging.StreamHandler())
109
110 def check_repo_path(self):
Yuto Takano39639672021-08-05 19:47:48 +0100111 """
112 Check that the current working directory is the project root, and throw
113 an exception if not.
114 """
Darryl Greend5802922018-05-08 15:30:59 +0100115 current_dir = os.path.realpath('.')
116 root_dir = os.path.dirname(os.path.dirname(
117 os.path.dirname(os.path.realpath(__file__))))
118 if current_dir != root_dir:
119 raise Exception("Must be run from Mbed TLS root")
120
Yuto Takano157444c2021-08-05 20:10:45 +0100121 def get_files(self, extension, directory):
Darryl Greend5802922018-05-08 15:30:59 +0100122 filenames = []
123 for root, dirs, files in sorted(os.walk(directory)):
124 for filename in sorted(files):
125 if (filename not in self.excluded_files and
Yuto Takano157444c2021-08-05 20:10:45 +0100126 filename.endswith("." + extension)):
Darryl Greend5802922018-05-08 15:30:59 +0100127 filenames.append(os.path.join(root, filename))
128 return filenames
129
Yuto Takano39639672021-08-05 19:47:48 +0100130 def parse_macros(self, header_files):
131 """
132 Parse all macros defined by #define preprocessor directives.
133
134 Args:
135 header_files: A list of filepaths to look through.
136
137 Returns:
138 A list of Match objects for the macros.
139 """
140 MACRO_REGEX = r"#define (?P<macro>\w+)"
141 NON_MACROS = (
142 "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
143 )
144
145 macros = []
146
147 for header_file in header_files:
Darryl Greend5802922018-05-08 15:30:59 +0100148 with open(header_file, "r") as header:
Yuto Takano39639672021-08-05 19:47:48 +0100149 for line in header:
150 macro = re.search(MACRO_REGEX, line)
151 if (macro and
152 not macro.group("macro").startswith(NON_MACROS)):
153 macros.append(Match(
154 header_file,
155 line,
156 (macro.start(), macro.end()),
157 macro.group("macro")))
Darryl Greend5802922018-05-08 15:30:59 +0100158
Yuto Takano39639672021-08-05 19:47:48 +0100159 return macros
Darryl Greend5802922018-05-08 15:30:59 +0100160
Yuto Takanobb7dca42021-08-05 19:57:58 +0100161 def parse_MBED_names(self, files):
Yuto Takano39639672021-08-05 19:47:48 +0100162 """
163 Parse all words in the file that begin with MBED. Includes macros.
164
165 Args:
Yuto Takanobb7dca42021-08-05 19:57:58 +0100166 files: A list of filepaths to look through.
Yuto Takano39639672021-08-05 19:47:48 +0100167
168 Returns:
169 A list of Match objects for words beginning with MBED.
170 """
171 MBED_names = []
172
Yuto Takanobb7dca42021-08-05 19:57:58 +0100173 for filename in files:
Yuto Takano39639672021-08-05 19:47:48 +0100174 with open(filename, "r") as fp:
175 for line in fp:
Yuto Takanoc62b4082021-08-05 20:17:07 +0100176 # Ignore any names that are deliberately opted-out
177 if re.search(r"// *no-check-names", line):
178 continue
179
Yuto Takano39639672021-08-05 19:47:48 +0100180 for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line):
181 MBED_names.append(Match(
182 filename,
183 line,
184 (name.start(), name.end()),
185 name.group(0)
186 ))
187
188 return MBED_names
189
190 def parse_enum_consts(self, header_files):
191 """
192 Parse all enum value constants that are declared.
193
194 Args:
195 header_files: A list of filepaths to look through.
196
197 Returns:
198 A list of (enum constants, containing filename).
199 """
200
201 enum_consts = []
202
203 for header_file in header_files:
204 # Emulate a finite state machine to parse enum declarations.
Darryl Greend5802922018-05-08 15:30:59 +0100205 state = 0
206 with open(header_file, "r") as header:
Yuto Takano39639672021-08-05 19:47:48 +0100207 for line in header:
Darryl Greend5802922018-05-08 15:30:59 +0100208 if state is 0 and re.match(r"^(typedef )?enum {", line):
209 state = 1
210 elif state is 0 and re.match(r"^(typedef )?enum", line):
211 state = 2
212 elif state is 2 and re.match(r"^{", line):
213 state = 1
214 elif state is 1 and re.match(r"^}", line):
215 state = 0
216 elif state is 1:
217 enum_const = re.match(r"^\s*(?P<enum_const>\w+)", line)
218 if enum_const:
Yuto Takano39639672021-08-05 19:47:48 +0100219 enum_consts.append(Match(
220 header_file,
221 line,
222 (enum_const.start(), enum_const.end()),
223 enum_const.group("enum_const")))
224
225 return enum_consts
Darryl Greend5802922018-05-08 15:30:59 +0100226
Yuto Takano39639672021-08-05 19:47:48 +0100227 def parse_identifiers(self, header_files):
228 """
229 Parse all lines of a header where a function identifier is declared,
230 based on some huersitics. Assumes every line that is not a comment or a
231 preprocessor directive contains some identifier.
Darryl Greend5802922018-05-08 15:30:59 +0100232
Yuto Takano39639672021-08-05 19:47:48 +0100233 Args:
234 header_files: A list of filepaths to look through.
235
236 Returns:
237 A list of (identifier, containing filename)
238 """
239 EXCLUDED_DECLARATIONS = (
240 r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)"
Darryl Greend5802922018-05-08 15:30:59 +0100241 )
Darryl Greend5802922018-05-08 15:30:59 +0100242
Yuto Takano39639672021-08-05 19:47:48 +0100243 identifiers = []
244
245 for header_file in header_files:
Darryl Greend5802922018-05-08 15:30:59 +0100246 with open(header_file, "r") as header:
Yuto Takano39639672021-08-05 19:47:48 +0100247 in_block_comment = False
Darryl Greend5802922018-05-08 15:30:59 +0100248
Yuto Takano39639672021-08-05 19:47:48 +0100249 for line in header:
250 # Skip parsing this line if it begins or ends a block
251 # comment, and set the state machine's state.
252 if re.search(r"/\*", line):
253 in_block_comment = True
254 continue
255 elif re.search(r"\*/", line) and in_block_comment:
256 in_block_comment = False
257 continue
258
259 # Skip parsing this line if it's a line comment, or if it
260 # begins with a preprocessor directive
261 if in_block_comment or re.match(r"(//|#)", line):
262 continue
263
264 if re.match(EXCLUDED_DECLARATIONS, line):
265 continue
266
267 identifier = re.search(
268 # Matches: "mbedtls_aes_init("
269 r"([a-zA-Z_][a-zA-Z0-9_]*)\(|"
270 # Matches: "(*f_rng)("
271 r"\(\*(.+)\)\(|"
272 # TODO: unknown purpose
273 r"(\w+)\W*$",
274 line
275 )
276
277 if identifier:
278 for group in identifier.groups():
279 if group:
280 identifiers.append(Match(
281 header_file,
282 line,
283 (identifier.start(), identifier.end()),
284 identifier.group(0)))
285
286 return identifiers
287
288 def parse_symbols(self):
289 """
290 Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
291 object files using nm to retrieve the list of referenced symbols.
292
293 Returns:
294 A list of unique symbols defined and used in the libraries.
295 """
296
297 symbols = []
298
299 # Back up the config and atomically compile with the full configratuion.
300 shutil.copy("include/mbedtls/mbedtls_config.h",
301 "include/mbedtls/mbedtls_config.h.bak")
Darryl Greend5802922018-05-08 15:30:59 +0100302 try:
Yuto Takano39639672021-08-05 19:47:48 +0100303 subprocess.run(
Yuto Takano062289c2021-08-05 20:19:57 +0100304 ["perl", "scripts/config.py", "full"],
Yuto Takano39639672021-08-05 19:47:48 +0100305 encoding=sys.stdout.encoding,
306 check=True
Darryl Greend5802922018-05-08 15:30:59 +0100307 )
308 my_environment = os.environ.copy()
309 my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
Yuto Takano39639672021-08-05 19:47:48 +0100310 subprocess.run(
Darryl Greend5802922018-05-08 15:30:59 +0100311 ["make", "clean", "lib"],
312 env=my_environment,
Yuto Takano39639672021-08-05 19:47:48 +0100313 encoding=sys.stdout.encoding,
314 stdout=subprocess.PIPE,
Darryl Greend5802922018-05-08 15:30:59 +0100315 stderr=subprocess.STDOUT,
Yuto Takano39639672021-08-05 19:47:48 +0100316 check=True
Darryl Greend5802922018-05-08 15:30:59 +0100317 )
Yuto Takano39639672021-08-05 19:47:48 +0100318
319 # Perform object file analysis using nm
320 symbols = self.parse_symbols_from_nm(
321 ["library/libmbedcrypto.a",
322 "library/libmbedtls.a",
323 "library/libmbedx509.a"])
324
325 symbols.sort()
326
327 subprocess.run(
Darryl Greend5802922018-05-08 15:30:59 +0100328 ["make", "clean"],
Yuto Takano39639672021-08-05 19:47:48 +0100329 encoding=sys.stdout.encoding,
330 check=True
Darryl Greend5802922018-05-08 15:30:59 +0100331 )
332 except subprocess.CalledProcessError as error:
333 self.log.error(error)
334 self.set_return_code(2)
Yuto Takano39639672021-08-05 19:47:48 +0100335 finally:
336 shutil.move("include/mbedtls/mbedtls_config.h.bak",
337 "include/mbedtls/mbedtls_config.h")
338
339 return symbols
340
341 def parse_symbols_from_nm(self, object_files):
342 """
343 Run nm to retrieve the list of referenced symbols in each object file.
344 Does not return the position data since it is of no use.
345
346 Returns:
347 A list of unique symbols defined and used in any of the object files.
348 """
349 UNDEFINED_SYMBOL = r"^\S+: +U |^$|^\S+:$"
350 VALID_SYMBOL = r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)"
Yuto Takanoe77f6992021-08-05 20:22:59 +0100351 EXCLUSIONS = ("FStar", "Hacl")
Yuto Takano39639672021-08-05 19:47:48 +0100352
353 symbols = []
354
355 nm_output = ""
356 for lib in object_files:
357 nm_output += subprocess.run(
358 ["nm", "-og", lib],
359 encoding=sys.stdout.encoding,
360 stdout=subprocess.PIPE,
361 stderr=subprocess.STDOUT,
362 check=True
363 ).stdout
364 for line in nm_output.splitlines():
365 if not re.match(UNDEFINED_SYMBOL, line):
366 symbol = re.match(VALID_SYMBOL, line)
Yuto Takanoe77f6992021-08-05 20:22:59 +0100367 if symbol and not symbol.group("symbol").startswith(EXCLUSIONS):
368 symbols.append(symbol.group("symbol"))
Yuto Takano39639672021-08-05 19:47:48 +0100369 else:
370 self.log.error(line)
371
372 return symbols
373
374 def parse_names_in_source(self):
375 """
376 Calls each parsing function to retrieve various elements of the code,
377 together with their source location. Puts the parsed values in the
378 internal variable self.parse_result.
379 """
380 self.log.info("Parsing source code...")
381
Yuto Takano157444c2021-08-05 20:10:45 +0100382 m_headers = self.get_files("h", os.path.join("include", "mbedtls"))
383 p_headers = self.get_files("h", os.path.join("include", "psa"))
Yuto Takanofa950ae2021-08-05 20:03:44 +0100384 t_headers = ["3rdparty/everest/include/everest/everest.h",
385 "3rdparty/everest/include/everest/x25519.h"]
Yuto Takano56e3a5c2021-08-05 20:29:42 +0100386 d_headers = self.get_files("h", os.path.join("tests", "include", "test", "drivers"))
Yuto Takano157444c2021-08-05 20:10:45 +0100387 l_headers = self.get_files("h", "library")
388 libraries = self.get_files("c", "library") + [
Yuto Takanofa950ae2021-08-05 20:03:44 +0100389 "3rdparty/everest/library/everest.c",
390 "3rdparty/everest/library/x25519.c"]
Yuto Takano39639672021-08-05 19:47:48 +0100391
Yuto Takanobb7dca42021-08-05 19:57:58 +0100392 all_macros = self.parse_macros(
Yuto Takano56e3a5c2021-08-05 20:29:42 +0100393 m_headers + p_headers + t_headers + l_headers + d_headers)
Yuto Takanofa950ae2021-08-05 20:03:44 +0100394 enum_consts = self.parse_enum_consts(m_headers + t_headers)
Yuto Takano17220982021-08-05 20:30:18 +0100395 identifiers = self.parse_identifiers(m_headers + p_headers + t_headers + l_headers)
Yuto Takano39639672021-08-05 19:47:48 +0100396 symbols = self.parse_symbols()
Yuto Takanofa950ae2021-08-05 20:03:44 +0100397 mbed_names = self.parse_MBED_names(
Yuto Takano157444c2021-08-05 20:10:45 +0100398 m_headers + p_headers + t_headers + l_headers + libraries)
Yuto Takano39639672021-08-05 19:47:48 +0100399
400 # Remove identifier macros like mbedtls_printf or mbedtls_calloc
401 macros = list(set(all_macros) - set(identifiers))
402
403 self.log.info("Found:")
404 self.log.info(" {} Macros".format(len(all_macros)))
405 self.log.info(" {} Enum Constants".format(len(enum_consts)))
406 self.log.info(" {} Identifiers".format(len(identifiers)))
407 self.log.info(" {} Exported Symbols".format(len(symbols)))
408 self.log.info("Analysing...")
409
410 self.parse_result = {
411 "macros": macros,
412 "enum_consts": enum_consts,
413 "identifiers": identifiers,
414 "symbols": symbols,
415 "mbed_names": mbed_names
416 }
417
418 def perform_checks(self):
419 """
420 Perform each check in order, output its PASS/FAIL status. Maintain an
421 overall test status, and output that at the end.
422 """
423 problems = 0
424
425 problems += self.check_symbols_declared_in_header()
426
427 pattern_checks = [
428 ("macros", MACRO_PATTERN),
429 ("enum_consts", MACRO_PATTERN),
430 ("identifiers", IDENTIFIER_PATTERN)]
431 for group, check_pattern in pattern_checks:
432 problems += self.check_match_pattern(group, check_pattern)
433
434 problems += self.check_for_typos()
435
436 self.log.info("=============")
437 if problems > 0:
438 self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
439 else:
440 self.log.info("PASS")
Darryl Greend5802922018-05-08 15:30:59 +0100441
442 def check_symbols_declared_in_header(self):
Yuto Takano39639672021-08-05 19:47:48 +0100443 """
444 Perform a check that all detected symbols in the library object files
445 are properly declared in headers.
446
447 Outputs to the logger the PASS/FAIL status, followed by the location of
448 problems.
Darryl Greend5802922018-05-08 15:30:59 +0100449
Yuto Takano39639672021-08-05 19:47:48 +0100450 Returns the number of problems that needs fixing.
451 """
452 problems = []
453 for symbol in self.parse_result["symbols"]:
454 found_symbol_declared = False
455 for identifier_match in self.parse_result["identifiers"]:
456 if symbol == identifier_match.name:
457 found_symbol_declared = True
458 break
459
460 if not found_symbol_declared:
461 problems.append(SymbolNotInHeader(symbol))
462
463 if problems:
Darryl Greend5802922018-05-08 15:30:59 +0100464 self.set_return_code(1)
Yuto Takano39639672021-08-05 19:47:48 +0100465 self.log.info("All symbols in header: FAIL")
466 for problem in problems:
467 self.log.info(str(problem) + "\n")
Darryl Greend5802922018-05-08 15:30:59 +0100468 else:
Yuto Takano39639672021-08-05 19:47:48 +0100469 self.log.info("All symbols in header: PASS")
470
471 return len(problems)
472
473 def check_match_pattern(self, group_to_check, check_pattern):
474 problems = []
475 for item_match in self.parse_result[group_to_check]:
476 if not re.match(check_pattern, item_match.name):
477 problems.append(PatternMismatch(check_pattern, item_match))
Yuto Takanoc763cc32021-08-05 20:06:34 +0100478 if re.match(r".*__.*", item_match.name):
479 problems.append(PatternMismatch("double underscore", item_match))
Yuto Takano39639672021-08-05 19:47:48 +0100480
481 if problems:
482 self.set_return_code(1)
483 self.log.info("Naming patterns of {}: FAIL".format(group_to_check))
484 for problem in problems:
485 self.log.info(str(problem) + "\n")
486 else:
487 self.log.info("Naming patterns of {}: PASS".format(group_to_check))
488
489 return len(problems)
Darryl Greend5802922018-05-08 15:30:59 +0100490
491 def check_for_typos(self):
Yuto Takano39639672021-08-05 19:47:48 +0100492 problems = []
493 all_caps_names = list(set([
494 match.name for match
495 in self.parse_result["macros"] + self.parse_result["enum_consts"]]
Darryl Greend5802922018-05-08 15:30:59 +0100496 ))
Yuto Takano39639672021-08-05 19:47:48 +0100497
498 TYPO_EXCLUSION = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$"
499
500 for name_match in self.parse_result["mbed_names"]:
501 if name_match.name not in all_caps_names:
502 if not re.search(TYPO_EXCLUSION, name_match.name):
503 problems.append(Typo(name_match))
504
505 if problems:
Darryl Greend5802922018-05-08 15:30:59 +0100506 self.set_return_code(1)
507 self.log.info("Likely typos: FAIL")
Yuto Takano39639672021-08-05 19:47:48 +0100508 for problem in problems:
509 self.log.info(str(problem) + "\n")
Darryl Greend5802922018-05-08 15:30:59 +0100510 else:
511 self.log.info("Likely typos: PASS")
Yuto Takano39639672021-08-05 19:47:48 +0100512
513 return len(problems)
Darryl Greend5802922018-05-08 15:30:59 +0100514
def _build_argument_parser():
    """Create the command-line argument parser for this script."""
    description = (
        "This script confirms that the naming of all symbols and identifiers "
        "in Mbed TLS are consistent with the house style and are also "
        "self-consistent.\n\n"
        "Expected to be run from the MbedTLS root directory.")
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description)
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="enable script debug outputs")
    return parser


def main():
    """
    Entry point: parse the command-line arguments, run the name checks and
    exit with the checker's return code, or with 2 if an unexpected
    exception occurs.
    """
    args = _build_argument_parser().parse_args()

    try:
        name_check = NameCheck()
        name_check.setup_logger(verbose=args.verbose)
        name_check.parse_names_in_source()
        name_check.perform_checks()
        sys.exit(name_check.return_code)
    except Exception:
        # Report the failure in full before signalling it to the caller.
        traceback.print_exc()
        sys.exit(2)


if __name__ == "__main__":
    main()