#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17
"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent.
"""
Yuto Takano39639672021-08-05 19:47:48 +010022
23import argparse
24import textwrap
Darryl Greend5802922018-05-08 15:30:59 +010025import os
26import sys
27import traceback
28import re
29import shutil
30import subprocess
31import logging
32
# Naming patterns to check against.
# Macros and enum constants: an MBEDTLS_ or PSA_ prefix followed by upper-case
# letters, digits and underscores, and not ending with an underscore.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Identifiers: an mbedtls_ or psa_ prefix followed by lower-case letters,
# digits and underscores, and not ending with an underscore.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
Yuto Takano39639672021-08-05 19:47:48 +010036
class Match(object):
    """
    A name found while parsing a file, together with where it was found.

    Fields:
        filename: path of the file the name was found in.
        line: the value passed as the line where the name was found.
        pos: the value passed as the position of the name in that line.
        name: the name itself; this is what str() returns.
    """
    def __init__(self, filename, line, pos, name):
        self.filename, self.line = filename, line
        self.pos, self.name = pos, name

    def __str__(self):
        # A match is displayed simply as the name that was matched.
        return self.name
46
class Problem(object):
    """
    Base class for reportable problems.

    It only provides self.textwrapper, a textwrap.TextWrapper that formats
    a message as a bullet point: the first line is prefixed with " * " and
    continuation lines with " ".
    """
    def __init__(self):
        self.textwrapper = textwrap.TextWrapper(
            initial_indent=" * ",
            subsequent_indent=" ")
52
class SymbolNotInHeader(Problem):
    """
    A problem describing a symbol that appears in the output of nm but is
    not declared in any header file.

    Fields:
        symbol_name: the name of the offending symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def __str__(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
63
class PatternMismatch(Problem):
    """
    A problem describing a name that does not follow a required pattern.

    Fields:
        pattern: the pattern (or a description of it) that was expected.
        match: the object describing the offending name; its filename and
            name attributes are used in the message.
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def __str__(self):
        message = (
            "{0}: '{1}' does not match the required pattern '{2}'."
        ).format(self.match.filename, self.match.name, self.pattern)
        return self.textwrapper.fill(message)
74
class Typo(Problem):
    """
    A problem describing a name that looks like a typo because it was not
    found among the known macros and enum constants.

    Fields:
        match: the object describing the suspicious name; its filename and
            name attributes are used in the message.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def __str__(self):
        message = (
            "{0}: '{1}' looks like a typo. It was not found in any macros or "
            "any enums. If this is not a typo, put //no-check-names after it."
        ).format(self.match.filename, self.match.name)
        return self.textwrapper.fill(message)
Darryl Greend5802922018-05-08 15:30:59 +010085
class NameCheck(object):
    """
    Parse the Mbed TLS sources and check the names found in them.

    Intended use: construct an instance from the project root, call
    setup_logger(), then parse_names_in_source() and perform_checks(), and
    finally read return_code (0: no problem, 1: naming problems were found,
    2: a build or tool command failed).
    """
    def __init__(self):
        """
        Raises:
            Exception: if the current working directory is not the project
                root (see check_repo_path).
        """
        self.log = None
        self.check_repo_path()
        self.return_code = 0
        # Header/source files to skip, given with or without their extension.
        self.excluded_files = ["bn_mul"]
        # Filled in by parse_names_in_source().
        self.parse_result = {}

    def set_return_code(self, return_code):
        """
        Raise the stored return code to return_code; never lower it.
        """
        self.return_code = max(self.return_code, return_code)

    def setup_logger(self, verbose=False):
        """
        Set up the root logger with a stream handler and change the default
        logging level from WARNING to INFO (or DEBUG when verbose is set).
        Loggers are better than print statements since their verbosity can
        be controlled.

        Args:
            verbose: If true, log at DEBUG level instead of INFO.
        """
        self.log = logging.getLogger()
        self.log.setLevel(logging.DEBUG if verbose else logging.INFO)
        self.log.addHandler(logging.StreamHandler())

    def check_repo_path(self):
        """
        Check that the current working directory is the project root, and
        throw an exception if not. The root is taken to be the directory
        three levels above this script file.

        Raises:
            Exception: if the current directory is not the project root.
        """
        current_dir = os.path.realpath('.')
        root_dir = os.path.dirname(os.path.dirname(
            os.path.dirname(os.path.realpath(__file__))))
        if current_dir != root_dir:
            raise Exception("Must be run from Mbed TLS root")

    def get_files(self, extension, directory):
        """
        Collect the files with a given extension under a directory.

        Args:
            extension: The file extension to look for, without the dot.
            directory: The directory to search recursively.

        Returns:
            A sorted list of file paths. Files whose name, with or without
            the extension, is listed in self.excluded_files are left out.
        """
        suffix = "." + extension
        filenames = []
        for root, _, files in sorted(os.walk(directory)):
            for filename in sorted(files):
                if not filename.endswith(suffix):
                    continue
                # Exclusions such as "bn_mul" are written without extension,
                # so compare against the bare name as well as the full name.
                stem = filename[:-len(suffix)]
                if (filename in self.excluded_files or
                        stem in self.excluded_files):
                    continue
                filenames.append(os.path.join(root, filename))
        return filenames

    def parse_macros(self, header_files):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the macros.
        """
        macro_regex = re.compile(r"#define (?P<macro>\w+)")
        # Names starting with any of these prefixes are not reported.
        non_macros = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        macros = []

        for header_file in header_files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line in header:
                    macro = macro_regex.search(line)
                    if not macro:
                        continue
                    name = macro.group("macro")
                    if name.startswith(non_macros):
                        continue
                    macros.append(Match(
                        header_file,
                        line,
                        (macro.start(), macro.end()),
                        name))

        return macros

    def parse_MBED_names(self, files):
        """
        Parse all words in the file that begin with MBED. Includes macros.

        Args:
            files: A list of filepaths to look through.

        Returns:
            A list of Match objects for words beginning with MBED.
        """
        mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
        MBED_names = []

        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line in fp:
                    for name in mbed_regex.finditer(line):
                        MBED_names.append(Match(
                            filename,
                            line,
                            (name.start(), name.end()),
                            name.group(0)
                        ))

        return MBED_names

    def parse_enum_consts(self, header_files):
        """
        Parse all enum value constants that are declared.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the enum constants.
        """
        # States of the small state machine that follows enum declarations:
        # outside any enum, after "enum" while waiting for the opening brace
        # on a later line, and inside the braces.
        outside_enum, awaiting_brace, in_enum_body = range(3)

        enum_consts = []

        for header_file in header_files:
            state = outside_enum
            with open(header_file, "r", encoding="utf-8") as header:
                for line in header:
                    # States are plain integers: compare by value, not by
                    # identity.
                    if (state == outside_enum and
                            re.match(r"^(typedef )?enum {", line)):
                        state = in_enum_body
                    elif (state == outside_enum and
                          re.match(r"^(typedef )?enum", line)):
                        state = awaiting_brace
                    elif state == awaiting_brace and re.match(r"^{", line):
                        state = in_enum_body
                    elif state == in_enum_body and re.match(r"^}", line):
                        state = outside_enum
                    elif state == in_enum_body:
                        # The first word on a line inside the braces is
                        # taken to be an enum constant.
                        enum_const = re.match(
                            r"^\s*(?P<enum_const>\w+)", line)
                        if enum_const:
                            enum_consts.append(Match(
                                header_file,
                                line,
                                (enum_const.start(), enum_const.end()),
                                enum_const.group("enum_const")))

        return enum_consts

    def parse_identifiers(self, header_files):
        """
        Parse all lines of a header where an identifier is declared, based
        on some heuristics. Assumes every line that is not a comment or a
        preprocessor directive contains some identifier. Lines that contain
        any part of a block comment are skipped entirely.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the identifiers.
        """
        excluded_declarations = re.compile(
            r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)"
        )
        identifier_regex = re.compile(
            # Matches: "mbedtls_aes_init("
            r"([a-zA-Z_][a-zA-Z0-9_]*)\(|"
            # Matches: "(*f_rng)("
            r"\(\*(.+)\)\(|"
            # Matches the last word on the line, followed only by non-word
            # characters, e.g. "mbedtls_aes_context;"
            r"(\w+)\W*$"
        )

        identifiers = []

        for header_file in header_files:
            with open(header_file, "r", encoding="utf-8") as header:
                in_block_comment = False

                for line in header:
                    # Skip a line that opens a block comment. The comment
                    # only stays open for the following lines if it is not
                    # closed on this same line.
                    opener = line.find("/*")
                    if opener != -1 and not in_block_comment:
                        in_block_comment = (
                            line.find("*/", opener + 2) == -1)
                        continue

                    # Skip lines inside a block comment, including the line
                    # that closes it.
                    if in_block_comment:
                        if "*/" in line:
                            in_block_comment = False
                        continue

                    # Skip parsing this line if it's a line comment, or if
                    # it begins with a preprocessor directive
                    if re.match(r"(//|#)", line):
                        continue

                    if excluded_declarations.match(line):
                        continue

                    identifier = identifier_regex.search(line)
                    if not identifier:
                        continue

                    for group in identifier.groups():
                        if group:
                            # Record the captured name itself, not the whole
                            # match, which may include "(" or trailing
                            # punctuation.
                            identifiers.append(Match(
                                header_file,
                                line,
                                (identifier.start(), identifier.end()),
                                group))

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        The configuration header is backed up first and always restored.

        If a command fails, the error is logged, the return code is raised
        to 2 and the symbols gathered so far (possibly none) are returned.

        Returns:
            A sorted list of symbols found in the libraries.
        """
        config_path = "include/mbedtls/mbedtls_config.h"
        backup_path = "include/mbedtls/mbedtls_config.h.bak"

        symbols = []

        # Back up the config and compile with the full configuration.
        shutil.copy(config_path, backup_path)
        try:
            subprocess.run(
                ["perl", "scripts/config.pl", "full"],
                encoding=sys.stdout.encoding,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            subprocess.run(
                ["make", "clean", "lib"],
                env=my_environment,
                encoding=sys.stdout.encoding,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm(
                ["library/libmbedcrypto.a",
                 "library/libmbedtls.a",
                 "library/libmbedx509.a"])

            symbols.sort()

            subprocess.run(
                ["make", "clean"],
                encoding=sys.stdout.encoding,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.error(error)
            # The output of the failed command is only available when it was
            # captured; keep it reachable in verbose mode.
            if error.output:
                self.log.debug(error.output)
            self.set_return_code(2)
        finally:
            # Restore the original configuration whatever happened.
            shutil.move(backup_path, config_path)

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object
        file. Does not return the position data since it is of no use.

        Args:
            object_files: A list of paths to pass to nm, one at a time.

        Returns:
            A list of the symbols that nm reports as defined in any of the
            object files, in output order. No de-duplication is done.

        Raises:
            subprocess.CalledProcessError: if nm exits with an error.
        """
        # Lines to ignore: undefined symbols, empty lines and file headers.
        undefined_symbol = re.compile(r"^\S+: +U |^$|^\S+:$")
        valid_symbol = re.compile(
            r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")

        symbols = []

        outputs = []
        for lib in object_files:
            outputs.append(subprocess.run(
                ["nm", "-og", lib],
                encoding=sys.stdout.encoding,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout)

        for line in "".join(outputs).splitlines():
            if undefined_symbol.match(line):
                continue
            symbol = valid_symbol.match(line)
            if symbol:
                symbols.append(symbol.group('symbol'))
            else:
                # Unexpected nm output: report it rather than dropping it.
                self.log.error(line)

        return symbols

    def parse_names_in_source(self):
        """
        Calls each parsing function to retrieve various elements of the code,
        together with their source location. Puts the parsed values in the
        internal variable self.parse_result.
        """
        self.log.info("Parsing source code...")

        m_headers = self.get_files("h", os.path.join("include", "mbedtls"))
        p_headers = self.get_files("h", os.path.join("include", "psa"))
        t_headers = ["3rdparty/everest/include/everest/everest.h",
                     "3rdparty/everest/include/everest/x25519.h"]
        l_headers = self.get_files("h", "library")
        libraries = self.get_files("c", "library") + [
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"]

        all_macros = self.parse_macros(
            m_headers + p_headers + t_headers + l_headers)
        enum_consts = self.parse_enum_consts(m_headers + t_headers)
        identifiers = self.parse_identifiers(
            m_headers + p_headers + t_headers)
        symbols = self.parse_symbols()
        mbed_names = self.parse_MBED_names(
            m_headers + p_headers + t_headers + l_headers + libraries)

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # The comparison is done on names: two distinct Match objects never
        # compare equal, even when they describe the same name.
        identifier_names = {match.name for match in identifiers}
        macros = [macro for macro in all_macros
                  if macro.name not in identifier_names]

        self.log.info("Found:")
        self.log.info(" {} Macros".format(len(all_macros)))
        self.log.info(" {} Enum Constants".format(len(enum_consts)))
        self.log.info(" {} Identifiers".format(len(identifiers)))
        self.log.info(" {} Exported Symbols".format(len(symbols)))
        self.log.info("Analysing...")

        self.parse_result = {
            "macros": macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_names": mbed_names
        }

    def perform_checks(self):
        """
        Perform each check in order, output its PASS/FAIL status. Maintain an
        overall test status, and output that at the end.
        """
        problems = 0

        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", MACRO_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
        else:
            self.log.info("PASS")

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.

        Outputs to the logger the PASS/FAIL status, followed by the location
        of problems.

        Returns the number of problems that need fixing.
        """
        declared_names = {
            match.name for match in self.parse_result["identifiers"]}

        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names]

        if problems:
            self.set_return_code(1)
            self.log.info("All symbols in header: FAIL")
            for problem in problems:
                self.log.info(str(problem) + "\n")
        else:
            self.log.info("All symbols in header: PASS")

        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern
        and do not contain a double underscore.

        Outputs to the logger the PASS/FAIL status, followed by the location
        of problems.

        Args:
            group_to_check: The key of self.parse_result to check.
            check_pattern: The regex pattern that each name must match.

        Returns the number of problems that need fixing.
        """
        problems = []
        for item_match in self.parse_result[group_to_check]:
            if not re.match(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            if re.match(r".*__.*", item_match.name):
                problems.append(
                    PatternMismatch("double underscore", item_match))

        if problems:
            self.set_return_code(1)
            self.log.info(
                "Naming patterns of {}: FAIL".format(group_to_check))
            for problem in problems:
                self.log.info(str(problem) + "\n")
        else:
            self.log.info(
                "Naming patterns of {}: PASS".format(group_to_check))

        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED
        are either defined as macros or as enum constants, unless they match
        one of the known exclusions.

        Outputs to the logger the PASS/FAIL status, followed by the location
        of problems.

        Returns the number of problems that need fixing.
        """
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$")

        all_caps_names = {
            match.name for match
            in self.parse_result["macros"] + self.parse_result["enum_consts"]}

        problems = [
            Typo(name_match)
            for name_match in self.parse_result["mbed_names"]
            if name_match.name not in all_caps_names
            and not typo_exclusion.search(name_match.name)]

        if problems:
            self.set_return_code(1)
            self.log.info("Likely typos: FAIL")
            for problem in problems:
                self.log.info(str(problem) + "\n")
        else:
            self.log.info("Likely typos: PASS")

        return len(problems)
Darryl Greend5802922018-05-08 15:30:59 +0100508
def main():
    """
    Entry point: parse the command-line arguments, run the name checks and
    exit with the resulting return code (2 if an unexpected exception
    occurs).
    """
    description = (
        "This script confirms that the naming of all symbols and identifiers "
        "in Mbed TLS are consistent with the house style and are also "
        "self-consistent.\n\n"
        "Expected to be run from the MbedTLS root directory.")

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description)
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="enable script debug outputs")
    options = parser.parse_args()

    try:
        checker = NameCheck()
        checker.setup_logger(verbose=options.verbose)
        checker.parse_names_in_source()
        checker.perform_checks()
    except Exception:
        # Top-level boundary: show the traceback and signal a failure.
        traceback.print_exc()
        sys.exit(2)
    else:
        sys.exit(checker.return_code)


if __name__ == "__main__":
    main()