#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent.
"""

23import argparse
24import textwrap
Darryl Greend5802922018-05-08 15:30:59 +010025import os
26import sys
27import traceback
28import re
29import shutil
30import subprocess
31import logging
32
# Naming patterns to check against.
#
# Macros and enum constants: upper case letters, digits and underscores,
# prefixed with MBEDTLS_ or YOTTA_, and not ending with an underscore.
MACRO_PATTERN = r"^MBEDTLS_[0-9A-Z_]*[0-9A-Z]$|^YOTTA_[0-9A-Z_]*[0-9A-Z]$"
# Identifiers: lower case letters, digits and underscores, prefixed with
# mbedtls_, and not ending with an underscore.
IDENTIFIER_PATTERN = r"^mbedtls_[0-9a-z_]*[0-9a-z]$"
36
class Match:
    """
    One occurrence of a name found while parsing a source file.

    Fields:
    * filename: path of the file the name was found in.
    * line: the full text of the line the name was found on.
    * pos: a (start, end) tuple of offsets within the line, as supplied by
      the parser that created the object.
    * name: the name that was found.
    """
    def __init__(self, filename, line, pos, name):
        self.filename, self.line = filename, line
        self.pos, self.name = pos, name

    def __str__(self):
        # A match prints as the bare name.
        return self.name
46
class Problem:
    """
    Base class for the problems reported by the checks.

    It only provides self.textwrapper, a textwrap.TextWrapper that subclasses
    use to format their message as an indented bullet point.
    """
    def __init__(self):
        self.textwrapper = textwrap.TextWrapper(
            initial_indent=" * ",
            subsequent_indent=" ")
52
class SymbolNotInHeader(Problem):
    """
    Problem raised for a symbol that appears in the nm output but is not
    declared in any header file.

    Fields:
    * symbol_name: the name of the offending symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def __str__(self):
        message = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(message)
63
class PatternMismatch(Problem):
    """
    Problem raised for a name that does not follow the required naming
    pattern.

    Fields:
    * pattern: the regular expression the name was expected to match.
    * match: the Match object describing the offending name.
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def __str__(self):
        message = (
            "{0}: '{1}' does not match the required pattern '{2}'."
        ).format(self.match.filename, self.match.name, self.pattern)
        return self.textwrapper.fill(message)
74
class Typo(Problem):
    """
    Problem raised for a word beginning with MBED that is neither a known
    macro nor a known enum constant, and is therefore probably misspelt.

    Fields:
    * match: the Match object describing the suspicious word.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def __str__(self):
        message = (
            "{0}: '{1}' looks like a typo. It was not found in any macros or "
            "any enums. If this is not a typo, put //no-check-names after it."
        ).format(self.match.filename, self.match.name)
        return self.textwrapper.fill(message)
Darryl Greend5802922018-05-08 15:30:59 +010085
class NameCheck(object):
    """
    Driver for the Mbed TLS naming checks.

    The expected sequence is: construct the object while the current working
    directory is the project root, call setup_logger(), then
    parse_names_in_source() and finally perform_checks().

    Afterwards return_code holds the overall outcome: 0 when nothing was
    recorded, 1 when at least one check failed and 2 when one of the build
    commands failed.
    """
    def __init__(self):
        # Use the root logger from the start so that methods called before
        # setup_logger() do not fail on a missing logger.
        self.log = logging.getLogger()
        self.check_repo_path()
        self.return_code = 0
        self.excluded_files = ["compat-1.3.h"]
        # Names matching this pattern are never reported as typos.
        self.typo_check_pattern = r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$"
        # Filled in by parse_names_in_source().
        self.parse_result = {}

    def set_return_code(self, return_code):
        """
        Raise the stored return code to return_code. The stored value is never
        lowered, so the most severe code recorded so far is kept.
        """
        self.return_code = max(self.return_code, return_code)

    def setup_logger(self, verbose=False):
        """
        Set up the root logger used for all output and attach a stream
        handler to it. Loggers are better than print statements since their
        verbosity can be controlled.

        Args:
            verbose: when true the level is set to DEBUG, otherwise to INFO.
        """
        self.log = logging.getLogger()
        if verbose:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)
        self.log.addHandler(logging.StreamHandler())

    def check_repo_path(self):
        """
        Check that the current working directory is the project root, and throw
        an exception if not. The project root is taken to be three directory
        levels above this script.
        """
        current_dir = os.path.realpath('.')
        root_dir = os.path.dirname(os.path.dirname(
            os.path.dirname(os.path.realpath(__file__))))
        if current_dir != root_dir:
            raise Exception("Must be run from Mbed TLS root")

    def get_files(self, directory):
        """
        Collect the C source and header files below a directory.

        Args:
            directory: the directory to walk recursively.

        Returns:
            A list of paths to every .c and .h file found whose file name is
            not in self.excluded_files, ordered by directory and then by
            file name.
        """
        filenames = []
        for root, _, files in sorted(os.walk(directory)):
            for filename in sorted(files):
                if (filename not in self.excluded_files and
                        filename.endswith((".c", ".h"))):
                    filenames.append(os.path.join(root, filename))
        return filenames

    def parse_macros(self, header_files):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the macros. The position stored in
            each Match is the span of the macro name within its line.
        """
        MACRO_REGEX = r"#define (?P<macro>\w+)"
        # Names starting with any of these prefixes are not recorded.
        NON_MACROS = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        macros = []

        for header_file in header_files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line in header:
                    macro = re.search(MACRO_REGEX, line)
                    if (macro and
                            not macro.group("macro").startswith(NON_MACROS)):
                        macros.append(Match(
                            header_file,
                            line,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_MBED_names(self, header_files, library_files):
        """
        Parse all words in the file that begin with MBED. Includes macros.

        Args:
            header_files: A list of filepaths to look through.
            library_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for words beginning with MBED.
        """
        MBED_names = []

        for filename in header_files + library_files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line in fp:
                    for name in re.finditer(r"\bMBED.+?_[A-Z0-9_]*", line):
                        MBED_names.append(Match(
                            filename,
                            line,
                            name.span(),
                            name.group(0)))

        return MBED_names

    @staticmethod
    def _scan_enum_consts(lines):
        """
        Yield (line, span, name) for every enum constant declared in lines.

        A small state machine tracks whether the current line is outside any
        enum, inside an enum body, or after an "enum" line whose opening
        brace is expected at the start of a later line.
        """
        OUTSIDE, IN_BODY, AWAITING_BRACE = 0, 1, 2

        state = OUTSIDE
        for line in lines:
            # Integer states are compared with ==, not "is": identity of
            # equal integers is an implementation detail.
            if state == OUTSIDE and re.match(r"^(typedef )?enum {", line):
                state = IN_BODY
            elif state == OUTSIDE and re.match(r"^(typedef )?enum", line):
                state = AWAITING_BRACE
            elif state == AWAITING_BRACE and re.match(r"^{", line):
                state = IN_BODY
            elif state == IN_BODY and re.match(r"^}", line):
                state = OUTSIDE
            elif state == IN_BODY:
                enum_const = re.match(r"^\s*(?P<enum_const>\w+)", line)
                if enum_const:
                    yield (line,
                           enum_const.span("enum_const"),
                           enum_const.group("enum_const"))

    def parse_enum_consts(self, header_files):
        """
        Parse all enum value constants that are declared.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the enum constants.
        """
        enum_consts = []

        for header_file in header_files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line, span, name in self._scan_enum_consts(header):
                    enum_consts.append(Match(header_file, line, span, name))

        return enum_consts

    @staticmethod
    def _scan_identifiers(lines):
        """
        Yield (line, span, name) for every identifier declared in lines.

        Comments, preprocessor directives and a few declaration-only lines
        are skipped; every remaining line is searched for one identifier.
        """
        EXCLUDED_DECLARATIONS = (
            r"^(extern \"C\"|(typedef )?(struct|enum)( {)?$|};?$|$)"
        )
        IDENTIFIER_REGEX = (
            # Matches: "mbedtls_aes_init("
            r"([a-zA-Z_][a-zA-Z0-9_]*)\(|"
            # Matches: "(*f_rng)("
            r"\(\*(.+)\)\(|"
            # Matches the last word on the line.
            # NOTE(review): the intent of this alternative was not
            # documented in the original code - confirm.
            r"(\w+)\W*$"
        )

        in_block_comment = False
        for line in lines:
            if in_block_comment:
                # Every line of a multi-line comment is skipped, including
                # the one that closes it.
                if "*/" in line:
                    in_block_comment = False
                continue

            # Blank out comments that open and close on this line. They must
            # not switch the parser into block-comment mode, and replacing
            # them with spaces keeps the offsets of the remaining text.
            code = re.sub(r"/\*.*?\*/",
                          lambda comment: " " * len(comment.group(0)),
                          line)

            if "/*" in code:
                # A comment opens here and continues on later lines.
                in_block_comment = True
                continue

            # Skip line comments and preprocessor directives.
            if re.match(r"\s*(//|#)", code):
                continue

            if re.match(EXCLUDED_DECLARATIONS, code):
                continue

            identifier = re.search(IDENTIFIER_REGEX, code)
            if identifier:
                # Exactly one alternative matched. Report the captured name
                # rather than the whole match, which may include "(".
                group = identifier.lastindex
                yield (line, identifier.span(group), identifier.group(group))

    def parse_identifiers(self, header_files):
        """
        Parse all lines of a header where an identifier is declared, based
        on some heuristics. Assumes every line that is not a comment or a
        preprocessor directive contains some identifier.

        Args:
            header_files: A list of filepaths to look through.

        Returns:
            A list of Match objects for the identifiers.
        """
        identifiers = []

        for header_file in header_files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line, span, name in self._scan_identifiers(header):
                    identifiers.append(Match(header_file, line, span, name))

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of symbols.

        The configuration header is backed up first and restored afterwards,
        whether or not the build succeeds. If any command fails, the error is
        logged and the return code is raised to 2.

        Returns:
            A sorted list of the symbols found in the libraries; an empty
            list if a command failed before nm had been run.
        """
        symbols = []
        config_file = "include/mbedtls/mbedtls_config.h"
        config_backup = config_file + ".bak"

        # Back up the config, then compile with the full configuration.
        shutil.copy(config_file, config_backup)
        try:
            subprocess.run(
                ["perl", "scripts/config.pl", "full"],
                encoding=sys.stdout.encoding,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            subprocess.run(
                ["make", "clean", "lib"],
                env=my_environment,
                encoding=sys.stdout.encoding,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm(
                ["library/libmbedcrypto.a",
                 "library/libmbedtls.a",
                 "library/libmbedx509.a"])

            symbols.sort()

            subprocess.run(
                ["make", "clean"],
                encoding=sys.stdout.encoding,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.error(error)
            # Output captured through a pipe is shown nowhere else, so log
            # it to make the failure diagnosable.
            if error.output:
                self.log.error(error.output)
            self.set_return_code(2)
        finally:
            shutil.move(config_backup, config_file)

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
            object_files: A list of paths to the files to pass to nm.

        Returns:
            A list of the symbol names found in the object files, with any
            leading underscores removed. Duplicates are not removed.

        Raises:
            subprocess.CalledProcessError: if nm exits with an error.
        """
        # Lines that carry no symbol to record: undefined symbols, empty
        # lines and lines that only name a file.
        UNDEFINED_SYMBOL = r"^\S+: +U |^$|^\S+:$"
        VALID_SYMBOL = r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)"

        symbols = []

        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                encoding=sys.stdout.encoding,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if re.match(UNDEFINED_SYMBOL, line):
                continue
            symbol = re.match(VALID_SYMBOL, line)
            if symbol:
                symbols.append(symbol.group('symbol'))
            else:
                # Report lines that could not be understood.
                self.log.error(line)

        return symbols

    def parse_names_in_source(self):
        """
        Calls each parsing function to retrieve various elements of the code,
        together with their source location. Puts the parsed values in the
        internal variable self.parse_result.
        """
        self.log.info("Parsing source code...")

        m_headers = self.get_files(os.path.join("include", "mbedtls"))
        libraries = self.get_files("library")

        all_macros = self.parse_macros(m_headers)
        enum_consts = self.parse_enum_consts(m_headers)
        identifiers = self.parse_identifiers(m_headers)
        symbols = self.parse_symbols()
        mbed_names = self.parse_MBED_names(m_headers, libraries)

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # The comparison is made on names: Match objects only compare by
        # identity, so a set difference of Match objects removes nothing.
        identifier_names = {match.name for match in identifiers}
        macros = [
            macro for macro in all_macros
            if macro.name not in identifier_names]

        self.log.info("Found:")
        self.log.info(" {} Macros".format(len(all_macros)))
        self.log.info(" {} Enum Constants".format(len(enum_consts)))
        self.log.info(" {} Identifiers".format(len(identifiers)))
        self.log.info(" {} Exported Symbols".format(len(symbols)))
        self.log.info("Analysing...")

        self.parse_result = {
            "macros": macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_names": mbed_names
        }

    def perform_checks(self):
        """
        Perform each check in order, output its PASS/FAIL status. Maintain an
        overall test status, and output that at the end.

        Requires parse_names_in_source() to have been called first.
        """
        problems = 0

        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", MACRO_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
        else:
            self.log.info("PASS")

    def _output_check_result(self, name, problems):
        """
        Log the PASS/FAIL status of one check followed by its problems, and
        raise the return code to 1 if there are any.

        Args:
            name: the title of the check, used as the prefix of the status.
            problems: a list of Problem objects found by the check.

        Returns the number of problems.
        """
        if problems:
            self.set_return_code(1)
            self.log.info("{}: FAIL".format(name))
            for problem in problems:
                self.log.info(str(problem) + "\n")
        else:
            self.log.info("{}: PASS".format(name))

        return len(problems)

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.

        Outputs to the logger the PASS/FAIL status, followed by the location of
        problems.

        Returns the number of problems that need fixing.
        """
        declared_names = {
            match.name for match in self.parse_result["identifiers"]}
        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names]

        return self._output_check_result("All symbols in header", problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.

        Args:
            group_to_check: the key of self.parse_result holding the Match
                objects to check.
            check_pattern: the regex that each name must match.

        Outputs to the logger the PASS/FAIL status, followed by the location of
        problems.

        Returns the number of problems that need fixing.
        """
        problems = [
            PatternMismatch(check_pattern, item_match)
            for item_match in self.parse_result[group_to_check]
            if not re.match(check_pattern, item_match.name)]

        return self._output_check_result(
            "Naming patterns of {}".format(group_to_check), problems)

    def check_for_typos(self):
        """
        Perform a check that every word beginning with MBED is either a known
        macro or a known enum constant. Words matching
        self.typo_check_pattern are exempt.

        Outputs to the logger the PASS/FAIL status, followed by the location of
        problems.

        Returns the number of problems that need fixing.
        """
        all_caps_names = {
            match.name for match
            in self.parse_result["macros"] + self.parse_result["enum_consts"]}

        problems = [
            Typo(name_match)
            for name_match in self.parse_result["mbed_names"]
            if name_match.name not in all_caps_names
            and not re.search(self.typo_check_pattern, name_match.name)]

        return self._output_check_result("Likely typos", problems)
Darryl Greend5802922018-05-08 15:30:59 +0100500
def main():
    """
    Entry point of the script: parse the command-line arguments, run all the
    name checks and exit with the resulting return code. Any unexpected
    exception is printed with its traceback and turned into exit code 2.
    """
    description = (
        "This script confirms that the naming of all symbols and identifiers "
        "in Mbed TLS are consistent with the house style and are also "
        "self-consistent.\n\n"
        "Expected to be run from the MbedTLS root directory.")

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="enable script debug outputs")
    options = parser.parse_args()

    try:
        checker = NameCheck()
        checker.setup_logger(verbose=options.verbose)
        checker.parse_names_in_source()
        checker.perform_checks()
        status = checker.return_code
    except Exception:
        traceback.print_exc()
        status = 2

    sys.exit(status)


if __name__ == "__main__":
    main()