blob: acf60db46d7cbaecc72aa901d225be2717d43947 [file] [log] [blame]
Gilles Peskine8266b5b2021-09-27 19:53:31 +02001#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent. It only runs
on Linux and macOS since it depends on nm.
22
23It contains two major Python classes, CodeParser and NameChecker. They both have
24a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
25but the individual functions can also be used for specific needs.
26
27CodeParser makes heavy use of regular expressions to parse the code, and is
28dependent on the current code formatting. Many Python C parser libraries require
29preprocessed C code, which means no macro parsing. Compiler tools are also not
30very helpful when we want the exact location in the original source (which
31becomes impossible when e.g. comments are stripped).
32
33NameChecker performs the following checks:
34
- All exported and available symbols in the library object files are explicitly
  declared in the header files. This uses the nm command.
37- All macros, constants, and identifiers (function names, struct names, etc)
38 follow the required regex pattern.
39- Typo checking: All words that begin with MBED exist as macros or constants.
40
41The script returns 0 on success, 1 on test failure, and 2 if there is a script
42error. It must be run from Mbed TLS root.
43"""
44
45import abc
46import argparse
Gilles Peskine7bf52052021-09-27 19:20:17 +020047import fnmatch
Gilles Peskine8266b5b2021-09-27 19:53:31 +020048import glob
49import textwrap
50import os
51import sys
52import traceback
53import re
54import enum
55import shutil
56import subprocess
57import logging
58
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
# Macros: an MBEDTLS_ or PSA_ prefix followed by upper-case letters, digits and
# underscores, ending in a letter or digit (so no trailing underscore).
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Enum constants are held to the same rule as macros.
CONSTANTS_PATTERN = MACRO_PATTERN
# Identifiers: an mbedtls_ or psa_ prefix followed by lower-case letters, digits
# and underscores, ending in a letter or digit.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
64
class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.

        The listing has three rows: an empty gutter row, the line itself
        prefixed by its line number, and a row of carets underlining the
        span given by ``pos``.
        """
        gutter = format(self.line_no, "4d")
        blank_gutter = " " * len(gutter)
        start, end = self.pos[0], self.pos[1]
        underline = start * " " + (end - start) * "^"

        # The stored line usually carries its own trailing newline, but the
        # last line of a file (or a line that was truncated while parsing)
        # may not. Add one so the underline row never ends up glued to the
        # code row.
        code = self.line if self.line.endswith("\n") else self.line + "\n"

        return (
            " {0} |\n".format(blank_gutter) +
            " {0} | {1}".format(gutter, code) +
            " {0} | {1}\n".format(blank_gutter, underline)
        )
96
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract base for every kind of static analysis error reported by this
    script. Being an Abstract Base Class, it cannot be instantiated directly;
    subclasses must provide both quiet_output() and verbose_output().
    """
    # Shared by all problems: when true, str() yields the terse form.
    quiet = False

    def __init__(self):
        # Wrapper used by subclasses to lay out their verbose explanation.
        self.textwrapper = textwrap.TextWrapper(
            width=80,
            initial_indent=" > ",
            subsequent_indent=" ",
        )

    def __str__(self):
        """
        Render the problem, picking the quiet or the verbose form according
        to the class-wide ``quiet`` switch.
        """
        render = self.quiet_output if type(self).quiet else self.verbose_output
        return render()

    @abc.abstractmethod
    def quiet_output(self):
        """
        Return the text shown when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        Return the default text: an explanation, plus a code snippet where
        appropriate.
        """
132
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a symbol that is exported/available in an object file has
    no explicit declaration in the header files. Created with
    NameChecker.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def quiet_output(self):
        """Return just the symbol name."""
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        """Return a wrapped sentence explaining what is wrong with the symbol."""
        explanation = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name)
        )
        return self.textwrapper.fill(explanation)
154
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a name does not follow the pattern expected of it.
    Created with NameChecker.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def quiet_output(self):
        """Return the location and name as ``file:line:name``."""
        found = self.match
        return "{0}:{1}:{2}".format(found.filename, found.line_no, found.name)

    def verbose_output(self):
        """
        Return a wrapped explanation followed by the code listing of the
        offending line.
        """
        found = self.match
        explanation = (
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(found.filename, found.line_no, found.name, self.pattern)
        )
        return "\n".join([self.textwrapper.fill(explanation), str(found)])
186
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a word using MBED does not appear to be defined as a macro
    or as an enum value. Created with NameChecker.check_for_typos()

    Fields:
    * match: the Match object of the MBED name in question.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def quiet_output(self):
        """Return the location and name as ``file:line:name``."""
        found = self.match
        return "{0}:{1}:{2}".format(found.filename, found.line_no, found.name)

    def verbose_output(self):
        """
        Return a wrapped explanation followed by the code listing of the
        suspicious line.
        """
        found = self.match
        explanation = (
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(found.filename, found.line_no, found.name)
        )
        return "\n".join([self.textwrapper.fill(explanation), str(found)])
212
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.

    All Match objects produced by the parse_* methods carry 1-based line
    numbers, i.e. the first line of a file is line 1.
    """
    def __init__(self, log):
        """
        Args:
        * log: the logger that progress and debug messages are written to.

        Raises an Exception if the current directory is not the project root.
        """
        self.log = log
        self.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    @staticmethod
    def check_repo_path():
        """
        Check that the current working directory is the project root, and throw
        an exception if not.
        """
        if not all(os.path.isdir(d) for d in ["include", "library", "tests"]):
            raise Exception("This script must be run from Mbed TLS root")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches
        (the "symbols" entry is a List of plain symbol names).
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_words = self.parse_mbed_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # A set keeps the membership test constant-time per macro.
        identifier_names = {x.name for x in identifiers}
        actual_macros = [
            macro for macro in all_macros
            if macro.name not in identifier_names]

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        self.log.debug(" {:4} Total Macros".format(len(all_macros)))
        self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug(" {:4} Identifiers".format(len(identifiers)))
        self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
        return {
            "macros": actual_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_words": mbed_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded.

        A path is excluded when it matches any pattern of exclude_wildcards
        (which may be None) or any of the global self.excluded_files patterns.
        """
        patterns = list(exclude_wildcards or []) + self.excluded_files
        return any(fnmatch.fnmatch(path, pattern) for pattern in patterns)

    def get_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the UNIX-style wildcards. While the
        check_names script is designed only for use on UNIX/macOS (due to nm),
        this function alone would work fine on Windows even with forward slashes
        in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a sorted List of relative filepaths, without duplicates.
        """
        accumulator = set()
        for include_wildcard in include_wildcards:
            accumulator.update(glob.iglob(include_wildcard))

        # Sort so that files are parsed, and problems reported, in the same
        # order on every run instead of in arbitrary set order.
        return sorted(path for path in accumulator
                      if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro names starting with any of these prefixes are ignored.
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # Count lines from 1, as editors and compilers do.
                for line_no, line in enumerate(header, 1):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED, in and out of macros,
        comments, anything.

        Lines containing a "//no-check-names" marker or "#error" are skipped.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_files(include, exclude)
        self.log.debug("Looking for MBED words in {} files".format(len(files)))

        mbed_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                # Count lines from 1, as editors and compilers do.
                for line_no, line in enumerate(fp, 1):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                # Count lines from 1, as editors and compilers do.
                for line_no, line in enumerate(header, 1):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum +{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        # Inside the braces and not a preprocessor line: the
                        # first word on the line is taken as the constant.
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip block comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Complete block comments in line have been replaced by a space.
        * Text belonging to a block comment that started on an earlier line,
          or that continues on the next line, has been removed.
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """
        # Terminate current comment?
        if in_block_comment:
            end = line.find("*/")
            if end == -1:
                # The comment does not end here: the whole line is comment
                # text and we are still inside the comment afterwards.
                return "", True
            line = line[end + len("*/"):]
            in_block_comment = False

        # Remove full comments and string literals.
        # Do both in a single pass so that a "/*" inside a string literal is
        # not mistaken for the start of a comment.
        line = re.sub(r'/\*.*?\*/|(")(?:[^\\\"]|\\.)*"',
                      lambda s: '""' if s.group(1) else ' ',
                      line)

        # Start an unfinished comment?
        # Any "/*" left at this point (anywhere on the line) has no matching
        # "*/", so drop it together with everything after it.
        start = line.find("/*")
        if start != -1:
            in_block_comment = True
            line = line[:start]
        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``. Each Match records
        the (comment-stripped, possibly joined) line, its 1-based line number
        and the span of the identifier itself on that line.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            # Count lines from 1, as editors and compilers do.
            for line_no, line in enumerate(header, 1):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or, open bracket),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line.startswith(" "):
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it. Groups are
                # numbered from 1; use the group's own span so that the
                # reported position covers the identifier only.
                for index, group in enumerate(identifier.groups(), 1):
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(index),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects with identifiers.
        """

        files = self.get_files(include, exclude)
        self.log.debug("Looking for identifiers in {} files".format(len(files)))

        identifiers = []
        for header_file in files:
            self.parse_identifiers_in_file(header_file, identifiers)

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        Returns a List of symbols defined in the libraries, as reported by
        parse_symbols_from_nm() (duplicates are not removed).
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Lines of nm output that are neither recognised as undefined/blank/
        header lines nor yield an accepted symbol are logged as errors.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of the symbols defined in any of the object files, in
        nm output order (duplicates are not removed).
        """
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        # Symbols starting with any of these prefixes are not collected.
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = "".join(
            subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
            for lib in object_files)

        for line in nm_output.splitlines():
            if nm_undefined_regex.search(line):
                continue

            symbol = nm_valid_regex.search(line)
            if symbol and not symbol.group("symbol").startswith(exclusions):
                symbols.append(symbol.group("symbol"))
            else:
                self.log.error(line)

        return symbols
704
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    Works on the dict returned by CodeParser.comprehensive_parse(), which maps
    "macros", "enum_consts", "identifiers" and "mbed_words" to Lists of Match
    objects and "symbols" to a List of symbol names.
    """
    def __init__(self, parse_result, log):
        """
        Args:
        * parse_result: the dict of parsed items to check.
        * log: the logger that results are written to.
        """
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 1 if any problem was found, 0 otherwise.
        """
        self.log.info("=============")
        # The quiet flag is class-wide: it affects how every Problem prints.
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1

        self.log.info("PASS")
        return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Uses the "symbols" and "identifiers" entries of the parse result.

        Returns the number of problems that need fixing.
        """
        # Build the lookup set once instead of scanning the identifier list
        # for every symbol.
        declared_names = {
            identifier_match.name
            for identifier_match in self.parse_result["identifiers"]}

        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names]

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Uses the entry of the parse result selected by group_to_check.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing. A single name can
        contribute two problems: one for the pattern and one for containing a
        double underscore.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if "__" in item_match.name:
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED
        are either defined as macros, or as enum constants.
        Uses the "macros", "enum_consts" and "mbed_words" entries of the parse
        result.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] + self.parse_result["enum_consts"]}
        # Words matching any of these are never reported as typos.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$")

        for name_match in self.parse_result["mbed_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
847
def main():
    """
    Perform argument parsing, and create an instance of CodeParser and
    NameChecker to begin the core operation.

    Exits with status 0 when all checks pass, 1 when a check finds problems,
    and 2 when parsing or checking raises an exception (a script error).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the MbedTLS root directory.")
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlighs"
    )

    args = parser.parse_args()

    # Configure the global logger, which is then passed to the classes below
    log = logging.getLogger()
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    log.addHandler(logging.StreamHandler())

    # Keep both the parsing and the checking inside the try block: an
    # exception escaping from either is a script error and must be reported
    # with status 2, not with the status 1 that an uncaught exception would
    # produce and that is reserved for failed checks.
    try:
        code_parser = CodeParser(log)
        parse_result = code_parser.comprehensive_parse()

        name_checker = NameChecker(parse_result, log)
        return_code = name_checker.perform_checks(quiet=args.quiet)
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    sys.exit(return_code)
890
# Run the checks only when this file is executed as a script, so that its
# classes can be imported by other scripts without side effects.
if __name__ == "__main__":
    main()