#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent. It only runs
on Linux and macOS since it depends on nm.

It contains two major Python classes, CodeParser and NameChecker. They both have
a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
but the individual functions can also be used for specific needs.

CodeParser makes heavy use of regular expressions to parse the code, and is
dependent on the current code formatting. Many Python C parser libraries require
preprocessed C code, which means no macro parsing. Compiler tools are also not
very helpful when we want the exact location in the original source (which
becomes impossible when e.g. comments are stripped).

NameChecker performs the following checks:

- All exported and available symbols in the library object files are explicitly
  declared in the header files. This uses the nm command.
- All macros, constants, and identifiers (function names, struct names, etc.)
  follow the required regex pattern.
- Typo checking: All words that begin with MBED exist as macros or constants.

The script returns 0 on success, 1 on test failure, and 2 if there is a script
error. It must be run from Mbed TLS root.
"""

import abc
import argparse
import fnmatch
import glob
import textwrap
import os
import sys
import traceback
import re
import enum
import shutil
import subprocess
import logging

import scripts_path # pylint: disable=unused-import
from mbedtls_dev import build_tree


# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
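# For illustration (example names, not drawn from any particular header):
# "MBEDTLS_AES_C" and "PSA_KEY_TYPE_AES" satisfy MACRO_PATTERN, whereas
# "MBEDTLS_Foo" (lower-case letters) or "MBEDTLS_FOO_" (trailing underscore)
# would not. "mbedtls_aes_init" satisfies IDENTIFIER_PATTERN.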

class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.
        """
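        # Illustrative rendering (example name is hypothetical): a match
        # covering columns 4-19 of "int MBEDTLS_badname;" on line 42 is
        # listed roughly as:
        #       |
        #    42 | int MBEDTLS_badname;
        #       |     ^^^^^^^^^^^^^^^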
        gutter = format(self.line_no, "4d")
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        return (
            " {0} |\n".format(" " * len(gutter)) +
            " {0} | {1}".format(gutter, self.line) +
            " {0} | {1}\n".format(" " * len(gutter), underline)
        )

class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    An abstract parent class representing a form of static analysis error.
    It extends an Abstract Base Class, which means it is not instantiable, and
    it also mandates certain abstract methods to be implemented in subclasses.
    """
    # Class variable to control the quietness of all problems
    quiet = False
    def __init__(self):
        self.textwrapper = textwrap.TextWrapper()
        self.textwrapper.width = 80
        self.textwrapper.initial_indent = "    > "
        self.textwrapper.subsequent_indent = "      "

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """
        pass

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
        pass

class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameChecker.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        Problem.__init__(self)

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        return self.textwrapper.fill(
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))

class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameChecker.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            )
        ) + "\n" + str(self.match)

class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word using MBED doesn't appear to be defined
    as a constant or an enum value. Created with NameChecker.check_for_typos()

    Fields:
    * match: the Match object of the MBED name in question.
    """
    def __init__(self, match):
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        ) + "\n" + str(self.match)

class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_words = self.parse_mbed_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = []
        for macro in all_macros:
            if macro.name not in identifiers_justname:
                actual_macros.append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        self.log.debug("  {:4} Total Macros".format(len(all_macros)))
        self.log.debug("  {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug("  {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug("  {:4} Identifiers".format(len(identifiers)))
        self.log.debug("  {:4} Exported Symbols".format(len(symbols)))
        return {
            "macros": actual_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_words": mbed_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the UNIX-style wildcards. While the
        check_names script is designed only for use on UNIX/macOS (due to nm),
        this function alone would work fine on Windows even with forward slashes
        in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )
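        # Illustrative examples (not from the original code): macro_regex
        # captures "MBEDTLS_HAVE_TIME" from "#define MBEDTLS_HAVE_TIME", while
        # "#define inline __inline" is skipped because "inline" is listed in
        # the exclusions above.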

        files = self.get_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED, in and out of macros,
        comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")
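        # Illustrative example (misspelt name is hypothetical): in a line such
        # as "#if defined(MBEDLTS_SSL_PROTO_TLS1_2)", mbed_regex still matches
        # the misspelt "MBEDLTS_SSL_PROTO_TLS1_2", which is exactly what the
        # typo check needs; a line ending in "// no-check-names" is skipped
        # entirely.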

        files = self.get_files(include, exclude)
        self.log.debug("Looking for MBED words in {} files".format(len(files)))

        mbed_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
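        # Illustrative walk-through (the declaration below is a representative
        # shape, not taken from this file):
        #     typedef enum
        #     {
        #         MBEDTLS_MODE_NONE = 0,
        #         MBEDTLS_MODE_ECB,
        #     } mbedtls_cipher_mode_t;
        # moves OUTSIDE_KEYWORD -> IN_BETWEEN on "typedef enum", then
        # IN_BETWEEN -> IN_BRACES on "{", records MBEDTLS_MODE_NONE and
        # MBEDTLS_MODE_ECB, and returns to OUTSIDE_KEYWORD on the closing "}".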
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum +{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """
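        # Illustrative behaviour (example input is not from this file):
        #   strip_comments_and_literals('void f(void); /* docs\n', False)
        # returns ('void f(void); ', True), because the block comment opened
        # here is only closed on a later line.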

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
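    # Illustrative matches for IDENTIFIER_REGEX (example names are
    # hypothetical): "int mbedtls_foo_bar( mbedtls_foo_context *ctx,"
    # captures "mbedtls_foo_bar" via the function alternative,
    # "typedef struct mbedtls_foo_context" captures "mbedtls_foo_context"
    # via the data-structure alternative, and "} mbedtls_foo_context;"
    # captures it via the typedef-instance alternative.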
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects with identifiers.
        """

        files = self.get_files(include, exclude)
        self.log.debug("Looking for identifiers in {} files".format(len(files)))

        identifiers = []
        for header_file in files:
            self.parse_identifiers_in_file(header_file, identifiers)

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus need to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)

        return symbols

class NameChecker():
    """
    Representation of the core name checking operation performed by this script.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        for symbol in self.parse_result["symbols"]:
            found_symbol_declared = False
            for identifier_match in self.parse_result["identifiers"]:
                if symbol == identifier_match.name:
                    found_symbol_declared = True
                    break

            if not found_symbol_declared:
                problems.append(SymbolNotInHeader(symbol))

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes parse_names_in_source() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if re.search(r".*__.*", item_match.name):
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED
        are either defined as macros or as enum constants.
        Assumes parse_names_in_source() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] + self.parse_result["enum_consts"]}
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*")

        for name_match in self.parse_result["mbed_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))

def main():
    """
    Perform argument parsing, and create an instance of CodeParser and
    NameChecker to begin the core operation.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS is consistent with the house style and is also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )

    args = parser.parse_args()

    # Configure the global logger, which is then passed to the classes below
    log = logging.getLogger()
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    log.addHandler(logging.StreamHandler())

    try:
        code_parser = CodeParser(log)
        parse_result = code_parser.comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    name_checker = NameChecker(parse_result, log)
    return_code = name_checker.perform_checks(quiet=args.quiet)

    sys.exit(return_code)

if __name__ == "__main__":
    main()