#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

18"""
19This script confirms that the naming of all symbols and identifiers in Mbed TLS
20are consistent with the house style and are also self-consistent. It only runs
21on Linux and macOS since it depends on nm.
22
23It contains two major Python classes, CodeParser and NameChecker. They both have
24a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
25but the individual functions can also be used for specific needs.
26
27CodeParser makes heavy use of regular expressions to parse the code, and is
28dependent on the current code formatting. Many Python C parser libraries require
29preprocessed C code, which means no macro parsing. Compiler tools are also not
30very helpful when we want the exact location in the original source (which
31becomes impossible when e.g. comments are stripped).
32
33NameChecker performs the following checks:
34
35- All exported and available symbols in the library object files, are explicitly
36 declared in the header files. This uses the nm command.
37- All macros, constants, and identifiers (function names, struct names, etc)
38 follow the required regex pattern.
Pengyu Lv018b2f62022-11-08 15:55:00 +080039- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
Gilles Peskine8266b5b2021-09-27 19:53:31 +020040
41The script returns 0 on success, 1 on test failure, and 2 if there is a script
42error. It must be run from Mbed TLS root.
43"""

import abc
import argparse
import fnmatch
import glob
import textwrap
import os
import sys
import traceback
import re
import enum
import shutil
import subprocess
import logging

import scripts_path # pylint: disable=unused-import
from mbedtls_dev import build_tree


# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
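# For example, "MBEDTLS_AES_C" and "PSA_ALG_SHA_256" satisfy MACRO_PATTERN,
# and "mbedtls_aes_init" satisfies IDENTIFIER_PATTERN, whereas a name with a
# trailing underscore or a lowercase letter in a macro would be rejected.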

class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.
        """
        gutter = format(self.line_no, "4d")
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        return (
            " {0} |\n".format(" " * len(gutter)) +
            " {0} | {1}".format(gutter, self.line) +
            " {0} | {1}\n".format(" " * len(gutter), underline)
        )

class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    An abstract parent class representing a form of static analysis error.
    It extends an Abstract Base Class, which means it is not instantiable, and
    it also mandates certain abstract methods to be implemented in subclasses.
    """
    # Class variable to control the quietness of all problems
    quiet = False
    def __init__(self):
        self.textwrapper = textwrap.TextWrapper()
        self.textwrapper.width = 80
        self.textwrapper.initial_indent = "    > "
        self.textwrapper.subsequent_indent = "      "

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """
        pass

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
        pass

class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameChecker.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        Problem.__init__(self)

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        return self.textwrapper.fill(
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))

class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameChecker.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            )
        ) + "\n" + str(self.match)

class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word beginning with MBED or PSA does not
    appear to be defined as a constant or enum value. Created with
    NameChecker.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        ) + "\n" + str(self.match)

class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        private_macros = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = []
        for macro in all_macros:
            if macro.name not in identifiers_justname:
                actual_macros.append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        self.log.debug(" {:4} Total Macros".format(len(all_macros)))
        self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug(" {:4} Identifiers".format(len(identifiers)))
        self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
        return {
            "macros": actual_macros,
            "private_macros": private_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the UNIX-style wildcards. While the
        check_names script is designed only for use on UNIX/macOS (due to nm),
        this function alone would work fine on Windows even with forward slashes
        in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )
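        # For instance, "#define MBEDTLS_FOO 1" and "#  define PSA_BAR(x) (x)"
        # are both captured (as MBEDTLS_FOO and PSA_BAR), while names starting
        # with one of the exclusions above (such as "asm" or "inline") are
        # skipped.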

        files = self.get_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the files that begin with MBED|PSA, whether they
        appear in macros, comments or anywhere else.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the check is broader than just MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
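        # For example, for a declaration like "typedef enum {" followed by
        # "    MBEDTLS_SOMETHING = 1," and "} mbedtls_something_t;", the FSM
        # goes OUTSIDE_KEYWORD -> IN_BRACES -> OUTSIDE_KEYWORD and records
        # MBEDTLS_SOMETHING while inside the braces.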
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum +{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))
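    # As used in strip_comments_and_literals() below, a line such as
    #   x = "/*"; /* comment */ y
    # has its string contents dropped and the comment replaced by a space,
    # leaving roughly: x = "";  y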

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
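    # For example, a line such as
    #   int mbedtls_aes_setkey_enc( mbedtls_aes_context *ctx, const unsigned char *key,
    # is matched by the first alternative above and yields the identifier
    # "mbedtls_aes_setkey_enc".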
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, there is a high chance it's a declaration
                # that continues on the next line.
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If the previous line seemed to start an unfinished
                # declaration (as above), concatenate and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if the line has a space in front: this is a
                # heuristic to skip function argument lines (highly subject
                # to formatting changes).
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects with identifiers.
        """

        files = self.get_files(include, exclude)
        self.log.debug("Looking for identifiers in {} files".format(len(files)))

        identifiers = []
        for header_file in files:
            self.parse_identifiers_in_file(header_file, identifiers)

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus need to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately from make lib to prevent unwanted
            # behavior when make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of whether there were
            # errors. This also covers keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
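        # `nm -og` prints one symbol per line, prefixed with the object file
        # name. The first regex below matches the lines to ignore (undefined
        # "U" symbols, blank lines and bare object-file headers); the second
        # extracts the symbol name, tolerating the leading underscore that
        # macOS adds to C symbols.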
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)

        return symbols

class NameChecker():
    """
    Representation of the core name checking operation performed by this script.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes comprehensive_parse() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        for symbol in self.parse_result["symbols"]:
            found_symbol_declared = False
            for identifier_match in self.parse_result["identifiers"]:
                if symbol == identifier_match.name:
                    found_symbol_declared = True
                    break

            if not found_symbol_declared:
                problems.append(SymbolNotInHeader(symbol))

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes comprehensive_parse() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if re.search(r".*__.*", item_match.name):
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED or
        PSA are either defined as macros or as enum constants.
        Assumes comprehensive_parse() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*")

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))

def main():
    """
    Perform argument parsing, and create an instance of CodeParser and
    NameChecker to begin the core operation.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the names of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )

    args = parser.parse_args()

    # Configure the global logger, which is then passed to the classes below
    log = logging.getLogger()
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    log.addHandler(logging.StreamHandler())

    try:
        code_parser = CodeParser(log)
        parse_result = code_parser.comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    name_checker = NameChecker(parse_result, log)
    return_code = name_checker.perform_checks(quiet=args.quiet)

    sys.exit(return_code)

if __name__ == "__main__":
    main()