blob: f71a97805f718f677973fdf44264cd6d30501036 [file] [log] [blame]
Gilles Peskine8266b5b2021-09-27 19:53:31 +02001#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent. It only runs
on Linux and macOS since it depends on nm.
22
23It contains two major Python classes, CodeParser and NameChecker. They both have
24a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
25but the individual functions can also be used for specific needs.
26
27CodeParser makes heavy use of regular expressions to parse the code, and is
28dependent on the current code formatting. Many Python C parser libraries require
29preprocessed C code, which means no macro parsing. Compiler tools are also not
30very helpful when we want the exact location in the original source (which
31becomes impossible when e.g. comments are stripped).
32
33NameChecker performs the following checks:
34
35- All exported and available symbols in the library object files, are explicitly
36 declared in the header files. This uses the nm command.
37- All macros, constants, and identifiers (function names, struct names, etc)
38 follow the required regex pattern.
Pengyu Lv018b2f62022-11-08 15:55:00 +080039- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
Gilles Peskine8266b5b2021-09-27 19:53:31 +020040
41The script returns 0 on success, 1 on test failure, and 2 if there is a script
42error. It must be run from Mbed TLS root.
43"""
44
45import abc
46import argparse
Gilles Peskine7bf52052021-09-27 19:20:17 +020047import fnmatch
Gilles Peskine8266b5b2021-09-27 19:53:31 +020048import glob
49import textwrap
50import os
51import sys
52import traceback
53import re
54import enum
55import shutil
56import subprocess
57import logging
58
Gilles Peskine7ff47662022-09-18 21:17:09 +020059import scripts_path # pylint: disable=unused-import
60from mbedtls_dev import build_tree
61
62
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
65MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
66CONSTANTS_PATTERN = MACRO_PATTERN
67IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
68
class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.

        The listing consists of an empty gutter row, the source line prefixed
        by its line number, and a row of carets underlining the match.
        """
        gutter = format(self.line_no, "4d")
        blank_gutter = " " * len(gutter)
        start, end = self.pos
        underline = start * " " + (end - start) * "^"

        # The stored line normally keeps its trailing newline, but the last
        # line of a file may not have one. Add it so that the underline row
        # always starts on its own line.
        code_line = self.line
        if not code_line.endswith("\n"):
            code_line += "\n"

        return (
            " {0} |\n".format(blank_gutter) +
            " {0} | {1}".format(gutter, code_line) +
            " {0} | {1}\n".format(blank_gutter, underline)
        )
100
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract base for every kind of static analysis error this script reports.

    Subclasses must provide quiet_output() and verbose_output(); str() on a
    problem picks one of the two depending on the class-wide ``quiet`` flag.
    """
    # Shared by all problems: when true, str() yields the terse form.
    quiet = False

    def __init__(self):
        # Wrapper used by subclasses to lay out their verbose explanations.
        self.textwrapper = textwrap.TextWrapper(
            width=80,
            initial_indent=" > ",
            subsequent_indent=" "
        )

    def __str__(self):
        """
        Render the problem in quiet or verbose form, as selected by ``quiet``.
        """
        render = self.quiet_output if type(self).quiet else self.verbose_output
        return render()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
136
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a symbol listed by nm for the library object files has no
    matching declaration in the parsed header files. Created with
    NameChecker.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def quiet_output(self):
        # Terse form: just the symbol name.
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        # Full form: a wrapped one-paragraph explanation.
        explanation = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))
        return self.textwrapper.fill(explanation)
158
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a parsed name does not conform to the pattern required for
    its category. Created with NameChecker.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def quiet_output(self):
        # Terse form: file:line:name.
        found = self.match
        return "{0}:{1}:{2}".format(found.filename, found.line_no, found.name)

    def verbose_output(self):
        # Full form: wrapped explanation followed by the code listing.
        found = self.match
        explanation = (
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(found.filename, found.line_no, found.name, self.pattern))
        return self.textwrapper.fill(explanation) + "\n" + str(found)
190
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a word beginning with MBED or PSA is not among the known
    macros or enum constants. Created with NameChecker.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def quiet_output(self):
        # Terse form: file:line:name.
        found = self.match
        return "{0}:{1}:{2}".format(found.filename, found.line_no, found.name)

    def verbose_output(self):
        # Full form: wrapped explanation followed by the code listing.
        found = self.match
        explanation = (
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(found.filename, found.line_no, found.name))
        return self.textwrapper.fill(explanation) + "\n" + str(found)
217
class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.

    All parsing functions record 1-based line numbers in the Match objects
    they produce, so that they can be used directly in "file:line" output.
    """
    def __init__(self, log):
        """
        Args:
        * log: the logger used for progress and debug output.
        """
        self.log = log
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches
        (or, for "symbols", a List of symbol names).
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        private_macros = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # A set makes each membership test constant-time.
        identifier_names = {x.name for x in identifiers}
        actual_macros = [
            macro for macro in all_macros
            if macro.name not in identifier_names
        ]

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        self.log.debug(" {:4} Total Macros".format(len(all_macros)))
        self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug(" {:4} Identifiers".format(len(identifiers)))
        self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
        return {
            "macros": actual_macros,
            "private_macros": private_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded.

        Args:
        * path: the file path to test.
        * exclude_wildcards: a List of shell-style wildcards, or None.
          The global exclusions in self.excluded_files always apply as well.
        """
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        return any(fnmatch.fnmatch(path, pattern)
                   for pattern in exclude_wildcards)

    def get_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the UNIX-style wildcards. While the
        check_names script is designed only for use on UNIX/macOS (due to nm),
        this function alone would work fine on Windows even with forward slashes
        in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a sorted List of relative filepaths, without duplicates.
        """
        accumulator = set()
        for include_wildcard in include_wildcards:
            accumulator.update(glob.iglob(include_wildcard))

        # Sort so that the order in which files are parsed, and therefore the
        # order in which problems are reported, is the same on every run.
        return sorted(path for path in accumulator
                      if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                # Count lines from 1, as editors and compilers do.
                for line_no, line in enumerate(header, 1):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Lines containing "//no-check-names" or "#error" are skipped.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                # Count lines from 1, as editors and compilers do.
                for line_no, line in enumerate(fp, 1):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Match typedefs and brackets only when they are at the beginning of
        # the line -- if they are indented, they might be sub-structures
        # within structs, etc.
        optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
        enum_with_brace_regex = re.compile(
            r"^(typedef +)?enum " + optional_c_identifier + r" *{")
        enum_keyword_regex = re.compile(r"^(typedef +)?enum")
        preprocessor_regex = re.compile(r"^ *#")
        enum_const_regex = re.compile(r"^ *(?P<enum_const>\w+)")

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                # Count lines from 1, as editors and compilers do.
                for line_no, line in enumerate(header, 1):
                    if state == states.OUTSIDE_KEYWORD:
                        if enum_with_brace_regex.search(line):
                            state = states.IN_BRACES
                        elif enum_keyword_regex.search(line):
                            state = states.IN_BETWEEN
                    elif state == states.IN_BETWEEN:
                        if line.startswith("{"):
                            state = states.IN_BRACES
                    elif line.startswith("}"):
                        # state is IN_BRACES: the enum body ends here.
                        state = states.OUTSIDE_KEYWORD
                    elif not preprocessor_regex.search(line):
                        # state is IN_BRACES: the first word on the line, if
                        # any, is taken to be an enum constant.
                        enum_const = enum_const_regex.search(line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        line = re.sub(self.IGNORED_CHUNK_REGEX,
                      lambda s: '""' if s.group('string') else ' ',
                      line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # Lines matching any of these at their start never declare an identifier.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.

        Args:
        * header_file: path of the file to parse.
        * identifiers: a List that the found Match objects are appended to.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            # Count lines from 1, as editors and compilers do. When a
            # declaration spans several lines, the number recorded is that of
            # the line on which it was recognised.
            for line_no, line in enumerate(header, 1):
                line, in_block_comment = \
                    self.strip_comments_and_literals(line, in_block_comment)

                if self.EXCLUSION_LINES.match(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open parenthesis),
                # and nothing else, high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If previous line seemed to start an unfinished declaration
                # (as above), concat and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if line has a space in front = heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line.startswith(" "):
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects with identifiers.
        """

        files = self.get_files(include, exclude)
        self.log.debug("Looking for identifiers in {} files".format(len(files)))

        identifiers = []
        for header_file in files:
            self.parse_identifiers_in_file(header_file, identifiers)

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus needs to halt the program. This
        is explicitly done for clarity.

        The configuration file is backed up first and restored afterwards,
        whether or not the build succeeded.

        Returns a List of symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run make clean separately to lib to prevent unwanted behavior when
            # make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            # Bare raise: propagate the same exception with its traceback.
            raise
        finally:
            # Put back the original config regardless of there being errors.
            # Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Lines that are neither an undefined-symbol entry, nor blank, nor a
        file header, and from which no acceptable symbol can be extracted,
        are logged as errors.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of symbols defined and used in any of the object
        files.
        """
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")

        # Gather all outputs of nm
        nm_output = "".join(
            subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout
            for lib in object_files
        )

        symbols = []
        for line in nm_output.splitlines():
            if nm_undefined_regex.search(line):
                continue

            symbol = nm_valid_regex.search(line)
            if symbol and not symbol.group("symbol").startswith(exclusions):
                symbols.append(symbol.group("symbol"))
            else:
                self.log.error(line)

        return symbols
729
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    Each check reads the parse result passed to the constructor (a dict such
    as the one returned by CodeParser.comprehensive_parse()), logs its
    outcome, and returns the number of problems it found.
    """

    # Names matching this expression are never reported as typos.
    TYPO_EXCLUSION = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                r"MBEDTLS_TEST_LIBTESTDRIVER|"
                                r"PSA_CRYPTO_DRIVER_TEST")

    def __init__(self, parse_result, log):
        """
        Args:
        * parse_result: a dict of parsed item key to the parsed items.
        * log: the logger used to report results.
        """
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if no problem was found, 1 otherwise.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1

        self.log.info("PASS")
        return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Reads the "symbols" and "identifiers" entries of the parse result.

        Returns the number of problems that need fixing.
        """
        # Build the set of declared names once, so that each symbol is looked
        # up in constant time instead of scanning every identifier.
        declared_names = {
            identifier_match.name
            for identifier_match in self.parse_result["identifiers"]
        }
        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names
        ]

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        A name containing a double underscore is reported as well, whether or
        not it conforms to the pattern.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if re.search(r".*__.*", item_match.name):
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED
        or PSA are either defined as macros, or as enum constants.
        Reads the "macros", "private_macros", "enum_consts" and
        "mbed_psa_words" entries of the parse result.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }

        for name_match in self.parse_result["mbed_psa_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not self.TYPO_EXCLUSION.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))
877
def main():
    """
    Parse the command line, run CodeParser over the source tree, then hand the
    result to NameChecker and exit with its verdict.

    Exits with status 2 if parsing raises an exception; otherwise exits with
    the value returned by NameChecker.perform_checks().
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS are consistent with the house style and are also "
            "self-consistent.\n\n"
            "Expected to be run from the MbedTLS root directory.")
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    arg_parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )
    options = arg_parser.parse_args()

    # Configure the global logger, which is then passed to the classes below
    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    log = logging.getLogger()
    log.setLevel(log_level)
    log.addHandler(logging.StreamHandler())

    # Any failure while parsing is a script error, not a naming problem.
    try:
        parse_result = CodeParser(log).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    checker = NameChecker(parse_result, log)
    sys.exit(checker.perform_checks(quiet=options.quiet))
920
921if __name__ == "__main__":
922 main()