blob: 8c08e5c6f35be304d0ecff4f9b368c9d0f900fec [file] [log] [blame]
Gilles Peskine8266b5b2021-09-27 19:53:31 +02001#!/usr/bin/env python3
2#
3# Copyright The Mbed TLS Contributors
4# SPDX-License-Identifier: Apache-2.0
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent. It only runs
on Linux and macOS since it depends on nm.

It contains two major Python classes, CodeParser and NameChecker. They both have
a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
but the individual functions can also be used for specific needs.

CodeParser makes heavy use of regular expressions to parse the code, and is
dependent on the current code formatting. Many Python C parser libraries require
preprocessed C code, which means no macro parsing. Compiler tools are also not
very helpful when we want the exact location in the original source (which
becomes impossible when e.g. comments are stripped).

NameChecker performs the following checks:

- All exported and available symbols in the library object files are explicitly
  declared in the header files. This uses the nm command.
- All macros, constants, and identifiers (function names, struct names, etc.)
  follow the required regex pattern.
- Typo checking: All words that begin with MBED|PSA exist as macros or constants.

The script returns 0 on success, 1 on test failure, and 2 if there is a script
error. It must be run from Mbed TLS root.
43"""
44
45import abc
46import argparse
Gilles Peskine7bf52052021-09-27 19:20:17 +020047import fnmatch
Gilles Peskine8266b5b2021-09-27 19:53:31 +020048import glob
49import textwrap
50import os
51import sys
52import traceback
53import re
54import enum
55import shutil
56import subprocess
57import logging
58
Gilles Peskine7ff47662022-09-18 21:17:09 +020059import scripts_path # pylint: disable=unused-import
60from mbedtls_dev import build_tree
61
62
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
#
# Macros: an MBEDTLS_ or PSA_ prefix followed by upper-case letters, digits
# and underscores, where the last character is not an underscore.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Enum constants are held to exactly the same pattern as macros.
CONSTANTS_PATTERN = MACRO_PATTERN
# Identifiers: an mbedtls_ or psa_ prefix followed by lower-case letters,
# digits and underscores, where the last character is not an underscore.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
68
class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.

        The listing has three rows: an empty gutter row, the source line
        prefixed with its line number, and a row of carets underlining the
        matched span. The returned string always ends with a newline.
        """
        gutter = format(self.line_no, "4d")
        empty_gutter = " " * len(gutter)
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        # Lines read from a file carry their own newline, except possibly the
        # last line of the file. Add the missing newline so that the underline
        # row never ends up glued to the end of the source row.
        source_row = self.line if self.line.endswith("\n") else self.line + "\n"

        return (
            " {0} |\n".format(empty_gutter) +
            " {0} | {1}".format(gutter, source_row) +
            " {0} | {1}\n".format(empty_gutter, underline)
        )
100
class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    Abstract base for every kind of static analysis error this script reports.

    It cannot be instantiated directly: subclasses have to provide both
    quiet_output() and verbose_output(). str() on a problem picks one of the
    two depending on the class-wide ``quiet`` switch.
    """
    # Class variable to control the quietness of all problems
    quiet = False

    def __init__(self):
        # Wrapper that subclasses use to lay out their verbose explanation.
        self.textwrapper = textwrap.TextWrapper(
            width=80,
            initial_indent=" > ",
            subsequent_indent=" "
        )

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        render = self.quiet_output if type(self).quiet else self.verbose_output
        return render()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
136
class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a symbol that is exported/available in an object file has
    no explicit declaration in the header files. Created with
    NameChecker.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        super().__init__()
        self.symbol_name = symbol_name

    def quiet_output(self):
        # Quiet mode prints the bare symbol name and nothing else.
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        explanation = (
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
        ).format(self.symbol_name)
        return self.textwrapper.fill(explanation)
158
class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a name does not follow the pattern required for its kind.
    Created with NameChecker.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        super().__init__()
        self.pattern = pattern
        self.match = match

    def quiet_output(self):
        # One "file:line:name" record per problem.
        found = self.match
        return "{0}:{1}:{2}".format(found.filename, found.line_no, found.name)

    def verbose_output(self):
        # Wrapped explanation first, then the code listing of the match.
        found = self.match
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(found.filename, found.line_no, found.name, self.pattern)
        )
        return explanation + "\n" + str(found)
190
class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    Reported when a word using MBED or PSA does not appear to be defined as
    a macro or as an enum value. Created with NameChecker.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        super().__init__()
        self.match = match

    def quiet_output(self):
        # One "file:line:name" record per problem.
        found = self.match
        return "{0}:{1}:{2}".format(found.filename, found.line_no, found.name)

    def verbose_output(self):
        # Wrapped explanation first, then the code listing of the match.
        found = self.match
        explanation = self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(found.filename, found.line_no, found.name)
        )
        return explanation + "\n" + str(found)
217
218class CodeParser():
219 """
220 Class for retrieving files and parsing the code. This can be used
221 independently of the checks that NameChecker performs, for example for
222 list_internal_identifiers.py.
223 """
224 def __init__(self, log):
225 self.log = log
Gilles Peskine7ff47662022-09-18 21:17:09 +0200226 build_tree.check_repo_path()
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200227
228 # Memo for storing "glob expression": set(filepaths)
229 self.files = {}
230
Gilles Peskine7bf52052021-09-27 19:20:17 +0200231 # Globally excluded filenames.
232 # Note that "*" can match directory separators in exclude lists.
Gilles Peskined47f6362021-09-27 20:12:00 +0200233 self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200234
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200235 def comprehensive_parse(self):
236 """
237 Comprehensive ("default") function to call each parsing function and
238 retrieve various elements of the code, together with the source location.
239
240 Returns a dict of parsed item key to the corresponding List of Matches.
241 """
242 self.log.info("Parsing source code...")
243 self.log.debug(
244 "The following files are excluded from the search: {}"
245 .format(str(self.excluded_files))
246 )
247
248 all_macros = self.parse_macros([
249 "include/mbedtls/*.h",
250 "include/psa/*.h",
251 "library/*.h",
252 "tests/include/test/drivers/*.h",
253 "3rdparty/everest/include/everest/everest.h",
254 "3rdparty/everest/include/everest/x25519.h"
255 ])
Pengyu Lv018b2f62022-11-08 15:55:00 +0800256 private_macros = self.parse_macros([
257 "library/*.c",
258 ])
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200259 enum_consts = self.parse_enum_consts([
260 "include/mbedtls/*.h",
Pengyu Lv018b2f62022-11-08 15:55:00 +0800261 "include/psa/*.h",
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200262 "library/*.h",
Pengyu Lv018b2f62022-11-08 15:55:00 +0800263 "library/*.c",
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200264 "3rdparty/everest/include/everest/everest.h",
265 "3rdparty/everest/include/everest/x25519.h"
266 ])
Aditya Deshpande94375c82023-01-25 17:00:12 +0000267 identifiers, excluded_identifiers = self.parse_identifiers([
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200268 "include/mbedtls/*.h",
269 "include/psa/*.h",
270 "library/*.h",
271 "3rdparty/everest/include/everest/everest.h",
272 "3rdparty/everest/include/everest/x25519.h"
273 ])
Pengyu Lv018b2f62022-11-08 15:55:00 +0800274 mbed_psa_words = self.parse_mbed_psa_words([
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200275 "include/mbedtls/*.h",
276 "include/psa/*.h",
277 "library/*.h",
278 "3rdparty/everest/include/everest/everest.h",
279 "3rdparty/everest/include/everest/x25519.h",
280 "library/*.c",
281 "3rdparty/everest/library/everest.c",
282 "3rdparty/everest/library/x25519.c"
283 ])
284 symbols = self.parse_symbols()
285
286 # Remove identifier macros like mbedtls_printf or mbedtls_calloc
287 identifiers_justname = [x.name for x in identifiers]
288 actual_macros = []
289 for macro in all_macros:
290 if macro.name not in identifiers_justname:
291 actual_macros.append(macro)
292
293 self.log.debug("Found:")
294 # Aligns the counts on the assumption that none exceeds 4 digits
295 self.log.debug(" {:4} Total Macros".format(len(all_macros)))
296 self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
297 self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
298 self.log.debug(" {:4} Identifiers".format(len(identifiers)))
299 self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
300 return {
301 "macros": actual_macros,
Pengyu Lv018b2f62022-11-08 15:55:00 +0800302 "private_macros": private_macros,
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200303 "enum_consts": enum_consts,
304 "identifiers": identifiers,
Aditya Deshpande94375c82023-01-25 17:00:12 +0000305 "excluded_identifiers": excluded_identifiers,
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200306 "symbols": symbols,
Pengyu Lv018b2f62022-11-08 15:55:00 +0800307 "mbed_psa_words": mbed_psa_words
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200308 }
309
Gilles Peskine7bf52052021-09-27 19:20:17 +0200310 def is_file_excluded(self, path, exclude_wildcards):
Gilles Peskine1c399752021-09-28 10:12:49 +0200311 """Whether the given file path is excluded."""
Gilles Peskine7bf52052021-09-27 19:20:17 +0200312 # exclude_wildcards may be None. Also, consider the global exclusions.
313 exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
314 for pattern in exclude_wildcards:
315 if fnmatch.fnmatch(path, pattern):
316 return True
317 return False
318
Aditya Deshpande94375c82023-01-25 17:00:12 +0000319 def get_all_files(self, include_wildcards, exclude_wildcards):
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200320 """
Aditya Deshpande94375c82023-01-25 17:00:12 +0000321 Get all files that match any of the included UNIX-style wildcards
322 and filter them into included and excluded lists.
323 While the check_names script is designed only for use on UNIX/macOS
324 (due to nm), this function alone will work fine on Windows even with
325 forward slashes in the wildcard.
Aditya Deshpande7d20bb42023-01-27 15:45:32 +0000326
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200327 Args:
328 * include_wildcards: a List of shell-style wildcards to match filepaths.
329 * exclude_wildcards: a List of shell-style wildcards to exclude.
Aditya Deshpande7d20bb42023-01-27 15:45:32 +0000330
Aditya Deshpande94375c82023-01-25 17:00:12 +0000331 Returns:
332 * inc_files: A List of relative filepaths for included files.
333 * exc_files: A List of relative filepaths for excluded files.
334 """
335 accumulator = set()
336 all_wildcards = include_wildcards + (exclude_wildcards or [])
337 for wildcard in all_wildcards:
338 accumulator = accumulator.union(glob.iglob(wildcard))
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200339
Aditya Deshpande94375c82023-01-25 17:00:12 +0000340 inc_files = []
341 exc_files = []
342 for path in accumulator:
343 if self.is_file_excluded(path, exclude_wildcards):
344 exc_files.append(path)
345 else:
346 inc_files.append(path)
347 return (inc_files, exc_files)
348
349 def get_included_files(self, include_wildcards, exclude_wildcards):
350 """
351 Get all files that match any of the included UNIX-style wildcards.
352 While the check_names script is designed only for use on UNIX/macOS
353 (due to nm), this function alone will work fine on Windows even with
354 forward slashes in the wildcard.
Aditya Deshpande7d20bb42023-01-27 15:45:32 +0000355
Aditya Deshpande94375c82023-01-25 17:00:12 +0000356 Args:
357 * include_wildcards: a List of shell-style wildcards to match filepaths.
358 * exclude_wildcards: a List of shell-style wildcards to exclude.
Aditya Deshpande7d20bb42023-01-27 15:45:32 +0000359
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200360 Returns a List of relative filepaths.
361 """
362 accumulator = set()
363
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200364 for include_wildcard in include_wildcards:
Gilles Peskine7bf52052021-09-27 19:20:17 +0200365 accumulator = accumulator.union(glob.iglob(include_wildcard))
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200366
Gilles Peskine7bf52052021-09-27 19:20:17 +0200367 return list(path for path in accumulator
368 if not self.is_file_excluded(path, exclude_wildcards))
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200369
370 def parse_macros(self, include, exclude=None):
371 """
372 Parse all macros defined by #define preprocessor directives.
373
374 Args:
375 * include: A List of glob expressions to look for files through.
376 * exclude: A List of glob expressions for excluding files.
377
378 Returns a List of Match objects for the found macros.
379 """
380 macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
381 exclusions = (
382 "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
383 )
384
Aditya Deshpande94375c82023-01-25 17:00:12 +0000385 files = self.get_included_files(include, exclude)
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200386 self.log.debug("Looking for macros in {} files".format(len(files)))
387
388 macros = []
389 for header_file in files:
390 with open(header_file, "r", encoding="utf-8") as header:
391 for line_no, line in enumerate(header):
392 for macro in macro_regex.finditer(line):
393 if macro.group("macro").startswith(exclusions):
394 continue
395
396 macros.append(Match(
397 header_file,
398 line,
399 line_no,
400 macro.span("macro"),
401 macro.group("macro")))
402
403 return macros
404
Pengyu Lv018b2f62022-11-08 15:55:00 +0800405 def parse_mbed_psa_words(self, include, exclude=None):
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200406 """
Pengyu Lv018b2f62022-11-08 15:55:00 +0800407 Parse all words in the file that begin with MBED|PSA, in and out of
408 macros, comments, anything.
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200409
410 Args:
411 * include: A List of glob expressions to look for files through.
412 * exclude: A List of glob expressions for excluding files.
413
Pengyu Lv018b2f62022-11-08 15:55:00 +0800414 Returns a List of Match objects for words beginning with MBED|PSA.
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200415 """
416 # Typos of TLS are common, hence the broader check below than MBEDTLS.
Pengyu Lv018b2f62022-11-08 15:55:00 +0800417 mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200418 exclusions = re.compile(r"// *no-check-names|#error")
419
Aditya Deshpande94375c82023-01-25 17:00:12 +0000420 files = self.get_included_files(include, exclude)
Pengyu Lv018b2f62022-11-08 15:55:00 +0800421 self.log.debug(
422 "Looking for MBED|PSA words in {} files"
423 .format(len(files))
424 )
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200425
Pengyu Lv018b2f62022-11-08 15:55:00 +0800426 mbed_psa_words = []
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200427 for filename in files:
428 with open(filename, "r", encoding="utf-8") as fp:
429 for line_no, line in enumerate(fp):
430 if exclusions.search(line):
431 continue
432
433 for name in mbed_regex.finditer(line):
Pengyu Lv018b2f62022-11-08 15:55:00 +0800434 mbed_psa_words.append(Match(
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200435 filename,
436 line,
437 line_no,
438 name.span(0),
439 name.group(0)))
440
Pengyu Lv018b2f62022-11-08 15:55:00 +0800441 return mbed_psa_words
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200442
443 def parse_enum_consts(self, include, exclude=None):
444 """
445 Parse all enum value constants that are declared.
446
447 Args:
448 * include: A List of glob expressions to look for files through.
449 * exclude: A List of glob expressions for excluding files.
450
451 Returns a List of Match objects for the findings.
452 """
Aditya Deshpande94375c82023-01-25 17:00:12 +0000453 files = self.get_included_files(include, exclude)
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200454 self.log.debug("Looking for enum consts in {} files".format(len(files)))
455
456 # Emulate a finite state machine to parse enum declarations.
457 # OUTSIDE_KEYWORD = outside the enum keyword
458 # IN_BRACES = inside enum opening braces
459 # IN_BETWEEN = between enum keyword and opening braces
460 states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
461 enum_consts = []
462 for header_file in files:
463 state = states.OUTSIDE_KEYWORD
464 with open(header_file, "r", encoding="utf-8") as header:
465 for line_no, line in enumerate(header):
466 # Match typedefs and brackets only when they are at the
467 # beginning of the line -- if they are indented, they might
468 # be sub-structures within structs, etc.
David Horstmanne1e776c2022-12-16 13:39:04 +0000469 optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200470 if (state == states.OUTSIDE_KEYWORD and
David Horstmanne1e776c2022-12-16 13:39:04 +0000471 re.search(r"^(typedef +)?enum " + \
472 optional_c_identifier + \
473 r" *{", line)):
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200474 state = states.IN_BRACES
475 elif (state == states.OUTSIDE_KEYWORD and
476 re.search(r"^(typedef +)?enum", line)):
477 state = states.IN_BETWEEN
478 elif (state == states.IN_BETWEEN and
479 re.search(r"^{", line)):
480 state = states.IN_BRACES
481 elif (state == states.IN_BRACES and
482 re.search(r"^}", line)):
483 state = states.OUTSIDE_KEYWORD
484 elif (state == states.IN_BRACES and
485 not re.search(r"^ *#", line)):
486 enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
487 if not enum_const:
488 continue
489
490 enum_consts.append(Match(
491 header_file,
492 line,
493 line_no,
494 enum_const.span("enum_const"),
495 enum_const.group("enum_const")))
496
497 return enum_consts
498
    # Chunks of a line that strip_comments_and_literals() replaces.
    # The three alternatives are tried together, so whichever construct starts
    # first on the line wins (e.g. a "/*" inside a string is not a comment).
    # The named group "string" is only set for the string literal alternative,
    # which is how the substitution tells literals apart from comments.
    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))
504
Gilles Peskinedf306652021-11-17 20:32:31 +0100505 def strip_comments_and_literals(self, line, in_block_comment):
506 """Strip comments and string literals from line.
507
508 Continuation lines are not supported.
509
510 If in_block_comment is true, assume that the line starts inside a
511 block comment.
512
513 Return updated values of (line, in_block_comment) where:
514 * Comments in line have been replaced by a space (or nothing at the
515 start or end of the line).
516 * String contents have been removed.
517 * in_block_comment indicates whether the line ends inside a block
518 comment that continues on the next line.
519 """
Gilles Peskine23b40962021-11-17 20:45:39 +0100520
521 # Terminate current multiline comment?
Gilles Peskinedf306652021-11-17 20:32:31 +0100522 if in_block_comment:
Gilles Peskine23b40962021-11-17 20:45:39 +0100523 m = re.search(r"\*/", line)
524 if m:
525 in_block_comment = False
526 line = line[m.end(0):]
527 else:
528 return '', True
Gilles Peskine44801622021-11-17 20:43:35 +0100529
530 # Remove full comments and string literals.
531 # Do it all together to handle cases like "/*" correctly.
532 # Note that continuation lines are not supported.
533 line = re.sub(self.IGNORED_CHUNK_REGEX,
534 lambda s: '""' if s.group('string') else ' ',
Gilles Peskinedf306652021-11-17 20:32:31 +0100535 line)
Gilles Peskine44801622021-11-17 20:43:35 +0100536
Gilles Peskinedf306652021-11-17 20:32:31 +0100537 # Start an unfinished comment?
Gilles Peskine44801622021-11-17 20:43:35 +0100538 # (If `/*` was part of a complete comment, it's already been removed.)
Gilles Peskine23b40962021-11-17 20:45:39 +0100539 m = re.search(r"/\*", line)
Gilles Peskinedf306652021-11-17 20:32:31 +0100540 if m:
541 in_block_comment = True
Gilles Peskine23b40962021-11-17 20:45:39 +0100542 line = line[:m.start(0)]
Gilles Peskine44801622021-11-17 20:43:35 +0100543
Gilles Peskinedf306652021-11-17 20:32:31 +0100544 return line, in_block_comment
545
    # Patterns that recognise a declared identifier on a line. Each
    # alternative has exactly one capturing group holding the identifier;
    # parse_identifiers_in_file() records whichever groups are non-empty.
    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # Lines that parse_identifiers_in_file() skips altogether. The pattern is
    # applied with match(), so every alternative is anchored at the start of
    # the line: extern "C" blocks, anonymous struct/union/enum openers,
    # closing braces, empty lines, line comments and preprocessor directives.
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile("|".join([
        r"extern +\"C\"",
        r"(typedef +)?(struct|union|enum)( *{)?$",
        r"} *;?$",
        r"$",
        r"//",
        r"#",
    ]))
Gilles Peskineb3f4dd52021-11-16 20:56:47 +0100570
571 def parse_identifiers_in_file(self, header_file, identifiers):
572 """
573 Parse all lines of a header where a function/enum/struct/union/typedef
574 identifier is declared, based on some regex and heuristics. Highly
575 dependent on formatting style.
576
577 Append found matches to the list ``identifiers``.
578 """
579
580 with open(header_file, "r", encoding="utf-8") as header:
581 in_block_comment = False
582 # The previous line variable is used for concatenating lines
583 # when identifiers are formatted and spread across multiple
584 # lines.
585 previous_line = ""
586
587 for line_no, line in enumerate(header):
Gilles Peskinedf306652021-11-17 20:32:31 +0100588 line, in_block_comment = \
589 self.strip_comments_and_literals(line, in_block_comment)
Gilles Peskineb3f4dd52021-11-16 20:56:47 +0100590
Gilles Peskinec8fc67f2021-11-17 20:23:18 +0100591 if self.EXCLUSION_LINES.match(line):
Gilles Peskineb3f4dd52021-11-16 20:56:47 +0100592 previous_line = ""
593 continue
594
595 # If the line contains only space-separated alphanumeric
Gilles Peskine4f04d612021-11-17 20:39:56 +0100596 # characters (or underscore, asterisk, or open parenthesis),
Gilles Peskineb3f4dd52021-11-16 20:56:47 +0100597 # and nothing else, high chance it's a declaration that
598 # continues on the next line
599 if re.search(r"^([\w\*\(]+\s+)+$", line):
600 previous_line += line
601 continue
602
603 # If previous line seemed to start an unfinished declaration
604 # (as above), concat and treat them as one.
605 if previous_line:
606 line = previous_line.strip() + " " + line.strip() + "\n"
607 previous_line = ""
608
609 # Skip parsing if line has a space in front = heuristic to
610 # skip function argument lines (highly subject to formatting
611 # changes)
612 if line[0] == " ":
613 continue
614
615 identifier = self.IDENTIFIER_REGEX.search(line)
616
617 if not identifier:
618 continue
619
620 # Find the group that matched, and append it
621 for group in identifier.groups():
622 if not group:
623 continue
624
625 identifiers.append(Match(
626 header_file,
627 line,
628 line_no,
629 identifier.span(),
630 group))
631
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200632 def parse_identifiers(self, include, exclude=None):
633 """
634 Parse all lines of a header where a function/enum/struct/union/typedef
635 identifier is declared, based on some regex and heuristics. Highly
Aditya Deshpande94375c82023-01-25 17:00:12 +0000636 dependent on formatting style. Identifiers in excluded files are still
637 parsed
Aditya Deshpande7d20bb42023-01-27 15:45:32 +0000638
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200639 Args:
640 * include: A List of glob expressions to look for files through.
641 * exclude: A List of glob expressions for excluding files.
Aditya Deshpande7d20bb42023-01-27 15:45:32 +0000642
Aditya Deshpande94375c82023-01-25 17:00:12 +0000643 Returns: a Tuple of two Lists of Match objects with identifiers.
644 * included_identifiers: A List of Match objects with identifiers from
645 included files.
646 * excluded_identifiers: A List of Match objects with identifiers from
647 excluded files.
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200648 """
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200649
Aditya Deshpande94375c82023-01-25 17:00:12 +0000650 included_files, excluded_files = \
651 self.get_all_files(include, exclude)
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200652
Aditya Deshpande94375c82023-01-25 17:00:12 +0000653 self.log.debug("Looking for included identifiers in {} files".format \
654 (len(included_files)))
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200655
Aditya Deshpande94375c82023-01-25 17:00:12 +0000656 included_identifiers = []
657 excluded_identifiers = []
658 for header_file in included_files:
659 self.parse_identifiers_in_file(header_file, included_identifiers)
660 for header_file in excluded_files:
661 self.parse_identifiers_in_file(header_file, excluded_identifiers)
662
663 return (included_identifiers, excluded_identifiers)
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200664
665 def parse_symbols(self):
666 """
667 Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
668 object files using nm to retrieve the list of referenced symbols.
669 Exceptions thrown here are rethrown because they would be critical
670 errors that void several tests, and thus needs to halt the program. This
671 is explicitly done for clarity.
672
673 Returns a List of unique symbols defined and used in the libraries.
674 """
675 self.log.info("Compiling...")
676 symbols = []
677
Tom Cosgrove49f99bc2022-12-04 16:44:21 +0000678 # Back up the config and atomically compile with the full configuration.
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200679 shutil.copy(
Gilles Peskined47f6362021-09-27 20:12:00 +0200680 "include/mbedtls/config.h",
681 "include/mbedtls/config.h.bak"
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200682 )
683 try:
684 # Use check=True in all subprocess calls so that failures are raised
685 # as exceptions and logged.
686 subprocess.run(
687 ["python3", "scripts/config.py", "full"],
688 universal_newlines=True,
689 check=True
690 )
691 my_environment = os.environ.copy()
692 my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
693 # Run make clean separately to lib to prevent unwanted behavior when
694 # make is invoked with parallelism.
695 subprocess.run(
696 ["make", "clean"],
697 universal_newlines=True,
698 check=True
699 )
700 subprocess.run(
701 ["make", "lib"],
702 env=my_environment,
703 universal_newlines=True,
704 stdout=subprocess.PIPE,
705 stderr=subprocess.STDOUT,
706 check=True
707 )
708
709 # Perform object file analysis using nm
710 symbols = self.parse_symbols_from_nm([
711 "library/libmbedcrypto.a",
712 "library/libmbedtls.a",
713 "library/libmbedx509.a"
714 ])
715
716 subprocess.run(
717 ["make", "clean"],
718 universal_newlines=True,
719 check=True
720 )
721 except subprocess.CalledProcessError as error:
722 self.log.debug(error.output)
723 raise error
724 finally:
725 # Put back the original config regardless of there being errors.
726 # Works also for keyboard interrupts.
727 shutil.move(
Gilles Peskined47f6362021-09-27 20:12:00 +0200728 "include/mbedtls/config.h.bak",
729 "include/mbedtls/config.h"
Gilles Peskine8266b5b2021-09-27 19:53:31 +0200730 )
731
732 return symbols
733
734 def parse_symbols_from_nm(self, object_files):
735 """
736 Run nm to retrieve the list of referenced symbols in each object file.
737 Does not return the position data since it is of no use.
738
739 Args:
740 * object_files: a List of compiled object filepaths to search through.
741
742 Returns a List of unique symbols defined and used in any of the object
743 files.
744 """
745 nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
746 nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
747 exclusions = ("FStar", "Hacl")
748
749 symbols = []
750
751 # Gather all outputs of nm
752 nm_output = ""
753 for lib in object_files:
754 nm_output += subprocess.run(
755 ["nm", "-og", lib],
756 universal_newlines=True,
757 stdout=subprocess.PIPE,
758 stderr=subprocess.STDOUT,
759 check=True
760 ).stdout
761
762 for line in nm_output.splitlines():
763 if not nm_undefined_regex.search(line):
764 symbol = nm_valid_regex.search(line)
765 if (symbol and not symbol.group("symbol").startswith(exclusions)):
766 symbols.append(symbol.group("symbol"))
767 else:
768 self.log.error(line)
769
770 return symbols
771
class NameChecker():
    """
    Representation of the core name checking operation performed by this script.

    Works on a dictionary of parse results (lists of match objects exposing a
    .name attribute, plus a list of symbol name strings) and reports its
    findings through the supplied logger.
    """
    def __init__(self, parse_result, log):
        """
        Args:
        * parse_result: dictionary of parse results, indexed by group name.
        * log: logger used for all output of the checks.
        """
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if no problem was found by any check, 1 otherwise.
        """
        self.log.info("=============")
        # The verbosity setting is shared by all Problem instances.
        Problem.quiet = quiet

        problems = self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems == 0:
            self.log.info("PASS")
            return 0

        self.log.info("FAIL: {0} problem(s) to fix".format(problems))
        if quiet:
            self.log.info("Remove --quiet to see explanations.")
        else:
            self.log.info("Use --quiet for minimal output.")
        return 1

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes self.parse_result has already been populated.

        A symbol counts as declared when its name equals the name of any
        entry of the "identifiers" or "excluded_identifiers" groups.

        Returns the number of problems that need fixing.
        """
        # Build the set of declared names once so that each symbol costs a
        # single hash lookup instead of a scan over every identifier.
        declared_names = {
            identifier_match.name
            for identifier_match
            in self.parse_result["identifiers"] +
            self.parse_result["excluded_identifiers"]
        }

        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names
        ]

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes self.parse_result has already been populated.

        An item that fails the pattern and also contains a double underscore
        is counted as two problems.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if "__" in item_match.name:
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED
        or PSA are either defined as macros, or as enum constants.
        Assumes self.parse_result has already been populated.

        Returns the number of problems that need fixing.
        """
        # Every name that is legitimately defined somewhere.
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] +
            self.parse_result["private_macros"] +
            self.parse_result["enum_consts"]
        }
        # Words matching this pattern are never reported as typos.
        # NOTE(review): "LIBTESTDRIVER*" means zero or more "R"; because
        # search() is used, any word containing "MBEDTLS_TEST_LIBTESTDRIVE"
        # is excluded. Confirm whether ".*" was intended.
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
                                    r"MBEDTLS_TEST_LIBTESTDRIVER*|"
                                    r"PSA_CRYPTO_DRIVER_TEST")

        problems = []
        for name_match in self.parse_result["mbed_psa_words"]:
            word = name_match.name

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
            if "MBEDTLS_PSA_ACCEL_" in word:
                found = word.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names
            else:
                found = word in all_caps_names

            if not found and not typo_exclusion.search(word):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        The status line is logged at info level; each problem is logged at
        warning level.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if not problems:
            self.log.info("{}: PASS".format(name))
            return

        self.log.info("{}: FAIL\n".format(name))
        for problem in problems:
            self.log.warning(str(problem))
921
def main():
    """
    Parse the command-line options, then run CodeParser followed by
    NameChecker and exit with the resulting status.

    Exits with the value returned by NameChecker.perform_checks(), or with 2
    (after printing the traceback) if parsing raises an exception.
    """
    description = (
        "This script confirms that the naming of all symbols and identifiers "
        "in Mbed TLS are consistent with the house style and are also "
        "self-consistent.\n\n"
        "Expected to be run from the MbedTLS root directory."
    )
    cli = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    cli.add_argument(
        "-v", "--verbose",
        help="show parse results",
        action="store_true"
    )
    cli.add_argument(
        "-q", "--quiet",
        help="hide unnecessary text, explanations, and highlights",
        action="store_true"
    )
    options = cli.parse_args()

    # The root logger is configured here and handed to both classes below.
    log = logging.getLogger()
    if options.verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)
    log.addHandler(logging.StreamHandler())

    try:
        parse_result = CodeParser(log).comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        # Any parsing failure gets its own exit status, distinct from the
        # status used for naming problems.
        traceback.print_exc()
        sys.exit(2)

    checker = NameChecker(parse_result, log)
    sys.exit(checker.perform_checks(quiet=options.quiet))
964
# Run the checks only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()