#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
is consistent with the house style and is also self-consistent. It only runs
on Linux and macOS since it depends on nm.

It contains two major Python classes, CodeParser and NameChecker. They both have
a comprehensive "run-all" function (comprehensive_parse() and perform_checks()),
but the individual functions can also be used for specific needs.

CodeParser makes heavy use of regular expressions to parse the code, and is
dependent on the current code formatting. Many Python C parser libraries require
preprocessed C code, which means no macro parsing. Compiler tools are also not
very helpful when we want the exact location in the original source (which
becomes impossible when e.g. comments are stripped).

NameChecker performs the following checks:

- All exported and available symbols in the library object files are explicitly
  declared in the header files. This uses the nm command.
- All macros, constants, and identifiers (function names, struct names, etc.)
  follow the required regex pattern.
- Typo checking: all words that begin with MBED exist as macros or constants.

The script returns 0 on success, 1 on test failure, and 2 if there is a script
error. It must be run from the Mbed TLS root directory.
"""

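# Typical invocation, assuming this file lives under tests/scripts/ in an
# Mbed TLS source tree and is run from the root of that tree (illustrative):
#   python3 tests/scripts/check_names.py --verbose
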
import abc
import argparse
import fnmatch
import glob
import textwrap
import os
import sys
import traceback
import re
import enum
import shutil
import subprocess
import logging

# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
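# Illustrative examples of names accepted or rejected by the patterns above
# (they are examples only, not part of the checks themselves):
#   MBEDTLS_AES_C, PSA_WANT_ALG_SHA_256    match MACRO_PATTERN
#   MBEDTLS_BAD_                           rejected: must end in [0-9A-Z]
#   mbedtls_cipher_setup, psa_crypto_init  match IDENTIFIER_PATTERN
#   MbedTLS_Cipher_Setup                   rejected: prefix must be lowercase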

class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.
        """
        gutter = format(self.line_no, "4d")
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        return (
            " {0} |\n".format(" " * len(gutter)) +
            " {0} | {1}".format(gutter, self.line) +
            " {0} | {1}\n".format(" " * len(gutter), underline)
        )

class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    An abstract parent class representing a form of static analysis error.
    It extends an Abstract Base Class, which means it is not instantiable, and
    it also mandates certain abstract methods to be implemented in subclasses.
    """
    # Class variable to control the quietness of all problems
    quiet = False
    def __init__(self):
        self.textwrapper = textwrap.TextWrapper()
        self.textwrapper.width = 80
        self.textwrapper.initial_indent = "    > "
        self.textwrapper.subsequent_indent = "      "

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """
        pass

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
        pass

class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameChecker.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        Problem.__init__(self)

    def quiet_output(self):
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        return self.textwrapper.fill(
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))

class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameChecker.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            )
        ) + "\n" + str(self.match)

class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word beginning with MBED does not appear to be
    defined as a macro or an enum constant. Created with
    NameChecker.check_for_typos()

    Fields:
    * match: the Match object of the MBED name in question.
    """
    def __init__(self, match):
        self.match = match
        Problem.__init__(self)

    def quiet_output(self):
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        ) + "\n" + str(self.match)

class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        self.log = log
        self.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]

    @staticmethod
    def check_repo_path():
        """
        Check that the current working directory is the project root, and throw
        an exception if not.
        """
        if not all(os.path.isdir(d) for d in ["include", "library", "tests"]):
            raise Exception("This script must be run from Mbed TLS root")

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "tests/include/test/drivers/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        mbed_words = self.parse_mbed_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc
        identifiers_justname = [x.name for x in identifiers]
        actual_macros = []
        for macro in all_macros:
            if macro.name not in identifiers_justname:
                actual_macros.append(macro)

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        self.log.debug(" {:4} Total Macros".format(len(all_macros)))
        self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
        self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug(" {:4} Identifiers".format(len(identifiers)))
        self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
        return {
            "macros": actual_macros,
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "symbols": symbols,
            "mbed_words": mbed_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        for pattern in exclude_wildcards:
            if fnmatch.fnmatch(path, pattern):
                return True
        return False

    def get_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the UNIX-style wildcards. While the
        check_names script is designed only for use on UNIX/macOS (due to nm),
        this function alone would work fine on Windows even with forward slashes
        in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        accumulator = set()

        for include_wildcard in include_wildcards:
            accumulator = accumulator.union(glob.iglob(include_wildcard))

        return list(path for path in accumulator
                    if not self.is_file_excluded(path, exclude_wildcards))

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED, in and out of macros,
        comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_files(include, exclude)
        self.log.debug("Looking for MBED words in {} files".format(len(files)))

        mbed_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings.
        """
        files = self.get_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
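        # Illustrative walk-through on hypothetical header content:
        #   "typedef enum"            OUTSIDE_KEYWORD -> IN_BETWEEN
        #   "{"                       IN_BETWEEN -> IN_BRACES
        #   "    MBEDTLS_MD_NONE=0,"  constant "MBEDTLS_MD_NONE" is recorded
        #   "} mbedtls_md_type_t;"    IN_BRACES -> OUTSIDE_KEYWORD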
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    # Match typedefs and brackets only when they are at the
                    # beginning of the line -- if they are indented, they might
                    # be sub-structures within structs, etc.
                    if (state == states.OUTSIDE_KEYWORD and
                            re.search(r"^(typedef +)?enum +{", line)):
                        state = states.IN_BRACES
                    elif (state == states.OUTSIDE_KEYWORD and
                          re.search(r"^(typedef +)?enum", line)):
                        state = states.IN_BETWEEN
                    elif (state == states.IN_BETWEEN and
                          re.search(r"^{", line)):
                        state = states.IN_BRACES
                    elif (state == states.IN_BRACES and
                          re.search(r"^}", line)):
                        state = states.OUTSIDE_KEYWORD
                    elif (state == states.IN_BRACES and
                          not re.search(r"^ *#", line)):
                        enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
                        if not enum_const:
                            continue

                        enum_consts.append(Match(
                            header_file,
                            line,
                            line_no,
                            enum_const.span("enum_const"),
                            enum_const.group("enum_const")))

        return enum_consts

    IDENTIFIER_REGEX = re.compile(
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w|"
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(|"
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$|"
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*"
    )
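    # Illustrative declarations (hypothetical names) and the identifier that
    # each alternative above would capture from them:
    #   "int mbedtls_foo_bar(const unsigned char *input,"  -> mbedtls_foo_bar
    #   "typedef int (*mbedtls_foo_f)(void *ctx);"         -> mbedtls_foo_f
    #   "typedef struct mbedtls_foo_context {"             -> mbedtls_foo_context
    #   "} mbedtls_foo_context;"                           -> mbedtls_foo_context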
    # The regex below is indented for clarity.
    EXCLUSION_LINES = re.compile(
        r"^("
            r"extern +\"C\"|" # pylint: disable=bad-continuation
            r"(typedef +)?(struct|union|enum)( *{)?$|"
            r"} *;?$|"
            r"$|"
            r"//|"
            r"#"
        r")"
    )

    def parse_identifiers_in_file(self, header_file, identifiers):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Append found matches to the list ``identifiers``.
        """

        with open(header_file, "r", encoding="utf-8") as header:
            in_block_comment = False
            # The previous line variable is used for concatenating lines
            # when identifiers are formatted and spread across multiple
            # lines.
            previous_line = ""

            for line_no, line in enumerate(header):
                # Terminate current comment?
                if in_block_comment:
                    line = re.sub(r".*?\*/", r"", line, 1)
                    in_block_comment = False
                # Remove full comments and string literals
                line = re.sub(r'/\*.*?\*/|(")(?:[^\\\"]|\\.)*"',
                              lambda s: '""' if s.group(1) else ' ',
                              line)
                # Start an unfinished comment?
                m = re.match(r"/\*", line)
                if m:
                    in_block_comment = True
                    line = line[:m.end(0)]

                if self.EXCLUSION_LINES.search(line):
                    previous_line = ""
                    continue

                # If the line contains only space-separated alphanumeric
                # characters (or underscore, asterisk, or open bracket) and
                # nothing else, there is a high chance it's a declaration that
                # continues on the next line
                if re.search(r"^([\w\*\(]+\s+)+$", line):
                    previous_line += line
                    continue

                # If the previous line seemed to start an unfinished declaration
                # (as above), concatenate and treat them as one.
                if previous_line:
                    line = previous_line.strip() + " " + line.strip() + "\n"
                    previous_line = ""

                # Skip parsing if the line starts with a space: a heuristic to
                # skip function argument lines (highly subject to formatting
                # changes)
                if line[0] == " ":
                    continue

                identifier = self.IDENTIFIER_REGEX.search(line)

                if not identifier:
                    continue

                # Find the group that matched, and append it
                for group in identifier.groups():
                    if not group:
                        continue

                    identifiers.append(Match(
                        header_file,
                        line,
                        line_no,
                        identifier.span(),
                        group))

    def parse_identifiers(self, include, exclude=None):
        """
        Parse all lines of a header where a function/enum/struct/union/typedef
        identifier is declared, based on some regex and heuristics. Highly
        dependent on formatting style.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects with identifiers.
        """

        files = self.get_files(include, exclude)
        self.log.debug("Looking for identifiers in {} files".format(len(files)))

        identifiers = []
        for header_file in files:
            self.parse_identifiers_in_file(header_file, identifiers)

        return identifiers

    def parse_symbols(self):
        """
        Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
        object files using nm to retrieve the list of referenced symbols.
        Exceptions thrown here are rethrown because they would be critical
        errors that void several tests, and thus need to halt the program. This
        is explicitly done for clarity.

        Returns a List of unique symbols defined and used in the libraries.
        """
        self.log.info("Compiling...")
        symbols = []

        # Back up the config and atomically compile with the full configuration.
        shutil.copy(
            "include/mbedtls/config.h",
            "include/mbedtls/config.h.bak"
        )
        try:
            # Use check=True in all subprocess calls so that failures are raised
            # as exceptions and logged.
            subprocess.run(
                ["python3", "scripts/config.py", "full"],
                universal_newlines=True,
                check=True
            )
            my_environment = os.environ.copy()
            my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
            # Run "make clean" separately from "make lib" to prevent unwanted
            # behavior when make is invoked with parallelism.
            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
            subprocess.run(
                ["make", "lib"],
                env=my_environment,
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            )

            # Perform object file analysis using nm
            symbols = self.parse_symbols_from_nm([
                "library/libmbedcrypto.a",
                "library/libmbedtls.a",
                "library/libmbedx509.a"
            ])

            subprocess.run(
                ["make", "clean"],
                universal_newlines=True,
                check=True
            )
        except subprocess.CalledProcessError as error:
            self.log.debug(error.output)
            raise error
        finally:
            # Put back the original config regardless of whether there were
            # errors. Works also for keyboard interrupts.
            shutil.move(
                "include/mbedtls/config.h.bak",
                "include/mbedtls/config.h"
            )

        return symbols

    def parse_symbols_from_nm(self, object_files):
        """
        Run nm to retrieve the list of referenced symbols in each object file.
        Does not return the position data since it is of no use.

        Args:
        * object_files: a List of compiled object filepaths to search through.

        Returns a List of unique symbols defined and used in any of the object
        files.
        """
        nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
        nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
        exclusions = ("FStar", "Hacl")
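        # Accepted lines look roughly like one of the following (illustrative
        # only; the exact layout differs between GNU and macOS nm):
        #   "libmbedcrypto.a:aes.o:0000000000000000 T mbedtls_aes_init"
        #   "libmbedcrypto.a(aes.o): 0000000000000610 T _mbedtls_aes_init"
        # Undefined references ("... U memset"), blank lines, and per-object
        # header lines such as "aes.o:" are skipped via nm_undefined_regex.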

        symbols = []

        # Gather all outputs of nm
        nm_output = ""
        for lib in object_files:
            nm_output += subprocess.run(
                ["nm", "-og", lib],
                universal_newlines=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True
            ).stdout

        for line in nm_output.splitlines():
            if not nm_undefined_regex.search(line):
                symbol = nm_valid_regex.search(line)
                if (symbol and not symbol.group("symbol").startswith(exclusions)):
                    symbols.append(symbol.group("symbol"))
                else:
                    self.log.error(line)

        return symbols

class NameChecker():
    """
    Representation of the core name checking operation performed by this script.
    """
    def __init__(self, parse_result, log):
        self.parse_result = parse_result
        self.log = log

    def perform_checks(self, quiet=False):
        """
        A comprehensive checker that performs each check in order, and outputs
        a final verdict.

        Args:
        * quiet: whether to hide detailed problem explanations.
        """
        self.log.info("=============")
        Problem.quiet = quiet
        problems = 0
        problems += self.check_symbols_declared_in_header()

        pattern_checks = [
            ("macros", MACRO_PATTERN),
            ("enum_consts", CONSTANTS_PATTERN),
            ("identifiers", IDENTIFIER_PATTERN)
        ]
        for group, check_pattern in pattern_checks:
            problems += self.check_match_pattern(group, check_pattern)

        problems += self.check_for_typos()

        self.log.info("=============")
        if problems > 0:
            self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
            if quiet:
                self.log.info("Remove --quiet to see explanations.")
            else:
                self.log.info("Use --quiet for minimal output.")
            return 1
        else:
            self.log.info("PASS")
            return 0

    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.
        Assumes CodeParser.comprehensive_parse() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        for symbol in self.parse_result["symbols"]:
            found_symbol_declared = False
            for identifier_match in self.parse_result["identifiers"]:
                if symbol == identifier_match.name:
                    found_symbol_declared = True
                    break

            if not found_symbol_declared:
                problems.append(SymbolNotInHeader(symbol))

        self.output_check_result("All symbols in header", problems)
        return len(problems)

    def check_match_pattern(self, group_to_check, check_pattern):
        """
        Perform a check that all items of a group conform to a regex pattern.
        Assumes CodeParser.comprehensive_parse() was called before this.

        Args:
        * group_to_check: string key to index into self.parse_result.
        * check_pattern: the regex to check against.

        Returns the number of problems that need fixing.
        """
        problems = []

        for item_match in self.parse_result[group_to_check]:
            if not re.search(check_pattern, item_match.name):
                problems.append(PatternMismatch(check_pattern, item_match))
            # Double underscore should not be used for names
            if re.search(r".*__.*", item_match.name):
                problems.append(
                    PatternMismatch("no double underscore allowed", item_match))

        self.output_check_result(
            "Naming patterns of {}".format(group_to_check),
            problems)
        return len(problems)

    def check_for_typos(self):
        """
        Perform a check that all words in the source code beginning with MBED
        are either defined as macros or as enum constants.
        Assumes CodeParser.comprehensive_parse() was called before this.

        Returns the number of problems that need fixing.
        """
        problems = []

        # Set comprehension, equivalent to a list comprehension wrapped by set()
        all_caps_names = {
            match.name
            for match
            in self.parse_result["macros"] + self.parse_result["enum_consts"]}
        typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$")

        for name_match in self.parse_result["mbed_words"]:
            found = name_match.name in all_caps_names

            # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
            # PSA driver, they will not exist as macros. However, they
            # should still be checked for typos using the equivalent
            # BUILTINs that exist.
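            # For example, a hypothetical MBEDTLS_PSA_ACCEL_ALG_FOO is accepted
            # as long as MBEDTLS_PSA_BUILTIN_ALG_FOO is a known macro or enum
            # constant.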
            if "MBEDTLS_PSA_ACCEL_" in name_match.name:
                found = name_match.name.replace(
                    "MBEDTLS_PSA_ACCEL_",
                    "MBEDTLS_PSA_BUILTIN_") in all_caps_names

            if not found and not typo_exclusion.search(name_match.name):
                problems.append(Typo(name_match))

        self.output_check_result("Likely typos", problems)
        return len(problems)

    def output_check_result(self, name, problems):
        """
        Write out the PASS/FAIL status of a performed check depending on whether
        there were problems.

        Args:
        * name: the name of the test
        * problems: a List of encountered Problems
        """
        if problems:
            self.log.info("{}: FAIL\n".format(name))
            for problem in problems:
                self.log.warning(str(problem))
        else:
            self.log.info("{}: PASS".format(name))

def main():
    """
    Perform argument parsing, and create an instance of CodeParser and
    NameChecker to begin the core operation.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "This script confirms that the naming of all symbols and identifiers "
            "in Mbed TLS is consistent with the house style and is also "
            "self-consistent.\n\n"
            "Expected to be run from the Mbed TLS root directory.")
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="show parse results"
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="hide unnecessary text, explanations, and highlights"
    )

    args = parser.parse_args()

    # Configure the global logger, which is then passed to the classes below
    log = logging.getLogger()
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    log.addHandler(logging.StreamHandler())

    try:
        code_parser = CodeParser(log)
        parse_result = code_parser.comprehensive_parse()
    except Exception: # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(2)

    name_checker = NameChecker(parse_result, log)
    return_code = name_checker.perform_checks(quiet=args.quiet)

    sys.exit(return_code)

if __name__ == "__main__":
    main()