Blame - tests/scripts/check_names.py - mirror/mbed-tls

blob: 096da965257164b8b5855cebbc50fb8f0f102c5e [file] [log] [blame]

Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2	#
				3	# Copyright The Mbed TLS Contributors
				4	# SPDX-License-Identifier: Apache-2.0
				5	#
				6	# Licensed under the Apache License, Version 2.0 (the "License"); you may
				7	# not use this file except in compliance with the License.
				8	# You may obtain a copy of the License at
				9	#
				10	# http://www.apache.org/licenses/LICENSE-2.0
				11	#
				12	# Unless required by applicable law or agreed to in writing, software
				13	# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				14	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	# See the License for the specific language governing permissions and
				16	# limitations under the License.
				17
				18	"""
				19	This script confirms that the naming of all symbols and identifiers in Mbed TLS
				20	are consistent with the house style and are also self-consistent. It only runs
				21	on Linux and macOS since it depends on nm.
				22
				23	It contains two major Python classes, CodeParser and NameChecker. They both have
				24	a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
				25	but the individual functions can also be used for specific needs.
				26
				27	CodeParser makes heavy use of regular expressions to parse the code, and is
				28	dependent on the current code formatting. Many Python C parser libraries require
				29	preprocessed C code, which means no macro parsing. Compiler tools are also not
				30	very helpful when we want the exact location in the original source (which
				31	becomes impossible when e.g. comments are stripped).
				32
				33	NameChecker performs the following checks:
				34
				35	- All exported and available symbols in the library object files, are explicitly
				36	declared in the header files. This uses the nm command.
				37	- All macros, constants, and identifiers (function names, struct names, etc)
				38	follow the required regex pattern.
				39	- Typo checking: All words that begin with MBED exist as macros or constants.
				40
				41	The script returns 0 on success, 1 on test failure, and 2 if there is a script
				42	error. It must be run from Mbed TLS root.
				43	"""
				44
				45	import abc
				46	import argparse
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	47	import fnmatch
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	48	import glob
				49	import textwrap
				50	import os
				51	import sys
				52	import traceback
				53	import re
				54	import enum
				55	import shutil
				56	import subprocess
				57	import logging
				58
Gilles Peskine	7ff4766	2022-09-18 21:17:09 +0200	[diff] [blame]	59	import scripts_path # pylint: disable=unused-import
				60	from mbedtls_dev import build_tree
				61
				62
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
#
# Macros and enum constants: an MBEDTLS_ or PSA_ prefix followed by upper-case
# letters, digits and underscores, never ending with an underscore.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
# Identifiers: an mbedtls_ or psa_ prefix followed by lower-case letters,
# digits and underscores, never ending with an underscore.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
				68
				69	class Match(): # pylint: disable=too-few-public-methods
				70	"""
				71	A class representing a match, together with its found position.
				72
				73	Fields:
				74	* filename: the file that the match was in.
				75	* line: the full line containing the match.
				76	* line_no: the line number.
				77	* pos: a tuple of (start, end) positions on the line where the match is.
				78	* name: the match itself.
				79	"""
				80	def __init__(self, filename, line, line_no, pos, name):
				81	# pylint: disable=too-many-arguments
				82	self.filename = filename
				83	self.line = line
				84	self.line_no = line_no
				85	self.pos = pos
				86	self.name = name
				87
				88	def __str__(self):
				89	"""
				90	Return a formatted code listing representation of the erroneous line.
				91	"""
				92	gutter = format(self.line_no, "4d")
				93	underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"
				94
				95	return (
				96	" {0} \|\n".format(" " * len(gutter)) +
				97	" {0} \| {1}".format(gutter, self.line) +
				98	" {0} \| {1}\n".format(" " * len(gutter), underline)
				99	)
				100
				101	class Problem(abc.ABC): # pylint: disable=too-few-public-methods
				102	"""
				103	An abstract parent class representing a form of static analysis error.
				104	It extends an Abstract Base Class, which means it is not instantiable, and
				105	it also mandates certain abstract methods to be implemented in subclasses.
				106	"""
				107	# Class variable to control the quietness of all problems
				108	quiet = False
				109	def __init__(self):
				110	self.textwrapper = textwrap.TextWrapper()
				111	self.textwrapper.width = 80
				112	self.textwrapper.initial_indent = " > "
				113	self.textwrapper.subsequent_indent = " "
				114
				115	def __str__(self):
				116	"""
				117	Unified string representation method for all Problems.
				118	"""
				119	if self.__class__.quiet:
				120	return self.quiet_output()
				121	return self.verbose_output()
				122
				123	@abc.abstractmethod
				124	def quiet_output(self):
				125	"""
				126	The output when --quiet is enabled.
				127	"""
				128	pass
				129
				130	@abc.abstractmethod
				131	def verbose_output(self):
				132	"""
				133	The default output with explanation and code snippet if appropriate.
				134	"""
				135	pass
				136
				137	class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
				138	"""
				139	A problem that occurs when an exported/available symbol in the object file
				140	is not explicitly declared in header files. Created with
				141	NameCheck.check_symbols_declared_in_header()
				142
				143	Fields:
				144	* symbol_name: the name of the symbol.
				145	"""
				146	def __init__(self, symbol_name):
				147	self.symbol_name = symbol_name
				148	Problem.__init__(self)
				149
				150	def quiet_output(self):
				151	return "{0}".format(self.symbol_name)
				152
				153	def verbose_output(self):
				154	return self.textwrapper.fill(
				155	"'{0}' was found as an available symbol in the output of nm, "
				156	"however it was not declared in any header files."
				157	.format(self.symbol_name))
				158
				159	class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
				160	"""
				161	A problem that occurs when something doesn't match the expected pattern.
				162	Created with NameCheck.check_match_pattern()
				163
				164	Fields:
				165	* pattern: the expected regex pattern
				166	* match: the Match object in question
				167	"""
				168	def __init__(self, pattern, match):
				169	self.pattern = pattern
				170	self.match = match
				171	Problem.__init__(self)
				172
				173
				174	def quiet_output(self):
				175	return (
				176	"{0}:{1}:{2}"
				177	.format(self.match.filename, self.match.line_no, self.match.name)
				178	)
				179
				180	def verbose_output(self):
				181	return self.textwrapper.fill(
				182	"{0}:{1}: '{2}' does not match the required pattern '{3}'."
				183	.format(
				184	self.match.filename,
				185	self.match.line_no,
				186	self.match.name,
				187	self.pattern
				188	)
				189	) + "\n" + str(self.match)
				190
				191	class Typo(Problem): # pylint: disable=too-few-public-methods
				192	"""
				193	A problem that occurs when a word using MBED doesn't appear to be defined as
				194	constants nor enum values. Created with NameCheck.check_for_typos()
				195
				196	Fields:
				197	* match: the Match object of the MBED name in question.
				198	"""
				199	def __init__(self, match):
				200	self.match = match
				201	Problem.__init__(self)
				202
				203	def quiet_output(self):
				204	return (
				205	"{0}:{1}:{2}"
				206	.format(self.match.filename, self.match.line_no, self.match.name)
				207	)
				208
				209	def verbose_output(self):
				210	return self.textwrapper.fill(
				211	"{0}:{1}: '{2}' looks like a typo. It was not found in any "
				212	"macros or any enums. If this is not a typo, put "
				213	"//no-check-names after it."
				214	.format(self.match.filename, self.match.line_no, self.match.name)
				215	) + "\n" + str(self.match)
				216
				217	class CodeParser():
				218	"""
				219	Class for retrieving files and parsing the code. This can be used
				220	independently of the checks that NameChecker performs, for example for
				221	list_internal_identifiers.py.
				222	"""
				223	def __init__(self, log):
				224	self.log = log
Gilles Peskine	7ff4766	2022-09-18 21:17:09 +0200	[diff] [blame]	225	build_tree.check_repo_path()
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	226
				227	# Memo for storing "glob expression": set(filepaths)
				228	self.files = {}
				229
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	230	# Globally excluded filenames.
				231	# Note that "*" can match directory separators in exclude lists.
Gilles Peskine	d47f636	2021-09-27 20:12:00 +0200	[diff] [blame]	232	self.excluded_files = ["/bn_mul", "/compat-1.3.h"]
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	233
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	234	def comprehensive_parse(self):
				235	"""
				236	Comprehensive ("default") function to call each parsing function and
				237	retrieve various elements of the code, together with the source location.
				238
				239	Returns a dict of parsed item key to the corresponding List of Matches.
				240	"""
				241	self.log.info("Parsing source code...")
				242	self.log.debug(
				243	"The following files are excluded from the search: {}"
				244	.format(str(self.excluded_files))
				245	)
				246
				247	all_macros = self.parse_macros([
				248	"include/mbedtls/*.h",
				249	"include/psa/*.h",
				250	"library/*.h",
				251	"tests/include/test/drivers/*.h",
				252	"3rdparty/everest/include/everest/everest.h",
				253	"3rdparty/everest/include/everest/x25519.h"
				254	])
				255	enum_consts = self.parse_enum_consts([
				256	"include/mbedtls/*.h",
				257	"library/*.h",
				258	"3rdparty/everest/include/everest/everest.h",
				259	"3rdparty/everest/include/everest/x25519.h"
				260	])
				261	identifiers = self.parse_identifiers([
				262	"include/mbedtls/*.h",
				263	"include/psa/*.h",
				264	"library/*.h",
				265	"3rdparty/everest/include/everest/everest.h",
				266	"3rdparty/everest/include/everest/x25519.h"
				267	])
				268	mbed_words = self.parse_mbed_words([
				269	"include/mbedtls/*.h",
				270	"include/psa/*.h",
				271	"library/*.h",
				272	"3rdparty/everest/include/everest/everest.h",
				273	"3rdparty/everest/include/everest/x25519.h",
				274	"library/*.c",
				275	"3rdparty/everest/library/everest.c",
				276	"3rdparty/everest/library/x25519.c"
				277	])
				278	symbols = self.parse_symbols()
				279
				280	# Remove identifier macros like mbedtls_printf or mbedtls_calloc
				281	identifiers_justname = [x.name for x in identifiers]
				282	actual_macros = []
				283	for macro in all_macros:
				284	if macro.name not in identifiers_justname:
				285	actual_macros.append(macro)
				286
				287	self.log.debug("Found:")
				288	# Aligns the counts on the assumption that none exceeds 4 digits
				289	self.log.debug(" {:4} Total Macros".format(len(all_macros)))
				290	self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
				291	self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
				292	self.log.debug(" {:4} Identifiers".format(len(identifiers)))
				293	self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
				294	return {
				295	"macros": actual_macros,
				296	"enum_consts": enum_consts,
				297	"identifiers": identifiers,
				298	"symbols": symbols,
				299	"mbed_words": mbed_words
				300	}
				301
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	302	def is_file_excluded(self, path, exclude_wildcards):
Gilles Peskine	1c39975	2021-09-28 10:12:49 +0200	[diff] [blame]	303	"""Whether the given file path is excluded."""
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	304	# exclude_wildcards may be None. Also, consider the global exclusions.
				305	exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
				306	for pattern in exclude_wildcards:
				307	if fnmatch.fnmatch(path, pattern):
				308	return True
				309	return False
				310
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	311	def get_files(self, include_wildcards, exclude_wildcards):
				312	"""
				313	Get all files that match any of the UNIX-style wildcards. While the
				314	check_names script is designed only for use on UNIX/macOS (due to nm),
				315	this function alone would work fine on Windows even with forward slashes
				316	in the wildcard.
				317
				318	Args:
				319	* include_wildcards: a List of shell-style wildcards to match filepaths.
				320	* exclude_wildcards: a List of shell-style wildcards to exclude.
				321
				322	Returns a List of relative filepaths.
				323	"""
				324	accumulator = set()
				325
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	326	for include_wildcard in include_wildcards:
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	327	accumulator = accumulator.union(glob.iglob(include_wildcard))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	328
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	329	return list(path for path in accumulator
				330	if not self.is_file_excluded(path, exclude_wildcards))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	331
				332	def parse_macros(self, include, exclude=None):
				333	"""
				334	Parse all macros defined by #define preprocessor directives.
				335
				336	Args:
				337	* include: A List of glob expressions to look for files through.
				338	* exclude: A List of glob expressions for excluding files.
				339
				340	Returns a List of Match objects for the found macros.
				341	"""
				342	macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
				343	exclusions = (
				344	"asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
				345	)
				346
				347	files = self.get_files(include, exclude)
				348	self.log.debug("Looking for macros in {} files".format(len(files)))
				349
				350	macros = []
				351	for header_file in files:
				352	with open(header_file, "r", encoding="utf-8") as header:
				353	for line_no, line in enumerate(header):
				354	for macro in macro_regex.finditer(line):
				355	if macro.group("macro").startswith(exclusions):
				356	continue
				357
				358	macros.append(Match(
				359	header_file,
				360	line,
				361	line_no,
				362	macro.span("macro"),
				363	macro.group("macro")))
				364
				365	return macros
				366
				367	def parse_mbed_words(self, include, exclude=None):
				368	"""
				369	Parse all words in the file that begin with MBED, in and out of macros,
				370	comments, anything.
				371
				372	Args:
				373	* include: A List of glob expressions to look for files through.
				374	* exclude: A List of glob expressions for excluding files.
				375
				376	Returns a List of Match objects for words beginning with MBED.
				377	"""
				378	# Typos of TLS are common, hence the broader check below than MBEDTLS.
				379	mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
				380	exclusions = re.compile(r"// *no-check-names\|#error")
				381
				382	files = self.get_files(include, exclude)
				383	self.log.debug("Looking for MBED words in {} files".format(len(files)))
				384
				385	mbed_words = []
				386	for filename in files:
				387	with open(filename, "r", encoding="utf-8") as fp:
				388	for line_no, line in enumerate(fp):
				389	if exclusions.search(line):
				390	continue
				391
				392	for name in mbed_regex.finditer(line):
				393	mbed_words.append(Match(
				394	filename,
				395	line,
				396	line_no,
				397	name.span(0),
				398	name.group(0)))
				399
				400	return mbed_words
				401
				402	def parse_enum_consts(self, include, exclude=None):
				403	"""
				404	Parse all enum value constants that are declared.
				405
				406	Args:
				407	* include: A List of glob expressions to look for files through.
				408	* exclude: A List of glob expressions for excluding files.
				409
				410	Returns a List of Match objects for the findings.
				411	"""
				412	files = self.get_files(include, exclude)
				413	self.log.debug("Looking for enum consts in {} files".format(len(files)))
				414
				415	# Emulate a finite state machine to parse enum declarations.
				416	# OUTSIDE_KEYWORD = outside the enum keyword
				417	# IN_BRACES = inside enum opening braces
				418	# IN_BETWEEN = between enum keyword and opening braces
				419	states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
				420	enum_consts = []
				421	for header_file in files:
				422	state = states.OUTSIDE_KEYWORD
				423	with open(header_file, "r", encoding="utf-8") as header:
				424	for line_no, line in enumerate(header):
				425	# Match typedefs and brackets only when they are at the
				426	# beginning of the line -- if they are indented, they might
				427	# be sub-structures within structs, etc.
				428	if (state == states.OUTSIDE_KEYWORD and
				429	re.search(r"^(typedef +)?enum +{", line)):
				430	state = states.IN_BRACES
				431	elif (state == states.OUTSIDE_KEYWORD and
				432	re.search(r"^(typedef +)?enum", line)):
				433	state = states.IN_BETWEEN
				434	elif (state == states.IN_BETWEEN and
				435	re.search(r"^{", line)):
				436	state = states.IN_BRACES
				437	elif (state == states.IN_BRACES and
				438	re.search(r"^}", line)):
				439	state = states.OUTSIDE_KEYWORD
				440	elif (state == states.IN_BRACES and
				441	not re.search(r"^ *#", line)):
				442	enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
				443	if not enum_const:
				444	continue
				445
				446	enum_consts.append(Match(
				447	header_file,
				448	line,
				449	line_no,
				450	enum_const.span("enum_const"),
				451	enum_const.group("enum_const")))
				452
				453	return enum_consts
				454
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	455	IGNORED_CHUNK_REGEX = re.compile('\|'.join([
				456	r'/\.?\*/', # block comment entirely on one line
				457	r'//.*', # line comment
				458	r'(?P<string>")(?:[^\\\"]\|\\.)*"', # string literal
				459	]))
				460
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	461	def strip_comments_and_literals(self, line, in_block_comment):
				462	"""Strip comments and string literals from line.
				463
				464	Continuation lines are not supported.
				465
				466	If in_block_comment is true, assume that the line starts inside a
				467	block comment.
				468
				469	Return updated values of (line, in_block_comment) where:
				470	* Comments in line have been replaced by a space (or nothing at the
				471	start or end of the line).
				472	* String contents have been removed.
				473	* in_block_comment indicates whether the line ends inside a block
				474	comment that continues on the next line.
				475	"""
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	476
				477	# Terminate current multiline comment?
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	478	if in_block_comment:
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	479	m = re.search(r"\*/", line)
				480	if m:
				481	in_block_comment = False
				482	line = line[m.end(0):]
				483	else:
				484	return '', True
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	485
				486	# Remove full comments and string literals.
				487	# Do it all together to handle cases like "/*" correctly.
				488	# Note that continuation lines are not supported.
				489	line = re.sub(self.IGNORED_CHUNK_REGEX,
				490	lambda s: '""' if s.group('string') else ' ',
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	491	line)
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	492
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	493	# Start an unfinished comment?
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	494	# (If `/*` was part of a complete comment, it's already been removed.)
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	495	m = re.search(r"/\*", line)
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	496	if m:
				497	in_block_comment = True
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	498	line = line[:m.start(0)]
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	499
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	500	return line, in_block_comment
				501
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	502	IDENTIFIER_REGEX = re.compile('\|'.join([
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	503	# Match " something(a" or " *something(a". Functions.
				504	# Assumptions:
				505	# - function definition from return type to one of its arguments is
				506	# all on one line
				507	# - function definition line only contains alphanumeric, asterisk,
				508	# underscore, and open bracket
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	509	r".* \*(\w+) \( *\w",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	510	# Match "(*something)(".
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	511	r".$ \* (\w+) $ *\(",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	512	# Match names of named data structures.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	513	r"(?:typedef +)?(?:struct\|union\|enum) +(\w+)(?: *{)?$",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	514	# Match names of typedef instances, after closing bracket.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	515	r"}? (\w+)[;[].",
				516	]))
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	517	# The regex below is indented for clarity.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	518	EXCLUSION_LINES = re.compile("\|".join([
				519	r"extern +\"C\"",
				520	r"(typedef +)?(struct\|union\|enum)( *{)?$",
				521	r"} *;?$",
				522	r"$",
				523	r"//",
				524	r"#",
				525	]))
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	526
				527	def parse_identifiers_in_file(self, header_file, identifiers):
				528	"""
				529	Parse all lines of a header where a function/enum/struct/union/typedef
				530	identifier is declared, based on some regex and heuristics. Highly
				531	dependent on formatting style.
				532
				533	Append found matches to the list ``identifiers``.
				534	"""
				535
				536	with open(header_file, "r", encoding="utf-8") as header:
				537	in_block_comment = False
				538	# The previous line variable is used for concatenating lines
				539	# when identifiers are formatted and spread across multiple
				540	# lines.
				541	previous_line = ""
				542
				543	for line_no, line in enumerate(header):
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	544	line, in_block_comment = \
				545	self.strip_comments_and_literals(line, in_block_comment)
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	546
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	547	if self.EXCLUSION_LINES.match(line):
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	548	previous_line = ""
				549	continue
				550
				551	# If the line contains only space-separated alphanumeric
Gilles Peskine	4f04d61	2021-11-17 20:39:56 +0100	[diff] [blame]	552	# characters (or underscore, asterisk, or open parenthesis),
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	553	# and nothing else, high chance it's a declaration that
				554	# continues on the next line
				555	if re.search(r"^([\w\*\(]+\s+)+$", line):
				556	previous_line += line
				557	continue
				558
				559	# If previous line seemed to start an unfinished declaration
				560	# (as above), concat and treat them as one.
				561	if previous_line:
				562	line = previous_line.strip() + " " + line.strip() + "\n"
				563	previous_line = ""
				564
				565	# Skip parsing if line has a space in front = heuristic to
				566	# skip function argument lines (highly subject to formatting
				567	# changes)
				568	if line[0] == " ":
				569	continue
				570
				571	identifier = self.IDENTIFIER_REGEX.search(line)
				572
				573	if not identifier:
				574	continue
				575
				576	# Find the group that matched, and append it
				577	for group in identifier.groups():
				578	if not group:
				579	continue
				580
				581	identifiers.append(Match(
				582	header_file,
				583	line,
				584	line_no,
				585	identifier.span(),
				586	group))
				587
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	588	def parse_identifiers(self, include, exclude=None):
				589	"""
				590	Parse all lines of a header where a function/enum/struct/union/typedef
				591	identifier is declared, based on some regex and heuristics. Highly
				592	dependent on formatting style.
				593
				594	Args:
				595	* include: A List of glob expressions to look for files through.
				596	* exclude: A List of glob expressions for excluding files.
				597
				598	Returns a List of Match objects with identifiers.
				599	"""
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	600
				601	files = self.get_files(include, exclude)
				602	self.log.debug("Looking for identifiers in {} files".format(len(files)))
				603
				604	identifiers = []
				605	for header_file in files:
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	606	self.parse_identifiers_in_file(header_file, identifiers)
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	607
				608	return identifiers
				609
				610	def parse_symbols(self):
				611	"""
				612	Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
				613	object files using nm to retrieve the list of referenced symbols.
				614	Exceptions thrown here are rethrown because they would be critical
				615	errors that void several tests, and thus needs to halt the program. This
				616	is explicitly done for clarity.
				617
				618	Returns a List of unique symbols defined and used in the libraries.
				619	"""
				620	self.log.info("Compiling...")
				621	symbols = []
				622
Tom Cosgrove	49f99bc	2022-12-04 16:44:21 +0000	[diff] [blame^]	623	# Back up the config and atomically compile with the full configuration.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	624	shutil.copy(
Gilles Peskine	d47f636	2021-09-27 20:12:00 +0200	[diff] [blame]	625	"include/mbedtls/config.h",
				626	"include/mbedtls/config.h.bak"
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	627	)
				628	try:
				629	# Use check=True in all subprocess calls so that failures are raised
				630	# as exceptions and logged.
				631	subprocess.run(
				632	["python3", "scripts/config.py", "full"],
				633	universal_newlines=True,
				634	check=True
				635	)
				636	my_environment = os.environ.copy()
				637	my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
				638	# Run make clean separately to lib to prevent unwanted behavior when
				639	# make is invoked with parallelism.
				640	subprocess.run(
				641	["make", "clean"],
				642	universal_newlines=True,
				643	check=True
				644	)
				645	subprocess.run(
				646	["make", "lib"],
				647	env=my_environment,
				648	universal_newlines=True,
				649	stdout=subprocess.PIPE,
				650	stderr=subprocess.STDOUT,
				651	check=True
				652	)
				653
				654	# Perform object file analysis using nm
				655	symbols = self.parse_symbols_from_nm([
				656	"library/libmbedcrypto.a",
				657	"library/libmbedtls.a",
				658	"library/libmbedx509.a"
				659	])
				660
				661	subprocess.run(
				662	["make", "clean"],
				663	universal_newlines=True,
				664	check=True
				665	)
				666	except subprocess.CalledProcessError as error:
				667	self.log.debug(error.output)
				668	raise error
				669	finally:
				670	# Put back the original config regardless of there being errors.
				671	# Works also for keyboard interrupts.
				672	shutil.move(
Gilles Peskine	d47f636	2021-09-27 20:12:00 +0200	[diff] [blame]	673	"include/mbedtls/config.h.bak",
				674	"include/mbedtls/config.h"
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	675	)
				676
				677	return symbols
				678
				679	def parse_symbols_from_nm(self, object_files):
				680	"""
				681	Run nm to retrieve the list of referenced symbols in each object file.
				682	Does not return the position data since it is of no use.
				683
				684	Args:
				685	* object_files: a List of compiled object filepaths to search through.
				686
				687	Returns a List of unique symbols defined and used in any of the object
				688	files.
				689	"""
				690	nm_undefined_regex = re.compile(r"^\S+: +U \|^$\|^\S+:$")
				691	nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
				692	exclusions = ("FStar", "Hacl")
				693
				694	symbols = []
				695
				696	# Gather all outputs of nm
				697	nm_output = ""
				698	for lib in object_files:
				699	nm_output += subprocess.run(
				700	["nm", "-og", lib],
				701	universal_newlines=True,
				702	stdout=subprocess.PIPE,
				703	stderr=subprocess.STDOUT,
				704	check=True
				705	).stdout
				706
				707	for line in nm_output.splitlines():
				708	if not nm_undefined_regex.search(line):
				709	symbol = nm_valid_regex.search(line)
				710	if (symbol and not symbol.group("symbol").startswith(exclusions)):
				711	symbols.append(symbol.group("symbol"))
				712	else:
				713	self.log.error(line)
				714
				715	return symbols
				716
				717	class NameChecker():
				718	"""
				719	Representation of the core name checking operation performed by this script.
				720	"""
				721	def __init__(self, parse_result, log):
				722	self.parse_result = parse_result
				723	self.log = log
				724
    def perform_checks(self, quiet=False):
        """
        Run every check in order, log a final verdict, and report it as an
        exit status.

        Args:
        * quiet: whether to hide detailed problem explanation.

        Returns 0 if no problem was found, 1 otherwise.
        """
        self.log.info("=============")
        # Problems render themselves according to this class-level flag.
        Problem.quiet = quiet

        problem_count = self.check_symbols_declared_in_header()
        for group, check_pattern in (
                ("macros", MACRO_PATTERN),
                ("enum_consts", CONSTANTS_PATTERN),
                ("identifiers", IDENTIFIER_PATTERN)):
            problem_count += self.check_match_pattern(group, check_pattern)
        problem_count += self.check_for_typos()

        self.log.info("=============")
        if not problem_count > 0:
            self.log.info("PASS")
            return 0

        self.log.info("FAIL: {0} problem(s) to fix".format(str(problem_count)))
        # Point the user at the other output mode.
        hint = (
            "Remove --quiet to see explanations." if quiet
            else "Use --quiet for minimal output."
        )
        self.log.info(hint)
        return 1
				759
    def check_symbols_declared_in_header(self):
        """
        Perform a check that all detected symbols in the library object files
        are properly declared in headers.

        Reads the "symbols" and "identifiers" entries of self.parse_result:
        every symbol whose name is not the name of some identifier Match is
        reported as a SymbolNotInHeader problem.

        Returns the number of problems that need fixing.
        """
        # Collect the declared names once so each symbol is a set lookup
        # instead of a scan over every identifier.
        declared_names = {
            identifier_match.name
            for identifier_match in self.parse_result["identifiers"]
        }

        problems = [
            SymbolNotInHeader(symbol)
            for symbol in self.parse_result["symbols"]
            if symbol not in declared_names
        ]

        self.output_check_result("All symbols in header", problems)
        return len(problems)
				782
				783	def check_match_pattern(self, group_to_check, check_pattern):
				784	"""
				785	Perform a check that all items of a group conform to a regex pattern.
				786	Assumes parse_names_in_source() was called before this.
				787
				788	Args:
				789	* group_to_check: string key to index into self.parse_result.
				790	* check_pattern: the regex to check against.
				791
				792	Returns the number of problems that need fixing.
				793	"""
				794	problems = []
				795
				796	for item_match in self.parse_result[group_to_check]:
				797	if not re.search(check_pattern, item_match.name):
				798	problems.append(PatternMismatch(check_pattern, item_match))
				799	# Double underscore should not be used for names
				800	if re.search(r".__.", item_match.name):
				801	problems.append(
				802	PatternMismatch("no double underscore allowed", item_match))
				803
				804	self.output_check_result(
				805	"Naming patterns of {}".format(group_to_check),
				806	problems)
				807	return len(problems)
				808
				809	def check_for_typos(self):
				810	"""
Shaun Case	0e7791f	2021-12-20 21:14:10 -0800	[diff] [blame]	811	Perform a check that all words in the source code beginning with MBED are
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	812	either defined as macros, or as enum constants.
				813	Assumes parse_names_in_source() was called before this.
				814
				815	Returns the number of problems that need fixing.
				816	"""
				817	problems = []
				818
				819	# Set comprehension, equivalent to a list comprehension wrapped by set()
				820	all_caps_names = {
				821	match.name
				822	for match
				823	in self.parse_result["macros"] + self.parse_result["enum_consts"]}
Ronald Cron	b814bda	2021-09-13 14:50:42 +0200	[diff] [blame]	824	typo_exclusion = re.compile(r"XXX\|__\|_$\|^MBEDTLS_.*CONFIG_FILE$\|"
				825	r"MBEDTLS_TEST_LIBTESTDRIVER*")
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	826
				827	for name_match in self.parse_result["mbed_words"]:
				828	found = name_match.name in all_caps_names
				829
				830	# Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
				831	# PSA driver, they will not exist as macros. However, they
				832	# should still be checked for typos using the equivalent
				833	# BUILTINs that exist.
				834	if "MBEDTLS_PSA_ACCEL_" in name_match.name:
				835	found = name_match.name.replace(
				836	"MBEDTLS_PSA_ACCEL_",
				837	"MBEDTLS_PSA_BUILTIN_") in all_caps_names
				838
				839	if not found and not typo_exclusion.search(name_match.name):
				840	problems.append(Typo(name_match))
				841
				842	self.output_check_result("Likely typos", problems)
				843	return len(problems)
				844
				845	def output_check_result(self, name, problems):
				846	"""
				847	Write out the PASS/FAIL status of a performed check depending on whether
				848	there were problems.
				849
				850	Args:
				851	* name: the name of the test
				852	* problems: a List of encountered Problems
				853	"""
				854	if problems:
				855	self.log.info("{}: FAIL\n".format(name))
				856	for problem in problems:
				857	self.log.warning(str(problem))
				858	else:
				859	self.log.info("{}: PASS".format(name))
				860
				861	def main():
				862	"""
				863	Perform argument parsing, and create an instance of CodeParser and
				864	NameChecker to begin the core operation.
				865	"""
				866	parser = argparse.ArgumentParser(
				867	formatter_class=argparse.RawDescriptionHelpFormatter,
				868	description=(
				869	"This script confirms that the naming of all symbols and identifiers "
				870	"in Mbed TLS are consistent with the house style and are also "
				871	"self-consistent.\n\n"
				872	"Expected to be run from the MbedTLS root directory.")
				873	)
				874	parser.add_argument(
				875	"-v", "--verbose",
				876	action="store_true",
				877	help="show parse results"
				878	)
				879	parser.add_argument(
				880	"-q", "--quiet",
				881	action="store_true",
Tom Cosgrove	49f99bc	2022-12-04 16:44:21 +0000	[diff] [blame^]	882	help="hide unnecessary text, explanations, and highlights"
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	883	)
				884
				885	args = parser.parse_args()
				886
				887	# Configure the global logger, which is then passed to the classes below
				888	log = logging.getLogger()
				889	log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
				890	log.addHandler(logging.StreamHandler())
				891
				892	try:
				893	code_parser = CodeParser(log)
				894	parse_result = code_parser.comprehensive_parse()
				895	except Exception: # pylint: disable=broad-except
				896	traceback.print_exc()
				897	sys.exit(2)
				898
				899	name_checker = NameChecker(parse_result, log)
				900	return_code = name_checker.perform_checks(quiet=args.quiet)
				901
				902	sys.exit(return_code)
				903
				904	if __name__ == "__main__":
				905	main()