Blame - tests/scripts/check_names.py - mirror/mbed-tls

blob: 8c08e5c6f35be304d0ecff4f9b368c9d0f900fec [file] [log] [blame]

Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2	#
				3	# Copyright The Mbed TLS Contributors
				4	# SPDX-License-Identifier: Apache-2.0
				5	#
				6	# Licensed under the Apache License, Version 2.0 (the "License"); you may
				7	# not use this file except in compliance with the License.
				8	# You may obtain a copy of the License at
				9	#
				10	# http://www.apache.org/licenses/LICENSE-2.0
				11	#
				12	# Unless required by applicable law or agreed to in writing, software
				13	# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				14	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	# See the License for the specific language governing permissions and
				16	# limitations under the License.
				17
				18	"""
				19	This script confirms that the naming of all symbols and identifiers in Mbed TLS
				20	is consistent with the house style and is also self-consistent. It only runs
				21	on Linux and macOS since it depends on nm.
				22
				23	It contains two major Python classes, CodeParser and NameChecker. They both have
				24	a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
				25	but the individual functions can also be used for specific needs.
				26
				27	CodeParser makes heavy use of regular expressions to parse the code, and is
				28	dependent on the current code formatting. Many Python C parser libraries require
				29	preprocessed C code, which means no macro parsing. Compiler tools are also not
				30	very helpful when we want the exact location in the original source (which
				31	becomes impossible when e.g. comments are stripped).
				32
				33	NameChecker performs the following checks:
				34
				35	- All exported and available symbols in the library object files, are explicitly
				36	declared in the header files. This uses the nm command.
				37	- All macros, constants, and identifiers (function names, struct names, etc)
				38	follow the required regex pattern.
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	39	- Typo checking: All words that begin with MBED|PSA exist as macros or constants.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	40
				41	The script returns 0 on success, 1 on test failure, and 2 if there is a script
				42	error. It must be run from Mbed TLS root.
				43	"""
				44
				45	import abc
				46	import argparse
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	47	import fnmatch
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	48	import glob
				49	import textwrap
				50	import os
				51	import sys
				52	import traceback
				53	import re
				54	import enum
				55	import shutil
				56	import subprocess
				57	import logging
				58
Gilles Peskine	7ff4766	2022-09-18 21:17:09 +0200	[diff] [blame]	59	import scripts_path # pylint: disable=unused-import
				60	from mbedtls_dev import build_tree
				61
				62
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.
#
# Each pattern is anchored at both ends and requires a library prefix
# (MBEDTLS or PSA for macros and constants, mbedtls or psa for identifiers),
# an underscore, and a name whose last character is not an underscore.
# The "|" must be a bare alternation operator: an escaped "\|" would only
# match a literal pipe character and no real name would ever pass.
MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
				68
				69	class Match(): # pylint: disable=too-few-public-methods
				70	"""
				71	A class representing a match, together with its found position.
				72
				73	Fields:
				74	* filename: the file that the match was in.
				75	* line: the full line containing the match.
				76	* line_no: the line number.
				77	* pos: a tuple of (start, end) positions on the line where the match is.
				78	* name: the match itself.
				79	"""
				80	def __init__(self, filename, line, line_no, pos, name):
				81	# pylint: disable=too-many-arguments
				82	self.filename = filename
				83	self.line = line
				84	self.line_no = line_no
				85	self.pos = pos
				86	self.name = name
				87
				88	def __str__(self):
				89	"""
				90	Return a formatted code listing representation of the erroneous line.
				91	"""
				92	gutter = format(self.line_no, "4d")
				93	underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"
				94
				95	return (
				96	" {0} \|\n".format(" " * len(gutter)) +
				97	" {0} \| {1}".format(gutter, self.line) +
				98	" {0} \| {1}\n".format(" " * len(gutter), underline)
				99	)
				100
				101	class Problem(abc.ABC): # pylint: disable=too-few-public-methods
				102	"""
				103	An abstract parent class representing a form of static analysis error.
				104	It extends an Abstract Base Class, which means it is not instantiable, and
				105	it also mandates certain abstract methods to be implemented in subclasses.
				106	"""
				107	# Class variable to control the quietness of all problems
				108	quiet = False
				109	def __init__(self):
				110	self.textwrapper = textwrap.TextWrapper()
				111	self.textwrapper.width = 80
				112	self.textwrapper.initial_indent = " > "
				113	self.textwrapper.subsequent_indent = " "
				114
				115	def __str__(self):
				116	"""
				117	Unified string representation method for all Problems.
				118	"""
				119	if self.__class__.quiet:
				120	return self.quiet_output()
				121	return self.verbose_output()
				122
				123	@abc.abstractmethod
				124	def quiet_output(self):
				125	"""
				126	The output when --quiet is enabled.
				127	"""
				128	pass
				129
				130	@abc.abstractmethod
				131	def verbose_output(self):
				132	"""
				133	The default output with explanation and code snippet if appropriate.
				134	"""
				135	pass
				136
				137	class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
				138	"""
				139	A problem that occurs when an exported/available symbol in the object file
				140	is not explicitly declared in header files. Created with
				141	NameCheck.check_symbols_declared_in_header()
				142
				143	Fields:
				144	* symbol_name: the name of the symbol.
				145	"""
				146	def __init__(self, symbol_name):
				147	self.symbol_name = symbol_name
				148	Problem.__init__(self)
				149
				150	def quiet_output(self):
				151	return "{0}".format(self.symbol_name)
				152
				153	def verbose_output(self):
				154	return self.textwrapper.fill(
				155	"'{0}' was found as an available symbol in the output of nm, "
				156	"however it was not declared in any header files."
				157	.format(self.symbol_name))
				158
				159	class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
				160	"""
				161	A problem that occurs when something doesn't match the expected pattern.
				162	Created with NameCheck.check_match_pattern()
				163
				164	Fields:
				165	* pattern: the expected regex pattern
				166	* match: the Match object in question
				167	"""
				168	def __init__(self, pattern, match):
				169	self.pattern = pattern
				170	self.match = match
				171	Problem.__init__(self)
				172
				173
				174	def quiet_output(self):
				175	return (
				176	"{0}:{1}:{2}"
				177	.format(self.match.filename, self.match.line_no, self.match.name)
				178	)
				179
				180	def verbose_output(self):
				181	return self.textwrapper.fill(
				182	"{0}:{1}: '{2}' does not match the required pattern '{3}'."
				183	.format(
				184	self.match.filename,
				185	self.match.line_no,
				186	self.match.name,
				187	self.pattern
				188	)
				189	) + "\n" + str(self.match)
				190
				191	class Typo(Problem): # pylint: disable=too-few-public-methods
				192	"""
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	193	A problem that occurs when a word using MBED or PSA doesn't
				194	appear to be defined as constants nor enum values. Created with
				195	NameCheck.check_for_typos()
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	196
				197	Fields:
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	198	* match: the Match object of the MBED\|PSA name in question.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	199	"""
				200	def __init__(self, match):
				201	self.match = match
				202	Problem.__init__(self)
				203
				204	def quiet_output(self):
				205	return (
				206	"{0}:{1}:{2}"
				207	.format(self.match.filename, self.match.line_no, self.match.name)
				208	)
				209
				210	def verbose_output(self):
				211	return self.textwrapper.fill(
				212	"{0}:{1}: '{2}' looks like a typo. It was not found in any "
				213	"macros or any enums. If this is not a typo, put "
				214	"//no-check-names after it."
				215	.format(self.match.filename, self.match.line_no, self.match.name)
				216	) + "\n" + str(self.match)
				217
				218	class CodeParser():
				219	"""
				220	Class for retrieving files and parsing the code. This can be used
				221	independently of the checks that NameChecker performs, for example for
				222	list_internal_identifiers.py.
				223	"""
    def __init__(self, log):
        """
        Set up the parser.

        Args:
        * log: the logger object that the parsing methods report through.
        """
        self.log = log
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        # The patterns are matched with fnmatch against whole relative paths
        # (see is_file_excluded), so they need the leading "*": a pattern
        # beginning with a bare "/" could only equal a path that is exactly
        # that string and would never exclude anything.
        # NOTE(review): "*/bn_mul" has no trailing wildcard, so it does not
        # match e.g. "library/bn_mul.h" -- confirm whether that is intended.
        self.excluded_files = ["*/bn_mul", "*/compat-1.3.h"]
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	234
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	235	def comprehensive_parse(self):
				236	"""
				237	Comprehensive ("default") function to call each parsing function and
				238	retrieve various elements of the code, together with the source location.
				239
				240	Returns a dict of parsed item key to the corresponding List of Matches.
				241	"""
				242	self.log.info("Parsing source code...")
				243	self.log.debug(
				244	"The following files are excluded from the search: {}"
				245	.format(str(self.excluded_files))
				246	)
				247
				248	all_macros = self.parse_macros([
				249	"include/mbedtls/*.h",
				250	"include/psa/*.h",
				251	"library/*.h",
				252	"tests/include/test/drivers/*.h",
				253	"3rdparty/everest/include/everest/everest.h",
				254	"3rdparty/everest/include/everest/x25519.h"
				255	])
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	256	private_macros = self.parse_macros([
				257	"library/*.c",
				258	])
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	259	enum_consts = self.parse_enum_consts([
				260	"include/mbedtls/*.h",
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	261	"include/psa/*.h",
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	262	"library/*.h",
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	263	"library/*.c",
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	264	"3rdparty/everest/include/everest/everest.h",
				265	"3rdparty/everest/include/everest/x25519.h"
				266	])
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	267	identifiers, excluded_identifiers = self.parse_identifiers([
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	268	"include/mbedtls/*.h",
				269	"include/psa/*.h",
				270	"library/*.h",
				271	"3rdparty/everest/include/everest/everest.h",
				272	"3rdparty/everest/include/everest/x25519.h"
				273	])
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	274	mbed_psa_words = self.parse_mbed_psa_words([
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	275	"include/mbedtls/*.h",
				276	"include/psa/*.h",
				277	"library/*.h",
				278	"3rdparty/everest/include/everest/everest.h",
				279	"3rdparty/everest/include/everest/x25519.h",
				280	"library/*.c",
				281	"3rdparty/everest/library/everest.c",
				282	"3rdparty/everest/library/x25519.c"
				283	])
				284	symbols = self.parse_symbols()
				285
				286	# Remove identifier macros like mbedtls_printf or mbedtls_calloc
				287	identifiers_justname = [x.name for x in identifiers]
				288	actual_macros = []
				289	for macro in all_macros:
				290	if macro.name not in identifiers_justname:
				291	actual_macros.append(macro)
				292
				293	self.log.debug("Found:")
				294	# Aligns the counts on the assumption that none exceeds 4 digits
				295	self.log.debug(" {:4} Total Macros".format(len(all_macros)))
				296	self.log.debug(" {:4} Non-identifier Macros".format(len(actual_macros)))
				297	self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
				298	self.log.debug(" {:4} Identifiers".format(len(identifiers)))
				299	self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
				300	return {
				301	"macros": actual_macros,
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	302	"private_macros": private_macros,
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	303	"enum_consts": enum_consts,
				304	"identifiers": identifiers,
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	305	"excluded_identifiers": excluded_identifiers,
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	306	"symbols": symbols,
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	307	"mbed_psa_words": mbed_psa_words
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	308	}
				309
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	310	def is_file_excluded(self, path, exclude_wildcards):
Gilles Peskine	1c39975	2021-09-28 10:12:49 +0200	[diff] [blame]	311	"""Whether the given file path is excluded."""
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	312	# exclude_wildcards may be None. Also, consider the global exclusions.
				313	exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
				314	for pattern in exclude_wildcards:
				315	if fnmatch.fnmatch(path, pattern):
				316	return True
				317	return False
				318
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	319	def get_all_files(self, include_wildcards, exclude_wildcards):
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	320	"""
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	321	Get all files that match any of the included UNIX-style wildcards
				322	and filter them into included and excluded lists.
				323	While the check_names script is designed only for use on UNIX/macOS
				324	(due to nm), this function alone will work fine on Windows even with
				325	forward slashes in the wildcard.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	326
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	327	Args:
				328	* include_wildcards: a List of shell-style wildcards to match filepaths.
				329	* exclude_wildcards: a List of shell-style wildcards to exclude.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	330
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	331	Returns:
				332	* inc_files: A List of relative filepaths for included files.
				333	* exc_files: A List of relative filepaths for excluded files.
				334	"""
				335	accumulator = set()
				336	all_wildcards = include_wildcards + (exclude_wildcards or [])
				337	for wildcard in all_wildcards:
				338	accumulator = accumulator.union(glob.iglob(wildcard))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	339
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	340	inc_files = []
				341	exc_files = []
				342	for path in accumulator:
				343	if self.is_file_excluded(path, exclude_wildcards):
				344	exc_files.append(path)
				345	else:
				346	inc_files.append(path)
				347	return (inc_files, exc_files)
				348
				349	def get_included_files(self, include_wildcards, exclude_wildcards):
				350	"""
				351	Get all files that match any of the included UNIX-style wildcards.
				352	While the check_names script is designed only for use on UNIX/macOS
				353	(due to nm), this function alone will work fine on Windows even with
				354	forward slashes in the wildcard.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	355
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	356	Args:
				357	* include_wildcards: a List of shell-style wildcards to match filepaths.
				358	* exclude_wildcards: a List of shell-style wildcards to exclude.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	359
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	360	Returns a List of relative filepaths.
				361	"""
				362	accumulator = set()
				363
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	364	for include_wildcard in include_wildcards:
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	365	accumulator = accumulator.union(glob.iglob(include_wildcard))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	366
Gilles Peskine	7bf5205	2021-09-27 19:20:17 +0200	[diff] [blame]	367	return list(path for path in accumulator
				368	if not self.is_file_excluded(path, exclude_wildcards))
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	369
				370	def parse_macros(self, include, exclude=None):
				371	"""
				372	Parse all macros defined by #define preprocessor directives.
				373
				374	Args:
				375	* include: A List of glob expressions to look for files through.
				376	* exclude: A List of glob expressions for excluding files.
				377
				378	Returns a List of Match objects for the found macros.
				379	"""
				380	macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
				381	exclusions = (
				382	"asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
				383	)
				384
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	385	files = self.get_included_files(include, exclude)
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	386	self.log.debug("Looking for macros in {} files".format(len(files)))
				387
				388	macros = []
				389	for header_file in files:
				390	with open(header_file, "r", encoding="utf-8") as header:
				391	for line_no, line in enumerate(header):
				392	for macro in macro_regex.finditer(line):
				393	if macro.group("macro").startswith(exclusions):
				394	continue
				395
				396	macros.append(Match(
				397	header_file,
				398	line,
				399	line_no,
				400	macro.span("macro"),
				401	macro.group("macro")))
				402
				403	return macros
				404
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	405	def parse_mbed_psa_words(self, include, exclude=None):
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	406	"""
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	407	Parse all words in the file that begin with MBED\|PSA, in and out of
				408	macros, comments, anything.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	409
				410	Args:
				411	* include: A List of glob expressions to look for files through.
				412	* exclude: A List of glob expressions for excluding files.
				413
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	414	Returns a List of Match objects for words beginning with MBED\|PSA.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	415	"""
				416	# Typos of TLS are common, hence the broader check below than MBEDTLS.
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	417	mbed_regex = re.compile(r"\b(MBED.+?\|PSA)_[A-Z0-9_]*")
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	418	exclusions = re.compile(r"// *no-check-names\|#error")
				419
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	420	files = self.get_included_files(include, exclude)
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	421	self.log.debug(
				422	"Looking for MBED\|PSA words in {} files"
				423	.format(len(files))
				424	)
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	425
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	426	mbed_psa_words = []
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	427	for filename in files:
				428	with open(filename, "r", encoding="utf-8") as fp:
				429	for line_no, line in enumerate(fp):
				430	if exclusions.search(line):
				431	continue
				432
				433	for name in mbed_regex.finditer(line):
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	434	mbed_psa_words.append(Match(
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	435	filename,
				436	line,
				437	line_no,
				438	name.span(0),
				439	name.group(0)))
				440
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	441	return mbed_psa_words
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	442
				443	def parse_enum_consts(self, include, exclude=None):
				444	"""
				445	Parse all enum value constants that are declared.
				446
				447	Args:
				448	* include: A List of glob expressions to look for files through.
				449	* exclude: A List of glob expressions for excluding files.
				450
				451	Returns a List of Match objects for the findings.
				452	"""
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	453	files = self.get_included_files(include, exclude)
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	454	self.log.debug("Looking for enum consts in {} files".format(len(files)))
				455
				456	# Emulate a finite state machine to parse enum declarations.
				457	# OUTSIDE_KEYWORD = outside the enum keyword
				458	# IN_BRACES = inside enum opening braces
				459	# IN_BETWEEN = between enum keyword and opening braces
				460	states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
				461	enum_consts = []
				462	for header_file in files:
				463	state = states.OUTSIDE_KEYWORD
				464	with open(header_file, "r", encoding="utf-8") as header:
				465	for line_no, line in enumerate(header):
				466	# Match typedefs and brackets only when they are at the
				467	# beginning of the line -- if they are indented, they might
				468	# be sub-structures within structs, etc.
David Horstmann	e1e776c	2022-12-16 13:39:04 +0000	[diff] [blame]	469	optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	470	if (state == states.OUTSIDE_KEYWORD and
David Horstmann	e1e776c	2022-12-16 13:39:04 +0000	[diff] [blame]	471	re.search(r"^(typedef +)?enum " + \
				472	optional_c_identifier + \
				473	r" *{", line)):
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	474	state = states.IN_BRACES
				475	elif (state == states.OUTSIDE_KEYWORD and
				476	re.search(r"^(typedef +)?enum", line)):
				477	state = states.IN_BETWEEN
				478	elif (state == states.IN_BETWEEN and
				479	re.search(r"^{", line)):
				480	state = states.IN_BRACES
				481	elif (state == states.IN_BRACES and
				482	re.search(r"^}", line)):
				483	state = states.OUTSIDE_KEYWORD
				484	elif (state == states.IN_BRACES and
				485	not re.search(r"^ *#", line)):
				486	enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
				487	if not enum_const:
				488	continue
				489
				490	enum_consts.append(Match(
				491	header_file,
				492	line,
				493	line_no,
				494	enum_const.span("enum_const"),
				495	enum_const.group("enum_const")))
				496
				497	return enum_consts
				498
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	499	IGNORED_CHUNK_REGEX = re.compile('\|'.join([
				500	r'/\.?\*/', # block comment entirely on one line
				501	r'//.*', # line comment
				502	r'(?P<string>")(?:[^\\\"]\|\\.)*"', # string literal
				503	]))
				504
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	505	def strip_comments_and_literals(self, line, in_block_comment):
				506	"""Strip comments and string literals from line.
				507
				508	Continuation lines are not supported.
				509
				510	If in_block_comment is true, assume that the line starts inside a
				511	block comment.
				512
				513	Return updated values of (line, in_block_comment) where:
				514	* Comments in line have been replaced by a space (or nothing at the
				515	start or end of the line).
				516	* String contents have been removed.
				517	* in_block_comment indicates whether the line ends inside a block
				518	comment that continues on the next line.
				519	"""
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	520
				521	# Terminate current multiline comment?
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	522	if in_block_comment:
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	523	m = re.search(r"\*/", line)
				524	if m:
				525	in_block_comment = False
				526	line = line[m.end(0):]
				527	else:
				528	return '', True
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	529
				530	# Remove full comments and string literals.
				531	# Do it all together to handle cases like "/*" correctly.
				532	# Note that continuation lines are not supported.
				533	line = re.sub(self.IGNORED_CHUNK_REGEX,
				534	lambda s: '""' if s.group('string') else ' ',
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	535	line)
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	536
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	537	# Start an unfinished comment?
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	538	# (If `/*` was part of a complete comment, it's already been removed.)
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	539	m = re.search(r"/\*", line)
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	540	if m:
				541	in_block_comment = True
Gilles Peskine	23b4096	2021-11-17 20:45:39 +0100	[diff] [blame]	542	line = line[:m.start(0)]
Gilles Peskine	4480162	2021-11-17 20:43:35 +0100	[diff] [blame]	543
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	544	return line, in_block_comment
				545
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	546	IDENTIFIER_REGEX = re.compile('\|'.join([
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	547	# Match " something(a" or " *something(a". Functions.
				548	# Assumptions:
				549	# - function definition from return type to one of its arguments is
				550	# all on one line
				551	# - function definition line only contains alphanumeric, asterisk,
				552	# underscore, and open bracket
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	553	r".* \*(\w+) \( *\w",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	554	# Match "(*something)(".
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	555	r".$ \* (\w+) $ *\(",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	556	# Match names of named data structures.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	557	r"(?:typedef +)?(?:struct\|union\|enum) +(\w+)(?: *{)?$",
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	558	# Match names of typedef instances, after closing bracket.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	559	r"}? (\w+)[;[].",
				560	]))
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	561	# The regex below is indented for clarity.
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	562	EXCLUSION_LINES = re.compile("\|".join([
				563	r"extern +\"C\"",
				564	r"(typedef +)?(struct\|union\|enum)( *{)?$",
				565	r"} *;?$",
				566	r"$",
				567	r"//",
				568	r"#",
				569	]))
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	570
				571	def parse_identifiers_in_file(self, header_file, identifiers):
				572	"""
				573	Parse all lines of a header where a function/enum/struct/union/typedef
				574	identifier is declared, based on some regex and heuristics. Highly
				575	dependent on formatting style.
				576
				577	Append found matches to the list ``identifiers``.
				578	"""
				579
				580	with open(header_file, "r", encoding="utf-8") as header:
				581	in_block_comment = False
				582	# The previous line variable is used for concatenating lines
				583	# when identifiers are formatted and spread across multiple
				584	# lines.
				585	previous_line = ""
				586
				587	for line_no, line in enumerate(header):
Gilles Peskine	df30665	2021-11-17 20:32:31 +0100	[diff] [blame]	588	line, in_block_comment = \
				589	self.strip_comments_and_literals(line, in_block_comment)
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	590
Gilles Peskine	c8fc67f	2021-11-17 20:23:18 +0100	[diff] [blame]	591	if self.EXCLUSION_LINES.match(line):
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	592	previous_line = ""
				593	continue
				594
				595	# If the line contains only space-separated alphanumeric
Gilles Peskine	4f04d61	2021-11-17 20:39:56 +0100	[diff] [blame]	596	# characters (or underscore, asterisk, or open parenthesis),
Gilles Peskine	b3f4dd5	2021-11-16 20:56:47 +0100	[diff] [blame]	597	# and nothing else, high chance it's a declaration that
				598	# continues on the next line
				599	if re.search(r"^([\w\*\(]+\s+)+$", line):
				600	previous_line += line
				601	continue
				602
				603	# If previous line seemed to start an unfinished declaration
				604	# (as above), concat and treat them as one.
				605	if previous_line:
				606	line = previous_line.strip() + " " + line.strip() + "\n"
				607	previous_line = ""
				608
				609	# Skip parsing if line has a space in front = heuristic to
				610	# skip function argument lines (highly subject to formatting
				611	# changes)
				612	if line[0] == " ":
				613	continue
				614
				615	identifier = self.IDENTIFIER_REGEX.search(line)
				616
				617	if not identifier:
				618	continue
				619
				620	# Find the group that matched, and append it
				621	for group in identifier.groups():
				622	if not group:
				623	continue
				624
				625	identifiers.append(Match(
				626	header_file,
				627	line,
				628	line_no,
				629	identifier.span(),
				630	group))
				631
def parse_identifiers(self, include, exclude=None):
    """
    Collect the function/enum/struct/union/typedef identifiers declared in
    header files. The per-file work is delegated to
    parse_identifiers_in_file(), which relies on regexes and heuristics and
    is therefore highly dependent on formatting style. Files matched by
    exclude are not dropped: their identifiers are parsed too, but returned
    in a separate list.

    Args:
    * include: A List of glob expressions to look for files through.
    * exclude: A List of glob expressions for excluding files.

    Returns: a Tuple of two Lists of Match objects with identifiers.
    * included_identifiers: A List of Match objects with identifiers from
      included files.
    * excluded_identifiers: A List of Match objects with identifiers from
      excluded files.
    """
    included_files, excluded_files = self.get_all_files(include, exclude)

    file_count = len(included_files)
    self.log.debug(
        "Looking for included identifiers in {} files".format(file_count))

    included_identifiers = []
    excluded_identifiers = []

    # Each group of files fills its own accumulator list; the included
    # files are processed first, then the excluded ones.
    work_list = (
        (included_files, included_identifiers),
        (excluded_files, excluded_identifiers),
    )
    for header_files, accumulator in work_list:
        for header_file in header_files:
            self.parse_identifiers_in_file(header_file, accumulator)

    return (included_identifiers, excluded_identifiers)
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	664
				665	def parse_symbols(self):
				666	"""
				667	Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
				668	object files using nm to retrieve the list of referenced symbols.
				669	Exceptions thrown here are rethrown because they would be critical
				670	errors that void several tests, and thus needs to halt the program. This
				671	is explicitly done for clarity.
				672
				673	Returns a List of unique symbols defined and used in the libraries.
				674	"""
				675	self.log.info("Compiling...")
				676	symbols = []
				677
Tom Cosgrove	49f99bc	2022-12-04 16:44:21 +0000	[diff] [blame]	678	# Back up the config and atomically compile with the full configuration.
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	679	shutil.copy(
Gilles Peskine	d47f636	2021-09-27 20:12:00 +0200	[diff] [blame]	680	"include/mbedtls/config.h",
				681	"include/mbedtls/config.h.bak"
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	682	)
				683	try:
				684	# Use check=True in all subprocess calls so that failures are raised
				685	# as exceptions and logged.
				686	subprocess.run(
				687	["python3", "scripts/config.py", "full"],
				688	universal_newlines=True,
				689	check=True
				690	)
				691	my_environment = os.environ.copy()
				692	my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
				693	# Run make clean separately to lib to prevent unwanted behavior when
				694	# make is invoked with parallelism.
				695	subprocess.run(
				696	["make", "clean"],
				697	universal_newlines=True,
				698	check=True
				699	)
				700	subprocess.run(
				701	["make", "lib"],
				702	env=my_environment,
				703	universal_newlines=True,
				704	stdout=subprocess.PIPE,
				705	stderr=subprocess.STDOUT,
				706	check=True
				707	)
				708
				709	# Perform object file analysis using nm
				710	symbols = self.parse_symbols_from_nm([
				711	"library/libmbedcrypto.a",
				712	"library/libmbedtls.a",
				713	"library/libmbedx509.a"
				714	])
				715
				716	subprocess.run(
				717	["make", "clean"],
				718	universal_newlines=True,
				719	check=True
				720	)
				721	except subprocess.CalledProcessError as error:
				722	self.log.debug(error.output)
				723	raise error
				724	finally:
				725	# Put back the original config regardless of there being errors.
				726	# Works also for keyboard interrupts.
				727	shutil.move(
Gilles Peskine	d47f636	2021-09-27 20:12:00 +0200	[diff] [blame]	728	"include/mbedtls/config.h.bak",
				729	"include/mbedtls/config.h"
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	730	)
				731
				732	return symbols
				733
				734	def parse_symbols_from_nm(self, object_files):
				735	"""
				736	Run nm to retrieve the list of referenced symbols in each object file.
				737	Does not return the position data since it is of no use.
				738
				739	Args:
				740	* object_files: a List of compiled object filepaths to search through.
				741
				742	Returns a List of unique symbols defined and used in any of the object
				743	files.
				744	"""
				745	nm_undefined_regex = re.compile(r"^\S+: +U \|^$\|^\S+:$")
				746	nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
				747	exclusions = ("FStar", "Hacl")
				748
				749	symbols = []
				750
				751	# Gather all outputs of nm
				752	nm_output = ""
				753	for lib in object_files:
				754	nm_output += subprocess.run(
				755	["nm", "-og", lib],
				756	universal_newlines=True,
				757	stdout=subprocess.PIPE,
				758	stderr=subprocess.STDOUT,
				759	check=True
				760	).stdout
				761
				762	for line in nm_output.splitlines():
				763	if not nm_undefined_regex.search(line):
				764	symbol = nm_valid_regex.search(line)
				765	if (symbol and not symbol.group("symbol").startswith(exclusions)):
				766	symbols.append(symbol.group("symbol"))
				767	else:
				768	self.log.error(line)
				769
				770	return symbols
				771
				772	class NameChecker():
				773	"""
				774	Representation of the core name checking operation performed by this script.
				775	"""
				776	def __init__(self, parse_result, log):
				777	self.parse_result = parse_result
				778	self.log = log
				779
				780	def perform_checks(self, quiet=False):
				781	"""
				782	A comprehensive checker that performs each check in order, and outputs
				783	a final verdict.
				784
				785	Args:
				786	* quiet: whether to hide detailed problem explanation.
				787	"""
				788	self.log.info("=============")
				789	Problem.quiet = quiet
				790	problems = 0
				791	problems += self.check_symbols_declared_in_header()
				792
				793	pattern_checks = [
				794	("macros", MACRO_PATTERN),
				795	("enum_consts", CONSTANTS_PATTERN),
				796	("identifiers", IDENTIFIER_PATTERN)
				797	]
				798	for group, check_pattern in pattern_checks:
				799	problems += self.check_match_pattern(group, check_pattern)
				800
				801	problems += self.check_for_typos()
				802
				803	self.log.info("=============")
				804	if problems > 0:
				805	self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
				806	if quiet:
				807	self.log.info("Remove --quiet to see explanations.")
				808	else:
				809	self.log.info("Use --quiet for minimal output.")
				810	return 1
				811	else:
				812	self.log.info("PASS")
				813	return 0
				814
				815	def check_symbols_declared_in_header(self):
				816	"""
				817	Perform a check that all detected symbols in the library object files
				818	are properly declared in headers.
				819	Assumes parse_names_in_source() was called before this.
Aditya Deshpande	7d20bb4	2023-01-27 15:45:32 +0000	[diff] [blame]	820
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	821	Returns the number of problems that need fixing.
				822	"""
				823	problems = []
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	824	all_identifiers = self.parse_result["identifiers"] + \
				825	self.parse_result["excluded_identifiers"]
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	826
				827	for symbol in self.parse_result["symbols"]:
				828	found_symbol_declared = False
Aditya Deshpande	94375c8	2023-01-25 17:00:12 +0000	[diff] [blame]	829	for identifier_match in all_identifiers:
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	830	if symbol == identifier_match.name:
				831	found_symbol_declared = True
				832	break
				833
				834	if not found_symbol_declared:
				835	problems.append(SymbolNotInHeader(symbol))
				836
				837	self.output_check_result("All symbols in header", problems)
				838	return len(problems)
				839
				840	def check_match_pattern(self, group_to_check, check_pattern):
				841	"""
				842	Perform a check that all items of a group conform to a regex pattern.
				843	Assumes parse_names_in_source() was called before this.
				844
				845	Args:
				846	* group_to_check: string key to index into self.parse_result.
				847	* check_pattern: the regex to check against.
				848
				849	Returns the number of problems that need fixing.
				850	"""
				851	problems = []
				852
				853	for item_match in self.parse_result[group_to_check]:
				854	if not re.search(check_pattern, item_match.name):
				855	problems.append(PatternMismatch(check_pattern, item_match))
				856	# Double underscore should not be used for names
				857	if re.search(r".__.", item_match.name):
				858	problems.append(
				859	PatternMismatch("no double underscore allowed", item_match))
				860
				861	self.output_check_result(
				862	"Naming patterns of {}".format(group_to_check),
				863	problems)
				864	return len(problems)
				865
				866	def check_for_typos(self):
				867	"""
Shaun Case	0e7791f	2021-12-20 21:14:10 -0800	[diff] [blame]	868	Perform a check that all words in the source code beginning with MBED are
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	869	either defined as macros, or as enum constants.
				870	Assumes parse_names_in_source() was called before this.
				871
				872	Returns the number of problems that need fixing.
				873	"""
				874	problems = []
				875
				876	# Set comprehension, equivalent to a list comprehension wrapped by set()
				877	all_caps_names = {
				878	match.name
				879	for match
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	880	in self.parse_result["macros"] +
				881	self.parse_result["private_macros"] +
				882	self.parse_result["enum_consts"]
				883	}
Ronald Cron	b814bda	2021-09-13 14:50:42 +0200	[diff] [blame]	884	typo_exclusion = re.compile(r"XXX\|__\|_$\|^MBEDTLS_.*CONFIG_FILE$\|"
Pengyu Lv	fda7f50	2022-11-08 16:56:51 +0800	[diff] [blame]	885	r"MBEDTLS_TEST_LIBTESTDRIVER*\|"
				886	r"PSA_CRYPTO_DRIVER_TEST")
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	887
Pengyu Lv	018b2f6	2022-11-08 15:55:00 +0800	[diff] [blame]	888	for name_match in self.parse_result["mbed_psa_words"]:
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	889	found = name_match.name in all_caps_names
				890
				891	# Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
				892	# PSA driver, they will not exist as macros. However, they
				893	# should still be checked for typos using the equivalent
				894	# BUILTINs that exist.
				895	if "MBEDTLS_PSA_ACCEL_" in name_match.name:
				896	found = name_match.name.replace(
				897	"MBEDTLS_PSA_ACCEL_",
				898	"MBEDTLS_PSA_BUILTIN_") in all_caps_names
				899
				900	if not found and not typo_exclusion.search(name_match.name):
				901	problems.append(Typo(name_match))
				902
				903	self.output_check_result("Likely typos", problems)
				904	return len(problems)
				905
				906	def output_check_result(self, name, problems):
				907	"""
				908	Write out the PASS/FAIL status of a performed check depending on whether
				909	there were problems.
				910
				911	Args:
				912	* name: the name of the test
				913	* problems: a List of encountered Problems
				914	"""
				915	if problems:
				916	self.log.info("{}: FAIL\n".format(name))
				917	for problem in problems:
				918	self.log.warning(str(problem))
				919	else:
				920	self.log.info("{}: PASS".format(name))
				921
				922	def main():
				923	"""
				924	Perform argument parsing, and create an instance of CodeParser and
				925	NameChecker to begin the core operation.
				926	"""
				927	parser = argparse.ArgumentParser(
				928	formatter_class=argparse.RawDescriptionHelpFormatter,
				929	description=(
				930	"This script confirms that the naming of all symbols and identifiers "
				931	"in Mbed TLS are consistent with the house style and are also "
				932	"self-consistent.\n\n"
				933	"Expected to be run from the MbedTLS root directory.")
				934	)
				935	parser.add_argument(
				936	"-v", "--verbose",
				937	action="store_true",
				938	help="show parse results"
				939	)
				940	parser.add_argument(
				941	"-q", "--quiet",
				942	action="store_true",
Tom Cosgrove	49f99bc	2022-12-04 16:44:21 +0000	[diff] [blame]	943	help="hide unnecessary text, explanations, and highlights"
Gilles Peskine	8266b5b	2021-09-27 19:53:31 +0200	[diff] [blame]	944	)
				945
				946	args = parser.parse_args()
				947
				948	# Configure the global logger, which is then passed to the classes below
				949	log = logging.getLogger()
				950	log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
				951	log.addHandler(logging.StreamHandler())
				952
				953	try:
				954	code_parser = CodeParser(log)
				955	parse_result = code_parser.comprehensive_parse()
				956	except Exception: # pylint: disable=broad-except
				957	traceback.print_exc()
				958	sys.exit(2)
				959
				960	name_checker = NameChecker(parse_result, log)
				961	return_code = name_checker.perform_checks(quiet=args.quiet)
				962
				963	sys.exit(return_code)
				964
				965	if __name__ == "__main__":
				966	main()