"""Collect macro definitions from header files.
"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
#

Gilles Peskine22fcf1b2021-03-10 01:02:39 +01008import itertools
Gilles Peskinee7c44552021-01-25 21:40:45 +01009import re
Gilles Peskineaeb8d662022-03-04 20:02:00 +010010from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
Gilles Peskine3cf3a8e2021-03-30 19:09:05 +020011
12
class ReadFileLineException(Exception):
    """Exception carrying the file name and position of a failure.

    The exception message has the form ``in FILENAME at LINE_NUMBER``.

    Attributes:
    * filename: the name of the file that was being processed.
    * line_number: the position in that file, either an integer or a
      descriptive string.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        # Record the location first so that it is available on the instance
        # regardless of how the message is rendered.
        self.filename = filename
        self.line_number = line_number
        super().__init__('in {} at {}'.format(filename, line_number))
19
20
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    The `line_number` attribute is `'entry'` until iteration starts, the
    1-based number of the line being processed during iteration, and
    `'exit'` once all lines have been consumed.

    If `binary` is true, the file is opened in binary mode and iterating
    yields `bytes` objects instead of `str`.
    """
    def __init__(self, filename: str, binary: bool = False) -> None:
        self.filename = filename
        self.file = None #type: Optional[IO[str]]
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary

    def __enter__(self) -> 'read_file_lines':
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        # Number lines from 1 so that reported positions match what editors
        # and compilers display.
        self.generator = enumerate(self.file, 1)
        return self

    def __iter__(self) -> Iterator[str]:
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        """Close the file and annotate any error with the current position.

        Raise ReadFileLineException, chained from the original exception,
        if the body of the `with` statement raised an `Exception`.
        """
        if self.file is not None:
            self.file.close()
        # Only annotate ordinary errors. Exceptions that derive directly
        # from BaseException (KeyboardInterrupt, SystemExit, GeneratorExit)
        # are control-flow signals and must propagate unchanged.
        if exc_type is not None and issubclass(exc_type, Exception):
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
Gilles Peskine22fcf1b2021-03-10 01:02:39 +010062
63
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """
    #pylint: disable=too-many-instance-attributes

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.lifetimes = set() #type: Set[str]
        self.locations = set() #type: Set[str]
        self.persistence_levels = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        self.sign_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        # Whether to include intermediate macros in enumerations. Intermediate
        # macros serve as category headers and are not valid values of their
        # type. See `is_internal_name`.
        # Always false in this class, may be set to true in derived classes.
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Names
        ending in ``_BASE`` or ``_NONE`` and names containing ``_CATEGORY_``
        are internal unless `self.include_intermediate` is true.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['sign_alg'] = sorted(self.sign_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)
        self.arguments_for['persistence'] = sorted(self.persistence_levels)
        self.arguments_for['location'] = sorted(self.locations)
        self.arguments_for['lifetime'] = sorted(self.lifetimes)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping adjacent spaces."""
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        Errors raised while generating the calls (for example an argument
        name with no known values) are re-raised as an `Exception` whose
        message is ``distribute_arguments(name)``, chained from the original
        error.
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if not argspec:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            # Start with the first value of every argument, then vary one
            # argument at a time while the others keep their first value.
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                arguments[i] = values[0]
        except Exception as e:
            # Deliberately not BaseException: this handler surrounds `yield`
            # statements, so it must let GeneratorExit through. Otherwise
            # closing a partially consumed generator would raise.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        Each result that is yielded is also added to `seen`.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.
        """
        seen = set() #type: Set[str]
        # The argument unpacking consumes `names` immediately, so the caller
        # may modify the collection while the result is being iterated. The
        # expressions themselves are still generated lazily.
        return itertools.chain(*(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ))
Gilles Peskine22fcf1b2021-03-10 01:02:39 +0100207
Gilles Peskinee7c44552021-01-25 21:40:45 +0100208
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the corresponding predicate macro.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    @staticmethod
    def algorithm_tester(name: str) -> str:
        """The predicate for whether an algorithm is built from the given constructor.

        The given name must be the name of an algorithm constructor of the
        form ``PSA_ALG_xxx`` which is used as ``PSA_ALG_xxx(yyy)`` to build
        an algorithm value. Return the corresponding predicate macro which
        is used as ``predicate(alg)`` to test whether ``alg`` can be built
        as ``PSA_ALG_xxx(yyy)``. The predicate is usually called
        ``PSA_ALG_IS_xxx``.
        """
        prefix = 'PSA_ALG_'
        assert name.startswith(prefix)
        midfix = 'IS_'
        suffix = name[len(prefix):]
        if suffix in ['DSA', 'ECDSA']:
            midfix += 'RANDOMIZED_'
        elif suffix == 'RSA_PSS':
            suffix += '_STANDARD_SALT'
        return prefix + midfix + suffix

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): the two name-based tests below are anchored at the
        # start of `name`, so they never match a name that starts with
        # ``PSA_ALG_``, which is all that `read_line` passes. They are kept
        # unchanged so that classification results do not change.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.

        This function analyzes lines that contain a ``#define`` directive
        (up to non-significant whitespace) and skips all non-matching lines.
        Macros whose name starts with one of the recognized ``PSA_`` prefixes
        are recorded in the corresponding collection. The parameter name of
        any single-parameter macro is recorded in `self.argspecs`.
        """
        # pylint: disable=too-many-branches
        m = self._define_directive_re.match(line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Remove comments so that they cannot be mistaken for part of the value.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            self.argspecs[name] = [parameter]
        if self._deprecated_definition_re.match(expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        if (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # 13 is len('PSA_KEY_TYPE_'): insert 'IS_' after the prefix.
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            self.algorithms_from_hash[name] = self.algorithm_tester(name)
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        # Any other macro is ignored.

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Parse a C header and record the PSA macros that it defines.

        `header_file` is an iterable of lines as `bytes` objects, for
        example a file opened in binary mode. Lines ending with a backslash
        are joined with the following line before being parsed. Non-ASCII
        bytes are removed.
        """
        # Use one explicit iterator for both the loop and the look-ahead, so
        # that any iterable works, not just objects that are already iterators.
        lines = iter(header_file)
        for line in lines:
            m = self._continued_line_re.search(line)
            while m:
                # If the input ends with a continued line, join it with
                # nothing instead of letting StopIteration escape.
                cont = next(lines, b'')
                line = line[:m.start(0)] + cont
                m = self._continued_line_re.search(line)
            line = self._nonascii_re.sub(rb'', line).decode('ascii')
            self.read_line(line)
Gilles Peskine3cf3a8e2021-03-30 19:09:05 +0200341
342
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable. The information comes from C header lines
    (`parse_header`) and from test case data lines (`parse_test_cases`).
    """

    def __init__(self) -> None:
        super().__init__()
        # Every PSA_xxx macro name seen in a header, including excluded ones.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_LIFETIME': self.lifetimes,
            'KEY_LOCATION': self.locations,
            'KEY_PERSISTENCE': self.persistence_levels,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms, self.sign_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [self.sign_algorithms],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'other_algorithm': [],
            'lifetime': [self.lifetimes],
        } #type: Dict[str, List[Set[str]]]
        mac_lengths = [str(n) for n in [
            1,  # minimum expressible
            4,  # minimum allowed by policy
            13,  # an odd size in a plausible range
            14,  # an even non-power-of-two size in a plausible range
            16,  # same as full size for at least one algorithm
            63,  # maximum expressible
        ]]
        for argument_name in ('mac_length', 'min_mac_length'):
            self.arguments_for[argument_name] += mac_lengths
        aead_lengths = [str(n) for n in [
            1,  # minimum expressible
            4,  # minimum allowed by policy
            13,  # an odd size in a plausible range
            14,  # an even non-power-of-two size in a plausible range
            16,  # same as full size for at least one algorithm
            63,  # maximum expressible
        ]]
        for argument_name in ('tag_length', 'min_tag_length'):
            self.arguments_for[argument_name] += aead_lengths

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        Raise KeyError if `type_word` is not one of the supported type words.
        """
        names_by_type = {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }
        return names_by_type[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is recorded in `self.all_declared`.
        Names that are not excluded are also added to the set selected by
        their type prefix, and their argument names, if any, are recorded
        in `self.argspecs`.
        """
        found = self._header_line_re.match(line)
        if found is None:
            return
        macro_name, type_prefix, argument_list = found.groups()
        self.all_declared.add(macro_name)
        if self._excluded_name_re.search(macro_name):
            return
        if macro_name in self._excluded_names:
            return
        if self.is_internal_name(macro_name):
            return
        target = self.table_by_prefix.get(type_prefix)
        if target is None:
            # Not a type of macro that is tracked here.
            return
        target.add(macro_name)
        if argument_list:
            self.argspecs[macro_name] = self._argument_split(argument_list)

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx".

        The file is read in binary mode and non-ASCII bytes are discarded
        before each line is parsed.
        """
        with read_file_lines(filename, binary=True) as lines:
            for raw_line in lines:
                ascii_only = self._nonascii_re.sub(rb'', raw_line)
                self.parse_header_line(ascii_only.decode('ascii'))

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the identifiers in `expr` that were not seen in a header.

        An identifier here is an uppercase letter followed by at least one
        word character.
        """
        for identifier in self._macro_identifier_re.findall(expr):
            if identifier in self.all_declared:
                continue
            yield identifier

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that all the macros used in `argument` have been declared.

        Return True if they have, otherwise raise an exception that lists
        the undeclared names.
        """
        #pylint: disable=unused-argument
        missing = list(self.generate_undeclared_names(argument))
        if not missing:
            return True
        raise Exception('Undeclared names in test case', missing)

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        `PSAMacroEnumerator.distribute_arguments`.
        """
        # Remove all spaces, then put one space after each comma.
        without_spaces = re.sub(r' +', r'', argument)
        return re.sub(r',', r', ', without_spaces)

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests.

        Add the normalized `argument` to every set associated with the test
        function `function`. Raise KeyError if the function is unknown.
        """
        sets = []
        if function.endswith('_algorithm'):
            sets.append(self.algorithms)
            is_chained_agreement = (
                function == 'key_agreement_algorithm' and
                argument.startswith('PSA_ALG_KEY_AGREEMENT(')
            )
            if is_chained_agreement:
                # We only want *raw* key agreement algorithms as such, so
                # exclude ones that are already chained with a KDF.
                # Keep the expression as one to test as an algorithm.
                function = 'other_algorithm'
        sets.extend(self.table_by_test_function[function])
        if not self.accept_test_case_line(function, argument):
            return
        for destination in sets:
            destination.add(self.normalize_argument(argument))

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                found = self._test_case_line_re.match(line)
                if found is None:
                    continue
                self.add_test_case_line(found.group(1), found.group(2))