blob: 395e038ca1e32e2dac22dad99d0cd0aafd183d05 [file] [log] [blame]
Gilles Peskinee7c44552021-01-25 21:40:45 +01001"""Collect macro definitions from header files.
2"""
3
4# Copyright The Mbed TLS Contributors
5# SPDX-License-Identifier: Apache-2.0
6#
7# Licensed under the Apache License, Version 2.0 (the "License"); you may
8# not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18
import itertools
import re
from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
22
23
class ReadFileLineException(Exception):
    """Error that wraps a failure which happened while processing a file.

    The exception message has the form ``in FILENAME at LINE_NUMBER``. Both
    values are also kept as the attributes `filename` and `line_number` so
    that handlers do not need to parse the message.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        super().__init__('in {} at {}'.format(filename, line_number))
        # `line_number` may be a string marker rather than a number when the
        # failure did not happen while a specific line was being processed.
        self.line_number = line_number
        self.filename = filename
30
31
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    The file is opened when the ``with`` block is entered and closed when it
    is left, whether or not an exception occurred.

    Attributes:
    * filename: the path passed to the constructor.
    * binary: if true, the file is opened in binary mode and the lines are
      `bytes` objects rather than `str`.
    * file: the open file object, or None before the context is entered.
    * line_number: ``'entry'`` until iteration starts, then the index of the
      line being processed as produced by `enumerate` (the first line of
      the file is 0), then ``'exit'`` once all lines have been consumed.
    """
    def __init__(self, filename: str, binary: bool = False) -> None:
        self.filename = filename
        self.file = None #type: Optional[IO]
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary

    def __enter__(self) -> 'read_file_lines':
        # Keep a reference to the file object so that __exit__ can close it.
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        self.generator = enumerate(self.file)
        return self

    def __iter__(self) -> Iterator[str]:
        """Yield the lines of the file, tracking the current position."""
        # Iterating is only meaningful inside the `with` block.
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        """Close the file and annotate any exception with the position.

        Raises ReadFileLineException, chained from the original exception,
        if the body of the `with` block raised.
        """
        # Close first so that the file handle is released even when the
        # block is being left because of an exception.
        if self.file is not None:
            self.file.close()
        if exc_type is not None:
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
Gilles Peskine22fcf1b2021-03-10 01:02:39 +010070
71
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.pake_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        # Whether to include intermediate macros in enumerations. Intermediate
        # macros serve as category headers and are not valid values of their
        # type. See `is_internal_name`.
        # Always false in this class, may be set to true in derived classes.
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Names
        ending in ``_BASE`` or ``_NONE``, or containing ``_CATEGORY_``, are
        internal unless `self.include_intermediate` is true.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping adjacent spaces."""
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with an empty argument list, yield "name()".
        If name is a macro with arguments, yield a series of
        "name(arg1, ..., argN)" where each argument takes each possible
        value at least once: the first expression uses the first value of
        every argument, then each argument in turn runs through its
        remaining values while the others stay at their first value.

        Raises Exception, chained from the underlying error, if the
        expressions cannot be built (for example if an argument name has no
        entry in `self.arguments_for` or has no known value).
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if not argspec:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Restore this argument's own first value before varying
                # the next argument.
                arguments[i] = values[0]
        except Exception as e:
            # Only wrap genuine errors. GeneratorExit (raised at a yield
            # when the consumer closes the generator early) and
            # KeyboardInterrupt must propagate unchanged.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        Each yielded expression is added to `seen`.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.
        """
        # One `seen` set is shared by all names so that an expression is
        # only generated once even if several names lead to it.
        seen = set() #type: Set[str]
        return itertools.chain(*(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ))
Gilles Peskine22fcf1b2021-03-10 01:02:39 +0100207
Gilles Peskinee7c44552021-01-25 21:40:45 +0100208
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the matching PSA_xxx_IS_yyy macro.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match only matches at the start of `name`, and
        # read_line only passes names starting with 'PSA_ALG_', so the two
        # name-based tests below look unreachable from there -- confirm the
        # intent before relying on them or changing them.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.

        This function analyzes lines that contain a ``#define`` directive
        (up to non-significant whitespace) and skips all non-matching lines.
        Macros whose name does not fall in one of the recognized ``PSA_``
        families are ignored, except that a single-parameter macro always
        has its parameter recorded in `self.argspecs`.
        """
        # pylint: disable=too-many-branches
        m = re.match(self._define_directive_re, line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Replace C comments by a space so that they do not interfere with
        # the analysis of the expansion.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            self.argspecs[name] = [parameter]
        if re.match(self._deprecated_definition_re, expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        elif (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # 13 == len('PSA_KEY_TYPE_'): build PSA_KEY_TYPE_IS_xxx.
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            # 8 == len('PSA_ALG_'): build PSA_ALG_IS_xxx.
            if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']:
                # A naming irregularity
                tester = name[:8] + 'IS_RANDOMIZED_' + name[8:]
            else:
                tester = name[:8] + 'IS_' + name[8:]
            self.algorithms_from_hash[name] = tester
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        # Any other macro is not recorded.

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Parse the lines of a C header and record the macros they define.

        `header_file` is an iterable of lines as `bytes` objects, typically
        a file opened in binary mode. Lines ending with a backslash are
        joined with the following line before being analyzed, and non-ASCII
        bytes are dropped.
        """
        # Use a single explicit iterator so that continuation lines fetched
        # with next() are not seen again by the for loop, whether the
        # argument is a file object or a plain sequence of lines.
        lines = iter(header_file)
        for line in lines:
            m = re.search(self._continued_line_re, line)
            while m:
                # A backslash at the very end of the input continues onto
                # nothing: just drop the backslash-newline.
                cont = next(lines, b'')
                line = line[:m.start(0)] + cont
                m = re.search(self._continued_line_re, line)
            line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
            self.read_line(line)
Gilles Peskineb4edff92021-03-30 19:09:05 +0200325
326
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    The information is gathered from C headers (`parse_header`) and from
    test data files (`parse_test_cases`). It covers macro names as well as
    the arguments of the macros that take some.
    """

    def __init__(self) -> None:
        super().__init__()
        # Every macro name matched by `parse_header_line`, whether or not it
        # is then excluded from the per-type sets.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        # NOTE(review): for a header macro named PSA_ECC_FAMILY_xxx or
        # PSA_DH_FAMILY_xxx, `_header_line_re` yields the prefix
        # 'ECC_FAMILY' / 'DH_FAMILY', which has no entry here -- confirm
        # whether the 'ECC_CURVE' / 'DH_GROUP' keys are still intended.
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'pake_algorithm': [self.pake_algorithms],
            'other_algorithm': [],
        } #type: Dict[str, List[Set[str]]]
        # Lengths to try for the length arguments of MAC and AEAD macros.
        for length_argument in ('mac_length', 'min_mac_length',
                                'tag_length', 'min_tag_length'):
            self.arguments_for[length_argument] += ['1', '63']

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        #
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        self.pake_algorithms.add('0x0a0000ff')

        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        Raises KeyError if `type_word` is not one of the supported type words.
        """
        names_by_type = {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }
        return names_by_type[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is recorded in `self.all_declared`. Names
        that are not excluded and whose type prefix is known are also added
        to the set for that type, and their argument list, if any, is
        recorded in `self.argspecs`.
        """
        found = self._header_line_re.match(line)
        if found is None:
            return
        name, prefix, parameters = found.group(1, 2, 3)
        self.all_declared.add(name)
        excluded = (self._excluded_name_re.search(name) is not None
                    or name in self._excluded_names
                    or self.is_internal_name(name))
        if excluded:
            return
        dest = self.table_by_prefix.get(prefix)
        if dest is not None:
            dest.add(name)
            if parameters:
                self.argspecs[name] = self._argument_split(parameters)

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx"."""
        with read_file_lines(filename, binary=True) as lines:
            for raw_line in lines:
                # Drop non-ASCII bytes so that the line decodes cleanly.
                ascii_bytes = re.sub(self._nonascii_re, rb'', raw_line)
                self.parse_header_line(ascii_bytes.decode('ascii'))

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the macro-like identifiers in expr that no header declared."""
        for candidate in self._macro_identifier_re.findall(expr):
            if candidate in self.all_declared:
                continue
            yield candidate

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that a test case argument only uses declared names.

        Return True if so, otherwise raise an exception that lists the
        undeclared names.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if not undeclared:
            return True
        raise Exception('Undeclared names in test case', undeclared)

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        ` PSAMacroEnumerator.distribute_arguments`.
        """
        # Remove all spaces, then put exactly one after each comma.
        without_spaces = re.sub(r' +', r'', argument)
        return re.sub(r',', r', ', without_spaces)

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests."""
        targets = [self.algorithms] if function.endswith('_algorithm') else []
        chained_key_agreement = (
            function == 'key_agreement_algorithm' and
            argument.startswith('PSA_ALG_KEY_AGREEMENT(')
        )
        if chained_key_agreement:
            # We only want *raw* key agreement algorithms as such, so
            # exclude ones that are already chained with a KDF.
            # Keep the expression as one to test as an algorithm.
            function = 'other_algorithm'
        targets.extend(self.table_by_test_function[function])
        if not self.accept_test_case_line(function, argument):
            return
        for target in targets:
            target.add(self.normalize_argument(argument))

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                found = self._test_case_line_re.match(line)
                if found is None:
                    continue
                self.add_test_case_line(found.group(1), found.group(2))
502 self.add_test_case_line(m.group(1), m.group(2))