blob: 0e76435f38287cf9289134dbef36fd3acf3c0684 [file] [log] [blame]
"""Collect macro definitions from header files.
"""
3
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
18
import itertools
import re
from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
22
23
class ReadFileLineException(Exception):
    """Exception that records where in an input file a failure happened.

    `read_file_lines.__exit__` raises this, chained from the original
    exception, when the body of its `with` block fails.

    Attributes:
    * filename: the name of the file that was being read.
    * line_number: the position in the file. This is either an integer
      line index or a string marker (`read_file_lines` uses ``'entry'``
      and ``'exit'`` for "before the first line" and "after the last line").
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        """Build the exception with the message ``in FILENAME at LINE_NUMBER``."""
        message = 'in {} at {}'.format(filename, line_number)
        super().__init__(message)
        self.filename = filename
        self.line_number = line_number
30
31
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    The line number is the index produced by `enumerate`, so the first line
    of the file is reported as 0. Before the first line has been read the
    position is ``'entry'``; once the file is exhausted it is ``'exit'``.

    If `binary` is true, the file is opened in binary mode and the lines
    are `bytes` objects rather than strings.
    """
    def __init__(self, filename: str, binary: bool = False) -> None:
        self.filename = filename
        # The open file, or None while the context has not been entered.
        self.file = None #type: Optional[IO]
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary
    def __enter__(self) -> 'read_file_lines':
        # Keep a reference to the file object so that __exit__ can close it.
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        self.generator = enumerate(self.file)
        return self
    def __iter__(self) -> Iterator[str]:
        # Only meaningful inside the `with` block, after __enter__ has run.
        assert self.generator is not None
        for line_number, content in self.generator:
            # Record the position before handing the line to the caller, so
            # that a failure while processing it is attributed to this line.
            self.line_number = line_number
            yield content
        self.line_number = 'exit'
    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        # Release the file handle whether or not the body succeeded.
        if self.file is not None:
            self.file.close()
        if exc_type is not None:
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
Gilles Peskine22fcf1b2021-03-10 01:02:39 +010070
71
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        # Whether to include intermediate macros in enumerations. Intermediate
        # macros serve as category headers and are not valid values of their
        # type. See `is_internal_name`.
        # Always false in this class, may be set to true in derived classes.
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal.
        Names ending in ``_BASE`` or ``_NONE``, and names containing
        ``_CATEGORY_``, are internal unless `self.include_intermediate`
        is true.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping spaces around commas."""
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        The first expression uses the first known value of every argument.
        Then, for one argument at a time, the remaining values of that
        argument are substituted while all the other arguments keep their
        first value.

        Any error while building the expressions (for example an argument
        name with no entry in `self.arguments_for`, or an argument with an
        empty list of values) is re-raised as an `Exception` that names the
        macro, chained from the original error.
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if not argspec:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Restore this position to its own first value before
                # varying the next argument.
                arguments[i] = values[0]
        except Exception as e:
            # Deliberately not BaseException: GeneratorExit (raised at the
            # `yield` when the consumer closes the generator early) and
            # KeyboardInterrupt must propagate unchanged.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        `seen` is updated in place with every expression that is yielded.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.

        Each distinct expression is generated only once.
        """
        seen = set() #type: Set[str]
        # The argument unpacking walks through `names` right away; the
        # per-name generators themselves only run as the result is consumed.
        return itertools.chain(*(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ))
Gilles Peskine22fcf1b2021-03-10 01:02:39 +0100206
Gilles Peskinee7c44552021-01-25 21:40:45 +0100207
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the matching PSA_xxx_IS_yyy macro.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match only matches at the very start of `name`,
        # so the first two tests cannot succeed for the names that read_line
        # passes in (they all start with 'PSA_ALG_'). The intent may have
        # been re.search on an inner component -- confirm before changing,
        # since that would alter which sets a name ends up in.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.

        Lines that are not a ``#define`` of a macro with zero or one
        parameter and a non-empty expansion are skipped. For a matching
        line, the parameter name (if any) is recorded in `self.argspecs`,
        then the macro is sorted into one of the collections according to
        the prefix of its name. Deprecated definitions, internal names
        (see `is_internal_name`) and names with an unrecognized prefix are
        not added to any collection.
        """
        # pylint: disable=too-many-branches
        m = self._define_directive_re.match(line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Drop comments from the expansion.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            self.argspecs[name] = [parameter]
        if self._deprecated_definition_re.match(expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        if (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # 13 == len('PSA_KEY_TYPE_'): insert 'IS_' after the prefix.
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            # 8 == len('PSA_ALG_'): insert the tester infix after the prefix.
            if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']:
                # A naming irregularity
                tester = name[:8] + 'IS_RANDOMIZED_' + name[8:]
            else:
                tester = name[:8] + 'IS_' + name[8:]
            self.algorithms_from_hash[name] = tester
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        # Any other macro is ignored.

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Read the lines of a C header and record the macros they define.

        `header_file` is an iterable of `bytes` lines, typically a file
        opened in binary mode. Lines ending with a backslash are joined
        with the following line, non-ASCII bytes are removed, and each
        resulting logical line is passed to `read_line`.
        """
        # Use one explicit iterator both for the loop and for fetching
        # continuation lines, so that any iterable (not only a file object)
        # is consumed consistently.
        lines = iter(header_file)
        for line in lines:
            m = self._continued_line_re.search(line)
            while m:
                # If the input ends on a continuation backslash, there is
                # nothing to append: just drop the backslash-newline.
                cont = next(lines, b'')
                line = line[:m.start(0)] + cont
                m = self._continued_line_re.search(line)
            line = self._nonascii_re.sub(rb'', line).decode('ascii')
            self.read_line(line)
Gilles Peskineb4edff92021-03-30 19:09:05 +0200324
325
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable.
    """

    def __init__(self) -> None:
        super().__init__()
        # Every PSA_xxx macro name seen in a header, whether or not it was
        # also added to one of the per-type sets.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        # NOTE(review): `_header_line_re` yields the prefix 'ECC_FAMILY' for a
        # name of the form PSA_ECC_FAMILY_xxx (and 'DH_FAMILY' for
        # PSA_DH_FAMILY_xxx), which has no entry below, so such names only
        # end up in `all_declared`. Confirm whether that is intended.
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'other_algorithm': [],
        } #type: Dict[str, List[Set[str]]]
        self.arguments_for['mac_length'] += ['1', '63']
        self.arguments_for['min_mac_length'] += ['1', '63']
        self.arguments_for['tag_length'] += ['1', '63']
        self.arguments_for['min_tag_length'] += ['1', '63']

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        `type_word` is one of ``'status'``, ``'algorithm'``, ``'ecc_curve'``,
        ``'dh_group'``, ``'key_type'`` or ``'key_usage'``. Any other value
        raises `KeyError`.
        """
        return {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is recorded in `self.all_declared`.
        Unless the name is excluded, it is also added to the set selected
        by its type prefix, and its argument names (if the definition has
        a non-empty parameter list) are recorded in `self.argspecs`.
        """
        m = self._header_line_re.match(line)
        if not m:
            return
        name = m.group(1)
        self.all_declared.add(name)
        if self._excluded_name_re.search(name) or \
           name in self._excluded_names or \
           self.is_internal_name(name):
            return
        dest = self.table_by_prefix.get(m.group(2))
        if dest is None:
            return
        dest.add(name)
        if m.group(3):
            self.argspecs[name] = self._argument_split(m.group(3))

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx"."""
        with read_file_lines(filename, binary=True) as lines:
            for line in lines:
                # Read as bytes and drop non-ASCII bytes before decoding.
                line = self._nonascii_re.sub(rb'', line).decode('ascii')
                self.parse_header_line(line)

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the identifiers in `expr` that are not in `self.all_declared`.

        An identifier here is an uppercase letter followed by one or more
        word characters.
        """
        for name in self._macro_identifier_re.findall(expr):
            if name not in self.all_declared:
                yield name

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that a test case argument only uses declared names.

        Return True if every identifier in `argument` has been declared.
        Otherwise raise an `Exception` listing the undeclared names.
        `function` is not used.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if undeclared:
            raise Exception('Undeclared names in test case', undeclared)
        return True

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        `PSAMacroEnumerator.distribute_arguments`.
        """
        # Remove all spaces, then put exactly one space after each comma.
        return argument.replace(' ', '').replace(',', ', ')

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests.

        The normalized `argument` is added to every set associated with the
        test function `function`. A function name with no entry in
        `self.table_by_test_function` raises `KeyError`.
        """
        sets = []
        if function.endswith('_algorithm'):
            sets.append(self.algorithms)
            if function == 'key_agreement_algorithm' and \
               argument.startswith('PSA_ALG_KEY_AGREEMENT('):
                # We only want *raw* key agreement algorithms as such, so
                # exclude ones that are already chained with a KDF.
                # Keep the expression as one to test as an algorithm.
                function = 'other_algorithm'
        sets += self.table_by_test_function[function]
        if self.accept_test_case_line(function, argument):
            for s in sets:
                s.add(self.normalize_argument(argument))

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                m = self._test_case_line_re.match(line)
                if m:
                    self.add_test_case_line(m.group(1), m.group(2))