blob: bc432be9f5d5d3a718623dc9cb9d93cc81b7a1ff [file] [log] [blame]
Gilles Peskinee7c44552021-01-25 21:40:45 +01001"""Collect macro definitions from header files.
2"""
3
4# Copyright The Mbed TLS Contributors
5# SPDX-License-Identifier: Apache-2.0
6#
7# Licensed under the Apache License, Version 2.0 (the "License"); you may
8# not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18
import itertools
import re
from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
22
23
class ReadFileLineException(Exception):
    """Exception that records where in a file a failure happened.

    The exception message has the form ``in <filename> at <line_number>``.
    The file name and the position are also kept as the attributes
    `filename` and `line_number`.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        # The position is either a numeric line index or a textual marker,
        # hence the Union type.
        self.filename = filename
        self.line_number = line_number
        super().__init__('in {} at {}'.format(filename, line_number))
30
31
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    The file is opened when the context is entered and closed when the
    context is exited, whether or not an exception occurred.
    """

    def __init__(self, filename: str, binary: bool = False) -> None:
        """Prepare to read `filename`.

        * filename: path of the file to read.
        * binary: if true, open the file in binary mode (``'rb'``) instead
          of text mode (``'r'``).
        """
        self.filename = filename
        # The open file object; None until the context has been entered.
        self.file = None #type: Optional[IO]
        # Position indicator: 'entry' before the first line has been read,
        # then the index that enumerate() gives to the current line
        # (counting from 0), and finally 'exit' once all lines were consumed.
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary

    def __enter__(self) -> 'read_file_lines':
        """Open the file and return this object, which iterates over lines."""
        # Keep a reference to the file object so that __exit__ can close it.
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        self.generator = enumerate(self.file)
        return self

    def __iter__(self) -> Iterator[str]:
        """Yield the lines of the file, tracking the current position."""
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        """Close the file and annotate any exception raised in the context.

        If the body of the ``with`` statement raised an exception, raise a
        `ReadFileLineException` carrying the file name and the current
        position, chained from the original exception.
        """
        # Always release the file handle, including on the error path.
        if self.file is not None:
            self.file.close()
        if exc_type is not None:
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
Gilles Peskine22fcf1b2021-03-10 01:02:39 +010070
71
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.pake_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        # Whether to include intermediate macros in enumerations. Intermediate
        # macros serve as category headers and are not valid values of their
        # type. See `is_internal_name`.
        # Always false in this class, may be set to true in derived classes.
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Names
        ending in ``_BASE`` or ``_NONE``, and names containing
        ``_CATEGORY_``, are internal unless `self.include_intermediate`
        is true.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping spaces around commas."""
        return re.split(cls._argument_split_re, arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        Only one argument at a time departs from its first listed value:
        this is not the full cartesian product of the argument values.

        Any error while building the expressions (for example an argument
        name with no entry in `self.arguments_for`, or an empty list of
        values) is re-raised as an `Exception` that names the macro and is
        chained from the original error.
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if argspec == []:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            # Baseline call: every argument at the first value of its list.
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Restore this argument to the first value of its *own*
                # list before varying the next argument.
                arguments[i] = values[0]
        except Exception as e:
            # Deliberately not BaseException: GeneratorExit must propagate
            # unchanged so that closing a partially consumed generator
            # does not raise.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        `seen` is updated in place with every expression that is yielded.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.

        Each distinct expression is generated at most once.
        """
        # Shared between all names so that duplicates are filtered globally.
        seen = set() #type: Set[str]
        return itertools.chain(*(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ))
Gilles Peskine22fcf1b2021-03-10 01:02:39 +0100207
Gilles Peskinee7c44552021-01-25 21:40:45 +0100208
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the corresponding PSA_xxx_IS_yyy
        # macro, for constructors taking a curve, a group or a hash algorithm
        # respectively.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match anchors at the start of `name`, and read_line
        # only passes names that start with 'PSA_ALG_', so the two name-based
        # tests below cannot match for those calls. Confirm the intent before
        # changing this, since it would alter the classification.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.

        This function analyzes lines that contain a "#define" directive
        (up to non-significant whitespace) and skips all non-matching lines.
        Among the matching lines, only macros whose name has one of the
        PSA_xxx prefixes tested below are recorded.
        """
        # pylint: disable=too-many-branches
        m = re.match(self._define_directive_re, line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Replace C comments in the expansion by a space.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            # Recorded even for macros that are skipped below.
            self.argspecs[name] = [parameter]
        if re.match(self._deprecated_definition_re, expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        elif (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # 13 == len('PSA_KEY_TYPE_'): insert 'IS_' after the prefix.
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            # 8 == len('PSA_ALG_'): insert the tester infix after the prefix.
            if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']:
                # A naming irregularity
                tester = name[:8] + 'IS_RANDOMIZED_' + name[8:]
            else:
                tester = name[:8] + 'IS_' + name[8:]
            self.algorithms_from_hash[name] = tester
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        else:
            # Other macro without parameter
            return

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Parse a C header and record the PSA macro definitions it contains.

        * header_file: an iterable of lines as bytes, for example a file
          object opened in binary mode.

        Lines ending with a backslash-newline are joined with the following
        line before being analyzed. Non-ASCII bytes are removed.
        """
        # Use an explicit iterator so that continuation lines can be pulled
        # from the same stream, even if header_file is a plain list.
        lines = iter(header_file)
        for line in lines:
            m = re.search(self._continued_line_re, line)
            while m:
                # At the end of the input, a dangling continuation is
                # joined with nothing rather than leaking StopIteration.
                cont = next(lines, b'')
                line = line[:m.start(0)] + cont
                m = re.search(self._continued_line_re, line)
            line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
            self.read_line(line)
Gilles Peskineb4edff92021-03-30 19:09:05 +0200325
326
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable.
    """

    def __init__(self) -> None:
        """Set up empty collections and the lookup tables that feed them."""
        super().__init__()
        # Every PSA_xxx macro name seen in a header, excluded or not.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes: maps the type word captured from a macro name
        # (see `_header_line_re`) to the set collecting names of that type.
        # NOTE(review): for a name like PSA_ECC_FAMILY_xxx the regex captures
        # 'ECC_FAMILY', which has no entry here -- confirm this is intended.
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions: maps a test function name to the sets that should
        # receive the argument of a test case calling that function.
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'pake_algorithm': [self.pake_algorithms],
            'other_algorithm': [],
            'lifetime': [],
        } #type: Dict[str, List[Set[str]]]
        # Sample lengths used as values for the length-like macro arguments.
        for length_argument in ('mac_length', 'min_mac_length',
                                'tag_length', 'min_tag_length'):
            self.arguments_for[length_argument] += ['1', '63']

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        self.pake_algorithms.add('0x0a0000ff')
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        Raise KeyError if `type_word` is not one of the known type words.
        """
        names_by_type = {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }
        return names_by_type[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = re.compile(
        r'#define +'
        r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)'
        r'(?:\(([^\n()]*)\))?'
    )
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching name is recorded in `self.all_declared`. Names that
        are not excluded and whose type word is known are also added to the
        set for their type, and their argument list, if any, is recorded in
        `self.argspecs`.
        """
        found = self._header_line_re.match(line)
        if found is None:
            return
        name, type_word, argument_list = found.group(1, 2, 3)
        self.all_declared.add(name)
        excluded = (self._excluded_name_re.search(name) or
                    name in self._excluded_names or
                    self.is_internal_name(name))
        if excluded:
            return
        dest = self.table_by_prefix.get(type_word)
        if dest is None:
            return
        dest.add(name)
        if argument_list:
            self.argspecs[name] = self._argument_split(argument_list)

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx"."""
        with read_file_lines(filename, binary=True) as lines:
            for raw_line in lines:
                # Drop non-ASCII bytes so that the line decodes as ASCII.
                text = self._nonascii_re.sub(rb'', raw_line).decode('ascii')
                self.parse_header_line(text)

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the macro-like identifiers in `expr` that were not declared.

        An identifier counts as declared if it is in `self.all_declared`.
        """
        yield from (identifier
                    for identifier in self._macro_identifier_re.findall(expr)
                    if identifier not in self.all_declared)

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check a test case argument; raise if it uses undeclared names.

        Return True if every identifier in `argument` has been declared.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if not undeclared:
            return True
        raise Exception('Undeclared names in test case', undeclared)

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        `PSAMacroEnumerator.distribute_arguments`.
        """
        # Remove every space, then put exactly one space after each comma.
        without_spaces = argument.replace(' ', '')
        return without_spaces.replace(',', ', ')

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests."""
        targets = []
        if function.endswith('_algorithm'):
            targets.append(self.algorithms)
            is_chained_agreement = (
                function == 'key_agreement_algorithm' and
                argument.startswith('PSA_ALG_KEY_AGREEMENT(')
            )
            if is_chained_agreement:
                # We only want *raw* key agreement algorithms as such, so
                # exclude ones that are already chained with a KDF.
                # Keep the expression as one to test as an algorithm.
                function = 'other_algorithm'
        targets.extend(self.table_by_test_function[function])
        if self.accept_test_case_line(function, argument):
            for target in targets:
                target.add(self.normalize_argument(argument))

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                found = self._test_case_line_re.match(line)
                if found is None:
                    continue
                self.add_test_case_line(found.group(1), found.group(2))