blob: ec8e18435ad3dbc7b5e18775e3453777f5e6a9e6 [file] [log] [blame]
Gilles Peskinee7c44552021-01-25 21:40:45 +01001"""Collect macro definitions from header files.
2"""
3
4# Copyright The Mbed TLS Contributors
5# SPDX-License-Identifier: Apache-2.0
6#
7# Licensed under the Apache License, Version 2.0 (the "License"); you may
8# not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18
import itertools
import re
from typing import IO, Dict, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
22
23
class ReadFileLineException(Exception):
    """Exception that records where in a file a failure happened.

    The exception message has the form ``in <filename> at <line_number>``.
    The same two values are also available as the `filename` and
    `line_number` attributes.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        super().__init__('in {} at {}'.format(filename, line_number))
        self.filename = filename
        self.line_number = line_number
30
31
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    The file is opened when the context is entered and closed when the
    context is left, whether or not an exception occurred.

    If `binary` is true, the file is opened in binary mode and the lines
    are bytes objects rather than strings.
    """
    def __init__(self, filename: str, binary: bool = False) -> None:
        self.filename = filename
        # The open file, or None while the context has not been entered.
        self.file = None #type: Optional[IO]
        # Position indicator used in error reports: 'entry' before the first
        # line has been read, then the index of the current line as produced
        # by enumerate() (so the first line is 0), then 'exit' once the file
        # has been read to the end.
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary
    def __enter__(self) -> 'read_file_lines':
        # Keep a reference to the file object so that __exit__ can close it.
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        self.generator = enumerate(self.file)
        return self
    def __iter__(self) -> Iterator[str]:
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'
    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        # Always release the file handle, even when the body failed.
        if self.file is not None:
            self.file.close()
        if exc_type is not None:
            # Annotate the failure with the file name and current position,
            # keeping the original exception as the cause.
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
Gilles Peskine22fcf1b2021-03-10 01:02:39 +010070
71
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Unless
        `self.include_intermediate` is true, names ending in ``_BASE`` or
        ``_NONE`` and names containing ``_CATEGORY_`` are internal as well.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping spaces around commas."""
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with an empty argument list, yield "name()".
        If name is a macro with arguments, yield a series of
        "name(arg1, ..., argN)" where each argument takes each possible
        value at least once: first every argument takes its first known
        value, then each argument in turn runs through its other values
        while the remaining arguments stay at their first value.

        Any error while generating (for example an argument kind with no
        known value) is re-raised as an `Exception` that mentions `name`,
        with the original error as its cause.
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if argspec == []:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Restore this argument to its own first value before
                # varying the next one.
                arguments[i] = values[0]
        except Exception as e:
            # Only wrap ordinary errors. GeneratorExit (raised when a
            # partially consumed generator is closed) and KeyboardInterrupt
            # must propagate unchanged.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        `seen` is updated with every result that is yielded.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.

        Each distinct expression is generated only once.
        """
        seen = set() #type: Set[str]
        # The star-unpacking reads `names` completely right away, so the
        # caller may modify the collection while consuming the result.
        return itertools.chain(*(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ))
Gilles Peskine22fcf1b2021-03-10 01:02:39 +0100202
Gilles Peskinee7c44552021-01-25 21:40:45 +0100203
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the matching PSA_xxx_IS_yyy macro.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match anchors at the start of `name`, and the only
        # caller in this class passes names that start with 'PSA_ALG_', so
        # the first two branches look unreachable from there. Confirm the
        # intent before relying on them.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line):
        """Parse a C header line and record the PSA identifier it defines if any.

        This function analyzes lines that contain a "#define" directive
        (up to non-significant whitespace) and skips all non-matching lines.
        Only macros whose name starts with a recognized "PSA_" prefix are
        recorded in the per-type sets. The parameter name of any
        single-parameter macro is recorded in `self.argspecs`.
        """
        # pylint: disable=too-many-branches
        m = self._define_directive_re.match(line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Blank out C comments so that they cannot influence the analysis
        # of the expansion.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            self.argspecs[name] = [parameter]
        if self._deprecated_definition_re.match(expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        if (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # name[:13] is 'PSA_KEY_TYPE_'.
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            # name[:8] is 'PSA_ALG_'.
            if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']:
                # A naming irregularity
                tester = name[:8] + 'IS_RANDOMIZED_' + name[8:]
            else:
                tester = name[:8] + 'IS_' + name[8:]
            self.algorithms_from_hash[name] = tester
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        # Any other macro is ignored.

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file):
        """Parse a C header and record the PSA macros that it defines.

        `header_file` is an iterable of lines as bytes objects, for example
        a file opened in binary mode. Lines that end with a backslash-newline
        are joined with the following line before being analyzed. Non-ASCII
        bytes are removed.
        """
        # Use an explicit iterator so that continuation lines can be pulled
        # from any iterable, not only from file objects.
        lines = iter(header_file)
        for line in lines:
            m = self._continued_line_re.search(line)
            while m:
                cont = next(lines, None)
                if cont is None:
                    # The input ends with a continuation marker: there is
                    # nothing to join, so just drop the marker.
                    line = line[:m.start(0)]
                    break
                line = line[:m.start(0)] + cont
                m = self._continued_line_re.search(line)
            line = self._nonascii_re.sub(b'', line).decode('ascii')
            self.read_line(line)
Gilles Peskineb4edff92021-03-30 19:09:05 +0200320
321
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable.
    """

    def __init__(self) -> None:
        super().__init__()
        # Every PSA_xxx macro name seen in a header, whether tested or not.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        # NOTE(review): for a name such as PSA_ECC_FAMILY_xxx, group 2 of
        # _header_line_re is 'ECC_FAMILY', which has no entry below. Confirm
        # whether 'ECC_CURVE' and 'DH_GROUP' are still the intended keys.
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'other_algorithm': [],
        } #type: Dict[str, List[Set[str]]]
        # Lengths to try for the length-like macro arguments.
        for length_argument in ('mac_length', 'min_mac_length',
                                'tag_length', 'min_tag_length'):
            self.arguments_for[length_argument].extend(['1', '63'])

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        Raise `KeyError` if `type_word` is not one of the known type words.
        """
        names_by_type = {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }
        return names_by_type[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is recorded in `self.all_declared`.
        Names that are not excluded and whose prefix is known are also added
        to the set for their type, together with their argument names if the
        macro takes arguments.
        """
        found = self._header_line_re.match(line)
        if found is None:
            return
        name, prefix, parameters = found.group(1, 2, 3)
        self.all_declared.add(name)
        is_excluded = (self._excluded_name_re.search(name) or
                       name in self._excluded_names or
                       self.is_internal_name(name))
        if is_excluded:
            return
        target = self.table_by_prefix.get(prefix)
        if target is None:
            return
        target.add(name)
        if parameters:
            self.argspecs[name] = self._argument_split(parameters)

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx".

        The file is read in binary mode and non-ASCII bytes are removed
        before each line is analyzed.
        """
        with read_file_lines(filename, binary=True) as lines:
            for raw_line in lines:
                text = self._nonascii_re.sub(b'', raw_line).decode('ascii')
                self.parse_header_line(text)

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the identifiers in `expr` that were not declared in a header.

        Identifiers are the words matching `_macro_identifier_re`. Declared
        names are the ones in `self.all_declared`.
        """
        yield from (
            candidate
            for candidate in self._macro_identifier_re.findall(expr)
            if candidate not in self.all_declared
        )

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that a test case argument only uses declared names.

        Return True if so, otherwise raise an exception that lists the
        undeclared names.
        """
        #pylint: disable=unused-argument
        missing = list(self.generate_undeclared_names(argument))
        if not missing:
            return True
        raise Exception('Undeclared names in test case', missing)

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        `PSAMacroEnumerator.distribute_arguments`: no spaces, except for
        exactly one space after each comma.
        """
        without_spaces = argument.replace(' ', '')
        return without_spaces.replace(',', ', ')

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests.

        Add the normalized argument to every set associated with the test
        function. Raise `KeyError` if the test function is not known.
        """
        targets = [] #type: List[Set[str]]
        if function.endswith('_algorithm'):
            targets.append(self.algorithms)
            is_chained_agreement = (
                function == 'key_agreement_algorithm' and
                argument.startswith('PSA_ALG_KEY_AGREEMENT(')
            )
            if is_chained_agreement:
                # We only want *raw* key agreement algorithms as such, so
                # exclude ones that are already chained with a KDF.
                # Keep the expression as one to test as an algorithm.
                function = 'other_algorithm'
        targets.extend(self.table_by_test_function[function])
        if self.accept_test_case_line(function, argument):
            for target in targets:
                target.add(self.normalize_argument(argument))

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                found = self._test_case_line_re.match(line)
                if found is None:
                    continue
                self.add_test_case_line(found.group(1), found.group(2))