"""Collect macro definitions from header files.
"""

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import re
from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
22
23
class ReadFileLineException(Exception):
    """Exception that records a position in a file.

    The exception message has the form ``in <filename> at <line_number>``.
    The file name and the line number are also available as the
    ``filename`` and ``line_number`` attributes.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        super().__init__('in {} at {}'.format(filename, line_number))
        self.line_number = line_number
        self.filename = filename
30
31
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    The file is opened on entry to the ``with`` block and closed on exit,
    whether or not an exception occurred.

    The ``line_number`` attribute is ``'entry'`` before the first line has
    been read, the index of the current line as produced by ``enumerate``
    (so the first line is 0) while iterating, and ``'exit'`` once all lines
    have been consumed.
    """

    def __init__(self, filename: str, binary: bool = False) -> None:
        """Prepare to read `filename`.

        * filename: path of the file to read.
        * binary: if true, open the file in binary mode (lines are bytes);
          otherwise open it in text mode.

        The file is not opened until the context is entered.
        """
        self.filename = filename
        # The open file object, or None before the context is entered.
        self.file = None #type: Optional[IO]
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary

    def __enter__(self) -> 'read_file_lines':
        """Open the file and return this object, which iterates over lines."""
        # Keep a reference to the file object so that __exit__ can close it.
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        self.generator = enumerate(self.file)
        return self

    def __iter__(self) -> Iterator[str]:
        """Yield the lines of the file, tracking the current line number."""
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        """Close the file and annotate any exception with the file position.

        If the body of the ``with`` block raised an exception, raise a
        ReadFileLineException chained from it.
        """
        if self.file is not None:
            self.file.close()
        if exc_type is not None:
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
Gilles Peskine22fcf1b2021-03-10 01:02:39 +010070
71
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Unless
        `self.include_intermediate` is true, names ending in ``_BASE`` or
        ``_NONE`` and names containing ``_CATEGORY_`` are internal as well.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments."""
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list into individual arguments.

        Spaces around the commas are discarded.
        """
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        The first expression uses the first known value of every argument.
        Then each argument in turn takes its remaining values while all the
        other arguments keep their first value.

        If an error occurs (for example an argument name has no entry in
        `self.arguments_for`, or has no known value), raise an Exception
        naming the macro, chained from the original error.
        """
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if not argspec:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Restore this argument's own first value before varying
                # the next argument.
                arguments[i] = values[0]
        except Exception as e:
            # Deliberately not BaseException: GeneratorExit is raised at a
            # yield when the consumer closes the generator early, and it
            # must propagate unchanged.
            raise Exception('distribute_arguments({})'.format(name)) from e

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.
        """
        # `names` is fully consumed here, so the caller may modify the
        # collection while iterating over the result.
        return itertools.chain(*map(self.distribute_arguments, names))
185
Gilles Peskinee7c44552021-01-25 21:40:45 +0100186
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the corresponding PSA_xxx_IS_yyy
        # macro, for key types built from a curve, key types built from a
        # group and algorithms built from a hash algorithm respectively.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match only matches at the start of the string, so
        # the first two branches cannot match a name that starts with
        # "PSA_ALG_". Confirm whether the full name or the part after the
        # prefix was meant to be tested.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.

        This function analyzes "#define" lines (up to non-significant
        whitespace) that define a macro with at most one parameter and a
        non-empty expansion, and skips all non-matching lines.

        The parameter name of every macro with a parameter is recorded in
        `self.argspecs`. Apart from that, deprecated definitions, internal
        names (see is_internal_name) and names that do not have one of the
        recognized ``PSA_xxx`` prefixes are ignored.
        """
        # pylint: disable=too-many-branches
        m = self._define_directive_re.match(line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Replace comments in the expansion by a space.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            # Recorded before any filtering below, so this also covers
            # deprecated and internal macros.
            self.argspecs[name] = [parameter]
        if self._deprecated_definition_re.match(expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        if (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # 13 == len('PSA_KEY_TYPE_'): PSA_KEY_TYPE_xxx -> PSA_KEY_TYPE_IS_xxx
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            # 8 == len('PSA_ALG_'): PSA_ALG_xxx -> PSA_ALG_IS_xxx
            if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']:
                # A naming irregularity
                tester = name[:8] + 'IS_RANDOMIZED_' + name[8:]
            else:
                tester = name[:8] + 'IS_' + name[8:]
            self.algorithms_from_hash[name] = tester
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        # Any other macro is ignored.

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Parse a C header file and record the PSA identifiers it defines.

        * header_file: an iterable of lines as bytes, for example a file
          opened in binary mode.

        Lines ending with a backslash are joined with the following line
        before being analyzed. Non-ASCII bytes are discarded.
        """
        # Use one explicit iterator for both the loop and the continuation
        # lines, so that any iterable works, not just iterators.
        lines = iter(header_file)
        for line in lines:
            m = self._continued_line_re.search(line)
            while m:
                cont = next(lines, None)
                if cont is None:
                    # Backslash-newline at the end of the input: there is
                    # nothing to join, so just drop the continuation marker.
                    line = line[:m.start(0)]
                    break
                line = line[:m.start(0)] + cont
                m = self._continued_line_re.search(line)
            line = self._nonascii_re.sub(rb'', line).decode('ascii')
            self.read_line(line)
Gilles Peskineb4edff92021-03-30 19:09:05 +0200303
304
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable.
    """

    def __init__(self) -> None:
        """Set up empty sets of names and the tables used to fill them."""
        super().__init__()
        # Every macro name seen by parse_header_line, excluded or not.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        # NOTE(review): _header_line_re captures 'ECC_FAMILY' / 'DH_FAMILY'
        # for PSA_ECC_FAMILY_xxx / PSA_DH_FAMILY_xxx names, and those have no
        # entry in this table. Confirm whether that is intended.
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'other_algorithm': [],
        } #type: Dict[str, List[Set[str]]]
        # Values to try for each of the length arguments.
        for length_argument in ('mac_length', 'min_mac_length',
                                'tag_length', 'min_tag_length'):
            self.arguments_for[length_argument].extend(['1', '63'])

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        for names, value in ((self.algorithms, '0xffffffff'),
                             (self.ecc_curves, '0xff'),
                             (self.dh_groups, '0xff'),
                             (self.key_types, '0xffff'),
                             (self.key_usage_flags, '0x80000000')):
            names.add(value)

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        for names, value in (
                # 0x020000ff is PSA_ALG_ANY_HASH
                (self.hash_algorithms, '0x020000fe'),
                (self.mac_algorithms, '0x03007fff'),
                (self.ka_algorithms, '0x09fc0000'),
                (self.kdf_algorithms, '0x080000ff')):
            names.add(value)
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type."""
        names_by_type = {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }
        return names_by_type[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is added to `self.all_declared`. Names
        that are not excluded are also added to the set selected by their
        prefix, and their argument list, if any, is recorded in
        `self.argspecs`.
        """
        m = self._header_line_re.match(line)
        if m is None:
            return
        name, prefix, argument_list = m.group(1), m.group(2), m.group(3)
        self.all_declared.add(name)
        excluded = (self._excluded_name_re.search(name) or
                    name in self._excluded_names or
                    self.is_internal_name(name))
        if excluded:
            return
        dest = self.table_by_prefix.get(prefix)
        if dest is None:
            return
        dest.add(name)
        if argument_list:
            self.argspecs[name] = self._argument_split(argument_list)

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx"."""
        with read_file_lines(filename, binary=True) as lines:
            for raw_line in lines:
                # Discard non-ASCII bytes so that the line decodes as ASCII.
                ascii_line = self._nonascii_re.sub(rb'', raw_line)
                self.parse_header_line(ascii_line.decode('ascii'))

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the macro-like identifiers in expr that are not declared.

        An identifier counts as declared if it is in `self.all_declared`.
        """
        for candidate in self._macro_identifier_re.findall(expr):
            if candidate in self.all_declared:
                continue
            yield candidate

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that all names used in a test case argument are declared.

        Return True if they are, otherwise raise an exception listing the
        undeclared names.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if not undeclared:
            return True
        raise Exception('Undeclared names in test case', undeclared)

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests."""
        is_algorithm_test = function.endswith('_algorithm')
        if function == 'key_agreement_algorithm' and \
           argument.startswith('PSA_ALG_KEY_AGREEMENT('):
            # We only want *raw* key agreement algorithms as such, so
            # exclude ones that are already chained with a KDF.
            # Keep the expression as one to test as an algorithm.
            function = 'other_algorithm'
        targets = [self.algorithms] if is_algorithm_test else []
        targets.extend(self.table_by_test_function[function])
        if self.accept_test_case_line(function, argument):
            for target in targets:
                target.add(argument)

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                m = self._test_case_line_re.match(line)
                if m is None:
                    continue
                self.add_test_case_line(m.group(1), m.group(2))
469 self.add_test_case_line(m.group(1), m.group(2))