Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 1 | """Collect macro definitions from header files. |
| 2 | """ |
| 3 | |
| 4 | # Copyright The Mbed TLS Contributors |
| 5 | # SPDX-License-Identifier: Apache-2.0 |
| 6 | # |
| 7 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 8 | # not use this file except in compliance with the License. |
| 9 | # You may obtain a copy of the License at |
| 10 | # |
| 11 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 12 | # |
| 13 | # Unless required by applicable law or agreed to in writing, software |
| 14 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 15 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 16 | # See the License for the specific language governing permissions and |
| 17 | # limitations under the License. |
| 18 | |
import itertools
import re
from typing import IO, Dict, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
| 22 | |
| 23 | |
class ReadFileLineException(Exception):
    """Error annotated with the file and line being processed.

    The exception message has the form ``in FILENAME at LINE``. Both parts
    are also available as the `filename` and `line_number` attributes.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        self.filename = filename
        self.line_number = line_number
        super().__init__('in {} at {}'.format(filename, line_number))
| 30 | |
| 31 | |
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    If `binary` is true, the file is opened in binary mode and the lines
    are bytes objects rather than strings.

    The `line_number` attribute is ``'entry'`` until the first line is read,
    then the zero-based index of the line most recently yielded, then
    ``'exit'`` once the whole file has been read.

    The file is closed when the ``with`` block is left, whether normally or
    through an exception.
    """
    def __init__(self, filename: str, binary: bool = False) -> None:
        self.filename = filename
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary
        # The open file, kept so that __exit__ can close it.
        self._input_file = None #type: Optional[IO]

    def __enter__(self) -> 'read_file_lines':
        self._input_file = open(self.filename, 'rb' if self.binary else 'r')
        self.generator = enumerate(self._input_file)
        return self

    def __iter__(self) -> Iterator[str]:
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        # Release the file handle before reporting any error, so that it is
        # not leaked on either the success or the failure path.
        if self._input_file is not None:
            self._input_file.close()
            self._input_file = None
        if exc_type is not None:
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
Gilles Peskine | 22fcf1b | 2021-03-10 01:02:39 +0100 | [diff] [blame] | 70 | |
| 71 | |
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """
    #pylint: disable=too-many-instance-attributes

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.lifetimes = set() #type: Set[str]
        self.locations = set() #type: Set[str]
        self.persistence_levels = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.pake_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        self.sign_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        # Whether to include intermediate macros in enumerations. Intermediate
        # macros serve as category headers and are not valid values of their
        # type. See `is_internal_name`.
        # Always false in this class, may be set to true in derived classes.
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped.

        Names ending in ``_FLAG`` or ``_MASK`` are always internal. Names
        ending in ``_BASE`` or ``_NONE``, and names containing
        ``_CATEGORY_``, are internal unless `self.include_intermediate`
        is true.
        """
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['sign_alg'] = sorted(self.sign_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)
        self.arguments_for['persistence'] = sorted(self.persistence_levels)
        self.arguments_for['location'] = sorted(self.locations)
        self.arguments_for['lifetime'] = sorted(self.lifetimes)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping spaces around commas."""
        return cls._argument_split_re.split(arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        The first expression uses the first known value of every argument.
        Then each argument in turn takes its other values while all the
        other arguments stay at their first value.

        Any error while building the expressions (for example an argument
        name with no entry, or no values, in `self.arguments_for`) is
        re-raised as an `Exception` naming the macro, chained to the
        original error.
        """
        # Catch Exception rather than BaseException: this is a generator, so
        # closing it before exhaustion raises GeneratorExit at a yield, and
        # that must propagate untouched.
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if argspec == []:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Put this argument back to its own first value before
                # varying the next one.
                arguments[i] = values[0]
        except Exception as e:
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        Every result that is yielded is also added to `seen`.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.

        Each distinct expression is generated only once.
        """
        seen = set() #type: Set[str]
        # The argument unpacking walks `names` right away; only the
        # expressions themselves are produced lazily.
        return itertools.chain(*(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ))
Gilles Peskine | 22fcf1b | 2021-03-10 01:02:39 +0100 | [diff] [blame] | 216 | |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 217 | |
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the matching predicate macro.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    @staticmethod
    def algorithm_tester(name: str) -> str:
        """The predicate for whether an algorithm is built from the given constructor.

        The given name must be the name of an algorithm constructor of the
        form ``PSA_ALG_xxx`` which is used as ``PSA_ALG_xxx(yyy)`` to build
        an algorithm value. Return the corresponding predicate macro which
        is used as ``predicate(alg)`` to test whether ``alg`` can be built
        as ``PSA_ALG_xxx(yyy)``. The predicate is usually called
        ``PSA_ALG_IS_xxx``.
        """
        prefix = 'PSA_ALG_'
        assert name.startswith(prefix)
        midfix = 'IS_'
        suffix = name[len(prefix):]
        # For DSA and ECDSA the predicate name carries an extra RANDOMIZED_.
        if suffix in ['DSA', 'ECDSA']:
            midfix += 'RANDOMIZED_'
        return prefix + midfix + suffix

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match anchors at the start of `name`, so the two
        # name-based tests below can only succeed for names that begin with
        # MAC or KDF. read_line only passes names starting with PSA_ALG_,
        # so from there only the expansion-based tests can match. Confirm
        # whether the name tests were meant to look past the prefix.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.

        Lines that are not a ``#define`` of a macro with at most one
        parameter and a non-empty expansion are ignored. So are macros
        whose name does not belong to one of the PSA_xxx families
        handled below.
        """
        # pylint: disable=too-many-branches
        m = self._define_directive_re.match(line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Strip C comments from the expansion.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        # The parameter is recorded even for macros that are skipped below.
        if parameter:
            self.argspecs[name] = [parameter]
        if self._deprecated_definition_re.match(expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        key_type_prefix = 'PSA_KEY_TYPE_'
        if (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith(key_type_prefix) and not parameter:
            self.key_types.add(name)
        elif name.startswith(key_type_prefix) and parameter == 'curve':
            self.key_types_from_curve[name] = \
                key_type_prefix + 'IS_' + name[len(key_type_prefix):]
        elif name.startswith(key_type_prefix) and parameter == 'group':
            self.key_types_from_group[name] = \
                key_type_prefix + 'IS_' + name[len(key_type_prefix):]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            self.algorithms_from_hash[name] = self.algorithm_tester(name)
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        # Any other macro is of no interest here.

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterable[bytes]) -> None:
        """Parse a C header and record the PSA identifiers that it defines.

        `header_file` yields the lines of the header as bytes, for example
        a file object opened in binary mode. Lines ending with a backslash
        are joined with the following line before being parsed, and
        non-ASCII bytes are dropped.
        """
        # Use an explicit iterator so that continuation lines can be pulled
        # from inside the loop whatever kind of iterable was passed in.
        lines = iter(header_file)
        for line in lines:
            m = self._continued_line_re.search(line)
            while m:
                # If the very last line ends with a backslash there is
                # nothing left to join: use an empty continuation instead
                # of letting StopIteration escape to the caller.
                cont = next(lines, b'')
                line = line[:m.start(0)] + cont
                m = self._continued_line_re.search(line)
            line = self._nonascii_re.sub(rb'', line).decode('ascii')
            self.read_line(line)
Gilles Peskine | b4edff9 | 2021-03-30 19:09:05 +0200 | [diff] [blame] | 348 | |
| 349 | |
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable. The information comes from C headers (`parse_header`)
    and from test data files (`parse_test_cases`).
    """

    def __init__(self) -> None:
        super().__init__()
        # Every macro name matched in a header, including excluded ones.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        # NOTE(review): for a name like PSA_ECC_FAMILY_xxx, _header_line_re
        # captures ECC_FAMILY as the prefix, which is not a key of this
        # table. Confirm whether that is intended.
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_LIFETIME': self.lifetimes,
            'KEY_LOCATION': self.locations,
            'KEY_PERSISTENCE': self.persistence_levels,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms, self.sign_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [self.sign_algorithms],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'pake_algorithm': [self.pake_algorithms],
            'other_algorithm': [],
            'lifetime': [self.lifetimes],
        } #type: Dict[str, List[Set[str]]]
        # Lengths to try for the length-like macro arguments.
        for length_argument in ('mac_length', 'min_mac_length',
                                'tag_length', 'min_tag_length'):
            self.arguments_for[length_argument] += ['1', '63']

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        self.pake_algorithms.add('0x0a0000ff')
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type.

        Raise KeyError if `type_word` is not one of the supported type words.
        """
        names_by_type = {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }
        return names_by_type[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = re.compile(
        r'#define +'
        r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)'
        r'(?:\(([^\n()]*)\))?'
    )
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matched macro name is remembered in `self.all_declared`.
        Names that are not excluded and whose prefix is known are also
        added to the set for that prefix, and the argument names of
        function-like macros are recorded in `self.argspecs`.
        """
        m = self._header_line_re.match(line)
        if m is None:
            return
        name, prefix, parameters = m.groups()
        self.all_declared.add(name)
        if self._excluded_name_re.search(name):
            return
        if name in self._excluded_names:
            return
        if self.is_internal_name(name):
            return
        dest = self.table_by_prefix.get(prefix)
        if dest is None:
            return
        dest.add(name)
        if parameters:
            self.argspecs[name] = self._argument_split(parameters)

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx".

        The file is read as bytes and non-ASCII bytes are dropped before
        each line is parsed.
        """
        with read_file_lines(filename, binary=True) as lines:
            for raw_line in lines:
                text = self._nonascii_re.sub(rb'', raw_line).decode('ascii')
                self.parse_header_line(text)

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the macro-like identifiers in `expr` that no header declared."""
        for candidate in self._macro_identifier_re.findall(expr):
            if candidate in self.all_declared:
                continue
            yield candidate

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that `argument` only uses declared names.

        Return True if so, otherwise raise an exception listing the
        undeclared names.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if not undeclared:
            return True
        raise Exception('Undeclared names in test case', undeclared)

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        `PSAMacroEnumerator.distribute_arguments`.
        """
        without_spaces = re.sub(r' +', r'', argument)
        return re.sub(r',', r', ', without_spaces)

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests."""
        is_algorithm_test = function.endswith('_algorithm')
        if is_algorithm_test and \
           function == 'key_agreement_algorithm' and \
           argument.startswith('PSA_ALG_KEY_AGREEMENT('):
            # We only want *raw* key agreement algorithms as such, so
            # exclude ones that are already chained with a KDF.
            # Keep the expression as one to test as an algorithm.
            function = 'other_algorithm'
        targets = [] #type: List[Set[str]]
        if is_algorithm_test:
            targets.append(self.algorithms)
        targets.extend(self.table_by_test_function[function])
        if not self.accept_test_case_line(function, argument):
            return
        normalized = self.normalize_argument(argument)
        for target in targets:
            target.add(normalized)

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                m = self._test_case_line_re.match(line)
                if m is None:
                    continue
                function, argument = m.groups()
                self.add_test_case_line(function, argument)