Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 1 | """Collect macro definitions from header files. |
| 2 | """ |
| 3 | |
| 4 | # Copyright The Mbed TLS Contributors |
| 5 | # SPDX-License-Identifier: Apache-2.0 |
| 6 | # |
| 7 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 8 | # not use this file except in compliance with the License. |
| 9 | # You may obtain a copy of the License at |
| 10 | # |
| 11 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 12 | # |
| 13 | # Unless required by applicable law or agreed to in writing, software |
| 14 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 15 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 16 | # See the License for the specific language governing permissions and |
| 17 | # limitations under the License. |
| 18 | |
Gilles Peskine | 22fcf1b | 2021-03-10 01:02:39 +0100 | [diff] [blame] | 19 | import itertools |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 20 | import re |
Gilles Peskine | 22fcf1b | 2021-03-10 01:02:39 +0100 | [diff] [blame] | 21 | from typing import Dict, Iterable, Iterator, List, Set |
| 22 | |
| 23 | |
| 24 | class PSAMacroEnumerator: |
| 25 | """Information about constructors of various PSA Crypto types. |
| 26 | |
| 27 | This includes macro names as well as information about their arguments |
| 28 | when applicable. |
| 29 | |
| 30 | This class only provides ways to enumerate expressions that evaluate to |
| 31 | values of the covered types. Derived classes are expected to populate |
| 32 | the set of known constructors of each kind, as well as populate |
| 33 | `self.arguments_for` for arguments that are not of a kind that is |
| 34 | enumerated here. |
| 35 | """ |
| 36 | |
| 37 | def __init__(self) -> None: |
| 38 | """Set up an empty set of known constructor macros. |
| 39 | """ |
| 40 | self.statuses = set() #type: Set[str] |
| 41 | self.algorithms = set() #type: Set[str] |
| 42 | self.ecc_curves = set() #type: Set[str] |
| 43 | self.dh_groups = set() #type: Set[str] |
| 44 | self.key_types = set() #type: Set[str] |
| 45 | self.key_usage_flags = set() #type: Set[str] |
| 46 | self.hash_algorithms = set() #type: Set[str] |
| 47 | self.mac_algorithms = set() #type: Set[str] |
| 48 | self.ka_algorithms = set() #type: Set[str] |
| 49 | self.kdf_algorithms = set() #type: Set[str] |
| 50 | self.aead_algorithms = set() #type: Set[str] |
| 51 | # macro name -> list of argument names |
| 52 | self.argspecs = {} #type: Dict[str, List[str]] |
| 53 | # argument name -> list of values |
| 54 | self.arguments_for = { |
| 55 | 'mac_length': [], |
| 56 | 'min_mac_length': [], |
| 57 | 'tag_length': [], |
| 58 | 'min_tag_length': [], |
| 59 | } #type: Dict[str, List[str]] |
| 60 | |
| 61 | def gather_arguments(self) -> None: |
| 62 | """Populate the list of values for macro arguments. |
| 63 | |
| 64 | Call this after parsing all the inputs. |
| 65 | """ |
| 66 | self.arguments_for['hash_alg'] = sorted(self.hash_algorithms) |
| 67 | self.arguments_for['mac_alg'] = sorted(self.mac_algorithms) |
| 68 | self.arguments_for['ka_alg'] = sorted(self.ka_algorithms) |
| 69 | self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms) |
| 70 | self.arguments_for['aead_alg'] = sorted(self.aead_algorithms) |
| 71 | self.arguments_for['curve'] = sorted(self.ecc_curves) |
| 72 | self.arguments_for['group'] = sorted(self.dh_groups) |
| 73 | |
| 74 | @staticmethod |
| 75 | def _format_arguments(name: str, arguments: Iterable[str]) -> str: |
| 76 | """Format a macro call with arguments..""" |
| 77 | return name + '(' + ', '.join(arguments) + ')' |
| 78 | |
| 79 | _argument_split_re = re.compile(r' *, *') |
| 80 | @classmethod |
| 81 | def _argument_split(cls, arguments: str) -> List[str]: |
| 82 | return re.split(cls._argument_split_re, arguments) |
| 83 | |
| 84 | def distribute_arguments(self, name: str) -> Iterator[str]: |
| 85 | """Generate macro calls with each tested argument set. |
| 86 | |
| 87 | If name is a macro without arguments, just yield "name". |
| 88 | If name is a macro with arguments, yield a series of |
| 89 | "name(arg1,...,argN)" where each argument takes each possible |
| 90 | value at least once. |
| 91 | """ |
| 92 | try: |
| 93 | if name not in self.argspecs: |
| 94 | yield name |
| 95 | return |
| 96 | argspec = self.argspecs[name] |
| 97 | if argspec == []: |
| 98 | yield name + '()' |
| 99 | return |
| 100 | argument_lists = [self.arguments_for[arg] for arg in argspec] |
| 101 | arguments = [values[0] for values in argument_lists] |
| 102 | yield self._format_arguments(name, arguments) |
| 103 | # Dear Pylint, enumerate won't work here since we're modifying |
| 104 | # the array. |
| 105 | # pylint: disable=consider-using-enumerate |
| 106 | for i in range(len(arguments)): |
| 107 | for value in argument_lists[i][1:]: |
| 108 | arguments[i] = value |
| 109 | yield self._format_arguments(name, arguments) |
| 110 | arguments[i] = argument_lists[0][0] |
| 111 | except BaseException as e: |
| 112 | raise Exception('distribute_arguments({})'.format(name)) from e |
| 113 | |
| 114 | def generate_expressions(self, names: Iterable[str]) -> Iterator[str]: |
| 115 | """Generate expressions covering values constructed from the given names. |
| 116 | |
| 117 | `names` can be any iterable collection of macro names. |
| 118 | |
| 119 | For example: |
| 120 | * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])`` |
| 121 | generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for |
| 122 | every known hash algorithm ``h``. |
| 123 | * ``macros.generate_expressions(macros.key_types)`` generates all |
| 124 | key types. |
| 125 | """ |
| 126 | return itertools.chain(*map(self.distribute_arguments, names)) |
| 127 | |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 128 | |
Gilles Peskine | 33c601c | 2021-03-10 01:25:50 +0100 | [diff] [blame] | 129 | class PSAMacroCollector(PSAMacroEnumerator): |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 130 | """Collect PSA crypto macro definitions from C header files. |
| 131 | """ |
| 132 | |
Gilles Peskine | 10ab267 | 2021-03-10 00:59:53 +0100 | [diff] [blame] | 133 | def __init__(self, include_intermediate: bool = False) -> None: |
Gilles Peskine | 13d60eb | 2021-01-25 22:42:14 +0100 | [diff] [blame] | 134 | """Set up an object to collect PSA macro definitions. |
| 135 | |
| 136 | Call the read_file method of the constructed object on each header file. |
| 137 | |
| 138 | * include_intermediate: if true, include intermediate macros such as |
| 139 | PSA_XXX_BASE that do not designate semantic values. |
| 140 | """ |
Gilles Peskine | 33c601c | 2021-03-10 01:25:50 +0100 | [diff] [blame] | 141 | super().__init__() |
Gilles Peskine | 13d60eb | 2021-01-25 22:42:14 +0100 | [diff] [blame] | 142 | self.include_intermediate = include_intermediate |
Gilles Peskine | 10ab267 | 2021-03-10 00:59:53 +0100 | [diff] [blame] | 143 | self.key_types_from_curve = {} #type: Dict[str, str] |
| 144 | self.key_types_from_group = {} #type: Dict[str, str] |
Gilles Peskine | 10ab267 | 2021-03-10 00:59:53 +0100 | [diff] [blame] | 145 | self.algorithms_from_hash = {} #type: Dict[str, str] |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 146 | |
Gilles Peskine | 10ab267 | 2021-03-10 00:59:53 +0100 | [diff] [blame] | 147 | def is_internal_name(self, name: str) -> bool: |
Gilles Peskine | f8deb75 | 2021-01-25 22:41:45 +0100 | [diff] [blame] | 148 | """Whether this is an internal macro. Internal macros will be skipped.""" |
Gilles Peskine | 13d60eb | 2021-01-25 22:42:14 +0100 | [diff] [blame] | 149 | if not self.include_intermediate: |
| 150 | if name.endswith('_BASE') or name.endswith('_NONE'): |
| 151 | return True |
| 152 | if '_CATEGORY_' in name: |
| 153 | return True |
Gilles Peskine | 0655b4f | 2021-01-25 22:44:36 +0100 | [diff] [blame] | 154 | return name.endswith('_FLAG') or name.endswith('_MASK') |
Gilles Peskine | f8deb75 | 2021-01-25 22:41:45 +0100 | [diff] [blame] | 155 | |
Gilles Peskine | 33c601c | 2021-03-10 01:25:50 +0100 | [diff] [blame] | 156 | def record_algorithm_subtype(self, name: str, expansion: str) -> None: |
| 157 | """Record the subtype of an algorithm constructor. |
| 158 | |
| 159 | Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm |
| 160 | is of a subtype that is tracked in its own set, add it to the relevant |
| 161 | set. |
| 162 | """ |
| 163 | # This code is very ad hoc and fragile. It should be replaced by |
| 164 | # something more robust. |
| 165 | if re.match(r'MAC(?:_|\Z)', name): |
| 166 | self.mac_algorithms.add(name) |
| 167 | elif re.match(r'KDF(?:_|\Z)', name): |
| 168 | self.kdf_algorithms.add(name) |
| 169 | elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion): |
| 170 | self.hash_algorithms.add(name) |
| 171 | elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion): |
| 172 | self.mac_algorithms.add(name) |
| 173 | elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion): |
| 174 | self.aead_algorithms.add(name) |
| 175 | elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion): |
| 176 | self.ka_algorithms.add(name) |
| 177 | elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion): |
| 178 | self.kdf_algorithms.add(name) |
| 179 | |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 180 | # "#define" followed by a macro name with either no parameters |
| 181 | # or a single parameter and a non-empty expansion. |
| 182 | # Grab the macro name in group 1, the parameter name if any in group 2 |
| 183 | # and the expansion in group 3. |
| 184 | _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' + |
| 185 | r'(?:\s+|\((\w+)\)\s*)' + |
| 186 | r'(.+)') |
| 187 | _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED') |
| 188 | |
| 189 | def read_line(self, line): |
| 190 | """Parse a C header line and record the PSA identifier it defines if any. |
| 191 | This function analyzes lines that start with "#define PSA_" |
| 192 | (up to non-significant whitespace) and skips all non-matching lines. |
| 193 | """ |
| 194 | # pylint: disable=too-many-branches |
| 195 | m = re.match(self._define_directive_re, line) |
| 196 | if not m: |
| 197 | return |
| 198 | name, parameter, expansion = m.groups() |
| 199 | expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion) |
Gilles Peskine | 33c601c | 2021-03-10 01:25:50 +0100 | [diff] [blame] | 200 | if parameter: |
| 201 | self.argspecs[name] = [parameter] |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 202 | if re.match(self._deprecated_definition_re, expansion): |
| 203 | # Skip deprecated values, which are assumed to be |
| 204 | # backward compatibility aliases that share |
| 205 | # numerical values with non-deprecated values. |
| 206 | return |
Gilles Peskine | f8deb75 | 2021-01-25 22:41:45 +0100 | [diff] [blame] | 207 | if self.is_internal_name(name): |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 208 | # Macro only to build actual values |
| 209 | return |
| 210 | elif (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \ |
| 211 | and not parameter: |
| 212 | self.statuses.add(name) |
| 213 | elif name.startswith('PSA_KEY_TYPE_') and not parameter: |
| 214 | self.key_types.add(name) |
| 215 | elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve': |
| 216 | self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:] |
| 217 | elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group': |
| 218 | self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:] |
| 219 | elif name.startswith('PSA_ECC_FAMILY_') and not parameter: |
| 220 | self.ecc_curves.add(name) |
| 221 | elif name.startswith('PSA_DH_FAMILY_') and not parameter: |
| 222 | self.dh_groups.add(name) |
| 223 | elif name.startswith('PSA_ALG_') and not parameter: |
| 224 | if name in ['PSA_ALG_ECDSA_BASE', |
| 225 | 'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']: |
| 226 | # Ad hoc skipping of duplicate names for some numerical values |
| 227 | return |
| 228 | self.algorithms.add(name) |
Gilles Peskine | 33c601c | 2021-03-10 01:25:50 +0100 | [diff] [blame] | 229 | self.record_algorithm_subtype(name, expansion) |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 230 | elif name.startswith('PSA_ALG_') and parameter == 'hash_alg': |
| 231 | if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']: |
| 232 | # A naming irregularity |
| 233 | tester = name[:8] + 'IS_RANDOMIZED_' + name[8:] |
| 234 | else: |
| 235 | tester = name[:8] + 'IS_' + name[8:] |
| 236 | self.algorithms_from_hash[name] = tester |
| 237 | elif name.startswith('PSA_KEY_USAGE_') and not parameter: |
Gilles Peskine | 33c601c | 2021-03-10 01:25:50 +0100 | [diff] [blame] | 238 | self.key_usage_flags.add(name) |
Gilles Peskine | e7c4455 | 2021-01-25 21:40:45 +0100 | [diff] [blame] | 239 | else: |
| 240 | # Other macro without parameter |
| 241 | return |
| 242 | |
| 243 | _nonascii_re = re.compile(rb'[^\x00-\x7f]+') |
| 244 | _continued_line_re = re.compile(rb'\\\r?\n\Z') |
| 245 | def read_file(self, header_file): |
| 246 | for line in header_file: |
| 247 | m = re.search(self._continued_line_re, line) |
| 248 | while m: |
| 249 | cont = next(header_file) |
| 250 | line = line[:m.start(0)] + cont |
| 251 | m = re.search(self._continued_line_re, line) |
| 252 | line = re.sub(self._nonascii_re, rb'', line).decode('ascii') |
| 253 | self.read_line(line) |