blob: 3bb6f0405e878f3394a2dac09e8f273b206b12b3 [file] [log] [blame]
Gilles Peskinecedb1122023-11-22 19:24:31 +01001"""Helper functions to parse C code in heavily constrained scenarios.
2
3Currently supported functionality:
4
5* read_function_declarations: read function declarations from a header file.
6"""
7
8# Copyright The Mbed TLS Contributors
9# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
10
11import re
12from typing import Dict, Iterable, Iterator, List, Optional, Tuple
13
14
class ArgumentInfo:
    """Information about an argument to an API function.

    Attributes:
    * decl: the declaration text, stripped of surrounding whitespace.
    * type: the type part of the declaration, with normalized whitespace.
    * name: the argument name, or None if the declaration has no name.
    * suffix: the array suffix (e.g. ``[32]``), or an empty string if none.
    """
    #pylint: disable=too-few-public-methods

    # C keywords that may end a type. They are reserved words, so they can
    # never be the name of an argument: in an unnamed declaration such as
    # `unsigned int` or `unsigned char`, the last word belongs to the type.
    _KEYWORDS = [
        'const', 'register', 'restrict', 'volatile',
        'char', 'double', 'float',
        'int', 'long', 'short', 'signed', 'unsigned',
    ]
    _DECLARATION_RE = re.compile(
        r'(?P<type>\w[\w\s*]*?)\s*' +
        # The \b after the keyword alternation is essential: without it,
        # any identifier that merely starts with a keyword (`interval`,
        # `constant_len`, `longer`, ...) would be rejected as a name.
        r'(?!(?:' + r'|'.join(_KEYWORDS) + r')\b)(?P<name>\b\w+\b)?' +
        r'\s*(?P<suffix>\[[^][]*\])?\Z',
        re.A | re.S)

    @classmethod
    def normalize_type(cls, typ: str) -> str:
        """Normalize whitespace in a type.

        Each run of whitespace becomes a single space, and each `*` is
        preceded by exactly one space.
        """
        typ = re.sub(r'\s+', r' ', typ)
        typ = re.sub(r'\s*\*', r' *', typ)
        return typ

    def __init__(self, decl: str) -> None:
        """Parse the declaration of one function argument.

        Raise ValueError (with the stripped declaration as its argument)
        if decl does not look like a supported argument declaration.
        """
        self.decl = decl.strip()
        m = self._DECLARATION_RE.match(self.decl)
        if not m:
            raise ValueError(self.decl)
        self.type = self.normalize_type(m.group('type')) #type: str
        self.name = m.group('name') #type: Optional[str]
        # The suffix group is None when there is no [...] part.
        self.suffix = m.group('suffix') or '' #type: str
44
45
class FunctionInfo:
    """Description of the declaration of one API function.

    Attributes:
    * filename, line_number: where the declaration was found.
    * qualifiers: the declaration's qualifiers, as a frozenset of strings.
    * return_type: the text of the return type.
    * name: the function name.
    * arguments: one ArgumentInfo object per argument declaration.
    """
    #pylint: disable=too-few-public-methods

    # Pattern recognizing text that ends with the word `void`
    # (optionally surrounded by whitespace).
    VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A)

    def __init__(self, #pylint: disable=too-many-arguments
                 filename: str,
                 line_number: int,
                 qualifiers: Iterable[str],
                 return_type: str,
                 name: str,
                 arguments: List[str]) -> None:
        """Record a function declaration and parse its argument declarations."""
        # Location of the declaration
        self.filename = filename
        self.line_number = line_number
        # The declaration itself
        self.qualifiers = frozenset(qualifiers)
        self.return_type = return_type
        self.name = name
        # Parse each argument declaration in order.
        parsed = []
        for declaration in arguments:
            parsed.append(ArgumentInfo(declaration))
        self.arguments = parsed

    def returns_void(self) -> bool:
        """Tell whether the return type of the function is void."""
        found = self.VOID_RE.search(self.return_type)
        return found is not None
70
71
# Match one C comment.
# Note that we match both comment types, so things like // in a /*...*/
# comment are handled correctly.
# A // comment extends to the end of the physical line, newline excluded.
_C_COMMENT_RE = re.compile(r'//[^\n]*|/\*.*?\*/', re.S)
# Match a maximal run of characters other than newlines.
_NOT_NEWLINES_RE = re.compile(r'[^\n]+')

def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
    """Read logical lines from a file.

    Logical lines are one or more physical lines, with balanced parentheses.

    Comments are removed before splitting the content into lines.
    Yield pairs (line_number, line) where line_number is the 1-based number
    of the first physical line of the logical line, and the physical lines
    that make up line are joined with newlines.
    """
    with open(filename, encoding='utf-8') as inp:
        content = inp.read()
    # Strip comments, but keep newlines for line numbering
    content = _C_COMMENT_RE.sub(
        lambda m: _NOT_NEWLINES_RE.sub('', m.group(0)),
        content)
    lines = enumerate(content.splitlines(), 1)
    for line_number, line in lines:
        # Read a logical line, containing balanced parentheses.
        # We assume that parentheses are balanced (this should be ok
        # since comments have been stripped), otherwise there will be
        # a gigantic logical line at the end.
        paren_level = line.count('(') - line.count(')')
        while paren_level > 0:
            # Use a default value rather than letting StopIteration escape:
            # inside a generator, it would be turned into a RuntimeError.
            following = next(lines, None)
            if following is None:
                # End of file with unbalanced parentheses.
                break
            _, more = following
            paren_level += more.count('(') - more.count(')')
            line += '\n' + more
        yield line_number, line
101
# Match the declaration of a function at the beginning of a logical line:
# optional qualifiers, a return type, the function name, a parenthesized
# argument list and a semicolon.
_C_FUNCTION_DECLARATION_RE = re.compile(
    r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)'
    r'(?P<return_type>\w[\w\s*]*?)\s*' +
    r'\b(?P<name>\w+)' +
    r'\s*\((?P<arguments>.*)\)\s*;',
    re.A | re.S)

def read_function_declarations(functions: Dict[str, FunctionInfo],
                               filename: str) -> None:
    """Collect function declarations from a C header file.

    For each function declaration found in filename, store a FunctionInfo
    object in functions, indexed by the function name.

    An argument list that is `void`, empty or blank is recorded as an empty
    list of arguments. Other argument lists are split at commas.
    """
    for line_number, line in read_logical_lines(filename):
        m = _C_FUNCTION_DECLARATION_RE.match(line)
        if not m:
            continue
        qualifiers = m.group('qualifiers').split()
        return_type = m.group('return_type')
        name = m.group('name')
        arguments_text = m.group('arguments')
        if not arguments_text.strip() or \
           FunctionInfo.VOID_RE.match(arguments_text):
            # `f(void)` or `f()`: there is no argument declaration to parse.
            # Without this, `f()` would result in parsing an empty
            # argument declaration, which raises ValueError.
            arguments = [] #type: List[str]
        else:
            arguments = arguments_text.split(',')
        # Note: we replace any existing declaration for the same name.
        functions[name] = FunctionInfo(filename, line_number,
                                       qualifiers,
                                       return_type,
                                       name,
                                       arguments)