blob: 55474b91eaa9aa10809ee93f8c87c91be3408e5d [file] [log] [blame]
Basil Eljuse4b14afb2020-09-30 13:07:23 +01001# !/usr/bin/env python
2###############################################################################
Saul Romero1be65f42023-12-13 10:26:21 +00003# Copyright (c) 2020-2023, ARM Limited and Contributors. All rights reserved.
Basil Eljuse4b14afb2020-09-30 13:07:23 +01004#
5# SPDX-License-Identifier: BSD-3-Clause
6###############################################################################
7
8###############################################################################
9# FILE: intermediate_layer.py
10#
11# DESCRIPTION: Creates an intermediate json file with information provided
12# by the configuration json file, dwarf signatures and trace
13# files.
14#
15###############################################################################
16
17import os
18import re
19import glob
20import argparse
21import subprocess
22import json
23from argparse import RawTextHelpFormatter
Saul Romero1be65f42023-12-13 10:26:21 +000024import cc_logger
Basil Eljuse4b14afb2020-09-30 13:07:23 +010025import time
Saul Romero884d2142023-01-16 10:31:22 +000026from typing import Dict
27from typing import List
Saul Romero1be65f42023-12-13 10:26:21 +000028from typing import Generator
29from typing import Union
30from typing import Tuple
Saul Romero1be65f42023-12-13 10:26:21 +000031import logging
Basil Eljuse4b14afb2020-09-30 13:07:23 +010032
Saul Romero884d2142023-01-16 10:31:22 +000033__version__ = "7.0"
Basil Eljuse4b14afb2020-09-30 13:07:23 +010034
35# Static map that defines the elf file source type in the intermediate json
ELF_MAP = {
    # Keys are elf names (file basename without extension, as looked up by
    # IntermediateCodeCoverage.get_elf_index); values are the numeric indexes
    # written to the intermediate json for each elf.
    "bl1": 0,
    "bl2": 1,
    "bl31": 2,
    "bl32": 3,
    "scp_ram": 10,
    "scp_rom": 11,
    "mcp_rom": 12,
    "mcp_ram": 13,
    "secure_hafnium": 14,
    # NOTE(review): "hafium" looks like a typo of "hafnium" - confirm before
    # renaming, because this key is matched against elf names at runtime.
    "hafium": 15,
    # Starting index used for elf names that are not listed above
    "custom_offset": 100
}
49
50
def os_command(command, show_command=False):
    """
    Function that executes an os command through the shell; on failure an
    exception is raised.

    :param command: OS command as string. It is interpreted by the shell,
    so pipes and redirections are allowed
    :param show_command: Optional argument to print the command in stdout
    :return: The string output of the os command (stdout and stderr
    combined, decoded as utf8)
    :raises Exception: If the command finishes with a non-zero exit status;
    the original CalledProcessError is chained as the cause
    """
    if show_command:
        print("OS command: {}".format(command))
    try:
        out = subprocess.check_output(
            command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as ex:
        # Decode the captured output so the message shows readable text
        # rather than the repr of a bytes object
        output = ex.output.decode("utf8", errors="replace")
        raise Exception(
            "Exception running command '{}': {}({})".format(
                command, output, ex.returncode)) from ex
    return out.decode("utf8")
69
70
def load_stats_from_traces(trace_globs):
    """
    Function to process and consolidate statistics from trace files

    Each non-blank line of a trace file is expected to hold three
    whitespace separated fields: the address in hexadecimal, the number of
    times it was executed and the instruction size. Blank lines are skipped
    and malformed lines are logged and skipped, so a single bad line does
    not discard the rest of the file.

    :param trace_globs: List of trace file patterns
    :return: Dictionary with stats from trace files i.e.
        {mem address in decimal}=(times executed, inst size)
    :raises Exception: If no file matches any of the patterns
    """
    # Make a set of unique trace files
    trace_files = set()
    for tg in trace_globs:
        trace_files.update(glob.glob(tg))
    if not trace_files:
        raise Exception("No trace files found for '{}'".format(trace_globs))

    stats = {}
    stat_size = {}
    # Load stats from the trace files
    for trace_file in trace_files:
        try:
            with open(trace_file, 'r') as f:
                for line_number, line in enumerate(f, start=1):
                    data = line.split()
                    if not data:
                        continue  # Blank line
                    try:
                        address = int(data[0], 16)
                        stat = int(data[1])
                        size = int(data[2])
                    except (ValueError, IndexError) as ex:
                        logger.error(
                            "@Loading stats from trace files:{} "
                            "({}:{})".format(ex, trace_file, line_number))
                        continue
                    # The last size seen for an address is the one kept
                    stat_size[address] = size
                    stats[address] = stats.get(address, 0) + stat
        except (OSError, UnicodeDecodeError) as ex:
            # Best effort: an unreadable file must not stop the other ones
            logger.error("@Loading stats from trace files:{}".format(ex))
    # Merge the two dicts
    return {address: (count, stat_size[address])
            for address, count in stats.items()}
110
111
def get_code_sections_for_binary(elf_name):
    """
    Function to return the ranges of memory address for sections of code
    in the elf file

    The section headers are listed with objdump and the awk filter prints
    the 2nd, 4th and 3rd columns of every line that precedes a line
    containing CODE.

    :param elf_name: Elf binary file name
    :return: List of code sections tuples, i.e. (2nd column as string,
        4th column as integer, 3rd column as integer), all parsed as
        hexadecimal. Callers add the last two values to obtain the end
        address of the section.
        NOTE(review): presumably (section name, start address, size) for
        the usual objdump -h layout - confirm against the objdump in use.
    """
    command = """%s -h %s | grep -B 1 CODE | grep -v CODE \
 | awk '{print $2" "$4" "$3}'""" % (OBJDUMP, elf_name)
    text_out = os_command(command)
    secs = []
    for sec in text_out.splitlines():
        if not sec.strip():
            continue
        try:
            d = sec.split()
            secs.append((d[0], int(d[1], 16), int(d[2], 16)))
        except (ValueError, IndexError) as ex:
            # Include the error in the message (the placeholder was missing)
            logger.error(
                "@Returning memory address code sections:{}".format(ex))
    return secs
135
136
def get_executable_ranges_for_binary(elf_name):
    """
    Get function ranges from an elf file

    The mapping symbols ($x, $a, $t and $d) are read with readelf and
    combined with the end of each code section to build the address ranges
    that hold executable code.

    :param elf_name: Elf binary file name
    :return: List of tuples for ranges i.e. (range start, range end); an
        empty list if neither symbols nor code sections were found
    """
    # Parse all $x / $d symbols
    symbol_table = []
    command = r"""%s -s %s | awk '/\$[xatd]/ {print $2" "$8}'""" % (
        READELF, elf_name)
    text_out = os_command(command)
    for line in text_out.splitlines():
        try:
            data = line.split()
            address = int(data[0], 16)
            _type = 'X' if data[1] in ('$x', '$t', '$a') else 'D'
        except (ValueError, IndexError) as ex:
            logger.error("@Getting executable ranges:{}".format(ex))
            # Skip the line: appending a stale or empty entry would corrupt
            # the table and break the sort below
            continue
        symbol_table.append((address, _type))

    # Add markers for end of code sections
    for sec in get_code_sections_for_binary(elf_name):
        symbol_table.append((sec[1] + sec[2], 'S'))

    if not symbol_table:
        return []

    # Sort by address
    symbol_table.sort(key=lambda tup: tup[0])

    # Create ranges (list of START/END tuples)
    ranges = []
    range_start, rtype = symbol_table[0]
    for sym_address, sym_type in symbol_table:
        if sym_type != rtype:
            if rtype == 'X':
                # Subtract one because the first address of the
                # next range belongs to the next range.
                ranges.append((range_start, sym_address - 1))
            range_start = sym_address
            rtype = sym_type
    return ranges
183
184
def remove_workspace(path, workspace):
    """
    Express a path relative to a given workspace

    :param path: Path to be made relative to the workspace
    :param workspace: Workspace folder; when None the path is returned
        untouched
    :return: The path relative to the workspace, or the path itself if no
        workspace was given
    """
    if workspace is None:
        return path
    return os.path.relpath(path, workspace)
194
195
def get_function_line_numbers(source_file: str) -> Dict[str, int]:
    """
    Using ctags get all the function names with their line numbers
    within the source_file

    Every output line of the ctags command is split on whitespace: the
    first column is taken as the symbol name, the second as its kind and
    the third as the line number. Entries of kind "function" are always
    recorded; entries of kind "label" are recorded only when they look
    like a function declared through a "func" macro or like a "name:"
    label.

    :param source_file: Path of the source file to be scanned
    :return: Dictionary with function name as key and line number as value;
        on error, whatever was collected before the failure
    """
    command = "ctags -x --c-kinds=f {}".format(source_file)
    fln = {}
    try:
        function_lines = os_command(command).split("\n")
        for line in function_lines:
            cols = line.split()
            if len(cols) < 3:
                continue
            if cols[1] == "function":
                fln[cols[0]] = int(cols[2])
            elif cols[1] == "label":
                if cols[0] == "func":
                    fln[cols[-1]] = int(cols[2])
                elif cols[0] + ":" == cols[-1]:
                    fln[cols[0]] = int(cols[2])
    except Exception as ex:
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # still propagate to the caller
        logger.warning(f"Warning: Can't get all function line numbers from "
                       f"{source_file}: error '{ex}' when executing command "
                       f"'{command}'")
    return fln
226
227
class FunctionLineNumbers(object):
    """Helper class that resolves the line number where a function starts
    within a source code file, caching the results per file"""

    def __init__(self, workspace: str):
        """
        Set up the cache that links each source code file with the start
        line numbers of its functions.

        :param workspace: The folder where the source files are deployed
        """
        self.workspace = workspace
        self.filenames = {}

    def get_line_number(self, filename: str, function_name: str) -> int:
        """
        Return the start line number of a function within a source file.

        :param filename: Source file path, joined to the workspace folder
        :param function_name: Name of the function to look for
        :return: Line number of the function, or 0 when function lines are
            disabled or the function is not found in the file
        """
        if not FUNCTION_LINES_ENABLED:
            return 0
        try:
            lines_by_function = self.filenames[filename]
        except KeyError:
            # First request for this file: get all functions with their
            # lines in the source file and remember them
            source_file = os.path.join(self.workspace, filename)
            lines_by_function = get_function_line_numbers(source_file)
            self.filenames[filename] = lines_by_function
        return lines_by_function.get(function_name, 0)
251
252
class BinaryParser(object):
    """Parser of a binary dump that carries a dwarf signature. It produces
    the logical information (function blocks, source code blocks and
    assembly lines) that is matched with traces in order to produce a code
    coverage report"""

    def __init__(self, dump: str, _workspace: str, _remove_workspace: bool,
                 local_workspace: str):
        """
        Initialisation of the instance to parse binary files.

        :param dump: Binary dump (string) containing assembly code and source
        code metadata, i.e. source code location and line number.
        :param _workspace: Workspace (folder) where the source files were
        built from.
        :param _remove_workspace: Boolean to indicate if the build of
        source files was local (false) or from a CI (true).
        :param local_workspace: Path to the local workspace where the source
        files reside
        """
        self.dump = dump
        # Needs self.dump, so it is computed right after storing the dump
        self.no_source_functions = self.get_no_source_functions()
        self.workspace = _workspace
        self.remove_workspace = _remove_workspace
        self.local_workspace = local_workspace
        self.function_line_numbers = FunctionLineNumbers(self.local_workspace)

    def get_no_source_functions(self) -> Dict[int, Dict]:
        """Find in the dwarf dump all the functions with no source code i.e.:
        function_name():
            start_hex_address        opcode
            ....
            end_hex_address      opcode

        :returns: Dictionary of functions indexed by start address function's
        location
        """
        pattern = (r"(.+?)\(\):\n\s+([a-f0-9]+):."
                   r"+?\n(\s+([a-f0-9]+):.+?\n)*")
        # The result is [start_dec_address]={function name, function end
        # address in decimal}
        found = {}
        for name, first_hex, _, last_hex in re.findall(pattern, self.dump):
            start_address = int(first_hex, 16)
            # A function with a single assembly line ends where it starts
            end_address = int(last_hex, 16) if last_hex else start_address
            found[start_address] = {'name': name,
                                    'end_address': end_address}
        return found

    class SourceCodeBlock(object):
        """Class used to represent a source code block of information within
        a function block in a binary dump file.
        The source code block contains the following components:
        - Source code file that contains the source code corresponding
        to the assembly code.
        - Line number within the source code file corresponding to the source
        code.
        - Assembly code block.
        """

        def __init__(self, source_code_block_dump):
            """
            Create an instance of a source code block within a function block.

            :param source_code_block_dump: Sequence of 3 elements that
            contains the components of a source code block, i.e. source
            file, line number and assembly code.
            """
            (self.source_file,
             self.line_number,
             self.asm_code) = source_code_block_dump

        @staticmethod
        def get(dwarf_data: str) -> Generator['BinaryParser.SourceCodeBlock',
                                              None, None]:
            """
            Static method generator to extract the source code blocks from
            the dwarf data of a function block.

            :param dwarf_data: Dwarf data (string) of a function block.
            :return: A SourceCodeBlock object per source line found.
            """
            pattern = (r"(?s)(/[a-zA-Z_0-9][^\n]+?):"
                       r"([0-9]+)(?: [^\n]+)?\n(.+?)"
                       r"\n(?=/[a-zA-Z_0-9][^\n]+?"
                       r":[0-9]+[^\n]+?\n|\n$)")
            for source_block_group in re.findall(pattern, dwarf_data):
                if len(source_block_group) != 3:
                    logger.warning(f"Source code incomplete:"
                                   f"{source_block_group}")
                    continue
                source_file, line_number, asm_code = source_block_group
                # Trailing new lines are needed for parsing assembly lines
                yield BinaryParser.SourceCodeBlock(
                    [source_file, line_number, asm_code + "\n\n"])

        def __str__(self):
            return f"'{self.source_file}:{self.line_number}'"

    class FunctionBlock(object):
        """Class used to parse and obtain a function block from the
        binary dump file that corresponds to a function declaration in the
        source code file and a block of assembly code mixed with corresponding
        source code lines, i.e. dwarf information.
        The function block has the following components:
        - Function name at source code.
        - DWARF data.
        - Function declaration's line number at source code.
        This comes from dump blocks like these:
        0000000000000230 <_setup>:
        read_el(): <---- Function name at source code
        /home/user/aarch64/setup.c:238 <------ Source file and line number
         230:   d53e1100    mrs x0, scr_el3 <----- Assembly lines belonging to
         the source code
        no_setup():
        /home/user/no_setup.c:618
         234:   b2760000    orr x0, x0, #0x400
        """

        def __init__(self, function_group: List[str]):
            """
            Create an instance of a function block within a binary dump.

            :param function_group: List containing the function name and
            dwarf data of the block.
            """
            self.name, self.dwarf = function_group
            # The function's source file is the first location in the dwarf
            location = re.search(r"(/.+?):([0-9]+)(?: [^\n]+)?\n", self.dwarf)
            if location and len(location.groups()) == 2:
                self.source_file = location.group(1).strip()
            else:
                self.source_file = None
            # Computed later
            self.function_line_number = None

        @staticmethod
        def get(dump: str) -> Generator['BinaryParser.FunctionBlock', None,
                                        None]:
            """
            Static method generator to extract a function block from the binary
            dump.

            :param dump: Binary dump (string) that contains the binary file
            information.
            :return: A FunctionBlock object that is a logical representation
            of a function declaration within the binary dump.
            """
            pattern = (r"(?s)([a-zA-Z0-9_]+?)\(\):"
                       r"\n(/.+?:[0-9]+?.+?)\n"
                       r"(?=[a-zA-Z0-9_]+?\(\):\n|\n\n$)")
            for group in re.findall(pattern, dump):
                if len(group) != 2:
                    continue
                name, dwarf = group
                # Trailing new lines are needed for parsing source code blocks
                yield BinaryParser.FunctionBlock([name, dwarf + "\n\n"])

        @property
        def values(self):
            """Tuple of (name, source file, function line number)"""
            return self.name, self.source_file, self.function_line_number

        def __str__(self):
            return f"'{self.name}:{self.function_line_number}'"

    class AssemblyLine(object):
        """Class used to represent an assembly code line within an
        assembly code block.
        The assembly line instruction is formed by the following components:
        - Hexadecimal address of the assembly instruction.
        - Assembly instruction.
        """

        def __init__(self, line):
            """
            Create an instance representing an assembly code line within an
            assembly code block.

            :param line: Tuple of 2 elements [Hexadecimal number,
            and assembly code]
            """
            self.hex_line_number, self.opcode = line
            self.dec_address = int(self.hex_line_number, 16)
            self.times_executed = 0

        @staticmethod
        def get(asm_code: str) -> Generator['BinaryParser.AssemblyLine',
                                            None, None]:
            """
            Static method generator to extract an assembly code line from an
            assembly code block.

            :param asm_code: Lines of assembly code within the dump
            :return: AssemblyLine object.
            """
            flags = re.DOTALL | re.MULTILINE
            for line in re.findall(r"^(?:\s+)?([a-fA-F0-9]+):\t(.+?)\n",
                                   asm_code, flags):
                if len(line) != 2:
                    logger.warning(f"Assembly code incomplete: {line}")
                    continue
                yield BinaryParser.AssemblyLine(line)

    @staticmethod
    def get_asm_line(source_code_block: 'BinaryParser.SourceCodeBlock',
                     traces_stats) -> \
            Generator['BinaryParser.AssemblyLine', None, None]:
        """Generator method to obtain all assembly line codes within a source
        code line, each one updated with the times it was executed according
        to the traces stats (0 when its address is not in the stats)

        :param source_code_block: SourceCodeBlock with the assembly code
        :param traces_stats: Dictionary of stats indexed by decimal address
        whose values hold the times executed as first element
        """
        asm_lines = BinaryParser.AssemblyLine.get(source_code_block.asm_code)
        for asm_line in asm_lines:
            stat = traces_stats.get(asm_line.dec_address, [0])
            asm_line.times_executed = stat[0]
            yield asm_line

    def get_source_code_block(self, function_block: FunctionBlock) -> \
            Generator['BinaryParser.SourceCodeBlock', None, None]:
        """
        Generator method to obtain all the source code blocks within a
        function block.

        :param function_block: FunctionBlock object that contains the code
        the source code blocks.
        :return: A SourceCodeBlock object.
        """
        blocks = BinaryParser.SourceCodeBlock.get(function_block.dwarf)
        for source_code_block in blocks:
            if self.remove_workspace:
                source_code_block.source_file = remove_workspace(
                    source_code_block.source_file, self.workspace)
            yield source_code_block

    def get_function_block(self) -> Generator['BinaryParser.FunctionBlock',
                                              None, None]:
        """Generator method to obtain all the function blocks contained in
        the binary dump file. Function blocks without a source file are
        reported and skipped.
        """
        for function_block in BinaryParser.FunctionBlock.get(self.dump):
            if function_block.source_file is None:
                logger.warning(f"Source file not found for function "
                               f"{function_block.name}, will not be covered")
                continue
            if self.remove_workspace:
                function_block.source_file = remove_workspace(
                    function_block.source_file, self.workspace)
            line_number = self.function_line_numbers.get_line_number(
                function_block.source_file, function_block.name)
            function_block.function_line_number = line_number
            yield function_block
494
495
class CoverageHandler(object):
    """ Class used to handle source files coverage linked with their functions
    and line code coverage from function blocks obtained from DWARF data and
    trace code coverage from CC plugin.

    The coverage is kept in a dictionary indexed by source file where each
    entry holds a "functions" dictionary (indexed by function name) and a
    "lines" dictionary (indexed by source code line number)"""

    def __init__(self):
        self._source_files = {}

    def add_function_coverage(self, function_data:
                              Union[BinaryParser.FunctionBlock,
                                    Tuple[str, str, int]]):
        """ Add a function coverage block and a source file coverage block,
        if not already created and link them

        :param function_data: Either a FunctionBlock object or a tuple of
        (function name, source file, function line number)
        """
        # Unpack function data either as an FunctionBlock object property or
        # a tuple
        if isinstance(function_data, BinaryParser.FunctionBlock):
            name, source_file, function_line_number = function_data.values
        else:
            name, source_file, function_line_number = function_data

        # Source file coverage block, created if not already there
        file_coverage = self._source_files.setdefault(
            source_file, {"functions": {}, "lines": {}})
        # Function coverage block, created if not existent, using the
        # function name as key and linked to the source file coverage block
        file_coverage["functions"].setdefault(
            name, {"covered": False, "line_number": function_line_number})

    def add_line_coverage(self, source_code_block:
                          BinaryParser.SourceCodeBlock):
        """ Add a line coverage block and a source file coverage block,
        if not already created and link them

        :param source_code_block: Source code block that provides the source
        file and the line number
        """
        # Source file coverage block, created if not already there
        file_coverage = self._source_files.setdefault(
            source_code_block.source_file, {"functions": {}, "lines": {}})
        # Line coverage block, created if not existent, using the source code
        # line number as a key and linked to the source file coverage block
        file_coverage["lines"].setdefault(
            source_code_block.line_number, {"covered": False, "elf_index": {}})

    def add_asm_line(self, source_code_block: BinaryParser.SourceCodeBlock,
                     asm_line: BinaryParser.AssemblyLine, elf_index: int):
        """Add an assembly line from the DWARF data linked to a source code
        line. The line coverage block must already exist.

        :param source_code_block: Source code block that owns the assembly
        line
        :param asm_line: Assembly line to be added
        :param elf_index: Index of the elf file the assembly line belongs to
        """
        line_coverage = self._source_files[source_code_block.source_file][
            "lines"][source_code_block.line_number]
        elf_asm_lines = line_coverage["elf_index"].setdefault(elf_index, {})
        # An address that is already registered is left untouched
        elf_asm_lines.setdefault(asm_line.dec_address,
                                 (asm_line.opcode, asm_line.times_executed))

    def set_line_coverage(self, source_code_block:
                          BinaryParser.SourceCodeBlock, value: bool):
        """Set the covered flag of an existing line coverage block"""
        lines = self._source_files[source_code_block.source_file]["lines"]
        lines[source_code_block.line_number]["covered"] = value

    def set_function_coverage(self, function_block:
                              Union[BinaryParser.FunctionBlock,
                                    Tuple[str, str]], value: bool):
        """Set the covered flag of an existing function coverage block

        :param function_block: Either a FunctionBlock object or a tuple of
        (function name, source file)
        :param value: Coverage flag to be set
        """
        if isinstance(function_block, BinaryParser.FunctionBlock):
            name = function_block.name
            source_file = function_block.source_file
        else:
            name, source_file = function_block
        self._source_files[source_file]["functions"][name]["covered"] = value

    @property
    def source_files(self):
        """Dictionary with the coverage of every source file"""
        return self._source_files
565
566
class IntermediateCodeCoverage(object):
    """Class used to process the trace data along with the dwarf
    signature files to produce an intermediate layer in json with
    code coverage in assembly and c source code.
    """

    def __init__(self, _config, local_workspace):
        """
        :param _config: Dictionary loaded from the json configuration file;
        the 'parameters', 'configuration' and 'elfs' entries are read
        :param local_workspace: Path to the local workspace where the source
        files reside
        """
        self._data = {}
        self.config = _config
        self.workspace = self.config['parameters']['workspace']
        self.remove_workspace = self.config['configuration']['remove_workspace']
        self.local_workspace = local_workspace
        self.elfs = self.config['elfs']
        # Dictionary with stats from trace files {address}=(times executed,
        # inst size)
        self.traces_stats = {}
        # Dictionary of unique assembly line memory address against source
        # file location
        # {assembly address} = (opcode, source file location, line number in
        # the source file, times executed)
        self.asm_lines = {}
        # Dictionary of {source file location}=>{'lines': {'covered':Boolean,
        # 'elf_index'; {elf index}=>{assembly address}=>(opcode,
        # times executed),
        # 'functions': {function name}=>is covered(boolean)}
        self.coverage = CoverageHandler()
        self.functions = []
        # Unique set of elf list of files
        self.elf_map = {}
        # Next index to assign to an elf file that is not in ELF_MAP
        self.elf_custom = None

    def process(self):
        """
        Public method to process the trace files and dwarf signatures
        using the information contained in the json configuration file.
        This method writes the intermediate json file output linking
        the trace data and c source and assembly code.
        """
        self.asm_lines = {}
        # Initialize for unknown elf files. The counter is kept between
        # calls because elf_map is kept too, so indexes never collide.
        if self.elf_custom is None:
            self.elf_custom = ELF_MAP["custom_offset"]
        print("Generating intermediate json layer '{}'...".format(
            self.config['parameters']['output_file']))
        for elf in self.elfs:
            # Gather information
            elf_name = elf['name']
            # Obtain trace data
            self.traces_stats = load_stats_from_traces(elf['traces'])
            # Produce code coverage
            self._process_binary(elf_name)
        # Write to the intermediate json file
        data = {"source_files": self.coverage.source_files,
                "configuration": {
                    "sources": self.config['parameters']['sources'],
                    "metadata": self.config['parameters'].get('metadata',
                                                              ""),
                    "elf_map": self.elf_map}
                }
        json_data = json.dumps(data, indent=4, sort_keys=True)
        with open(self.config['parameters']['output_file'], "w") as f:
            f.write(json_data)

    def get_elf_index(self, elf_name: str) -> int:
        """Obtains the elf index and fills the elf_map instance variable

        Known elf names take their index from ELF_MAP; any other name gets
        the next custom index of this instance, so the module level ELF_MAP
        is never modified.

        :param elf_name: Elf name, i.e. file base name without extension
        :return: Index that identifies the elf in the intermediate json
        """
        if elf_name not in self.elf_map:
            if elf_name in ELF_MAP:
                self.elf_map[elf_name] = ELF_MAP[elf_name]
            else:
                if self.elf_custom is None:
                    # Called before process(): start the custom range here
                    self.elf_custom = ELF_MAP["custom_offset"]
                self.elf_map[elf_name] = self.elf_custom
                self.elf_custom += 1
        return self.elf_map[elf_name]

    def _process_binary(self, elf_filename: str) -> BinaryParser:
        """
        Process an elf file i.e. match the source code and asm lines against
        trace files (coverage).

        :param elf_filename: Elf binary file name
        :return: The BinaryParser used to parse the dump of the elf file
        """
        # The dump is saved next to the elf file. The extension is replaced
        # (or appended when there is none) so that the dump can never be
        # written over the elf file itself.
        dump_filename = os.path.splitext(elf_filename)[0] + ".dump"
        command = "%s -Sl %s | tee %s" % (OBJDUMP, elf_filename,
                                          dump_filename)
        dump = os_command(command, show_command=True)
        dump += "\n\n"  # For pattern matching the last function
        logger.info(f"Parsing assembly file {elf_filename}")
        elf_name = os.path.splitext(os.path.basename(elf_filename))[0]
        elf_index = self.get_elf_index(elf_name)
        parser = BinaryParser(dump, self.workspace, self.remove_workspace,
                              self.local_workspace)
        total_number_functions = 0
        functions_covered = 0
        for function_block in parser.get_function_block():
            total_number_functions += 1
            # Function contains source code
            self.coverage.add_function_coverage(function_block)
            is_function_covered = False
            for source_code_block in parser.get_source_code_block(
                    function_block):
                self.coverage.add_line_coverage(source_code_block)
                is_line_covered = False
                for asm_line in parser.get_asm_line(source_code_block,
                                                    self.traces_stats):
                    # Here it is checked the line coverage
                    is_line_covered = asm_line.times_executed > 0 or \
                        is_line_covered
                    self.coverage.add_asm_line(source_code_block, asm_line,
                                               elf_index)
                logger.debug(f"Source file {source_code_block} is "
                             f"{'' if is_line_covered else 'not '}covered")
                if is_line_covered:
                    self.coverage.set_line_coverage(source_code_block, True)
                    is_function_covered = True
            logger.debug(f"\tFunction '{function_block.name}' at '"
                         f"{function_block.source_file}' is "
                         f"{'' if is_function_covered else 'not '}covered")
            if is_function_covered:
                self.coverage.set_function_coverage(function_block, True)
                functions_covered += 1
        logger.info(f"Total functions: {total_number_functions}, Functions "
                    f"covered:{functions_covered}")
        # Now check code coverage in the functions with no dwarf signature
        self._process_fn_no_sources(parser)
        return parser

    def _process_fn_no_sources(self, parser: BinaryParser):
        """
        Checks function coverage for functions with no dwarf signature i.e.
        sources.

        :param parser: Binary parser that contains objects needed
        to check function line numbers including the dictionary of functions
        to be checked i.e [start_dec_address]={'name', 'end_address'}
        """
        if not FUNCTION_LINES_ENABLED:
            return  # No source code at the workspace
        traces_addresses = sorted(self.traces_stats.keys())
        # Position of every traced address within the sorted list, so the
        # lookups below do not need to scan the list
        trace_positions = {address: position for position, address
                           in enumerate(traces_addresses)}
        traces_address_pointer = 0
        _functions = parser.no_source_functions
        functions_addresses = sorted(_functions.keys())
        address_size = 4
        for start_address in functions_addresses:
            function_covered = False
            function_name = _functions[start_address]['name']
            # Get all files in the source code where the function is defined
            source_files = os_command("grep --include '*.c' --include '*.s' "
                                      "--include '*.S' -nrw '{}' {}"
                                      "| cut -d: -f1".
                                      format(function_name,
                                             self.local_workspace))
            unique_files = set(source_files.split())
            sources_found = []
            for source_file in unique_files:
                line_number = parser.function_line_numbers.get_line_number(
                    source_file, function_name)
                if line_number > 0:
                    sources_found.append((source_file, line_number))
            if len(sources_found) == 0:
                logger.debug(f"'{function_name}' not found in sources")
            elif len(sources_found) > 1:
                logger.warning(f"'{function_name}' declared in "
                               f"{len(sources_found)} files")
            else:
                source_file_found, function_line_number = sources_found[0]
                function_source_file = remove_workspace(source_file_found,
                                                        self.local_workspace)
                self.coverage.add_function_coverage((function_name,
                                                     function_source_file,
                                                     function_line_number))
                for in_function_address in \
                        range(start_address,
                              _functions[start_address]['end_address']
                              + address_size, address_size):
                    # Only traced addresses located at or after the pointer
                    # are taken into account
                    position = trace_positions.get(in_function_address)
                    if position is not None and \
                            position >= traces_address_pointer:
                        function_covered = True
                        traces_address_pointer = position + 1
                        break
                logger.info(f"Added non-sources function '{function_name}' "
                            f"with coverage: {function_covered}")
                if function_covered:
                    self.coverage.set_function_coverage((function_name,
                                                         function_source_file),
                                                        function_covered)
Basil Eljuse4b14afb2020-09-30 13:07:23 +0100753
754
# Epilog shown by the command line help; describes the expected layout of
# the input json configuration file.
json_conf_help = """
Produces an intermediate json layer for code coverage reporting
using an input json configuration file.

Input json configuration file format:
{
    "configuration":
        {
        "remove_workspace": <true if 'workspace' must be removed from the
                             path of the source files>,
        "include_assembly": <true to include assembly source code in the
                             intermediate layer>
        },
    "parameters":
        {
        "objdump": "<Path to the objdump binary to handle dwarf signatures>",
        "readelf": "<Path to the readelf binary to handle dwarf signatures>",
        "sources": [ <List of source code origins, one or more of the next
                      options>
                    {
                    "type": "git",
                    "URL": "<URL git repo>",
                    "COMMIT": "<Commit id>",
                    "REFSPEC": "<Refspec>",
                    "LOCATION": "<Folder within 'workspace' where this source
                                 is located>"
                    },
                    {
                    "type": "http",
                    "URL": "<URL link to file>",
                    "COMPRESSION": "xz",
                    "LOCATION": "<Folder within 'workspace' where this source
                                 is located>"
                    }
                ],
        "workspace": "<Workspace folder where the source code was located to
                      produce the elf/axf files>",
        "output_file": "<Intermediate layer output file name and location>",
        "metadata": {<Metadata objects to be passed to the intermediate json
                      files>}
        },
    "elfs": [ <List of elf files to be traced/parsed>
            {
                "name": "<Full path name to elf/axf file>",
                "traces": [ <List of trace files to be parsed for this
                             elf/axf file>
                            "<Full path name to the trace file>",
                          ]
            }
        ]
}
"""
# Paths to the toolchain binaries; assigned by main() from the
# 'parameters' section of the json configuration
OBJDUMP = None
READELF = None
# Set to True by main() when a local workspace is given and ctags runs
FUNCTION_LINES_ENABLED = None
Basil Eljuse4b14afb2020-09-30 13:07:23 +0100810
811
def main():
    """
    Command line entry point.

    Parses the arguments, loads the json configuration file, sets the
    OBJDUMP/READELF globals from it and checks that both tools run. When
    a local workspace is given and ctags runs, function line numbers are
    enabled. Finally the intermediate layer is created and processed.

    If the configuration file cannot be opened or parsed an error message
    is printed and the function returns without doing anything else.
    """
    global OBJDUMP
    global READELF
    global FUNCTION_LINES_ENABLED

    parser = argparse.ArgumentParser(epilog=json_conf_help,
                                     formatter_class=RawTextHelpFormatter)
    # The option is required, so a default value would never be used
    parser.add_argument('--config-json', metavar='PATH',
                        dest="config_json",
                        help='JSON configuration file', required=True)
    parser.add_argument('--local-workspace', default="",
                        help=('Local workspace folder where source code files'
                              ' and folders resides'))
    args = parser.parse_args()
    try:
        with open(args.config_json, 'r') as f:
            config = json.load(f)
    except (OSError, ValueError) as ex:
        # OSError: file cannot be opened/read; ValueError: invalid json
        # (json.JSONDecodeError) or undecodable text (UnicodeDecodeError)
        print("Error at opening and processing JSON: {}".format(ex))
        return
    print(json.dumps(config, indent=4))
    # Setting toolchain binary tools variables
    OBJDUMP = config['parameters']['objdump']
    READELF = config['parameters']['readelf']
    # Checking if are installed
    os_command("{} --version".format(OBJDUMP))
    os_command("{} --version".format(READELF))

    if args.local_workspace:
        # Checking ctags installed
        try:
            os_command("ctags --version")
        except BaseException:
            # NOTE(review): BaseException also covers SystemExit, which
            # os_command may raise if it exits on failure - confirm before
            # narrowing this handler.
            # Adjacent literals are used instead of a backslash inside the
            # string, which embedded the source indentation in the message.
            print("Warning!: ctags not installed/working function line "
                  "numbers will be set to 0. [{}]".format(
                      "sudo apt install exuberant-ctags"))
        else:
            FUNCTION_LINES_ENABLED = True

    intermediate_layer = IntermediateCodeCoverage(config, args.local_workspace)
    intermediate_layer.process()
Basil Eljuse4b14afb2020-09-30 13:07:23 +0100853
854
# Bound at module level, not only when run as a script, so that the code
# in this file that uses 'logger' also works when the module is imported.
logger = cc_logger.logger

if __name__ == '__main__':
    # perf_counter is not affected by system clock adjustments
    start_time = time.perf_counter()
    main()
    elapsed_time = time.perf_counter() - start_time
    print("Elapsed time: {}s".format(elapsed_time))