blob: 5d25a7b189ed911fe9f64232f695d63086c11a84 [file] [log] [blame]
Basil Eljuse4b14afb2020-09-30 13:07:23 +01001# !/usr/bin/env python
2###############################################################################
Saul Romero1be65f42023-12-13 10:26:21 +00003# Copyright (c) 2020-2023, ARM Limited and Contributors. All rights reserved.
Basil Eljuse4b14afb2020-09-30 13:07:23 +01004#
5# SPDX-License-Identifier: BSD-3-Clause
6###############################################################################
7
8###############################################################################
9# FILE: intermediate_layer.py
10#
11# DESCRIPTION: Creates an intermediate json file with information provided
12# by the configuration json file, dwarf signatures and trace
13# files.
14#
15###############################################################################
16
17import os
18import re
19import glob
20import argparse
21import subprocess
22import json
23from argparse import RawTextHelpFormatter
Saul Romero1be65f42023-12-13 10:26:21 +000024import cc_logger
Basil Eljuse4b14afb2020-09-30 13:07:23 +010025import time
Saul Romero884d2142023-01-16 10:31:22 +000026from typing import Dict
27from typing import List
Saul Romero1be65f42023-12-13 10:26:21 +000028from typing import Generator
29from typing import Union
30from typing import Tuple
31from functools import cached_property
32import logging
Basil Eljuse4b14afb2020-09-30 13:07:23 +010033
# Version string of this module
__version__ = "7.0"

# Static map that defines the elf file source type in the intermediate json.
# Keys are ELF base names (file name without directory or extension) and
# values are the numeric index stored for that ELF in the output.
# "custom_offset" is not an ELF name: it is the first index handed out to
# ELF names that are not listed in this map.
# NOTE(review): "hafium" looks like a misspelling of "hafnium" -- confirm
# before renaming, because ELF names are matched against these keys.
ELF_MAP = {
    "bl1": 0,
    "bl2": 1,
    "bl31": 2,
    "bl32": 3,
    "scp_ram": 10,
    "scp_rom": 11,
    "mcp_rom": 12,
    "mcp_ram": 13,
    "secure_hafnium": 14,
    "hafium": 15,
    "custom_offset": 100
}
50
51
def os_command(command, show_command=False):
    """
    Function that executes an os command through the shell and returns its
    output; on failure an exception is raised.

    :param command: OS command as string. It is run with ``shell=True`` so
        that pipes are honoured, hence it must only be built from trusted
        input.
    :param show_command: Optional argument to print the command in stdout
    :return: The string output of the os command (stdout and stderr
        combined, decoded as utf8)
    :raises Exception: If the command exits with a non-zero status. The
        original ``subprocess.CalledProcessError`` is chained as the cause.
    """
    if show_command:
        print("OS command: {}".format(command))
    try:
        out = subprocess.check_output(
            command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as ex:
        # Chain the original error so the traceback keeps the real cause
        raise Exception(
            "Exception running command '{}': {}({})".format(
                command, ex.output, ex.returncode)) from ex
    return out.decode("utf8")
70
71
def load_stats_from_traces(trace_globs):
    """
    Function to process and consolidate statistics from trace files

    Each non-blank trace line is expected to hold three whitespace separated
    fields: the instruction address in hexadecimal, the number of times it
    was executed and the instruction size (both in decimal). Execution
    counts for the same address are added up across all the trace files.

    Blank lines are skipped. A malformed line is reported and skipped on its
    own, so it does not discard the remaining lines of that trace file.

    :param trace_globs: List of trace file patterns
    :return: Dictionary with stats from trace files i.e.
        {mem address in decimal}=(times executed, inst size)
    :raises Exception: If no file matches any of the patterns
    """
    # Make a set of unique trace files
    trace_files = set()
    for trace_glob in trace_globs:
        trace_files.update(glob.glob(trace_glob))

    if not trace_files:
        raise Exception("No trace files found for '{}'".format(trace_globs))

    times_executed = {}
    inst_size = {}
    # Load stats from the trace files
    for trace_file in trace_files:
        try:
            with open(trace_file, 'r') as f:
                for line_number, line in enumerate(f, start=1):
                    data = line.split()
                    if not data:
                        continue  # Blank line, nothing to load
                    try:
                        address = int(data[0], 16)
                        stat = int(data[1])
                        size = int(data[2])
                    except (IndexError, ValueError) as ex:
                        logger.error(
                            "@Loading stats from trace files:{} ({}:{})"
                            .format(ex, trace_file, line_number))
                        continue
                    inst_size[address] = size
                    times_executed[address] = \
                        times_executed.get(address, 0) + stat
        except (OSError, UnicodeError) as ex:
            # Unreadable file: report it and carry on with the other files
            logger.error("@Loading stats from trace files:{}".format(ex))
    # Merge the two dicts
    return {address: (count, inst_size[address])
            for address, count in times_executed.items()}
111
112
def get_code_sections_for_binary(elf_name):
    """
    Function to return the memory location of the sections of code in the
    elf file

    The section headers are listed with objdump and filtered down to the
    sections flagged as CODE; awk then prints the name, address and size
    columns of each one.

    :param elf_name: Elf binary file name
    :return: List of code sections tuples, i.e. (section name, start
        address, size), with address and size as integers
    """
    command = """%s -h %s | grep -B 1 CODE | grep -v CODE \
        | awk '{print $2" "$4" "$3}'""" % (OBJDUMP, elf_name)
    text_out = os_command(command)
    secs = []
    for sec in text_out.splitlines():
        d = sec.split()
        if not d:
            # Blank output, e.g. from the "--" group separators that
            # 'grep -B' prints between non-adjacent matches
            continue
        try:
            secs.append((d[0], int(d[1], 16), int(d[2], 16)))
        except (IndexError, ValueError) as ex:
            logger.error(
                "@Returning memory address code sections:{}".format(ex))
    return secs
136
137
def get_executable_ranges_for_binary(elf_name):
    """
    Get function ranges from an elf file

    The mapping symbols ($x, $a, $t, $d) reported by readelf mark where
    code ('X') and data ('D') regions start. The end of every code section
    is added as an extra marker ('S') so that a trailing code region gets
    closed. Consecutive markers of a different type delimit the ranges, and
    only the code ranges are returned.

    :param elf_name: Elf binary file name
    :return: List of tuples for ranges i.e. (range start, range end), both
        inclusive; empty list if no symbol or code section was found
    """
    # Parse all $x / $d symbols
    symbol_table = []
    command = r"""%s -s %s | awk '/\$[xatd]/ {print $2" "$8}'""" % (
        READELF, elf_name)
    text_out = os_command(command)
    for line in text_out.splitlines():
        data = line.split()
        try:
            address = int(data[0], 16)
            _type = 'X' if data[1] in ('$x', '$t', '$a') else 'D'
        except (IndexError, ValueError) as ex:
            logger.error("@Getting executable ranges:{}".format(ex))
            # Do not record anything for a line that could not be parsed
            continue
        symbol_table.append((address, _type))

    # Add markers for end of code sections (start address + size)
    for sec in get_code_sections_for_binary(elf_name):
        symbol_table.append((sec[1] + sec[2], 'S'))

    if not symbol_table:
        return []

    # Sort by address
    symbol_table.sort(key=lambda tup: tup[0])

    # Create ranges (list of START/END tuples)
    ranges = []
    range_start, rtype = symbol_table[0]
    for address, _type in symbol_table:
        if _type != rtype:
            if rtype == 'X':
                # Subtract one because the first address of the
                # next range belongs to the next range.
                ranges.append((range_start, address - 1))
            range_start = address
            rtype = _type
    return ranges
184
185
def remove_workspace(path, workspace):
    """
    Express a path relative to a given workspace folder

    :param path: Path to be converted
    :param workspace: Folder the returned path is relative to, or None to
        get `path` back unchanged
    :return: `path` relative to `workspace`
    """
    if workspace is None:
        return path
    return os.path.relpath(path, workspace)
195
196
def get_function_line_numbers(source_file: str) -> Dict[str, int]:
    """
    Using ctags get all the function names with their line numbers
    within the source_file

    Entries of kind "function" are taken as they are. Entries of kind
    "label" are accepted either when the tag is named "func" (the function
    name is then the last column of the line) or when the last column is
    the tag name followed by a colon.

    :param source_file: Path of the source file to be inspected
    :return: Dictionary with function name as key and line number as value;
        empty if the ctags command fails
    """
    command = "ctags -x --c-kinds=f {}".format(source_file)
    try:
        function_lines = os_command(command).split("\n")
    except Exception as ex:
        logger.warning(f"Warning: Unknown error '{ex}' when executing command "
                       f"'{command}'")
        return {}
    fln = {}
    try:
        for line in function_lines:
            cols = line.split()
            if len(cols) < 3:
                continue
            if cols[1] == "function":
                fln[cols[0]] = int(cols[2])
            elif cols[1] == "label":
                if cols[0] == "func":
                    fln[cols[-1]] = int(cols[2])
                elif cols[0] + ":" == cols[-1]:
                    fln[cols[0]] = int(cols[2])
    except ValueError:
        # A line number column was not numeric; keep what was parsed so far
        logger.warning("Warning: Can't get all function line numbers from %s" %
                       source_file)
    return fln
227
228
class FunctionLineNumbers(object):
    """Helper that resolves the line number where a function starts within
    a source code file, caching the result of each inspected file"""

    def __init__(self, workspace: str):
        """
        Set up the (initially empty) cache that maps each source file to
        its functions and their start line numbers.

        :param workspace: The folder where the source files are deployed
        """
        self.workspace = workspace
        self.filenames = {}

    def get_line_number(self, filename: str, function_name: str) -> int:
        """
        Return the start line number of a function in a source file.

        :param filename: Source file, joined to the workspace folder
        :param function_name: Name of the function to look up
        :return: Line number of the function, or 0 when function lines are
            disabled or the function is not found in the file
        """
        if not FUNCTION_LINES_ENABLED:
            return 0
        try:
            known_functions = self.filenames[filename]
        except KeyError:
            # First request for this file: get all functions with their
            # lines in the source file and remember them
            full_path = os.path.join(self.workspace, filename)
            known_functions = get_function_line_numbers(full_path)
            self.filenames[filename] = known_functions
        return known_functions.get(function_name, 0)
252
253
class BinaryParser(object):
    """Parser of a binary dump (assembly code mixed with dwarf source
    information) that yields the logical pieces, i.e. function blocks,
    source code blocks and assembly lines, needed to match the dump against
    traces and produce a code coverage report"""

    def __init__(self, dump: str, _workspace: str, _remove_workspace: bool,
                 local_workspace: str):
        """
        Initialisation of the instance to parse binary files.

        :param dump: Binary dump (string) containing assembly code and source
            code metadata, i.e. source code location and line number.
        :param _workspace: Workspace (folder) where the source files were
            built from.
        :param _remove_workspace: Boolean to indicate if the build of
            source files was local (false) or from a CI (true).
        :param local_workspace: Path to the local workspace where the source
            files reside
        """
        self.dump = dump
        # Needs self.dump only, so it can be computed straight away
        self.no_source_functions = self.get_no_source_functions()
        self.workspace = _workspace
        self.remove_workspace = _remove_workspace
        self.local_workspace = local_workspace
        self.function_line_numbers = FunctionLineNumbers(self.local_workspace)

    def get_no_source_functions(self) -> Dict[int, Dict]:
        """Collect from the dump every function that comes without source
        code information, i.e. a function header directly followed by its
        assembly lines:
            function_name():
                start_hex_address opcode
                ....
                end_hex_address opcode

        :returns: Dictionary indexed by the function start address (in
            decimal) whose values are {'name': function name,
            'end_address': address of the last assembly line in decimal}
        """
        pattern = (r"(.+?)\(\):\n\s+([a-f0-9]+):."
                   r"+?\n(\s+([a-f0-9]+):.+?\n)*")
        found = {}
        for name, first_hex, _, last_hex in re.findall(pattern, self.dump):
            # A single instruction function has no further assembly lines,
            # so it ends where it starts
            found[int(first_hex, 16)] = {
                'name': name,
                'end_address': int(last_hex or first_hex, 16)}
        return found

    class SourceCodeBlock(object):
        """Logical representation of a source code block inside a function
        block of the binary dump. It is made of:
        - The source code file the assembly code was generated from.
        - The line number within that source code file.
        - The block of assembly code.
        """

        def __init__(self, source_code_block_dump):
            """
            Create an instance of a source code block within a function block.

            :param source_code_block_dump: Sequence of 3 elements: source
                file, line number and assembly code.
            """
            (self.source_file,
             self.line_number,
             self.asm_code) = source_code_block_dump

        @staticmethod
        def get(dwarf_data: str) -> Generator['BinaryParser.SourceCodeBlock',
                                              None, None]:
            """
            Static method generator to extract the source code blocks from
            the dwarf data of a function block.

            :param dwarf_data: Dwarf data (string) of a function block
            :return: A SourceCodeBlock object per block found
            """
            pattern = (r"(?s)(/[a-zA-Z_0-9][^\n]+?):"
                       r"([0-9]+)(?: [^\n]+)?\n(.+?)"
                       r"\n(?=/[a-zA-Z_0-9][^\n]+?"
                       r":[0-9]+[^\n]+?\n|\n$)")
            for match in re.findall(pattern, dwarf_data):
                if len(match) == 3:
                    file_name, line_number, asm_code = match
                    # Trailing blank lines are for parsing assembly lines
                    yield BinaryParser.SourceCodeBlock(
                        [file_name, line_number, asm_code + "\n\n"])
                else:
                    logger.warning(f"Source code incomplete:"
                                   f"{match}")

        def __str__(self):
            return f"'{self.source_file}:{self.line_number}'"

    class FunctionBlock(object):
        """Logical representation of a function block of the binary dump,
        i.e. a function declaration in the source code followed by assembly
        code mixed with the corresponding source code lines (dwarf
        information).
        The function block has the following components:
        - Function name at source code.
        - DWARF data.
        - Function declaration's line number at source code.
        This comes from dump blocks like these:
        0000000000000230 <_setup>:
        read_el():  <---- Function name at source code
        /home/user/aarch64/setup.c:238  <------ Source file and line number
         230:   d53e1100    mrs x0, scr_el3  <----- Assembly lines belonging
                                                    to the source code
        no_setup():
        /home/user/no_setup.c:618
         234:   b2760000    orr x0, x0, #0x400
        """

        def __init__(self, function_group: List[str]):
            """
            Create an instance of a function block within a binary dump.

            :param function_group: List containing the function name and
                dwarf data of the block.
            """
            self.name, self.dwarf = function_group
            # The function's source file is the first one in the dwarf data
            location = re.search(r"(/.+?):([0-9]+)(?: [^\n]+)?\n", self.dwarf)
            self.source_file = location.group(1).strip() if location else None
            # Computed later
            self.function_line_number = None

        @staticmethod
        def get(dump: str) -> Generator['BinaryParser.FunctionBlock', None,
                                        None]:
            """
            Static method generator to extract the function blocks from the
            binary dump.

            :param dump: Binary dump (string) that contains the binary file
                information.
            :return: A FunctionBlock object that is a logical representation
                of a function declaration within the binary dump.
            """
            pattern = (r"(?s)([a-zA-Z0-9_]+?)\(\):"
                       r"\n(/.+?:[0-9]+?.+?)\n"
                       r"(?=[a-zA-Z0-9_]+?\(\):\n|\n\n$)")
            for match in re.findall(pattern, dump):
                if len(match) == 2:
                    function_name, dwarf = match
                    # Trailing blank lines are for parsing source code blocks
                    yield BinaryParser.FunctionBlock(
                        [function_name, dwarf + "\n\n"])

        @property
        def values(self):
            """Tuple of (name, source file, function line number)"""
            return self.name, self.source_file, self.function_line_number

        def __str__(self):
            return f"'{self.name}:{self.function_line_number}'"

    class AssemblyLine(object):
        """Logical representation of an assembly code line inside an
        assembly code block. It is made of:
        - Hexadecimal address of the assembly instruction.
        - Assembly instruction.
        """

        def __init__(self, line):
            """
            Create an instance representing an assembly code line within an
            assembly code block.

            :param line: Tuple of 2 elements [Hexadecimal number,
                and assembly code]
            """
            hex_address, opcode = line
            self.hex_line_number = hex_address
            self.opcode = opcode
            self.dec_address = int(hex_address, 16)
            self.times_executed = 0

        @staticmethod
        def get(asm_code: str) -> Generator['BinaryParser.AssemblyLine',
                                            None, None]:
            """
            Static method generator to extract the assembly code lines from
            an assembly code block.

            :param asm_code: Lines of assembly code within the dump
            :return: AssemblyLine object.
            """
            flags = re.DOTALL | re.MULTILINE
            for match in re.findall(r"^\s+([a-fA-F0-9]+):\t(.+?)\n", asm_code,
                                    flags):
                if len(match) == 2:
                    yield BinaryParser.AssemblyLine(match)
                else:
                    logger.warning(f"Assembly code incomplete: {match}")

    @staticmethod
    def get_asm_line(source_code_block: 'BinaryParser.SourceCodeBlock',
                     traces_stats) -> \
            Generator['BinaryParser.AssemblyLine', None, None]:
        """Generator method to obtain all the assembly lines of a source
        code block, each one updated with the number of times it was
        executed according to the traces.

        :param source_code_block: SourceCodeBlock with the assembly code
        :param traces_stats: Dictionary indexed by address in decimal whose
            values hold the times executed as first element
        :return: AssemblyLine object.
        """
        not_executed = [0]
        for asm_line in BinaryParser.AssemblyLine.get(
                source_code_block.asm_code):
            stat = traces_stats.get(asm_line.dec_address, not_executed)
            asm_line.times_executed = stat[0]
            yield asm_line

    def get_source_code_block(self, function_block: FunctionBlock) -> \
            Generator['BinaryParser.SourceCodeBlock', None, None]:
        """
        Generator method to obtain all the source code blocks within a
        function block.

        :param function_block: FunctionBlock object that contains the
            source code blocks.
        :return: A SourceCodeBlock object, with its source file relative to
            the workspace when the workspace has to be removed.
        """
        blocks = BinaryParser.SourceCodeBlock.get(function_block.dwarf)
        for block in blocks:
            if self.remove_workspace:
                relative_file = remove_workspace(block.source_file,
                                                 self.workspace)
                block.source_file = relative_file
            yield block

    def get_function_block(self) -> Generator['BinaryParser.FunctionBlock',
                                              None, None]:
        """Generator method to obtain all the function blocks contained in
        the binary dump file. Functions with no source file are reported and
        skipped; the remaining ones get their function line number set.
        """
        for block in BinaryParser.FunctionBlock.get(self.dump):
            if block.source_file is None:
                logger.warning(f"Source file not found for function "
                               f"{block.name}, will not be covered")
                continue
            if self.remove_workspace:
                block.source_file = remove_workspace(block.source_file,
                                                     self.workspace)
            line_number = self.function_line_numbers.get_line_number(
                block.source_file, block.name)
            block.function_line_number = line_number
            yield block
495
496
class CoverageHandler(object):
    """ Class used to keep the coverage of the source files, i.e. for each
    source file the coverage of its functions and of its lines of code,
    built from function blocks obtained from DWARF data and from the traces
    of the CC plugin. The layout is:
    {source file}=>{"functions": {name}=>{"covered", "line_number"},
                    "lines": {line number}=>{"covered", "elf_index":
                        {elf index}=>{address}=>(opcode, times executed)}}
    """

    def __init__(self):
        self._source_files = {}

    def add_function_coverage(self, function_data:
                              Union[BinaryParser.FunctionBlock,
                                    Tuple[str, str, int]]):
        """ Register a function (as not covered) under its source file,
        creating the source file entry when missing. Entries that already
        exist are left untouched.

        :param function_data: Either a FunctionBlock or a tuple of
            (function name, source file, function line number)
        """
        if isinstance(function_data, BinaryParser.FunctionBlock):
            name, source_file, function_line_number = function_data.values
        else:
            name, source_file, function_line_number = function_data

        # setdefault returns the stored entry whether or not it was new
        file_entry = self._source_files.setdefault(
            source_file, {"functions": {}, "lines": {}})
        file_entry["functions"].setdefault(
            name, {"covered": False, "line_number": function_line_number})

    def add_line_coverage(self, source_code_block:
                          BinaryParser.SourceCodeBlock):
        """ Register a source code line (as not covered) under its source
        file, creating the source file entry when missing. Entries that
        already exist are left untouched.

        :param source_code_block: Block holding source file and line number
        """
        file_entry = self._source_files.setdefault(
            source_code_block.source_file, {"functions": {}, "lines": {}})
        file_entry["lines"].setdefault(
            source_code_block.line_number, {"covered": False, "elf_index": {}})

    def add_asm_line(self, source_code_block: BinaryParser.SourceCodeBlock,
                     asm_line: BinaryParser.AssemblyLine, elf_index: int):
        """Attach an assembly line from the DWARF data to its source code
        line, grouped by elf index. The source code line must have been
        added beforehand; an address already stored is not overwritten.

        :param source_code_block: Block holding source file and line number
        :param asm_line: Assembly line to be stored
        :param elf_index: Index of the elf file the assembly line is from
        """
        line_entry = self._source_files[source_code_block.source_file][
            "lines"][source_code_block.line_number]
        per_elf = line_entry["elf_index"].setdefault(elf_index, {})
        per_elf.setdefault(asm_line.dec_address,
                           (asm_line.opcode, asm_line.times_executed))

    def set_line_coverage(self, source_code_block:
                          BinaryParser.SourceCodeBlock, value: bool):
        """Set the covered flag of an already added source code line"""
        lines = self._source_files[source_code_block.source_file]["lines"]
        lines[source_code_block.line_number]["covered"] = value

    def set_function_coverage(self, function_block:
                              Union[BinaryParser.FunctionBlock,
                                    Tuple[str, str]], value: bool):
        """Set the covered flag of an already added function

        :param function_block: Either a FunctionBlock or a tuple of
            (function name, source file)
        :param value: New value of the covered flag
        """
        if isinstance(function_block, BinaryParser.FunctionBlock):
            name = function_block.name
            source_file = function_block.source_file
        else:
            name, source_file = function_block
        functions = self._source_files[source_file]["functions"]
        functions[name]["covered"] = value

    @property
    def source_files(self):
        """Dictionary with the coverage collected so far"""
        return self._source_files
566
567
class IntermediateCodeCoverage(object):
    """Class used to process the trace data along with the dwarf
    signature files to produce an intermediate layer in json with
    code coverage in assembly and c source code.
    """

    def __init__(self, _config, local_workspace):
        """
        :param _config: Configuration dictionary; the keys read here are
            ['parameters']['workspace'],
            ['configuration']['remove_workspace'] and ['elfs']
        :param local_workspace: Path to the local workspace where the source
            files reside
        """
        self._data = {}
        self.config = _config
        self.workspace = self.config['parameters']['workspace']
        self.remove_workspace = self.config['configuration']['remove_workspace']
        self.local_workspace = local_workspace
        self.elfs = self.config['elfs']
        # Dictionary with stats from trace files {address}=(times executed,
        # inst size)
        self.traces_stats = {}
        # Dictionary of unique assembly line memory address against source
        # file location
        # {assembly address} = (opcode, source file location, line number in
        # the source file, times executed)
        self.asm_lines = {}
        # Dictionary of {source file location}=>{'lines': {'covered':Boolean,
        # 'elf_index'; {elf index}=>{assembly address}=>(opcode,
        # times executed),
        # 'functions': {function name}=>is covered(boolean)}
        self.coverage = CoverageHandler()
        self.functions = []
        # Unique set of elf list of files
        self.elf_map = {}
        # Next index for elf custom mappings (set on first use)
        self.elf_custom = None

    def process(self):
        """
        Public method to process the trace files and dwarf signatures
        using the information contained in the json configuration file.
        This method writes the intermediate json file output linking
        the trace data and c source and assembly code.
        """
        self.asm_lines = {}
        # Initialize for unknown elf files; kept across calls so that
        # indexes already handed out are not reused
        if self.elf_custom is None:
            self.elf_custom = ELF_MAP["custom_offset"]
        parameters = self.config['parameters']
        print("Generating intermediate json layer '{}'...".format(
            parameters['output_file']))
        for elf in self.elfs:
            # Obtain trace data
            self.traces_stats = load_stats_from_traces(elf['traces'])
            # Produce code coverage
            self._process_binary(elf['name'])
        # Write to the intermediate json file
        data = {"source_files": self.coverage.source_files,
                "configuration": {
                    "sources": parameters['sources'],
                    "metadata": parameters.get('metadata', ""),
                    "elf_map": self.elf_map}
                }
        json_data = json.dumps(data, indent=4, sort_keys=True)
        with open(parameters['output_file'], "w") as f:
            f.write(json_data)

    def get_elf_index(self, elf_name: str) -> int:
        """Obtains the elf index and fills the elf_map instance variable

        Names listed in ELF_MAP get their static index; any other name gets
        the next custom index of this instance. The module level ELF_MAP is
        not modified.

        :param elf_name: Elf name, i.e. file name without folder/extension
        :return: Index assigned to the elf
        """
        if elf_name not in self.elf_map:
            if elf_name in ELF_MAP:
                self.elf_map[elf_name] = ELF_MAP[elf_name]
            else:
                if self.elf_custom is None:
                    self.elf_custom = ELF_MAP["custom_offset"]
                self.elf_map[elf_name] = self.elf_custom
                self.elf_custom += 1
        return self.elf_map[elf_name]

    def _process_binary(self, elf_filename: str) -> BinaryParser:
        """
        Process an elf file i.e. match the source code and asm lines against
        trace files (coverage).

        :param elf_filename: Elf binary file name
        :return: The BinaryParser created for the elf file
        """
        # Swap the extension rather than replacing ".elf": a file name
        # without ".elf" would otherwise make tee overwrite the binary
        dump_filename = os.path.splitext(elf_filename)[0] + ".dump"
        command = "%s -Sl %s | tee %s" % (OBJDUMP, elf_filename,
                                          dump_filename)
        dump = os_command(command, show_command=True)
        dump += "\n\n"  # For pattern matching the last function
        logger.info(f"Parsing assembly file {elf_filename}")
        elf_name = os.path.splitext(os.path.basename(elf_filename))[0]
        elf_index = self.get_elf_index(elf_name)
        parser = BinaryParser(dump, self.workspace, self.remove_workspace,
                              self.local_workspace)
        total_number_functions = 0
        functions_covered = 0
        for function_block in parser.get_function_block():
            total_number_functions += 1
            # Function contains source code
            self.coverage.add_function_coverage(function_block)
            is_function_covered = False
            for source_code_block in parser.get_source_code_block(
                    function_block):
                self.coverage.add_line_coverage(source_code_block)
                is_line_covered = False
                for asm_line in parser.get_asm_line(source_code_block,
                                                    self.traces_stats):
                    # A line is covered as soon as one of its assembly
                    # lines was executed
                    is_line_covered = asm_line.times_executed > 0 or \
                        is_line_covered
                    self.coverage.add_asm_line(source_code_block, asm_line,
                                               elf_index)
                logger.debug(f"Source file {source_code_block} is "
                             f"{'' if is_line_covered else 'not '}covered")
                if is_line_covered:
                    self.coverage.set_line_coverage(source_code_block, True)
                    is_function_covered = True
            logger.debug(f"\tFunction '{function_block.name}' at '"
                         f"{function_block.source_file}' is "
                         f"{'' if is_function_covered else 'not '}covered")
            if is_function_covered:
                self.coverage.set_function_coverage(function_block, True)
                functions_covered += 1
        logger.info(f"Total functions: {total_number_functions}, Functions "
                    f"covered:{functions_covered}")
        # Now check code coverage in the functions with no dwarf signature
        self._process_fn_no_sources(parser)
        return parser

    def _process_fn_no_sources(self, parser: BinaryParser):
        """
        Checks function coverage for functions with no dwarf signature i.e
        sources.

        Each of these functions is searched for (grep) in the source files
        of the local workspace; it is only added to the coverage when it is
        found in exactly one source file.

        :param parser: Binary parser that contains objects needed
            to check function line numbers including the dictionary of
            functions to be checked i.e
            [start_dec_address]={'name', 'end_address'}
        """
        if not FUNCTION_LINES_ENABLED:
            return  # No source code at the workspace
        traces_addresses = sorted(self.traces_stats.keys())
        traces_address_pointer = 0
        _functions = parser.no_source_functions
        functions_addresses = sorted(_functions.keys())
        address_size = 4
        for start_address in functions_addresses:
            function_covered = False
            function_name = _functions[start_address]['name']
            # Get all files in the source code where the function is defined
            source_files = os_command("grep --include '*.c' --include '*.s' "
                                      "--include '*.S' -nrw '{}' {}"
                                      "| cut -d: -f1".
                                      format(function_name,
                                             self.local_workspace))
            unique_files = set(source_files.split())
            sources_found = []
            for source_file in unique_files:
                line_number = parser.function_line_numbers.get_line_number(
                    source_file, function_name)
                if line_number > 0:
                    sources_found.append((source_file, line_number))
            if len(sources_found) == 0:
                logger.debug(f"'{function_name}' not found in sources")
            elif len(sources_found) > 1:
                logger.warning(f"'{function_name}' declared in "
                               f"{len(sources_found)} files")
            else:
                source_file_found, function_line_number = sources_found[0]
                function_source_file = remove_workspace(source_file_found,
                                                        self.local_workspace)
                self.coverage.add_function_coverage((function_name,
                                                     function_source_file,
                                                     function_line_number))
                # The function is covered if any of its addresses shows up
                # in the traces not consumed by a previous function
                for in_function_address in \
                        range(start_address,
                              _functions[start_address]['end_address']
                              + address_size, address_size):
                    if in_function_address in traces_addresses[
                                              traces_address_pointer:]:
                        function_covered = True
                        traces_address_pointer = traces_addresses.index(
                            in_function_address) + 1
                        break
                logger.info(f"Added non-sources function '{function_name}' "
                            f"with coverage: {function_covered}")
                if function_covered:
                    self.coverage.set_function_coverage((function_name,
                                                         function_source_file),
                                                        function_covered)
Basil Eljuse4b14afb2020-09-30 13:07:23 +0100756
757
# Text shown as the epilog of the command line help; it documents the
# layout expected for the json configuration file
json_conf_help = """
Produces an intermediate json layer for code coverage reporting
using an input json configuration file.

Input json configuration file format:
{
    "configuration":
        {
        "remove_workspace": <true if 'workspace' must be removed from the
                            path of the source files>,
        "include_assembly": <true to include assembly source code in the
                            intermediate layer>
        },
    "parameters":
        {
        "objdump": "<Path to the objdump binary to handle dwarf signatures>",
        "readelf": "<Path to the readelf binary to handle dwarf signatures>",
        "sources": [ <List of source code origins, one or more of the next
                     options>
                    {
                    "type": "git",
                    "URL": "<URL git repo>",
                    "COMMIT": "<Commit id>",
                    "REFSPEC": "<Refspec>",
                    "LOCATION": "<Folder within 'workspace' where this source
                                is located>"
                    },
                    {
                    "type": "http",
                    "URL": "<URL link to file>",
                    "COMPRESSION": "xz",
                    "LOCATION": "<Folder within 'workspace' where this source
                                is located>"
                    }
                ],
        "workspace": "<Workspace folder where the source code was located to
                     produce the elf/axf files>",
        "output_file": "<Intermediate layer output file name and location>",
        "metadata": {<Metadata objects to be passed to the intermediate json
                    files>}
        },
    "elfs": [ <List of elf files to be traced/parsed>
            {
            "name": "<Full path name to elf/axf file>",
            "traces": [ <List of trace files to be parsed for this
                        elf/axf file>
                        "<Full path name to the trace file>"
                      ]
            }
        ]
}
"""
# Toolchain binaries and feature switch; main() assigns them from the
# configuration file and the command line arguments
OBJDUMP = None
READELF = None
FUNCTION_LINES_ENABLED = None
# Bound at module level so the code that logs through the global 'logger'
# also works when this file is imported instead of executed as a script
logger = cc_logger.logger
Basil Eljuse4b14afb2020-09-30 13:07:23 +0100813
814
def main():
    """
    Command line entry point.

    Parses the arguments, loads the json configuration file, stores the
    objdump/readelf paths in the module globals, checks that both tools
    (and ctags when a local workspace is given) can be executed and
    finally runs the intermediate layer processing.

    If the configuration file cannot be opened or parsed an error message
    is printed and the function returns without processing anything.
    """
    global OBJDUMP
    global READELF
    global FUNCTION_LINES_ENABLED

    parser = argparse.ArgumentParser(epilog=json_conf_help,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('--config-json', metavar='PATH',
                        dest="config_json", default='config_file.json',
                        help='JSON configuration file', required=True)
    parser.add_argument('--local-workspace', default="",
                        help=('Local workspace folder where source code files'
                              ' and folders resides'))
    args = parser.parse_args()
    try:
        with open(args.config_json, 'r') as f:
            config = json.load(f)
    except Exception as ex:
        print("Error at opening and processing JSON: {}".format(ex))
        return
    print(json.dumps(config, indent=4))
    # Setting toolchain binary tools variables
    OBJDUMP = config['parameters']['objdump']
    READELF = config['parameters']['readelf']
    # Checking if are installed
    os_command("{} --version".format(OBJDUMP))
    os_command("{} --version".format(READELF))

    if args.local_workspace != "":
        # Checking ctags installed
        try:
            os_command("ctags --version")
        # NOTE(review): BaseException kept on purpose; os_command is
        # documented to exit the program on failure, presumably through
        # SystemExit - confirm before narrowing this clause
        except BaseException:
            # Adjacent literals instead of a backslash continuation, which
            # leaked the source indentation into the printed message
            print("Warning!: ctags not installed/working function line "
                  "numbers will be set to 0. [{}]".format(
                      "sudo apt install exuberant-ctags"))
        else:
            FUNCTION_LINES_ENABLED = True

    intermediate_layer = IntermediateCodeCoverage(config, args.local_workspace)
    intermediate_layer.process()
Basil Eljuse4b14afb2020-09-30 13:07:23 +0100856
857
if __name__ == '__main__':
    # Script mode only: bind the shared logger, run the tool and report
    # how long the whole run took
    logger = cc_logger.logger
    launched_at = time.time()
    main()
    print("Elapsed time: {}s".format(time.time() - launched_at))