Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 1 | # !/usr/bin/env python |
| 2 | ############################################################################### |
Jelle Sels | 83f141e | 2022-08-01 15:17:40 +0000 | [diff] [blame] | 3 | # Copyright (c) 2020-2022, ARM Limited and Contributors. All rights reserved. |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 4 | # |
| 5 | # SPDX-License-Identifier: BSD-3-Clause |
| 6 | ############################################################################### |
| 7 | |
| 8 | ############################################################################### |
| 9 | # FILE: intermediate_layer.py |
| 10 | # |
| 11 | # DESCRIPTION: Creates an intermediate json file with information provided |
| 12 | # by the configuration json file, dwarf signatures and trace |
| 13 | # files. |
| 14 | # |
| 15 | ############################################################################### |
| 16 | |
| 17 | import os |
| 18 | import re |
| 19 | import glob |
| 20 | import argparse |
| 21 | import subprocess |
| 22 | import json |
| 23 | from argparse import RawTextHelpFormatter |
| 24 | import logging |
| 25 | import time |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 26 | from typing import Dict |
| 27 | from typing import List |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 28 | |
__version__ = "7.0"

# Static map that defines the elf file source type in the intermediate json.
# Keys are elf base names (file name without extension); values are the index
# written to the "elf_map" section of the intermediate json file.
# "custom_offset" is the first index handed out to elf names not listed here.
ELF_MAP = {
    "bl1": 0,
    "bl2": 1,
    "bl31": 2,
    "bl32": 3,
    "scp_ram": 10,
    "scp_rom": 11,
    "mcp_rom": 12,
    "mcp_ram": 13,
    "secure_hafnium": 14,
    "hafnium": 15,
    # Historical misspelling of "hafnium", kept so existing configurations
    # that rely on it keep resolving to the same index.
    "hafium": 15,
    "custom_offset": 100
}
| 45 | |
| 46 | |
def os_command(command, show_command=False):
    """
    Execute an OS command through the shell; raise if the command fails.

    :param command: OS command as string. It is interpreted by the shell, so
    pipelines are allowed; it must only be built from trusted input.
    :param show_command: Optional argument to print the command in stdout
    :return: The string output of the os command (stdout and stderr
    combined). Bytes that are not valid utf8 are replaced instead of
    aborting the whole run.
    :raises Exception: If the command exits with a non-zero status. The
    message carries the decoded command output and the exit status, and the
    original ``subprocess.CalledProcessError`` is chained as the cause.
    """
    if show_command:
        print("OS command: {}".format(command))
    try:
        out = subprocess.check_output(
            command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as ex:
        # Decode the captured output so the message shows readable text
        # rather than a bytes repr (b'...').
        output = (ex.output or b"").decode("utf8", errors="replace")
        raise Exception(
            "Exception running command '{}': {}({})".format(
                command, output, ex.returncode)) from ex
    return out.decode("utf8", errors="replace")
| 65 | |
| 66 | |
def load_stats_from_traces(trace_globs):
    """
    Function to process and consolidate statistics from trace files

    Every trace line is expected to hold three whitespace separated fields:
    the memory address (hexadecimal), the number of times it was executed
    and the instruction size. Blank lines are ignored. A malformed line is
    logged and skipped without discarding the rest of the file.

    :param trace_globs: List of trace file patterns
    :return: Dictionary with stats from trace files i.e.
        {mem address in decimal}=(times executed, inst size)
    :raises Exception: If no file matches any of the patterns
    """
    stats = {}
    stat_size = {}

    # Make a set of unique trace files
    trace_files = set()
    for tg in trace_globs:
        trace_files.update(glob.glob(tg))

    if not trace_files:
        raise Exception("No trace files found for '{}'".format(trace_globs))
    # Load stats from the trace files
    for trace_file in trace_files:
        try:
            with open(trace_file, 'r') as f:
                for line_number, line in enumerate(f, start=1):
                    data = line.split()
                    if not data:
                        continue  # Blank line, nothing to account for
                    try:
                        address = int(data[0], 16)
                        stat = int(data[1])
                        size = int(data[2])
                    except (IndexError, ValueError) as ex:
                        logger.error(
                            "@Loading stats from trace files:{}:{}: {}".format(
                                trace_file, line_number, ex))
                        continue
                    # The last size seen for an address wins
                    stat_size[address] = size
                    stats[address] = stats.get(address, 0) + stat
        except Exception as ex:
            # Best effort: an unreadable file must not stop the others
            logger.error("@Loading stats from trace files:{}".format(ex))
    # Merge the two dicts
    return {address: (count, stat_size[address])
            for address, count in stats.items()}
| 106 | |
| 107 | |
def get_code_sections_for_binary(elf_name):
    """
    Function to return the ranges of memory address for sections of code
    in the elf file

    The section headers are obtained with ``objdump -h``; for every section
    flagged as CODE the 2nd, 4th and 3rd columns of its header line are
    kept, in that order.

    :param elf_name: Elf binary file name
    :return: List of code sections tuples, i.e. (section name, initial
    address, size), the last two as integers
    """
    command = """%s -h %s | grep -B 1 CODE | grep -v CODE \
                | awk '{print $2" "$4" "$3}'""" % (OBJDUMP, elf_name)
    text_out = os_command(command)
    sections = text_out.split('\n')
    sections.pop()
    secs = []
    for sec in sections:
        d = sec.split()
        if not d:
            # grep -B emits "--" separators between groups, which awk turns
            # into blank lines; they carry no section information.
            continue
        try:
            secs.append((d[0], int(d[1], 16), int(d[2], 16)))
        except (IndexError, ValueError) as ex:
            logger.error(
                "@Returning memory address code sections:{}".format(ex))
    return secs
| 131 | |
| 132 | |
def get_executable_ranges_for_binary(elf_name):
    """
    Get function ranges from an elf file

    The mapping symbols ($x, $a, $t for code and $d for data) reported by
    ``readelf -s`` are combined with end-of-section markers, sorted by
    address and folded into contiguous executable ranges.

    :param elf_name: Elf binary file name
    :return: List of tuples for ranges i.e. (range start, range end), both
    inclusive. Empty when the binary has no mapping symbols or code sections.
    """
    # Parse all $x / $d symbols
    symbol_table = []
    command = r"""%s -s %s | awk '/\$[xatd]/ {print $2" "$8}'""" % (
        READELF, elf_name)
    text_out = os_command(command)
    lines = text_out.split('\n')
    lines.pop()
    for line in lines:
        try:
            data = line.split()
            address = int(data[0], 16)
            _type = 'X' if data[1] in ['$x', '$t', '$a'] else 'D'
        except (IndexError, ValueError) as ex:
            logger.error("@Getting executable ranges:{}".format(ex))
            # Do not record a symbol that could not be parsed; otherwise a
            # stale or None entry would corrupt the table.
            continue
        symbol_table.append((address, _type))

    # Add markers for end of code sections (start address + size)
    sections = get_code_sections_for_binary(elf_name)
    for sec in sections:
        symbol_table.append((sec[1] + sec[2], 'S'))

    if not symbol_table:
        return []

    # Sort by address
    symbol_table = sorted(symbol_table, key=lambda tup: tup[0])

    # Create ranges (list of START/END tuples)
    ranges = []
    range_start = symbol_table[0][0]
    rtype = symbol_table[0][1]
    for sym in symbol_table:
        if sym[1] != rtype:
            if rtype == 'X':
                # Subtract one because the first address of the
                # next range belongs to the next range.
                ranges.append((range_start, sym[0] - 1))
            range_start = sym[0]
            rtype = sym[1]
    return ranges
| 179 | |
| 180 | |
def list_of_functions_for_binary(elf_name: str) -> Dict[str, Dict[str, any]]:
    """
    Collect the function symbols listed by ``objdump -t`` for an elf file

    :param elf_name: Elf binary file name
    :return: Dictionary indexed by function name. Each value holds 'start'
    (first hexadecimal field of the symbol line, as a string), 'end' (second
    hexadecimal field of the symbol line, as a string) and 'sources'
    (dwarf signature flag, initially False)
    """
    listing = os_command(
        "%s -t %s | awk 'NR>4' | sed /^$/d" % (OBJDUMP, elf_name))
    symbol_rex = r'([0-9a-fA-F]+) (.{7}) ([^ ]+)[ \t]([0-9a-fA-F]+) (.*)'
    hidden_tag = '.hidden '
    found = {}
    for entry in listing.split('\n')[:-1]:
        try:
            first_hex, flags, _section, second_hex, name = \
                re.findall(symbol_rex, entry)[0]
            # Only symbols flagged as functions are of interest
            if 'F' not in flags:
                continue
            # We don't want the .hidden for hidden functions
            if name.startswith(hidden_tag):
                name = name[len(hidden_tag):]
            if name in found:
                logger.warning("'{}' duplicated in '{}'".format(
                    name,
                    elf_name))
            else:
                found[name] = {'start': first_hex,
                               'end': second_hex,
                               'sources': False}
        except Exception as ex:
            logger.error("@Listing functions at file {}: {}".format(
                elf_name,
                ex))
    return found
| 217 | |
| 218 | |
def apply_functions_exclude(elf_config, functions):
    """
    Remove excluded functions from the list of functions

    A function is excluded when its name matches, from its start, any of the
    regular expressions listed under 'exclude_functions' in the elf config.

    :param elf_config: Config for elf binary file
    :param functions: Dictionary of functions in the binary elf file,
    indexed by function name
    :return: Tuple with included and excluded functions, both dictionaries
    indexed by function name
    """
    if 'exclude_functions' not in elf_config:
        # Same container type as the general case below
        return functions, {}
    patterns = [re.compile(rex) for rex in elf_config['exclude_functions']]
    incl = {}
    excl = {}
    for fname, details in functions.items():
        if any(pattern.match(fname) for pattern in patterns):
            excl[fname] = details
        else:
            incl[fname] = details
    return incl, excl
| 241 | |
| 242 | |
def remove_workspace(path, workspace):
    """
    Express a path relative to a workspace folder

    :param path: Path to be converted
    :param workspace: Folder the returned path is relative to, or None to
    leave the path untouched
    :return: The path relative to the workspace, or the original path when
    no workspace is given
    """
    if workspace is None:
        return path
    return os.path.relpath(path, workspace)
| 252 | |
| 253 | |
def get_function_line_numbers(source_file: str) -> Dict[str, int]:
    """
    Using ctags get all the function names with their line numbers
    within the source_file

    Two kinds of ctags entries are used: 'function' entries, and 'label'
    entries whose first column is 'func', for which the last column is taken
    as the function name.

    :param source_file: Source code file to be inspected by ctags
    :return: Dictionary with function name as key and line number as value.
    Empty when the ctags command fails; partial when some entries carry a
    line number that can't be parsed.
    """
    command = "ctags -x --c-kinds=f {}".format(source_file)
    try:
        function_lines = os_command(command).split("\n")
    except Exception as ex:
        logger.warning(f"Warning: Unknown error '{ex}' when executing command "
                       f"'{command}'")
        return {}

    fln = {}
    incomplete = False
    for line in function_lines:
        cols = line.split()
        if len(cols) < 3:
            continue
        try:
            if cols[1] == "function":
                fln[cols[0]] = int(cols[2])
            elif cols[1] == "label" and cols[0] == "func":
                fln[cols[-1]] = int(cols[2])
        except ValueError:
            # Skip the entry but keep the ones that can be parsed
            incomplete = True
    if incomplete:
        logger.warning("Warning: Can't get all function line numbers from %s" %
                       source_file)
    return fln
| 282 | |
| 283 | |
class FunctionLineNumbers(object):
    """Helper that resolves the start line number of a function inside a
    source code file, keeping the per-file results for later queries"""

    def __init__(self, workspace: str):
        """
        Set up the per-file store of function start line numbers.

        :param workspace: The folder where the source files are deployed
        """
        self.workspace = workspace
        # {source file name} => {function name} => start line number
        self.filenames = {}

    def get_line_number(self, filename: str, function_name: str) -> int:
        """
        Return the start line number of a function within a source file.

        :param filename: Source file name, relative to the workspace
        :param function_name: Name of the function to look for
        :return: The line number, or 0 when function line numbers are
        disabled or the function is not found in the file
        """
        if not FUNCTION_LINES_ENABLED:
            return 0
        per_file = self.filenames.get(filename)
        if per_file is None:
            # First query for this file: resolve all its functions once
            per_file = get_function_line_numbers(
                os.path.join(self.workspace, filename))
            self.filenames[filename] = per_file
        return per_file.get(function_name, 0)
| 306 | |
| 307 | |
class BinaryParser(object):
    """Class used to create an instance to parse the binary files with a
    dwarf signature in order to produce logical information to be matched with
    traces and produce a code coverage report"""

    def __init__(self, dump: str, function_list: Dict[str, Dict[str, any]],
                 prefix: str, function_line_numbers: "FunctionLineNumbers"):
        """
        Initialisation of the instance to parse binary files.

        :param dump: Binary dump (string) containing assembly code and source
        code metadata, i.e. source code location and line number.
        :param function_list: Dictionary of functions defined in the binary
        dump.
        :param prefix: Prefix for every source code file contained in the
        binary dump file, usually the workspace (folders) where the source code
        files were built.
        :param function_line_numbers: Object instance to get a function line
        number within a source code file.
        """
        self.dump = dump
        self.function_list = function_list
        self.prefix = prefix
        # Set by get_source_code_block while a function block is parsed
        self.function_definition = None
        self.function_line_numbers = function_line_numbers

    class FunctionBlock(object):
        """Class used to parse and obtain a function block from the
        binary dump file that corresponds to a function declaration within
        the binary assembly code.
        The function block has the following components:
        - Function start address in memory (hexadecimal).
        - Function name.
        - Function code.
        """

        def __init__(self, function_group: List[str]):
            """
            Create an instance of a function block within a binary dump.

            :param function_group: List containing the function start
            address, name and code in the function block.
            """
            self.start, self.name, self.code = function_group
            self.source_file = None
            self.function_line_number = None

        @staticmethod
        def get(dump: str):
            """
            Static method generator to extract a function block from the binary
            dump.

            A block starts at a '<address> <name>:' header and extends up to
            the next header, so the dump must end with a header for its last
            function to be found.

            :param dump: Binary dump (string) that contains the binary file
            information.
            :return: A FunctionBlock object that is a logical representation
            of a function declaration within the binary dump.
            """
            function_groups = re.findall(
                r"(?s)([0-9a-fA-F]+) <([a-zA-Z0-9_]+)>:\n(.+?)(?=[A-Fa-f0-9]* "
                r"<[a-zA-Z0-9_]+>:)", dump, re.DOTALL | re.MULTILINE)
            for group in function_groups:
                function_group = list(group)
                function_group[-1] += "\n"
                yield BinaryParser.FunctionBlock(function_group)

    class SourceCodeBlock(object):
        """Class used to represent a source code block of information within
        a function block in a binary dump file.
        The source code block contains the following components:
        - Optional function name where the source code/assembly code is defined.
        - Source code file that contains the source code corresponding
        to the assembly code.
        - Line number within the source code file corresponding to the source
        code.
        - Assembly code block.
        """

        def __init__(self, source_code_block):
            """
            Create an instance of a source code block within a function block.

            :param source_code_block: Tuple of 4 elements that contains the
            components of a source code block.
            """
            self.function_name, self.source_file, self.line, self.asm_code \
                = source_code_block

        def get_assembly_line(self):
            """Getter to return an AssemblyLine instance that corresponds to
            a logical representation of an assembly code line contained
            within a source code block (assembly code block)"""
            return BinaryParser.AssemblyLine.get(self)

    class AssemblyLine(object):
        """Class used to represent an assembly code line within an
        assembly code block.
        The assembly line instruction is formed by the following components:
        - Hexadecimal address of the assembly instruction.
        - Assembly instruction.
        """

        def __init__(self, line):
            """
            Create an instance representing an assembly code line within an
            assembly code block.

            :param line: Tuple of 2 elements [Hexadecimal number,
            and assembly code]
            """
            self.hex_line_number, self.opcode = line
            self.dec_address = int(self.hex_line_number, 16)

        @staticmethod
        def get(source_code_block):
            """
            Static method generator to extract an assembly code line from a
            assembly code block.

            :param source_code_block: Object that contains the assembly code
            within the source code block.
            :return: AssemblyLine object.
            """
            lines = re.findall(
                r"^[\s]+([a-fA-F0-9]+):\t(.+?)\n",
                source_code_block.asm_code, re.DOTALL | re.MULTILINE)
            for line in lines:
                yield BinaryParser.AssemblyLine(line)

    class FunctionDefinition(object):
        """
        Class used to handle a function definition i.e. function name, source
        code filename and line number where is declared.
        """

        def __init__(self, function_name):
            """
            Create an instance representing a function definition within a
            function code block.

            :param function_name: Initial function name
            """
            self.function_line_number = None
            self.function_name = function_name
            self.source_file: str = None

        def update_sources(self, source_files, function_line_numbers):
            """
            Method to update source files dictionary

            An entry for the function is only added when the source file
            does not have one yet; existing entries are left untouched.

            :param source_files: Dictionary that contains the representation
            of the intermediate layer.

            :param function_line_numbers: Object that obtains the start line
            number for a function definition inside it source file.
            :return:Nothing
            """
            source_files.setdefault(self.source_file, {"functions": {},
                                                       "lines": {}})
            if self.function_name not in \
                    source_files[self.source_file]["functions"]:
                self.function_line_number = \
                    function_line_numbers.get_line_number(
                        self.source_file,
                        self.function_name)
                source_files[self.source_file]["functions"][
                    self.function_name] = {"covered": False,
                                           "line_number":
                                               self.function_line_number}

    def _find_source_code_blocks(self, code: str):
        """
        Split the code of a function block into raw source code blocks.

        :param code: Code of a function block as found in the binary dump.
        :return: List of tuples (function name or empty string, source file,
        line number as string, assembly code), one per source code block.
        """
        # The prefix is a filesystem path: escape it so characters such as
        # '+' or '(' are matched literally instead of being interpreted as
        # regular expression operators.
        prefix = re.escape(str(self.prefix))
        pattern = (r'(?s)(^[a-zA-Z0-9_]+)?(?:\(\):\n)?(^{0}.+?):([0-9]+)['
                   r'^\n]*\n(.+?)(?={0}.+?:[0-9]+.+\n|^[a-zA-Z0-9_]+\('
                   r'\):\n)').format(prefix)
        # A fake '<prefix>/:000' location is appended to terminate the last
        # block. The lookahead needs at least one character followed by a
        # newline after the line number, hence the trailing newline; without
        # it the last source code block of the function is never matched.
        terminated_code = "{}\n{}/:000\n".format(code, self.prefix)
        return re.findall(pattern, terminated_code,
                          re.DOTALL | re.MULTILINE)

    def get_source_code_block(self, function_block: FunctionBlock):
        """
        Generator method to obtain all the source code blocks within a
        function block.

        While iterating, self.function_definition is kept up to date with
        the function name and source file the yielded block belongs to.

        :param function_block: FunctionBlock object that contains the code
        the source code blocks.
        :return: A SourceCodeBlock object.
        """
        # When not present the block function name applies
        self.function_definition = BinaryParser.FunctionDefinition(
            function_block.name)
        for block in self._find_source_code_blocks(function_block.code):
            source_code_block = BinaryParser.SourceCodeBlock(block)
            if source_code_block.function_name:
                # Usually in the first iteration function name is not empty
                # and is the function's name block
                self.function_definition.function_name = \
                    source_code_block.function_name
            self.function_definition.source_file = remove_workspace(
                source_code_block.source_file, self.prefix)
            yield source_code_block

    def get_function_block(self):
        """Generator method to obtain all the function blocks contained in
        the binary dump file.

        Only function blocks that have a dwarf signature (sources) and are
        present in the function list are yielded, with their source file and
        function line number filled in.
        """
        for function_block in BinaryParser.FunctionBlock.get(self.dump):
            # Find out if the function block has C source code filename in
            # the function block code
            signature_group = re.findall(
                r"(?s){}\(\):\n(/.+?):[0-9]+.*(?:\r*\n\n|\n$)".format(
                    function_block.name), function_block.code,
                re.DOTALL | re.MULTILINE)
            if not signature_group:
                continue  # Function does not have dwarf signature (sources)
            if function_block.name not in self.function_list:
                print("Warning:Function '{}' not found in function list!!!".
                      format(function_block.name))
                continue  # Function not found in function list
            source_code_file = signature_group[0]
            function_block.source_file = remove_workspace(
                source_code_file, self.prefix)
            function_block.function_line_number = \
                self.function_line_numbers.get_line_number(
                    function_block.source_file, function_block.name)
            yield function_block
| 540 | |
| 541 | |
| 542 | class IntermediateCodeCoverage(object): |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 543 | """Class used to process the trace data along with the dwarf |
| 544 | signature files to produce an intermediate layer in json with |
| 545 | code coverage in assembly and c source code. |
| 546 | """ |
| 547 | |
| 548 | def __init__(self, _config, local_workspace): |
| 549 | self._data = {} |
| 550 | self.config = _config |
| 551 | self.local_workspace = local_workspace |
| 552 | self.elfs = self.config['elfs'] |
| 553 | # Dictionary with stats from trace files {address}=(times executed, |
| 554 | # inst size) |
| 555 | self.traces_stats = {} |
| 556 | # Dictionary of unique assembly line memory address against source |
| 557 | # file location |
| 558 | # {assembly address} = (opcode, source file location, line number in |
| 559 | # the source file, times executed) |
| 560 | self.asm_lines = {} |
| 561 | # Dictionary of {source file location}=>{'lines': {'covered':Boolean, |
| 562 | # 'elf_index'; {elf index}=>{assembly address}=>(opcode, |
| 563 | # times executed), |
| 564 | # 'functions': {function name}=>is covered(boolean)} |
| 565 | self.source_files_coverage = {} |
| 566 | self.functions = [] |
| 567 | # Unique set of elf list of files |
| 568 | self.elf_map = {} |
| 569 | # For elf custom mappings |
| 570 | self.elf_custom = None |
| 571 | |
| 572 | def process(self): |
| 573 | """ |
| 574 | Public method to process the trace files and dwarf signatures |
| 575 | using the information contained in the json configuration file. |
| 576 | This method writes the intermediate json file output linking |
| 577 | the trace data and c source and assembly code. |
| 578 | """ |
| 579 | self.source_files_coverage = {} |
| 580 | self.asm_lines = {} |
| 581 | # Initialize for unknown elf files |
| 582 | self.elf_custom = ELF_MAP["custom_offset"] |
| 583 | sources_config = {} |
| 584 | print("Generating intermediate json layer '{}'...".format( |
| 585 | self.config['parameters']['output_file'])) |
| 586 | for elf in self.elfs: |
| 587 | # Gather information |
| 588 | elf_name = elf['name'] |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 589 | # Trace data |
| 590 | self.traces_stats = load_stats_from_traces(elf['traces']) |
| 591 | prefix = self.config['parameters']['workspace'] \ |
| 592 | if self.config['configuration']['remove_workspace'] else \ |
| 593 | None |
| 594 | functions_list = list_of_functions_for_binary(elf_name) |
| 595 | (functions_list, excluded_functions) = apply_functions_exclude( |
| 596 | elf, functions_list) |
| 597 | # Produce code coverage |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 598 | self.process_binary(elf_name, functions_list, prefix) |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 599 | sources_config = self.config['parameters']['sources'] |
| 600 | # Now check code coverage in the functions with no dwarf signature |
| 601 | # (sources) |
| 602 | nf = {f: functions_list[f] for f in |
| 603 | functions_list if not |
| 604 | functions_list[f]["sources"]} |
| 605 | self.process_fn_no_sources(nf) |
| 606 | # Write to the intermediate json file |
| 607 | data = {"source_files": self.source_files_coverage, |
| 608 | "configuration": { |
| 609 | "sources": sources_config, |
| 610 | "metadata": "" if 'metadata' not in |
| 611 | self.config['parameters'] else |
| 612 | self.config['parameters']['metadata'], |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 613 | "elf_map": self.elf_map} |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 614 | } |
| 615 | json_data = json.dumps(data, indent=4, sort_keys=True) |
| 616 | with open(self.config['parameters']['output_file'], "w") as f: |
| 617 | f.write(json_data) |
| 618 | |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 619 | def get_elf_index(self, elf_name: str) -> int: |
| 620 | """Obtains the elf index and fills the elf_map instance variable""" |
| 621 | if elf_name not in self.elf_map: |
| 622 | if elf_name in ELF_MAP: |
| 623 | self.elf_map[elf_name] = ELF_MAP[elf_name] |
| 624 | else: |
| 625 | self.elf_map[elf_name] = ELF_MAP["custom_offset"] |
| 626 | ELF_MAP["custom_offset"] += 1 |
| 627 | return self.elf_map[elf_name] |
| 628 | |
| 629 | def process_binary(self, elf_filename: str, function_list, prefix=None): |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 630 | """ |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 631 | Process an elf file i.e. match the source code and asm lines against |
| 632 | trace files (coverage). |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 633 | |
| 634 | :param elf_filename: Elf binary file name |
| 635 | :param function_list: List of functions in the elf file i.e. |
| 636 | [(address start, address end, function name)] |
| 637 | :param prefix: Optional path name to be removed at the start of source |
| 638 | file locations |
| 639 | """ |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 640 | command = "%s -Sl %s | tee %s" % (OBJDUMP, elf_filename, |
| 641 | elf_filename.replace(".elf", ".dump")) |
| 642 | dump = os_command(command, show_command=True) |
Jelle Sels | 83f141e | 2022-08-01 15:17:40 +0000 | [diff] [blame] | 643 | dump += "\n0 <null>:" # For pattern matching the last function |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 644 | elf_name = os.path.splitext(os.path.basename(elf_filename))[0] |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 645 | function_line_numbers = FunctionLineNumbers(self.local_workspace) |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 646 | elf_index = self.get_elf_index(elf_name) |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 647 | # Pointer to files dictionary |
| 648 | source_files = self.source_files_coverage |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 649 | parser = BinaryParser(dump, function_list, prefix, |
| 650 | function_line_numbers) |
| 651 | for function_block in parser.get_function_block(): |
| 652 | function_list[function_block.name]["sources"] = True |
| 653 | source_files.setdefault(function_block.source_file, |
| 654 | {"functions": {}, |
| 655 | "lines": {}}) |
| 656 | source_files[function_block.source_file]["functions"][ |
| 657 | function_block.name] = {"covered": False, |
| 658 | "line_number": |
| 659 | function_block.function_line_number} |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 660 | is_function_block_covered = False |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 661 | source_code_block: BinaryParser.SourceCodeBlock |
| 662 | for source_code_block in parser.get_source_code_block( |
| 663 | function_block): |
| 664 | if parser.function_definition.function_name in function_list: |
| 665 | function_list[parser.function_definition.function_name][ |
| 666 | "sources"] = True |
| 667 | parser.function_definition.update_sources(source_files, |
| 668 | function_line_numbers) |
| 669 | source_file_ln = \ |
| 670 | source_files[parser.function_definition.source_file][ |
| 671 | "lines"].setdefault(source_code_block.line, |
| 672 | {"covered": False, "elf_index": {}}) |
| 673 | for asm_block in source_code_block.get_assembly_line(): |
| 674 | times_executed = 0 if \ |
| 675 | asm_block.dec_address not in self.traces_stats else \ |
| 676 | self.traces_stats[asm_block.dec_address][0] |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 677 | if times_executed > 0: |
| 678 | is_function_block_covered = True |
| 679 | source_file_ln["covered"] = True |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 680 | source_files[parser.function_definition.source_file][ |
| 681 | "functions"][ |
| 682 | parser.function_definition.function_name][ |
| 683 | "covered"] = True |
| 684 | source_file_ln.setdefault("elf_index", {'elf_index': {}}) |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 685 | if elf_index not in source_file_ln["elf_index"]: |
| 686 | source_file_ln["elf_index"][elf_index] = {} |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 687 | if asm_block.dec_address not in \ |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 688 | source_file_ln["elf_index"][elf_index]: |
Saul Romero | 884d214 | 2023-01-16 10:31:22 +0000 | [diff] [blame^] | 689 | source_file_ln["elf_index"][elf_index][ |
| 690 | asm_block.dec_address] = ( |
| 691 | asm_block.opcode, times_executed) |
| 692 | source_files[function_block.source_file]["functions"][ |
| 693 | function_block.name]["covered"] |= is_function_block_covered |
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 694 | |
def process_fn_no_sources(self, function_list):
    """
    Checks function coverage for functions with no dwarf signature i.e
    sources.

    For every function the local workspace is searched (grep over *.c and
    *.s files) for the function name. When exactly one candidate file
    declares the function, an entry is stored under that file in
    self.source_files_coverage; when several files declare it, or none
    does, a warning is logged and nothing is recorded.

    :param function_list: Dictionary of functions to be checked, keyed by
                          function name; each value must provide the
                          "start" address of the function
    """
    if not FUNCTION_LINES_ENABLED:
        return  # No source code at the workspace
    for function_name in function_list:
        # Just check if the start address is in the trace logs. Testing
        # membership on the dictionary itself avoids sorting and linearly
        # scanning every traced address for each function.
        covered = function_list[function_name]["start"] in self.traces_stats
        # Find the source file. The glob patterns are quoted so the shell
        # hands them to grep verbatim instead of expanding them against
        # whatever *.c / *.s files sit in the current directory.
        grep_command = ("grep --include '*.c' --include '*.s' -nrw '{}' {}"
                        "| cut -d: -f1").format(function_name,
                                                self.local_workspace)
        files = os_command(grep_command)
        # Sorted so the order of the files in the warning is deterministic
        unique_files = sorted(set(files.split()))
        sources = []
        line_number = 0
        for source_file in unique_files:
            d = get_function_line_numbers(source_file)
            if function_name in d:
                line_number = d[function_name]
                sources.append(source_file)
        if len(sources) > 1:
            # Ambiguous: cannot tell which file owns the function
            logger.warning("'{}' declared in {} files:{}".format(
                function_name, len(sources),
                ", ".join(sources)))
        elif len(sources) == 1:
            source_file = remove_workspace(sources[0],
                                           self.local_workspace)
            functions = self.source_files_coverage.setdefault(
                source_file, {"functions": {}, "lines": {}})["functions"]
            # Never downgrade an existing entry: only create a missing one
            # or overwrite when this function is known to be covered
            if function_name not in functions or covered:
                functions[function_name] = {"covered": covered,
                                            "line_number": line_number}
        else:
            logger.warning("Function '{}' not found in sources.".format(
                function_name))
| 739 | |
| 740 | |
# Help text shown as the epilog of the command line parser; it documents the
# layout of the input json configuration file.
json_conf_help = """
Produces an intermediate json layer for code coverage reporting
using an input json configuration file.

Input json configuration file format:
{
"configuration":
{
"remove_workspace": <true if 'workspace' must be removed from the
path of the source files>,
"include_assembly": <true to include assembly source code in the
intermediate layer>
},
"parameters":
{
"objdump": "<Path to the objdump binary to handle dwarf signatures>",
"readelf": "<Path to the readelf binary to handle dwarf signatures>",
"sources": [ <List of source code origins, one or more of the next
options>
{
"type": "git",
"URL": "<URL git repo>",
"COMMIT": "<Commit id>",
"REFSPEC": "<Refspec>",
"LOCATION": "<Folder within 'workspace' where this source
is located>"
},
{
"type": "http",
"URL": "<URL link to file>",
"COMPRESSION": "xz",
"LOCATION": "<Folder within 'workspace' where this source
is located>"
}
],
"workspace": "<Workspace folder where the source code was located to
produce the elf/axf files>",
"output_file": "<Intermediate layer output file name and location>",
"metadata": {<Metadata objects to be passed to the intermediate json
files>}
},
"elfs": [ <List of elf files to be traced/parsed>
{
"name": "<Full path name to elf/axf file>",
"traces": [ <List of trace files to be parsed for this
elf/axf file>
"Full path name to the trace file,"
]
}
]
}
"""
# Placeholders filled in by main(): the objdump/readelf paths come from the
# json configuration, and the flag is switched on only when a local
# workspace is given and ctags is available.
OBJDUMP = None
READELF = None
FUNCTION_LINES_ENABLED = None
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 796 | |
| 797 | |
def main():
    """
    Command line entry point.

    Parses the command line, loads the json configuration file, stores the
    objdump/readelf paths from it in the module globals, checks that both
    tools (and ctags, when a local workspace is given) can be run, and then
    builds and runs the intermediate layer processing.

    If the configuration file cannot be opened or parsed, an error is
    printed and the function returns without doing anything else.
    """
    global OBJDUMP
    global READELF
    global FUNCTION_LINES_ENABLED

    parser = argparse.ArgumentParser(epilog=json_conf_help,
                                     formatter_class=RawTextHelpFormatter)
    # 'required=True' means the option must always be given, so the default
    # value is never actually used
    parser.add_argument('--config-json', metavar='PATH',
                        dest="config_json", default='config_file.json',
                        help='JSON configuration file', required=True)
    parser.add_argument('--local-workspace', default="",
                        help=('Local workspace folder where source code files'
                              ' and folders resides'))
    args = parser.parse_args()
    try:
        with open(args.config_json, 'r') as f:
            config = json.load(f)
    except Exception as ex:
        print("Error at opening and processing JSON: {}".format(ex))
        return
    # Setting toolchain binary tools variables
    OBJDUMP = config['parameters']['objdump']
    READELF = config['parameters']['readelf']
    # Checking if are installed
    os_command("{} --version".format(OBJDUMP))
    os_command("{} --version".format(READELF))

    if args.local_workspace != "":
        # Checking ctags installed
        try:
            os_command("ctags --version")
        except BaseException:
            # NOTE(review): BaseException is kept because the way os_command
            # reports a failure is not visible here - confirm whether
            # catching Exception would be enough.
            # The message is built from adjacent literals: a backslash
            # continuation inside the string would splice the next source
            # line (including its indentation) into the printed text.
            print("Warning!: ctags not installed/working function line "
                  "numbers will be set to 0. [{}]".format(
                      "sudo apt install exuberant-ctags"))
        else:
            FUNCTION_LINES_ENABLED = True

    intermediate_layer = IntermediateCodeCoverage(config, args.local_workspace)
    intermediate_layer.process()
Basil Eljuse | 4b14afb | 2020-09-30 13:07:23 +0100 | [diff] [blame] | 838 | |
| 839 | |
# Module level logger. It is created outside the __main__ guard so that the
# functions of this file that log messages also work when the module is
# imported instead of being run as a script.
logger = logging.getLogger(__name__)

if __name__ == '__main__':
    # Log everything to a file next to where the tool is run
    logging.basicConfig(filename='intermediate_layer.log', level=logging.DEBUG,
                        format=('%(asctime)s %(levelname)s %(name)s '
                                '%(message)s'))
    start_time = time.time()
    main()
    elapsed_time = time.time() - start_time
    print("Elapsed time: {}s".format(elapsed_time))