Add options and refactoring
diff --git a/coverage-tool/coverage-reporting/intermediate_layer.py b/coverage-tool/coverage-reporting/intermediate_layer.py
index 701aeac..8713149 100644
--- a/coverage-tool/coverage-reporting/intermediate_layer.py
+++ b/coverage-tool/coverage-reporting/intermediate_layer.py
@@ -23,8 +23,10 @@
from argparse import RawTextHelpFormatter
import logging
import time
+from typing import Dict
+from typing import List
-__version__ = "6.0"
+__version__ = "7.0"
# Static map that defines the elf file source type in the intermediate json
ELF_MAP = {
@@ -36,6 +38,8 @@
"scp_rom": 11,
"mcp_rom": 12,
"mcp_ram": 13,
+ "secure_hafnium": 14,
+ "hafium": 15,
"custom_offset": 100
}
@@ -48,7 +52,6 @@
:param show_command: Optional argument to print the command in stdout
:return: The string output of the os command
"""
- out = ""
try:
if show_command:
print("OS command: {}".format(command))
@@ -136,6 +139,8 @@
"""
# Parse all $x / $d symbols
symbol_table = []
+ address = None
+ _type = None
command = r"""%s -s %s | awk '/\$[xatd]/ {print $2" "$8}'""" % (
READELF, elf_name)
text_out = os_command(command)
@@ -165,7 +170,7 @@
for sym in symbol_table:
if sym[1] != rtype:
if rtype == 'X':
- # Substract one because the first address of the
+ # Subtract one because the first address of the
# next range belongs to the next range.
ranges.append((range_start, sym[0] - 1))
range_start = sym[0]
@@ -173,13 +178,13 @@
return ranges
-def list_of_functions_for_binary(elf_name):
+def list_of_functions_for_binary(elf_name: str) -> Dict[str, Dict[str, any]]:
"""
Get an array of the functions in the elf file
:param elf_name: Elf binary file name
:return: An array of function address start, function address end,
- function dwarf signature (sources) addressed by function name
+ function dwarf signature (sources) indexed by function name
"""
_functions = {}
command = "%s -t %s | awk 'NR>4' | sed /^$/d" % (OBJDUMP, elf_name)
@@ -243,11 +248,10 @@
:param workspace: Path.
"""
ret = path if workspace is None else os.path.relpath(path, workspace)
- # print("{} => {}".format(path, ret))
return ret
-def get_function_line_numbers(source_file):
+def get_function_line_numbers(source_file: str) -> Dict[str, int]:
"""
Using ctags get all the function names with their line numbers
within the source_file
@@ -270,19 +274,28 @@
logger.warning("Warning: Can't get all function line numbers from %s" %
source_file)
except Exception as ex:
- logger.warning(f"Warning: Unknown error '{ex}' when executing command '{command}'")
+ logger.warning(f"Warning: Unknown error '{ex}' when executing command "
+ f"'{command}'")
return {}
return fln
class FunctionLineNumbers(object):
+ """Helper class used to get a function start line number within
+ a source code file"""
- def __init__(self, workspace):
+ def __init__(self, workspace: str):
+ """
+ Initialise dictionary to allocate source code files with the
+ corresponding function start line numbers.
+
+ :param workspace: The folder where the source files are deployed
+ """
self.filenames = {}
self.workspace = workspace
- def get_line_number(self, filename, function_name):
+ def get_line_number(self, filename: str, function_name: str) -> int:
if not FUNCTION_LINES_ENABLED:
return 0
if filename not in self.filenames:
@@ -292,7 +305,241 @@
self.filenames[filename][function_name]
-class PostProcessCC(object):
+class BinaryParser(object):
+ """Class used to create an instance to parse the binary files with a
+ dwarf signature in order to produce logical information to be matched with
+ traces and produce a code coverage report"""
+
+ def __init__(self, dump: str, function_list: Dict[str, Dict[str, any]],
+ prefix: str, function_line_numbers: FunctionLineNumbers):
+ """
+ Initialisation of the instance to parse binary files.
+
+ :param dump: Binary dump (string) containing assembly code and source
+ code metadata, i.e. source code location and line number.
+ :param function_list: Dictionary of functions defined in the binary
+ dump.
+ :param prefix: Prefix for every source code file contained in the
+ binary dump file, usually the workspace (folders) where the source code
+        files were built.
+ :param function_line_numbers: Object instance to get a function line
+ number within a source code file.
+ """
+ self.dump = dump
+ self.function_list = function_list
+ self.prefix = prefix
+ self.function_definition = None
+ self.function_line_numbers = function_line_numbers
+
+ class FunctionBlock(object):
+ """Class used to parse and obtain a function block from the
+ binary dump file that corresponds to a function declaration within
+ the binary assembly code.
+ The function block has the following components:
+ - Function start address in memory (hexadecimal).
+ - Function name.
+ - Function code.
+ """
+
+ def __init__(self, function_group: List[str]):
+ """
+ Create an instance of a function block within a binary dump.
+
+ :param function_group: List containing the function start
+ address, name and code in the function block.
+ """
+ self.start, self.name, self.code = function_group
+ self.source_file = None
+ self.function_line_number = None
+
+ @staticmethod
+ def get(dump: str):
+ """
+ Static method generator to extract a function block from the binary
+ dump.
+
+ :param dump: Binary dump (string) that contains the binary file
+ information.
+ :return: A FunctionBlock object that is a logical representation
+ of a function declaration within the binary dump.
+ """
+ function_groups = re.findall(
+ r"(?s)([0-9a-fA-F]+) <([a-zA-Z0-9_]+)>:\n(.+?)(?=[A-Fa-f0-9]* "
+ r"<[a-zA-Z0-9_]+>:)", dump, re.DOTALL | re.MULTILINE)
+ for group in function_groups:
+ if len(group) != 3:
+ continue
+ function_group = list(group)
+ function_group[-1] += "\n"
+ yield BinaryParser.FunctionBlock(function_group)
+
+ class SourceCodeBlock(object):
+ """Class used to represent a source code block of information within
+ a function block in a binary dump file.
+ The source code block contains the following components:
+ - Optional function name where the source code/assembly code is defined.
+ - Source code file that contains the source code corresponding
+ to the assembly code.
+ - Line number within the source code file corresponding to the source
+ code.
+ - Assembly code block.
+ """
+
+ def __init__(self, source_code_block):
+ """
+ Create an instance of a source code block within a function block.
+
+ :param source_code_block: Tuple of 4 elements that contains the
+ components of a source code block.
+ """
+ self.function_name, self.source_file, self.line, self.asm_code \
+ = source_code_block
+
+ def get_assembly_line(self):
+            """Getter to return an AssemblyLine instance that corresponds to
+ a logical representation of an assembly code line contained
+ within a source code block (assembly code block)"""
+ return BinaryParser.AssemblyLine.get(self)
+
+ class AssemblyLine(object):
+ """Class used to represent an assembly code line within an
+ assembly code block.
+ The assembly line instruction is formed by the following components:
+ - Hexadecimal address of the assembly instruction.
+ - Assembly instruction.
+ """
+
+ def __init__(self, line):
+ """
+ Create an instance representing an assembly code line within an
+ assembly code block.
+
+ :param line: Tuple of 2 elements [Hexadecimal number,
+ and assembly code]
+ """
+ self.hex_line_number, self.opcode = line
+ self.dec_address = int(self.hex_line_number, 16)
+
+ @staticmethod
+ def get(source_code_block):
+ """
+            Static method generator to extract an assembly code line from an
+ assembly code block.
+
+ :param source_code_block: Object that contains the assembly code
+ within the source code block.
+ :return: AssemblyLine object.
+ """
+ lines = re.findall(
+ r"^[\s]+([a-fA-F0-9]+):\t(.+?)\n",
+ source_code_block.asm_code, re.DOTALL | re.MULTILINE)
+ for line in lines:
+ if len(line) != 2:
+ continue
+ yield BinaryParser.AssemblyLine(line)
+
+ class FunctionDefinition(object):
+ """
+ Class used to handle a function definition i.e. function name, source
+        code filename and line number where it is declared.
+ """
+
+ def __init__(self, function_name):
+ """
+ Create an instance representing a function definition within a
+ function code block.
+
+ :param function_name: Initial function name
+ """
+ self.function_line_number = None
+ self.function_name = function_name
+ self.source_file: str = None
+
+ def update_sources(self, source_files, function_line_numbers):
+ """
+ Method to update source files dictionary
+
+ :param source_files: Dictionary that contains the representation
+ of the intermediate layer.
+
+ :param function_line_numbers: Object that obtains the start line
+            number for a function definition inside its source file.
+            :return: Nothing
+ """
+ source_files.setdefault(self.source_file, {"functions": {},
+ "lines": {}})
+ if self.function_name not in \
+ source_files[self.source_file]["functions"]:
+ self.function_line_number = \
+ function_line_numbers.get_line_number(
+ self.source_file,
+ self.function_name)
+ source_files[self.source_file]["functions"][
+ self.function_name] = {"covered": False,
+ "line_number":
+ self.function_line_number}
+
+ def get_source_code_block(self, function_block: FunctionBlock):
+ """
+ Generator method to obtain all the source code blocks within a
+ function block.
+
+        :param function_block: FunctionBlock object whose code contains
+        the source code blocks.
+ :return: A SourceCodeBlock object.
+ """
+ # When not present the block function name applies
+ self.function_definition = BinaryParser.FunctionDefinition(
+ function_block.name)
+ pattern = r'(?s)(^[a-zA-Z0-9_]+)?(?:\(\):\n)?(^{0}.+?):([0-9]+)[' \
+ r'^\n]*\n(.+?)(?={0}.+?:[0-9]+.+\n|^[a-zA-Z0-9_]+\(' \
+ r'\):\n)'.format(self.prefix)
+ source_code_blocks = re.findall(pattern,
+ "{}\n{}/:000".format(
+ function_block.code,
+ self.prefix),
+ re.DOTALL |
+ re.MULTILINE)
+ for block in source_code_blocks:
+ if len(block) != 4:
+ continue
+ source_code_block = BinaryParser.SourceCodeBlock(block)
+ if source_code_block.function_name:
+ # Usually in the first iteration function name is not empty
+ # and is the function's name block
+ self.function_definition.function_name = \
+ source_code_block.function_name
+ self.function_definition.source_file = remove_workspace(
+ source_code_block.source_file, self.prefix)
+ yield source_code_block
+
+ def get_function_block(self):
+ """Generator method to obtain all the function blocks contained in
+ the binary dump file.
+ """
+ for function_block in BinaryParser.FunctionBlock.get(self.dump):
+ # Find out if the function block has C source code filename in
+ # the function block code
+ signature_group = re.findall(
+ r"(?s){}\(\):\n(/.+?):[0-9]+.*(?:\r*\n\n|\n$)".format(
+ function_block.name), function_block.code,
+ re.DOTALL | re.MULTILINE)
+ if not signature_group:
+ continue # Function does not have dwarf signature (sources)
+ if function_block.name not in self.function_list:
+ print("Warning:Function '{}' not found in function list!!!".
+ format(function_block.name))
+ continue # Function not found in function list
+ source_code_file = signature_group[0]
+ function_block.source_file = remove_workspace(
+ source_code_file, self.prefix)
+ function_block.function_line_number = \
+ self.function_line_numbers.get_line_number(
+ function_block.source_file, function_block.name)
+ yield function_block
+
+
+class IntermediateCodeCoverage(object):
"""Class used to process the trace data along with the dwarf
signature files to produce an intermediate layer in json with
code coverage in assembly and c source code.
@@ -339,7 +586,6 @@
for elf in self.elfs:
# Gather information
elf_name = elf['name']
- os_command("ls {}".format(elf_name))
# Trace data
self.traces_stats = load_stats_from_traces(elf['traces'])
prefix = self.config['parameters']['workspace'] \
@@ -349,7 +595,7 @@
(functions_list, excluded_functions) = apply_functions_exclude(
elf, functions_list)
# Produce code coverage
- self.dump_sources(elf_name, functions_list, prefix)
+ self.process_binary(elf_name, functions_list, prefix)
sources_config = self.config['parameters']['sources']
# Now check code coverage in the functions with no dwarf signature
# (sources)
@@ -364,17 +610,26 @@
"metadata": "" if 'metadata' not in
self.config['parameters'] else
self.config['parameters']['metadata'],
- "elf_map": self.elf_map
- }
+ "elf_map": self.elf_map}
}
json_data = json.dumps(data, indent=4, sort_keys=True)
with open(self.config['parameters']['output_file'], "w") as f:
f.write(json_data)
- def dump_sources(self, elf_filename, function_list, prefix=None):
+ def get_elf_index(self, elf_name: str) -> int:
+ """Obtains the elf index and fills the elf_map instance variable"""
+ if elf_name not in self.elf_map:
+ if elf_name in ELF_MAP:
+ self.elf_map[elf_name] = ELF_MAP[elf_name]
+ else:
+ self.elf_map[elf_name] = ELF_MAP["custom_offset"]
+ ELF_MAP["custom_offset"] += 1
+ return self.elf_map[elf_name]
+
+ def process_binary(self, elf_filename: str, function_list, prefix=None):
"""
- Process an elf file i.e. match the source and asm lines against trace
- files (coverage).
+ Process an elf file i.e. match the source code and asm lines against
+ trace files (coverage).
:param elf_filename: Elf binary file name
:param function_list: List of functions in the elf file i.e.
@@ -382,119 +637,60 @@
:param prefix: Optional path name to be removed at the start of source
file locations
"""
- command = "%s -Sl %s" % (OBJDUMP, elf_filename)
- dump = os_command(command)
+ command = "%s -Sl %s | tee %s" % (OBJDUMP, elf_filename,
+ elf_filename.replace(".elf", ".dump"))
+ dump = os_command(command, show_command=True)
dump += "\n0 <null>:" # For pattern matching the last function
elf_name = os.path.splitext(os.path.basename(elf_filename))[0]
- # Object that handles the function line numbers in
- # their filename
function_line_numbers = FunctionLineNumbers(self.local_workspace)
- # To map the elf filename against an index
- if elf_name not in self.elf_map:
- if elf_name in ELF_MAP:
- self.elf_map[elf_name] = ELF_MAP[elf_name]
- else:
- self.elf_map[elf_name] = self.elf_custom
- self.elf_custom += 1
- elf_index = self.elf_map[elf_name]
- # The function groups have 2 elements:
- # Function's block name, Function's block code
- function_groups = re.findall(
- r"(?s)[0-9a-fA-F]+ <([a-zA-Z0-9_]+)>:\n(.+?)(?=[A-Fa-f0-9]* <[a-zA-Z0-9_]+>:)",
- dump, re.DOTALL | re.MULTILINE)
+ elf_index = self.get_elf_index(elf_name)
# Pointer to files dictionary
source_files = self.source_files_coverage
- for function_group in function_groups:
- if len(function_group) != 2:
- continue
- block_function_name, block_code = function_group
- block_code += "\n"
- # Find if the function has C source code filename
- function_signature_group = re.findall(
- r"(?s){}\(\):\n(/.+?):[0-9]+.*(?:\r*\n\n|\n$)".format(
- block_function_name), block_code, re.DOTALL | re.MULTILINE)
- if not function_signature_group:
- continue # Function does not have dwarf signature (sources)
- if not block_function_name in function_list:
- print("Warning:Function '{}' not found in function list!!!".format(block_function_name))
- continue # Function not found in function list
- function_list[block_function_name]["sources"] = True
- block_function_source_file = remove_workspace(
- function_signature_group[0], prefix)
- fn_line_number = function_line_numbers.get_line_number(
- block_function_source_file, block_function_name)
- if block_function_source_file not in source_files:
- source_files[block_function_source_file] = {"functions": {},
- "lines": {}}
- source_files[block_function_source_file]["functions"][
- block_function_name] = {"covered": False,
- "line_number": fn_line_number}
- # Now lets check the block code
- # The source code groups have 5 elements:
- # Function for the statements (optional), Source file for the asm
- # statements,
- # line number for the asm statements, asm statements, lookahead
- # (ignored)
- source_code_groups = re.findall(SOURCE_PATTERN, block_code,
- re.DOTALL | re.MULTILINE)
+ parser = BinaryParser(dump, function_list, prefix,
+ function_line_numbers)
+ for function_block in parser.get_function_block():
+ function_list[function_block.name]["sources"] = True
+ source_files.setdefault(function_block.source_file,
+ {"functions": {},
+ "lines": {}})
+ source_files[function_block.source_file]["functions"][
+ function_block.name] = {"covered": False,
+ "line_number":
+ function_block.function_line_number}
is_function_block_covered = False
- # When not present the last function name applies
- statements_function_name = block_function_name
- for source_code_group in source_code_groups:
- if len(source_code_group) != 5:
- continue
- fn_name, source_file, ln, asm_code, _ = source_code_group
- if not fn_name:
- # The statement belongs to the most recent function
- fn_name = statements_function_name
- else:
- # Usually in the first iteration fn_name is not empty and
- # is the function's name block
- statements_function_name = fn_name
- if statements_function_name in function_list:
- # Some of the functions within a block are not defined in
- # the function list dump
- function_list[statements_function_name]["sources"] = True
- statements_source_file = remove_workspace(source_file, prefix)
- if statements_source_file not in source_files:
- source_files[statements_source_file] = {"functions": {},
- "lines": {}}
- if statements_function_name not in \
- source_files[statements_source_file]["functions"]:
- fn_line_number = function_line_numbers.get_line_number(
- statements_source_file,
- statements_function_name)
- source_files[statements_source_file]["functions"][
- statements_function_name] = \
- {"covered": False, "line_number": fn_line_number}
- if ln not in source_files[statements_source_file]["lines"]:
- source_files[statements_source_file]["lines"][ln] = \
- {"covered": False, "elf_index": {}}
- source_file_ln = source_files[statements_source_file]["lines"][
- ln]
- asm_line_groups = re.findall(
- r"(?s)([a-fA-F0-9]+):\t(.+?)(?:\n|$)",
- asm_code, re.DOTALL | re.MULTILINE)
- for asm_line in asm_line_groups:
- if len(asm_line) != 2:
- continue
- hex_line_number, opcode = asm_line
- dec_address = int(hex_line_number, 16)
- times_executed = 0 if dec_address not in self.traces_stats \
- else self.traces_stats[dec_address][0]
+ source_code_block: BinaryParser.SourceCodeBlock
+ for source_code_block in parser.get_source_code_block(
+ function_block):
+ if parser.function_definition.function_name in function_list:
+ function_list[parser.function_definition.function_name][
+ "sources"] = True
+ parser.function_definition.update_sources(source_files,
+ function_line_numbers)
+ source_file_ln = \
+ source_files[parser.function_definition.source_file][
+ "lines"].setdefault(source_code_block.line,
+ {"covered": False, "elf_index": {}})
+ for asm_block in source_code_block.get_assembly_line():
+ times_executed = 0 if \
+ asm_block.dec_address not in self.traces_stats else \
+ self.traces_stats[asm_block.dec_address][0]
if times_executed > 0:
is_function_block_covered = True
source_file_ln["covered"] = True
- source_files[statements_source_file]["functions"][
- statements_function_name]["covered"] = True
+ source_files[parser.function_definition.source_file][
+ "functions"][
+ parser.function_definition.function_name][
+ "covered"] = True
+ source_file_ln.setdefault("elf_index", {'elf_index': {}})
if elf_index not in source_file_ln["elf_index"]:
source_file_ln["elf_index"][elf_index] = {}
- if dec_address not in \
+ if asm_block.dec_address not in \
source_file_ln["elf_index"][elf_index]:
- source_file_ln["elf_index"][elf_index][dec_address] = (
- opcode, times_executed)
- source_files[block_function_source_file]["functions"][
- block_function_name]["covered"] |= is_function_block_covered
+ source_file_ln["elf_index"][elf_index][
+ asm_block.dec_address] = (
+ asm_block.opcode, times_executed)
+ source_files[function_block.source_file]["functions"][
+ function_block.name]["covered"] |= is_function_block_covered
def process_fn_no_sources(self, function_list):
"""
@@ -532,8 +728,8 @@
self.source_files_coverage[source_file] = {"functions": {},
"lines": {}}
if function_name not in \
- self.source_files_coverage[source_file]["functions"] or \
- covered:
+ self.source_files_coverage[source_file]["functions"] \
+ or covered:
self.source_files_coverage[source_file]["functions"][
function_name] = {"covered": covered,
"line_number": line_number}
@@ -597,8 +793,6 @@
OBJDUMP = None
READELF = None
FUNCTION_LINES_ENABLED = None
-SOURCE_PATTERN = (r'(?s)([a-zA-Z0-9_]+)?(?:\(\):\n)?(^/.+?):([0-9]+)'
- r'(?: \(.+?\))?\n(.+?)(?=\n/|([a-zA-Z0-9_]+\(\):))')
def main():
@@ -639,8 +833,8 @@
else:
FUNCTION_LINES_ENABLED = True
- pp = PostProcessCC(config, args.local_workspace)
- pp.process()
+ intermediate_layer = IntermediateCodeCoverage(config, args.local_workspace)
+ intermediate_layer.process()
if __name__ == '__main__':