Code Coverage: improvements to tarmac trace script

Added multiprocessing to the Python script that processes
the tarmac trace to generate code coverage reports.

Signed-off-by: Darsh Chanduka <darsh.chanduka@arm.com>
Change-Id: I32a25354531ecba551d03c58274e1380a9bb4e5c
diff --git a/code_coverage/ingest_tarmac.py b/code_coverage/ingest_tarmac.py
index bc386e0..eaf9a09 100755
--- a/code_coverage/ingest_tarmac.py
+++ b/code_coverage/ingest_tarmac.py
@@ -6,63 +6,114 @@
 #
 # -----------------------------------------------------------------------------
 
-import argparse
-import logging
 import re
-import elftools
+import argparse
+
+import multiprocessing as mp
+
 from itertools import islice
+from collections import Counter
 
-def parse_line_fvp(line: str) -> str:
+MAX_JOBS = 10
+CHUNK_SIZE = 100000
+
+
+def parse_fvp(line: str) -> "tuple[str, int]":
+    """ extract (addr, size in bytes) from an fvp trace line """
     split = line.split(" ")
-    try:
-        addr = split[5]
-        size = len(split[6]) // 2
-        logging.debug("Instruction at {} of size {}".format(addr, size))
-    except Exception as e:
-        print("Parse error {} for line {}".format(e,line))
-        raise Exception
-    return (addr, size)
 
-def parse_line_rtl(line: str) -> str:
-    try:
-        split = line.split(" ")[1].replace("(", "").replace(")", "").split(":")
-        addr = split[0]
-        size = len(split[1]) // 2
-        logging.debug("Instruction at {} of size {}".format(addr, size))
-    except Exception as e:
-        print("Parse error {} for line {}".format(e,line))
-        raise Exception
-    return (addr, size)
+    #       addr      size
+    return (split[5], len(split[6]) // 2)
 
-parser = argparse.ArgumentParser()
-parser.add_argument("--input_file", help="tarmac file to input", required=True)
-parser.add_argument("--log_level", help="Log level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], default="ERROR")
-parser.add_argument("--output_file", help="output file, in qa-tools format", required=True)
-args = parser.parse_args()
 
-# logging setup
-logging.basicConfig(level=args.log_level)
+def parse_rtl(line: str) -> "tuple[str, int]":
+    split = line.split(" ")[1].replace("(", "").replace(")", "").split(":")
 
-instructions = []
-parse_function = parse_line_fvp
-hit_counts = {}
+    #       addr      size
+    return (split[0], len(split[1]) // 2)
 
-with open(args.input_file, "rt") as input_file:
-    while(lines := list(islice(input_file, 100000))):
-        lines = ''.join(lines)
-        chunk_instructions = re.findall(r'[0-9]* [a-z]{3} [a-z\.]* IT .*', lines)
 
-        if len(chunk_instructions) == 0:
-            chunk_instructions = re.findall(r'[0-9]* clk ES (.*:.*).*', lines)
-            if len(chunk_instructions) != 0:
-                parse_function = parse_line_rtl
+def process_trace(function, lines) -> Counter:
+    """ function run by each worker
+        to process the tarmac trace """
 
-        for i in chunk_instructions:
-            addr = parse_function(i)
-            if addr in hit_counts.keys():
-                hit_counts[addr] += 1
-            else:
-                hit_counts[addr] = 1
+    hit_counts = Counter()  # match the declared '-> Counter' return type
+    for line in lines:
 
-with open(args.output_file, "w+") as output_file:
-    output_file.writelines(["{} {} {}\n".format(x[0], hit_counts[x], x[1]) for x in hit_counts.keys()])
+        addr = function(line)
+
+        # tally hits per (addr, size) pair
+        hit_counts[addr] = hit_counts.get(addr, 0) + 1
+
+    return hit_counts
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_file",
+                        help="tarmac file to input", required=True)
+    parser.add_argument("--output_file",
+                        help="output file, in qa-tools format", required=True)
+    parser.add_argument("--log_level",   help="Log level",
+                        choices=["DEBUG", "INFO", "WARNING",
+                                 "ERROR", "CRITICAL"],
+                        default="ERROR")
+
+    args = parser.parse_args()
+
+    # open mp pool
+    p = mp.Pool(MAX_JOBS)
+
+    jobs = []
+    hit_count = Counter({})
+    with open(args.input_file, "rt") as f:
+        while (chunk := list(islice(f, CHUNK_SIZE))):
+            # wait here while all workers are busy
+            j = 0
+            while len(jobs) == MAX_JOBS:
+                job = jobs[j]
+
+                # next job if this job not finished
+                if not job.ready():
+                    j += 1
+                    j %= MAX_JOBS
+                    continue
+
+                # sum when results are available
+                hit_count += job.get()
+                # remove completed job from list
+                jobs.pop(j)
+
+            # when a worker is available; keep the joined text separate
+            # from the findall results, or the second search would be
+            # handed a list instead of a string (TypeError)
+            text = ''.join(chunk)
+
+            matches = \
+                re.findall(r'[0-9]* [a-z]{3} [a-z\.]* IT .*', text)
+
+            # if there are any fvp instructions to process
+            if len(matches):
+                jobs.append(p.apply_async(process_trace,
+                                          args=(parse_fvp, matches,)))
+                continue
+
+            matches = \
+                re.findall(r'[0-9]* clk ES (.*:.*).*', text)
+
+            # if there are any rtl instructions to process
+            if len(matches):
+                jobs.append(p.apply_async(process_trace,
+                                          args=(parse_rtl, matches,)))
+
+    # sum any remaining jobs
+    for job in jobs:
+        job.wait()
+        hit_count += job.get()
+
+    # close mp pool
+    p.close()
+
+    # write results: one "addr count size" line per instruction
+    with open(args.output_file, "w+") as f:
+        f.writelines([f"{addr} {count} {size}\n"
+                      for (addr, size), count in hit_count.items()])