Sync job files with internal CI
Sync job files with platform-ci commit:
539c151d0cd99a5e6ca6c0e6966f6d8579fe864e
Signed-off-by: Zelalem <zelalem.aweke@arm.com>
Change-Id: Ida470e00da76188ce3987d1fa93ec758b5e0f23a
diff --git a/job/tf-worker/run_lava.py b/job/tf-worker/run_lava.py
new file mode 100644
index 0000000..93d522c
--- /dev/null
+++ b/job/tf-worker/run_lava.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2019, Arm Limited. All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+import argparse
+import os
+import subprocess
+import sys
+import logging
+import tempfile
+import yaml
+
+
+def case_infra_error(case):
+    """Report whether a single result case is free of infrastructure errors.
+
+    Despite its name, this returns ``True`` for a healthy case and ``False``
+    when an infra error is detected, so it can be fed straight into ``all()``.
+
+    A case counts as an infra error when its ``metadata`` either has
+    ``error_type == "Infrastructure"`` or has an ``error_msg`` containing
+    ``"timed out"``. Each field is looked up independently, so a missing
+    ``id`` (used only for logging), ``error_type`` or ``error_msg`` cannot
+    mask an error that the remaining fields do report.
+
+    Args:
+        case: one entry of the parsed results file; a mapping with optional
+            ``id`` and ``metadata`` keys.
+
+    Returns:
+        ``False`` if the case reports an infra error, ``True`` otherwise
+        (including when the case has no usable ``metadata`` mapping).
+    """
+    metadata = case.get("metadata")
+    if not isinstance(metadata, dict):
+        # No metadata at all means there is nothing that could flag an error.
+        return True
+    case_id = case.get("id", "<unknown>")
+
+    if metadata.get("error_type") == "Infrastructure":
+        logging.error("case %s: infra error is type Infrastructure", case_id)
+        return False
+
+    error_msg = metadata.get("error_msg")
+    # Guard the substring test: error_msg may be absent or not a string.
+    if isinstance(error_msg, str) and "timed out" in error_msg:
+        logging.error("case %s: infra error: %s", case_id, error_msg)
+        return False
+
+    return True
+
+
+def not_infra_error(path):
+    """Return whether the results file at `path` is free of infra errors.
+
+    The file is parsed as YAML and every entry is checked with
+    ``case_infra_error``.
+
+    Args:
+        path: location of the YAML results file.
+
+    Returns:
+        ``True`` if no entry reports an infra error. A missing file is logged
+        as a warning and, like an empty file, also yields ``True``.
+        ``False`` if at least one entry reports an infra error.
+    """
+    try:
+        with open(path) as results_file:
+            # safe_load() yields None for an empty document; treat that as
+            # "no cases" instead of letting all() fail on a non-iterable.
+            results = yaml.safe_load(results_file) or []
+    except FileNotFoundError:
+        logging.warning("Could not open results file %s", path)
+        return True
+    return all(case_infra_error(tc) for tc in results)
+
+
+def run_one_job(cmd):
+    """Execute one job attempt and tell whether it was free of infra errors.
+
+    `cmd` is handed to ``subprocess.run`` with ``check=True``, so a non-zero
+    exit status of the called script propagates to the caller as
+    ``subprocess.CalledProcessError``. When the script succeeds, the verdict
+    is whatever ``not_infra_error`` reports for ``job_results.yaml``.
+    """
+    results_path = "job_results.yaml"
+    subprocess.run(cmd, check=True)
+    verdict = not_infra_error(results_path)
+    return verdict
+
+
+def retry_job(cmd, retries):
+    """Repeat the job until one attempt finishes without an infra error.
+
+    At most `retries` attempts are made; the loop stops at the first attempt
+    for which ``run_one_job`` reports success.
+
+    Returns ``True`` if such an attempt occurred and ``False`` if every
+    attempt hit an infra error (or no attempt was made at all).
+    A ``subprocess.CalledProcessError`` raised because the called script
+    failed is not caught here.
+    """
+    logging.debug("trying job %s up to %d times", str(cmd), retries)
+    for _attempt in range(retries):
+        if run_one_job(cmd):
+            return True
+    return False
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the arguments, set the log verbosity,
+    # then run the job script with retries and exit with a matching status.
+    parser = argparse.ArgumentParser(
+        description="Lava job runner with infrastructure error detection and retry."
+    )
+    parser.add_argument(
+        "script",
+        nargs="?",
+        default=os.path.join(os.path.dirname(__file__), "run_lava_job.sh"),
+        help="bash job script to run a lava job",
+    )
+    parser.add_argument(
+        "job",
+        nargs="?",
+        # Resolved after parsing: the fallback needs $BIN_MODE, which must not
+        # be required when a job file is passed explicitly (or for --help).
+        default=None,
+        help="the Lava job description file",
+    )
+    parser.add_argument(
+        "retries",
+        type=int,
+        nargs="?",
+        default=3,
+        help="Number of retries. defaults to 3",
+    )
+    parser.add_argument(
+        "--save",
+        # Resolved after parsing so that a temporary directory is only
+        # created when the caller did not supply one.
+        default=None,
+        help="directory to store the job_output.log",
+    )
+    parser.add_argument(
+        "-v", action="count", default=0, help="Increase printing of debug output"
+    )
+    args = parser.parse_args()
+
+    if args.job is None:
+        bin_mode = os.environ.get("BIN_MODE")
+        if bin_mode is None:
+            # parser.error() prints the usage plus this message and exits.
+            parser.error("no job file given and BIN_MODE is not set")
+        args.job = os.path.join("artefacts", bin_mode, "juno.yaml")
+    if args.save is None:
+        args.save = tempfile.mkdtemp(prefix="job-output")
+
+    # -v selects INFO, -vv (or more) selects DEBUG; otherwise the root
+    # logger level is left untouched.
+    if args.v >= 2:
+        logging.getLogger().setLevel(logging.DEBUG)
+    elif args.v >= 1:
+        logging.getLogger().setLevel(logging.INFO)
+    logging.debug(args)
+
+    try:
+        if not retry_job([args.script, args.job, args.save], args.retries):
+            logging.critical("All jobs failed with infra errors; retries exhausted")
+            sys.exit(-1)
+        else:
+            sys.exit(0)
+    except subprocess.CalledProcessError as e:
+        # The job script itself failed: pass its exit status on to the caller.
+        logging.critical("Job script returned error code %d", e.returncode)
+        sys.exit(e.returncode)