blob: 47e99eb5869af81bde39beda520b9024df03f2c8 [file] [log] [blame]
Zelalem917b43e2020-08-04 11:39:55 -05001#!/usr/bin/env python3
2#
Khasim Syed Mohammed430d5942021-08-09 21:26:15 +05303# Copyright (c) 2019-2021 Arm Limited. All rights reserved.
Zelalem917b43e2020-08-04 11:39:55 -05004#
5# SPDX-License-Identifier: BSD-3-Clause
6#
7
8import argparse
9import os
10import subprocess
11import sys
12import logging
13import tempfile
14import yaml
15
16
def case_infra_error(case):
    """Report whether a single test case is free of infrastructure errors.

    Despite its name, this returns ``False`` when the case *is* an
    infrastructure error and ``True`` otherwise, so the per-case results can
    be combined with ``all()``.

    A case is an infrastructure error when its ``metadata`` mapping either
    has ``error_type`` equal to ``"Infrastructure"`` or has a string
    ``error_msg`` containing ``"timed out"``. The two checks are independent:
    a timeout is detected even when ``error_type`` is absent.

    Args:
        case: mapping describing one test case; the keys read are ``id`` and
            ``metadata`` (with ``error_type`` / ``error_msg`` inside it).

    Returns:
        ``False`` if the case shows an infrastructure error, else ``True``.
        Cases without usable metadata are treated as not being infra errors.
    """
    metadata = case.get("metadata")
    if not isinstance(metadata, dict):
        # No (or malformed) metadata: nothing indicates an infra error.
        return True

    # A missing id must not hide the error, so fall back to a placeholder
    # instead of letting the log call raise.
    case_id = case.get("id", "<unknown>")

    if metadata.get("error_type") == "Infrastructure":
        logging.error("case %s: infra error is type Infrastructure", case_id)
        return False

    error_msg = metadata.get("error_msg")
    if isinstance(error_msg, str) and "timed out" in error_msg:
        logging.error("case %s: infra error: %s", case_id, error_msg)
        return False

    return True
31
32
def not_infra_error(path):
    """Return a boolean indicating that there was no infra error.

    Loads the YAML results file at ``path`` and checks every test case in it
    with ``case_infra_error``.

    Args:
        path: location of the YAML results file.

    Returns:
        ``True`` when no case reports an infrastructure error, when the file
        is empty, or when the file does not exist (a warning is logged in
        that last case); ``False`` as soon as one case is an infra error.
    """
    try:
        with open(path) as file:
            # An empty YAML document loads as None; treat it as "no cases"
            # rather than failing when iterating over it.
            results = yaml.safe_load(file) or []
    except FileNotFoundError:
        logging.warning("Could not open results file %s", path)
        return True
    return all(case_infra_error(tc) for tc in results)
42
43
def run_one_job(cmd):
    """Execute the job command once and inspect its results file.

    The command is run to completion; afterwards ``job_results.yaml`` in the
    current working directory is checked for infrastructure errors.

    Returns True when no infra error was found, False otherwise.
    Raises a `subprocess.CalledProcessError` when the called script exits
    with a non-zero status.
    """
    results_file = "job_results.yaml"
    subprocess.run(cmd, check=True)
    no_infra_error = not_infra_error(results_file)
    return no_infra_error
50
51
def retry_job(cmd, retries):
    """Attempt a job at most ``retries`` times, stopping at the first clean run.

    A run is "clean" when `run_one_job` reports no infra error. Returns True
    as soon as that happens, or False if every attempt hit an infra error
    (including when ``retries`` is zero or negative and nothing is run).
    Raises a `subprocess.CalledProcessError` when the called script fails.
    """
    logging.debug("trying job %s up to %d times", str(cmd), retries)
    for _attempt in range(retries):
        if run_one_job(cmd):
            return True
    return False
58
59
if __name__ == "__main__":

    # To deploy and boot the artefacts on a board in LAVA a platform specific
    # yaml file should be dispatched to LAVA. The logic below identifies the
    # name of that yaml file at run time from the platform named in the
    # RUN_CONFIG environment variable.
    platform_list = ['n1sdp', 'juno']

    run_cfg = os.environ["RUN_CONFIG"]
    matches = [name for name in platform_list if name in run_cfg]
    if not matches:
        logging.critical(
            "Exiting: Platform not found for LAVA in run-config %s", run_cfg
        )
        sys.exit(-1)
    if len(matches) > 1:
        # Concatenating every match would yield a nonsensical file name
        # (e.g. "n1sdpjuno.yaml"); use the first platform in list order.
        logging.warning(
            "Multiple platforms %s match run-config %s; using %s",
            matches, run_cfg, matches[0],
        )
    platform_yaml = matches[0] + '.yaml'

    parser = argparse.ArgumentParser(
        description="Lava job runner with infrastructure error dectection and retry."
    )
    parser.add_argument(
        "script",
        nargs="?",
        default=os.path.join(os.path.dirname(__file__), "run_lava_job.sh"),
        help="bash job script to run a lava job",
    )
    parser.add_argument(
        "job",
        nargs="?",
        default=os.path.join("artefacts", os.environ["BIN_MODE"], platform_yaml),
        help="the Lava job description file",
    )
    parser.add_argument(
        "retries",
        type=int,
        nargs="?",
        default=3,
        help="Number of retries. defaluts to 3",
    )
    parser.add_argument(
        "--save",
        # The temporary directory is created after parsing, and only when
        # the option was not given, so no stray directory is left behind.
        default=None,
        help="directory to store the job_output.log",
    )
    parser.add_argument(
        "--username",
        required=True,
        help="the user name for lava server",
    )
    parser.add_argument(
        "--token",
        required=True,
        help="the token for lava server",
    )
    parser.add_argument(
        "-v", action="count", default=0, help="Increase printing of debug ouptut"
    )
    args = parser.parse_args()
    if args.save is None:
        args.save = tempfile.mkdtemp(prefix="job-output")

    # -v enables INFO output, -vv (or more) enables DEBUG output.
    if args.v >= 2:
        logging.getLogger().setLevel(logging.DEBUG)
    elif args.v >= 1:
        logging.getLogger().setLevel(logging.INFO)
    # NOTE(review): at DEBUG level this logs the parsed arguments, which
    # include the LAVA token -- confirm that this is acceptable.
    logging.debug(args)

    # Arguments are handed to the job script positionally, in this order.
    job_cmd = [args.script, args.job, args.save, args.username, args.token]
    try:
        succeeded = retry_job(job_cmd, args.retries)
    except subprocess.CalledProcessError as e:
        # The job script itself failed: propagate its exit status.
        logging.critical("Job script returned error code %d", e.returncode)
        sys.exit(e.returncode)

    if not succeeded:
        logging.critical("All jobs failed with infra errors; retries exhausted")
        sys.exit(-1)
    sys.exit(0)