lava_helper: Log job's "health" and reason for resubmitting a job
Signed-off-by: Paul Sokolovsky <paul.sokolovsky@linaro.org>
Change-Id: I8daf56cf02e0cb875e8896b2a40ed609ad16e459
diff --git a/lava_helper/lava_wait_jobs.py b/lava_helper/lava_wait_jobs.py
index b2deb57..bf3704b 100755
--- a/lava_helper/lava_wait_jobs.py
+++ b/lava_helper/lava_wait_jobs.py
@@ -70,6 +70,10 @@
os.makedirs('failed_jobs', exist_ok=True)
for job_id, info in jobs.items():
if not (info['health'] == "Complete" and info['state'] == "Finished"):
+ _log.warning(
+ "Will resubmit job %d because of its state: %s, health: %s",
+ job_id, info["state"], info["health"]
+ )
job_dir = info['job_dir']
def_path = os.path.join(job_dir, 'definition.yaml')
os.rename(def_path, 'failed_jobs/{}_definition.yaml'.format(job_id))
diff --git a/tfm_ci_pylib/lava_rpc_connector.py b/tfm_ci_pylib/lava_rpc_connector.py
index c88d1da..e43eaf5 100644
--- a/tfm_ci_pylib/lava_rpc_connector.py
+++ b/tfm_ci_pylib/lava_rpc_connector.py
@@ -256,8 +256,10 @@
cur_status['error_reason'] = self.get_error_reason(job_id)
finished_jobs[job_id] = cur_status
_log.info(
- "Job %d finished in %ds with status: %s. Remaining: %d",
- job_id, time.time() - start_t, cur_status['state'],
+ "Job %d finished in %ds with state: %s, health: %s. Remaining: %d",
+ job_id, time.time() - start_t,
+ cur_status['state'],
+ cur_status['health'],
len(job_ids) - len(finished_jobs)
)
if len(job_ids) == len(finished_jobs):