lava_wait_jobs.py: fetch_artifacts: Retry in case of IO errors

Retry at the level of a single job, to avoid refetching all jobs
and overloading LAVA in the process.
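
The retry pattern used is, roughly, the following minimal standalone
sketch (fetch_one is a hypothetical stand-in for the actual per-job
artifact downloads of definition, log, config and results):

    import logging
    import time

    _log = logging.getLogger(__name__)

    def fetch_with_retry(fetch_one, job_id, retries=3):
        # Count down the remaining attempts; on the last one, let the
        # IOError propagate to the caller instead of swallowing it.
        for retry in range(retries, 0, -1):
            try:
                return fetch_one(job_id)
            except IOError as e:
                if retry == 1:
                    raise
                _log.warning("Error %r occurred, retrying", e)
                time.sleep(2)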

Signed-off-by: Paul Sokolovsky <paul.sokolovsky@linaro.org>
Change-Id: I8fb90159192d26691c0a68643cbf7e598133331f
diff --git a/lava_helper/lava_wait_jobs.py b/lava_helper/lava_wait_jobs.py
index 2f4268f..224f97b 100755
--- a/lava_helper/lava_wait_jobs.py
+++ b/lava_helper/lava_wait_jobs.py
@@ -92,19 +92,30 @@
         job_dir = info['job_dir']
         t = time.time()
         _log.info("Fetching artifacts for job %d to %s", job_id, job_dir)
-        os.makedirs(job_dir, exist_ok=True)
-        def_path = os.path.join(job_dir, 'definition.yaml')
-        target_log = os.path.join(job_dir, 'target_log.txt')
-        config = os.path.join(job_dir, 'config.tar.bz2')
-        results_file = os.path.join(job_dir, 'results.yaml')
-        definition = lava.get_job_definition(job_id, def_path)
-        jobs[job_id]['metadata'] = definition.get('metadata', [])
-        time.sleep(0.2) # be friendly to LAVA
-        lava.get_job_log(job_id, target_log)
-        time.sleep(0.2)
-        lava.get_job_config(job_id, config)
-        time.sleep(0.2)
-        lava.get_job_results(job_id, results_file)
+
+        for retry in range(3, 0, -1):
+            try:
+                os.makedirs(job_dir, exist_ok=True)
+                def_path = os.path.join(job_dir, 'definition.yaml')
+                target_log = os.path.join(job_dir, 'target_log.txt')
+                config = os.path.join(job_dir, 'config.tar.bz2')
+                results_file = os.path.join(job_dir, 'results.yaml')
+                definition = lava.get_job_definition(job_id, def_path)
+                jobs[job_id]['metadata'] = definition.get('metadata', [])
+                time.sleep(0.2) # be friendly to LAVA
+                lava.get_job_log(job_id, target_log)
+                time.sleep(0.2)
+                lava.get_job_config(job_id, config)
+                time.sleep(0.2)
+                lava.get_job_results(job_id, results_file)
+                break
+            except IOError as e:
+                if retry == 1:
+                    raise
+                else:
+                    _log.warning("fetch_artifacts: Error %r occurred, retrying", e)
+                    time.sleep(2)
+
         _log.info("Fetched artifacts in %ds", time.time() - t)
         codecov_helper.extract_trace_data(target_log, job_dir)
     return(jobs)