Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | |
| 3 | from __future__ import print_function |
| 4 | |
| 5 | __copyright__ = """ |
| 6 | /* |
Xinyu Zhang | af63f90 | 2023-01-05 15:09:28 +0800 | [diff] [blame] | 7 | * Copyright (c) 2020-2023, Arm Limited. All rights reserved. |
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 8 | * |
| 9 | * SPDX-License-Identifier: BSD-3-Clause |
| 10 | * |
| 11 | */ |
| 12 | """ |
| 13 | |
| 14 | """ |
| 15 | Script for waiting for LAVA jobs and parsing the results |
| 16 | """ |
| 17 | |
| 18 | import os |
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 19 | import time |
| 20 | import yaml |
| 21 | import argparse |
Xinyu Zhang | c8a670c | 2021-05-18 20:20:53 +0800 | [diff] [blame] | 22 | import shutil |
Paul Sokolovsky | a95abd9 | 2022-12-27 13:48:11 +0300 | [diff] [blame] | 23 | import logging |
Paul Sokolovsky | 7fd1bc5 | 2023-01-11 20:14:37 +0300 | [diff] [blame] | 24 | from xmlrpc.client import ProtocolError |
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 25 | from jinja2 import Environment, FileSystemLoader |
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 26 | from lava_helper import test_lava_dispatch_credentials |
Xinyu Zhang | c918b6e | 2022-10-08 17:13:17 +0800 | [diff] [blame] | 27 | from lava_submit_jobs import submit_lava_jobs |
Paul Sokolovsky | 2512ec5 | 2022-03-04 00:15:39 +0300 | [diff] [blame] | 28 | import codecov_helper |
| 29 | |
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 30 | |
Paul Sokolovsky | a95abd9 | 2022-12-27 13:48:11 +0300 | [diff] [blame] | 31 | _log = logging.getLogger("lavaci") |
| 32 | |
| 33 | |
def wait_for_jobs(user_args):
    """Wait for the requested LAVA jobs, giving failed ones a second run.

    ``user_args.job_ids`` is a comma separated string of job IDs; empty
    fields are ignored.  After the first wait, ``resubmit_failed_jobs`` is
    asked to resubmit the jobs that did not complete successfully, and any
    resubmitted jobs are waited for as well.

    Returns the value of ``get_finished_jobs`` for the requested jobs,
    updated with the entries of the resubmitted jobs (if there were any).
    """
    requested_ids = [
        int(field) for field in user_args.job_ids.split(",") if field != ''
    ]
    lava = test_lava_dispatch_credentials(user_args)
    finished_jobs = get_finished_jobs(requested_ids, user_args, lava)

    retried_ids = resubmit_failed_jobs(finished_jobs, user_args)
    if not retried_ids:
        return finished_jobs

    _log.info("Waiting for resubmitted jobs: %s", retried_ids)
    finished_jobs.update(get_finished_jobs(retried_ids, user_args, lava))
    return finished_jobs
| 45 | |
def process_finished_jobs(finished_jobs, user_args):
    """Run every reporting step over the finished LAVA jobs.

    The steps run in a fixed order.  ``test_report`` has to come before
    ``job_links`` because it stores the ``result`` entry that ``job_links``
    prints.
    """
    reporting_steps = (
        print_lava_urls,
        test_report,
        job_links,
        codecov_helper.coverage_reports,
    )
    for step in reporting_steps:
        step(finished_jobs, user_args)
Xinyu Zhang | f2b7cbf | 2021-05-18 20:17:34 +0800 | [diff] [blame] | 51 | |
def get_finished_jobs(job_list, user_args, lava):
    """Block until the given LAVA jobs finish and download their artifacts.

    Parameters:
        job_list: list of LAVA job IDs to wait for.
        user_args: parsed command line arguments; ``dispatch_timeout`` and
            ``artifacts_path`` are read here.
        lava: LAVA helper object providing ``block_wait_for_jobs`` and
            ``cancel_job``.

    Jobs that are missing from the result of ``block_wait_for_jobs`` are
    cancelled.  When ``user_args.artifacts_path`` is set, every finished job
    gets a ``job_dir`` entry (``<artifacts_path>/<job id>_<description>``)
    and its artifacts are fetched into that directory.

    Returns the mapping of finished job ID -> job info.
    """
    _log.info("Waiting for %d LAVA jobs", len(job_list))
    # NOTE(review): the trailing 5 is presumably a poll interval in seconds;
    # confirm against block_wait_for_jobs().
    finished_jobs = lava.block_wait_for_jobs(job_list, user_args.dispatch_timeout, 5)

    unfinished_jobs = [item for item in job_list if item not in finished_jobs]
    for job in unfinished_jobs:
        _log.info("Cancelling unfinished job %d because of timeout.", job)
        lava.cancel_job(job)
    if unfinished_jobs:
        _log.info("Job fails because some test jobs have been cancelled.")

    if user_args.artifacts_path:
        for job, info in finished_jobs.items():
            info['job_dir'] = os.path.join(
                user_args.artifacts_path,
                "{}_{}".format(job, info['description']),
            )
        # Only fetch when there is somewhere to store the artifacts.  Calling
        # fetch_artifacts() without an artifacts path and assigning its
        # result would replace the finished-jobs mapping with None.
        finished_jobs = fetch_artifacts(finished_jobs, user_args, lava)
    return finished_jobs
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 67 | |
def resubmit_failed_jobs(jobs, user_args):
    """Resubmit the jobs that did not end as health "Complete" / state "Finished".

    For every such job the downloaded ``definition.yaml`` is moved to
    ``failed_jobs/<job_id>_definition.yaml``, the job's artifact directory is
    deleted and the job is removed from ``jobs`` (which is modified in
    place).  ``submit_lava_jobs`` is then called with
    ``job_dir='failed_jobs'``.

    Returns the IDs reported by ``submit_lava_jobs`` as a list of integers,
    or an empty list when ``jobs`` is empty or None.
    """
    if not jobs:
        return []

    time.sleep(2)  # be friendly to LAVA
    os.makedirs('failed_jobs', exist_ok=True)

    retry_ids = []
    for job_id, info in jobs.items():
        if info['health'] == "Complete" and info['state'] == "Finished":
            continue
        _log.warning(
            "Will resubmit job %d because of its state: %s, health: %s",
            job_id, info["state"], info["health"]
        )
        artifacts_dir = info['job_dir']
        os.rename(
            os.path.join(artifacts_dir, 'definition.yaml'),
            'failed_jobs/{}_definition.yaml'.format(job_id),
        )
        shutil.rmtree(artifacts_dir)
        retry_ids.append(job_id)

    # Removal is deferred until after the loop: a dict must not change size
    # while it is being iterated.
    for job_id in retry_ids:
        jobs.pop(job_id)

    new_ids = submit_lava_jobs(user_args, job_dir='failed_jobs')
    return [int(new_id) for new_id in new_ids if new_id != '']
| 90 | |
def fetch_artifacts(jobs, user_args, lava):
    """Download definition, log, config and results of every job in ``jobs``.

    Parameters:
        jobs: mapping of job ID -> job info; each info must carry ``job_dir``.
        user_args: parsed command line arguments; only ``artifacts_path`` is
            read here.
        lava: LAVA helper object providing the ``get_job_*`` download calls.

    The files are written into the job's ``job_dir`` as ``definition.yaml``,
    ``target_log.txt``, ``config.tar.bz2`` and ``results.yaml``.  The
    ``metadata`` of the job definition is stored in the job info.  A download
    is attempted up to three times per job; the error of the last attempt is
    re-raised.

    Returns ``jobs``.  When no artifacts path is configured nothing is
    downloaded and ``jobs`` is returned untouched.
    """
    if not user_args.artifacts_path:
        # Hand the mapping back instead of an implicit None, so that callers
        # which assign the result keep a usable value.
        return jobs

    for job_id, info in jobs.items():
        job_dir = info['job_dir']
        def_path = os.path.join(job_dir, 'definition.yaml')
        target_log = os.path.join(job_dir, 'target_log.txt')
        config = os.path.join(job_dir, 'config.tar.bz2')
        results_file = os.path.join(job_dir, 'results.yaml')

        started = time.time()
        _log.info("Fetching artifacts for job %d to %s", job_id, job_dir)

        for retry in range(3, 0, -1):
            try:
                os.makedirs(job_dir, exist_ok=True)
                definition = lava.get_job_definition(job_id, def_path)
                info['metadata'] = definition.get('metadata', [])
                time.sleep(0.2)  # be friendly to LAVA
                lava.get_job_log(job_id, target_log)
                time.sleep(0.2)
                lava.get_job_config(job_id, config)
                time.sleep(0.2)
                lava.get_job_results(job_id, results_file)
                break
            except (ProtocolError, IOError, yaml.error.YAMLError) as e:
                if retry == 1:
                    # Last attempt failed as well: give up on this job.
                    raise
                _log.warning("fetch_artifacts: Error %r occurred, retrying", e)
                time.sleep(2)

        _log.info("Fetched artifacts in %ds", time.time() - started)
        codecov_helper.extract_trace_data(target_log, job_dir)
    return jobs
| 125 | |
| 126 | |
def lava_id_to_url(id, user_args):
    """Return the LAVA scheduler URL of job ``id``.

    The result is ``<user_args.lava_url>/scheduler/job/<id>``.
    """
    base_url = user_args.lava_url
    return "{}/scheduler/job/{}".format(base_url, id)
| 129 | |
def job_links(jobs, user_args):
    """Print a plain-text summary with links for every job in ``jobs``.

    Each job info must provide ``metadata`` (with ``build_name``,
    ``device_type`` and ``build_job_url``), ``result`` and ``job_dir``.  The
    log link is built from the ``BUILD_URL`` environment variable.
    """
    sections = []
    for job, info in jobs.items():
        metadata = info['metadata']
        sections.append("".join([
            "\nLAVA Test Config:\n",
            "Config Name: {}\n".format(metadata['build_name']),
            "Test Result: {}\n".format(info['result']),
            "Device Type: {}\n".format(metadata['device_type']),
            "Build link: {}\n".format(metadata['build_job_url']),
            "LAVA link: {}\n".format(lava_id_to_url(job, user_args)),
            "TFM LOG: {}artifact/{}/target_log.txt\n".format(
                os.getenv("BUILD_URL"), info['job_dir']
            ),
        ]))
    print("".join(sections))
| 141 | |
def remove_lava_dupes(results):
    """Drop non-passing "lava" suite results that have a passing twin.

    A result is removed when its ``result`` is not ``'pass'``, its ``suite``
    is ``"lava"`` and some other entry with the same ``name`` has the result
    ``'pass'``.  All other entries keep their relative order.

    ``results`` is filtered in place and also returned.  The list is rebuilt
    in one pass instead of calling ``remove()`` while iterating it: removing
    during iteration skips the element after each removal, and removing once
    per passing twin raises ValueError when there are several twins.
    """
    passed_names = {
        entry['name'] for entry in results if entry['result'] == 'pass'
    }
    results[:] = [
        entry for entry in results
        if not (
            entry['result'] != 'pass'
            and entry['suite'] == "lava"
            and entry['name'] in passed_names
        )
    ]
    return results
| 151 | |
def test_report(jobs, user_args):
    """Evaluate the downloaded results of every job and render the summary.

    Each job info gets a ``result`` entry: ``'FAILURE'`` when the job health
    is not ``'Complete'``, when ``results.yaml`` is missing or empty in the
    job's ``job_dir``, or when any considered test result is ``'fail'``;
    ``'SUCCESS'`` otherwise.  Considered results are those outside the
    ``lava`` suite plus the ``lava-test-monitor`` entry.

    Jobs whose results file could be read also get ``lava_url`` and
    ``artifacts_dir`` entries and are passed to ``render_jinja``.
    """
    # parsing of test results is WIP
    jinja_data = []
    for job, info in jobs.items():
        info['result'] = 'SUCCESS'
        if info['health'] != 'Complete':
            info['result'] = 'FAILURE'
            continue

        results_file = os.path.join(info['job_dir'], 'results.yaml')
        if not os.path.exists(results_file) or os.path.getsize(results_file) == 0:
            info['result'] = 'FAILURE'
            continue

        with open(results_file, "r") as F:
            results = yaml.safe_load(F)
        non_lava_results = [
            x for x in results
            if x['suite'] != 'lava' or x['name'] == 'lava-test-monitor'
        ]
        info['lava_url'] = lava_id_to_url(job, user_args)
        info['artifacts_dir'] = info['job_dir']
        # `info` is stored by reference, so a 'FAILURE' set below is still
        # visible to the template.
        jinja_data.append({job: [info, non_lava_results]})
        if any(result['result'] == 'fail' for result in non_lava_results):
            info['result'] = 'FAILURE'

    render_jinja({'jobs': jinja_data})
| 182 | |
def render_jinja(data):
    """Render the HTML and CSV test summaries into the current directory.

    Templates are loaded from the ``jinja2_templates`` directory next to this
    script.  ``data`` is passed to both templates as the render context; the
    output goes to ``test_summary.html`` and ``test_summary.csv``.
    """
    script_dir = os.path.abspath(os.path.dirname(__file__))
    template_env = Environment(
        loader=FileSystemLoader(
            searchpath=os.path.join(script_dir, "jinja2_templates")
        )
    )
    outputs = (
        ('test_summary.html', "test_summary.jinja2"),
        ('test_summary.csv', "test_summary_csv.jinja2"),
    )
    # Render everything first, so that no file is written if a template fails.
    rendered = [
        (out_name, template_env.get_template(template_name).render(data))
        for out_name, template_name in outputs
    ]
    for out_name, text in rendered:
        with open(out_name, "w") as out_file:
            out_file.write(text)
| 193 | |
def print_lava_urls(jobs, user_args):
    """Pass the LAVA URLs of all ``jobs`` to ``info_print``.

    ``info_print`` is called with its default arguments.
    """
    urls = []
    for job_id in jobs:
        urls.append(lava_id_to_url(job_id, user_args))
    info_print("LAVA jobs triggered for this build: {}".format(urls))
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 197 | |
| 198 | |
def info_print(line, silent=True):
    """Print ``line`` prefixed with ``INFO: `` unless ``silent`` is true.

    ``silent`` defaults to True, so a call without it prints nothing.
    """
    if silent:
        return
    print("INFO: {}".format(line))
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 202 | |
Paul Sokolovsky | de25e1f | 2023-01-02 14:29:21 +0300 | [diff] [blame] | 203 | |
# Number of times main() attempts the whole wait_for_jobs() sequence.
# WARNING: Raising this above 1 is a last-resort, temporary stop-gap measure;
# it will overload LAVA and jeopardize the stability of the entire TF CI.
INEFFICIENT_RETRIES = 1
| 207 | |
| 208 | |
def main(user_args):
    """Main logic: wait for the LAVA jobs, report on them, flag missing ones.

    ``wait_for_jobs`` is attempted up to ``INEFFICIENT_RETRIES`` times; the
    exception of the last attempt is propagated.  The finished jobs are then
    processed by ``process_finished_jobs``.

    Raises:
        Exception: after reporting, when fewer jobs finished than were
            requested in ``user_args.job_ids``.
    """
    for try_time in range(INEFFICIENT_RETRIES):
        try:
            finished_jobs = wait_for_jobs(user_args)
            break
        except Exception:
            if try_time >= INEFFICIENT_RETRIES - 1:
                # Out of attempts: re-raise with the original traceback.
                raise
            _log.exception("Exception in wait_for_jobs")
            _log.info("Will try to get LAVA jobs again, this was try: %d", try_time)

    process_finished_jobs(finished_jobs, user_args)

    # Count the requested jobs the same way wait_for_jobs() does, ignoring
    # empty fields, so that e.g. a trailing comma in --job-ids is not
    # mistaken for a cancelled job.
    requested_ids = [x for x in user_args.job_ids.split(",") if x != '']
    if len(finished_jobs) < len(requested_ids):
        raise Exception("Some LAVA jobs cancelled.")
Matthew Hart | fb6fd36 | 2020-03-04 21:03:59 +0000 | [diff] [blame] | 224 | |
def get_cmd_args():
    """Parse the command line arguments and return the resulting namespace.

    ``--job-ids`` is the only required option.  ``--lava-timeout`` is stored
    as the integer ``dispatch_timeout`` (default 3600) and ``--use-env`` as
    the boolean ``token_from_env`` (default False).
    """
    parser = argparse.ArgumentParser(description="Lava Wait Jobs")
    group = parser.add_argument_group("Lava Wait Jobs")

    # Configuration control: one (flag, add_argument keyword arguments) pair
    # per supported option.
    options = (
        ("--lava-url", dict(
            dest="lava_url", action="store",
            help="LAVA lab URL (without RPC2)")),
        ("--job-ids", dict(
            dest="job_ids", action="store", required=True,
            help="Comma separated list of job IDS")),
        ("--lava-token", dict(
            dest="lava_token", action="store",
            help="LAVA auth token")),
        ("--lava-user", dict(
            dest="lava_user", action="store",
            help="LAVA username")),
        ("--use-env", dict(
            dest="token_from_env", action="store_true", default=False,
            help="Use LAVA auth info from environment")),
        ("--lava-timeout", dict(
            dest="dispatch_timeout", action="store", type=int, default=3600,
            help="Time in seconds to wait for all jobs")),
        ("--artifacts-path", dict(
            dest="artifacts_path", action="store",
            help="Download LAVA artifacts to this directory")),
    )
    for flag, settings in options:
        group.add_argument(flag, **settings)
    return parser.parse_args()
| 255 | |
| 256 | |
if __name__ == "__main__":
    # Script entry point: enable INFO logging, then run with the parsed
    # command line arguments.
    logging.basicConfig(level=logging.INFO)
    cli_args = get_cmd_args()
    main(cli_args)