#!/usr/bin/env python3
#
# Copyright (C) 2020 - 2022 Collabora Limited
# Authors:
#     Gustavo Padovan <gustavo.padovan@collabora.com>
#     Guilherme Gallo <guilherme.gallo@collabora.com>
#
# SPDX-License-Identifier: MIT

"""Send a job to LAVA, track it and collect log back"""


import argparse
import contextlib
import pathlib
import re
import sys
import time
import traceback
import urllib.parse
import xmlrpc.client
from datetime import datetime, timedelta
from os import getenv
from typing import Any, Optional

import lavacli
import yaml
from lava.exceptions import (
    MesaCIException,
    MesaCIKnownIssueException,
    MesaCIParseException,
    MesaCIRetryError,
    MesaCITimeoutError,
)
from lava.utils import (
    CONSOLE_LOG,
    GitlabSection,
    LogFollower,
    LogSectionType,
    fatal_err,
    hide_sensitive_data,
    print_log,
)
from lavacli.utils import loader

# Timeout in seconds to decide if the device from the dispatched LAVA job has
# hung or not due to the lack of new log output.
DEVICE_HANGING_TIMEOUT_SEC = int(getenv("LAVA_DEVICE_HANGING_TIMEOUT_SEC", 5*60))

# How many seconds the script should wait before trying a new polling iteration
# to check if the dispatched LAVA job is running or waiting in the job queue.
WAIT_FOR_DEVICE_POLLING_TIME_SEC = int(getenv("LAVA_WAIT_FOR_DEVICE_POLLING_TIME_SEC", 10))

# How many seconds to wait between log output LAVA RPC calls.
LOG_POLLING_TIME_SEC = int(getenv("LAVA_LOG_POLLING_TIME_SEC", 5))

# How many retries should be made when a timeout happens.
NUMBER_OF_RETRIES_TIMEOUT_DETECTION = int(getenv("LAVA_NUMBER_OF_RETRIES_TIMEOUT_DETECTION", 2))

# How many attempts should be made when a timeout happens during LAVA device boot.
NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3))


def generate_lava_yaml(args):
    """Build the LAVA job definition for this CI job and return it as YAML text.

    The definition has three actions (deploy, boot, test).  The test action
    carries an inline test definition whose run steps are assembled from the
    first-stage init script, the optional JWT file and the artifact download
    commands.

    Args:
        args: parsed command line namespace as produced by create_parser().

    Returns:
        str: the job definition dumped with yaml.dump().
    """
    # General metadata and permissions, plus also inexplicably kernel arguments
    values = {
        'job_name': f'mesa: {args.pipeline_info}',
        'device_type': args.device_type,
        'visibility': {'group': [args.visibility_group]},
        'priority': 75,
        'context': {
            'extra_nfsroot_args': ' init=/init rootwait usbcore.quirks=0bda:8153:k'
        },
        "timeouts": {
            "job": {"minutes": args.job_timeout},
            "action": {"minutes": 3},
            "actions": {
                "depthcharge-action": {
                    # One 3 minute action timeout per allowed boot attempt.
                    "minutes": 3 * NUMBER_OF_ATTEMPTS_LAVA_BOOT,
                }
            }
        },
    }

    if args.lava_tags:
        values['tags'] = args.lava_tags.split(',')

    # URLs to our kernel rootfs to boot from, both generated by the base
    # container build
    deploy = {
        'timeout': {'minutes': 10},
        'to': 'tftp',
        'os': 'oe',
        'kernel': {
            'url': f'{args.kernel_url_prefix}/{args.kernel_image_name}',
        },
        'nfsrootfs': {
            'url': f'{args.rootfs_url_prefix}/lava-rootfs.tgz',
            'compression': 'gz',
        }
    }
    if args.kernel_image_type:
        deploy['kernel']['type'] = args.kernel_image_type
    if args.dtb:
        deploy['dtb'] = {
            'url': f'{args.kernel_url_prefix}/{args.dtb}.dtb'
        }

    # always boot over NFS
    boot = {
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "method": args.boot_method,
        "commands": "nfs",
        "prompts": ["lava-shell:"],
    }

    # skeleton test definition: only declaring each job as a single 'test'
    # since LAVA's test parsing is not useful to us
    #
    # run_steps is referenced by the test dict below and extended in place
    # afterwards, so the additions are visible through the dict.
    run_steps = []
    test = {
        'timeout': {'minutes': args.job_timeout},
        'failure_retry': 1,
        'definitions': [{
            'name': 'mesa',
            'from': 'inline',
            'lava-signal': 'kmsg',
            'path': 'inline/mesa.yaml',
            'repository': {
                'metadata': {
                    'name': 'mesa',
                    'description': 'Mesa test plan',
                    'os': ['oe'],
                    'scope': ['functional'],
                    'format': 'Lava-Test Test Definition 1.0',
                },
                'run': {
                    "steps": run_steps
                },
            },
        }],
    }

    # job execution script:
    #   - inline .gitlab-ci/common/init-stage1.sh
    #   - fetch and unpack per-pipeline build artifacts from build job
    #   - fetch and unpack per-job environment from lava-submit.sh
    #   - exec .gitlab-ci/common/init-stage2.sh

    with open(args.first_stage_init, 'r') as init_sh:
        # Drop comment lines and blank lines from the inlined script.
        run_steps += [x.rstrip() for x in init_sh if not x.startswith('#') and x.rstrip()]

    if args.jwt_file:
        with open(args.jwt_file) as jwt_file:
            run_steps += [
                # Disable xtrace around the line that carries the token.
                "set +x",
                f'echo -n "{jwt_file.read()}" > "{args.jwt_file}" # HIDEME',
                "set -x",
                f'echo "export CI_JOB_JWT_FILE={args.jwt_file}" >> /set-job-env-vars.sh',
            ]
    else:
        run_steps += [
            "echo Could not find jwt file, disabling MINIO requests...",
            "sed -i '/MINIO_RESULTS_UPLOAD/d' /set-job-env-vars.sh",
        ]

    run_steps += [
        f'mkdir -p {args.ci_project_dir}',
        f'wget -S --progress=dot:giga -O- {args.build_url} | tar -xz -C {args.ci_project_dir}',
        f'wget -S --progress=dot:giga -O- {args.job_rootfs_overlay_url} | tar -xz -C /',

        # Sleep a bit to give time for bash to dump shell xtrace messages into
        # console which may cause interleaving with LAVA_SIGNAL_STARTTC in some
        # devices like a618.
        'sleep 1',

        # Putting CI_JOB name as the testcase name, it may help LAVA farm
        # maintainers with monitoring
        f"lava-test-case 'mesa-ci_{args.mesa_job_name}' --shell /init-stage2.sh",
    ]

    values['actions'] = [
        {'deploy': deploy},
        {'boot': boot},
        {'test': test},
    ]

    # A very large width keeps yaml.dump() from wrapping the long shell lines.
    return yaml.dump(values, width=10000000)


def setup_lava_proxy():
    """Create the XML-RPC proxy used to talk to the LAVA server.

    The URI, username and token are read from the "default" lavacli
    configuration and the credentials are embedded in the proxy URL.

    Returns:
        xmlrpc.client.ServerProxy: proxy bound to the configured LAVA server.
    """
    config = lavacli.load_config("default")
    uri, usr, tok = (config.get(key) for key in ("uri", "username", "token"))
    uri_obj = urllib.parse.urlparse(uri)
    uri_str = f"{uri_obj.scheme}://{usr}:{tok}@{uri_obj.netloc}{uri_obj.path}"
    transport = lavacli.RequestsTransport(
        uri_obj.scheme,
        config.get("proxy"),
        config.get("timeout", 120.0),
        config.get("verify_ssl_cert", True),
    )
    proxy = xmlrpc.client.ServerProxy(
        uri_str, allow_none=True, transport=transport)

    # Log the configured URI, not uri_str, so the token is not printed.
    print_log(f"Proxy for {config['uri']} created.")

    return proxy


def _call_proxy(fn, *args):
    """Call an XML-RPC method, retrying on protocol errors.

    A ProtocolError is retried up to 60 times with a 15 second pause between
    attempts.  When the last attempt also fails, or when the server answers
    with a Fault, the traceback is printed and fatal_err() is called.

    NOTE(review): fatal_err() comes from lava.utils and is presumably
    terminating the script; if it returned, this function would yield None.

    Args:
        fn: the proxy method to call.
        *args: positional arguments forwarded to fn.

    Returns:
        Whatever fn returns.
    """
    retries = 60
    for n in range(1, retries + 1):
        try:
            return fn(*args)
        except xmlrpc.client.ProtocolError as err:
            if n == retries:
                traceback.print_exc()
                fatal_err(f"A protocol error occurred (Err {err.errcode} {err.errmsg})")
            else:
                time.sleep(15)
        except xmlrpc.client.Fault as err:
            traceback.print_exc()
            fatal_err(f"FATAL: Fault: {err.faultString} (code: {err.faultCode})")


class LAVAJob:
    """State and RPC helpers for a single LAVA job submission."""

    # Console color used by print_job_final_status() for each final status.
    COLOR_STATUS_MAP = {
        "pass": CONSOLE_LOG["FG_GREEN"],
        "hung": CONSOLE_LOG["FG_YELLOW"],
        "fail": CONSOLE_LOG["FG_RED"],
        "canceled": CONSOLE_LOG["FG_MAGENTA"],
    }

    def __init__(self, proxy, definition):
        """Store the proxy and the YAML job definition; nothing is submitted yet."""
        self.job_id = None
        self.proxy = proxy
        self.definition = definition
        # Number of log lines already fetched; used as offset for the next fetch.
        self.last_log_line = 0
        self.last_log_time = None
        self.is_finished = False
        self.status = "created"

    def heartbeat(self):
        """Record that the job showed activity just now and mark it as running."""
        self.last_log_time = datetime.now()
        self.status = "running"

    def validate(self) -> Optional[dict]:
        """Returns a dict with errors, if the validation fails.

        Returns:
            Optional[dict]: a dict with the validation errors, if any
        """
        return _call_proxy(self.proxy.scheduler.jobs.validate, self.definition, True)

    def submit(self):
        """Submit the job definition and store the resulting job id.

        Returns:
            bool: False when a MesaCIException was raised, True otherwise.
        """
        try:
            self.job_id = _call_proxy(self.proxy.scheduler.jobs.submit, self.definition)
        except MesaCIException:
            return False
        return True

    def cancel(self):
        """Ask LAVA to cancel the job; a no-op when no job id was assigned."""
        if self.job_id:
            self.proxy.scheduler.jobs.cancel(self.job_id)

    def is_started(self) -> bool:
        """Return True once the job state is not one of the waiting states."""
        waiting_states = ["Submitted", "Scheduling", "Scheduled"]
        job_state: dict[str, str] = _call_proxy(
            self.proxy.scheduler.job_state, self.job_id
        )
        return job_state["job_state"] not in waiting_states

    def _load_log_from_data(self, data) -> list[str]:
        """Parse a YAML log payload and advance the log line offset."""
        lines = []
        # When there is no new log data, the YAML is empty
        if loaded_lines := yaml.load(str(data), Loader=loader(False)):
            lines = loaded_lines
            self.last_log_line += len(lines)
        return lines

    def get_logs(self) -> list[str]:
        """Fetch the log lines produced since the previous call.

        Also updates is_finished from the value reported by the server.

        Raises:
            MesaCIParseException: when fetching or parsing the log fails for
                any reason; the original exception is chained as the cause.
        """
        try:
            (finished, data) = _call_proxy(
                self.proxy.scheduler.jobs.logs, self.job_id, self.last_log_line
            )
            self.is_finished = finished
            return self._load_log_from_data(data)

        except Exception as mesa_ci_err:
            raise MesaCIParseException(
                f"Could not get LAVA job logs. Reason: {mesa_ci_err}"
            ) from mesa_ci_err

    def parse_job_result_from_log(
        self, lava_lines: list[str]
    ) -> list[str]:
        """Use the console log to catch if the job has completed successfully or
        not. Returns the list of log lines until the result line."""

        last_line = None  # Print all lines. lines[:None] == lines[:]

        for idx, line in enumerate(lava_lines):
            if result := re.search(r"hwci: mesa: (pass|fail)", line):
                self.is_finished = True
                self.status = result.group(1)

                last_line = idx + 1
                # We reached the log end here. hwci script has finished.
                break
        return lava_lines[:last_line]


def find_exception_from_metadata(metadata, job_id):
    """Raise MesaCIException when a result's metadata describes a retriable failure.

    Args:
        metadata: the "metadata" mapping of one LAVA test result.
        job_id: LAVA job id, used only in the exception messages.

    Returns:
        None when the result is not a failure, otherwise the metadata itself
        when none of the known error patterns matched.

    Raises:
        MesaCIException: on Infrastructure or Job errors, or when the failing
            case is "validate".
    """
    if "result" not in metadata or metadata["result"] != "fail":
        return
    if "error_type" in metadata:
        error_type = metadata["error_type"]
        if error_type == "Infrastructure":
            raise MesaCIException(
                f"LAVA job {job_id} failed with Infrastructure Error. Retry."
            )
        if error_type == "Job":
            # This happens when LAVA assumes that the job cannot terminate or
            # with mal-formed job definitions. As we are always validating the
            # jobs, only the former is probable to happen. E.g.: When some LAVA
            # action timed out more times than expected in job definition.
            raise MesaCIException(
                f"LAVA job {job_id} failed with JobError "
                "(possible LAVA timeout misconfiguration/bug). Retry."
            )
    if "case" in metadata and metadata["case"] == "validate":
        raise MesaCIException(
            f"LAVA job {job_id} failed validation (possible download error). Retry."
        )
    return metadata


def find_lava_error(job) -> None:
    """Inspect the LAVA results of a job that ended without a hwci result.

    Raises MesaCIException (through find_exception_from_metadata) when a
    retriable LAVA error is found; otherwise marks the job as failed.
    """
    # Look for infrastructure errors and retry if we see them.
    results_yaml = _call_proxy(job.proxy.results.get_testjob_results_yaml, job.job_id)
    # An empty YAML document loads as None; treat that as "no results" rather
    # than failing on the iteration below.
    results = yaml.load(results_yaml, Loader=loader(False)) or []
    for res in results:
        metadata = res["metadata"]
        find_exception_from_metadata(metadata, job.job_id)

    # If we reach this far, it means that the job ended without hwci script
    # result and no LAVA infrastructure problem was found
    job.status = "fail"


def show_job_data(job):
    """Print every field returned by scheduler.jobs.show in a collapsed section."""
    with GitlabSection(
        "job_data",
        "LAVA job info",
        type=LogSectionType.LAVA_POST_PROCESSING,
        start_collapsed=True,
    ):
        show = _call_proxy(job.proxy.scheduler.jobs.show, job.job_id)
        for field, value in show.items():
            print(f"{field}\t: {value}")


def fetch_logs(job, max_idle_time, log_follower) -> None:
    """Run one polling iteration: fetch new log lines, parse and print them.

    Args:
        job: the LAVAJob being followed.
        max_idle_time: timedelta of log silence after which the device is
            considered hung.
        log_follower: the LogFollower that receives the raw lines.

    Raises:
        MesaCITimeoutError: when no new log arrived for more than max_idle_time.
        MesaCIParseException: when the log could not be fetched in 5 attempts.
    """
    # Poll to check for new logs, assuming that a prolonged period of
    # silence means that the device has died and we should try it again
    if datetime.now() - job.last_log_time > max_idle_time:
        max_idle_time_min = max_idle_time.total_seconds() / 60

        raise MesaCITimeoutError(
            f"{CONSOLE_LOG['BOLD']}"
            f"{CONSOLE_LOG['FG_YELLOW']}"
            f"LAVA job {job.job_id} does not respond for {max_idle_time_min} "
            "minutes. Retry."
            f"{CONSOLE_LOG['RESET']}",
            timeout_duration=max_idle_time,
        )

    time.sleep(LOG_POLLING_TIME_SEC)

    # The XMLRPC binary packet may be corrupted, causing a YAML scanner error.
    # Retry the log fetching several times before exposing the error.
    for _ in range(5):
        with contextlib.suppress(MesaCIParseException):
            new_log_lines = job.get_logs()
            break
    else:
        # Reached only when none of the attempts hit the break above.
        raise MesaCIParseException

    if log_follower.feed(new_log_lines):
        # If we had non-empty log data, we can assure that the device is alive.
        job.heartbeat()
    parsed_lines = log_follower.flush()

    # Only parse job results when the script reaches the end of the logs.
    # Depending on how much payload the RPC scheduler.jobs.logs get, it may
    # reach the LAVA_POST_PROCESSING phase.
    if log_follower.current_section.type in (
        LogSectionType.TEST_CASE,
        LogSectionType.LAVA_POST_PROCESSING,
    ):
        parsed_lines = job.parse_job_result_from_log(parsed_lines)

    for line in parsed_lines:
        print_log(line)


def follow_job_execution(job):
    """Submit the job, wait for it to start and stream its log until it ends.

    Raises:
        MesaCIException: when the submission raises, or (through
            find_lava_error) when the job ended with a retriable LAVA error.
    """
    try:
        job.submit()
    except Exception as mesa_ci_err:
        raise MesaCIException(
            f"Could not submit LAVA job. Reason: {mesa_ci_err}"
        ) from mesa_ci_err

    print_log(f"Waiting for job {job.job_id} to start.")
    while not job.is_started():
        time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
    print_log(f"Job {job.job_id} started.")

    gl = GitlabSection(
        id="lava_boot",
        header="LAVA boot",
        type=LogSectionType.LAVA_BOOT,
        start_collapsed=True,
    )
    print(gl.start())
    max_idle_time = timedelta(seconds=DEVICE_HANGING_TIMEOUT_SEC)
    with LogFollower(current_section=gl) as lf:
        # Start to check job's health
        job.heartbeat()
        while not job.is_finished:
            fetch_logs(job, max_idle_time, lf)

    show_job_data(job)

    # Mesa Developers expect to have a simple pass/fail job result.
    # If this does not happen, it probably means a LAVA infrastructure error
    # happened.
    if job.status not in ["pass", "fail"]:
        find_lava_error(job)


def print_job_final_status(job):
    """Print the job's final status in color; a still "running" job becomes "hung"."""
    if job.status == "running":
        job.status = "hung"

    # Statuses without a dedicated color are printed in red.
    color = LAVAJob.COLOR_STATUS_MAP.get(job.status, CONSOLE_LOG["FG_RED"])
    print_log(
        f"{color}"
        f"LAVA Job finished with status: {job.status}"
        f"{CONSOLE_LOG['RESET']}"
    )


def retriable_follow_job(proxy, job_definition) -> LAVAJob:
    """Run the job, creating a fresh LAVAJob for every attempt.

    Up to NUMBER_OF_RETRIES_TIMEOUT_DETECTION + 1 attempts are made.  An
    attempt is repeated when follow_job_execution() raises a MesaCIException.

    Args:
        proxy: XML-RPC proxy for the LAVA server.
        job_definition: the YAML job definition to submit.

    Returns:
        LAVAJob: the job of the first attempt that finished without raising.

    Raises:
        MesaCIRetryError: when every attempt raised.
        KeyboardInterrupt: re-raised after asking LAVA to cancel the job.
    """
    retry_count = NUMBER_OF_RETRIES_TIMEOUT_DETECTION

    for attempt_no in range(1, retry_count + 2):
        job = LAVAJob(proxy, job_definition)
        try:
            follow_job_execution(job)
            return job
        except MesaCIKnownIssueException as found_issue:
            print_log(found_issue)
            job.status = "canceled"
        except MesaCIException as mesa_exception:
            print_log(mesa_exception)
            job.cancel()
        except KeyboardInterrupt:
            print_log("LAVA job submitter was interrupted. Cancelling the job.")
            job.cancel()
            raise
        finally:
            # Runs for every attempt, including the successful one that returns.
            print_log(
                f"{CONSOLE_LOG['BOLD']}"
                f"Finished executing LAVA job in the attempt #{attempt_no}"
                f"{CONSOLE_LOG['RESET']}"
            )
            print_job_final_status(job)

    raise MesaCIRetryError(
        f"{CONSOLE_LOG['BOLD']}"
        f"{CONSOLE_LOG['FG_RED']}"
        "Job failed after it exceeded the number of "
        f"{retry_count} retries."
        f"{CONSOLE_LOG['RESET']}",
        retry_count=retry_count,
    )


def treat_mesa_job_name(args):
    """Truncate args.mesa_job_name at its first space, in place."""
    # Keep only the part before the first space: job names with spaces break
    # the lava-test-case command
    args.mesa_job_name = args.mesa_job_name.split(" ")[0]


def main(args):
    """Validate the job definition and, unless --validate-only, run the job.

    Exits the process with code 0 when the job status is "pass" and 1
    otherwise.
    """
    proxy = setup_lava_proxy()

    job_definition = generate_lava_yaml(args)

    if args.dump_yaml:
        with GitlabSection(
            "yaml_dump",
            "LAVA job definition (YAML)",
            type=LogSectionType.LAVA_BOOT,
            start_collapsed=True,
        ):
            print(hide_sensitive_data(job_definition))
    job = LAVAJob(proxy, job_definition)

    if errors := job.validate():
        fatal_err(f"Error in LAVA job definition: {errors}")
    print_log("LAVA job definition validated successfully")

    if args.validate_only:
        return

    finished_job = retriable_follow_job(proxy, job_definition)
    exit_code = 0 if finished_job.status == "pass" else 1
    sys.exit(exit_code)


def create_parser():
    """Return the argparse parser holding every option of the job submitter."""
    parser = argparse.ArgumentParser("LAVA job submitter")

    parser.add_argument("--pipeline-info")
    parser.add_argument("--rootfs-url-prefix")
    parser.add_argument("--kernel-url-prefix")
    parser.add_argument("--build-url")
    parser.add_argument("--job-rootfs-overlay-url")
    parser.add_argument("--job-timeout", type=int)
    parser.add_argument("--first-stage-init")
    parser.add_argument("--ci-project-dir")
    parser.add_argument("--device-type")
    parser.add_argument("--dtb", nargs='?', default="")
    parser.add_argument("--kernel-image-name")
    parser.add_argument("--kernel-image-type", nargs='?', default="")
    parser.add_argument("--boot-method")
    parser.add_argument("--lava-tags", nargs='?', default="")
    parser.add_argument("--jwt-file", type=pathlib.Path)
    parser.add_argument("--validate-only", action='store_true')
    parser.add_argument("--dump-yaml", action='store_true')
    parser.add_argument("--visibility-group")
    parser.add_argument("--mesa-job-name")

    return parser


if __name__ == "__main__":
    # given that we proxy from DUT -> LAVA dispatcher -> LAVA primary -> us ->
    # GitLab runner -> GitLab primary -> user, safe to say we don't need any
    # more buffering
    sys.stdout.reconfigure(line_buffering=True)
    sys.stderr.reconfigure(line_buffering=True)

    parser = create_parser()

    parser.set_defaults(func=main)
    args = parser.parse_args()
    treat_mesa_job_name(args)
    args.func(args)