1bf215546Sopenharmony_ci#!/usr/bin/env python3
2bf215546Sopenharmony_ci#
3bf215546Sopenharmony_ci# Copyright (C) 2020 - 2022 Collabora Limited
4bf215546Sopenharmony_ci# Authors:
5bf215546Sopenharmony_ci#     Gustavo Padovan <gustavo.padovan@collabora.com>
6bf215546Sopenharmony_ci#     Guilherme Gallo <guilherme.gallo@collabora.com>
7bf215546Sopenharmony_ci#
8bf215546Sopenharmony_ci# SPDX-License-Identifier: MIT
9bf215546Sopenharmony_ci
10bf215546Sopenharmony_ci"""Send a job to LAVA, track it and collect log back"""
11bf215546Sopenharmony_ci
12bf215546Sopenharmony_ci
13bf215546Sopenharmony_ciimport argparse
14bf215546Sopenharmony_ciimport contextlib
15bf215546Sopenharmony_ciimport pathlib
16bf215546Sopenharmony_ciimport re
17bf215546Sopenharmony_ciimport sys
18bf215546Sopenharmony_ciimport time
19bf215546Sopenharmony_ciimport traceback
20bf215546Sopenharmony_ciimport urllib.parse
21bf215546Sopenharmony_ciimport xmlrpc.client
22bf215546Sopenharmony_cifrom datetime import datetime, timedelta
23bf215546Sopenharmony_cifrom os import getenv
24bf215546Sopenharmony_cifrom typing import Any, Optional
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ciimport lavacli
27bf215546Sopenharmony_ciimport yaml
28bf215546Sopenharmony_cifrom lava.exceptions import (
29bf215546Sopenharmony_ci    MesaCIException,
30bf215546Sopenharmony_ci    MesaCIKnownIssueException,
31bf215546Sopenharmony_ci    MesaCIParseException,
32bf215546Sopenharmony_ci    MesaCIRetryError,
33bf215546Sopenharmony_ci    MesaCITimeoutError,
34bf215546Sopenharmony_ci)
35bf215546Sopenharmony_cifrom lava.utils import (
36bf215546Sopenharmony_ci    CONSOLE_LOG,
37bf215546Sopenharmony_ci    GitlabSection,
38bf215546Sopenharmony_ci    LogFollower,
39bf215546Sopenharmony_ci    LogSectionType,
40bf215546Sopenharmony_ci    fatal_err,
41bf215546Sopenharmony_ci    hide_sensitive_data,
42bf215546Sopenharmony_ci    print_log,
43bf215546Sopenharmony_ci)
44bf215546Sopenharmony_cifrom lavacli.utils import loader
45bf215546Sopenharmony_ci
def _int_from_env(name, default):
    """Return environment variable *name* converted to int, or *default* if unset."""
    return int(getenv(name, default))


# Number of seconds without new log output after which the device running the
# dispatched LAVA job is considered to have hung.
DEVICE_HANGING_TIMEOUT_SEC = _int_from_env("LAVA_DEVICE_HANGING_TIMEOUT_SEC", 5 * 60)

# Number of seconds the script waits before starting a new polling iteration
# to check whether the dispatched LAVA job is running or still waiting in the
# job queue.
WAIT_FOR_DEVICE_POLLING_TIME_SEC = _int_from_env(
    "LAVA_WAIT_FOR_DEVICE_POLLING_TIME_SEC", 10
)

# Number of seconds to wait between LAVA RPC calls that fetch log output.
LOG_POLLING_TIME_SEC = _int_from_env("LAVA_LOG_POLLING_TIME_SEC", 5)

# Number of retries to make when a timeout happens.
NUMBER_OF_RETRIES_TIMEOUT_DETECTION = _int_from_env(
    "LAVA_NUMBER_OF_RETRIES_TIMEOUT_DETECTION", 2
)

# Number of attempts to make when a timeout happens during LAVA device boot.
NUMBER_OF_ATTEMPTS_LAVA_BOOT = _int_from_env("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3)
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci
def generate_lava_yaml(args):
    """Build the LAVA job definition for a Mesa CI job and return it as YAML.

    The definition carries three actions, in this order: ``deploy`` (kernel,
    optional dtb and NFS rootfs, deployed to tftp), ``boot`` (NFS boot commands,
    retried up to NUMBER_OF_ATTEMPTS_LAVA_BOOT times) and ``test`` (one inline
    test definition whose run steps are assembled here).

    Args:
        args: parsed command-line namespace; the options read here are the
            ones registered by create_parser().

    Returns:
        str: the job definition serialized by ``yaml.dump``.
    """
    boot_timeout_minutes = 3 * NUMBER_OF_ATTEMPTS_LAVA_BOOT

    # General metadata and permissions, plus the extra kernel arguments
    values = {
        "job_name": f"mesa: {args.pipeline_info}",
        "device_type": args.device_type,
        "visibility": {"group": [args.visibility_group]},
        "priority": 75,
        "context": {
            "extra_nfsroot_args": " init=/init rootwait usbcore.quirks=0bda:8153:k",
        },
        "timeouts": {
            "job": {"minutes": args.job_timeout},
            "action": {"minutes": 3},
            "actions": {
                "depthcharge-action": {"minutes": boot_timeout_minutes},
            },
        },
    }
    if args.lava_tags:
        values["tags"] = args.lava_tags.split(",")

    # URLs of the kernel and rootfs to boot from, both generated by the base
    # container build
    kernel = {"url": f"{args.kernel_url_prefix}/{args.kernel_image_name}"}
    if args.kernel_image_type:
        kernel["type"] = args.kernel_image_type

    deploy = {
        "timeout": {"minutes": 10},
        "to": "tftp",
        "os": "oe",
        "kernel": kernel,
        "nfsrootfs": {
            "url": f"{args.rootfs_url_prefix}/lava-rootfs.tgz",
            "compression": "gz",
        },
    }
    if args.dtb:
        deploy["dtb"] = {"url": f"{args.kernel_url_prefix}/{args.dtb}.dtb"}

    # always boot over NFS
    boot = {
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "method": args.boot_method,
        "commands": "nfs",
        "prompts": ["lava-shell:"],
    }

    # job execution script:
    #   - inline .gitlab-ci/common/init-stage1.sh
    #   - fetch and unpack per-pipeline build artifacts from build job
    #   - fetch and unpack per-job environment from lava-submit.sh
    #   - exec .gitlab-ci/common/init-stage2.sh
    run_steps = []
    with open(args.first_stage_init, "r") as init_sh:
        for raw_line in init_sh:
            step = raw_line.rstrip()
            # Drop comment lines and lines that are empty once right-stripped.
            if step and not raw_line.startswith("#"):
                run_steps.append(step)

    if args.jwt_file:
        with open(args.jwt_file) as jwt_file:
            token = jwt_file.read()
        run_steps.extend(
            [
                # Turn shell tracing off while the token is written out.
                "set +x",
                f'echo -n "{token}" > "{args.jwt_file}"  # HIDEME',
                "set -x",
                f'echo "export CI_JOB_JWT_FILE={args.jwt_file}" >> /set-job-env-vars.sh',
            ]
        )
    else:
        run_steps.extend(
            [
                "echo Could not find jwt file, disabling MINIO requests...",
                "sed -i '/MINIO_RESULTS_UPLOAD/d' /set-job-env-vars.sh",
            ]
        )

    run_steps.extend(
        [
            f"mkdir -p {args.ci_project_dir}",
            f"wget -S --progress=dot:giga -O- {args.build_url} | tar -xz -C {args.ci_project_dir}",
            f"wget -S --progress=dot:giga -O- {args.job_rootfs_overlay_url} | tar -xz -C /",
            # Sleep a bit to give time for bash to dump shell xtrace messages
            # into console which may cause interleaving with
            # LAVA_SIGNAL_STARTTC in some devices like a618.
            "sleep 1",
            # Putting CI_JOB name as the testcase name, it may help LAVA farm
            # maintainers with monitoring
            f"lava-test-case 'mesa-ci_{args.mesa_job_name}' --shell /init-stage2.sh",
        ]
    )

    # skeleton test definition: only declaring each job as a single 'test'
    # since LAVA's test parsing is not useful to us
    mesa_definition = {
        "name": "mesa",
        "from": "inline",
        "lava-signal": "kmsg",
        "path": "inline/mesa.yaml",
        "repository": {
            "metadata": {
                "name": "mesa",
                "description": "Mesa test plan",
                "os": ["oe"],
                "scope": ["functional"],
                "format": "Lava-Test Test Definition 1.0",
            },
            "run": {"steps": run_steps},
        },
    }
    test = {
        "timeout": {"minutes": args.job_timeout},
        "failure_retry": 1,
        "definitions": [mesa_definition],
    }

    values["actions"] = [{"deploy": deploy}, {"boot": boot}, {"test": test}]

    # A very large width keeps yaml.dump from wrapping the long shell lines.
    return yaml.dump(values, width=10000000)
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci
def setup_lava_proxy():
    """Create the XML-RPC proxy used to talk to the LAVA server.

    Reads the "default" lavacli configuration, embeds its username and token
    into the server URI and builds an ``xmlrpc.client.ServerProxy`` on top of
    a ``lavacli.RequestsTransport``.

    Returns:
        xmlrpc.client.ServerProxy: the proxy bound to the configured server.
    """
    config = lavacli.load_config("default")
    uri = config.get("uri")
    usr = config.get("username")
    tok = config.get("token")

    parsed = urllib.parse.urlparse(uri)
    authenticated_uri = f"{parsed.scheme}://{usr}:{tok}@{parsed.netloc}{parsed.path}"

    transport = lavacli.RequestsTransport(
        parsed.scheme,
        config.get("proxy"),
        config.get("timeout", 120.0),
        config.get("verify_ssl_cert", True),
    )
    proxy = xmlrpc.client.ServerProxy(
        authenticated_uri, allow_none=True, transport=transport
    )

    # Log the plain configured URI, not the one carrying the credentials.
    print_log(f"Proxy for {config['uri']} created.")

    return proxy
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_cidef _call_proxy(fn, *args):
210bf215546Sopenharmony_ci    retries = 60
211bf215546Sopenharmony_ci    for n in range(1, retries + 1):
212bf215546Sopenharmony_ci        try:
213bf215546Sopenharmony_ci            return fn(*args)
214bf215546Sopenharmony_ci        except xmlrpc.client.ProtocolError as err:
215bf215546Sopenharmony_ci            if n == retries:
216bf215546Sopenharmony_ci                traceback.print_exc()
217bf215546Sopenharmony_ci                fatal_err("A protocol error occurred (Err {} {})".format(err.errcode, err.errmsg))
218bf215546Sopenharmony_ci            else:
219bf215546Sopenharmony_ci                time.sleep(15)
220bf215546Sopenharmony_ci        except xmlrpc.client.Fault as err:
221bf215546Sopenharmony_ci            traceback.print_exc()
222bf215546Sopenharmony_ci            fatal_err("FATAL: Fault: {} (code: {})".format(err.faultString, err.faultCode))
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci
class LAVAJob:
    """State and RPC helpers for one LAVA job built from a job definition."""

    # Console color used when reporting each final job status.
    COLOR_STATUS_MAP = {
        "pass": CONSOLE_LOG["FG_GREEN"],
        "hung": CONSOLE_LOG["FG_YELLOW"],
        "fail": CONSOLE_LOG["FG_RED"],
        "canceled": CONSOLE_LOG["FG_MAGENTA"],
    }

    def __init__(self, proxy, definition):
        self.proxy = proxy
        self.definition = definition
        # Filled in by submit() with the value returned by the scheduler.
        self.job_id = None
        # Number of log lines consumed so far; passed as the offset of the
        # next log request.
        self.last_log_line = 0
        # Time of the last heartbeat(); None until the first one.
        self.last_log_time = None
        self.is_finished = False
        self.status = "created"

    def heartbeat(self):
        """Record that the job showed activity now and mark it as running."""
        self.status = "running"
        self.last_log_time = datetime.now()

    def validate(self) -> Optional[dict]:
        """Ask the LAVA scheduler to validate the job definition.

        Returns:
            Optional[dict]: a dict with the validation errors, if any
        """
        return _call_proxy(self.proxy.scheduler.jobs.validate, self.definition, True)

    def submit(self):
        """Submit the definition and store the resulting job id.

        Returns:
            bool: False when the call raised MesaCIException, True otherwise.
        """
        try:
            submitted_id = _call_proxy(
                self.proxy.scheduler.jobs.submit, self.definition
            )
        except MesaCIException:
            return False
        self.job_id = submitted_id
        return True

    def cancel(self):
        """Cancel the job on the scheduler; does nothing without a job id."""
        if not self.job_id:
            return
        self.proxy.scheduler.jobs.cancel(self.job_id)

    def is_started(self) -> bool:
        """Return True once the scheduler reports a state past the queue."""
        state: dict[str, str] = _call_proxy(
            self.proxy.scheduler.job_state, self.job_id
        )
        return state["job_state"] not in ("Submitted", "Scheduling", "Scheduled")

    def _load_log_from_data(self, data) -> list[str]:
        """Parse a YAML log payload and advance the log offset.

        Returns:
            list[str]: the parsed entries, or an empty list when the payload
            holds no new log data.
        """
        parsed = yaml.load(str(data), Loader=loader(False))
        # When there is no new log data, the YAML is empty
        if not parsed:
            return []
        self.last_log_line += len(parsed)
        return parsed

    def get_logs(self) -> list[str]:
        """Fetch the log entries produced since the last call.

        Also updates ``is_finished`` with the flag returned by the scheduler.

        Raises:
            MesaCIParseException: if fetching or parsing the logs fails for
                any reason; the original exception is chained.
        """
        try:
            finished, payload = _call_proxy(
                self.proxy.scheduler.jobs.logs, self.job_id, self.last_log_line
            )
            self.is_finished = finished
            return self._load_log_from_data(payload)
        except Exception as mesa_ci_err:
            raise MesaCIParseException(
                f"Could not get LAVA job logs. Reason: {mesa_ci_err}"
            ) from mesa_ci_err

    def parse_job_result_from_log(
        self, lava_lines: list[dict[str, str]]
    ) -> list[dict[str, str]]:
        """Look for the hwci result marker in the console log.

        When a line matches ``hwci: mesa: (pass|fail)`` the job is flagged as
        finished, its status is set to the matched word and the lines up to
        and including that line are returned. Without a match, a copy of all
        the lines is returned.

        NOTE(review): the annotation says each entry is a dict, but every
        entry is handed straight to ``re.search``, which needs str/bytes --
        confirm the real element type with the caller.
        """
        for line_count, line in enumerate(lava_lines, start=1):
            verdict = re.search(r"hwci: mesa: (pass|fail)", line)
            if verdict is None:
                continue
            # We reached the log end here. hwci script has finished.
            self.is_finished = True
            self.status = verdict.group(1)
            return lava_lines[:line_count]
        return lava_lines[:]
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci
def find_exception_from_metadata(metadata, job_id):
    """Raise a retriable error if a failed LAVA result points to a LAVA problem.

    Args:
        metadata: the ``metadata`` mapping of one LAVA test result.
        job_id: id of the LAVA job, used only in the error messages.

    Returns:
        None when the result is not a failure; otherwise ``metadata`` itself
        when none of the known LAVA error patterns matched.

    Raises:
        MesaCIException: for a failed result whose ``error_type`` is
            "Infrastructure" or "Job", or whose ``case`` is "validate".
    """
    if metadata.get("result") != "fail":
        return

    error_type = metadata.get("error_type")
    if error_type == "Infrastructure":
        raise MesaCIException(
            f"LAVA job {job_id} failed with Infrastructure Error. Retry."
        )
    if error_type == "Job":
        # This happens when LAVA assumes that the job cannot terminate or
        # with mal-formed job definitions. As we are always validating the
        # jobs, only the former is probable to happen. E.g.: When some LAVA
        # action timed out more times than expected in job definition.
        raise MesaCIException(
            f"LAVA job {job_id} failed with JobError "
            "(possible LAVA timeout misconfiguration/bug). Retry."
        )

    if metadata.get("case") == "validate":
        raise MesaCIException(
            f"LAVA job {job_id} failed validation (possible download error). Retry."
        )

    return metadata
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci
def find_lava_error(job) -> None:
    """Scan the LAVA results of *job* for errors that justify a retry.

    Every result's metadata is passed to find_exception_from_metadata(),
    which raises when it recognizes a LAVA-side failure. When nothing is
    raised the job status is set to "fail".
    """
    # Look for infrastructure errors and retry if we see them.
    raw_results = _call_proxy(job.proxy.results.get_testjob_results_yaml, job.job_id)
    for entry in yaml.load(raw_results, Loader=loader(False)):
        find_exception_from_metadata(entry["metadata"], job.job_id)

    # If we reach this far, it means that the job ended without hwci script
    # result and no LAVA infrastructure problem was found
    job.status = "fail"
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci
def show_job_data(job):
    """Print the fields the scheduler reports for *job* in a collapsed section."""
    section = GitlabSection(
        "job_data",
        "LAVA job info",
        type=LogSectionType.LAVA_POST_PROCESSING,
        start_collapsed=True,
    )
    with section:
        job_info = _call_proxy(job.proxy.scheduler.jobs.show, job.job_id)
        for field, value in job_info.items():
            print(f"{field}\t: {value}")
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci
def fetch_logs(job, max_idle_time, log_follower) -> None:
    """Run one log polling iteration for *job* and print the new lines.

    Args:
        job: the LAVA job being followed.
        max_idle_time: timedelta the job may stay without a heartbeat before
            it is considered hung.
        log_follower: the LogFollower that receives the raw log lines.

    Raises:
        MesaCITimeoutError: if the last heartbeat is older than
            *max_idle_time*.
        MesaCIParseException: if the logs could not be fetched in 5 attempts.
    """
    # Poll to check for new logs, assuming that a prolonged period of
    # silence means that the device has died and we should try it again
    idle_time = datetime.now() - job.last_log_time
    if idle_time > max_idle_time:
        max_idle_time_min = max_idle_time.total_seconds() / 60
        timeout_message = (
            f"{CONSOLE_LOG['BOLD']}"
            f"{CONSOLE_LOG['FG_YELLOW']}"
            f"LAVA job {job.job_id} does not respond for {max_idle_time_min} "
            "minutes. Retry."
            f"{CONSOLE_LOG['RESET']}"
        )
        raise MesaCITimeoutError(timeout_message, timeout_duration=max_idle_time)

    time.sleep(LOG_POLLING_TIME_SEC)

    # The XMLRPC binary packet may be corrupted, causing a YAML scanner error.
    # Retry the log fetching several times before exposing the error.
    for _ in range(5):
        try:
            new_log_lines = job.get_logs()
        except MesaCIParseException:
            continue
        break
    else:
        raise MesaCIParseException

    # If we had non-empty log data, we can assure that the device is alive.
    got_new_data = log_follower.feed(new_log_lines)
    if got_new_data:
        job.heartbeat()
    parsed_lines = log_follower.flush()

    # Only parse job results when the script reaches the end of the logs.
    # Depending on how much payload the RPC scheduler.jobs.logs get, it may
    # reach the LAVA_POST_PROCESSING phase.
    result_sections = (
        LogSectionType.TEST_CASE,
        LogSectionType.LAVA_POST_PROCESSING,
    )
    if log_follower.current_section.type in result_sections:
        parsed_lines = job.parse_job_result_from_log(parsed_lines)

    for line in parsed_lines:
        print_log(line)
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci
def follow_job_execution(job):
    """Submit *job*, wait for it to start and follow its logs until it ends.

    Once the job is finished its scheduler data is printed. If the logs did
    not yield a plain pass/fail status, the LAVA results are inspected by
    find_lava_error(), which either raises a retriable error or marks the job
    as failed.

    Args:
        job: a freshly created LAVAJob.

    Raises:
        MesaCIException: if the job could not be submitted. Exceptions raised
            by fetch_logs() and find_lava_error() propagate unchanged.
    """
    try:
        submitted = job.submit()
    except Exception as mesa_ci_err:
        raise MesaCIException(
            f"Could not submit LAVA job. Reason: {mesa_ci_err}"
        ) from mesa_ci_err

    # LAVAJob.submit() reports a failed submission by returning False. Only
    # that explicit value is treated as a failure; going on without a job id
    # would just poll the scheduler for job "None".
    if submitted is False:
        raise MesaCIException(
            "Could not submit LAVA job. Reason: job submission was not accepted."
        )

    print_log(f"Waiting for job {job.job_id} to start.")
    while not job.is_started():
        time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
    print_log(f"Job {job.job_id} started.")

    gl = GitlabSection(
        id="lava_boot",
        header="LAVA boot",
        type=LogSectionType.LAVA_BOOT,
        start_collapsed=True,
    )
    print(gl.start())
    max_idle_time = timedelta(seconds=DEVICE_HANGING_TIMEOUT_SEC)
    with LogFollower(current_section=gl) as lf:
        # Start to check job's health
        job.heartbeat()
        while not job.is_finished:
            fetch_logs(job, max_idle_time, lf)

    show_job_data(job)

    # Mesa Developers expect to have a simple pass/fail job result.
    # If this does not happen, it probably means a LAVA infrastructure error
    # happened.
    if job.status not in ["pass", "fail"]:
        find_lava_error(job)
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci
def print_job_final_status(job):
    """Log the final status of *job*, colored according to the status.

    A job still marked as "running" at this point is relabelled "hung".
    Statuses without an entry in LAVAJob.COLOR_STATUS_MAP are printed in red.
    """
    if job.status == "running":
        job.status = "hung"

    status_color = LAVAJob.COLOR_STATUS_MAP.get(job.status, CONSOLE_LOG["FG_RED"])
    summary = (
        f"{status_color}"
        f"LAVA Job finished with status: {job.status}"
        f"{CONSOLE_LOG['RESET']}"
    )
    print_log(summary)
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci
def retriable_follow_job(proxy, job_definition) -> LAVAJob:
    """Run the job definition on LAVA, retrying when a Mesa CI error occurs.

    A new LAVAJob is created for each attempt; the first attempt plus
    NUMBER_OF_RETRIES_TIMEOUT_DETECTION retries are made. The final status of
    every attempt is logged, whatever its outcome.

    Returns:
        LAVAJob: the job of the first attempt that ran to completion.

    Raises:
        MesaCIRetryError: if every attempt ended with a Mesa CI exception.
        KeyboardInterrupt: re-raised after cancelling the current job.
    """
    retry_count = NUMBER_OF_RETRIES_TIMEOUT_DETECTION
    max_attempts = retry_count + 1

    for attempt_no in range(1, max_attempts + 1):
        job = LAVAJob(proxy, job_definition)
        try:
            follow_job_execution(job)
            return job
        except MesaCIKnownIssueException as known_issue:
            print_log(known_issue)
            job.status = "canceled"
        except MesaCIException as mesa_err:
            print_log(mesa_err)
            job.cancel()
        except KeyboardInterrupt:
            print_log("LAVA job submitter was interrupted. Cancelling the job.")
            job.cancel()
            raise
        finally:
            # Runs for successful, failed and interrupted attempts alike.
            attempt_banner = (
                f"{CONSOLE_LOG['BOLD']}"
                f"Finished executing LAVA job in the attempt #{attempt_no}"
                f"{CONSOLE_LOG['RESET']}"
            )
            print_log(attempt_banner)
            print_job_final_status(job)

    exhausted_message = (
        f"{CONSOLE_LOG['BOLD']}"
        f"{CONSOLE_LOG['FG_RED']}"
        "Job failed after it exceeded the number of "
        f"{retry_count} retries."
        f"{CONSOLE_LOG['RESET']}"
    )
    raise MesaCIRetryError(exhausted_message, retry_count=retry_count)
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci
def treat_mesa_job_name(args):
    """Truncate ``args.mesa_job_name`` at its first space, in place.

    Spaces in the job name break the ``lava-test-case`` command, so only the
    part before the first space is kept. A missing name (``None``, which is
    what the parser yields when ``--mesa-job-name`` is not given) or an empty
    one is left untouched instead of raising ``AttributeError``.

    Args:
        args: parsed command-line namespace; its ``mesa_job_name`` attribute
            is updated.
    """
    if not args.mesa_job_name:
        return
    args.mesa_job_name = args.mesa_job_name.split(" ")[0]
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci
def main(args):
    """Generate, validate and, unless only validating, run the LAVA job.

    Validation errors are reported through ``fatal_err``. With
    ``args.validate_only`` set the function returns after validation.
    Otherwise the job is followed to its end and the process exits with 0
    when the job status is "pass" and with 1 for any other status.

    Args:
        args: parsed command-line namespace.
    """
    proxy = setup_lava_proxy()
    job_definition = generate_lava_yaml(args)

    if args.dump_yaml:
        dump_section = GitlabSection(
            "yaml_dump",
            "LAVA job definition (YAML)",
            type=LogSectionType.LAVA_BOOT,
            start_collapsed=True,
        )
        with dump_section:
            print(hide_sensitive_data(job_definition))

    job = LAVAJob(proxy, job_definition)

    errors = job.validate()
    if errors:
        fatal_err(f"Error in LAVA job definition: {errors}")
    print_log("LAVA job definition validated successfully")

    if not args.validate_only:
        finished_job = retriable_follow_job(proxy, job_definition)
        if finished_job.status == "pass":
            sys.exit(0)
        sys.exit(1)
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci
def create_parser():
    """Build the command-line parser of the LAVA job submitter.

    Returns:
        argparse.ArgumentParser: parser with every submitter option registered.
    """
    # Keyword sets shared by several options.
    optional_value = {"nargs": "?", "default": ""}
    switch = {"action": "store_true"}

    # Options in registration order, which is also the order of the help output.
    option_table = (
        ("--pipeline-info", {}),
        ("--rootfs-url-prefix", {}),
        ("--kernel-url-prefix", {}),
        ("--build-url", {}),
        ("--job-rootfs-overlay-url", {}),
        ("--job-timeout", {"type": int}),
        ("--first-stage-init", {}),
        ("--ci-project-dir", {}),
        ("--device-type", {}),
        ("--dtb", optional_value),
        ("--kernel-image-name", {}),
        ("--kernel-image-type", optional_value),
        ("--boot-method", {}),
        ("--lava-tags", optional_value),
        ("--jwt-file", {"type": pathlib.Path}),
        ("--validate-only", switch),
        ("--dump-yaml", switch),
        ("--visibility-group", {}),
        ("--mesa-job-name", {}),
    )

    parser = argparse.ArgumentParser("LAVA job submitter")
    for option, settings in option_table:
        parser.add_argument(option, **settings)

    return parser
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci
if __name__ == "__main__":
    # given that we proxy from DUT -> LAVA dispatcher -> LAVA primary -> us ->
    # GitLab runner -> GitLab primary -> user, safe to say we don't need any
    # more buffering
    for stream in (sys.stdout, sys.stderr):
        stream.reconfigure(line_buffering=True)

    parser = create_parser()
    # Dispatch through the namespace so the entry point travels with the args.
    parser.set_defaults(func=main)

    args = parser.parse_args()
    treat_mesa_job_name(args)
    args.func(args)
563