162306a36Sopenharmony_ci#!/bin/bash
262306a36Sopenharmony_ci# SPDX-License-Identifier: GPL-2.0+
362306a36Sopenharmony_ci#
462306a36Sopenharmony_ci# Run a series of tests on remote systems under KVM.
562306a36Sopenharmony_ci#
662306a36Sopenharmony_ci# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
762306a36Sopenharmony_ci#	 kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
862306a36Sopenharmony_ci#
962306a36Sopenharmony_ci# Copyright (C) 2021 Facebook, Inc.
1062306a36Sopenharmony_ci#
1162306a36Sopenharmony_ci# Authors: Paul E. McKenney <paulmck@kernel.org>
1262306a36Sopenharmony_ci
# Remember how this script was invoked so that it can be echoed into logs.
scriptname=$0
args="$*"

# The relative pathnames used below only work from the top of the kernel tree.
if [ ! -d tools/testing/selftests/rcutorture/bin ]
then
	echo $scriptname must be run from top-level directory of kernel source tree.
	exit 1
fi

# Put the rcutorture scripts on PATH, then source the helper functions
# (get_starttime is presumably supplied by functions.sh).
RCUTORTURE="$(pwd)/tools/testing/selftests/rcutorture"
PATH=${RCUTORTURE}/bin:$PATH
export RCUTORTURE PATH
. functions.sh

starttime="$(get_starttime)"

# The first argument is the list of systems; the remaining arguments are
# left in "$@" for the code that follows.
systems="$1"
if [ -z "$systems" ]
then
	echo $scriptname: Empty list of systems will go nowhere good, giving up.
	exit 1
fi
shift
3662306a36Sopenharmony_ci# Pathnames:
3762306a36Sopenharmony_ci# T:	  /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
3862306a36Sopenharmony_ci# resdir: /tmp/kvm-remote.sh.NNNNNN/res
3962306a36Sopenharmony_ci# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
4062306a36Sopenharmony_ci# oldrun: `pwd`/tools/testing/.../res/$otherds
4162306a36Sopenharmony_ci#
4262306a36Sopenharmony_ci# Pathname segments:
4362306a36Sopenharmony_ci# TD:	  kvm-remote.sh.NNNNNN
4462306a36Sopenharmony_ci# ds:	  yyyy.mm.dd-hh.mm.ss-remote
4562306a36Sopenharmony_ci
# Create the scratch directory and refuse to continue without it: an empty
# $T would turn the pathnames derived below into /res, /bin, and so on.
T="$(mktemp -d "${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX")"
if test -z "$T" || ! test -d "$T"
then
	echo $scriptname: Unable to create temporary directory, giving up.
	exit 1
fi
# Remove the scratch directory on exit.  $T is quoted inside the trap so
# that a TMPDIR containing whitespace is still removed as a single pathname.
trap 'rm -rf "$T"' 0
TD="$(basename "$T")"

resdir="$T/res"
ds="$(date +%Y.%m.%d-%H.%M.%S)-remote"
rundir="$resdir/$ds"
echo Results directory: $rundir
echo $scriptname $args
# Decide between a fresh build and re-use of an old run: if the first
# remaining argument begins with "--", everything left is taken to be
# kvm.sh arguments, otherwise that argument is the old run's directory and
# anything after it is handed to kvm-again.sh.  Either way, $oldrun ends up
# naming the local build-side run directory that holds remote-log.
if echo $1 | grep -q '^--'
then
	# Fresh build.  Create a datestamp unless the caller supplied one.
	datestamp="$(echo "$@" | awk -v ds="$ds" '{
		for (i = 1; i < NF; i++) {
			if ($i == "--datestamp") {
				ds = "";
				break;
			}
		}
		if (ds != "")
			print "--datestamp " ds;
	}')"
	kvm.sh --remote "$@" $datestamp --buildonly > "$T/kvm.sh.out" 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# Report the saved status: by now $? is that of "test", not kvm.sh.
		echo $scriptname: kvm.sh failed exit code $ret
		cat "$T/kvm.sh.out"
		exit 2
	fi
	# kvm.sh announces its results directory; that is the build-side run.
	oldrun="$(grep -m 1 "^Results directory: " "$T/kvm.sh.out" | awk '{ print $3 }')"
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
	cat "$T/kvm.sh.out" | tee -a "$oldrun/remote-log"
	# We are going to run this, so remove the buildonly files.
	rm -f "$oldrun"/*/buildonly
	kvm-again.sh "$oldrun" --dryrun --remote --rundir "$rundir" > "$T/kvm-again.sh.out" 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# As above, $? would always read as zero here, so use $ret.
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat "$T/kvm-again.sh.out" | tee -a "$oldrun/remote-log"
		exit 2
	fi
else
	# Re-use old run.
	oldrun="$1"
	if ! echo $oldrun | grep -q '^/'
	then
		# Make a relative pathname absolute.
		oldrun="`pwd`/$oldrun"
	fi
	shift
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > "$T/kvm-again.sh.out" 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# As above, $? would always read as zero here, so use $ret.
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat "$T/kvm-again.sh.out" | tee -a "$oldrun/remote-log"
		exit 2
	fi
	# Keep a local copy of the new run directory and log there from now on.
	cp -a "$rundir" "$RCUTORTURE/res/"
	oldrun="$RCUTORTURE/res/$ds"
fi
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
cat "$T/kvm-again.sh.out"
echo | tee -a "$oldrun/remote-log"
echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
# Create the kvm-remote-N.sh scripts in the bin directory, one per line of
# the scenarios file.  The first field of each line, minus its first ".",
# supplies N, and the remaining fields become the arguments passed to
# kvm-test-1-run-batch.sh.
awk -v dest="$T/bin" -v rundir="$rundir" '
{
	batch = $1;
	sub(/\./, "", batch);
	script = dest "/kvm-remote-" batch ".sh";
	batchargs = "";
	for (f = 2; f <= NF; f++)
		batchargs = batchargs " " $f;
	body = "kvm-remote-noreap.sh " rundir " &\n";
	body = body "kvm-test-1-run-batch.sh" batchargs "\n";
	body = body "sync\n";
	body = body "rm " rundir "/remote.run\n";
	printf "%s", body > script;
}' < "$rundir"/scenarios
chmod +x $T/bin/kvm-remote-*.sh

# Bundle the scripts and results tree, with pathnames relative to the
# parent of $T, into the tarball that is shipped to the remote systems.
(
	cd "$(dirname $T)"
	tar -chzf $T/binres.tgz "$TD/bin" "$TD/res"
)
# Check first to avoid the need for cleanup for system-name typos
for sys in $systems
do
	# Ask each system for its online CPU count, keeping both output streams.
	ssh -o BatchMode=yes $sys getconf _NPROCESSORS_ONLN > $T/ssh.stdout 2> $T/ssh.stderr
	ret=$?
	if [ "$ret" -ne 0 ]
	then
		# Show and log everything ssh said, then abandon the whole run.
		{
			echo "System $sys unreachable ($ret), giving up."
			echo ' --- ssh stdout: vvv'
			cat $T/ssh.stdout
			echo ' --- ssh stdout: ^^^'
			echo ' --- ssh stderr: vvv'
			cat $T/ssh.stderr
			echo ' --- ssh stderr: ^^^'
		} | tee -a "$oldrun/remote-log"
		exit 4
	fi
	echo $sys: $(cat $T/ssh.stdout) CPUs " " $(date) | tee -a "$oldrun/remote-log"
done

# Download and expand the tarball on all systems.
echo Build-products tarball: $(du -h $T/binres.tgz) | tee -a "$oldrun/remote-log"
# Copy $T/binres.tgz to the specified system and unpack it in /tmp there.
# A failed transfer is retried after a 60-second wait.  Once a retry fails
# with more than five prior retries on record, the whole script exits
# with code 10.
#
# Usage: downloadtarball system
downloadtarball () {
	local ret
	local tries=0

	echo Downloading tarball to $1 `date` | tee -a "$oldrun/remote-log"
	cat $T/binres.tgz | ssh -o BatchMode=yes $1 "cd /tmp; tar -xzf -"
	ret=$?
	while test "$ret" -ne 0
	do
		echo Unable to download $T/binres.tgz to system $1, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
		sleep 60
		cat $T/binres.tgz | ssh -o BatchMode=yes $1 "cd /tmp; tar -xzf -"
		ret=$?
		# Must be -gt: a bare ">" is a redirection, which creates a file
		# named "5" and makes the test succeed for any non-empty $tries.
		if test "$ret" -ne 0 && test "$tries" -gt 5
		then
			echo Unable to download $T/binres.tgz to system $1, giving up. | tee -a "$oldrun/remote-log"
			exit 10
		fi
		tries=$((tries+1))
	done
}

# Ship the tarball to each system in turn.
for i in $systems
do
	downloadtarball "$i"
done
18162306a36Sopenharmony_ci
# Check for the presence of a file on the specified system.  An ssh
# failure (exit code 255) is logged and the check is repeated after a
# wait, so this currently loops for as long as the machine stays away.
#
# Usage: checkremotefile system pathname
#
# Returns 0 if the file is present and 1 if it is not.  Any other exit
# code from ssh is logged and returned as-is.
checkremotefile () {
	local status
	local delay=60

	while :
	do
		ssh -o BatchMode=yes $1 "test -f \"$2\""
		status=$?
		case "$status" in
		0)
			return 0
			;;
		1)
			echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
			return 1
			;;
		255)
			# The only case that falls through to the sleep and loops.
			echo " ---" ssh failure to $1 checking for file $2, retry after $delay seconds. `date` | tee -a "$oldrun/remote-log"
			;;
		*)
			echo " ---" Exit code $status: ssh $1 test -f \"$2\", retry after $delay seconds. `date` | tee -a "$oldrun/remote-log"
			return $status
			;;
		esac
		sleep $delay
	done
}
21262306a36Sopenharmony_ci
# Start batches on whichever of the remote $systems are idle, that is,
# on those where checkremotefile finds no remote.run file in the run
# directory.
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered starting at 1.  The number of the next batch to
# start is written to stdout, so all other output, debug output included,
# must go to FD 2 (stderr).  A failure of the launching ssh command exits
# the script with code 11.
startbatches () {
	local next="$1"
	local total="$2"
	local sshret

	# Each pass through the following loop examines one system.
	for i in $systems
	do
		if [ "$next" -gt "$total" ]
		then
			# Nothing left to start.
			echo $((total + 1))
			return 0
		fi
		# System still running last test, skip.
		checkremotefile "$i" "$resdir/$ds/remote.run" >&2 && continue
		ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$next.sh > kvm-remote-$next.sh.out 2>&1 &" >&2
		sshret=$?
		if [ "$sshret" -ne 0 ]
		then
			echo ssh $i failed: exitcode $sshret >&2
			exit 11
		fi
		echo " ----" System $i Batch $(head -n $next < "$rundir"/scenarios | tail -1) $(date) >&2
		next=$((next + 1))
	done
	echo $next
}
24862306a36Sopenharmony_ci
# Launch all the scenarios.  There is one batch per line of the scenarios
# file.  startbatches reports the next batch to start on stdout, which is
# captured in a file rather than a command substitution so that an "exit"
# within startbatches still terminates this script.
nbatches="$(wc -l "$rundir"/scenarios | awk '{ print $1 }')"
curbatch=1
while [ "$curbatch" -le "$nbatches" ]
do
	startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
	curbatch="$(cat $T/curbatch)"
	# Anything startbatches wrote to stderr is shown and logged.
	if [ -s "$T/startbatches.stderr" ]
	then
		tee -a "$oldrun/remote-log" < "$T/startbatches.stderr"
	fi
	# Batches remain but the last pass did not start them all, so wait
	# before trying again.
	if [ "$curbatch" -le "$nbatches" ]
	then
		sleep 30
	fi
done
echo All batches started. $(date) | tee -a "$oldrun/remote-log"

# Wait for all remaining scenarios to complete and collect results.
for i in $systems
do
	echo " ---" Waiting for $i $(date) | tee -a "$oldrun/remote-log"
	# The generated batch scripts remove remote.run as their final step.
	while checkremotefile "$i" "$resdir/$ds/remote.run"
	do
		sleep 30
	done
	echo " ---" Collecting results from $i $(date) | tee -a "$oldrun/remote-log"
	# Stream the result files back as a tarball unpacked into $oldrun,
	# then remove the remote copy of the scratch directory.
	(
		cd "$oldrun"
		ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf -
	)
done

# The exit code of kvm-end-run-stats.sh is passed through a file because
# the pipeline into tee would otherwise hide it; it becomes this script's
# own exit code.
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
exit "$(cat $T/exitcode)"
281