#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Basic EEH recovery test.
#
# Builds a list of PCI devices whose name ends in ".0", filters out the
# ones that cannot be tested (bridges, virtual functions, ahci-driven
# devices, devices whose PE is not in a good state), then breaks each
# remaining device with eeh_one_dev and counts how many fail.
#
# Exit status:
#   0                 every tested device recovered
#   1                 at least one device failed (or setup failed)
#   $KSELFTESTS_SKIP  the test cannot run on this system
#
# eeh_supported, pe_ok and eeh_one_dev are not defined in this file; they
# are expected to be provided by eeh-functions.sh, which is sourced below.

# Not marked readonly on purpose: the sourced helper file may assign it too.
KSELFTESTS_SKIP=4

. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit $KSELFTESTS_SKIP
fi

# Skip when either debugfs test file is missing. The helpers are presumed
# to need both of them, so running with only one present could not give a
# meaningful result.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] || \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit $KSELFTESTS_SKIP
fi

# Snapshot the PCI device list so it can be compared once testing is done.
pre_lspci=$(mktemp) || {
	echo "Failed to create a temporary file" >&2
	exit 1
}

# Remove the snapshot on every exit path. The signal traps turn an
# interruption into a normal exit so that the EXIT trap also runs in
# shells that do not fire it on signals.
trap 'rm -f -- "$pre_lspci"' EXIT
trap 'exit 1' HUP INT TERM

lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# Record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recovery process
# is finished.
devices=""
dev_count=0

# Build up a list of candidate devices.
for dev_path in /sys/bus/pci/devices/*.0 ; do
	# An unmatched glob is left as the literal pattern; ignore it.
	[ -e "$dev_path" ] || continue
	dev=${dev_path##*/}

	# Skip bridges since we can't recover them (yet...)
	if [ -e "$dev_path/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "$dev_path/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	# Only resolve the driver link when a driver is actually bound, so an
	# unbound device does not produce realpath/basename errors.
	if [ -e "$dev_path/driver" ] && \
	   [ "ahci" = "$(basename "$(realpath "$dev_path/driver")")" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add to the list of devices to check.
	devices="$devices $dev"
	dev_count=$((dev_count + 1))
done

echo "Found ${dev_count} breakable devices..."

failed=0
# $devices is intentionally unquoted: it is a space-separated list.
for dev in $devices ; do
	echo "Breaking $dev..."

	# Re-check the PE state: breaking an earlier device may have affected
	# this one. A bad state at this point counts as a failure.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed=$((failed + 1))
		continue
	fi

	if ! eeh_one_dev "$dev" ; then
		failed=$((failed + 1))
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"

# Informational only: print any difference in the PCI device list. The
# diff status does not affect the test result.
lspci | diff -u "$pre_lspci" -

# The test passes only when no device failed to recover.
[ "$failed" -eq 0 ]
exit $?