#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Basic EEH selftest.
#
# Builds a list of candidate PCI devices (function 0 only, skipping bridges,
# VFs, ahci-bound devices and devices whose PE is not ok), runs eeh_one_dev
# on each of them and counts how many report failure.
#
# Exit status:
#   0                 every tested device passed eeh_one_dev
#   1                 at least one device failed (or the temp file could not
#                     be created)
#   $KSELFTESTS_SKIP  EEH or the debugfs test files are unavailable
#
# eeh_supported, pe_ok and eeh_one_dev are provided by ./eeh-functions.sh,
# so the script must be run from the directory containing that file.

# Exit status used to report a skipped test.
KSELFTESTS_SKIP=4

. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit $KSELFTESTS_SKIP
fi

# Skip if either debugfs file is missing.
# NOTE(review): the helpers in eeh-functions.sh presumably need both
# eeh_dev_check and eeh_dev_break, so one file alone is not enough to run.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] || \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit $KSELFTESTS_SKIP
fi

# Snapshot the PCI device list so it can be diffed against the list after
# the test run.
pre_lspci=$(mktemp) || {
	echo "mktemp failed" >&2
	exit 1
}

# Remove the snapshot on every exit path, including interruption.
cleanup() {
	rm -f "$pre_lspci"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# Record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recovery process
# is finished.
devices=""
dev_count=0

# Build up a list of candidate devices: only function 0 of each device.
for dev_path in /sys/bus/pci/devices/*.0 ; do
	# An unmatched glob stays literal; skip it.
	[ -e "$dev_path" ] || continue
	dev=${dev_path##*/}

	# Skip bridges since we can't recover them (yet...)
	if [ -e "$dev_path/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "$dev_path/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	driver_path=$(realpath "$dev_path/driver")
	if [ "ahci" = "$(basename "$driver_path")" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add to the list of devices to check.
	devices="$devices $dev"
	dev_count=$((dev_count + 1))
done

echo "Found ${dev_count} breakable devices..."

failed=0
# $devices is a space-separated list of PCI addresses, so the unquoted
# expansion (word splitting) is intentional here.
for dev in $devices ; do
	echo "Breaking $dev..."

	# Re-check the PE state: it may have changed since the list was built.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed=$((failed + 1))
		continue
	fi

	if ! eeh_one_dev "$dev" ; then
		failed=$((failed + 1))
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"

# Show any difference between the PCI device list before and after the run.
lspci | diff -u "$pre_lspci" -

# The snapshot file is removed by the EXIT trap.
[ "$failed" -eq 0 ]
exit $?