#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
38c2ecf20Sopenharmony_ci
# Succeed when "$1" is a plain number that is safe to hand to shell
# arithmetic: 0x-prefixed hexadecimal, or decimal without a leading zero.
_pe_state_is_number() {
	case "$1" in
	0[xX]?*)
		case "${1#0[xX]}" in
		*[!0-9a-fA-F]*) return 1 ;;
		esac
		;;
	0 | [1-9]*)
		case "$1" in
		*[!0-9]*) return 1 ;;
		esac
		;;
	*)
		return 1
		;;
	esac
	return 0
}

# Report whether the PE state exposed for a PCI device looks healthy.
#
# Arguments:
#   $1 - sysfs PCI device name
#   $2 - optional directory containing the per-device directories; defaults
#        to /sys/bus/pci/devices (mainly useful for testing against fixtures)
# Returns:
#   0 when the eeh_pe_state file could be read, holds two numeric fields,
#   neither of the 0x3 bits of the second field is set and both 0x18 bits of
#   the first field are set; 1 in every other case.
pe_ok() {
	local dev="$1"
	local devices_dir="${2:-/sys/bus/pci/devices}"
	local path="$devices_dir/$dev/eeh_pe_state"
	local fw_state="" sw_state="" extra=""

	# If a driver doesn't support the error handling callbacks then the
	# device is recovered by removing and re-probing it. This causes the
	# sysfs directory to disappear, so read the PE state exactly once and
	# squash any potential error message. The status of the read itself is
	# ignored on purpose: the fields are validated right below.
	{ read -r fw_state sw_state extra <"$path" ; } 2>/dev/null || :

	# A missing file, an empty file, a line with fewer than two fields or
	# anything that is not a number means the state is unknown: not ok.
	# Validating here also keeps arbitrary text out of $(( )).
	if ! _pe_state_is_number "$fw_state" ; then
		return 1
	fi
	if ! _pe_state_is_number "$sw_state" ; then
		return 1
	fi

	# If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an
	# error state or being recovered. Either way, not ok.
	if [ "$((sw_state & 0x3))" -ne 0 ] ; then
		return 1
	fi

	# A functioning PE should have the EEH_STATE_MMIO_ACTIVE and
	# EEH_STATE_DMA_ACTIVE flags set. The platform backends also set
	# these when the PE is in reset; the RECOVERING check above should
	# stop any false positives though.
	if [ "$((fw_state & 0x18))" -ne "$((0x18))" ] ; then
		return 1
	fi

	return 0
}
368c2ecf20Sopenharmony_ci
# Tell whether the running kernel reports EEH as enabled.
#
# Returns:
#   0 when /proc/powerpc/eeh exists and contains the text
#   'EEH Subsystem is enabled'; non-zero otherwise.
eeh_supported() {
	local status_file=/proc/powerpc/eeh

	# Without the proc file there is nothing to grep.
	if [ ! -e "$status_file" ] ; then
		return 1
	fi

	grep -q 'EEH Subsystem is enabled' "$status_file"
}
418c2ecf20Sopenharmony_ci
# Inject an EEH error into one PCI device and wait for it to recover.
#
# Globals:
#   EEH_MAX_WAIT - number of seconds to wait for recovery; assigned the
#                  default of 60 when unset or empty
# Arguments:
#   $1 - sysfs PCI device name (DDDD:BB:DD.F)
# Outputs:
#   progress and the final result on stdout; argument and setup errors on
#   stderr
# Returns:
#   0 when pe_ok reports the device healthy within the timeout, 1 otherwise
eeh_one_dev() {
	local dev="$1"
	local max_wait waited

	# Using this function from the command line is sometimes useful for
	# testing so check that the argument is a well-formed sysfs device
	# name. An empty name or one containing '/' would still resolve to an
	# existing path below /sys/bus/pci/devices, so reject those up front.
	case "$dev" in
	'' | */*)
		echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)" >&2
		return 1
		;;
	esac
	if ! test -e "/sys/bus/pci/devices/$dev/" ; then
		echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)" >&2
		return 1
	fi

	# Default to a 60s timeout when waiting for a device to recover. This
	# is an arbitrary default which can be overridden by setting the
	# EEH_MAX_WAIT environmental variable when required.
	#
	# The current record holder for longest recovery time is:
	#  "Adaptec Series 8 12G SAS/PCIe 3" at 39 seconds
	#
	# Validate it before touching the device so that a bad value cannot
	# leave a freshly broken device unattended.
	max_wait=${EEH_MAX_WAIT:=60}
	case "$max_wait" in
	*[!0-9]*)
		echo "Error: EEH_MAX_WAIT='$max_wait' must be a non-negative integer" >&2
		return 1
		;;
	esac

	# Break it. If the error could not be injected the device was never
	# broken, and reporting a "recovery" afterwards would be a false pass.
	if ! echo "$dev" >/sys/kernel/debug/powerpc/eeh_dev_break ; then
		echo "$dev, Failed to inject an error via eeh_dev_break" >&2
		return 1
	fi

	# Force an EEH device check. If the kernel has already
	# noticed the EEH (due to a driver poll or whatever), this
	# is a no-op, so its status is not treated as fatal.
	echo "$dev" >/sys/kernel/debug/powerpc/eeh_dev_check

	# Poll once per second; 'waited' counts the seconds slept so far.
	waited=0
	while [ "$waited" -le "$max_wait" ] ; do
		if pe_ok "$dev" ; then
			break
		fi
		echo "$dev, waited $waited/${max_wait}"
		sleep 1
		waited=$((waited + 1))
	done

	if ! pe_ok "$dev" ; then
		echo "$dev, Failed to recover!"
		return 1
	fi

	echo "$dev, Recovered after $waited seconds"
	return 0
}
858c2ecf20Sopenharmony_ci
86