Commit db82f709 authored by Oliver O'Halloran, committed by Michael Ellerman
Browse files

selftests/powerpc: Hoist helper code out of eeh-basic



Hoist some of the useful test environment checking and prep code into
eeh-functions.sh so it can be reused in other tests.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201103044503.917128-1-oohall@gmail.com
parent 718aae91
Loading
Loading
Loading
Loading
+3 −36
Original line number Diff line number Diff line
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only

KSELFTESTS_SKIP=4

. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit $KSELFTESTS_SKIP;
fi

if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit $KSELFTESTS_SKIP;
fi
eeh_test_prep # NB: may exit

pre_lspci=`mktemp`
lspci > $pre_lspci

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recover process
# is finished.
@@ -30,34 +15,16 @@ devices=""

# Build up a list of candidate devices.
for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do
	# skip bridges since we can't recover them (yet...)
	if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
	if ! eeh_can_break $dev ; then
		continue;
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	# Skip VFs for now since we don't have a reliable way to break them.
	if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue;
	fi

	if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok $dev ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue;
	fi

	echo "$dev, Added"

	# Add to this list of device to check
+48 −0
Original line number Diff line number Diff line
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only

export KSELFTESTS_SKIP=4

pe_ok() {
	local dev="$1"
	local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
@@ -39,6 +41,52 @@ eeh_supported() {
	grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh
}

# eeh_test_prep
#
# Common environment check and setup for the EEH selftests.
#
# Exits the calling shell (it does not return) with the kselftest skip
# code when eeh_supported fails, or when either of the debugfs files
# eeh_dev_check / eeh_dev_break is missing. The skip code is taken from
# $KSELFTESTS_SKIP and defaults to 4 if that variable is unset or empty,
# so a skip can never be reported as exit status 0.
#
# On success, writes 5000 to the debugfs eeh_max_freezes file.
eeh_test_prep() {
	local debugfs="/sys/kernel/debug/powerpc"
	local skip="${KSELFTESTS_SKIP:-4}"

	if ! eeh_supported ; then
		echo "EEH not supported on this system, skipping"
		exit "$skip"
	fi

	# Skip when EITHER file is absent. Testing with '&&' would only skip
	# when both were missing and let the run continue with one of the two
	# debugfs interfaces unavailable.
	if [ ! -e "$debugfs/eeh_dev_check" ] || \
	   [ ! -e "$debugfs/eeh_dev_break" ] ; then
		echo "debugfs EEH testing files are missing. Is debugfs mounted?"
		exit "$skip"
	fi

	# Bump the max freeze count to something absurd so we don't
	# trip over it while breaking things.
	echo 5000 > "$debugfs/eeh_max_freezes"
}

# eeh_can_break DEV
#
# Decide whether the PCI device DEV (an entry name under
# /sys/bus/pci/devices/) is a suitable target for EEH error injection.
#
# Arguments: $1 - device name. If omitted, falls back to the $dev
#                 variable inherited from the caller, so any caller that
#                 relied on the inherited variable keeps working.
# Outputs:   "<dev>, Skipped: <reason>" on stdout when the device is
#            rejected; nothing when it is accepted.
# Returns:   0 if the device may be broken, 1 if it must be skipped.
eeh_can_break() {
	# Bind the argument explicitly instead of silently depending on the
	# caller's loop variable happening to be called "dev".
	local dev="${1:-$dev}"
	local sysfs="/sys/bus/pci/devices/$dev"
	local driver

	# skip bridges since we can't recover them (yet...)
	if [ -e "$sysfs/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		return 1
	fi

	# The ahci driver doesn't support error recovery. If the ahci device
	# happens to be hosting the root filesystem, and then we go and break
	# it the system will generally go down. We should probably fix that
	# at some point
	driver="$(basename "$(realpath "$sysfs/driver")")"
	if [ "ahci" = "$driver" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		return 1
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		return 1
	fi

	return 0
}

eeh_one_dev() {
	local dev="$1"