Commit 91fa1277 authored by Alex Williamson, committed by Bjorn Helgaas
Browse files

PCI: Expose PCIe Resizable BAR support via sysfs

Add a simple sysfs interface to Resizable BAR support, largely for the
purposes of assigning such devices to a VM through VFIO.  Resizable BARs
present a difficult feature to expose to a VM through emulation, as
resizing a BAR is done on the host.  It can fail, and often does, but we
have no means via emulation of a PCIe REBAR capability to handle the error
cases.

A vfio-pci specific ioctl interface is also cumbersome as there are often
multiple devices within the same bridge aperture and handling them is a
challenge.  In the interface proposed here, expanding a BAR potentially
requires such devices to be soft-removed during the resize operation and
rescanned after, in order for all the necessary resources to be released.
A pci-sysfs interface is also more universal than a vfio specific
interface.

Please see the ABI documentation update for usage.

Link: https://lore.kernel.org/r/166336088796.3597940.14973499936692558556.stgit@omen


Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Cc: Krzysztof Wilczyński <kw@linux.com>
parent 568035b0
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -457,3 +457,36 @@ Description:

		The file is writable if the PF is bound to a driver that
		implements ->sriov_set_msix_vec_count().

What:		/sys/bus/pci/devices/.../resourceN_resize
Date:		September 2022
Contact:	Alex Williamson <alex.williamson@redhat.com>
Description:
		These files provide an interface to PCIe Resizable BAR support.
		A file is created for each BAR resource (N) supported by the
		PCIe Resizable BAR extended capability of the device.  Reading
		each file exposes the bitmap of available resource sizes:

		# cat resource1_resize
		00000000000001c0

		The bitmap represents supported resource sizes for the BAR,
		where bit0 = 1MB, bit1 = 2MB, bit2 = 4MB, etc.  In the above
		example the device supports 64MB, 128MB, and 256MB BAR sizes.

		When writing the file, the user provides the bit position of
		the desired resource size, for example:

		# echo 7 > resource1_resize

		This requests the size corresponding to bit 7, 128MB.  The
		resulting size in bytes is 2 ^ (bit# + 20).  This definition
		matches the PCIe specification of this capability.

		In order to make use of resource resizing, all PCI drivers must
		be unbound from the device and peer devices under the same
		parent bridge may need to be soft removed.  In the case of
		VGA devices, writing a resize value will remove low level
		console drivers from the device.  Raw users of pci-sysfs
		resourceN attributes must be terminated prior to resizing.
		Success of the resizing operation is not guaranteed.
+108 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include <linux/pm_runtime.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/aperture.h>
#include "pci.h"

static int sysfs_initialized;	/* = 0 */
@@ -1373,6 +1374,112 @@ static const struct attribute_group pci_dev_reset_attr_group = {
	.is_visible = pci_dev_reset_attr_is_visible,
};

#define pci_dev_resource_resize_attr(n)					\
static ssize_t resource##n##_resize_show(struct device *dev,		\
					 struct device_attribute *attr,	\
					 char * buf)			\
{									\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	ssize_t ret;							\
									\
	pci_config_pm_runtime_get(pdev);				\
									\
	ret = sysfs_emit(buf, "%016llx\n",				\
			 (u64)pci_rebar_get_possible_sizes(pdev, n));	\
									\
	pci_config_pm_runtime_put(pdev);				\
									\
	return ret;							\
}									\
									\
static ssize_t resource##n##_resize_store(struct device *dev,		\
					  struct device_attribute *attr,\
					  const char *buf, size_t count)\
{									\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	unsigned long size, flags;					\
	int ret, i;							\
	u16 cmd;							\
									\
	if (kstrtoul(buf, 0, &size) < 0)				\
		return -EINVAL;						\
									\
	device_lock(dev);						\
	if (dev->driver) {						\
		ret = -EBUSY;						\
		goto unlock;						\
	}								\
									\
	pci_config_pm_runtime_get(pdev);				\
									\
	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) {		\
		ret = aperture_remove_conflicting_pci_devices(pdev,	\
						"resourceN_resize");	\
		if (ret)						\
			goto pm_put;					\
	}								\
									\
	pci_read_config_word(pdev, PCI_COMMAND, &cmd);			\
	pci_write_config_word(pdev, PCI_COMMAND,			\
			      cmd & ~PCI_COMMAND_MEMORY);		\
									\
	flags = pci_resource_flags(pdev, n);				\
									\
	pci_remove_resource_files(pdev);				\
									\
	for (i = 0; i < PCI_STD_NUM_BARS; i++) {			\
		if (pci_resource_len(pdev, i) &&			\
		    pci_resource_flags(pdev, i) == flags)		\
			pci_release_resource(pdev, i);			\
	}								\
									\
	ret = pci_resize_resource(pdev, n, size);			\
									\
	pci_assign_unassigned_bus_resources(pdev->bus);			\
									\
	if (pci_create_resource_files(pdev))				\
		pci_warn(pdev, "Failed to recreate resource files after BAR resizing\n");\
									\
	pci_write_config_word(pdev, PCI_COMMAND, cmd);			\
pm_put:									\
	pci_config_pm_runtime_put(pdev);				\
unlock:									\
	device_unlock(dev);						\
									\
	return ret ? ret : count;					\
}									\
static DEVICE_ATTR_RW(resource##n##_resize)

/*
 * Instantiate the resourceN_resize show/store handlers and device attribute
 * for BAR indices 0 through 5.
 */
pci_dev_resource_resize_attr(0);
pci_dev_resource_resize_attr(1);
pci_dev_resource_resize_attr(2);
pci_dev_resource_resize_attr(3);
pci_dev_resource_resize_attr(4);
pci_dev_resource_resize_attr(5);

/*
 * NULL-terminated list of the resourceN_resize attributes, ordered by BAR
 * index: resource_resize_is_visible() uses the position in this array as
 * the BAR number, so the order must not change.
 */
static struct attribute *resource_resize_attrs[] = {
	&dev_attr_resource0_resize.attr,
	&dev_attr_resource1_resize.attr,
	&dev_attr_resource2_resize.attr,
	&dev_attr_resource3_resize.attr,
	&dev_attr_resource4_resize.attr,
	&dev_attr_resource5_resize.attr,
	NULL,
};

/*
 * Visibility callback for the resourceN_resize attribute group.
 *
 * @n is the attribute's position in the group and is passed on as the BAR
 * index.  The attribute is hidden (mode 0) when
 * pci_rebar_get_current_size() returns a negative value for that BAR;
 * otherwise it is exposed with the mode it was declared with.
 */
static umode_t resource_resize_is_visible(struct kobject *kobj,
					  struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (pci_rebar_get_current_size(pdev, n) < 0)
		return 0;

	return a->mode;
}

/*
 * Attribute group bundling the resourceN_resize attributes; each entry's
 * visibility is decided per device by resource_resize_is_visible().
 */
static const struct attribute_group pci_dev_resource_resize_group = {
	.attrs = resource_resize_attrs,
	.is_visible = resource_resize_is_visible,
};

int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev)
{
	if (!sysfs_initialized)
@@ -1494,6 +1601,7 @@ const struct attribute_group *pci_dev_groups[] = {
#ifdef CONFIG_ACPI
	&pci_dev_acpi_attr_group,
#endif
	&pci_dev_resource_resize_group,
	NULL,
};