Commit a9409044 authored by Alexey Kardashevskiy's avatar Alexey Kardashevskiy Committed by Michael Ellerman
Browse files

powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains

Up until now PPC64 managed to avoid using iommu_ops. The VFIO driver
uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in the
Type1 VFIO driver. Recent development added 2 uses of iommu_ops to the
generic VFIO which broke POWER:
  - a coherency capability check;
  - blocking IOMMU domain - iommu_group_dma_owner_claimed()/...

This adds a simple iommu_ops which reports support for cache coherency
and provides a basic support for blocking domains. No other domain types
are implemented so the default domain is NULL.

Since now iommu_ops controls the group ownership, this takes it out of
VFIO.

This adds an IOMMU device into a pci_controller (=PHB) and registers it
in the IOMMU subsystem, iommu_ops is registered at this point. This
setup is done in postcore_initcall_sync.

This replaces iommu_group_add_device() with iommu_probe_device() as the
former misses necessary steps in connecting PCI devices to IOMMU
devices. This adds a comment about why explicit iommu_probe_device() is
still needed.

The previous discussion is here:
  https://lore.kernel.org/r/20220707135552.3688927-1-aik@ozlabs.ru/
  https://lore.kernel.org/r/20220701061751.1955857-1-aik@ozlabs.ru/



Fixes: e8ae0e14 ("vfio: Require that devices support DMA cache coherence")
Fixes: 70693f47 ("vfio: Set DMA ownership for VFIO devices")
Signed-off-by: default avatarAlexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: default avatarTimothy Pearson <tpearson@raptorengineering.com>
Acked-by: default avatarAlex Williamson <alex.williamson@redhat.com>
[mpe: Fix CONFIG_IOMMU_API=n build]
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/2000135730.16998523.1678123860135.JavaMail.zimbra@raptorengineeringinc.com
parent 76f35109
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/numa.h>
#include <linux/iommu.h>

struct device_node;

@@ -44,6 +45,9 @@ struct pci_controller_ops {
#endif

	void		(*shutdown)(struct pci_controller *hose);

	struct iommu_group *(*device_group)(struct pci_controller *hose,
					    struct pci_dev *pdev);
};

/*
@@ -131,6 +135,9 @@ struct pci_controller {
	struct irq_domain	*dev_domain;
	struct irq_domain	*msi_domain;
	struct fwnode_handle	*fwnode;

	/* iommu_ops support */
	struct iommu_device	iommu;
};

/* These are used for config access before all the PCI probing
+146 −2
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#include <asm/vio.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>
#include <asm/ppc-pci.h>

#define DBG(...)

@@ -1156,8 +1157,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)

	pr_debug("%s: Adding %s to iommu group %d\n",
		 __func__, dev_name(dev),  iommu_group_id(table_group->group));

	return iommu_group_add_device(table_group->group, dev);
	/*
	 * This is still not adding devices via the IOMMU bus notifier because
	 * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
	 * pcibios_scan_phb() first (and this guy adds devices and triggers
	 * the notifier) and only then it calls pci_bus_add_devices() which
	 * configures DMA for buses which also creates PEs and IOMMU groups.
	 */
	return iommu_probe_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);

@@ -1237,6 +1244,7 @@ static long spapr_tce_take_ownership(struct iommu_table_group *table_group)
		rc = iommu_take_ownership(tbl);
		if (!rc)
			continue;

		for (j = 0; j < i; ++j)
			iommu_release_ownership(table_group->tables[j]);
		return rc;
@@ -1269,4 +1277,140 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = {
	.release_ownership = spapr_tce_release_ownership,
};

/*
 * A simple iommu_ops to allow less cruft in generic VFIO code.
 */
static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom,
					       struct device *dev)
{
	struct iommu_group *grp = iommu_group_get(dev);
	struct iommu_table_group *table_group;
	int ret = -EINVAL;

	if (!grp)
		return -ENODEV;

	table_group = iommu_group_get_iommudata(grp);
	ret = table_group->ops->take_ownership(table_group);
	iommu_group_put(grp);

	return ret;
}

static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev)
{
	struct iommu_group *grp = iommu_group_get(dev);
	struct iommu_table_group *table_group;

	table_group = iommu_group_get_iommudata(grp);
	table_group->ops->release_ownership(table_group);
}

static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = {
	.attach_dev = spapr_tce_blocking_iommu_attach_dev,
};

static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		return true;
	default:
		break;
	}

	return false;
}

static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
{
	struct iommu_domain *dom;

	if (type != IOMMU_DOMAIN_BLOCKED)
		return NULL;

	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
	if (!dom)
		return NULL;

	dom->ops = &spapr_tce_blocking_domain_ops;

	return dom;
}

static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
{
	struct pci_dev *pdev;
	struct pci_controller *hose;

	if (!dev_is_pci(dev))
		return ERR_PTR(-EPERM);

	pdev = to_pci_dev(dev);
	hose = pdev->bus->sysdata;

	return &hose->iommu;
}

static void spapr_tce_iommu_release_device(struct device *dev)
{
}

static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
{
	struct pci_controller *hose;
	struct pci_dev *pdev;

	pdev = to_pci_dev(dev);
	hose = pdev->bus->sysdata;

	if (!hose->controller_ops.device_group)
		return ERR_PTR(-ENOENT);

	return hose->controller_ops.device_group(hose, pdev);
}

static const struct iommu_ops spapr_tce_iommu_ops = {
	.capable = spapr_tce_iommu_capable,
	.domain_alloc = spapr_tce_iommu_domain_alloc,
	.probe_device = spapr_tce_iommu_probe_device,
	.release_device = spapr_tce_iommu_release_device,
	.device_group = spapr_tce_iommu_device_group,
	.set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma,
};

static struct attribute *spapr_tce_iommu_attrs[] = {
	NULL,
};

static struct attribute_group spapr_tce_iommu_group = {
	.name = "spapr-tce-iommu",
	.attrs = spapr_tce_iommu_attrs,
};

static const struct attribute_group *spapr_tce_iommu_groups[] = {
	&spapr_tce_iommu_group,
	NULL,
};

/*
 * This registers IOMMU devices of PHBs. This needs to happen
 * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
 * before subsys_initcall(iommu_subsys_init).
 */
static int __init spapr_tce_setup_phb_iommus_initcall(void)
{
	struct pci_controller *hose;

	list_for_each_entry(hose, &hose_list, list_node) {
		iommu_device_sysfs_add(&hose->iommu, hose->parent,
				       spapr_tce_iommu_groups, "iommu-phb%04x",
				       hose->global_number);
		iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
				      hose->parent);
	}
	return 0;
}
postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);

#endif /* CONFIG_IOMMU_API */
+34 −0
Original line number Diff line number Diff line
@@ -1899,6 +1899,13 @@ static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
	/* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
	struct iommu_table *tbl = pe->table_group.tables[0];

	/*
	 * iommu_ops transfers the ownership per a device and we mode
	 * the group ownership with the first device in the group.
	 */
	if (!tbl)
		return 0;

	pnv_pci_ioda2_set_bypass(pe, false);
	pnv_pci_ioda2_unset_window(&pe->table_group, 0);
	if (pe->pbus)
@@ -1915,6 +1922,9 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
						table_group);

	/* See the comment about iommu_ops above */
	if (pe->table_group.tables[0])
		return;
	pnv_pci_ioda2_setup_default_config(pe);
	if (pe->pbus)
		pnv_ioda_setup_bus_dma(pe, pe->pbus);
@@ -2921,6 +2931,27 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
	}
}

#ifdef CONFIG_IOMMU_API
static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose,
						struct pci_dev *pdev)
{
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe;

	if (WARN_ON(!phb))
		return ERR_PTR(-ENODEV);

	pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
	if (!pe)
		return ERR_PTR(-ENODEV);

	if (!pe->table_group.group)
		return ERR_PTR(-ENODEV);

	return iommu_group_ref_get(pe->table_group.group);
}
#endif

static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
	.dma_dev_setup		= pnv_pci_ioda_dma_dev_setup,
	.dma_bus_setup		= pnv_pci_ioda_dma_bus_setup,
@@ -2931,6 +2962,9 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
	.setup_bridge		= pnv_pci_fixup_bridge_resources,
	.reset_secondary_bus	= pnv_pci_reset_secondary_bus,
	.shutdown		= pnv_pci_ioda_shutdown,
#ifdef CONFIG_IOMMU_API
	.device_group		= pnv_pci_device_group,
#endif
};

static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
+24 −0
Original line number Diff line number Diff line
@@ -1729,3 +1729,27 @@ static int __init tce_iommu_bus_notifier_init(void)
	return 0;
}
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);

#ifdef CONFIG_SPAPR_TCE_IOMMU
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
					     struct pci_dev *pdev)
{
	struct device_node *pdn, *dn = pdev->dev.of_node;
	struct iommu_group *grp;
	struct pci_dn *pci;

	pdn = pci_dma_find(dn, NULL);
	if (!pdn || !PCI_DN(pdn))
		return ERR_PTR(-ENODEV);

	pci = PCI_DN(pdn);
	if (!pci->table_group)
		return ERR_PTR(-ENODEV);

	grp = pci->table_group->group;
	if (!grp)
		return ERR_PTR(-ENODEV);

	return iommu_group_ref_get(grp);
}
#endif
+4 −0
Original line number Diff line number Diff line
@@ -123,5 +123,9 @@ static inline void pseries_lpar_read_hblkrm_characteristics(void) { }
#endif

void pseries_rng_init(void);
#ifdef CONFIG_SPAPR_TCE_IOMMU
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
					     struct pci_dev *pdev);
#endif

#endif /* _PSERIES_PSERIES_H */
Loading