Commit 21c1f902 authored by Niklas Schnelle's avatar Niklas Schnelle Committed by Joerg Roedel
Browse files

s390/pci: use lock-free I/O translation updates



I/O translation tables on s390 use 8 byte page table entries and tables
which are allocated lazily but only freed when the entire I/O
translation table is torn down. Also each IOVA can at any time only
translate to one physical address Furthermore I/O table accesses by the
IOMMU hardware are cache coherent. With a bit of care we can thus use
atomic updates to manipulate the translation table without having to use
a global lock at all. This is done analogous to the existing I/O
translation table handling code used on Intel and AMD x86 systems.

Signed-off-by: default avatarNiklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221109142903.4080275-6-schnelle@linux.ibm.com


Signed-off-by: default avatarJoerg Roedel <jroedel@suse.de>
parent 08955af0
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -157,7 +157,6 @@ struct zpci_dev {

	/* DMA stuff */
	unsigned long	*dma_table;
	spinlock_t	dma_table_lock;
	int		tlb_refresh;

	spinlock_t	iommu_bitmap_lock;
+45 −29
Original line number Diff line number Diff line
@@ -63,37 +63,55 @@ static void dma_free_page_table(void *table)
	kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
{
	unsigned long old_rte, rte;
	unsigned long *sto;

	if (reg_entry_isvalid(*entry))
		sto = get_rt_sto(*entry);
	else {
	rte = READ_ONCE(*rtep);
	if (reg_entry_isvalid(rte)) {
		sto = get_rt_sto(rte);
	} else {
		sto = dma_alloc_cpu_table();
		if (!sto)
			return NULL;

		set_rt_sto(entry, virt_to_phys(sto));
		validate_rt_entry(entry);
		entry_clr_protected(entry);
		set_rt_sto(&rte, virt_to_phys(sto));
		validate_rt_entry(&rte);
		entry_clr_protected(&rte);

		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
		if (old_rte != ZPCI_TABLE_INVALID) {
			/* Somone else was faster, use theirs */
			dma_free_cpu_table(sto);
			sto = get_rt_sto(old_rte);
		}
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
static unsigned long *dma_get_page_table_origin(unsigned long *step)
{
	unsigned long old_ste, ste;
	unsigned long *pto;

	if (reg_entry_isvalid(*entry))
		pto = get_st_pto(*entry);
	else {
	ste = READ_ONCE(*step);
	if (reg_entry_isvalid(ste)) {
		pto = get_st_pto(ste);
	} else {
		pto = dma_alloc_page_table();
		if (!pto)
			return NULL;
		set_st_pto(entry, virt_to_phys(pto));
		validate_st_entry(entry);
		entry_clr_protected(entry);
		set_st_pto(&ste, virt_to_phys(pto));
		validate_st_entry(&ste);
		entry_clr_protected(&ste);

		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
		if (old_ste != ZPCI_TABLE_INVALID) {
			/* Somone else was faster, use theirs */
			dma_free_page_table(pto);
			pto = get_st_pto(old_ste);
		}
	}
	return pto;
}
@@ -117,19 +135,24 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
	return &pto[px];
}

void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
{
	unsigned long pte;

	pte = READ_ONCE(*ptep);
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(entry);
		invalidate_pt_entry(&pte);
	} else {
		set_pt_pfaa(entry, page_addr);
		validate_pt_entry(entry);
		set_pt_pfaa(&pte, page_addr);
		validate_pt_entry(&pte);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(entry);
		entry_set_protected(&pte);
	else
		entry_clr_protected(entry);
		entry_clr_protected(&pte);

	xchg(ptep, pte);
}

static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
@@ -137,18 +160,14 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
{
	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	phys_addr_t page_addr = (pa & PAGE_MASK);
	unsigned long irq_flags;
	unsigned long *entry;
	int i, rc = 0;

	if (!nr_pages)
		return -EINVAL;

	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
	if (!zdev->dma_table) {
		rc = -EINVAL;
		goto out_unlock;
	}
	if (!zdev->dma_table)
		return -EINVAL;

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
@@ -173,8 +192,6 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
			dma_update_cpu_trans(entry, page_addr, flags);
		}
	}
out_unlock:
	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
	return rc;
}

@@ -558,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
	WARN_ON(zdev->s390_domain);

	spin_lock_init(&zdev->iommu_bitmap_lock);
	spin_lock_init(&zdev->dma_table_lock);

	zdev->dma_table = dma_alloc_cpu_table();
	if (!zdev->dma_table) {
+13 −24
Original line number Diff line number Diff line
@@ -20,7 +20,6 @@ struct s390_domain {
	struct iommu_domain	domain;
	struct list_head	devices;
	unsigned long		*dma_table;
	spinlock_t		dma_table_lock;
	spinlock_t		list_lock;
	struct rcu_head		rcu;
};
@@ -62,7 +61,6 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
	s390_domain->domain.geometry.aperture_start = 0;
	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;

	spin_lock_init(&s390_domain->dma_table_lock);
	spin_lock_init(&s390_domain->list_lock);
	INIT_LIST_HEAD_RCU(&s390_domain->devices);

@@ -265,14 +263,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
				     unsigned long nr_pages, int flags)
{
	phys_addr_t page_addr = pa & PAGE_MASK;
	unsigned long irq_flags, i;
	unsigned long *entry;
	unsigned long i;
	int rc;

	if (!nr_pages)
		return 0;

	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
		if (unlikely(!entry)) {
@@ -283,7 +277,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}
	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);

	return 0;

@@ -296,7 +289,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
			break;
		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
	}
	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);

	return rc;
}
@@ -304,14 +296,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
				       dma_addr_t dma_addr, unsigned long nr_pages)
{
	unsigned long irq_flags, i;
	unsigned long *entry;
	unsigned long i;
	int rc = 0;

	if (!nr_pages)
		return 0;

	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
		if (unlikely(!entry)) {
@@ -321,7 +309,6 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
		dma_addr += PAGE_SIZE;
	}
	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);

	return rc;
}
@@ -363,7 +350,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
					   dma_addr_t iova)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	unsigned long *sto, *pto, *rto, flags;
	unsigned long *rto, *sto, *pto;
	unsigned long ste, pte, rte;
	unsigned int rtx, sx, px;
	phys_addr_t phys = 0;

@@ -376,16 +364,17 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
	px = calc_px(iova);
	rto = s390_domain->dma_table;

	spin_lock_irqsave(&s390_domain->dma_table_lock, flags);
	if (rto && reg_entry_isvalid(rto[rtx])) {
		sto = get_rt_sto(rto[rtx]);
		if (sto && reg_entry_isvalid(sto[sx])) {
			pto = get_st_pto(sto[sx]);
			if (pto && pt_entry_isvalid(pto[px]))
				phys = pto[px] & ZPCI_PTE_ADDR_MASK;
	rte = READ_ONCE(rto[rtx]);
	if (reg_entry_isvalid(rte)) {
		sto = get_rt_sto(rte);
		ste = READ_ONCE(sto[sx]);
		if (reg_entry_isvalid(ste)) {
			pto = get_st_pto(ste);
			pte = READ_ONCE(pto[px]);
			if (pt_entry_isvalid(pte))
				phys = pte & ZPCI_PTE_ADDR_MASK;
		}
	}
	spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags);

	return phys;
}