Commit 2a2b8eaa authored by Tom Murphy's avatar Tom Murphy Committed by Will Deacon
Browse files

iommu: Handle freelists when using deferred flushing in iommu drivers



Allow the iommu_unmap_fast to return newly freed page table pages and
pass the freelist to queue_iova in the dma-iommu ops path.

This is useful for iommu drivers (in this case the intel iommu driver)
which need to wait for the ioTLB to be flushed before newly
free/unmapped page table pages can be freed. This way we can still batch
ioTLB free operations and handle the freelists.

Signed-off-by: default avatarTom Murphy <murphyt7@tcd.ie>
Signed-off-by: default avatarLu Baolu <baolu.lu@linux.intel.com>
Tested-by: default avatarLogan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20201124082057.2614359-2-baolu.lu@linux.intel.com


Signed-off-by: default avatarWill Deacon <will@kernel.org>
parent 66930e7e
Loading
Loading
Loading
Loading
+21 −8
Original line number Diff line number Diff line
@@ -49,6 +49,18 @@ struct iommu_dma_cookie {
	struct iommu_domain		*fq_domain;
};

static void iommu_dma_entry_dtor(unsigned long data)
{
	struct page *freelist = (struct page *)data;

	while (freelist) {
		unsigned long p = (unsigned long)page_address(freelist);

		freelist = freelist->freelist;
		free_page(p);
	}
}

static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
{
	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
@@ -343,7 +355,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
	if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
			DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
		if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
					NULL))
					  iommu_dma_entry_dtor))
			pr_warn("iova flush queue initialization failed\n");
		else
			cookie->fq_domain = domain;
@@ -440,7 +452,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
}

static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
		dma_addr_t iova, size_t size)
		dma_addr_t iova, size_t size, struct page *freelist)
{
	struct iova_domain *iovad = &cookie->iovad;

@@ -449,7 +461,8 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
		cookie->msi_iova -= size;
	else if (cookie->fq_domain)	/* non-strict mode */
		queue_iova(iovad, iova_pfn(iovad, iova),
				size >> iova_shift(iovad), 0);
				size >> iova_shift(iovad),
				(unsigned long)freelist);
	else
		free_iova_fast(iovad, iova_pfn(iovad, iova),
				size >> iova_shift(iovad));
@@ -474,7 +487,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,

	if (!cookie->fq_domain)
		iommu_iotlb_sync(domain, &iotlb_gather);
	iommu_dma_free_iova(cookie, dma_addr, size);
	iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
}

static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -496,7 +509,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
		return DMA_MAPPING_ERROR;

	if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
		iommu_dma_free_iova(cookie, iova, size);
		iommu_dma_free_iova(cookie, iova, size, NULL);
		return DMA_MAPPING_ERROR;
	}
	return iova + iova_off;
@@ -649,7 +662,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
out_free_sg:
	sg_free_table(&sgt);
out_free_iova:
	iommu_dma_free_iova(cookie, iova, size);
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_pages:
	__iommu_dma_free_pages(pages, count);
	return NULL;
@@ -900,7 +913,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
	return __finalise_sg(dev, sg, nents, iova);

out_free_iova:
	iommu_dma_free_iova(cookie, iova, iova_len);
	iommu_dma_free_iova(cookie, iova, iova_len, NULL);
out_restore_sg:
	__invalidate_sg(sg, nents);
	return 0;
@@ -1228,7 +1241,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
	return msi_page;

out_free_iova:
	iommu_dma_free_iova(cookie, iova, size);
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_page:
	kfree(msi_page);
	return NULL;
+36 −19
Original line number Diff line number Diff line
@@ -1243,17 +1243,17 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
				 unsigned long last_pfn,
				 struct page *freelist)
{
	struct page *freelist;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);
				       domain->pgd, 0, start_pfn, last_pfn,
				       freelist);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
@@ -2011,7 +2011,8 @@ static void domain_exit(struct dmar_domain *domain)
	if (domain->pgd) {
		struct page *freelist;

		freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
		freelist = domain_unmap(domain, 0,
					DOMAIN_MAX_PFN(domain->gaw), NULL);
		dma_free_pagelist(freelist);
	}

@@ -3570,7 +3571,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
	if (dev_is_pci(dev))
		pdev = to_pci_dev(dev);

	freelist = domain_unmap(domain, start_pfn, last_pfn);
	freelist = domain_unmap(domain, start_pfn, last_pfn, NULL);
	if (intel_iommu_strict || (pdev && pdev->untrusted) ||
			!has_iova_flush_queue(&domain->iovad)) {
		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
@@ -4636,7 +4637,8 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
			struct page *freelist;

			freelist = domain_unmap(si_domain,
						start_vpfn, last_vpfn);
						start_vpfn, last_vpfn,
						NULL);

			rcu_read_lock();
			for_each_active_iommu(iommu, drhd)
@@ -5608,10 +5610,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct page *freelist = NULL;
	unsigned long start_pfn, last_pfn;
	unsigned int npages;
	int iommu_id, level = 0;
	int level = 0;

	/* Cope with horrid API which requires us to unmap more than the
	   size argument if it happens to be a large-page mapping. */
@@ -5623,22 +5623,38 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);

	npages = last_pfn - start_pfn + 1;

	for_each_domain_iommu(iommu_id, dmar_domain)
		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
				      start_pfn, npages, !freelist, 0);

	dma_free_pagelist(freelist);
	gather->freelist = domain_unmap(dmar_domain, start_pfn,
					last_pfn, gather->freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	iommu_iotlb_gather_add_page(domain, gather, iova, size);

	return size;
}

static void intel_iommu_tlb_sync(struct iommu_domain *domain,
				 struct iommu_iotlb_gather *gather)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	unsigned long iova_pfn = IOVA_PFN(gather->start);
	size_t size = gather->end - gather->start;
	unsigned long start_pfn, last_pfn;
	unsigned long nrpages;
	int iommu_id;

	nrpages = aligned_nrpages(gather->start, size);
	start_pfn = mm_to_dma_pfn(iova_pfn);
	last_pfn = start_pfn + nrpages - 1;

	for_each_domain_iommu(iommu_id, dmar_domain)
		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
				      start_pfn, nrpages, !gather->freelist, 0);

	dma_free_pagelist(gather->freelist);
}

static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova)
{
@@ -6098,6 +6114,7 @@ const struct iommu_ops intel_iommu_ops = {
	.aux_get_pasid		= intel_iommu_aux_get_pasid,
	.map			= intel_iommu_map,
	.unmap			= intel_iommu_unmap,
	.iotlb_sync		= intel_iommu_tlb_sync,
	.iova_to_phys		= intel_iommu_iova_to_phys,
	.probe_device		= intel_iommu_probe_device,
	.probe_finalize		= intel_iommu_probe_finalize,
+1 −0
Original line number Diff line number Diff line
@@ -180,6 +180,7 @@ struct iommu_iotlb_gather {
	unsigned long		start;
	unsigned long		end;
	size_t			pgsize;
	struct page		*freelist;
};

/**