Commit 9cc960a1 authored by Vlastimil Babka


Merge branch 'core' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu into slab-struct_slab-part2-v1

Merge iommu tree for a series that removes usage of struct page
'freelist' field.
parents b01af5c0 aade40b6
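Note: the series merged here replaces the old scheme, in which page-table pages awaiting freeing were chained through the struct page 'freelist' pointer and every helper returned the new head of the chain, with collecting the pages on an ordinary list_head (threaded through page->lru) and releasing them all with put_pages_list() once the IOTLB flush has completed. Below is a minimal, self-contained userspace sketch of that before/after pattern only; the pt_page type, the tiny list helpers and the function names are illustrative stand-ins, not the kernel's struct page, list_add_tail() or put_pages_list().

/*
 * Illustration only: userspace sketch of the freelist-chain -> list_head
 * conversion. Nothing here is kernel API; pt_page, chain_page() and
 * queue_page() are hypothetical stand-ins.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

static void init_list(struct list_head *h)
{
	h->next = h;
	h->prev = h;
}

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
	entry->prev = head->prev;
	entry->next = head;
	head->prev->next = entry;
	head->prev = entry;
}

struct pt_page {
	int id;
	struct pt_page *freelist;	/* old scheme: intrusive single-linked chain */
	struct list_head lru;		/* new scheme: membership in a caller-owned list */
};

/* Old scheme (shown for contrast, not called below): return the new chain head. */
struct pt_page *chain_page(struct pt_page *pg, struct pt_page *freelist)
{
	pg->freelist = freelist;
	return pg;
}

/* New scheme: append to the caller's list and return nothing. */
void queue_page(struct pt_page *pg, struct list_head *freelist)
{
	list_add_tail(&pg->lru, freelist);
}

int main(void)
{
	struct list_head freelist;
	struct list_head *pos, *next;
	int i;

	init_list(&freelist);

	/* Collect pages that must outlive the (simulated) IOTLB flush. */
	for (i = 0; i < 3; i++) {
		struct pt_page *pg = calloc(1, sizeof(*pg));

		if (!pg)
			return 1;
		pg->id = i;
		queue_page(pg, &freelist);
	}

	/*
	 * The flush would happen here; afterwards everything is freed in one
	 * pass, the userspace stand-in for put_pages_list(&freelist).
	 */
	for (pos = freelist.next; pos != &freelist; pos = next) {
		struct pt_page *pg = (void *)((char *)pos - offsetof(struct pt_page, lru));

		next = pos->next;
		printf("freeing page table page %d\n", pg->id);
		free(pg);
	}
	return 0;
}

The same shape shows up in every hunk below: helpers that used to take and return a struct page * chain now take a struct list_head * and return void, and the final free becomes put_pages_list().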
+41 −69
@@ -74,87 +74,61 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
  *
  ****************************************************************************/
 
-static void free_page_list(struct page *freelist)
+static void free_pt_page(u64 *pt, struct list_head *freelist)
 {
-	while (freelist != NULL) {
-		unsigned long p = (unsigned long)page_address(freelist);
+	struct page *p = virt_to_page(pt);
 
-		freelist = freelist->freelist;
-		free_page(p);
-	}
+	list_add_tail(&p->lru, freelist);
 }
 
-static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
 {
-	struct page *p = virt_to_page((void *)pt);
+	u64 *p;
+	int i;
 
-	p->freelist = freelist;
+	for (i = 0; i < 512; ++i) {
+		/* PTE present? */
+		if (!IOMMU_PTE_PRESENT(pt[i]))
+			continue;
 
-	return p;
-}
+		/* Large PTE? */
+		if (PM_PTE_LEVEL(pt[i]) == 0 ||
+		    PM_PTE_LEVEL(pt[i]) == 7)
+			continue;
 
-#define DEFINE_FREE_PT_FN(LVL, FN)						\
-static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist)	\
-{										\
-	unsigned long p;							\
-	u64 *pt;								\
-	int i;									\
-										\
-	pt = (u64 *)__pt;							\
-										\
-	for (i = 0; i < 512; ++i) {						\
-		/* PTE present? */						\
-		if (!IOMMU_PTE_PRESENT(pt[i]))					\
-			continue;						\
-										\
-		/* Large PTE? */						\
-		if (PM_PTE_LEVEL(pt[i]) == 0 ||					\
-		    PM_PTE_LEVEL(pt[i]) == 7)					\
-			continue;						\
-										\
-		p = (unsigned long)IOMMU_PTE_PAGE(pt[i]);			\
-		freelist = FN(p, freelist);					\
-	}									\
-										\
-	return free_pt_page((unsigned long)pt, freelist);			\
-}
+		/*
+		 * Free the next level. No need to look at l1 tables here since
+		 * they can only contain leaf PTEs; just free them directly.
+		 */
+		p = IOMMU_PTE_PAGE(pt[i]);
+		if (lvl > 2)
+			free_pt_lvl(p, freelist, lvl - 1);
+		else
+			free_pt_page(p, freelist);
+	}
 
-DEFINE_FREE_PT_FN(l2, free_pt_page)
-DEFINE_FREE_PT_FN(l3, free_pt_l2)
-DEFINE_FREE_PT_FN(l4, free_pt_l3)
-DEFINE_FREE_PT_FN(l5, free_pt_l4)
-DEFINE_FREE_PT_FN(l6, free_pt_l5)
+	free_pt_page(pt, freelist);
+}
 
-static struct page *free_sub_pt(unsigned long root, int mode,
-				struct page *freelist)
+static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
 {
 	switch (mode) {
 	case PAGE_MODE_NONE:
 	case PAGE_MODE_7_LEVEL:
 		break;
 	case PAGE_MODE_1_LEVEL:
-		freelist = free_pt_page(root, freelist);
+		free_pt_page(root, freelist);
 		break;
 	case PAGE_MODE_2_LEVEL:
-		freelist = free_pt_l2(root, freelist);
-		break;
 	case PAGE_MODE_3_LEVEL:
-		freelist = free_pt_l3(root, freelist);
-		break;
 	case PAGE_MODE_4_LEVEL:
-		freelist = free_pt_l4(root, freelist);
-		break;
 	case PAGE_MODE_5_LEVEL:
-		freelist = free_pt_l5(root, freelist);
-		break;
 	case PAGE_MODE_6_LEVEL:
-		freelist = free_pt_l6(root, freelist);
+		free_pt_lvl(root, freelist, mode);
 		break;
 	default:
 		BUG();
 	}
-
-	return freelist;
 }
 
 void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
@@ -362,9 +336,9 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
 	return pte;
 }
 
-static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
+static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
 {
-	unsigned long pt;
+	u64 *pt;
 	int mode;
 
 	while (cmpxchg64(pte, pteval, 0) != pteval) {
@@ -373,12 +347,12 @@ static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
 	}
 
 	if (!IOMMU_PTE_PRESENT(pteval))
-		return freelist;
+		return;
 
-	pt   = (unsigned long)IOMMU_PTE_PAGE(pteval);
+	pt   = IOMMU_PTE_PAGE(pteval);
 	mode = IOMMU_PTE_MODE(pteval);
 
-	return free_sub_pt(pt, mode, freelist);
+	free_sub_pt(pt, mode, freelist);
 }
 
 /*
@@ -392,7 +366,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
 			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
 	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
-	struct page *freelist = NULL;
+	LIST_HEAD(freelist);
 	bool updated = false;
 	u64 __pte, *pte;
 	int ret, i, count;
@@ -412,9 +386,9 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
 		goto out;
 
 	for (i = 0; i < count; ++i)
-		freelist = free_clear_pte(&pte[i], pte[i], freelist);
+		free_clear_pte(&pte[i], pte[i], &freelist);
 
-	if (freelist != NULL)
+	if (!list_empty(&freelist))
 		updated = true;
 
 	if (count > 1) {
@@ -449,7 +423,7 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
 	}
 
 	/* Everything flushed out, free pages now */
-	free_page_list(freelist);
+	put_pages_list(&freelist);
 
 	return ret;
 }
@@ -511,8 +485,7 @@ static void v1_free_pgtable(struct io_pgtable *iop)
 {
 	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
 	struct protection_domain *dom;
-	struct page *freelist = NULL;
-	unsigned long root;
+	LIST_HEAD(freelist);
 
 	if (pgtable->mode == PAGE_MODE_NONE)
 		return;
@@ -529,10 +502,9 @@ static void v1_free_pgtable(struct io_pgtable *iop)
 	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
 	       pgtable->mode > PAGE_MODE_6_LEVEL);
 
-	root = (unsigned long)pgtable->root;
-	freelist = free_sub_pt(root, pgtable->mode, freelist);
+	free_sub_pt(pgtable->root, pgtable->mode, &freelist);
 
-	free_page_list(freelist);
+	put_pages_list(&freelist);
 }
 
 static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+217 −57
@@ -9,9 +9,12 @@
  */
 
 #include <linux/acpi_iort.h>
+#include <linux/atomic.h>
+#include <linux/crash_dump.h>
 #include <linux/device.h>
-#include <linux/dma-map-ops.h>
+#include <linux/dma-direct.h>
 #include <linux/dma-iommu.h>
+#include <linux/dma-map-ops.h>
 #include <linux/gfp.h>
 #include <linux/huge_mm.h>
 #include <linux/iommu.h>
@@ -20,11 +23,10 @@
 #include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/pci.h>
-#include <linux/swiotlb.h>
 #include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+#include <linux/swiotlb.h>
 #include <linux/vmalloc.h>
-#include <linux/crash_dump.h>
-#include <linux/dma-direct.h>
 
 struct iommu_dma_msi_page {
 	struct list_head	list;
@@ -41,7 +43,19 @@ struct iommu_dma_cookie {
 	enum iommu_dma_cookie_type	type;
 	union {
 		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
-		struct iova_domain	iovad;
+		struct {
+			struct iova_domain	iovad;
+
+			struct iova_fq __percpu *fq;	/* Flush queue */
+			/* Number of TLB flushes that have been started */
+			atomic64_t		fq_flush_start_cnt;
+			/* Number of TLB flushes that have been finished */
+			atomic64_t		fq_flush_finish_cnt;
+			/* Timer to regularily empty the flush queues */
+			struct timer_list	fq_timer;
+			/* 1 when timer is active, 0 when not */
+			atomic_t		fq_timer_on;
+		};
 		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
 		dma_addr_t		msi_iova;
 	};
@@ -64,18 +78,205 @@ static int __init iommu_dma_forcedac_setup(char *str)
 }
 early_param("iommu.forcedac", iommu_dma_forcedac_setup);
 
-static void iommu_dma_entry_dtor(unsigned long data)
-{
-	struct page *freelist = (struct page *)data;
-
-	while (freelist) {
-		unsigned long p = (unsigned long)page_address(freelist);
-
-		freelist = freelist->freelist;
-		free_page(p);
-	}
-}
+/* Number of entries per flush queue */
+#define IOVA_FQ_SIZE	256
+
+/* Timeout (in ms) after which entries are flushed from the queue */
+#define IOVA_FQ_TIMEOUT	10
+
+/* Flush queue entry for deferred flushing */
+struct iova_fq_entry {
+	unsigned long iova_pfn;
+	unsigned long pages;
+	struct list_head freelist;
+	u64 counter; /* Flush counter when this entry was added */
+};
+
+/* Per-CPU flush queue structure */
+struct iova_fq {
+	struct iova_fq_entry entries[IOVA_FQ_SIZE];
+	unsigned int head, tail;
+	spinlock_t lock;
+};
+
+#define fq_ring_for_each(i, fq) \
+	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
+
+static inline bool fq_full(struct iova_fq *fq)
+{
+	assert_spin_locked(&fq->lock);
+	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
+}
+
+static inline unsigned int fq_ring_add(struct iova_fq *fq)
+{
+	unsigned int idx = fq->tail;
+
+	assert_spin_locked(&fq->lock);
+
+	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
+
+	return idx;
+}
+
+static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+{
+	u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
+	unsigned int idx;
+
+	assert_spin_locked(&fq->lock);
+
+	fq_ring_for_each(idx, fq) {
+
+		if (fq->entries[idx].counter >= counter)
+			break;
+
+		put_pages_list(&fq->entries[idx].freelist);
+		free_iova_fast(&cookie->iovad,
+			       fq->entries[idx].iova_pfn,
+			       fq->entries[idx].pages);
+
+		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
+	}
+}
+
+static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
+{
+	atomic64_inc(&cookie->fq_flush_start_cnt);
+	cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain);
+	atomic64_inc(&cookie->fq_flush_finish_cnt);
+}
+
+static void fq_flush_timeout(struct timer_list *t)
+{
+	struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
+	int cpu;
+
+	atomic_set(&cookie->fq_timer_on, 0);
+	fq_flush_iotlb(cookie);
+
+	for_each_possible_cpu(cpu) {
+		unsigned long flags;
+		struct iova_fq *fq;
+
+		fq = per_cpu_ptr(cookie->fq, cpu);
+		spin_lock_irqsave(&fq->lock, flags);
+		fq_ring_free(cookie, fq);
+		spin_unlock_irqrestore(&fq->lock, flags);
+	}
+}
+
+static void queue_iova(struct iommu_dma_cookie *cookie,
+		unsigned long pfn, unsigned long pages,
+		struct list_head *freelist)
+{
+	struct iova_fq *fq;
+	unsigned long flags;
+	unsigned int idx;
+
+	/*
+	 * Order against the IOMMU driver's pagetable update from unmapping
+	 * @pte, to guarantee that fq_flush_iotlb() observes that if called
+	 * from a different CPU before we release the lock below. Full barrier
+	 * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
+	 * written fq state here.
+	 */
+	smp_mb();
+
+	fq = raw_cpu_ptr(cookie->fq);
+	spin_lock_irqsave(&fq->lock, flags);
+
+	/*
+	 * First remove all entries from the flush queue that have already been
+	 * flushed out on another CPU. This makes the fq_full() check below less
+	 * likely to be true.
+	 */
+	fq_ring_free(cookie, fq);
+
+	if (fq_full(fq)) {
+		fq_flush_iotlb(cookie);
+		fq_ring_free(cookie, fq);
+	}
+
+	idx = fq_ring_add(fq);
+
+	fq->entries[idx].iova_pfn = pfn;
+	fq->entries[idx].pages    = pages;
+	fq->entries[idx].counter  = atomic64_read(&cookie->fq_flush_start_cnt);
+	list_splice(freelist, &fq->entries[idx].freelist);
+
+	spin_unlock_irqrestore(&fq->lock, flags);
+
+	/* Avoid false sharing as much as possible. */
+	if (!atomic_read(&cookie->fq_timer_on) &&
+	    !atomic_xchg(&cookie->fq_timer_on, 1))
+		mod_timer(&cookie->fq_timer,
+			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
+}
+
+static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+{
+	int cpu, idx;
+
+	if (!cookie->fq)
+		return;
+
+	del_timer_sync(&cookie->fq_timer);
+	/* The IOVAs will be torn down separately, so just free our queued pages */
+	for_each_possible_cpu(cpu) {
+		struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
+
+		fq_ring_for_each(idx, fq)
+			put_pages_list(&fq->entries[idx].freelist);
+	}
+
+	free_percpu(cookie->fq);
+}
+
+/* sysfs updates are serialised by the mutex of the group owning @domain */
+int iommu_dma_init_fq(struct iommu_domain *domain)
+{
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iova_fq __percpu *queue;
+	int i, cpu;
+
+	if (cookie->fq_domain)
+		return 0;
+
+	atomic64_set(&cookie->fq_flush_start_cnt,  0);
+	atomic64_set(&cookie->fq_flush_finish_cnt, 0);
+
+	queue = alloc_percpu(struct iova_fq);
+	if (!queue) {
+		pr_warn("iova flush queue initialization failed\n");
+		return -ENOMEM;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct iova_fq *fq = per_cpu_ptr(queue, cpu);
+
+		fq->head = 0;
+		fq->tail = 0;
+
+		spin_lock_init(&fq->lock);
+
+		for (i = 0; i < IOVA_FQ_SIZE; i++)
+			INIT_LIST_HEAD(&fq->entries[i].freelist);
+	}
+
+	cookie->fq = queue;
+
+	timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
+	atomic_set(&cookie->fq_timer_on, 0);
+	/*
+	 * Prevent incomplete fq state being observable. Pairs with path from
+	 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
+	 */
+	smp_wmb();
+	WRITE_ONCE(cookie->fq_domain, domain);
+	return 0;
+}
 
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
 {
 	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
@@ -156,8 +357,10 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
 	if (!cookie)
 		return;
 
-	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule)
+	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
+		iommu_dma_free_fq(cookie);
 		put_iova_domain(&cookie->iovad);
+	}
 
 	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
 		list_del(&msi->list);
@@ -294,17 +497,6 @@ static int iova_reserve_iommu_regions(struct device *dev,
 	return ret;
 }
 
-static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
-{
-	struct iommu_dma_cookie *cookie;
-	struct iommu_domain *domain;
-
-	cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
-	domain = cookie->fq_domain;
-
-	domain->ops->flush_iotlb_all(domain);
-}
-
 static bool dev_is_untrusted(struct device *dev)
 {
 	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
@@ -315,30 +507,6 @@ static bool dev_use_swiotlb(struct device *dev)
 	return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
 }
 
-/* sysfs updates are serialised by the mutex of the group owning @domain */
-int iommu_dma_init_fq(struct iommu_domain *domain)
-{
-	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	int ret;
-
-	if (cookie->fq_domain)
-		return 0;
-
-	ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all,
-				    iommu_dma_entry_dtor);
-	if (ret) {
-		pr_warn("iova flush queue initialization failed\n");
-		return ret;
-	}
-	/*
-	 * Prevent incomplete iovad->fq being observable. Pairs with path from
-	 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
-	 */
-	smp_wmb();
-	WRITE_ONCE(cookie->fq_domain, domain);
-	return 0;
-}
-
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -442,14 +610,6 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 
 	shift = iova_shift(iovad);
 	iova_len = size >> shift;
-	/*
-	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
-	 * will come back to bite us badly, so we have to waste a bit of space
-	 * rounding up anything cacheable to make sure that can't happen. The
-	 * order of the unadjusted size will still match upon freeing.
-	 */
-	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
-		iova_len = roundup_pow_of_two(iova_len);
 
 	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
 
@@ -477,9 +637,9 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
 	else if (gather && gather->queued)
-		queue_iova(iovad, iova_pfn(iovad, iova),
+		queue_iova(cookie, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad),
-				(unsigned long)gather->freelist);
+				&gather->freelist);
 	else
 		free_iova_fast(iovad, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad));
+31 −58
@@ -1303,35 +1303,30 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
    know the hardware page-walk will no longer touch them.
    The 'pte' argument is the *parent* PTE, pointing to the page that is to
    be freed. */
-static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
-					    int level, struct dma_pte *pte,
-					    struct page *freelist)
+static void dma_pte_list_pagetables(struct dmar_domain *domain,
+				    int level, struct dma_pte *pte,
+				    struct list_head *freelist)
 {
 	struct page *pg;
 
 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
-	pg->freelist = freelist;
-	freelist = pg;
+	list_add_tail(&pg->lru, freelist);
 
 	if (level == 1)
-		return freelist;
+		return;
 
 	pte = page_address(pg);
 	do {
 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
-			freelist = dma_pte_list_pagetables(domain, level - 1,
-							   pte, freelist);
+			dma_pte_list_pagetables(domain, level - 1, pte, freelist);
 		pte++;
 	} while (!first_pte_in_page(pte));
-
-	return freelist;
 }
 
-static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
-					struct dma_pte *pte, unsigned long pfn,
-					unsigned long start_pfn,
-					unsigned long last_pfn,
-					struct page *freelist)
+static void dma_pte_clear_level(struct dmar_domain *domain, int level,
+				struct dma_pte *pte, unsigned long pfn,
+				unsigned long start_pfn, unsigned long last_pfn,
+				struct list_head *freelist)
 {
 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
 
@@ -1350,7 +1345,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
 			/* These suborbinate page tables are going away entirely. Don't
 			   bother to clear them; we're just going to *free* them. */
 			if (level > 1 && !dma_pte_superpage(pte))
-				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
+				dma_pte_list_pagetables(domain, level - 1, pte, freelist);
 
 			dma_clear_pte(pte);
 			if (!first_pte)
@@ -1358,7 +1353,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
 			last_pte = pte;
 		} else if (level > 1) {
 			/* Recurse down into a level that isn't *entirely* obsolete */
-			freelist = dma_pte_clear_level(domain, level - 1,
-						       phys_to_virt(dma_pte_addr(pte)),
-						       level_pfn, start_pfn, last_pfn,
-						       freelist);
+			dma_pte_clear_level(domain, level - 1,
+					    phys_to_virt(dma_pte_addr(pte)),
+					    level_pfn, start_pfn, last_pfn,
+					    freelist);
@@ -1370,47 +1365,28 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
 	if (first_pte)
 		domain_flush_cache(domain, first_pte,
 				   (void *)++last_pte - (void *)first_pte);
-
-	return freelist;
 }
 
 /* We can't just free the pages because the IOMMU may still be walking
    the page tables, and may have cached the intermediate levels. The
    pages can only be freed after the IOTLB flush has been done. */
-static struct page *domain_unmap(struct dmar_domain *domain,
-				 unsigned long start_pfn,
-				 unsigned long last_pfn,
-				 struct page *freelist)
+static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
+			 unsigned long last_pfn, struct list_head *freelist)
 {
 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
 	BUG_ON(start_pfn > last_pfn);
 
 	/* we don't need lock here; nobody else touches the iova range */
-	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
-				       domain->pgd, 0, start_pfn, last_pfn,
-				       freelist);
+	dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
+			    domain->pgd, 0, start_pfn, last_pfn, freelist);
 
 	/* free pgd */
 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 		struct page *pgd_page = virt_to_page(domain->pgd);
-		pgd_page->freelist = freelist;
-		freelist = pgd_page;
-
+		list_add_tail(&pgd_page->lru, freelist);
 		domain->pgd = NULL;
 	}
-
-	return freelist;
-}
-
-static void dma_free_pagelist(struct page *freelist)
-{
-	struct page *pg;
-
-	while ((pg = freelist)) {
-		freelist = pg->freelist;
-		free_pgtable_page(page_address(pg));
-	}
 }
 
 /* iommu handling */
@@ -2095,11 +2071,10 @@ static void domain_exit(struct dmar_domain *domain)
 	domain_remove_dev_info(domain);
 
 	if (domain->pgd) {
-		struct page *freelist;
+		LIST_HEAD(freelist);
 
-		freelist = domain_unmap(domain, 0,
-					DOMAIN_MAX_PFN(domain->gaw), NULL);
-		dma_free_pagelist(freelist);
+		domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
+		put_pages_list(&freelist);
 	}
 
 	free_domain_mem(domain);
@@ -4192,19 +4167,17 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
 		{
 			struct dmar_drhd_unit *drhd;
 			struct intel_iommu *iommu;
-			struct page *freelist;
+			LIST_HEAD(freelist);
 
-			freelist = domain_unmap(si_domain,
-						start_vpfn, last_vpfn,
-						NULL);
+			domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
 
 			rcu_read_lock();
 			for_each_active_iommu(iommu, drhd)
 				iommu_flush_iotlb_psi(iommu, si_domain,
 					start_vpfn, mhp->nr_pages,
-					!freelist, 0);
+					list_empty(&freelist), 0);
 			rcu_read_unlock();
-			dma_free_pagelist(freelist);
+			put_pages_list(&freelist);
 		}
 		break;
 	}
@@ -5211,8 +5184,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 	start_pfn = iova >> VTD_PAGE_SHIFT;
 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
 
-	gather->freelist = domain_unmap(dmar_domain, start_pfn,
-					last_pfn, gather->freelist);
+	domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
 
 	if (dmar_domain->max_addr == iova + size)
 		dmar_domain->max_addr = iova;
@@ -5248,9 +5220,10 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
 
 	for_each_domain_iommu(iommu_id, dmar_domain)
 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
-				      start_pfn, nrpages, !gather->freelist, 0);
+				      start_pfn, nrpages,
+				      list_empty(&gather->freelist), 0);
 
-	dma_free_pagelist(gather->freelist);
+	put_pages_list(&gather->freelist);
 }
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+5 −4
@@ -315,11 +315,12 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 					     arm_lpae_iopte *ptep,
 					     arm_lpae_iopte curr,
-					     struct io_pgtable_cfg *cfg)
+					     struct arm_lpae_io_pgtable *data)
 {
 	arm_lpae_iopte old, new;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 
-	new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
+	new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
 	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
 		new |= ARM_LPAE_PTE_NSTABLE;
 
@@ -380,7 +381,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		if (!cptep)
 			return -ENOMEM;
 
-		pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
+		pte = arm_lpae_install_table(cptep, ptep, 0, data);
 		if (pte)
 			__arm_lpae_free_pages(cptep, tblsz, cfg);
 	} else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
@@ -592,7 +593,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
 	}
 
-	pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
+	pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
 	if (pte != blk_pte) {
 		__arm_lpae_free_pages(tablep, tablesz, cfg);
 		/*
+2 −1
@@ -288,11 +288,11 @@ int iommu_probe_device(struct device *dev)
 	 */
 	mutex_lock(&group->mutex);
 	iommu_alloc_default_domain(group, dev);
-	mutex_unlock(&group->mutex);
 
 	if (group->default_domain) {
 		ret = __iommu_attach_device(group->default_domain, dev);
 		if (ret) {
+			mutex_unlock(&group->mutex);
 			iommu_group_put(group);
 			goto err_release;
 		}
@@ -300,6 +300,7 @@ int iommu_probe_device(struct device *dev)
 
 	iommu_create_device_direct_mappings(group, dev);
 
+	mutex_unlock(&group->mutex);
 	iommu_group_put(group);
 
 	if (ops->probe_finalize)