Commit caafc0a9 authored by Mike Kravetz, committed by Jinjiang Tu
Browse files

hugetlb: make free_huge_page irq safe

mainline inclusion
from mainline-v5.13-rc1
commit db71ef79
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9SZXR
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=db71ef79b59bb2e78dc4df83d0e4bf6beaa5c82d

-------------------------------------------

Commit c77c0a8a ("mm/hugetlb: defer freeing of huge pages if in
non-task context") was added to address the issue of free_huge_page being
called from irq context.  That commit hands off free_huge_page processing
to a workqueue if !in_task.  However, this doesn't cover all the cases as
pointed out by 0day bot lockdep report [1].

:  Possible interrupt unsafe locking scenario:
:
:        CPU0                    CPU1
:        ----                    ----
:   lock(hugetlb_lock);
:                                local_irq_disable();
:                                lock(slock-AF_INET);
:                                lock(hugetlb_lock);
:   <Interrupt>
:     lock(slock-AF_INET);

Shakeel later explained that this is very likely the TCP TX zerocopy from
hugetlb pages scenario, in which the networking code drops the last
reference to a hugetlb page while IRQs are disabled.  The hugetlb freeing
path doesn't disable IRQs while holding hugetlb_lock, so a lock dependency
chain can lead to a deadlock.

This commit addresses the issue by doing the following:
 - Make hugetlb_lock irq safe. This is mostly a simple process of
   changing spin_*lock calls to spin_*lock_irq* calls.
 - Make subpool lock irq safe in a similar manner.
 - Revert the !in_task check and workqueue handoff.

[1] https://lore.kernel.org/linux-mm/000000000000f1c03b05bc43aadc@google.com/

Link: https://lkml.kernel.org/r/20210409205254.242291-8-mike.kravetz@oracle.com


Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Barry Song <song.bao.hua@hisilicon.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: HORIGUCHI NAOYA <naoya.horiguchi@nec.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Conflicts:
	mm/hugetlb.c
	mm/hugetlb_cgroup.c
	fs/hugetlb/inode.c
[Context conflicts. The Dynamic Hugetlb feature and hugetlb_checknode() also
use hugetlb_lock; convert these uses too.]
Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
parent 16f8b80f
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -131,7 +131,7 @@ static int hugetlb_checknode(struct vm_area_struct *vma, long nr)
	int ret = 0;
	struct hstate *h = &default_hstate;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);

	nid = vma->vm_flags >> CHECKNODE_BITS;

@@ -155,7 +155,7 @@ static int hugetlb_checknode(struct vm_area_struct *vma, long nr)
	}

err:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return ret;
}

+74 −117
Original line number Diff line number Diff line
@@ -105,11 +105,12 @@ static inline void ClearPageHugeFreed(struct page *head)
static int hugetlb_acct_memory(struct hstate *h, long delta,
			       struct dhugetlb_pool *hpool);

static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
						unsigned long irq_flags)
{
	bool free = (spool->count == 0) && (spool->used_hpages == 0);

	spin_unlock(&spool->lock);
	spin_unlock_irqrestore(&spool->lock, irq_flags);

	/* If no pages are used, and no other handles to the subpool
	 * remain, give up any reservations mased on minimum size and
@@ -148,10 +149,12 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,

void hugepage_put_subpool(struct hugepage_subpool *spool)
{
	spin_lock(&spool->lock);
	unsigned long flags;

	spin_lock_irqsave(&spool->lock, flags);
	BUG_ON(!spool->count);
	spool->count--;
	unlock_or_release_subpool(spool);
	unlock_or_release_subpool(spool, flags);
}

/*
@@ -174,7 +177,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
	if (dhugetlb_enabled && hpool)
		return ret;

	spin_lock(&spool->lock);
	spin_lock_irq(&spool->lock);

	if (spool->max_hpages != -1) {		/* maximum size accounting */
		if ((spool->used_hpages + delta) <= spool->max_hpages)
@@ -201,7 +204,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
	}

unlock_ret:
	spin_unlock(&spool->lock);
	spin_unlock_irq(&spool->lock);
	return ret;
}

@@ -215,6 +218,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
				       long delta, struct dhugetlb_pool *hpool)
{
	long ret = delta;
	unsigned long flags;

	if (!spool)
		return delta;
@@ -223,7 +227,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
	if (dhugetlb_enabled && hpool)
		return ret;

	spin_lock(&spool->lock);
	spin_lock_irqsave(&spool->lock, flags);

	if (spool->max_hpages != -1)		/* maximum size accounting */
		spool->used_hpages -= delta;
@@ -244,7 +248,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
	 * If hugetlbfs_put_super couldn't free spool due to an outstanding
	 * quota reference, free it now.
	 */
	unlock_or_release_subpool(spool);
	unlock_or_release_subpool(spool, flags);

	return ret;
}
@@ -1429,7 +1433,7 @@ void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve)
}
#endif

static void __free_huge_page(struct page *page)
void free_huge_page(struct page *page)
{
	/*
	 * Can't pass hstate in here because it is called from the
@@ -1440,6 +1444,7 @@ static void __free_huge_page(struct page *page)
	struct hugepage_subpool *spool =
		(struct hugepage_subpool *)page_private(page);
	bool restore_reserve;
	unsigned long flags;

	sp_kmemcg_uncharge_hpage(page);
	set_page_private(page, 0);
@@ -1450,12 +1455,12 @@ static void __free_huge_page(struct page *page)
	ClearPagePrivate(page);

	if (dhugetlb_enabled && PagePool(page)) {
		spin_lock(&hugetlb_lock);
		spin_lock_irqsave(&hugetlb_lock, flags);
		clear_page_huge_active(page);
		list_del(&page->lru);
		hugetlb_cgroup_uncharge_page(hstate_index(h),
					     pages_per_huge_page(h), page);
		spin_unlock(&hugetlb_lock);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		free_huge_page_to_dhugetlb_pool(page, restore_reserve);
		return;
	}
@@ -1479,7 +1484,7 @@ static void __free_huge_page(struct page *page)
			restore_reserve = true;
	}

	spin_lock(&hugetlb_lock);
	spin_lock_irqsave(&hugetlb_lock, flags);
	clear_page_huge_active(page);
	hugetlb_cgroup_uncharge_page(hstate_index(h),
				     pages_per_huge_page(h), page);
@@ -1490,78 +1495,30 @@ static void __free_huge_page(struct page *page)
		sp_memcg_uncharge_hpage(page);
		ClearPageHugeTemporary(page);
		remove_hugetlb_page(h, page, false);
		spin_unlock(&hugetlb_lock);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		update_and_free_page(h, page);
	} else if (h->surplus_huge_pages_node[nid]) {
		/* remove the page from active list */
		remove_hugetlb_page(h, page, true);
		spin_unlock(&hugetlb_lock);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		update_and_free_page(h, page);
	} else {
		arch_clear_hugepage_flags(page);
		enqueue_huge_page(h, page);
		spin_unlock(&hugetlb_lock);
	}
		spin_unlock_irqrestore(&hugetlb_lock, flags);
	}

/*
 * As free_huge_page() can be called from a non-task context, we have
 * to defer the actual freeing in a workqueue to prevent potential
 * hugetlb_lock deadlock.
 *
 * free_hpage_workfn() locklessly retrieves the linked list of pages to
 * be freed and frees them one-by-one. As the page->mapping pointer is
 * going to be cleared in __free_huge_page() anyway, it is reused as the
 * llist_node structure of a lockless linked list of huge pages to be freed.
 */
static LLIST_HEAD(hpage_freelist);

static void free_hpage_workfn(struct work_struct *work)
{
	struct llist_node *node;
	struct page *page;

	node = llist_del_all(&hpage_freelist);

	while (node) {
		page = container_of((struct address_space **)node,
				     struct page, mapping);
		node = node->next;
		__free_huge_page(page);
	}
}
static DECLARE_WORK(free_hpage_work, free_hpage_workfn);

void free_huge_page(struct page *page)
{
	/*
	 * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
	 */
	if (!in_task()) {
		/*
		 * Only call schedule_work() if hpage_freelist is previously
		 * empty. Otherwise, schedule_work() had been called but the
		 * workfn hasn't retrieved the list yet.
		 */
		if (llist_add((struct llist_node *)&page->mapping,
			      &hpage_freelist))
			schedule_work(&free_hpage_work);
		return;
	}

	__free_huge_page(page);
}

static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
	INIT_LIST_HEAD(&page->lru);
	set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	set_hugetlb_cgroup(page, NULL);
	h->nr_huge_pages++;
	h->nr_huge_pages_node[nid]++;
	ClearPageHugeFreed(page);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
}

static void prep_compound_gigantic_page(struct page *page, unsigned int order)
@@ -1856,7 +1813,7 @@ int dissolve_free_huge_page(struct page *page)
	if (page_belong_to_dynamic_hugetlb(page))
		return -EBUSY;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (!PageHuge(page)) {
		rc = 0;
		goto out;
@@ -1873,7 +1830,7 @@ int dissolve_free_huge_page(struct page *page)
		 * when it is dissolved.
		 */
		if (unlikely(!PageHugeFreed(head))) {
			spin_unlock(&hugetlb_lock);
			spin_unlock_irq(&hugetlb_lock);
			cond_resched();

			/*
@@ -1897,12 +1854,12 @@ int dissolve_free_huge_page(struct page *page)
		}
		remove_hugetlb_page(h, page, false);
		h->max_huge_pages--;
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
		update_and_free_page(h, head);
		return 0;
	}
out:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return rc;
}

@@ -1944,16 +1901,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	if (hstate_is_gigantic(h))
		return NULL;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
		goto out_unlock;
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
	if (!page)
		return NULL;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	/*
	 * We could have raced with the pool size change.
	 * Double check that and simply deallocate the new page
@@ -1963,7 +1920,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	 */
	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
		SetPageHugeTemporary(page);
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
		put_page(page);
		return NULL;
	} else {
@@ -1972,7 +1929,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	}

out_unlock:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	return page;
}
@@ -2027,10 +1984,10 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
	if (nid != NUMA_NO_NODE)
		gfp_mask |= __GFP_THISNODE;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (h->free_huge_pages - h->resv_huge_pages > 0)
		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL, NULL);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	if (!page) {
		if (enable_charge_mighp)
@@ -2048,18 +2005,18 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
{
	gfp_t gfp_mask = htlb_alloc_mask(h);

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (h->free_huge_pages - h->resv_huge_pages > 0) {
		struct page *page;

		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid,
						  nmask, NULL);
		if (page) {
			spin_unlock(&hugetlb_lock);
			spin_unlock_irq(&hugetlb_lock);
			return page;
		}
	}
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
}
@@ -2106,7 +2063,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)

	ret = -ENOMEM;
retry:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	for (i = 0; i < needed; i++) {
		page = alloc_surplus_huge_page(h, htlb_alloc_mask(h),
				NUMA_NO_NODE, NULL);
@@ -2123,7 +2080,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)
	 * After retaking hugetlb_lock, we need to recalculate 'needed'
	 * because either resv_huge_pages or free_huge_pages may have changed.
	 */
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	needed = (h->resv_huge_pages + delta) -
			(h->free_huge_pages + allocated);
	if (needed > 0) {
@@ -2161,12 +2118,12 @@ static int gather_surplus_pages(struct hstate *h, long delta)
		enqueue_huge_page(h, page);
	}
free:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	/* Free unnecessary surplus pages to the buddy allocator */
	list_for_each_entry_safe(page, tmp, &surplus_list, lru)
		put_page(page);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);

	return ret;
}
@@ -2216,9 +2173,9 @@ static void return_unused_surplus_pages(struct hstate *h,
	}

out:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	update_and_free_pages_bulk(h, &page_list);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
}


@@ -2518,18 +2475,18 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
			 * Use hugetlb_lock to manage the account of
			 * hugetlb cgroup.
			 */
			spin_lock(&hugetlb_lock);
			spin_lock_irq(&hugetlb_lock);
			list_add(&page->lru, &h->hugepage_activelist);
			hugetlb_cgroup_commit_charge(idx,
				pages_per_huge_page(hstate_vma(vma)),
				h_cg, page);
			spin_unlock(&hugetlb_lock);
			spin_unlock_irq(&hugetlb_lock);
			goto out;
		}
		goto out_uncharge_cgroup;
	}

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	/*
	 * glb_chg is passed to indicate whether or not a page must be taken
	 * from the global free pool (global change).  gbl_chg == 0 indicates
@@ -2537,11 +2494,11 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
	 */
	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
	if (!page) {
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
		page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
		if (!page)
			goto out_uncharge_cgroup;
		spin_lock(&hugetlb_lock);
		spin_lock_irq(&hugetlb_lock);
		if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
			SetPagePrivate(page);
			h->resv_huge_pages--;
@@ -2550,7 +2507,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
		/* Fall through */
	}
	hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
out:
	set_page_private(page, (unsigned long)spool);

@@ -2801,9 +2758,9 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
	}

out:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	update_and_free_pages_bulk(h, &page_list);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
}
#else
static inline void try_to_free_low(struct hstate *h, unsigned long count,
@@ -2868,7 +2825,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		return h->max_huge_pages;
	}

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);

	/*
	 * Check for a node specific request.
@@ -2912,14 +2869,14 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		 * page, free_huge_page will handle it by freeing the page
		 * and reducing the surplus.
		 */
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);

		/* yield cpu to avoid soft lockup */
		cond_resched();

		ret = alloc_pool_huge_page(h, nodes_allowed,
						node_alloc_noretry);
		spin_lock(&hugetlb_lock);
		spin_lock_irq(&hugetlb_lock);
		if (!ret)
			goto out;

@@ -2958,9 +2915,9 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		list_add(&page->lru, &page_list);
	}
	/* free the pages after dropping lock */
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	update_and_free_pages_bulk(h, &page_list);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);

	while (count < persistent_huge_pages(h)) {
		if (!adjust_pool_surplus(h, nodes_allowed, 1))
@@ -2968,7 +2925,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
	}
out:
	ret = persistent_huge_pages(h);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	NODEMASK_FREE(node_alloc_noretry);

@@ -3135,9 +3092,9 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
	if (err)
		return err;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	h->nr_overcommit_huge_pages = input;
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	return count;
}
@@ -3448,7 +3405,7 @@ int alloc_hugepage_from_hugetlb(struct dhugetlb_pool *hpool,
		return -ENOMEM;

	spin_lock(&hpool->lock);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (h->free_huge_pages_node[nid] < size) {
		ret = -ENOMEM;
		goto out_unlock;
@@ -3470,7 +3427,7 @@ int alloc_hugepage_from_hugetlb(struct dhugetlb_pool *hpool,
	}
	ret = 0;
out_unlock:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hpool->lock);
	return ret;
}
@@ -3835,7 +3792,7 @@ static void free_back_hugetlb(struct dhugetlb_pool *hpool)
	if (!h)
		return;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	list_for_each_entry_safe(page, page_next,
				 &hpool->dhugetlb_1G_freelists, lru) {
		nr_pages = 1 << huge_page_order(h);
@@ -3862,7 +3819,7 @@ static void free_back_hugetlb(struct dhugetlb_pool *hpool)
	hpool->free_reserved_1G = 0;
	hpool->total_reserved_1G = 0;
	INIT_LIST_HEAD(&hpool->dhugetlb_1G_freelists);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
}

bool free_dhugetlb_pool(struct dhugetlb_pool *hpool)
@@ -4632,9 +4589,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
		goto out;

	if (write) {
		spin_lock(&hugetlb_lock);
		spin_lock_irq(&hugetlb_lock);
		h->nr_overcommit_huge_pages = tmp;
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
	}
out:
	return ret;
@@ -4731,7 +4688,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta,
	if (dhugetlb_enabled && hpool)
		return dhugetlb_acct_memory(h, delta, hpool);

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	/*
	 * When cpuset is configured, it breaks the strict hugetlb page
	 * reservation as the accounting is done on a global variable. Such
@@ -4764,7 +4721,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta,
		return_unused_surplus_pages(h, (unsigned long) -delta);

out:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return ret;
}

@@ -6671,7 +6628,7 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
{
	bool ret = true;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (!PageHeadHuge(page) || !page_huge_active(page) ||
	    !get_page_unless_zero(page)) {
		ret = false;
@@ -6680,17 +6637,17 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
	clear_page_huge_active(page);
	list_move_tail(&page->lru, list);
unlock:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return ret;
}

void putback_active_hugepage(struct page *page)
{
	VM_BUG_ON_PAGE(!PageHead(page), page);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	set_page_huge_active(page);
	list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	put_page(page);
}

@@ -6718,12 +6675,12 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
		SetPageHugeTemporary(oldpage);
		ClearPageHugeTemporary(newpage);

		spin_lock(&hugetlb_lock);
		spin_lock_irq(&hugetlb_lock);
		if (h->surplus_huge_pages_node[old_nid]) {
			h->surplus_huge_pages_node[old_nid]--;
			h->surplus_huge_pages_node[new_nid]++;
		}
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
	}
}

@@ -6739,10 +6696,10 @@ static struct page *hugetlb_alloc_hugepage_normal(struct hstate *h,
{
	struct page *page = NULL;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (h->free_huge_pages - h->resv_huge_pages > 0)
		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL, NULL);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	return page;
}
+4 −4
Original line number Diff line number Diff line
@@ -167,11 +167,11 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)

	do {
		for_each_hstate(h) {
			spin_lock(&hugetlb_lock);
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(idx, h_cg, page);

			spin_unlock(&hugetlb_lock);
			spin_unlock_irq(&hugetlb_lock);
			idx++;
		}
		cond_resched();
@@ -422,14 +422,14 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
		return;

	VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_page(oldhpage);
	set_hugetlb_cgroup(oldhpage, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(newhpage, h_cg);
	list_move(&newhpage->lru, &h->hugepage_activelist);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return;
}