Commit 6fb7b268 authored by Jinjiang Tu's avatar Jinjiang Tu
Browse files

Revert "hugetlb: make free_huge_page irq safe"

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9SZXR



--------------------------------

This reverts commit 7f5b0216.

The prior patches are not merged, this commit will leads to do heavy
things and even schedule with irq disabled. So revert it first.

Signed-off-by: default avatarJinjiang Tu <tujinjiang@huawei.com>
parent ddf26a67
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -131,7 +131,7 @@ static int hugetlb_checknode(struct vm_area_struct *vma, long nr)
	int ret = 0;
	struct hstate *h = &default_hstate;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);

	nid = vma->vm_flags >> CHECKNODE_BITS;

@@ -155,7 +155,7 @@ static int hugetlb_checknode(struct vm_area_struct *vma, long nr)
	}

err:
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
	return ret;
}

+108 −65
Original line number Diff line number Diff line
@@ -105,12 +105,11 @@ static inline void ClearPageHugeFreed(struct page *head)
static int hugetlb_acct_memory(struct hstate *h, long delta,
			       struct dhugetlb_pool *hpool);

static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
						unsigned long irq_flags)
static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
{
	bool free = (spool->count == 0) && (spool->used_hpages == 0);

	spin_unlock_irqrestore(&spool->lock, irq_flags);
	spin_unlock(&spool->lock);

	/* If no pages are used, and no other handles to the subpool
	 * remain, give up any reservations mased on minimum size and
@@ -149,12 +148,10 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,

void hugepage_put_subpool(struct hugepage_subpool *spool)
{
	unsigned long flags;

	spin_lock_irqsave(&spool->lock, flags);
	spin_lock(&spool->lock);
	BUG_ON(!spool->count);
	spool->count--;
	unlock_or_release_subpool(spool, flags);
	unlock_or_release_subpool(spool);
}

/*
@@ -177,7 +174,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
	if (dhugetlb_enabled && hpool)
		return ret;

	spin_lock_irq(&spool->lock);
	spin_lock(&spool->lock);

	if (spool->max_hpages != -1) {		/* maximum size accounting */
		if ((spool->used_hpages + delta) <= spool->max_hpages)
@@ -204,7 +201,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
	}

unlock_ret:
	spin_unlock_irq(&spool->lock);
	spin_unlock(&spool->lock);
	return ret;
}

@@ -218,7 +215,6 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
				       long delta, struct dhugetlb_pool *hpool)
{
	long ret = delta;
	unsigned long flags;

	if (!spool)
		return delta;
@@ -227,7 +223,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
	if (dhugetlb_enabled && hpool)
		return ret;

	spin_lock_irqsave(&spool->lock, flags);
	spin_lock(&spool->lock);

	if (spool->max_hpages != -1)		/* maximum size accounting */
		spool->used_hpages -= delta;
@@ -248,7 +244,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
	 * If hugetlbfs_put_super couldn't free spool due to an outstanding
	 * quota reference, free it now.
	 */
	unlock_or_release_subpool(spool, flags);
	unlock_or_release_subpool(spool);

	return ret;
}
@@ -1391,7 +1387,7 @@ void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve)
}
#endif

void free_huge_page(struct page *page)
static void __free_huge_page(struct page *page)
{
	/*
	 * Can't pass hstate in here because it is called from the
@@ -1402,7 +1398,6 @@ void free_huge_page(struct page *page)
	struct hugepage_subpool *spool =
		(struct hugepage_subpool *)page_private(page);
	bool restore_reserve;
	unsigned long flags;

	sp_kmemcg_uncharge_hpage(page);
	set_page_private(page, 0);
@@ -1413,12 +1408,12 @@ void free_huge_page(struct page *page)
	ClearPagePrivate(page);

	if (dhugetlb_enabled && PagePool(page)) {
		spin_lock_irqsave(&hugetlb_lock, flags);
		spin_lock(&hugetlb_lock);
		clear_page_huge_active(page);
		list_del(&page->lru);
		hugetlb_cgroup_uncharge_page(hstate_index(h),
					     pages_per_huge_page(h), page);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		spin_unlock(&hugetlb_lock);
		free_huge_page_to_dhugetlb_pool(page, restore_reserve);
		return;
	}
@@ -1442,7 +1437,7 @@ void free_huge_page(struct page *page)
			restore_reserve = true;
	}

	spin_lock_irqsave(&hugetlb_lock, flags);
	spin_lock(&hugetlb_lock);
	clear_page_huge_active(page);
	hugetlb_cgroup_uncharge_page(hstate_index(h),
				     pages_per_huge_page(h), page);
@@ -1464,19 +1459,67 @@ void free_huge_page(struct page *page)
		arch_clear_hugepage_flags(page);
		enqueue_huge_page(h, page);
	}
	spin_unlock_irqrestore(&hugetlb_lock, flags);
	spin_unlock(&hugetlb_lock);
}

/*
 * As free_huge_page() can be called from a non-task context, we have
 * to defer the actual freeing in a workqueue to prevent potential
 * hugetlb_lock deadlock.
 *
 * free_hpage_workfn() locklessly retrieves the linked list of pages to
 * be freed and frees them one-by-one. As the page->mapping pointer is
 * going to be cleared in __free_huge_page() anyway, it is reused as the
 * llist_node structure of a lockless linked list of huge pages to be freed.
 */
static LLIST_HEAD(hpage_freelist);

static void free_hpage_workfn(struct work_struct *work)
{
	struct llist_node *node;
	struct page *page;

	node = llist_del_all(&hpage_freelist);

	while (node) {
		page = container_of((struct address_space **)node,
				     struct page, mapping);
		node = node->next;
		__free_huge_page(page);
	}
}
static DECLARE_WORK(free_hpage_work, free_hpage_workfn);

void free_huge_page(struct page *page)
{
	/*
	 * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
	 */
	if (!in_task()) {
		/*
		 * Only call schedule_work() if hpage_freelist is previously
		 * empty. Otherwise, schedule_work() had been called but the
		 * workfn hasn't retrieved the list yet.
		 */
		if (llist_add((struct llist_node *)&page->mapping,
			      &hpage_freelist))
			schedule_work(&free_hpage_work);
		return;
	}

	__free_huge_page(page);
}

static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
	INIT_LIST_HEAD(&page->lru);
	set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	set_hugetlb_cgroup(page, NULL);
	h->nr_huge_pages++;
	h->nr_huge_pages_node[nid]++;
	ClearPageHugeFreed(page);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
}

static void prep_compound_gigantic_page(struct page *page, unsigned int order)
@@ -1778,7 +1821,7 @@ int dissolve_free_huge_page(struct page *page)
	if (page_belong_to_dynamic_hugetlb(page))
		return -EBUSY;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	if (!PageHuge(page)) {
		rc = 0;
		goto out;
@@ -1796,7 +1839,7 @@ int dissolve_free_huge_page(struct page *page)
		 * when it is dissolved.
		 */
		if (unlikely(!PageHugeFreed(head))) {
			spin_unlock_irq(&hugetlb_lock);
			spin_unlock(&hugetlb_lock);
			cond_resched();

			/*
@@ -1826,7 +1869,7 @@ int dissolve_free_huge_page(struct page *page)
		rc = 0;
	}
out:
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
	return rc;
}

@@ -1868,16 +1911,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	if (hstate_is_gigantic(h))
		return NULL;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
		goto out_unlock;
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);

	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
	if (!page)
		return NULL;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	/*
	 * We could have raced with the pool size change.
	 * Double check that and simply deallocate the new page
@@ -1887,7 +1930,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	 */
	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
		SetPageHugeTemporary(page);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		put_page(page);
		return NULL;
	} else {
@@ -1896,7 +1939,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	}

out_unlock:
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);

	return page;
}
@@ -1951,10 +1994,10 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
	if (nid != NUMA_NO_NODE)
		gfp_mask |= __GFP_THISNODE;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	if (h->free_huge_pages - h->resv_huge_pages > 0)
		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL, NULL);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);

	if (!page) {
		if (enable_charge_mighp)
@@ -1972,18 +2015,18 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
{
	gfp_t gfp_mask = htlb_alloc_mask(h);

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	if (h->free_huge_pages - h->resv_huge_pages > 0) {
		struct page *page;

		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid,
						  nmask, NULL);
		if (page) {
			spin_unlock_irq(&hugetlb_lock);
			spin_unlock(&hugetlb_lock);
			return page;
		}
	}
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);

	return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
}
@@ -2030,7 +2073,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)

	ret = -ENOMEM;
retry:
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
	for (i = 0; i < needed; i++) {
		page = alloc_surplus_huge_page(h, htlb_alloc_mask(h),
				NUMA_NO_NODE, NULL);
@@ -2047,7 +2090,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)
	 * After retaking hugetlb_lock, we need to recalculate 'needed'
	 * because either resv_huge_pages or free_huge_pages may have changed.
	 */
	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	needed = (h->resv_huge_pages + delta) -
			(h->free_huge_pages + allocated);
	if (needed > 0) {
@@ -2085,12 +2128,12 @@ static int gather_surplus_pages(struct hstate *h, long delta)
		enqueue_huge_page(h, page);
	}
free:
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);

	/* Free unnecessary surplus pages to the buddy allocator */
	list_for_each_entry_safe(page, tmp, &surplus_list, lru)
		put_page(page);
	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);

	return ret;
}
@@ -2446,18 +2489,18 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
			 * Use hugetlb_lock to manage the account of
			 * hugetlb cgroup.
			 */
			spin_lock_irq(&hugetlb_lock);
			spin_lock(&hugetlb_lock);
			list_add(&page->lru, &h->hugepage_activelist);
			hugetlb_cgroup_commit_charge(idx,
				pages_per_huge_page(hstate_vma(vma)),
				h_cg, page);
			spin_unlock_irq(&hugetlb_lock);
			spin_unlock(&hugetlb_lock);
			goto out;
		}
		goto out_uncharge_cgroup;
	}

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	/*
	 * glb_chg is passed to indicate whether or not a page must be taken
	 * from the global free pool (global change).  gbl_chg == 0 indicates
@@ -2465,11 +2508,11 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
	 */
	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
	if (!page) {
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
		if (!page)
			goto out_uncharge_cgroup;
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
			SetPagePrivate(page);
			h->resv_huge_pages--;
@@ -2478,7 +2521,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
		/* Fall through */
	}
	hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
out:
	set_page_private(page, (unsigned long)spool);

@@ -2787,7 +2830,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		return h->max_huge_pages;
	}

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);

	/*
	 * Check for a node specific request.
@@ -2831,14 +2874,14 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		 * page, free_huge_page will handle it by freeing the page
		 * and reducing the surplus.
		 */
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		/* yield cpu to avoid soft lockup */
		cond_resched();

		ret = alloc_pool_huge_page(h, nodes_allowed,
						node_alloc_noretry);
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (!ret)
			goto out;

@@ -2876,7 +2919,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
	}
out:
	ret = persistent_huge_pages(h);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);

	NODEMASK_FREE(node_alloc_noretry);

@@ -3043,9 +3086,9 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
	if (err)
		return err;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	h->nr_overcommit_huge_pages = input;
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);

	return count;
}
@@ -3356,7 +3399,7 @@ int alloc_hugepage_from_hugetlb(struct dhugetlb_pool *hpool,
		return -ENOMEM;

	spin_lock(&hpool->lock);
	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	if (h->free_huge_pages_node[nid] < size) {
		ret = -ENOMEM;
		goto out_unlock;
@@ -3378,7 +3421,7 @@ int alloc_hugepage_from_hugetlb(struct dhugetlb_pool *hpool,
	}
	ret = 0;
out_unlock:
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
	spin_unlock(&hpool->lock);
	return ret;
}
@@ -3743,7 +3786,7 @@ static void free_back_hugetlb(struct dhugetlb_pool *hpool)
	if (!h)
		return;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	list_for_each_entry_safe(page, page_next,
				 &hpool->dhugetlb_1G_freelists, lru) {
		nr_pages = 1 << huge_page_order(h);
@@ -3770,7 +3813,7 @@ static void free_back_hugetlb(struct dhugetlb_pool *hpool)
	hpool->free_reserved_1G = 0;
	hpool->total_reserved_1G = 0;
	INIT_LIST_HEAD(&hpool->dhugetlb_1G_freelists);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
}

bool free_dhugetlb_pool(struct dhugetlb_pool *hpool)
@@ -4540,9 +4583,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
		goto out;

	if (write) {
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		h->nr_overcommit_huge_pages = tmp;
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
	}
out:
	return ret;
@@ -4639,7 +4682,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta,
	if (dhugetlb_enabled && hpool)
		return dhugetlb_acct_memory(h, delta, hpool);

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	/*
	 * When cpuset is configured, it breaks the strict hugetlb page
	 * reservation as the accounting is done on a global variable. Such
@@ -4672,7 +4715,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta,
		return_unused_surplus_pages(h, (unsigned long) -delta);

out:
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
	return ret;
}

@@ -6579,7 +6622,7 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
{
	bool ret = true;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	if (!PageHeadHuge(page) || !page_huge_active(page) ||
	    !get_page_unless_zero(page)) {
		ret = false;
@@ -6588,17 +6631,17 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
	clear_page_huge_active(page);
	list_move_tail(&page->lru, list);
unlock:
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
	return ret;
}

void putback_active_hugepage(struct page *page)
{
	VM_BUG_ON_PAGE(!PageHead(page), page);
	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	set_page_huge_active(page);
	list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
	put_page(page);
}

@@ -6626,12 +6669,12 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
		SetPageHugeTemporary(oldpage);
		ClearPageHugeTemporary(newpage);

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (h->surplus_huge_pages_node[old_nid]) {
			h->surplus_huge_pages_node[old_nid]--;
			h->surplus_huge_pages_node[new_nid]++;
		}
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
	}
}

@@ -6647,10 +6690,10 @@ static struct page *hugetlb_alloc_hugepage_normal(struct hstate *h,
{
	struct page *page = NULL;

	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	if (h->free_huge_pages - h->resv_huge_pages > 0)
		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL, NULL);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);

	return page;
}
+4 −4
Original line number Diff line number Diff line
@@ -167,11 +167,11 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)

	do {
		for_each_hstate(h) {
			spin_lock_irq(&hugetlb_lock);
			spin_lock(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(idx, h_cg, page);

			spin_unlock_irq(&hugetlb_lock);
			spin_unlock(&hugetlb_lock);
			idx++;
		}
		cond_resched();
@@ -422,14 +422,14 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
		return;

	VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
	spin_lock_irq(&hugetlb_lock);
	spin_lock(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_page(oldhpage);
	set_hugetlb_cgroup(oldhpage, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(newhpage, h_cg);
	list_move(&newhpage->lru, &h->hugepage_activelist);
	spin_unlock_irq(&hugetlb_lock);
	spin_unlock(&hugetlb_lock);
	return;
}