Revert "hugetlb: make free_huge_page irq safe" (6fb7b268) · Commits · EulixOS / Software / Kernel

fs/hugetlbfs/inode.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -131,7 +131,7 @@ static int hugetlb_checknode(struct vm_area_struct *vma, long nr)
		int ret = 0;
		struct hstate *h = &default_hstate;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);

		nid = vma->vm_flags >> CHECKNODE_BITS;

		@@ -155,7 +155,7 @@ static int hugetlb_checknode(struct vm_area_struct *vma, long nr)
		}

		err:
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		return ret;
		}

mm/hugetlb.c

+108 −65

Original line number	Diff line number	Diff line
		@@ -105,12 +105,11 @@ static inline void ClearPageHugeFreed(struct page *head)
		static int hugetlb_acct_memory(struct hstate *h, long delta,
		struct dhugetlb_pool *hpool);

		static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
		unsigned long irq_flags)
		static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
		{
		bool free = (spool->count == 0) && (spool->used_hpages == 0);

		spin_unlock_irqrestore(&spool->lock, irq_flags);
		spin_unlock(&spool->lock);

		/* If no pages are used, and no other handles to the subpool
		* remain, give up any reservations mased on minimum size and
		@@ -149,12 +148,10 @@ struct hugepage_subpool hugepage_new_subpool(struct hstate h, long max_hpages,

		void hugepage_put_subpool(struct hugepage_subpool *spool)
		{
		unsigned long flags;

		spin_lock_irqsave(&spool->lock, flags);
		spin_lock(&spool->lock);
		BUG_ON(!spool->count);
		spool->count--;
		unlock_or_release_subpool(spool, flags);
		unlock_or_release_subpool(spool);
		}

		/*
		@@ -177,7 +174,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
		if (dhugetlb_enabled && hpool)
		return ret;

		spin_lock_irq(&spool->lock);
		spin_lock(&spool->lock);

		if (spool->max_hpages != -1) { /* maximum size accounting */
		if ((spool->used_hpages + delta) <= spool->max_hpages)
		@@ -204,7 +201,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
		}

		unlock_ret:
		spin_unlock_irq(&spool->lock);
		spin_unlock(&spool->lock);
		return ret;
		}

		@@ -218,7 +215,6 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
		long delta, struct dhugetlb_pool *hpool)
		{
		long ret = delta;
		unsigned long flags;

		if (!spool)
		return delta;
		@@ -227,7 +223,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
		if (dhugetlb_enabled && hpool)
		return ret;

		spin_lock_irqsave(&spool->lock, flags);
		spin_lock(&spool->lock);

		if (spool->max_hpages != -1) /* maximum size accounting */
		spool->used_hpages -= delta;
		@@ -248,7 +244,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
		* If hugetlbfs_put_super couldn't free spool due to an outstanding
		* quota reference, free it now.
		*/
		unlock_or_release_subpool(spool, flags);
		unlock_or_release_subpool(spool);

		return ret;
		}
		@@ -1391,7 +1387,7 @@ void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve)
		}
		#endif

		void free_huge_page(struct page *page)
		static void __free_huge_page(struct page *page)
		{
		/*
		* Can't pass hstate in here because it is called from the
		@@ -1402,7 +1398,6 @@ void free_huge_page(struct page *page)
		struct hugepage_subpool *spool =
		(struct hugepage_subpool *)page_private(page);
		bool restore_reserve;
		unsigned long flags;

		sp_kmemcg_uncharge_hpage(page);
		set_page_private(page, 0);
		@@ -1413,12 +1408,12 @@ void free_huge_page(struct page *page)
		ClearPagePrivate(page);

		if (dhugetlb_enabled && PagePool(page)) {
		spin_lock_irqsave(&hugetlb_lock, flags);
		spin_lock(&hugetlb_lock);
		clear_page_huge_active(page);
		list_del(&page->lru);
		hugetlb_cgroup_uncharge_page(hstate_index(h),
		pages_per_huge_page(h), page);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		spin_unlock(&hugetlb_lock);
		free_huge_page_to_dhugetlb_pool(page, restore_reserve);
		return;
		}
		@@ -1442,7 +1437,7 @@ void free_huge_page(struct page *page)
		restore_reserve = true;
		}

		spin_lock_irqsave(&hugetlb_lock, flags);
		spin_lock(&hugetlb_lock);
		clear_page_huge_active(page);
		hugetlb_cgroup_uncharge_page(hstate_index(h),
		pages_per_huge_page(h), page);
		@@ -1464,19 +1459,67 @@ void free_huge_page(struct page *page)
		arch_clear_hugepage_flags(page);
		enqueue_huge_page(h, page);
		}
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		spin_unlock(&hugetlb_lock);
		}

		/*
		* As free_huge_page() can be called from a non-task context, we have
		* to defer the actual freeing in a workqueue to prevent potential
		* hugetlb_lock deadlock.
		*
		* free_hpage_workfn() locklessly retrieves the linked list of pages to
		* be freed and frees them one-by-one. As the page->mapping pointer is
		* going to be cleared in __free_huge_page() anyway, it is reused as the
		* llist_node structure of a lockless linked list of huge pages to be freed.
		*/
		static LLIST_HEAD(hpage_freelist);

		static void free_hpage_workfn(struct work_struct *work)
		{
		struct llist_node *node;
		struct page *page;

		node = llist_del_all(&hpage_freelist);

		while (node) {
		page = container_of((struct address_space **)node,
		struct page, mapping);
		node = node->next;
		__free_huge_page(page);
		}
		}
		static DECLARE_WORK(free_hpage_work, free_hpage_workfn);

		void free_huge_page(struct page *page)
		{
		/*
		* Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
		*/
		if (!in_task()) {
		/*
		* Only call schedule_work() if hpage_freelist is previously
		* empty. Otherwise, schedule_work() had been called but the
		* workfn hasn't retrieved the list yet.
		*/
		if (llist_add((struct llist_node *)&page->mapping,
		&hpage_freelist))
		schedule_work(&free_hpage_work);
		return;
		}

		__free_huge_page(page);
		}

		static void prep_new_huge_page(struct hstate h, struct page page, int nid)
		{
		INIT_LIST_HEAD(&page->lru);
		set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		set_hugetlb_cgroup(page, NULL);
		h->nr_huge_pages++;
		h->nr_huge_pages_node[nid]++;
		ClearPageHugeFreed(page);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		}

		static void prep_compound_gigantic_page(struct page *page, unsigned int order)
		@@ -1778,7 +1821,7 @@ int dissolve_free_huge_page(struct page *page)
		if (page_belong_to_dynamic_hugetlb(page))
		return -EBUSY;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (!PageHuge(page)) {
		rc = 0;
		goto out;
		@@ -1796,7 +1839,7 @@ int dissolve_free_huge_page(struct page *page)
		* when it is dissolved.
		*/
		if (unlikely(!PageHugeFreed(head))) {
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		cond_resched();

		/*
		@@ -1826,7 +1869,7 @@ int dissolve_free_huge_page(struct page *page)
		rc = 0;
		}
		out:
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		return rc;
		}

		@@ -1868,16 +1911,16 @@ static struct page alloc_surplus_huge_page(struct hstate h, gfp_t gfp_mask,
		if (hstate_is_gigantic(h))
		return NULL;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
		goto out_unlock;
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
		if (!page)
		return NULL;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		/*
		* We could have raced with the pool size change.
		* Double check that and simply deallocate the new page
		@@ -1887,7 +1930,7 @@ static struct page alloc_surplus_huge_page(struct hstate h, gfp_t gfp_mask,
		*/
		if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
		SetPageHugeTemporary(page);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		put_page(page);
		return NULL;
		} else {
		@@ -1896,7 +1939,7 @@ static struct page alloc_surplus_huge_page(struct hstate h, gfp_t gfp_mask,
		}

		out_unlock:
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		return page;
		}
		@@ -1951,10 +1994,10 @@ struct page alloc_huge_page_node(struct hstate h, int nid)
		if (nid != NUMA_NO_NODE)
		gfp_mask \|= __GFP_THISNODE;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (h->free_huge_pages - h->resv_huge_pages > 0)
		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL, NULL);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		if (!page) {
		if (enable_charge_mighp)
		@@ -1972,18 +2015,18 @@ struct page alloc_huge_page_nodemask(struct hstate h, int preferred_nid,
		{
		gfp_t gfp_mask = htlb_alloc_mask(h);

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (h->free_huge_pages - h->resv_huge_pages > 0) {
		struct page *page;

		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid,
		nmask, NULL);
		if (page) {
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		return page;
		}
		}
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
		}
		@@ -2030,7 +2073,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)

		ret = -ENOMEM;
		retry:
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		for (i = 0; i < needed; i++) {
		page = alloc_surplus_huge_page(h, htlb_alloc_mask(h),
		NUMA_NO_NODE, NULL);
		@@ -2047,7 +2090,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)
		* After retaking hugetlb_lock, we need to recalculate 'needed'
		* because either resv_huge_pages or free_huge_pages may have changed.
		*/
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		needed = (h->resv_huge_pages + delta) -
		(h->free_huge_pages + allocated);
		if (needed > 0) {
		@@ -2085,12 +2128,12 @@ static int gather_surplus_pages(struct hstate *h, long delta)
		enqueue_huge_page(h, page);
		}
		free:
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		/* Free unnecessary surplus pages to the buddy allocator */
		list_for_each_entry_safe(page, tmp, &surplus_list, lru)
		put_page(page);
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);

		return ret;
		}
		@@ -2446,18 +2489,18 @@ struct page alloc_huge_page(struct vm_area_struct vma,
		* Use hugetlb_lock to manage the account of
		* hugetlb cgroup.
		*/
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		list_add(&page->lru, &h->hugepage_activelist);
		hugetlb_cgroup_commit_charge(idx,
		pages_per_huge_page(hstate_vma(vma)),
		h_cg, page);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		goto out;
		}
		goto out_uncharge_cgroup;
		}

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		/*
		* glb_chg is passed to indicate whether or not a page must be taken
		* from the global free pool (global change). gbl_chg == 0 indicates
		@@ -2465,11 +2508,11 @@ struct page alloc_huge_page(struct vm_area_struct vma,
		*/
		page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
		if (!page) {
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
		if (!page)
		goto out_uncharge_cgroup;
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
		SetPagePrivate(page);
		h->resv_huge_pages--;
		@@ -2478,7 +2521,7 @@ struct page alloc_huge_page(struct vm_area_struct vma,
		/* Fall through */
		}
		hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		out:
		set_page_private(page, (unsigned long)spool);

		@@ -2787,7 +2830,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		return h->max_huge_pages;
		}

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);

		/*
		* Check for a node specific request.
		@@ -2831,14 +2874,14 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		* page, free_huge_page will handle it by freeing the page
		* and reducing the surplus.
		*/
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		/* yield cpu to avoid soft lockup */
		cond_resched();

		ret = alloc_pool_huge_page(h, nodes_allowed,
		node_alloc_noretry);
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (!ret)
		goto out;

		@@ -2876,7 +2919,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		}
		out:
		ret = persistent_huge_pages(h);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		NODEMASK_FREE(node_alloc_noretry);

		@@ -3043,9 +3086,9 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
		if (err)
		return err;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		h->nr_overcommit_huge_pages = input;
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		return count;
		}
		@@ -3356,7 +3399,7 @@ int alloc_hugepage_from_hugetlb(struct dhugetlb_pool *hpool,
		return -ENOMEM;

		spin_lock(&hpool->lock);
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (h->free_huge_pages_node[nid] < size) {
		ret = -ENOMEM;
		goto out_unlock;
		@@ -3378,7 +3421,7 @@ int alloc_hugepage_from_hugetlb(struct dhugetlb_pool *hpool,
		}
		ret = 0;
		out_unlock:
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		spin_unlock(&hpool->lock);
		return ret;
		}
		@@ -3743,7 +3786,7 @@ static void free_back_hugetlb(struct dhugetlb_pool *hpool)
		if (!h)
		return;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		list_for_each_entry_safe(page, page_next,
		&hpool->dhugetlb_1G_freelists, lru) {
		nr_pages = 1 << huge_page_order(h);
		@@ -3770,7 +3813,7 @@ static void free_back_hugetlb(struct dhugetlb_pool *hpool)
		hpool->free_reserved_1G = 0;
		hpool->total_reserved_1G = 0;
		INIT_LIST_HEAD(&hpool->dhugetlb_1G_freelists);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		}

		bool free_dhugetlb_pool(struct dhugetlb_pool *hpool)
		@@ -4540,9 +4583,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
		goto out;

		if (write) {
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		h->nr_overcommit_huge_pages = tmp;
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		}
		out:
		return ret;
		@@ -4639,7 +4682,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta,
		if (dhugetlb_enabled && hpool)
		return dhugetlb_acct_memory(h, delta, hpool);

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		/*
		* When cpuset is configured, it breaks the strict hugetlb page
		* reservation as the accounting is done on a global variable. Such
		@@ -4672,7 +4715,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta,
		return_unused_surplus_pages(h, (unsigned long) -delta);

		out:
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		return ret;
		}

		@@ -6579,7 +6622,7 @@ bool isolate_huge_page(struct page page, struct list_head list)
		{
		bool ret = true;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (!PageHeadHuge(page) \|\| !page_huge_active(page) \|\|
		!get_page_unless_zero(page)) {
		ret = false;
		@@ -6588,17 +6631,17 @@ bool isolate_huge_page(struct page page, struct list_head list)
		clear_page_huge_active(page);
		list_move_tail(&page->lru, list);
		unlock:
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		return ret;
		}

		void putback_active_hugepage(struct page *page)
		{
		VM_BUG_ON_PAGE(!PageHead(page), page);
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		set_page_huge_active(page);
		list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		put_page(page);
		}

		@@ -6626,12 +6669,12 @@ void move_hugetlb_state(struct page oldpage, struct page newpage, int reason)
		SetPageHugeTemporary(oldpage);
		ClearPageHugeTemporary(newpage);

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (h->surplus_huge_pages_node[old_nid]) {
		h->surplus_huge_pages_node[old_nid]--;
		h->surplus_huge_pages_node[new_nid]++;
		}
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		}
		}

		@@ -6647,10 +6690,10 @@ static struct page hugetlb_alloc_hugepage_normal(struct hstate h,
		{
		struct page *page = NULL;

		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		if (h->free_huge_pages - h->resv_huge_pages > 0)
		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL, NULL);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);

		return page;
		}

mm/hugetlb_cgroup.c

+4 −4

Original line number	Diff line number	Diff line
		@@ -167,11 +167,11 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)

		do {
		for_each_hstate(h) {
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		list_for_each_entry(page, &h->hugepage_activelist, lru)
		hugetlb_cgroup_move_parent(idx, h_cg, page);

		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		idx++;
		}
		cond_resched();
		@@ -422,14 +422,14 @@ void hugetlb_cgroup_migrate(struct page oldhpage, struct page newhpage)
		return;

		VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
		spin_lock_irq(&hugetlb_lock);
		spin_lock(&hugetlb_lock);
		h_cg = hugetlb_cgroup_from_page(oldhpage);
		set_hugetlb_cgroup(oldhpage, NULL);

		/* move the h_cg details to new cgroup */
		set_hugetlb_cgroup(newhpage, h_cg);
		list_move(&newhpage->lru, &h->hugepage_activelist);
		spin_unlock_irq(&hugetlb_lock);
		spin_unlock(&hugetlb_lock);
		return;
		}