Commit db71ef79 authored by Mike Kravetz, committed by Linus Torvalds

hugetlb: make free_huge_page irq safe

Commit c77c0a8a ("mm/hugetlb: defer freeing of huge pages if in
non-task context") was added to address the issue of free_huge_page being
called from irq context.  That commit hands off free_huge_page processing
to a workqueue if !in_task.  However, this doesn't cover all the cases as
pointed out by the 0day bot lockdep report [1].

:  Possible interrupt unsafe locking scenario:
:
:        CPU0                    CPU1
:        ----                    ----
:   lock(hugetlb_lock);
:                                local_irq_disable();
:                                lock(slock-AF_INET);
:                                lock(hugetlb_lock);
:   <Interrupt>
:     lock(slock-AF_INET);

Shakeel later explained that this is very likely the TCP TX zerocopy from
hugetlb pages scenario, where the networking code drops the last reference
to a hugetlb page while IRQs are disabled.  The hugetlb freeing path does
not disable IRQs while holding hugetlb_lock, so a lock dependency chain
can lead to a deadlock.
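
Concretely, the problematic pre-patch chain looks roughly like the
following.  This is an illustrative sketch only; the networking entry
point is just an example, and only free_huge_page() and hugetlb_lock are
taken from the actual code:

	/* CPU0: pool management in process context, IRQs still enabled */
	spin_lock(&hugetlb_lock);

	/*
	 * CPU0 is interrupted; the handler (or a softirq) runs the TCP TX
	 * completion path, which drops the last zerocopy reference:
	 */
	put_page(hpage);
	  free_huge_page(hpage);
	    spin_lock(&hugetlb_lock);   /* already held by CPU0 -> deadlock */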

This commit addresses the issue by doing the following:
 - Make hugetlb_lock irq safe. This is mostly a simple process of
   changing spin_*lock calls to spin_*lock_irq* calls.
 - Make subpool lock irq safe in a similar manner.
 - Revert the !in_task check and workqueue handoff.

[1] https://lore.kernel.org/linux-mm/000000000000f1c03b05bc43aadc@google.com/
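
The locking conversion itself is mechanical.  A minimal before/after
sketch of the pattern (simplified for illustration, not lifted verbatim
from any one function):

	/* Before: assumes it is always safe to leave IRQs enabled. */
	spin_lock(&hugetlb_lock);
	...
	spin_unlock(&hugetlb_lock);

	/* After, in paths known to run in process context with IRQs on: */
	spin_lock_irq(&hugetlb_lock);
	...
	spin_unlock_irq(&hugetlb_lock);

	/*
	 * After, in paths whose callers may already have IRQs disabled,
	 * e.g. free_huge_page() and the subpool put path:
	 */
	unsigned long flags;

	spin_lock_irqsave(&hugetlb_lock, flags);
	...
	spin_unlock_irqrestore(&hugetlb_lock, flags);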

Link: https://lkml.kernel.org/r/20210409205254.242291-8-mike.kravetz@oracle.com


Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Barry Song <song.bao.hua@hisilicon.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: HORIGUCHI NAOYA <naoya.horiguchi@nec.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 10c6ec49
mm/hugetlb.c  +63 −106
@@ -94,9 +94,10 @@ static inline bool subpool_is_free(struct hugepage_subpool *spool)
	return true;
}

static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
						unsigned long irq_flags)
{
	spin_unlock(&spool->lock);
	spin_unlock_irqrestore(&spool->lock, irq_flags);

	/* If no pages are used, and no other handles to the subpool
	 * remain, give up any reservations based on minimum size and
@@ -135,10 +136,12 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,

void hugepage_put_subpool(struct hugepage_subpool *spool)
{
	spin_lock(&spool->lock);
	unsigned long flags;

	spin_lock_irqsave(&spool->lock, flags);
	BUG_ON(!spool->count);
	spool->count--;
	unlock_or_release_subpool(spool);
	unlock_or_release_subpool(spool, flags);
}

/*
@@ -157,7 +160,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
	if (!spool)
		return ret;

	spin_lock(&spool->lock);
	spin_lock_irq(&spool->lock);

	if (spool->max_hpages != -1) {		/* maximum size accounting */
		if ((spool->used_hpages + delta) <= spool->max_hpages)
@@ -184,7 +187,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
	}

unlock_ret:
	spin_unlock(&spool->lock);
	spin_unlock_irq(&spool->lock);
	return ret;
}

@@ -198,11 +201,12 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
				       long delta)
{
	long ret = delta;
	unsigned long flags;

	if (!spool)
		return delta;

	spin_lock(&spool->lock);
	spin_lock_irqsave(&spool->lock, flags);

	if (spool->max_hpages != -1)		/* maximum size accounting */
		spool->used_hpages -= delta;
@@ -223,7 +227,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
	 * If hugetlbfs_put_super couldn't free spool due to an outstanding
	 * quota reference, free it now.
	 */
	unlock_or_release_subpool(spool);
	unlock_or_release_subpool(spool, flags);

	return ret;
}
@@ -1412,7 +1416,7 @@ struct hstate *size_to_hstate(unsigned long size)
	return NULL;
}

static void __free_huge_page(struct page *page)
void free_huge_page(struct page *page)
{
	/*
	 * Can't pass hstate in here because it is called from the
@@ -1422,6 +1426,7 @@ static void __free_huge_page(struct page *page)
	int nid = page_to_nid(page);
	struct hugepage_subpool *spool = hugetlb_page_subpool(page);
	bool restore_reserve;
	unsigned long flags;

	VM_BUG_ON_PAGE(page_count(page), page);
	VM_BUG_ON_PAGE(page_mapcount(page), page);
@@ -1450,7 +1455,7 @@ static void __free_huge_page(struct page *page)
			restore_reserve = true;
	}

	spin_lock(&hugetlb_lock);
	spin_lock_irqsave(&hugetlb_lock, flags);
	ClearHPageMigratable(page);
	hugetlb_cgroup_uncharge_page(hstate_index(h),
				     pages_per_huge_page(h), page);
@@ -1461,66 +1466,18 @@ static void __free_huge_page(struct page *page)

	if (HPageTemporary(page)) {
		remove_hugetlb_page(h, page, false);
		spin_unlock(&hugetlb_lock);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		update_and_free_page(h, page);
	} else if (h->surplus_huge_pages_node[nid]) {
		/* remove the page from active list */
		remove_hugetlb_page(h, page, true);
		spin_unlock(&hugetlb_lock);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		update_and_free_page(h, page);
	} else {
		arch_clear_hugepage_flags(page);
		enqueue_huge_page(h, page);
		spin_unlock(&hugetlb_lock);
	}
}

/*
 * As free_huge_page() can be called from a non-task context, we have
 * to defer the actual freeing in a workqueue to prevent potential
 * hugetlb_lock deadlock.
 *
 * free_hpage_workfn() locklessly retrieves the linked list of pages to
 * be freed and frees them one-by-one. As the page->mapping pointer is
 * going to be cleared in __free_huge_page() anyway, it is reused as the
 * llist_node structure of a lockless linked list of huge pages to be freed.
 */
static LLIST_HEAD(hpage_freelist);

static void free_hpage_workfn(struct work_struct *work)
{
	struct llist_node *node;
	struct page *page;

	node = llist_del_all(&hpage_freelist);

	while (node) {
		page = container_of((struct address_space **)node,
				     struct page, mapping);
		node = node->next;
		__free_huge_page(page);
	}
}
static DECLARE_WORK(free_hpage_work, free_hpage_workfn);

void free_huge_page(struct page *page)
{
	/*
	 * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
	 */
	if (!in_task()) {
		/*
		 * Only call schedule_work() if hpage_freelist is previously
		 * empty. Otherwise, schedule_work() had been called but the
		 * workfn hasn't retrieved the list yet.
		 */
		if (llist_add((struct llist_node *)&page->mapping,
			      &hpage_freelist))
			schedule_work(&free_hpage_work);
		return;
		spin_unlock_irqrestore(&hugetlb_lock, flags);
	}

	__free_huge_page(page);
}

static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -1530,11 +1487,11 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
	hugetlb_set_page_subpool(page, NULL);
	set_hugetlb_cgroup(page, NULL);
	set_hugetlb_cgroup_rsvd(page, NULL);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	h->nr_huge_pages++;
	h->nr_huge_pages_node[nid]++;
	ClearHPageFreed(page);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
}

static void prep_compound_gigantic_page(struct page *page, unsigned int order)
@@ -1780,7 +1737,7 @@ int dissolve_free_huge_page(struct page *page)
	if (!PageHuge(page))
		return 0;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (!PageHuge(page)) {
		rc = 0;
		goto out;
@@ -1797,7 +1754,7 @@ int dissolve_free_huge_page(struct page *page)
		 * when it is dissolved.
		 */
		if (unlikely(!HPageFreed(head))) {
			spin_unlock(&hugetlb_lock);
			spin_unlock_irq(&hugetlb_lock);
			cond_resched();

			/*
@@ -1821,12 +1778,12 @@ int dissolve_free_huge_page(struct page *page)
		}
		remove_hugetlb_page(h, page, false);
		h->max_huge_pages--;
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
		update_and_free_page(h, head);
		return 0;
	}
out:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return rc;
}

@@ -1868,16 +1825,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	if (hstate_is_gigantic(h))
		return NULL;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
		goto out_unlock;
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
	if (!page)
		return NULL;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	/*
	 * We could have raced with the pool size change.
	 * Double check that and simply deallocate the new page
@@ -1887,7 +1844,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	 */
	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
		SetHPageTemporary(page);
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
		put_page(page);
		return NULL;
	} else {
@@ -1896,7 +1853,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
	}

out_unlock:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	return page;
}
@@ -1946,17 +1903,17 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
		nodemask_t *nmask, gfp_t gfp_mask)
{
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (h->free_huge_pages - h->resv_huge_pages > 0) {
		struct page *page;

		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
		if (page) {
			spin_unlock(&hugetlb_lock);
			spin_unlock_irq(&hugetlb_lock);
			return page;
		}
	}
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
}
@@ -2004,7 +1961,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)

	ret = -ENOMEM;
retry:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	for (i = 0; i < needed; i++) {
		page = alloc_surplus_huge_page(h, htlb_alloc_mask(h),
				NUMA_NO_NODE, NULL);
@@ -2021,7 +1978,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)
	 * After retaking hugetlb_lock, we need to recalculate 'needed'
	 * because either resv_huge_pages or free_huge_pages may have changed.
	 */
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	needed = (h->resv_huge_pages + delta) -
			(h->free_huge_pages + allocated);
	if (needed > 0) {
@@ -2061,12 +2018,12 @@ static int gather_surplus_pages(struct hstate *h, long delta)
		enqueue_huge_page(h, page);
	}
free:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	/* Free unnecessary surplus pages to the buddy allocator */
	list_for_each_entry_safe(page, tmp, &surplus_list, lru)
		put_page(page);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);

	return ret;
}
@@ -2116,9 +2073,9 @@ static void return_unused_surplus_pages(struct hstate *h,
	}

out:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	update_and_free_pages_bulk(h, &page_list);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
}


@@ -2352,7 +2309,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
	if (ret)
		goto out_uncharge_cgroup_reservation;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	/*
	 * glb_chg is passed to indicate whether or not a page must be taken
	 * from the global free pool (global change).  gbl_chg == 0 indicates
@@ -2360,7 +2317,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
	 */
	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
	if (!page) {
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
		page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
		if (!page)
			goto out_uncharge_cgroup;
@@ -2368,7 +2325,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
			SetHPageRestoreReserve(page);
			h->resv_huge_pages--;
		}
		spin_lock(&hugetlb_lock);
		spin_lock_irq(&hugetlb_lock);
		list_add(&page->lru, &h->hugepage_activelist);
		/* Fall through */
	}
@@ -2381,7 +2338,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
						  h_cg, page);
	}

	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	hugetlb_set_page_subpool(page, spool);

@@ -2593,9 +2550,9 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
	}

out:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	update_and_free_pages_bulk(h, &page_list);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
}
#else
static inline void try_to_free_low(struct hstate *h, unsigned long count,
@@ -2660,7 +2617,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
	 * pages in hstate via the proc/sysfs interfaces.
	 */
	mutex_lock(&h->resize_lock);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);

	/*
	 * Check for a node specific request.
@@ -2691,7 +2648,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
	 */
	if (hstate_is_gigantic(h) && !IS_ENABLED(CONFIG_CONTIG_ALLOC)) {
		if (count > persistent_huge_pages(h)) {
			spin_unlock(&hugetlb_lock);
			spin_unlock_irq(&hugetlb_lock);
			mutex_unlock(&h->resize_lock);
			NODEMASK_FREE(node_alloc_noretry);
			return -EINVAL;
@@ -2721,14 +2678,14 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
		 * page, free_huge_page will handle it by freeing the page
		 * and reducing the surplus.
		 */
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);

		/* yield cpu to avoid soft lockup */
		cond_resched();

		ret = alloc_pool_huge_page(h, nodes_allowed,
						node_alloc_noretry);
		spin_lock(&hugetlb_lock);
		spin_lock_irq(&hugetlb_lock);
		if (!ret)
			goto out;

@@ -2767,9 +2724,9 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
		list_add(&page->lru, &page_list);
	}
	/* free the pages after dropping lock */
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	update_and_free_pages_bulk(h, &page_list);
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);

	while (count < persistent_huge_pages(h)) {
		if (!adjust_pool_surplus(h, nodes_allowed, 1))
@@ -2777,7 +2734,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
	}
out:
	h->max_huge_pages = persistent_huge_pages(h);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	mutex_unlock(&h->resize_lock);

	NODEMASK_FREE(node_alloc_noretry);
@@ -2933,9 +2890,9 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
	if (err)
		return err;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	h->nr_overcommit_huge_pages = input;
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);

	return count;
}
@@ -3522,9 +3479,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
		goto out;

	if (write) {
		spin_lock(&hugetlb_lock);
		spin_lock_irq(&hugetlb_lock);
		h->nr_overcommit_huge_pages = tmp;
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
	}
out:
	return ret;
@@ -3620,7 +3577,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
	if (!delta)
		return 0;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	/*
	 * When cpuset is configured, it breaks the strict hugetlb page
	 * reservation as the accounting is done on a global variable. Such
@@ -3659,7 +3616,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
		return_unused_surplus_pages(h, (unsigned long) -delta);

out:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return ret;
}

@@ -5687,7 +5644,7 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
{
	bool ret = true;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	if (!PageHeadHuge(page) ||
	    !HPageMigratable(page) ||
	    !get_page_unless_zero(page)) {
@@ -5697,16 +5654,16 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
	ClearHPageMigratable(page);
	list_move_tail(&page->lru, list);
unlock:
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return ret;
}

void putback_active_hugepage(struct page *page)
{
	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	SetHPageMigratable(page);
	list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	put_page(page);
}

@@ -5740,12 +5697,12 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
		 */
		if (new_nid == old_nid)
			return;
		spin_lock(&hugetlb_lock);
		spin_lock_irq(&hugetlb_lock);
		if (h->surplus_huge_pages_node[old_nid]) {
			h->surplus_huge_pages_node[old_nid]--;
			h->surplus_huge_pages_node[new_nid]++;
		}
		spin_unlock(&hugetlb_lock);
		spin_unlock_irq(&hugetlb_lock);
	}
}

mm/hugetlb_cgroup.c  +4 −4
@@ -204,11 +204,11 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
	do {
		idx = 0;
		for_each_hstate(h) {
			spin_lock(&hugetlb_lock);
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(idx, h_cg, page);

			spin_unlock(&hugetlb_lock);
			spin_unlock_irq(&hugetlb_lock);
			idx++;
		}
		cond_resched();
@@ -784,7 +784,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
	if (hugetlb_cgroup_disabled())
		return;

	spin_lock(&hugetlb_lock);
	spin_lock_irq(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_page(oldhpage);
	h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
	set_hugetlb_cgroup(oldhpage, NULL);
@@ -794,7 +794,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
	set_hugetlb_cgroup(newhpage, h_cg);
	set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
	list_move(&newhpage->lru, &h->hugepage_activelist);
	spin_unlock(&hugetlb_lock);
	spin_unlock_irq(&hugetlb_lock);
	return;
}