!8482 v2 Fix hugetlb deadlock (c7b6352c) · Commits · EulixOS / Software / Kernel

mm/hugetlb.c

+116 −67

Original line number	Diff line number	Diff line
		@@ -1091,7 +1091,7 @@ static int hstate_next_node_to_alloc(struct hstate *h,
		}

		/*
		* helper for free_pool_huge_page() - return the previously saved
		* helper for remove_pool_huge_page() - return the previously saved
		* node ["this node"] from which to free a huge page. Advance the
		* next node id whether or not we find a free huge page to free so
		* that the next attempt to free addresses the next node.
		@@ -1236,24 +1236,33 @@ static inline void destroy_compound_gigantic_page(struct page *page,
		unsigned int order) { }
		#endif

		static void update_and_free_page(struct hstate *h, struct page *page)
		/*
		* Remove hugetlb page from lists, and update dtor so that page appears
		* as just a compound page. A reference is held on the page.
		*
		* Must be called with hugetlb lock held.
		*/
		static void remove_hugetlb_page(struct hstate *h, struct page *page,
		bool adjust_surplus)
		{
		int i;
		struct page *subpage = page;
		int nid = page_to_nid(page);

		VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);

		if (hstate_is_gigantic(h) && !gigantic_page_supported())
		return;

		h->nr_huge_pages--;
		h->nr_huge_pages_node[page_to_nid(page)]--;
		for (i = 0; i < pages_per_huge_page(h);
		i++, subpage = mem_map_next(subpage, page, i)) {
		subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
		1 << PG_referenced | 1 << PG_dirty |
		1 << PG_active | 1 << PG_private |
		1 << PG_writeback);
		list_del(&page->lru);

		if (PageHugeFreed(page)) {
		h->free_huge_pages--;
		h->free_huge_pages_node[nid]--;
		}
		VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
		if (adjust_surplus) {
		h->surplus_huge_pages--;
		h->surplus_huge_pages_node[nid]--;
		}

		/*
		* Very subtle
		*
		@@ -1272,16 +1281,49 @@ static void update_and_free_page(struct hstate *h, struct page *page)
		* after update_and_free_page is called.
		*/
		set_page_refcounted(page);
		if (hstate_is_gigantic(h)) {
		if (hstate_is_gigantic(h))
		set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
		else
		set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);

		h->nr_huge_pages--;
		h->nr_huge_pages_node[nid]--;
		}

		static void update_and_free_page(struct hstate *h, struct page *page)
		{
		int i;
		struct page *subpage = page;

		if (hstate_is_gigantic(h) && !gigantic_page_supported())
		return;

		for (i = 0; i < pages_per_huge_page(h);
		i++, subpage = mem_map_next(subpage, page, i)) {
		subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
		1 << PG_referenced | 1 << PG_dirty |
		1 << PG_active | 1 << PG_private |
		1 << PG_writeback);
		}

		if (hstate_is_gigantic(h)) {
		destroy_compound_gigantic_page(page, huge_page_order(h));
		free_gigantic_page(page, huge_page_order(h));
		} else {
		set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
		__free_pages(page, huge_page_order(h));
		}
		}

		static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
		{
		struct page *page, *t_page;

		list_for_each_entry_safe(page, t_page, list, lru) {
		update_and_free_page(h, page);
		cond_resched();
		}
		}

		struct hstate *size_to_hstate(unsigned long size)
		{
		struct hstate *h;
		@@ -1451,21 +1493,21 @@ void free_huge_page(struct page *page)

		if (PageHugeTemporary(page)) {
		sp_memcg_uncharge_hpage(page);
		list_del(&page->lru);
		ClearPageHugeTemporary(page);
		remove_hugetlb_page(h, page, false);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		update_and_free_page(h, page);
		} else if (h->surplus_huge_pages_node[nid]) {
		/* remove the page from active list */
		list_del(&page->lru);
		remove_hugetlb_page(h, page, true);
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		update_and_free_page(h, page);
		h->surplus_huge_pages--;
		h->surplus_huge_pages_node[nid]--;
		} else {
		arch_clear_hugepage_flags(page);
		enqueue_huge_page(h, page);
		}
		spin_unlock_irqrestore(&hugetlb_lock, flags);
		}
		}

		static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
		{
		@@ -1718,16 +1760,18 @@ static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
		}

		/*
		* Free huge page from pool from next node to free.
		* Attempt to keep persistent huge pages more or less
		* balanced over allowed nodes.
		* Remove huge page from pool from next node to free. Attempt to keep
		* persistent huge pages more or less balanced over allowed nodes.
		* This routine only 'removes' the hugetlb page. The caller must make
		* an additional call to free the page to low level allocators.
		* Called with hugetlb_lock locked.
		*/
		static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
		static struct page *remove_pool_huge_page(struct hstate *h,
		nodemask_t *nodes_allowed,
		bool acct_surplus)
		{
		int nr_nodes, node;
		int ret = 0;
		struct page *page = NULL;

		for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
		/*
		@@ -1736,23 +1780,14 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
		*/
		if ((!acct_surplus || h->surplus_huge_pages_node[node]) &&
		!list_empty(&h->hugepage_freelists[node])) {
		struct page *page =
		list_entry(h->hugepage_freelists[node].next,
		page = list_entry(h->hugepage_freelists[node].next,
		struct page, lru);
		list_del(&page->lru);
		h->free_huge_pages--;
		h->free_huge_pages_node[node]--;
		if (acct_surplus) {
		h->surplus_huge_pages--;
		h->surplus_huge_pages_node[node]--;
		}
		update_and_free_page(h, page);
		ret = 1;
		remove_hugetlb_page(h, page, acct_surplus);
		break;
		}
		}

		return ret;
		return page;
		}

		/*
		@@ -1787,7 +1822,6 @@ int dissolve_free_huge_page(struct page *page)
		if (!page_count(page)) {
		struct page *head = compound_head(page);
		struct hstate *h = page_hstate(head);
		int nid = page_to_nid(head);
		if (h->free_huge_pages - h->resv_huge_pages == 0)
		goto out;

		@@ -1818,12 +1852,11 @@ int dissolve_free_huge_page(struct page *page)
		SetPageHWPoison(page);
		ClearPageHWPoison(head);
		}
		list_del(&head->lru);
		h->free_huge_pages--;
		h->free_huge_pages_node[nid]--;
		remove_hugetlb_page(h, head, false);
		h->max_huge_pages--;
		spin_unlock_irq(&hugetlb_lock);
		update_and_free_page(h, head);
		rc = 0;
		return 0;
		}
		out:
		spin_unlock_irq(&hugetlb_lock);
		@@ -2102,17 +2135,16 @@ static int gather_surplus_pages(struct hstate *h, long delta)
		* to the associated reservation map.
		* 2) Free any unused surplus pages that may have been allocated to satisfy
		* the reservation. As many as unused_resv_pages may be freed.
		*
		* Called with hugetlb_lock held. However, the lock could be dropped (and
		* reacquired) during calls to cond_resched_lock. Whenever dropping the lock,
		* we must make sure nobody else can claim pages we are in the process of
		* freeing. Do this by ensuring resv_huge_page always is greater than the
		* number of huge pages we plan to free when dropping the lock.
		*/
		static void return_unused_surplus_pages(struct hstate *h,
		unsigned long unused_resv_pages)
		{
		unsigned long nr_pages;
		struct page *page;
		LIST_HEAD(page_list);

		/* Uncommit the reservation */
		h->resv_huge_pages -= unused_resv_pages;

		/* Cannot return gigantic pages currently */
		if (hstate_is_gigantic(h))
		@@ -2129,24 +2161,21 @@ static void return_unused_surplus_pages(struct hstate *h,
		* evenly across all nodes with memory. Iterate across these nodes
		* until we can no longer free unreserved surplus pages. This occurs
		* when the nodes with surplus pages have no free pages.
		* free_pool_huge_page() will balance the the freed pages across the
		* remove_pool_huge_page() will balance the freed pages across the
		* on-line nodes with memory and will handle the hstate accounting.
		*
		* Note that we decrement resv_huge_pages as we free the pages. If
		* we drop the lock, resv_huge_pages will still be sufficiently large
		* to cover subsequent pages we may free.
		*/
		while (nr_pages--) {
		h->resv_huge_pages--;
		unused_resv_pages--;
		if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
		page = remove_pool_huge_page(h, &node_states[N_MEMORY], 1);
		if (!page)
		goto out;
		cond_resched_lock(&hugetlb_lock);

		list_add(&page->lru, &page_list);
		}

		out:
		/* Fully uncommit the reservation */
		h->resv_huge_pages -= unused_resv_pages;
		spin_unlock_irq(&hugetlb_lock);
		update_and_free_pages_bulk(h, &page_list);
		spin_lock_irq(&hugetlb_lock);
		}


		@@ -2707,24 +2736,31 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
		nodemask_t *nodes_allowed)
		{
		int i;
		LIST_HEAD(page_list);

		if (hstate_is_gigantic(h))
		return;

		/*
		* Collect pages to be freed on a list, and free after dropping lock
		*/
		for_each_node_mask(i, *nodes_allowed) {
		struct page *page, *next;
		struct list_head *freel = &h->hugepage_freelists[i];
		list_for_each_entry_safe(page, next, freel, lru) {
		if (count >= h->nr_huge_pages)
		return;
		goto out;
		if (PageHighMem(page))
		continue;
		list_del(&page->lru);
		update_and_free_page(h, page);
		h->free_huge_pages--;
		h->free_huge_pages_node[page_to_nid(page)]--;
		remove_hugetlb_page(h, page, false);
		list_add(&page->lru, &page_list);
		}
		}

		out:
		spin_unlock_irq(&hugetlb_lock);
		update_and_free_pages_bulk(h, &page_list);
		spin_lock_irq(&hugetlb_lock);
		}
		#else
		static inline void try_to_free_low(struct hstate *h, unsigned long count,
		@@ -2770,6 +2806,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		int nid, nodemask_t *nodes_allowed)
		{
		unsigned long min_count, ret;
		struct page *page;
		LIST_HEAD(page_list);
		NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);

		/*
		@@ -2865,11 +2903,22 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
		min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
		min_count = max(count, min_count);
		try_to_free_low(h, min_count, nodes_allowed);

		/*
		* Collect pages to be removed on list without dropping lock
		*/
		while (min_count < persistent_huge_pages(h)) {
		if (!free_pool_huge_page(h, nodes_allowed, 0))
		page = remove_pool_huge_page(h, nodes_allowed, 0);
		if (!page)
		break;
		cond_resched_lock(&hugetlb_lock);

		list_add(&page->lru, &page_list);
		}
		/* free the pages after dropping lock */
		spin_unlock_irq(&hugetlb_lock);
		update_and_free_pages_bulk(h, &page_list);
		spin_lock_irq(&hugetlb_lock);

		while (count < persistent_huge_pages(h)) {
		if (!adjust_pool_surplus(h, nodes_allowed, 1))
		break;