Commit 0ed950d1 authored by Naoya Horiguchi, committed by Linus Torvalds
Browse files

mm,hwpoison: make get_hwpoison_page() call get_any_page()

__get_hwpoison_page() can fail to grab a refcount because of a race
condition, so it's helpful if we can handle that by retrying.  We already
have retry logic in get_any_page(), so make get_hwpoison_page() call
get_any_page() when called from memory_failure().

As a result, get_hwpoison_page() can return negative values (i.e.  error
code), so some callers are also changed to handle error cases.
soft_offline_page() does nothing for -EBUSY because that's enough and
users in userspace can easily handle it.  unpoison_memory() is also
unchanged because it's broken and needs thorough fixes (will be done
later).

Link: https://lkml.kernel.org/r/20210603233632.2964832-3-nao.horiguchi@gmail.com


Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a3f5d80e
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -5938,6 +5938,8 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
		*hugetlb = true;
		if (HPageFreed(page) || HPageMigratable(page))
			ret = get_page_unless_zero(page);
		else
			ret = -EBUSY;
	}
	spin_unlock_irq(&hugetlb_lock);
	return ret;
+109 −85
Original line number Diff line number Diff line
@@ -1117,13 +1117,6 @@ static inline bool HWPoisonHandlable(struct page *page)
	return PageLRU(page) || __PageMovable(page);
}

/**
 * __get_hwpoison_page() - Get refcount for memory error handling:
 * @page:	raw error page (hit by memory error)
 *
 * Return: return 0 if failed to grab the refcount, otherwise true (some
 * non-zero value.)
 */
static int __get_hwpoison_page(struct page *page)
{
	struct page *head = compound_head(page);
@@ -1168,15 +1161,6 @@ static int __get_hwpoison_page(struct page *page)
	return 0;
}

/*
 * Safely get reference count of an arbitrary page.
 *
 * Returns 0 for a free page, 1 for an in-use page,
 * -EIO for a page-type we cannot handle and -EBUSY if we raced with an
 * allocation.
 * We only incremented refcount in case the page was already in-use and it
 * is a known type we can handle.
 */
static int get_any_page(struct page *p, unsigned long flags)
{
	int ret = 0, pass = 0;
@@ -1186,7 +1170,9 @@ static int get_any_page(struct page *p, unsigned long flags)
		count_increased = true;

try_again:
	if (!count_increased && !__get_hwpoison_page(p)) {
	if (!count_increased) {
		ret = __get_hwpoison_page(p);
		if (!ret) {
			if (page_count(p)) {
				/* We raced with an allocation, retry. */
				if (pass++ < 3)
@@ -1198,7 +1184,15 @@ static int get_any_page(struct page *p, unsigned long flags)
					goto try_again;
				ret = -EIO;
			}
	} else {
			goto out;
		} else if (ret == -EBUSY) {
			/* We raced with freeing huge page to buddy, retry. */
			if (pass++ < 3)
				goto try_again;
			goto out;
		}
	}

	if (PageHuge(p) || HWPoisonHandlable(p)) {
		ret = 1;
	} else {
@@ -1215,21 +1209,38 @@ static int get_any_page(struct page *p, unsigned long flags)
		put_page(p);
		ret = -EIO;
	}
	}

out:
	return ret;
}

static int get_hwpoison_page(struct page *p, unsigned long flags,
			     enum mf_flags ctxt)
/**
 * get_hwpoison_page() - Get refcount for memory error handling
 * @p:		Raw error page (hit by memory error)
 * @flags:	Flags controlling behavior of error handling
 *
 * get_hwpoison_page() takes a page refcount of an error page to handle memory
 * error on it, after checking that the error page is in a well-defined state
 * (defined as a page-type on which we can successfully handle the memory
 * error, such as an LRU page or a hugetlb page).
 *
 * Memory error handling could be triggered at any time on any type of page,
 * so it's prone to race with typical memory management lifecycle (like
 * allocation and free).  So to avoid such races, get_hwpoison_page() takes
 * extra care for the error page's state (as done in __get_hwpoison_page()),
 * and has some retry logic in get_any_page().
 *
 * Return: 0 on failure,
 *         1 on success for in-use pages in a well-defined state,
 *         -EIO for pages on which we can not handle memory errors,
 *         -EBUSY when get_hwpoison_page() has raced with page lifecycle
 *         operations like allocation and free.
 */
static int get_hwpoison_page(struct page *p, unsigned long flags)
{
	int ret;

	zone_pcp_disable(page_zone(p));
	if (ctxt == MF_SOFT_OFFLINE)
	ret = get_any_page(p, flags);
	else
		ret = __get_hwpoison_page(p);
	zone_pcp_enable(page_zone(p));

	return ret;
@@ -1418,7 +1429,9 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)

	num_poisoned_pages_inc();

	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
	if (!(flags & MF_COUNT_INCREASED)) {
		res = get_hwpoison_page(p, flags);
		if (!res) {
			/*
			 * Check "filter hit" and "race with other subpage."
			 */
@@ -1439,6 +1452,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
			}
			action_result(pfn, MF_MSG_FREE_HUGE, res);
			return res == MF_RECOVERED ? 0 : -EBUSY;
		} else if (res < 0) {
			action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
			return -EBUSY;
		}
	}

	lock_page(head);
@@ -1641,7 +1658,9 @@ int memory_failure(unsigned long pfn, int flags)
	 * In fact it's dangerous to directly bump up page count from 0,
	 * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
	 */
	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
	if (!(flags & MF_COUNT_INCREASED)) {
		res = get_hwpoison_page(p, flags);
		if (!res) {
			if (is_free_buddy_page(p)) {
				if (take_page_off_buddy(p)) {
					page_ref_inc(p);
@@ -1663,6 +1682,11 @@ int memory_failure(unsigned long pfn, int flags)
				res = -EBUSY;
			}
			goto unlock_mutex;
		} else if (res < 0) {
			action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
			res = -EBUSY;
			goto unlock_mutex;
		}
	}

	if (PageTransHuge(hpage)) {
@@ -1940,7 +1964,7 @@ int unpoison_memory(unsigned long pfn)
		return 0;
	}

	if (!get_hwpoison_page(p, flags, 0)) {
	if (!get_hwpoison_page(p, flags)) {
		if (TestClearPageHWPoison(p))
			num_poisoned_pages_dec();
		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -2156,7 +2180,7 @@ int soft_offline_page(unsigned long pfn, int flags)

retry:
	get_online_mems();
	ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
	ret = get_hwpoison_page(page, flags);
	put_online_mems();

	if (ret > 0) {