Commit 6b9a217e authored by Oscar Salvador, committed by Linus Torvalds
Browse files

mm,hwpoison: refactor soft_offline_huge_page and __soft_offline_page



Merging soft_offline_huge_page and __soft_offline_page lets us get rid of
quite a bit of duplicated code, and makes the code much easier to follow.

Now, __soft_offline_page will handle both normal and hugetlb pages.

Signed-off-by: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Aristeu Rozanski <aris@ruivo.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dmitry Yakunin <zeil@yandex-team.ru>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Oscar Salvador <osalvador@suse.com>
Cc: Qian Cai <cai@lca.pw>
Cc: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200922135650.1634-11-osalvador@suse.de


Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 79f5f8fa
Loading
Loading
Loading
Loading
+82 −100
Original line number Diff line number Diff line
@@ -65,13 +65,31 @@ int sysctl_memory_failure_recovery __read_mostly = 1;

atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);

/*
 * NOTE(review): this listing looks like a diff whose +/- markers were
 * stripped; the two-parameter prototype below appears to be the removed
 * line and the three-parameter one its replacement -- confirm against
 * the tree.
 *
 * page_handle_poison - mark @page with SetPageHWPoison().
 *
 * When @hugepage_or_freepage is set, the page is first passed to
 * dissolve_free_huge_page() and take_page_off_buddy(); if either step
 * fails, false is returned before the page is marked.
 * Otherwise the page is marked, put_page() is called when @release is
 * true, page_ref_inc() and num_poisoned_pages_inc() are called, and
 * true is returned.
 */
static void page_handle_poison(struct page *page, bool release)
static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
{
	if (hugepage_or_freepage) {
		/*
		 * Doing this check for free pages is also fine since dissolve_free_huge_page
		 * returns 0 for non-hugetlb pages as well.
		 */
		if (dissolve_free_huge_page(page) || !take_page_off_buddy(page))
			/*
			 * We could fail to take off the target page from buddy
			 * for example due to racy page allocation, but that's
			 * acceptable because soft-offlined page is not broken
			 * and if someone really wants to use it, they should
			 * take it.
			 */
			return false;
	}

	SetPageHWPoison(page);
	if (release)
		put_page(page);
	page_ref_inc(page);
	num_poisoned_pages_inc();

	return true;
}

#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
@@ -1725,63 +1743,51 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
	return ret;
}

/*
 * NOTE(review): the +/- markers of this diff listing appear to have been
 * lost, so lines of the old soft_offline_huge_page() and of the new
 * isolate_page() are interleaved below -- confirm against the tree.
 *
 * As far as the isolate_page() lines show: hugetlb pages go through
 * isolate_huge_page(); other pages go through isolate_lru_page() or
 * isolate_movable_page() and are added to @pagelist on success. For
 * isolated LRU pages the NR_ISOLATED_ANON + page_is_file_lru() node
 * counter is incremented, put_page() is called either way, and the
 * return value says whether isolation succeeded.
 */
static int soft_offline_huge_page(struct page *page, int flags)
static bool isolate_page(struct page *page, struct list_head *pagelist)
{
	int ret;
	unsigned long pfn = page_to_pfn(page);
	struct page *hpage = compound_head(page);
	LIST_HEAD(pagelist);
	bool isolated = false;
	bool lru = PageLRU(page);

	/*
	 * This double-check of PageHWPoison is to avoid the race with
	 * memory_failure(). See also comment in __soft_offline_page().
	 */
	lock_page(hpage);
	if (PageHWPoison(hpage)) {
		unlock_page(hpage);
		put_page(hpage);
		pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
		return -EBUSY;
	if (PageHuge(page)) {
		isolated = isolate_huge_page(page, pagelist);
	} else {
		if (lru)
			isolated = !isolate_lru_page(page);
		else
			isolated = !isolate_movable_page(page, ISOLATE_UNEVICTABLE);

		if (isolated)
			list_add(&page->lru, pagelist);
	}
	unlock_page(hpage);

	ret = isolate_huge_page(hpage, &pagelist);
	if (isolated && lru)
		inc_node_page_state(page, NR_ISOLATED_ANON +
				    page_is_file_lru(page));

	/*
	 * get_any_page() and isolate_huge_page() take a refcount each,
	 * so need to drop one here.
	 * If we succeed to isolate the page, we grabbed another refcount on
	 * the page, so we can safely drop the one we got from get_any_page().
	 * If we failed to isolate the page, it means that we cannot go further
	 * and we will return an error, so drop the reference we got from
	 * get_any_page() as well.
	 */
	put_page(hpage);
	if (!ret) {
		pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
		return -EBUSY;
	put_page(page);
	return isolated;
}

	ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
				MIGRATE_SYNC, MR_MEMORY_FAILURE);
	if (ret) {
		pr_info("soft offline: %#lx: hugepage migration failed %d, type %lx (%pGp)\n",
			pfn, ret, page->flags, &page->flags);
		if (!list_empty(&pagelist))
			putback_movable_pages(&pagelist);
		if (ret > 0)
			ret = -EIO;
	} else {
/*
		 * We set PG_hwpoison only when we were able to take the page
		 * off the buddy.
 * __soft_offline_page handles hugetlb-pages and non-hugetlb pages.
 * If the page is a non-dirty unmapped page-cache page, it simply invalidates.
 * If the page is mapped, it migrates the contents over.
 */
		if (!dissolve_free_huge_page(page) && take_page_off_buddy(page))
			page_handle_poison(page, false);
		else
			ret = -EBUSY;
	}
	return ret;
}

/*
 * NOTE(review): this function is shown as a diff with its +/- markers
 * stripped and with lines elided between hunks (see the "@@" headers in
 * the body), so old and new statements are interleaved and the body is
 * not complete here -- confirm against the tree.
 *
 * Visible return values: 0 after a successful invalidation, -EBUSY when
 * the page is already poisoned, when isolation fails or when
 * page_handle_poison() fails, and -EIO when migrate_pages() returns a
 * positive value.
 */
static int __soft_offline_page(struct page *page, int flags)
static int __soft_offline_page(struct page *page)
{
	int ret;
	int ret = 0;
	unsigned long pfn = page_to_pfn(page);
	struct page *hpage = compound_head(page);
	char const *msg_page[] = {"page", "hugepage"};
	bool huge = PageHuge(page);
	LIST_HEAD(pagelist);

	/*
	 * Check PageHWPoison again inside page lock because PageHWPoison
@@ -1790,6 +1796,7 @@ static int __soft_offline_page(struct page *page, int flags)
	 * so there's no race between soft_offline_page() and memory_failure().
	 */
	lock_page(page);
	if (!PageHuge(page))
		wait_on_page_writeback(page);
	if (PageHWPoison(page)) {
		unlock_page(page);
@@ -1797,91 +1804,66 @@ static int __soft_offline_page(struct page *page, int flags)
		pr_info("soft offline: %#lx page already poisoned\n", pfn);
		return -EBUSY;
	}

	if (!PageHuge(page))
		/*
		 * Try to invalidate first. This should work for
		 * non dirty unmapped page cache pages.
		 */
		ret = invalidate_inode_page(page);
	unlock_page(page);

	/*
	 * RED-PEN would be better to keep it isolated here, but we
	 * would need to fix isolation locking first.
	 */
	if (ret == 1) {
	if (ret) {
		pr_info("soft_offline: %#lx: invalidated\n", pfn);
		page_handle_poison(page, true);
		page_handle_poison(page, false, true);
		return 0;
	}

	/*
	 * Simple invalidation didn't work.
	 * Try to migrate to a new page instead. migrate.c
	 * handles a large number of cases for us.
	 */
	if (PageLRU(page))
		ret = isolate_lru_page(page);
	else
		ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
	/*
	 * Drop the page reference which came from get_any_page();
	 * a successful isolate_lru_page() already took another one.
	 */
	put_page(page);
	if (!ret) {
		LIST_HEAD(pagelist);
		/*
		 * After isolated lru page, the PageLRU will be cleared,
		 * so use !__PageMovable instead for LRU page's mapping
		 * cannot have PAGE_MAPPING_MOVABLE.
		 */
		if (!__PageMovable(page))
			inc_node_page_state(page, NR_ISOLATED_ANON +
						page_is_file_lru(page));
		list_add(&page->lru, &pagelist);
	if (isolate_page(hpage, &pagelist)) {
		ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
					MIGRATE_SYNC, MR_MEMORY_FAILURE);
		if (!ret) {
			page_handle_poison(page, true);
			bool release = !huge;

			/* put_page() in page_handle_poison() is requested only for non-hugetlb pages */
			if (!page_handle_poison(page, huge, release))
				ret = -EBUSY;
		} else {
			if (!list_empty(&pagelist))
				putback_movable_pages(&pagelist);

			pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
				pfn, ret, page->flags, &page->flags);
			pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n",
				pfn, msg_page[huge], ret, page->flags, &page->flags);
			if (ret > 0)
				ret = -EIO;
		}
	} else {
		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n",
			pfn, ret, page_count(page), page->flags, &page->flags);
		pr_info("soft offline: %#lx: %s isolation failed: %d, page count %d, type %lx (%pGp)\n",
			pfn, msg_page[huge], ret, page_count(page), page->flags, &page->flags);
		ret = -EBUSY;
	}
	return ret;
}

/*
 * NOTE(review): diff listing with the +/- markers stripped; the
 * (page, flags) prototype and the PageHuge() dispatch on "ret" appear to
 * be the removed lines -- confirm against the tree.
 *
 * A page that is not hugetlb but whose compound head is PageTransHuge
 * is split with try_to_split_thp_page() first; -EBUSY is returned when
 * the split fails. The page is then handed to __soft_offline_page().
 */
static int soft_offline_in_use_page(struct page *page, int flags)
static int soft_offline_in_use_page(struct page *page)
{
	int ret;
	struct page *hpage = compound_head(page);

	if (!PageHuge(page) && PageTransHuge(hpage))
		if (try_to_split_thp_page(page, "soft offline") < 0)
			return -EBUSY;

	if (PageHuge(page))
		ret = soft_offline_huge_page(page, flags);
	else
		ret = __soft_offline_page(page, flags);
	return ret;
	return __soft_offline_page(page);
}

/*
 * NOTE(review): diff listing with the +/- markers stripped; the
 * open-coded dissolve_free_huge_page()/take_page_off_buddy() check
 * appears to be the removed version and the single
 * page_handle_poison(page, true, false) call its replacement -- confirm
 * against the tree.
 *
 * Returns 0 when page_handle_poison() succeeds and -EBUSY otherwise.
 */
static int soft_offline_free_page(struct page *page)
{
	int rc = -EBUSY;
	int rc = 0;

	if (!dissolve_free_huge_page(page) && take_page_off_buddy(page)) {
		page_handle_poison(page, false);
		rc = 0;
	}
	if (!page_handle_poison(page, true, false))
		rc = -EBUSY;

	return rc;
}
@@ -1932,7 +1914,7 @@ int soft_offline_page(unsigned long pfn, int flags)
	put_online_mems();

	if (ret > 0)
		ret = soft_offline_in_use_page(page, flags);
		ret = soft_offline_in_use_page(page);
	else if (ret == 0)
		ret = soft_offline_free_page(page);