Commit c36e2024 authored by Shiyang Ruan's avatar Shiyang Ruan Committed by Andrew Morton
Browse files

mm: introduce mf_dax_kill_procs() for fsdax case

This new function is a variant of mf_generic_kill_procs that accepts a
file, offset pair instead of a struct to support multiple files sharing a
DAX mapping.  It is intended to be called by the file systems as part of
the memory_failure handler after the file system performed a reverse
mapping from the storage address to the file and file offset.

Link: https://lkml.kernel.org/r/20220603053738.1218681-6-ruansy.fnst@fujitsu.com


Signed-off-by: default avatarShiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: default avatarDan Williams <dan.j.williams@intel.com>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarMiaohe Lin <linmiaohe@huawei.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.wiliams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Goldwyn Rodrigues <rgoldwyn@suse.com>
Cc: Goldwyn Rodrigues <rgoldwyn@suse.de>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Ritesh Harjani <riteshh@linux.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 2f437eff
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -3178,6 +3178,8 @@ enum mf_flags {
	MF_UNPOISON = 1 << 4,
	MF_SW_SIMULATED = 1 << 5,
};
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
		      unsigned long count, int mf_flags);
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
+86 −10
Original line number Diff line number Diff line
@@ -297,10 +297,9 @@ void shake_page(struct page *p)
}
EXPORT_SYMBOL_GPL(shake_page);

static unsigned long dev_pagemap_mapping_shift(struct page *page,
		struct vm_area_struct *vma)
static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
		unsigned long address)
{
	unsigned long address = vma_address(page, vma);
	unsigned long ret = 0;
	pgd_t *pgd;
	p4d_t *p4d;
@@ -340,9 +339,13 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
/*
 * Schedule a process for later kill.
 * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
 *
 * Notice: @fsdax_pgoff is used only when @p is a fsdax page.
 *   In other cases, such as anonymous and file-backed page, the address to be
 *   killed can be calculated by @p itself.
 */
static void add_to_kill(struct task_struct *tsk, struct page *p,
		       struct vm_area_struct *vma,
			pgoff_t fsdax_pgoff, struct vm_area_struct *vma,
			struct list_head *to_kill)
{
	struct to_kill *tk;
@@ -354,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
	}

	tk->addr = page_address_in_vma(p, vma);
	if (is_zone_device_page(p))
		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
	else
	if (is_zone_device_page(p)) {
		/*
		 * Since page->mapping is not used for fsdax, we need to
		 * calculate the address based on the vma.
		 */
		if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
	} else
		tk->size_shift = page_shift(compound_head(p));

	/*
@@ -505,7 +514,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
			if (!page_mapped_in_vma(page, vma))
				continue;
			if (vma->vm_mm == t->mm)
				add_to_kill(t, page, vma, to_kill);
				add_to_kill(t, page, 0, vma, to_kill);
		}
	}
	read_unlock(&tasklist_lock);
@@ -541,12 +550,40 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
			 * to be informed of all such data corruptions.
			 */
			if (vma->vm_mm == t->mm)
				add_to_kill(t, page, vma, to_kill);
				add_to_kill(t, page, 0, vma, to_kill);
		}
	}
	read_unlock(&tasklist_lock);
	i_mmap_unlock_read(mapping);
}

#ifdef CONFIG_FS_DAX
/*
 * Collect processes when the error hit a fsdax page.
 */
static void collect_procs_fsdax(struct page *page,
		struct address_space *mapping, pgoff_t pgoff,
		struct list_head *to_kill)
{
	struct vm_area_struct *vma;
	struct task_struct *tsk;

	i_mmap_lock_read(mapping);
	read_lock(&tasklist_lock);
	for_each_process(tsk) {
		struct task_struct *t = task_early_kill(tsk, true);

		if (!t)
			continue;
		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
			if (vma->vm_mm == t->mm)
				add_to_kill(t, page, pgoff, vma, to_kill);
		}
	}
	read_unlock(&tasklist_lock);
	i_mmap_unlock_read(mapping);
}
#endif /* CONFIG_FS_DAX */

/*
 * Collect the processes who have the corrupted page mapped to kill.
@@ -1588,6 +1625,45 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags,
	return rc;
}

#ifdef CONFIG_FS_DAX
/**
 * mf_dax_kill_procs - Collect and kill processes who are using this file range
 * @mapping:	address_space of the file in use
 * @index:	start pgoff of the range within the file
 * @count:	length of the range, in unit of PAGE_SIZE
 * @mf_flags:	memory failure flags
 *
 * Intended to be called by filesystems from their memory_failure handler,
 * after the filesystem has reverse-mapped the failed storage address to a
 * (file, offset) pair.  Always behaves as MF_ACTION_REQUIRED | MF_MUST_KILL.
 *
 * Return: 0 on success, -EBUSY if a DAX entry in the range could not be
 * locked (pages already processed stay poisoned in that case).
 */
int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
		unsigned long count, int mf_flags)
{
	LIST_HEAD(to_kill);
	dax_entry_t cookie;
	struct page *page;
	size_t end = index + count;

	mf_flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;

	for (; index < end; index++) {
		page = NULL;
		/* Pin the DAX entry against truncate/reflink while we work. */
		cookie = dax_lock_mapping_entry(mapping, index, &page);
		if (!cookie)
			return -EBUSY;
		/* Hole or empty entry: nothing mapped at this offset. */
		if (!page)
			goto unlock;

		/* Mark poisoned before unmapping so faults see the error. */
		SetPageHWPoison(page);

		collect_procs_fsdax(page, mapping, index, &to_kill);
		unmap_and_kill(&to_kill, page_to_pfn(page), mapping,
				index, mf_flags);
unlock:
		dax_unlock_mapping_entry(mapping, index, cookie);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
#endif /* CONFIG_FS_DAX */

/*
 * Called from hugetlb code with hugetlb_lock held.
 *