Commit cc5f2704 authored by David Hildenbrand's avatar David Hildenbrand Committed by Linus Torvalds
Browse files

proc/vmcore: convert oldmem_pfn_is_ram callback to more generic vmcore callbacks

Let's support multiple registered callbacks, making sure that
registering vmcore callbacks cannot fail.  Make the callback return a
bool instead of an int, handling how to deal with errors internally.
Drop unused HAVE_OLDMEM_PFN_IS_RAM.

We soon want to make use of this infrastructure from other drivers:
virtio-mem, registering one callback for each virtio-mem device, to
prevent reading unplugged virtio-mem memory.

Handle it via a generic vmcore_cb structure, prepared for future
extensions: for example, once we support virtio-mem on s390x where the
vmcore is completely constructed in the second kernel, we want to detect
and add plugged virtio-mem memory ranges to the vmcore in order for them
to get dumped properly.

Handle corner cases that are unexpected and shouldn't happen in sane
setups: registering a callback after the vmcore has already been opened
(warn only) and unregistering a callback after the vmcore has already been
opened (warn and essentially read only zeroes from that point on).

Link: https://lkml.kernel.org/r/20211005121430.30136-6-david@redhat.com


Signed-off-by: default avatarDavid Hildenbrand <david@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 2c9feeae
Loading
Loading
Loading
Loading
+12 −1
Original line number Diff line number Diff line
@@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn)
		      (pfn >= aperture_pfn_start + aperture_page_count));
}

#ifdef CONFIG_PROC_VMCORE
static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
{
	return !!gart_mem_pfn_is_ram(pfn);
}

static struct vmcore_cb gart_vmcore_cb = {
	.pfn_is_ram = gart_oldmem_pfn_is_ram,
};
#endif

static void __init exclude_from_core(u64 aper_base, u32 aper_order)
{
	aperture_pfn_start = aper_base >> PAGE_SHIFT;
	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
#ifdef CONFIG_PROC_VMCORE
	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
	register_vmcore_cb(&gart_vmcore_cb);
#endif
#ifdef CONFIG_PROC_KCORE
	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
+7 −4
Original line number Diff line number Diff line
@@ -12,10 +12,10 @@
 * The kdump kernel has to check whether a pfn of the crashed kernel
 * was a ballooned page. vmcore is using this function to decide
 * whether to access a pfn of the crashed kernel.
 * Returns 0 if the pfn is not backed by a RAM page, the caller may
 * Returns "false" if the pfn is not backed by a RAM page, the caller may
 * handle the pfn special in this case.
 */
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
{
	struct xen_hvm_get_mem_type a = {
		.domid = DOMID_SELF,
@@ -24,10 +24,13 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn)

	if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) {
		pr_warn_once("Unexpected HVMOP_get_mem_type failure\n");
		return -ENXIO;
		return true;
	}
	return a.mem_type != HVMMEM_mmio_dm;
}
static struct vmcore_cb xen_vmcore_cb = {
	.pfn_is_ram = xen_vmcore_pfn_is_ram,
};
#endif

static void xen_hvm_exit_mmap(struct mm_struct *mm)
@@ -61,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void)
	if (is_pagetable_dying_supported())
		pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
	WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
	register_vmcore_cb(&xen_vmcore_cb);
#endif
}
+69 −30
Original line number Diff line number Diff line
@@ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0);
/* Device Dump Size */
static size_t vmcoredd_orig_sz;

static DECLARE_RWSEM(vmcore_cb_rwsem);
/* List of registered vmcore callbacks. */
static LIST_HEAD(vmcore_cb_list);
/* Whether we had a surprise unregistration of a callback. */
static bool vmcore_cb_unstable;
/* Whether the vmcore has been opened once. */
static bool vmcore_opened;

void register_vmcore_cb(struct vmcore_cb *cb)
{
	down_write(&vmcore_cb_rwsem);
	INIT_LIST_HEAD(&cb->next);
	list_add_tail(&cb->next, &vmcore_cb_list);
	/*
 * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
 * The called function has to take care of module refcounting.
	 * Registering a vmcore callback after the vmcore was opened is
	 * very unusual (e.g., manual driver loading).
	 */
static int (*oldmem_pfn_is_ram)(unsigned long pfn);

int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
{
	if (oldmem_pfn_is_ram)
		return -EBUSY;
	oldmem_pfn_is_ram = fn;
	return 0;
	if (vmcore_opened)
		pr_warn_once("Unexpected vmcore callback registration\n");
	up_write(&vmcore_cb_rwsem);
}
EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
EXPORT_SYMBOL_GPL(register_vmcore_cb);

void unregister_oldmem_pfn_is_ram(void)
void unregister_vmcore_cb(struct vmcore_cb *cb)
{
	oldmem_pfn_is_ram = NULL;
	wmb();
	down_write(&vmcore_cb_rwsem);
	list_del(&cb->next);
	/*
	 * Unregistering a vmcore callback after the vmcore was opened is
	 * very unusual (e.g., forced driver removal), but we cannot stop
	 * unregistering.
	 */
	if (vmcore_opened) {
		pr_warn_once("Unexpected vmcore callback unregistration\n");
		vmcore_cb_unstable = true;
	}
EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
	up_write(&vmcore_cb_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_vmcore_cb);

static bool pfn_is_ram(unsigned long pfn)
{
	int (*fn)(unsigned long pfn);
	/* pfn is ram unless fn() checks pagetype */
	struct vmcore_cb *cb;
	bool ret = true;

	/*
	 * Ask hypervisor if the pfn is really ram.
	 * A ballooned page contains no data and reading from such a page
	 * will cause high load in the hypervisor.
	 */
	fn = oldmem_pfn_is_ram;
	if (fn)
		ret = !!fn(pfn);
	lockdep_assert_held_read(&vmcore_cb_rwsem);
	if (unlikely(vmcore_cb_unstable))
		return false;

	list_for_each_entry(cb, &vmcore_cb_list, next) {
		if (unlikely(!cb->pfn_is_ram))
			continue;
		ret = cb->pfn_is_ram(cb, pfn);
		if (!ret)
			break;
	}

	return ret;
}

static int open_vmcore(struct inode *inode, struct file *file)
{
	down_read(&vmcore_cb_rwsem);
	vmcore_opened = true;
	up_read(&vmcore_cb_rwsem);

	return 0;
}

/* Reads a page from the oldmem device from given offset. */
ssize_t read_from_oldmem(char *buf, size_t count,
			 u64 *ppos, int userbuf,
@@ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
	offset = (unsigned long)(*ppos % PAGE_SIZE);
	pfn = (unsigned long)(*ppos / PAGE_SIZE);

	down_read(&vmcore_cb_rwsem);
	do {
		if (count > (PAGE_SIZE - offset))
			nr_bytes = PAGE_SIZE - offset;
@@ -136,9 +166,11 @@ ssize_t read_from_oldmem(char *buf, size_t count,
				tmp = copy_oldmem_page(pfn, buf, nr_bytes,
						       offset, userbuf);

			if (tmp < 0)
			if (tmp < 0) {
				up_read(&vmcore_cb_rwsem);
				return tmp;
			}
		}
		*ppos += nr_bytes;
		count -= nr_bytes;
		buf += nr_bytes;
@@ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
		offset = 0;
	} while (count);

	up_read(&vmcore_cb_rwsem);
	return read;
}

@@ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
			    unsigned long from, unsigned long pfn,
			    unsigned long size, pgprot_t prot)
{
	int ret;

	/*
	 * Check if oldmem_pfn_is_ram was registered to avoid
	 * looping over all pages without a reason.
	 */
	if (oldmem_pfn_is_ram)
		return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
	down_read(&vmcore_cb_rwsem);
	if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
		ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
	else
		return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
		ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
	up_read(&vmcore_cb_rwsem);
	return ret;
}

static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
@@ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
#endif

static const struct proc_ops vmcore_proc_ops = {
	.proc_open	= open_vmcore,
	.proc_read	= read_vmcore,
	.proc_lseek	= default_llseek,
	.proc_mmap	= mmap_vmcore,
+23 −3
Original line number Diff line number Diff line
@@ -91,9 +91,29 @@ static inline void vmcore_unusable(void)
		elfcorehdr_addr = ELFCORE_ADDR_ERR;
}

#define HAVE_OLDMEM_PFN_IS_RAM 1
extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn));
extern void unregister_oldmem_pfn_is_ram(void);
/**
 * struct vmcore_cb - driver callbacks for /proc/vmcore handling
 * @pfn_is_ram: check whether a PFN really is RAM and should be accessed when
 *              reading the vmcore. Will return "true" if it is RAM or if the
 *              callback cannot tell. If any callback returns "false", it's not
 *              RAM and the page must not be accessed; zeroes should be
 *              indicated in the vmcore instead. For example, a ballooned page
 *              contains no data and reading from such a page will cause high
 *              load in the hypervisor.
 * @next: List head to manage registered callbacks internally; initialized by
 *        register_vmcore_cb().
 *
 * vmcore callbacks allow drivers managing physical memory ranges to
 * coordinate with vmcore handling code, for example, to prevent accessing
 * physical memory ranges that should not be accessed when reading the vmcore,
 * although included in the vmcore header as memory ranges to dump.
 */
struct vmcore_cb {
	bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn);
	struct list_head next;
};
extern void register_vmcore_cb(struct vmcore_cb *cb);
extern void unregister_vmcore_cb(struct vmcore_cb *cb);

#else /* !CONFIG_CRASH_DUMP */
static inline bool is_kdump_kernel(void) { return 0; }