Commit b8e55a3e authored by Hari Bathini's avatar Hari Bathini Committed by Michael Ellerman
Browse files

powerpc/kexec_file: Avoid stomping memory used by special regions



crashkernel region could have an overlap with special memory regions
like OPAL, RTAS, TCE table & such. These regions are referred to as
excluded memory ranges. Setup these ranges during image probe in order
to avoid them while finding the buffer for different kdump segments.
Override arch_kexec_locate_mem_hole() to locate a memory hole taking
these ranges into account.

Signed-off-by: default avatarHari Bathini <hbathini@linux.ibm.com>
Reviewed-by: default avatarThiago Jung Bauermann <bauerman@linux.ibm.com>
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/159602281047.575379.6636807148335160795.stgit@hbathini
parent 180adfc5
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co
#ifdef CONFIG_KEXEC_FILE
extern const struct kexec_file_ops kexec_elf64_ops;

#ifdef CONFIG_IMA_KEXEC
#define ARCH_HAS_KIMAGE_ARCH

struct kimage_arch {
	struct crash_mem *exclude_ranges;

#ifdef CONFIG_IMA_KEXEC
	phys_addr_t ima_buffer_addr;
	size_t ima_buffer_size;
};
#endif
};

int setup_purgatory(struct kimage *image, const void *slave_code,
		    const void *fdt, unsigned long kernel_load_addr,
@@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
			unsigned long initrd_load_addr,
			unsigned long initrd_len, const char *cmdline);
#endif /* CONFIG_PPC64 */

#endif /* CONFIG_KEXEC_FILE */

#else /* !CONFIG_KEXEC_CORE */
+14 −0
Original line number Diff line number Diff line
@@ -7,5 +7,19 @@
void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
int add_tce_mem_ranges(struct crash_mem **mem_ranges);
int add_initrd_mem_range(struct crash_mem **mem_ranges);
#ifdef CONFIG_PPC_BOOK3S_64
int add_htab_mem_range(struct crash_mem **mem_ranges);
#else
static inline int add_htab_mem_range(struct crash_mem **mem_ranges)
{
	return 0;
}
#endif
int add_kernel_mem_range(struct crash_mem **mem_ranges);
int add_rtas_mem_range(struct crash_mem **mem_ranges);
int add_opal_mem_range(struct crash_mem **mem_ranges);
int add_reserved_mem_ranges(struct crash_mem **mem_ranges);

#endif /* _ASM_POWERPC_KEXEC_RANGES_H */
+8 −0
Original line number Diff line number Diff line
@@ -46,6 +46,14 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
	if (ret)
		goto out;

	if (image->type == KEXEC_TYPE_CRASH) {
		/* min & max buffer values for kdump case */
		kbuf.buf_min = pbuf.buf_min = crashk_res.start;
		kbuf.buf_max = pbuf.buf_max =
				((crashk_res.end < ppc64_rma_size) ?
				 crashk_res.end : (ppc64_rma_size - 1));
	}

	ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
	if (ret)
		goto out;
+335 −2
Original line number Diff line number Diff line
@@ -17,12 +17,262 @@
#include <linux/kexec.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/memblock.h>
#include <asm/kexec_ranges.h>

const struct kexec_file_ops * const kexec_file_loaders[] = {
	&kexec_elf64_ops,
	NULL
};

/**
 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
 *                             regions like opal/rtas, tce-table, initrd,
 *                             kernel, htab which should be avoided while
 *                             setting up kexec load segments.
 * @mem_ranges:                Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret;

	ret = add_tce_mem_ranges(mem_ranges);
	if (ret)
		goto out;

	ret = add_initrd_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_htab_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_kernel_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_rtas_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_opal_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_reserved_mem_ranges(mem_ranges);
	if (ret)
		goto out;

	/* exclude memory ranges should be sorted for easy lookup */
	sort_memory_ranges(*mem_ranges, true);
out:
	if (ret)
		pr_err("Failed to setup exclude memory ranges\n");
	return ret;
}

/**
 * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
 *                              in the memory regions between buf_min & buf_max
 *                              for the buffer. If found, sets kbuf->mem.
 * @kbuf:                       Buffer contents and memory parameters.
 * @buf_min:                    Minimum address for the buffer.
 * @buf_max:                    Maximum address for the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
				      u64 buf_min, u64 buf_max)
{
	int ret = -EADDRNOTAVAIL;
	phys_addr_t start, end;
	u64 i;

	for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE,
			       MEMBLOCK_NONE, &start, &end, NULL) {
		/*
		 * memblock uses [start, end) convention while it is
		 * [start, end] here. Fix the off-by-one to have the
		 * same convention.
		 */
		end -= 1;

		if (start > buf_max)
			continue;

		/* Memory hole not found */
		if (end < buf_min)
			break;

		/* Adjust memory region based on the given range */
		if (start < buf_min)
			start = buf_min;
		if (end > buf_max)
			end = buf_max;

		start = ALIGN(start, kbuf->buf_align);
		if (start < end && (end - start + 1) >= kbuf->memsz) {
			/* Suitable memory range found. Set kbuf->mem */
			kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1,
					       kbuf->buf_align);
			ret = 0;
			break;
		}
	}

	return ret;
}

/**
 * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
 *                                  suitable buffer with top down approach.
 * @kbuf:                           Buffer contents and memory parameters.
 * @buf_min:                        Minimum address for the buffer.
 * @buf_max:                        Maximum address for the buffer.
 * @emem:                           Exclude memory ranges.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
					  u64 buf_min, u64 buf_max,
					  const struct crash_mem *emem)
{
	int i, ret = 0, err = -EADDRNOTAVAIL;
	u64 start, end, tmin, tmax;

	tmax = buf_max;
	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
		start = emem->ranges[i].start;
		end = emem->ranges[i].end;

		if (start > tmax)
			continue;

		if (end < tmax) {
			tmin = (end < buf_min ? buf_min : end + 1);
			ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
			if (!ret)
				return 0;
		}

		tmax = start - 1;

		if (tmax < buf_min) {
			ret = err;
			break;
		}
		ret = 0;
	}

	if (!ret) {
		tmin = buf_min;
		ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
	}
	return ret;
}

/**
 * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole
 *                               in the memory regions between buf_min & buf_max
 *                               for the buffer. If found, sets kbuf->mem.
 * @kbuf:                        Buffer contents and memory parameters.
 * @buf_min:                     Minimum address for the buffer.
 * @buf_max:                     Maximum address for the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
				       u64 buf_min, u64 buf_max)
{
	int ret = -EADDRNOTAVAIL;
	phys_addr_t start, end;
	u64 i;

	for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
			   MEMBLOCK_NONE, &start, &end, NULL) {
		/*
		 * memblock uses [start, end) convention while it is
		 * [start, end] here. Fix the off-by-one to have the
		 * same convention.
		 */
		end -= 1;

		if (end < buf_min)
			continue;

		/* Memory hole not found */
		if (start > buf_max)
			break;

		/* Adjust memory region based on the given range */
		if (start < buf_min)
			start = buf_min;
		if (end > buf_max)
			end = buf_max;

		start = ALIGN(start, kbuf->buf_align);
		if (start < end && (end - start + 1) >= kbuf->memsz) {
			/* Suitable memory range found. Set kbuf->mem */
			kbuf->mem = start;
			ret = 0;
			break;
		}
	}

	return ret;
}

/**
 * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
 *                                   suitable buffer with bottom up approach.
 * @kbuf:                            Buffer contents and memory parameters.
 * @buf_min:                         Minimum address for the buffer.
 * @buf_max:                         Maximum address for the buffer.
 * @emem:                            Exclude memory ranges.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
					   u64 buf_min, u64 buf_max,
					   const struct crash_mem *emem)
{
	int i, ret = 0, err = -EADDRNOTAVAIL;
	u64 start, end, tmin, tmax;

	tmin = buf_min;
	for (i = 0; i < emem->nr_ranges; i++) {
		start = emem->ranges[i].start;
		end = emem->ranges[i].end;

		if (end < tmin)
			continue;

		if (start > tmin) {
			tmax = (start > buf_max ? buf_max : start - 1);
			ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
			if (!ret)
				return 0;
		}

		tmin = end + 1;

		if (tmin > buf_max) {
			ret = err;
			break;
		}
		ret = 0;
	}

	if (!ret) {
		tmax = buf_max;
		ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
	}
	return ret;
}

/**
 * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
 *                         variables and call setup_purgatory() to initialize
@@ -67,6 +317,67 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
	return setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
}

/**
 * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal,
 *                              tce-table, reserved-ranges & such (exclude
 *                              memory ranges) as they can't be used for kexec
 *                              segment buffer. Sets kbuf->mem when a suitable
 *                              memory hole is found.
 * @kbuf:                       Buffer contents and memory parameters.
 *
 * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
{
	struct crash_mem **emem;
	u64 buf_min, buf_max;
	int ret;

	/*
	 * Use the generic kexec_locate_mem_hole for regular
	 * kexec_file_load syscall
	 */
	if (kbuf->image->type != KEXEC_TYPE_CRASH)
		return kexec_locate_mem_hole(kbuf);

	/* Look up the exclude ranges list while locating the memory hole */
	emem = &(kbuf->image->arch.exclude_ranges);
	if (!(*emem) || ((*emem)->nr_ranges == 0)) {
		pr_warn("No exclude range list. Using the default locate mem hole method\n");
		return kexec_locate_mem_hole(kbuf);
	}

	/* Segments for kdump kernel should be within crashkernel region */
	buf_min = (kbuf->buf_min < crashk_res.start ?
		   crashk_res.start : kbuf->buf_min);
	buf_max = (kbuf->buf_max > crashk_res.end ?
		   crashk_res.end : kbuf->buf_max);

	if (buf_min > buf_max) {
		pr_err("Invalid buffer min and/or max values\n");
		return -EINVAL;
	}

	if (kbuf->top_down)
		ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
						     *emem);
	else
		ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
						      *emem);

	/* Add the buffer allocated to the exclude list for the next lookup */
	if (!ret) {
		add_mem_range(emem, kbuf->mem, kbuf->memsz);
		sort_memory_ranges(*emem, true);
	} else {
		pr_err("Failed to locate memory buffer of size %lu\n",
		       kbuf->memsz);
	}
	return ret;
}

/**
 * arch_kexec_kernel_image_probe - Does additional handling needed to setup
 *                                 kexec segments.
@@ -79,9 +390,31 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
				  unsigned long buf_len)
{
	/* We don't support crash kernels yet. */
	if (image->type == KEXEC_TYPE_CRASH)
	if (image->type == KEXEC_TYPE_CRASH) {
		int ret;

		/* Get exclude memory ranges needed for setting up kdump segments */
		ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
		if (ret)
			pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
		/* Return this until all changes for panic kernel are in */
		return -EOPNOTSUPP;
	}

	return kexec_image_probe_default(image, buf, buf_len);
}

/**
 * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
 *                                      while loading the image.
 * @image:                              kexec image being loaded.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kimage_file_post_load_cleanup(struct kimage *image)
{
	kfree(image->arch.exclude_ranges);
	image->arch.exclude_ranges = NULL;

	return kexec_image_post_load_cleanup_default(image);
}
+177 −0
Original line number Diff line number Diff line
@@ -233,3 +233,180 @@ int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)

	return __add_mem_range(mem_ranges, base, size);
}

/**
 * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
 * @mem_ranges:         Range list to add the memory range(s) to.
 *
 * Returns 0 on success, negative errno on error.
 */
int add_tce_mem_ranges(struct crash_mem **mem_ranges)
{
	struct device_node *dn = NULL;
	int ret = 0;

	for_each_node_by_type(dn, "pci") {
		u64 base;
		u32 size;

		ret = of_property_read_u64(dn, "linux,tce-base", &base);
		ret |= of_property_read_u32(dn, "linux,tce-size", &size);
		if (ret) {
			/*
			 * It is ok to have pci nodes without tce. So, ignore
			 * property does not exist error.
			 */
			if (ret == -EINVAL) {
				ret = 0;
				continue;
			}
			break;
		}

		ret = add_mem_range(mem_ranges, base, size);
		if (ret)
			break;
	}

	of_node_put(dn);
	return ret;
}

/**
 * add_initrd_mem_range - Adds initrd range to the given memory ranges list,
 *                        if the initrd was retained.
 * @mem_ranges:           Range list to add the memory range to.
 *
 * Returns 0 on success, negative errno on error.
 */
int add_initrd_mem_range(struct crash_mem **mem_ranges)
{
	u64 base, end;
	int ret;

	/* This range means something, only if initrd was retained */
	if (!strstr(saved_command_line, "retain_initrd"))
		return 0;

	ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base);
	ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end);
	if (!ret)
		ret = add_mem_range(mem_ranges, base, end - base + 1);

	return ret;
}

#ifdef CONFIG_PPC_BOOK3S_64
/**
 * add_htab_mem_range - Adds htab range to the given memory ranges list,
 *                      if it exists
 * @mem_ranges:         Range list to add the memory range to.
 *
 * Returns 0 on success, negative errno on error.
 */
int add_htab_mem_range(struct crash_mem **mem_ranges)
{
	if (!htab_address)
		return 0;

	return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);
}
#endif

/**
 * add_kernel_mem_range - Adds kernel text region to the given
 *                        memory ranges list.
 * @mem_ranges:           Range list to add the memory range to.
 *
 * Returns 0 on success, negative errno on error.
 */
int add_kernel_mem_range(struct crash_mem **mem_ranges)
{
	return add_mem_range(mem_ranges, 0, __pa(_end));
}

/**
 * add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
 * @mem_ranges:         Range list to add the memory range to.
 *
 * Returns 0 on success, negative errno on error.
 */
int add_rtas_mem_range(struct crash_mem **mem_ranges)
{
	struct device_node *dn;
	u32 base, size;
	int ret = 0;

	dn = of_find_node_by_path("/rtas");
	if (!dn)
		return 0;

	ret = of_property_read_u32(dn, "linux,rtas-base", &base);
	ret |= of_property_read_u32(dn, "rtas-size", &size);
	if (!ret)
		ret = add_mem_range(mem_ranges, base, size);

	of_node_put(dn);
	return ret;
}

/**
 * add_opal_mem_range - Adds OPAL region to the given memory ranges list.
 * @mem_ranges:         Range list to add the memory range to.
 *
 * Returns 0 on success, negative errno on error.
 */
int add_opal_mem_range(struct crash_mem **mem_ranges)
{
	struct device_node *dn;
	u64 base, size;
	int ret;

	dn = of_find_node_by_path("/ibm,opal");
	if (!dn)
		return 0;

	ret = of_property_read_u64(dn, "opal-base-address", &base);
	ret |= of_property_read_u64(dn, "opal-runtime-size", &size);
	if (!ret)
		ret = add_mem_range(mem_ranges, base, size);

	of_node_put(dn);
	return ret;
}

/**
 * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w
 *                           to the given memory ranges list.
 * @mem_ranges:              Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
{
	int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
	const __be32 *prop;

	prop = of_get_property(of_root, "reserved-ranges", &len);
	if (!prop)
		return 0;

	n_mem_addr_cells = of_n_addr_cells(of_root);
	n_mem_size_cells = of_n_size_cells(of_root);
	cells = n_mem_addr_cells + n_mem_size_cells;

	/* Each reserved range is an (address,size) pair */
	for (i = 0; i < (len / (sizeof(u32) * cells)); i++) {
		u64 base, size;

		base = of_read_number(prop + (i * cells), n_mem_addr_cells);
		size = of_read_number(prop + (i * cells) + n_mem_addr_cells,
				      n_mem_size_cells);

		ret = add_mem_range(mem_ranges, base, size);
		if (ret)
			break;
	}

	return ret;
}