Unverified Commit fba8a867 authored by Nick Kossifidis, committed by Palmer Dabbelt
Browse files

RISC-V: Add kexec support



This patch adds support for kexec on RISC-V. On SMP systems it depends
on HOTPLUG_CPU in order to be able to bring up all harts after kexec.
It also needs a recent OpenSBI version that supports the HSM extension.
I tested it on riscv64 QEMU on both an smp and a non-smp system.

Signed-off-by: Nick Kossifidis <mick@ics.forth.gr>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
parent d83e682e
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -391,6 +391,21 @@ config RISCV_SBI_V01
	help
	  This config allows kernel to use SBI v0.1 APIs. This will be
	  deprecated in future once legacy M-mode software are no longer in use.

# Enables the kexec system call. KEXEC_CORE is selected for the generic
# kexec code; on SMP builds HOTPLUG_CPU is selected as well, because it is
# needed to bring up all harts again after kexec. Requires an MMU.
config KEXEC
	bool "Kexec system call"
	select KEXEC_CORE
	select HOTPLUG_CPU if SMP
	depends on MMU
	help
	  kexec is a system call that implements the ability to shutdown your
	  current kernel, and to start another kernel. It is like a reboot
	  but it is independent of the system firmware. And like a reboot
	  you can start any kernel with it, not just Linux.

	  The name comes from the similarity to the exec system call.


endmenu

menu "Boot options"
+49 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2019 FORTH-ICS/CARV
 *  Nick Kossifidis <mick@ics.forth.gr>
 */

#ifndef _RISCV_KEXEC_H
#define _RISCV_KEXEC_H

#include <asm/page.h>    /* For PAGE_SIZE */

/*
 * Maximum physical address we can use pages from.
 * -1UL is the largest unsigned long value, i.e. no limit is imposed.
 */
#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)

/*
 * Maximum address we can reach in physical address mode.
 * -1UL: no limit is imposed.
 */
#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)

/*
 * Maximum address we can use for the control code buffer.
 * -1UL: no limit is imposed.
 */
#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)

/*
 * Reserve a page for the control code buffer. The relocation code is
 * copied into this buffer, so it has to fit within PAGE_SIZE bytes.
 */
#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE

/* Architecture identifier used for kexec images on RISC-V */
#define KEXEC_ARCH KEXEC_ARCH_RISCV

/*
 * crash_setup_regs - Arch hook taking a destination (newregs) and a
 * source (oldregs) register set.
 *
 * Crash kernels are not supported yet, so this is intentionally a no-op:
 * neither argument is read or written.
 */
static inline void
crash_setup_regs(struct pt_regs *newregs,
		 struct pt_regs *oldregs)
{
	/* Dummy implementation for now */
}


/* Tell the generic kexec code that we provide our own struct kimage_arch */
#define ARCH_HAS_KIMAGE_ARCH

/*
 * Arch-specific part of struct kimage (accessed as image->arch).
 */
struct kimage_arch {
	/*
	 * Physical address of the Flattened Device Tree segment of the
	 * loaded image. Filled in by machine_kexec_prepare() and handed
	 * to the relocation code by machine_kexec().
	 */
	unsigned long fdt_addr;
};

/*
 * Relocation code and its size in bytes. Both symbols are provided by the
 * kexec relocation assembly; the code is copied into the control page
 * before it is executed.
 *
 * The storage-class specifier comes first: placing it after a qualifier
 * ("const extern") is an obsolescent form that compilers warn about.
 */
extern const unsigned char riscv_kexec_relocate[];
extern const unsigned int riscv_kexec_relocate_size;

/*
 * Signature of the relocation code once it has been copied into the
 * control page. The arguments arrive in a0..a4, in this order:
 *
 * first_ind_entry: address of the first entry of the image's entry list
 * jump_addr:       address to jump to once relocation is done
 * fdt_addr:        physical address of the FDT image
 * hartid:          id of the hart executing the relocation code
 * va_pa_off:       offset subtracted from a virtual address to obtain
 *                  the physical one, used when switching the MMU off
 *
 * The relocation code ends by jumping to jump_addr; it does not return.
 */
typedef void (*riscv_kexec_do_relocate)(unsigned long first_ind_entry,
					unsigned long jump_addr,
					unsigned long fdt_addr,
					unsigned long hartid,
					unsigned long va_pa_off);

#endif
+5 −0
Original line number Diff line number Diff line
@@ -9,6 +9,10 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_sbi.o	= $(CC_FLAGS_FTRACE)
endif

# Extra assembler flags for the kexec relocation code only.
# NOTE(review): presumably needed because the code is copied to the control
# page and runs from there, so it must stay pc-relative and must not be
# relaxed by the linker - confirm.
ifdef CONFIG_KEXEC
AFLAGS_kexec_relocate.o := -mcmodel=medany -mno-relax
endif

extra-y += head.o
extra-y += vmlinux.lds

@@ -54,6 +58,7 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o
endif
obj-$(CONFIG_HOTPLUG_CPU)	+= cpu-hotplug.o
obj-$(CONFIG_KGDB)		+= kgdb.o
obj-$(CONFIG_KEXEC)		+= kexec_relocate.o machine_kexec.o

obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o

+157 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2019 FORTH-ICS/CARV
 *  Nick Kossifidis <mick@ics.forth.gr>
 */

#include <asm/asm.h>	/* For RISCV_* and REG_* macros */
#include <asm/csr.h>	/* For CSR_* macros */
#include <asm/page.h>	/* For PAGE_SIZE */
#include <linux/linkage.h> /* For SYM_* macros */

/*
 * riscv_kexec_relocate - walk the kimage entry list, copy the source
 * pages to their destination and jump to the new kernel.
 *
 * This code is not executed in place: machine_kexec_prepare() copies it
 * into the control page and machine_kexec() calls it from there.
 *
 * In:  a0 = address of the first entry
 *      a1 = address to jump to after relocation
 *      a2 = physical address of the FDT image
 *      a3 = hartid of the current hart
 *      a4 = va_pa_offset
 * Out: jumps to a1 with a0 = hartid, a1 = FDT address; does not return.
 */
.section ".rodata"
SYM_CODE_START(riscv_kexec_relocate)

	/*
	 * s0: Pointer to the current entry
	 * s1: (const) Phys address to jump to after relocation
	 * s2: (const) Phys address of the FDT image
	 * s3: (const) The hartid of the current hart
	 * s4: Pointer to the destination address for the relocation
	 * s5: (const) Number of words per page
	 * s6: (const) 1, used for subtraction
	 * s7: (const) va_pa_offset, used when switching MMU off
	 * s8: (const) Physical address of the main loop
	 * s9: (debug) indirection page counter
	 * s10: (debug) entry counter
	 * s11: (debug) copied words counter
	 *
	 * The debug counters are only incremented, never read by this code.
	 */
	mv	s0, a0
	mv	s1, a1
	mv	s2, a2
	mv	s3, a3
	mv	s4, zero
	li	s5, (PAGE_SIZE / RISCV_SZPTR)
	li	s6, 1
	mv	s7, a4
	mv	s8, zero
	mv	s9, zero
	mv	s10, zero
	mv	s11, zero

	/* Disable / cleanup interrupts */
	csrw	CSR_SIE, zero
	csrw	CSR_SIP, zero

	/*
	 * When we switch SATP.MODE to "Bare" we'll only
	 * play with physical addresses. However the first time
	 * we try to jump somewhere, the offset on the jump
	 * will be relative to pc which will still be on VA. To
	 * deal with this we set stvec to the physical address at
	 * the start of the loop below so that we jump there in
	 * any case.
	 */
	la	s8, 1f		/* s8 = VA of the loop start */
	sub	s8, s8, s7	/* s8 = PA of the loop start */
	csrw	CSR_STVEC, s8

	/* Process entries in a loop */
	/* The loop start is also the stvec target, keep it 4-byte aligned */
.align 2
1:
	addi	s10, s10, 1
	REG_L	t0, 0(s0)		/* t0 = *image->entry */
	addi	s0, s0, RISCV_SZPTR	/* image->entry++ */

	/* IND_DESTINATION entry ? -> save destination address */
	andi	t1, t0, 0x1
	beqz	t1, 2f
	andi	s4, t0, ~0x1		/* strip the flag bit */
	j	1b

2:
	/* IND_INDIRECTION entry ? -> update next entry ptr (PA) */
	andi	t1, t0, 0x2
	beqz	t1, 2f
	andi	s0, t0, ~0x2		/* strip the flag bit */
	addi	s9, s9, 1
	/*
	 * The next entry pointer is a physical address, so turn
	 * translation off and continue at the physical address of
	 * the loop (see the stvec setup above).
	 */
	csrw	CSR_SATP, zero
	jalr	zero, s8, 0

2:
	/* IND_DONE entry ? -> jump to done label */
	andi	t1, t0, 0x4
	beqz	t1, 2f
	j	4f

2:
	/*
	 * IND_SOURCE entry ? -> copy page word by word to the
	 * destination address we got from IND_DESTINATION
	 */
	andi	t1, t0, 0x8
	beqz	t1, 1b		/* Unknown entry type, ignore it */
	andi	t0, t0, ~0x8	/* t0 = src_ptr (flag bit stripped) */
	mv	t3, s5		/* i = num words per page */
3:	/* copy loop */
	REG_L	t1, (t0)	/* t1 = *src_ptr */
	REG_S	t1, (s4)	/* *dst_ptr = *src_ptr */
	addi	t0, t0, RISCV_SZPTR /* src_ptr++ */
	addi	s4, s4, RISCV_SZPTR /* dst_ptr++ */
	sub	t3, t3, s6	/* i-- */
	addi	s11, s11, 1	/* c++ */
	beqz	t3, 1b		/* copy done ? */
	j	3b

4:
	/*
	 * Pass the arguments to the next kernel:
	 * a0 = hartid, a1 = FDT address. a2 temporarily
	 * holds the jump address.
	 */
	mv	a0, s3
	mv	a1, s2
	mv	a2, s1

	/* Cleanup: don't leak our state to the next kernel */
	mv	a3, zero
	mv	a4, zero
	mv	a5, zero
	mv	a6, zero
	mv	a7, zero

	mv	s0, zero
	mv	s1, zero
	mv	s2, zero
	mv	s3, zero
	mv	s4, zero
	mv	s5, zero
	mv	s6, zero
	mv	s7, zero
	mv	s8, zero
	mv	s9, zero
	mv	s10, zero
	mv	s11, zero

	mv	t0, zero
	mv	t1, zero
	mv	t2, zero
	mv	t3, zero
	mv	t4, zero
	mv	t5, zero
	mv	t6, zero
	csrw	CSR_SEPC, zero
	csrw	CSR_SCAUSE, zero
	csrw	CSR_SSCRATCH, zero

	/*
	 * Make sure the relocated code is visible
	 * and jump to the new kernel
	 */
	fence.i

	jalr	zero, a2, 0

SYM_CODE_END(riscv_kexec_relocate)
/* End marker, used below to compute the size of the relocation code */
riscv_kexec_relocate_end:

	.section ".rodata"
/*
 * Size in bytes of the relocation code above (end label minus start
 * label). Declared as an unsigned int on the C side, which uses it to
 * check that the code fits in the control page and to copy it there.
 */
SYM_DATA(riscv_kexec_relocate_size,
	.long riscv_kexec_relocate_end - riscv_kexec_relocate)
+186 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 FORTH-ICS/CARV
 *  Nick Kossifidis <mick@ics.forth.gr>
 */

#include <linux/kexec.h>
#include <linux/smp.h>		/* For smp_send_stop () */
#include <linux/libfdt.h>	/* For fdt_check_header() */
#include <linux/compiler.h>	/* For unreachable() */
#include <linux/cpu.h>		/* For cpu_down() */
#include <asm/kexec.h>		/* For riscv_kexec_* symbol defines */
#include <asm/smp.h>		/* For cpuid_to_hartid_map() */
#include <asm/cacheflush.h>	/* For local_flush_icache_all() */
#include <asm/barrier.h>	/* For smp_wmb() */
#include <asm/page.h>		/* For PAGE_MASK */
#include <asm/set_memory.h>	/* For set_memory_x() */

/**
 * kexec_image_info - Print received image details
 * @image: the loaded kexec image to describe
 *
 * Debug helper: dumps the image type, start address, head entry and
 * every segment (address range, size in bytes and in pages) through
 * pr_debug().
 */
static void
kexec_image_info(const struct kimage *image)
{
	unsigned long i;

	pr_debug("Kexec image info:\n");
	pr_debug("\ttype:        %d\n", image->type);
	pr_debug("\tstart:       %lx\n", image->start);
	pr_debug("\thead:        %lx\n", image->head);
	pr_debug("\tnr_segments: %lu\n", image->nr_segments);

	for (i = 0; i < image->nr_segments; i++) {
		/*
		 * Emit each segment with a single call: a message without
		 * a trailing newline followed by a second, independent
		 * pr_debug() is not guaranteed to end up on one line.
		 */
		pr_debug("\t    segment[%lu]: %016lx - %016lx\t\t0x%lx bytes, %lu pages\n",
			i,
			image->segment[i].mem,
			image->segment[i].mem + image->segment[i].memsz,
			(unsigned long) image->segment[i].memsz,
			(unsigned long) image->segment[i].memsz /  PAGE_SIZE);
	}
}

/**
 * machine_kexec_prepare - Initialize kexec
 * @image: the image provided by the user
 *
 * This function is called from do_kexec_load, when the user has
 * provided us with an image to be loaded. Its goal is to validate
 * the image and prepare the control code buffer as needed.
 * Note that kimage_alloc_init has already been called and the
 * control buffer has already been allocated.
 *
 * Return: 0 on success, -EINVAL if the image is a crash kernel, contains
 * no device tree or the relocation code does not fit in the control page,
 * or the error reported when the control page can't be made executable.
 */
int
machine_kexec_prepare(struct kimage *image)
{
	struct kimage_arch *internal = &image->arch;
	struct fdt_header fdt = {0};
	void *control_code_buffer = NULL;
	unsigned int control_code_buffer_sz = 0;
	unsigned long i;	/* same type as image->nr_segments */
	int ret;

	kexec_image_info(image);

	if (image->type == KEXEC_TYPE_CRASH) {
		pr_warn("Loading a crash kernel is unsupported for now.\n");
		return -EINVAL;
	}

	/* Find the Flattened Device Tree and save its physical address */
	for (i = 0; i < image->nr_segments; i++) {
		/* Too small to hold anything more than an FDT header */
		if (image->segment[i].memsz <= sizeof(fdt))
			continue;

		/* Segment data still lives in user memory at this point */
		if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
			continue;

		if (fdt_check_header(&fdt))
			continue;

		internal->fdt_addr = (unsigned long) image->segment[i].mem;
		break;
	}

	if (!internal->fdt_addr) {
		pr_err("Device tree not included in the provided image\n");
		return -EINVAL;
	}

	/* Copy the assembler code for relocation to the control page */
	control_code_buffer = page_address(image->control_code_page);
	control_code_buffer_sz = page_size(image->control_code_page);
	if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) {
		pr_err("Relocation code doesn't fit within a control page\n");
		return -EINVAL;
	}
	memcpy(control_code_buffer, riscv_kexec_relocate,
		riscv_kexec_relocate_size);

	/*
	 * Mark the control page executable. Fail the load if that does
	 * not work: machine_kexec() jumps into this page, and it is far
	 * better to report the problem now than to fault at reboot time.
	 */
	ret = set_memory_x((unsigned long) control_code_buffer, 1);
	if (ret) {
		pr_err("Failed to mark the control page executable\n");
		return ret;
	}

	return 0;
}


/**
 * machine_kexec_cleanup - Cleanup any leftovers from
 *			   machine_kexec_prepare
 * @image: the image being freed (unused)
 *
 * This function is called by kimage_free to handle any arch-specific
 * allocations done on machine_kexec_prepare. Since we didn't do any
 * allocations there, this is just an empty function. Note that the
 * control buffer is freed by kimage_free.
 */
void
machine_kexec_cleanup(struct kimage *image)
{
}


/*
 * machine_shutdown - Quiesce the system ahead of a kexec reboot
 *
 * kernel_kexec calls this right before machine_kexec. It masks
 * interrupts on the calling hart and, when CPU hotplug is available,
 * shuts down every other hart so that only the calling one is left
 * running.
 */
void machine_shutdown(void)
{
	/* This hart takes no more interrupts until we are back up. */
	local_irq_disable();

#ifdef CONFIG_HOTPLUG_CPU
	smp_shutdown_nonboot_cpus(smp_processor_id());
#endif
}

/**
 * machine_crash_shutdown - Prepare to kexec after a kernel crash
 * @regs: register state at the time of the crash (unused)
 *
 * This function is called by crash_kexec just before machine_kexec
 * below and its goal is similar to machine_shutdown, but in case of
 * a kernel crash. Since we don't handle such cases yet, this function
 * is empty.
 */
void
machine_crash_shutdown(struct pt_regs *regs)
{
}

/**
 * machine_kexec - Jump to the loaded kimage
 * @image: the pre-loaded image to execute
 *
 * This function is called by kernel_kexec which is called by the
 * reboot system call when the reboot cmd is LINUX_REBOOT_CMD_KEXEC,
 * or by crash_kexec which is called by the kernel's arch-specific
 * trap handler in case of a kernel panic. It's the final stage of
 * the kexec process where the pre-loaded kimage is ready to be
 * executed. We assume at this point that all other harts are
 * suspended and this hart will be the new boot hart.
 *
 * This function does not return.
 */
void __noreturn
machine_kexec(struct kimage *image)
{
	struct kimage_arch *internal = &image->arch;
	unsigned long jump_addr = (unsigned long) image->start;
	unsigned long first_ind_entry = (unsigned long) &image->head;
	/*
	 * The next kernel expects a hart id, which is not necessarily
	 * the same as Linux's logical CPU number, so translate it.
	 */
	unsigned long this_cpu_id = raw_smp_processor_id();
	unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
	unsigned long fdt_addr = internal->fdt_addr;
	void *control_code_buffer = page_address(image->control_code_page);
	/* The relocation code was copied to the control page at load time */
	riscv_kexec_do_relocate do_relocate = control_code_buffer;

	pr_notice("Will call new kernel at %08lx from hart id %lx\n",
		  jump_addr, this_hart_id);
	pr_notice("FDT image at %08lx\n", fdt_addr);

	/* Make sure the relocation code is visible to the hart */
	local_flush_icache_all();

	/* Jump to the relocation code */
	pr_notice("Bye...\n");
	do_relocate(first_ind_entry, jump_addr, fdt_addr,
		    this_hart_id, va_pa_offset);
	unreachable();
}