Commit b374d0f9 authored by Will Deacon

Merge branch 'for-next/kexec' into for-next/core

Significant steps along the road to leaving the MMU enabled during kexec
relocation.

* for-next/kexec:
  arm64: hibernate: add __force attribute to gfp_t casting
  arm64: kexec: arm64_relocate_new_kernel don't use x0 as temp
  arm64: kexec: arm64_relocate_new_kernel clean-ups and optimizations
  arm64: kexec: call kexec_image_info only once
  arm64: kexec: move relocation function setup
  arm64: trans_pgd: hibernate: idmap the single page that holds the copy page routines
  arm64: mm: Always update TCR_EL1 from __cpu_set_tcr_t0sz()
  arm64: trans_pgd: pass NULL instead of init_mm to *_populate functions
  arm64: trans_pgd: pass allocator trans_pgd_create_copy
  arm64: trans_pgd: make trans_pgd_map_page generic
  arm64: hibernate: move page handling function to new trans_pgd.c
  arm64: hibernate: variable pudp is used instead of pd4dp
  arm64: kexec: make dtb_mem always enabled
parents 6b76c3ae d1bbc35f
arch/arm64/Kconfig +4 −0
@@ -1132,6 +1132,10 @@ config CRASH_DUMP
 
 	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
+config TRANS_TABLE
+	def_bool y
+	depends on HIBERNATION
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
arch/arm64/include/asm/kexec.h +3 −2
@@ -90,18 +90,19 @@ static inline void crash_prepare_suspend(void) {}
 static inline void crash_post_resume(void) {}
 #endif
 
-#ifdef CONFIG_KEXEC_FILE
 #define ARCH_HAS_KIMAGE_ARCH
 
 struct kimage_arch {
 	void *dtb;
-	unsigned long dtb_mem;
+	phys_addr_t dtb_mem;
+	phys_addr_t kern_reloc;
 	/* Core ELF header buffer */
 	void *elf_headers;
 	unsigned long elf_headers_mem;
 	unsigned long elf_headers_sz;
 };
 
+#ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_image_ops;
 
 struct kimage;
arch/arm64/include/asm/mmu_context.h +3 −4
@@ -81,16 +81,15 @@ static inline bool __cpu_uses_extended_idmap_level(void)
 }
 
 /*
- * Set TCR.T0SZ to its default value (based on VA_BITS)
+ * Ensure TCR.T0SZ is set to the provided value.
  */
 static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
 {
-	unsigned long tcr;
+	unsigned long tcr = read_sysreg(tcr_el1);
 
-	if (!__cpu_uses_extended_idmap())
+	if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz)
 		return;
 
-	tcr = read_sysreg(tcr_el1);
 	tcr &= ~TCR_T0SZ_MASK;
 	tcr |= t0sz << TCR_T0SZ_OFFSET;
 	write_sysreg(tcr, tcr_el1);
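The rewrite above makes the helper compare the live T0SZ field with the requested value rather than bailing out whenever the extended idmap is not in use, so callers such as trans_pgd_idmap_page() can move T0SZ back and forth. For reference, T0SZ encodes the TTBR0 input-address size as 64 minus the number of VA bits, so a 48-bit VA space needs T0SZ = 16 and a 52-bit idmap needs T0SZ = 12. A minimal sketch of the same read-modify-write arithmetic, assuming the kernel's TCR_T0SZ_OFFSET of 0 and a 6-bit field (the sketch_*/SKETCH_* names are illustrative, not part of the patch):

	/* Sketch only: the pure arithmetic behind __cpu_set_tcr_t0sz(). */
	#define SKETCH_T0SZ_OFFSET	0
	#define SKETCH_T0SZ_MASK	(0x3fUL << SKETCH_T0SZ_OFFSET)

	static unsigned long sketch_update_t0sz(unsigned long tcr, unsigned long va_bits)
	{
		unsigned long t0sz = 64 - va_bits;	/* e.g. 48-bit VA -> T0SZ = 16 */

		if (((tcr & SKETCH_T0SZ_MASK) >> SKETCH_T0SZ_OFFSET) == t0sz)
			return tcr;			/* already as required */

		tcr &= ~SKETCH_T0SZ_MASK;
		return tcr | (t0sz << SKETCH_T0SZ_OFFSET);
	}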
arch/arm64/include/asm/trans_pgd.h (new file) +39 −0
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2020, Microsoft Corporation.
+ * Pavel Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef _ASM_TRANS_TABLE_H
+#define _ASM_TRANS_TABLE_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+#include <asm/pgtable-types.h>
+
+/*
+ * trans_alloc_page
+ *	- Allocator that should return exactly one zeroed page, if this
+ *	  allocator fails, trans_pgd_create_copy() and trans_pgd_map_page()
+ *	  return -ENOMEM error.
+ *
+ * trans_alloc_arg
+ *	- Passed to trans_alloc_page as an argument
+ */
+
+struct trans_pgd_info {
+	void * (*trans_alloc_page)(void *arg);
+	void *trans_alloc_arg;
+};
+
+int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd,
+			  unsigned long start, unsigned long end);
+
+int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
+		       void *page, unsigned long dst_addr, pgprot_t pgprot);
+
+int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
+			 unsigned long *t0sz, void *page);
+
+#endif /* _ASM_TRANS_TABLE_H */
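The allocator callback above decouples the trans_pgd builders from any particular source of pages: hibernate wraps get_safe_page(), and the kexec path this series is building toward can supply its own. A minimal sketch of a caller, modelled on the hibernate hunks later in this diff (the sketch_* names are illustrative, not part of the patch):

	/* Sketch: plug in an allocator, then copy the linear map. */
	static void *sketch_page_alloc(void *arg)
	{
		/* Must return exactly one zeroed page, or NULL on failure. */
		return (void *)get_zeroed_page((__force gfp_t)(unsigned long)arg);
	}

	static int sketch_copy_linear_map(pgd_t **pgdp)
	{
		struct trans_pgd_info info = {
			.trans_alloc_page	= sketch_page_alloc,
			.trans_alloc_arg	= (__force void *)GFP_ATOMIC,
		};

		/* Fails with -ENOMEM if sketch_page_alloc() ever returns NULL. */
		return trans_pgd_create_copy(&info, pgdp, PAGE_OFFSET, PAGE_END);
	}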
arch/arm64/kernel/hibernate.c +25 −246
@@ -16,7 +16,6 @@
 #define pr_fmt(x) "hibernate: " x
 #include <linux/cpu.h>
 #include <linux/kvm_host.h>
-#include <linux/mm.h>
 #include <linux/pm.h>
 #include <linux/sched.h>
 #include <linux/suspend.h>
@@ -31,13 +30,12 @@
 #include <asm/memory.h>
 #include <asm/mmu_context.h>
 #include <asm/mte.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable-hwdef.h>
 #include <asm/sections.h>
 #include <asm/smp.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/sysreg.h>
+#include <asm/trans_pgd.h>
 #include <asm/virt.h>
 
 /*
@@ -178,52 +176,9 @@ int arch_hibernation_header_restore(void *addr)
 }
 EXPORT_SYMBOL(arch_hibernation_header_restore);
 
-static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
-		       unsigned long dst_addr,
-		       pgprot_t pgprot)
+static void *hibernate_page_alloc(void *arg)
 {
-	pgd_t *pgdp;
-	p4d_t *p4dp;
-	pud_t *pudp;
-	pmd_t *pmdp;
-	pte_t *ptep;
-
-	pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
-	if (pgd_none(READ_ONCE(*pgdp))) {
-		pudp = (void *)get_safe_page(GFP_ATOMIC);
-		if (!pudp)
-			return -ENOMEM;
-		pgd_populate(&init_mm, pgdp, pudp);
-	}
-
-	p4dp = p4d_offset(pgdp, dst_addr);
-	if (p4d_none(READ_ONCE(*p4dp))) {
-		pudp = (void *)get_safe_page(GFP_ATOMIC);
-		if (!pudp)
-			return -ENOMEM;
-		p4d_populate(&init_mm, p4dp, pudp);
-	}
-
-	pudp = pud_offset(p4dp, dst_addr);
-	if (pud_none(READ_ONCE(*pudp))) {
-		pmdp = (void *)get_safe_page(GFP_ATOMIC);
-		if (!pmdp)
-			return -ENOMEM;
-		pud_populate(&init_mm, pudp, pmdp);
-	}
-
-	pmdp = pmd_offset(pudp, dst_addr);
-	if (pmd_none(READ_ONCE(*pmdp))) {
-		ptep = (void *)get_safe_page(GFP_ATOMIC);
-		if (!ptep)
-			return -ENOMEM;
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
-	}
-
-	ptep = pte_offset_kernel(pmdp, dst_addr);
-	set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
-
-	return 0;
+	return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
 }
 
 /*
@@ -239,11 +194,16 @@ static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
  * page system.
  */
 static int create_safe_exec_page(void *src_start, size_t length,
-				 unsigned long dst_addr,
 				 phys_addr_t *phys_dst_addr)
 {
+	struct trans_pgd_info trans_info = {
+		.trans_alloc_page	= hibernate_page_alloc,
+		.trans_alloc_arg	= (__force void *)GFP_ATOMIC,
+	};
+
 	void *page = (void *)get_safe_page(GFP_ATOMIC);
-	pgd_t *trans_pgd;
+	phys_addr_t trans_ttbr0;
+	unsigned long t0sz;
 	int rc;
 
 	if (!page)
@@ -251,13 +211,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
 
 	memcpy(page, src_start, length);
 	__flush_icache_range((unsigned long)page, (unsigned long)page + length);
-
-	trans_pgd = (void *)get_safe_page(GFP_ATOMIC);
-	if (!trans_pgd)
-		return -ENOMEM;
-
-	rc = trans_pgd_map_page(trans_pgd, page, dst_addr,
-				PAGE_KERNEL_EXEC);
+	rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
 	if (rc)
 		return rc;
 
@@ -270,12 +224,15 @@
 	 * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
 	 * runtime services), while for a userspace-driven test_resume cycle it
 	 * points to userspace page tables (and we must point it at a zero page
-	 * ourselves). Elsewhere we only (un)install the idmap with preemption
-	 * disabled, so T0SZ should be as required regardless.
+	 * ourselves).
+	 *
+	 * We change T0SZ as part of installing the idmap. This is undone by
+	 * cpu_uninstall_idmap() in __cpu_suspend_exit().
 	 */
 	cpu_set_reserved_ttbr0();
 	local_flush_tlb_all();
-	write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1);
+	__cpu_set_tcr_t0sz(t0sz);
+	write_sysreg(trans_ttbr0, ttbr0_el1);
 	isb();
 
 	*phys_dst_addr = virt_to_phys(page);
@@ -462,182 +419,6 @@ int swsusp_arch_suspend(void)
 	return ret;
 }
 
-static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
-{
-	pte_t pte = READ_ONCE(*src_ptep);
-
-	if (pte_valid(pte)) {
-		/*
-		 * Resume will overwrite areas that may be marked
-		 * read only (code, rodata). Clear the RDONLY bit from
-		 * the temporary mappings we use during restore.
-		 */
-		set_pte(dst_ptep, pte_mkwrite(pte));
-	} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
-		/*
-		 * debug_pagealloc will removed the PTE_VALID bit if
-		 * the page isn't in use by the resume kernel. It may have
-		 * been in use by the original kernel, in which case we need
-		 * to put it back in our copy to do the restore.
-		 *
-		 * Before marking this entry valid, check the pfn should
-		 * be mapped.
-		 */
-		BUG_ON(!pfn_valid(pte_pfn(pte)));
-
-		set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
-	}
-}
-
-static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
-		    unsigned long end)
-{
-	pte_t *src_ptep;
-	pte_t *dst_ptep;
-	unsigned long addr = start;
-
-	dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
-	if (!dst_ptep)
-		return -ENOMEM;
-	pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
-	dst_ptep = pte_offset_kernel(dst_pmdp, start);
-
-	src_ptep = pte_offset_kernel(src_pmdp, start);
-	do {
-		_copy_pte(dst_ptep, src_ptep, addr);
-	} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
-
-	return 0;
-}
-
-static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
-		    unsigned long end)
-{
-	pmd_t *src_pmdp;
-	pmd_t *dst_pmdp;
-	unsigned long next;
-	unsigned long addr = start;
-
-	if (pud_none(READ_ONCE(*dst_pudp))) {
-		dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
-		if (!dst_pmdp)
-			return -ENOMEM;
-		pud_populate(&init_mm, dst_pudp, dst_pmdp);
-	}
-	dst_pmdp = pmd_offset(dst_pudp, start);
-
-	src_pmdp = pmd_offset(src_pudp, start);
-	do {
-		pmd_t pmd = READ_ONCE(*src_pmdp);
-
-		next = pmd_addr_end(addr, end);
-		if (pmd_none(pmd))
-			continue;
-		if (pmd_table(pmd)) {
-			if (copy_pte(dst_pmdp, src_pmdp, addr, next))
-				return -ENOMEM;
-		} else {
-			set_pmd(dst_pmdp,
-				__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
-		}
-	} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
-
-	return 0;
-}
-
-static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
-		    unsigned long end)
-{
-	pud_t *dst_pudp;
-	pud_t *src_pudp;
-	unsigned long next;
-	unsigned long addr = start;
-
-	if (p4d_none(READ_ONCE(*dst_p4dp))) {
-		dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
-		if (!dst_pudp)
-			return -ENOMEM;
-		p4d_populate(&init_mm, dst_p4dp, dst_pudp);
-	}
-	dst_pudp = pud_offset(dst_p4dp, start);
-
-	src_pudp = pud_offset(src_p4dp, start);
-	do {
-		pud_t pud = READ_ONCE(*src_pudp);
-
-		next = pud_addr_end(addr, end);
-		if (pud_none(pud))
-			continue;
-		if (pud_table(pud)) {
-			if (copy_pmd(dst_pudp, src_pudp, addr, next))
-				return -ENOMEM;
-		} else {
-			set_pud(dst_pudp,
-				__pud(pud_val(pud) & ~PUD_SECT_RDONLY));
-		}
-	} while (dst_pudp++, src_pudp++, addr = next, addr != end);
-
-	return 0;
-}
-
-static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
-		    unsigned long end)
-{
-	p4d_t *dst_p4dp;
-	p4d_t *src_p4dp;
-	unsigned long next;
-	unsigned long addr = start;
-
-	dst_p4dp = p4d_offset(dst_pgdp, start);
-	src_p4dp = p4d_offset(src_pgdp, start);
-	do {
-		next = p4d_addr_end(addr, end);
-		if (p4d_none(READ_ONCE(*src_p4dp)))
-			continue;
-		if (copy_pud(dst_p4dp, src_p4dp, addr, next))
-			return -ENOMEM;
-	} while (dst_p4dp++, src_p4dp++, addr = next, addr != end);
-
-	return 0;
-}
-
-static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
-			    unsigned long end)
-{
-	unsigned long next;
-	unsigned long addr = start;
-	pgd_t *src_pgdp = pgd_offset_k(start);
-
-	dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(READ_ONCE(*src_pgdp)))
-			continue;
-		if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
-			return -ENOMEM;
-	} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
-
-	return 0;
-}
-
-static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
-			  unsigned long end)
-{
-	int rc;
-	pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
-
-	if (!trans_pgd) {
-		pr_err("Failed to allocate memory for temporary page tables.\n");
-		return -ENOMEM;
-	}
-
-	rc = copy_page_tables(trans_pgd, start, end);
-	if (!rc)
-		*dst_pgdp = trans_pgd;
-
-	return rc;
-}
-
 /*
  * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
  *
@@ -650,16 +431,20 @@ int swsusp_arch_resume(void)
 	void *zero_page;
 	size_t exit_size;
 	pgd_t *tmp_pg_dir;
-	phys_addr_t phys_hibernate_exit;
 	void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
 					  void *, phys_addr_t, phys_addr_t);
+	struct trans_pgd_info trans_info = {
+		.trans_alloc_page	= hibernate_page_alloc,
+		.trans_alloc_arg	= (__force void *)GFP_ATOMIC,
+	};
 
 	/*
 	 * Restoring the memory image will overwrite the ttbr1 page tables.
 	 * Create a second copy of just the linear map, and use this when
 	 * restoring.
 	 */
-	rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END);
+	rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET,
+				   PAGE_END);
 	if (rc)
 		return rc;
 
@@ -673,19 +458,13 @@ int swsusp_arch_resume(void)
 		return -ENOMEM;
 	}
 
-	/*
-	 * Locate the exit code in the bottom-but-one page, so that *NULL
-	 * still has disastrous affects.
-	 */
-	hibernate_exit = (void *)PAGE_SIZE;
 	exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
 	/*
 	 * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
 	 * a new set of ttbr0 page tables and load them.
 	 */
 	rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
-				   (unsigned long)hibernate_exit,
-				   &phys_hibernate_exit);
+				   (phys_addr_t *)&hibernate_exit);
 	if (rc) {
 		pr_err("Failed to create safe executable page for hibernate_exit code.\n");
 		return rc;
@@ -704,7 +483,7 @@ int swsusp_arch_resume(void)
 	 * We can skip this step if we booted at EL1, or are running with VHE.
 	 */
 	if (el2_reset_needed()) {
-		phys_addr_t el2_vectors = phys_hibernate_exit;  /* base */
+		phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
 		el2_vectors += hibernate_el2_vectors -
 			       __hibernate_exit_text_start;     /* offset */
 
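Taken together, create_safe_exec_page() now receives both the new TTBR0 value and its T0SZ from trans_pgd_idmap_page() and installs them by hand, as the last hibernate.c hunks show. A condensed sketch of that install sequence (helper name illustrative; the body is lifted from the diff above):

	/* Sketch: install a trans_pgd-built idmap into TTBR0. */
	static void sketch_install_idmap(phys_addr_t trans_ttbr0, unsigned long t0sz)
	{
		cpu_set_reserved_ttbr0();		/* park TTBR0 on the zero page */
		local_flush_tlb_all();			/* drop stale ASID-tagged entries */
		__cpu_set_tcr_t0sz(t0sz);		/* resize the TTBR0 VA range */
		write_sysreg(trans_ttbr0, ttbr0_el1);	/* load the new tables */
		isb();
	}

As the updated comment notes, the T0SZ change is undone by cpu_uninstall_idmap() in __cpu_suspend_exit() on the resume path.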