Commit 072e3d96 authored by Pavel Tatashin's avatar Pavel Tatashin Committed by Will Deacon
Browse files

arm64: hibernate: move page handling function to new trans_pgd.c



Now, that we abstracted the required functions move them to a new home.
Later, we will generalize these function in order to be useful outside
of hibernation.

Signed-off-by: default avatarPavel Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: default avatarJames Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20210125191923.1060122-4-pasha.tatashin@soleen.com


Signed-off-by: default avatarWill Deacon <will@kernel.org>
parent 41f67d40
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -1132,6 +1132,10 @@ config CRASH_DUMP

	  For more details see Documentation/admin-guide/kdump/kdump.rst

config TRANS_TABLE
	def_bool y
	depends on HIBERNATION

config XEN_DOM0
	def_bool y
	depends on XEN
+21 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Copyright (c) 2020, Microsoft Corporation.
 * Pavel Tatashin <pasha.tatashin@soleen.com>
 */

#ifndef _ASM_TRANS_TABLE_H
#define _ASM_TRANS_TABLE_H

#include <linux/bits.h>
#include <linux/types.h>
#include <asm/pgtable-types.h>

int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
			  unsigned long end);

int trans_pgd_map_page(pgd_t *trans_pgd, void *page, unsigned long dst_addr,
		       pgprot_t pgprot);

#endif /* _ASM_TRANS_TABLE_H */
+1 −227
Original line number Diff line number Diff line
@@ -16,7 +16,6 @@
#define pr_fmt(x) "hibernate: " x
#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
@@ -31,13 +30,12 @@
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/pgalloc.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
#include <asm/trans_pgd.h>
#include <asm/virt.h>

/*
@@ -178,54 +176,6 @@ int arch_hibernation_header_restore(void *addr)
}
EXPORT_SYMBOL(arch_hibernation_header_restore);

static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
		       unsigned long dst_addr,
		       pgprot_t pgprot)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
	if (pgd_none(READ_ONCE(*pgdp))) {
		p4dp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pgdp)
			return -ENOMEM;
		pgd_populate(&init_mm, pgdp, p4dp);
	}

	p4dp = p4d_offset(pgdp, dst_addr);
	if (p4d_none(READ_ONCE(*p4dp))) {
		pudp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pudp)
			return -ENOMEM;
		p4d_populate(&init_mm, p4dp, pudp);
	}

	pudp = pud_offset(p4dp, dst_addr);
	if (pud_none(READ_ONCE(*pudp))) {
		pmdp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pmdp)
			return -ENOMEM;
		pud_populate(&init_mm, pudp, pmdp);
	}

	pmdp = pmd_offset(pudp, dst_addr);
	if (pmd_none(READ_ONCE(*pmdp))) {
		ptep = (void *)get_safe_page(GFP_ATOMIC);
		if (!ptep)
			return -ENOMEM;
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}

	ptep = pte_offset_kernel(pmdp, dst_addr);
	set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));

	return 0;
}

/*
 * Copies length bytes, starting at src_start into an new page,
 * perform cache maintenance, then maps it at the specified address low
@@ -462,182 +412,6 @@ int swsusp_arch_suspend(void)
	return ret;
}

static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
	pte_t pte = READ_ONCE(*src_ptep);

	if (pte_valid(pte)) {
		/*
		 * Resume will overwrite areas that may be marked
		 * read only (code, rodata). Clear the RDONLY bit from
		 * the temporary mappings we use during restore.
		 */
		set_pte(dst_ptep, pte_mkwrite(pte));
	} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
		/*
		 * debug_pagealloc will removed the PTE_VALID bit if
		 * the page isn't in use by the resume kernel. It may have
		 * been in use by the original kernel, in which case we need
		 * to put it back in our copy to do the restore.
		 *
		 * Before marking this entry valid, check the pfn should
		 * be mapped.
		 */
		BUG_ON(!pfn_valid(pte_pfn(pte)));

		set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
	}
}

static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
		    unsigned long end)
{
	pte_t *src_ptep;
	pte_t *dst_ptep;
	unsigned long addr = start;

	dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
	if (!dst_ptep)
		return -ENOMEM;
	pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
	dst_ptep = pte_offset_kernel(dst_pmdp, start);

	src_ptep = pte_offset_kernel(src_pmdp, start);
	do {
		_copy_pte(dst_ptep, src_ptep, addr);
	} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);

	return 0;
}

static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
		    unsigned long end)
{
	pmd_t *src_pmdp;
	pmd_t *dst_pmdp;
	unsigned long next;
	unsigned long addr = start;

	if (pud_none(READ_ONCE(*dst_pudp))) {
		dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pmdp)
			return -ENOMEM;
		pud_populate(&init_mm, dst_pudp, dst_pmdp);
	}
	dst_pmdp = pmd_offset(dst_pudp, start);

	src_pmdp = pmd_offset(src_pudp, start);
	do {
		pmd_t pmd = READ_ONCE(*src_pmdp);

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd))
			continue;
		if (pmd_table(pmd)) {
			if (copy_pte(dst_pmdp, src_pmdp, addr, next))
				return -ENOMEM;
		} else {
			set_pmd(dst_pmdp,
				__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
		}
	} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);

	return 0;
}

static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
		    unsigned long end)
{
	pud_t *dst_pudp;
	pud_t *src_pudp;
	unsigned long next;
	unsigned long addr = start;

	if (p4d_none(READ_ONCE(*dst_p4dp))) {
		dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pudp)
			return -ENOMEM;
		p4d_populate(&init_mm, dst_p4dp, dst_pudp);
	}
	dst_pudp = pud_offset(dst_p4dp, start);

	src_pudp = pud_offset(src_p4dp, start);
	do {
		pud_t pud = READ_ONCE(*src_pudp);

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			continue;
		if (pud_table(pud)) {
			if (copy_pmd(dst_pudp, src_pudp, addr, next))
				return -ENOMEM;
		} else {
			set_pud(dst_pudp,
				__pud(pud_val(pud) & ~PUD_SECT_RDONLY));
		}
	} while (dst_pudp++, src_pudp++, addr = next, addr != end);

	return 0;
}

static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
		    unsigned long end)
{
	p4d_t *dst_p4dp;
	p4d_t *src_p4dp;
	unsigned long next;
	unsigned long addr = start;

	dst_p4dp = p4d_offset(dst_pgdp, start);
	src_p4dp = p4d_offset(src_pgdp, start);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none(READ_ONCE(*src_p4dp)))
			continue;
		if (copy_pud(dst_p4dp, src_p4dp, addr, next))
			return -ENOMEM;
	} while (dst_p4dp++, src_p4dp++, addr = next, addr != end);

	return 0;
}

static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
			    unsigned long end)
{
	unsigned long next;
	unsigned long addr = start;
	pgd_t *src_pgdp = pgd_offset_k(start);

	dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none(READ_ONCE(*src_pgdp)))
			continue;
		if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
			return -ENOMEM;
	} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);

	return 0;
}

static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
			  unsigned long end)
{
	int rc;
	pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);

	if (!trans_pgd) {
		pr_err("Failed to allocate memory for temporary page tables.\n");
		return -ENOMEM;
	}

	rc = copy_page_tables(trans_pgd, start, end);
	if (!rc)
		*dst_pgdp = trans_pgd;

	return rc;
}

/*
 * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
 *
+1 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE)	+= ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS)	+= ptdump_debugfs.o
obj-$(CONFIG_TRANS_TABLE)	+= trans_pgd.o
obj-$(CONFIG_NUMA)		+= numa.o
obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
obj-$(CONFIG_ARM64_MTE)		+= mteswap.o
+250 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

/*
 * Transitional page tables for kexec and hibernate
 *
 * This file derived from: arch/arm64/kernel/hibernate.c
 *
 * Copyright (c) 2020, Microsoft Corporation.
 * Pavel Tatashin <pasha.tatashin@soleen.com>
 *
 */

/*
 * Transitional tables are used during system transferring from one world to
 * another: such as during hibernate restore, and kexec reboots. During these
 * phases one cannot rely on page table not being overwritten. This is because
 * hibernate and kexec can overwrite the current page tables during transition.
 */

#include <asm/trans_pgd.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <linux/suspend.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>

static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
	pte_t pte = READ_ONCE(*src_ptep);

	if (pte_valid(pte)) {
		/*
		 * Resume will overwrite areas that may be marked
		 * read only (code, rodata). Clear the RDONLY bit from
		 * the temporary mappings we use during restore.
		 */
		set_pte(dst_ptep, pte_mkwrite(pte));
	} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
		/*
		 * debug_pagealloc will removed the PTE_VALID bit if
		 * the page isn't in use by the resume kernel. It may have
		 * been in use by the original kernel, in which case we need
		 * to put it back in our copy to do the restore.
		 *
		 * Before marking this entry valid, check the pfn should
		 * be mapped.
		 */
		BUG_ON(!pfn_valid(pte_pfn(pte)));

		set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
	}
}

static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
		    unsigned long end)
{
	pte_t *src_ptep;
	pte_t *dst_ptep;
	unsigned long addr = start;

	dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
	if (!dst_ptep)
		return -ENOMEM;
	pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
	dst_ptep = pte_offset_kernel(dst_pmdp, start);

	src_ptep = pte_offset_kernel(src_pmdp, start);
	do {
		_copy_pte(dst_ptep, src_ptep, addr);
	} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);

	return 0;
}

static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
		    unsigned long end)
{
	pmd_t *src_pmdp;
	pmd_t *dst_pmdp;
	unsigned long next;
	unsigned long addr = start;

	if (pud_none(READ_ONCE(*dst_pudp))) {
		dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pmdp)
			return -ENOMEM;
		pud_populate(&init_mm, dst_pudp, dst_pmdp);
	}
	dst_pmdp = pmd_offset(dst_pudp, start);

	src_pmdp = pmd_offset(src_pudp, start);
	do {
		pmd_t pmd = READ_ONCE(*src_pmdp);

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd))
			continue;
		if (pmd_table(pmd)) {
			if (copy_pte(dst_pmdp, src_pmdp, addr, next))
				return -ENOMEM;
		} else {
			set_pmd(dst_pmdp,
				__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
		}
	} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);

	return 0;
}

static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
		    unsigned long end)
{
	pud_t *dst_pudp;
	pud_t *src_pudp;
	unsigned long next;
	unsigned long addr = start;

	if (p4d_none(READ_ONCE(*dst_p4dp))) {
		dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pudp)
			return -ENOMEM;
		p4d_populate(&init_mm, dst_p4dp, dst_pudp);
	}
	dst_pudp = pud_offset(dst_p4dp, start);

	src_pudp = pud_offset(src_p4dp, start);
	do {
		pud_t pud = READ_ONCE(*src_pudp);

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			continue;
		if (pud_table(pud)) {
			if (copy_pmd(dst_pudp, src_pudp, addr, next))
				return -ENOMEM;
		} else {
			set_pud(dst_pudp,
				__pud(pud_val(pud) & ~PUD_SECT_RDONLY));
		}
	} while (dst_pudp++, src_pudp++, addr = next, addr != end);

	return 0;
}

static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
		    unsigned long end)
{
	p4d_t *dst_p4dp;
	p4d_t *src_p4dp;
	unsigned long next;
	unsigned long addr = start;

	dst_p4dp = p4d_offset(dst_pgdp, start);
	src_p4dp = p4d_offset(src_pgdp, start);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none(READ_ONCE(*src_p4dp)))
			continue;
		if (copy_pud(dst_p4dp, src_p4dp, addr, next))
			return -ENOMEM;
	} while (dst_p4dp++, src_p4dp++, addr = next, addr != end);

	return 0;
}

static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
			    unsigned long end)
{
	unsigned long next;
	unsigned long addr = start;
	pgd_t *src_pgdp = pgd_offset_k(start);

	dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none(READ_ONCE(*src_pgdp)))
			continue;
		if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
			return -ENOMEM;
	} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);

	return 0;
}

int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
			  unsigned long end)
{
	int rc;
	pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);

	if (!trans_pgd) {
		pr_err("Failed to allocate memory for temporary page tables.\n");
		return -ENOMEM;
	}

	rc = copy_page_tables(trans_pgd, start, end);
	if (!rc)
		*dst_pgdp = trans_pgd;

	return rc;
}

int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
		       unsigned long dst_addr,
		       pgprot_t pgprot)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
	if (pgd_none(READ_ONCE(*pgdp))) {
		p4dp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pgdp)
			return -ENOMEM;
		pgd_populate(&init_mm, pgdp, p4dp);
	}

	p4dp = p4d_offset(pgdp, dst_addr);
	if (p4d_none(READ_ONCE(*p4dp))) {
		pudp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pudp)
			return -ENOMEM;
		p4d_populate(&init_mm, p4dp, pudp);
	}

	pudp = pud_offset(p4dp, dst_addr);
	if (pud_none(READ_ONCE(*pudp))) {
		pmdp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pmdp)
			return -ENOMEM;
		pud_populate(&init_mm, pudp, pmdp);
	}

	pmdp = pmd_offset(pudp, dst_addr);
	if (pmd_none(READ_ONCE(*pmdp))) {
		ptep = (void *)get_safe_page(GFP_ATOMIC);
		if (!ptep)
			return -ENOMEM;
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}

	ptep = pte_offset_kernel(pmdp, dst_addr);
	set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));

	return 0;
}