Unverified Commit dffe11e2 authored by Tong Tiangen's avatar Tong Tiangen Committed by Palmer Dabbelt
Browse files

riscv/vdso: Add support for time namespaces



Implement generic vdso time namespace support which also enables time
namespaces for riscv. This is quite similar to what arm64 does.

selftest/timens test result:
  1..10
  ok 1 Passed for CLOCK_BOOTTIME (syscall)
  ok 2 Passed for CLOCK_BOOTTIME (vdso)
  ok 3 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
  ok 4 # SKIP CLOCK_BOOTTIME_ALARM isn't supported
  ok 5 Passed for CLOCK_MONOTONIC (syscall)
  ok 6 Passed for CLOCK_MONOTONIC (vdso)
  ok 7 Passed for CLOCK_MONOTONIC_COARSE (syscall)
  ok 8 Passed for CLOCK_MONOTONIC_COARSE (vdso)
  ok 9 Passed for CLOCK_MONOTONIC_RAW (syscall)
  ok 10 Passed for CLOCK_MONOTONIC_RAW (vdso)
  # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:2 error:0

Signed-off-by: default avatarTong Tiangen <tongtiangen@huawei.com>
Signed-off-by: default avatarPalmer Dabbelt <palmerdabbelt@google.com>
parent 8edab023
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -62,6 +62,7 @@ config RISCV
	select GENERIC_SCHED_CLOCK
	select GENERIC_SMP_IDLE_THREAD
	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
	select HANDLE_DOMAIN_IRQ
	select HAVE_ARCH_AUDITSYSCALL
	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
+2 −0
Original line number Diff line number Diff line
@@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#define page_to_bus(page)	(page_to_phys(page))
#define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))

#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))

#ifdef CONFIG_FLATMEM
#define pfn_valid(pfn) \
	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
+1 −1
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@
 */
#ifdef CONFIG_MMU

#define __VVAR_PAGES    1
#define __VVAR_PAGES    2

#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
+7 −0
Original line number Diff line number Diff line
@@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
	return _vdso_data;
}

#ifdef CONFIG_TIME_NS
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
	return _timens_data;
}
#endif
#endif /* !__ASSEMBLY__ */

#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
+197 −53
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <linux/err.h>
#include <asm/page.h>
#include <asm/vdso.h>
#include <linux/time_namespace.h>

#ifdef CONFIG_GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
@@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];

enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};

#define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)

static unsigned int vdso_pages __ro_after_init;
static struct page **vdso_pagelist __ro_after_init;

/*
 * The vDSO data page.
 */
@@ -42,83 +41,228 @@ static union {
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;

static int __init vdso_init(void)
struct __vdso_info {
	const char *name;
	const char *vdso_code_start;
	const char *vdso_code_end;
	unsigned long vdso_pages;
	/* Data Mapping */
	struct vm_special_mapping *dm;
	/* Code Mapping */
	struct vm_special_mapping *cm;
};

static struct __vdso_info vdso_info __ro_after_init = {
	.name = "vdso",
	.vdso_code_start = vdso_start,
	.vdso_code_end = vdso_end,
};

static int vdso_mremap(const struct vm_special_mapping *sm,
		       struct vm_area_struct *new_vma)
{
	current->mm->context.vdso = (void *)new_vma->vm_start;

	return 0;
}

static int __init __vdso_init(void)
{
	unsigned int i;
	struct page **vdso_pagelist;
	unsigned long pfn;

	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
		pr_err("vDSO is not a valid ELF object!\n");
		return -EINVAL;
	}

	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
	vdso_pagelist =
		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
	if (unlikely(vdso_pagelist == NULL)) {
		pr_err("vdso: pagelist allocation failed\n");
	vdso_info.vdso_pages = (
		vdso_info.vdso_code_end -
		vdso_info.vdso_code_start) >>
		PAGE_SHIFT;

	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
				sizeof(struct page *),
				GFP_KERNEL);
	if (vdso_pagelist == NULL)
		return -ENOMEM;

	/* Grab the vDSO code pages. */
	pfn = sym_to_pfn(vdso_info.vdso_code_start);

	for (i = 0; i < vdso_info.vdso_pages; i++)
		vdso_pagelist[i] = pfn_to_page(pfn + i);

	vdso_info.cm->pages = vdso_pagelist;

	return 0;
}

#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page);
}

	for (i = 0; i < vdso_pages; i++) {
		struct page *pg;
/*
 * The vvar mapping contains data for a specific time namespace, so when a task
 * changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;

	mmap_read_lock(mm);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		unsigned long size = vma->vm_end - vma->vm_start;

		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
		vdso_pagelist[i] = pg;
		if (vma_is_special_mapping(vma, vdso_info.dm))
			zap_page_range(vma, vma->vm_start, size);
	}
	vdso_pagelist[i] = virt_to_page(vdso_data);

	mmap_read_unlock(mm);
	return 0;
}

static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	if (likely(vma->vm_mm == current->mm))
		return current->nsproxy->time_ns->vvar_page;

	/*
	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
	 * through interfaces like /proc/$pid/mem or
	 * process_vm_{readv,writev}() as long as there's no .access()
	 * in special_mapping_vmops.
	 * For more details check_vma_flags() and __access_remote_vm()
	 */
	WARN(1, "vvar_page accessed remotely");

	return NULL;
}
#else
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	return NULL;
}
#endif

static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *timens_page = find_timens_vvar_page(vma);
	unsigned long pfn;

	switch (vmf->pgoff) {
	case VVAR_DATA_PAGE_OFFSET:
		if (timens_page)
			pfn = page_to_pfn(timens_page);
		else
			pfn = sym_to_pfn(vdso_data);
		break;
#ifdef CONFIG_TIME_NS
	case VVAR_TIMENS_PAGE_OFFSET:
		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (!timens_page)
			return VM_FAULT_SIGBUS;
		pfn = sym_to_pfn(vdso_data);
		break;
#endif /* CONFIG_TIME_NS */
	default:
		return VM_FAULT_SIGBUS;
	}

	return vmf_insert_pfn(vma, vmf->address, pfn);
}

enum rv_vdso_map {
	RV_VDSO_MAP_VVAR,
	RV_VDSO_MAP_VDSO,
};

static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
	[RV_VDSO_MAP_VVAR] = {
		.name   = "[vvar]",
		.fault = vvar_fault,
	},
	[RV_VDSO_MAP_VDSO] = {
		.name   = "[vdso]",
		.mremap = vdso_mremap,
	},
};

static int __init vdso_init(void)
{
	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];

	return __vdso_init();
}
arch_initcall(vdso_init);

int arch_setup_additional_pages(struct linux_binprm *bprm,
static int __setup_additional_pages(struct mm_struct *mm,
				    struct linux_binprm *bprm,
				    int uses_interp)
{
	struct mm_struct *mm = current->mm;
	unsigned long vdso_base, vdso_len;
	int ret;
	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
	void *ret;

	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
	/* Be sure to map the data page */
	vdso_mapping_len = vdso_text_len + VVAR_SIZE;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		ret = vdso_base;
		goto end;
		ret = ERR_PTR(vdso_base);
		goto up_fail;
	}

	mm->context.vdso = NULL;
	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
	if (unlikely(ret))
		goto end;
	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
	if (IS_ERR(ret))
		goto up_fail;

	vdso_base += VVAR_SIZE;
	mm->context.vdso = (void *)vdso_base;
	ret =
	   install_special_mapping(mm, vdso_base + VVAR_SIZE,
		vdso_pages << PAGE_SHIFT,
	   _install_special_mapping(mm, vdso_base, vdso_text_len,
		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
		vdso_pagelist);
		vdso_info.cm);

	if (unlikely(ret))
		goto end;
	if (IS_ERR(ret))
		goto up_fail;

	/*
	 * Put vDSO base into mm struct. We need to do this before calling
	 * install_special_mapping or the perf counter mmap tracking code
	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
	 */
	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
	return 0;

end:
	mmap_write_unlock(mm);
	return ret;
up_fail:
	mm->context.vdso = NULL;
	return PTR_ERR(ret);
}

const char *arch_vma_name(struct vm_area_struct *vma)
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
		return "[vdso]";
	if (vma->vm_mm && (vma->vm_start ==
			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
		return "[vdso_data]";
	return NULL;
	struct mm_struct *mm = current->mm;
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = __setup_additional_pages(mm, bprm, uses_interp);
	mmap_write_unlock(mm);

	return ret;
}
Loading