Commit 15bbeec0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'core-entry-2023-04-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull core entry/ptrace update from Thomas Gleixner:
 "Provide a ptrace set/get interface for syscall user dispatch. The main
  purpose is to enable checkpoint/restore (CRIU) to handle processes
  which utilize syscall user dispatch correctly"

* tag 'core-entry-2023-04-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  selftest, ptrace: Add selftest for syscall user dispatch config api
  ptrace: Provide set/get interface for syscall user dispatch
  syscall_user_dispatch: Untag selector address before access_ok()
  syscall_user_dispatch: Split up set_syscall_user_dispatch()
parents 29e95a4b 8c8fa605
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -73,6 +73,10 @@ thread-wide, without the need to invoke the kernel directly. selector
can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK.
Any other value should terminate the program with a SIGSYS.

Additionally, a task's syscall user dispatch configuration can be peeked
and poked via the PTRACE_(GET|SET)_SYSCALL_USER_DISPATCH_CONFIG ptrace
requests. This is useful for checkpoint/restart software.

Security Notes
--------------

+18 −0
Original line number Diff line number Diff line
@@ -22,6 +22,12 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
#define clear_syscall_work_syscall_user_dispatch(tsk) \
	clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH)

/*
 * Copy @task's syscall user dispatch configuration, as a
 * struct ptrace_sud_config, to the user buffer @data.
 *
 * @size must equal sizeof(struct ptrace_sud_config), otherwise -EINVAL is
 * returned. Returns -EFAULT if the copy to user space fails, 0 on success.
 */
int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
				     void __user *data);

/*
 * Read a struct ptrace_sud_config from the user buffer @data and apply it
 * as @task's syscall user dispatch configuration.
 *
 * @size must equal sizeof(struct ptrace_sud_config), otherwise -EINVAL is
 * returned. Returns -EFAULT if the copy from user space fails; otherwise the
 * result of validating and applying the requested configuration.
 */
int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
				     void __user *data);

#else
struct syscall_user_dispatch {};

@@ -35,6 +41,18 @@ static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *
{
}

/*
 * Fallback used in the #else branch of the CONFIG_GENERIC_ENTRY conditional:
 * there is no syscall user dispatch state to report, so the request is
 * always rejected with -EINVAL.
 */
static inline int
syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
				 void __user *data)
{
	return -EINVAL;
}

/*
 * Fallback used in the #else branch of the CONFIG_GENERIC_ENTRY conditional:
 * no syscall user dispatch configuration can be applied, so the request is
 * always rejected with -EINVAL.
 */
static inline int
syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
				 void __user *data)
{
	return -EINVAL;
}

#endif /* CONFIG_GENERIC_ENTRY */

#endif /* _SYSCALL_USER_DISPATCH_H */
+30 −0
Original line number Diff line number Diff line
@@ -112,6 +112,36 @@ struct ptrace_rseq_configuration {
	__u32 pad;
};

#define PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG 0x4210
#define PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG 0x4211

/*
 * struct ptrace_sud_config - Per-task configuration for Syscall User Dispatch
 * @mode:	One of PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF
 * @selector:	Tracee's user virtual address of the SUD selector
 * @offset:	SUD exclusion area (virtual address)
 * @len:	Length of SUD exclusion area
 *
 * Used to get/set the syscall user dispatch configuration for a tracee
 * via PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG and
 * PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG.
 * Selector is optional (may be NULL), and if invalid will produce
 * a SIGSEGV in the tracee upon first access.
 *
 * If mode is PR_SYS_DISPATCH_ON, syscall dispatch will be enabled. If
 * PR_SYS_DISPATCH_OFF, syscall dispatch will be disabled and all other
 * parameters must be 0.  The value in *selector (if not NULL) also determines
 * whether syscall dispatch will occur.
 *
 * The Syscall User Dispatch Exclusion area described by offset/len is the
 * virtual address space from which syscalls will not produce a user
 * dispatch.
 */
struct ptrace_sud_config {
	__u64 mode;
	__u64 selector;	/* user pointer carried as a 64-bit integer */
	__u64 offset;
	__u64 len;
};

/*
 * These values are stored in task->ptrace_message
 * by ptrace_stop to describe the current syscall-stop.
+65 −9
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
 */
#include <linux/sched.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/syscall_user_dispatch.h>
#include <linux/uaccess.h>
#include <linux/signal.h>
@@ -68,8 +69,9 @@ bool syscall_user_dispatch(struct pt_regs *regs)
	return true;
}

int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
			      unsigned long len, char __user *selector)
static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned long mode,
					  unsigned long offset, unsigned long len,
					  char __user *selector)
{
	switch (mode) {
	case PR_SYS_DISPATCH_OFF:
@@ -86,7 +88,16 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
		if (offset && offset + len <= offset)
			return -EINVAL;

		if (selector && !access_ok(selector, sizeof(*selector)))
		/*
		 * access_ok() will clear memory tags for tagged addresses
		 * if current has memory tagging enabled.
		 *
		 * To enable a tracer to set a tracee's selector, the
		 * selector address must be untagged for access_ok(),
		 * otherwise an untagged tracer will always fail to set a
		 * tagged tracee's selector.
		 */
		if (selector && !access_ok(untagged_addr(selector), sizeof(*selector)))
			return -EFAULT;

		break;
@@ -94,15 +105,60 @@ int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
		return -EINVAL;
	}

	current->syscall_dispatch.selector = selector;
	current->syscall_dispatch.offset = offset;
	current->syscall_dispatch.len = len;
	current->syscall_dispatch.on_dispatch = false;
	task->syscall_dispatch.selector = selector;
	task->syscall_dispatch.offset = offset;
	task->syscall_dispatch.len = len;
	task->syscall_dispatch.on_dispatch = false;

	if (mode == PR_SYS_DISPATCH_ON)
		set_syscall_work(SYSCALL_USER_DISPATCH);
		set_task_syscall_work(task, SYSCALL_USER_DISPATCH);
	else
		clear_task_syscall_work(task, SYSCALL_USER_DISPATCH);

	return 0;
}

/*
 * Configure syscall user dispatch for the calling task. This is a thin
 * wrapper that applies the requested mode, exclusion area (@offset/@len)
 * and @selector to 'current' and returns the helper's result unchanged.
 */
int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
			      unsigned long len, char __user *selector)
{
	struct task_struct *self = current;

	return task_set_syscall_user_dispatch(self, mode, offset, len, selector);
}

/**
 * syscall_user_dispatch_get_config - Report a task's syscall user dispatch setup
 * @task:	Task whose syscall user dispatch state is read
 * @size:	Size of the buffer at @data; must be sizeof(struct ptrace_sud_config)
 * @data:	User space buffer that receives a struct ptrace_sud_config
 *
 * Fills a struct ptrace_sud_config from @task's dispatch state (mode,
 * exclusion area offset/length and selector address) and copies it to @data.
 *
 * Return: 0 on success, -EINVAL if @size does not match the structure size,
 * -EFAULT if the copy to user space fails.
 */
int syscall_user_dispatch_get_config(struct task_struct *task, unsigned long size,
				     void __user *data)
{
	struct syscall_user_dispatch *sd = &task->syscall_dispatch;
	struct ptrace_sud_config cfg;

	if (size != sizeof(cfg))
		return -EINVAL;

	/*
	 * This is a pure query: derive the mode from @task's work flag and do
	 * not modify any task's syscall work bits while doing so.
	 */
	if (test_task_syscall_work(task, SYSCALL_USER_DISPATCH))
		cfg.mode = PR_SYS_DISPATCH_ON;
	else
		cfg.mode = PR_SYS_DISPATCH_OFF;

	cfg.offset = sd->offset;
	cfg.len = sd->len;
	/* The selector is a user pointer; export it as a 64-bit integer. */
	cfg.selector = (__u64)(uintptr_t)sd->selector;

	if (copy_to_user(data, &cfg, sizeof(cfg)))
		return -EFAULT;

	return 0;
}

/**
 * syscall_user_dispatch_set_config - Apply a user supplied SUD setup to a task
 * @task:	Task whose syscall user dispatch state is updated
 * @size:	Size of the buffer at @data; must be sizeof(struct ptrace_sud_config)
 * @data:	User space buffer holding a struct ptrace_sud_config
 *
 * Return: -EINVAL if @size does not match the structure size, -EFAULT if the
 * structure cannot be read from user space, otherwise the result of
 * task_set_syscall_user_dispatch() for the requested configuration.
 */
int syscall_user_dispatch_set_config(struct task_struct *task, unsigned long size,
				     void __user *data)
{
	struct ptrace_sud_config req;
	char __user *sel;

	if (size != sizeof(req))
		return -EINVAL;

	if (copy_from_user(&req, data, sizeof(req)) != 0)
		return -EFAULT;

	/* The selector travels as a 64-bit integer; turn it back into a user pointer. */
	sel = (char __user *)(uintptr_t)req.selector;

	return task_set_syscall_user_dispatch(task, req.mode, req.offset, req.len, sel);
}
+9 −0
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@
#include <linux/compat.h>
#include <linux/sched/signal.h>
#include <linux/minmax.h>
#include <linux/syscall_user_dispatch.h>

#include <asm/syscall.h>	/* for syscall_get_* */

@@ -1259,6 +1260,14 @@ int ptrace_request(struct task_struct *child, long request,
		break;
#endif

	case PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG:
		ret = syscall_user_dispatch_set_config(child, addr, datavp);
		break;

	case PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG:
		ret = syscall_user_dispatch_get_config(child, addr, datavp);
		break;

	default:
		break;
	}
Loading