Commit 3aec4ecb authored by Brian Gerst, committed by Peter Zijlstra
Browse files

x86: Rewrite ret_from_fork() in C



When kCFI is enabled, special handling is needed for the indirect call
to the kernel thread function.  Rewrite the ret_from_fork() function in
C so that the compiler can properly handle the indirect call.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Link: https://lkml.kernel.org/r/20230623225529.34590-3-brgerst@gmail.com
parent 81f755d5
Loading
Loading
Loading
Loading
+8 −22
Original line number Diff line number Diff line
@@ -727,36 +727,22 @@ SYM_CODE_END(__switch_to_asm)
 * edi: kernel thread arg
 */
.pushsection .text, "ax"
/*
 * 32-bit entry stub whose address copy_thread() stores as the return
 * address in a new task's frame (ret_from_fork before this change,
 * ret_from_fork_asm after it).
 *
 * NOTE(review): this hunk is rendered without +/- markers, so the removed
 * ret_from_fork body and the added ret_from_fork_asm stub are interleaved
 * (hunk header: 8 additions, 22 removals).  The code lines are kept exactly
 * as shown; the tags below mark each run of lines as [old] (removed),
 * [new] (added) or [both] (unchanged context).
 *
 * [new] stub: hands prev (EAX), regs (EDX), fn (ECX) and fn_arg (one stack
 *       word) to the C function ret_from_fork(), then executes RET.
 * [old] body: called schedule_tail(), ran the kernel thread function via
 *       CALL_NOSPEC when EBX was non-zero, and called
 *       syscall_exit_to_user_mode() itself.
 */
/* [old] */
SYM_CODE_START(ret_from_fork)
/* [new] */
SYM_CODE_START(ret_from_fork_asm)
	/* [new] taken before the pushes below change ESP */
	movl	%esp, %edx	/* regs */

	/* [both] */
	/* return address for the stack unwinder */
	pushl	$.Lsyscall_32_done

	FRAME_BEGIN
	/* [old] EAX (prev) is pushed for schedule_tail() */
	pushl	%eax
	call	schedule_tail
	/* [new] remaining ret_from_fork() arguments */
	/* prev already in EAX */
	movl	%ebx, %ecx	/* fn */
	pushl	%edi		/* fn_arg */
	call	ret_from_fork
	/* [both] drop the single word pushed before the call */
	addl	$4, %esp
	FRAME_END

	/* [old] from here to SYM_CODE_END(ret_from_fork), except the RET */
	/* EBX holds fn; non-zero selects the kernel thread path at 1: */
	testl	%ebx, %ebx
	jnz	1f		/* kernel threads are uncommon */

2:
	/* When we fork, we trace the syscall return in the child, too. */
	leal    4(%esp), %eax
	call    syscall_exit_to_user_mode
	/* [both] */
	RET

	/* kernel thread */
	/* EDI (kernel thread arg) is moved to EAX for the call through EBX */
1:	movl	%edi, %eax
	CALL_NOSPEC ebx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling kernel_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movl	$0, PT_EAX(%esp)
	jmp	2b
/* [old] */
SYM_CODE_END(ret_from_fork)
/* [new] */
SYM_CODE_END(ret_from_fork_asm)
.popsection

SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
+8 −25
Original line number Diff line number Diff line
@@ -284,36 +284,19 @@ SYM_FUNC_END(__switch_to_asm)
 * r12: kernel thread arg
 */
.pushsection .text, "ax"
	__FUNC_ALIGN
/*
 * 64-bit entry stub whose address copy_thread() stores as the return
 * address in a new task's frame (ret_from_fork before this change,
 * ret_from_fork_asm after it).
 *
 * NOTE(review): this hunk is rendered without +/- markers, so the removed
 * ret_from_fork body and the added ret_from_fork_asm stub are interleaved
 * (hunk header: 8 additions, 25 removals; the __FUNC_ALIGN line just above
 * is one of the removals).  The code lines are kept exactly as shown; the
 * tags below mark each run of lines as [old] (removed), [new] (added) or
 * [both] (unchanged context).
 *
 * [new] stub: loads prev, regs, fn and fn_arg into RDI, RSI, RDX and RCX,
 *       calls the C function ret_from_fork(), then jumps to
 *       swapgs_restore_regs_and_return_to_usermode.
 * [old] body: called schedule_tail(), ran the kernel thread function via
 *       CALL_NOSPEC when RBX was non-zero, and called
 *       syscall_exit_to_user_mode() itself.
 */
/* [old] */
SYM_CODE_START_NOALIGN(ret_from_fork)
	UNWIND_HINT_END_OF_STACK
/* [new] */
SYM_CODE_START(ret_from_fork_asm)
	UNWIND_HINT_REGS
	/* [both] */
	ANNOTATE_NOENDBR // copy_thread
	CALL_DEPTH_ACCOUNT
	/* [old] */
	movq	%rax, %rdi
	call	schedule_tail			/* rdi: 'prev' task parameter */

	/* [old] RBX holds fn; non-zero selects the kernel thread path at 1: */
	testq	%rbx, %rbx			/* from kernel_thread? */
	jnz	1f				/* kernel threads are uncommon */
	/* [new] arguments for ret_from_fork(prev, regs, fn, fn_arg) */
	movq	%rax, %rdi		/* prev */
	movq	%rsp, %rsi		/* regs */
	movq	%rbx, %rdx		/* fn */
	movq	%r12, %rcx		/* fn_arg */
	call	ret_from_fork

	/* [old] label and the three lines after it */
2:
	UNWIND_HINT_REGS
	movq	%rsp, %rdi
	call	syscall_exit_to_user_mode	/* returns with IRQs disabled */
	/* [both] */
	jmp	swapgs_restore_regs_and_return_to_usermode

	/* [old] from here to SYM_CODE_END(ret_from_fork) */
1:
	/* kernel thread */
	UNWIND_HINT_END_OF_STACK
	/* R12 (kernel thread arg) becomes the argument of the call through RBX */
	movq	%r12, %rdi
	CALL_NOSPEC rbx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling kernel_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movq	$0, RAX(%rsp)
	jmp	2b
/* [old] */
SYM_CODE_END(ret_from_fork)
/* [new] */
SYM_CODE_END(ret_from_fork_asm)
.popsection

.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+3 −1
Original line number Diff line number Diff line
@@ -12,7 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
__visible struct task_struct *__switch_to(struct task_struct *prev,
					  struct task_struct *next);

/*
 * NOTE(review): rendered diff without +/- markers (hunk header: 3 additions,
 * 1 removal).  The first prototype below is the removed declaration; the
 * two that follow replace it.
 */
/* [old] ret_from_fork used to name the assembly entry point. */
asmlinkage void ret_from_fork(void);
/* [new] Assembly entry stub; copy_thread() stores it in frame->ret_addr. */
asmlinkage void ret_from_fork_asm(void);
/*
 * [new] C function called by ret_from_fork_asm with the previous task, the
 * register frame, and the kernel thread function and its argument.
 */
__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
			     int (*fn)(void *), void *fn_arg);

/*
 * This is the structure pointed to by thread.sp for an inactive task.  The
+21 −1
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include <linux/static_call.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <linux/entry-common.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <linux/uaccess.h>
@@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
		return do_set_thread_area_64(p, ARCH_SET_FS, tls);
}

/*
 * ret_from_fork - C part of a new task's first return, called from
 *		   ret_from_fork_asm.
 * @prev:	task passed on to schedule_tail()
 * @regs:	register frame passed on to syscall_exit_to_user_mode()
 * @fn:		kernel thread function, or NULL when there is none to call
 * @fn_arg:	argument handed to @fn
 *
 * The call through @fn is made here in C so that the compiler can apply
 * whatever handling an indirect call needs (e.g. when kCFI is enabled).
 */
__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
			     int (*fn)(void *), void *fn_arg)
{
	schedule_tail(prev);

	if (unlikely(fn)) {
		/*
		 * Kernel thread: run its function.  It is only allowed to
		 * come back after a successful kernel_execve(); in that case
		 * store 0 in ax and fall through to the exit-to-user path so
		 * the execve() syscall completes.
		 */
		(void)fn(fn_arg);
		regs->ax = 0;
	}

	syscall_exit_to_user_mode(regs);
}

int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
	unsigned long clone_flags = args->flags;
@@ -149,7 +169,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
	frame = &fork_frame->frame;

	frame->bp = encode_frame_pointer(childregs);
	frame->ret_addr = (unsigned long) ret_from_fork;
	frame->ret_addr = (unsigned long) ret_from_fork_asm;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap = NULL;
	p->thread.iopl_warn = 0;