Commit 7db99f01 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86_cpu_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cpu updates from Borislav Petkov:

 - Print the CPU number at segfault time.

   The number printed is not always accurate (preemption is enabled at
   that time) but the print string contains "likely" and after a lot of
   back'n'forth on this, this was the consensus that was reached. See
   thread at [1].

 - After a *lot* of testing and polishing, the clear_user() improvements
   have finally landed: REP; STOSB is now inlined by default.

Link: https://lore.kernel.org/r/5d62c1d0-7425-d5bb-ecb5-1dc3b4d7d245@intel.com [1]

* tag 'x86_cpu_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Print likely CPU at segfault time
  x86/clear_user: Make it faster
parents ba94a7a9 c926087e
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -502,9 +502,6 @@ strncpy_from_user(char *dst, const char __user *src, long count);

extern __must_check long strnlen_user(const char __user *str, long n);

unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);

#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned len);
@@ -526,6 +523,8 @@ extern struct movsl_mask {
#define ARCH_HAS_NOCACHE_UACCESS 1

#ifdef CONFIG_X86_32
/*
 * Prototypes used only when CONFIG_X86_32 is set. NOTE(review): the
 * 64-bit build presumably picks up clear_user()/__clear_user() from
 * <asm/uaccess_64.h> instead -- confirm against that header.
 */
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
# include <asm/uaccess_32.h>
#else
# include <asm/uaccess_64.h>
+45 −0
Original line number Diff line number Diff line
@@ -79,4 +79,49 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
	kasan_check_write(dst, size);
	return __copy_user_flushcache(dst, src, size);
}

/*
 * Zero Userspace.
 *
 * The three routines declared here are the call targets named in the
 * ALTERNATIVE_3() inside __clear_user().
 * NOTE(review): they appear to be assembly routines using a register-based
 * convention (%rdi destination, %rcx count in / uncleared bytes out) rather
 * than ordinary C calls -- confirm before calling them directly from C.
 */

__must_check unsigned long
clear_user_original(void __user *addr, unsigned long len);
__must_check unsigned long
clear_user_rep_good(void __user *addr, unsigned long len);
__must_check unsigned long
clear_user_erms(void __user *addr, unsigned long len);

/*
 * __clear_user - zero @size bytes of user memory starting at @addr.
 *
 * No access_ok() check is done here; clear_user() is the checked wrapper.
 *
 * Returns whatever is left in the count register after the asm statement.
 * Per the contract documented on the clear_user_*() helpers, that is the
 * number of bytes that could not be cleared, or 0 on success.
 */
static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
{
	might_fault();
	/* stac()/clac() bracket the user-memory access done by the asm below. */
	stac();

	/*
	 * No memory constraint because it doesn't change any memory gcc
	 * knows about.
	 */
	asm volatile(
		"1:\n\t"
		/*
		 * The inline "rep stosb" is the default. Each following entry
		 * names a call to substitute when the CPU lacks the given
		 * feature (FSRM, ERMS, REP_GOOD).
		 * NOTE(review): presumably a later matching entry overrides an
		 * earlier one, so a CPU lacking all three ends up calling
		 * clear_user_original -- confirm against ALTERNATIVE_3().
		 */
		ALTERNATIVE_3("rep stosb",
			      "call clear_user_erms",	  ALT_NOT(X86_FEATURE_FSRM),
			      "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS),
			      "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD))
		"2:\n"
		/* A fault on the instruction at 1: resumes at 2:. */
	       _ASM_EXTABLE_UA(1b, 2b)
		/*
		 * size lives in %rcx and addr in %rdi, both read and written;
		 * %rax is loaded with 0, the value being stored.
		 */
	       : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
	       : "a" (0)
		/* rep_good clobbers %rdx */
	       : "rdx");

	clac();

	return size;
}

/*
 * clear_user - checked variant of __clear_user().
 *
 * If the range [to, to + n) fails access_ok(), nothing is cleared and the
 * full count @n is returned as "bytes not cleared". Otherwise the result of
 * __clear_user() is passed straight through.
 */
static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
{
	/* Reject ranges that fail the access check before touching them. */
	if (!access_ok(to, n))
		return n;

	return __clear_user(to, n);
}
#endif /* _ASM_X86_UACCESS_64_H */
+138 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
@@ -50,3 +51,140 @@ SYM_FUNC_START(clear_page_erms)
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)

/*
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 *
 * rax is used as scratch for the tail byte count and is zeroed again on
 * every exit path. The stores below write an immediate 0, so nothing here
 * reads rax before it is overwritten.
 */
SYM_FUNC_START(clear_user_original)
	/*
	 * Copy only the lower 32 bits of size as that is enough to handle the rest bytes,
	 * i.e., no need for a 'q' suffix and thus a REX prefix.
	 */
	mov %ecx,%eax
	# rcx = number of whole qwords; skip the qword loop when there are none
	shr $3,%rcx
	jz .Lrest_bytes

	# do the qwords first
	.p2align 4
.Lqwords:
	# this store is the instruction covered by the .Lqwords extable entry
	movq $0,(%rdi)
	lea 8(%rdi),%rdi
	dec %rcx
	jnz .Lqwords

.Lrest_bytes:
	# eax = count % 8, the tail bytes left after the qword loop
	and $7,  %eax
	jz .Lexit

	# now do the rest bytes
.Lbytes:
	# this store is the instruction covered by the .Lbytes extable entry
	movb $0,(%rdi)
	inc %rdi
	dec %eax
	jnz .Lbytes

.Lexit:
	/*
	 * %rax still needs to be cleared in the exception case because this function is called
	 * from inline asm and the compiler expects %rax to be zero when exiting the inline asm,
	 * in case it might reuse it somewhere.
	 */
        xor %eax,%eax
        RET

.Lqwords_exception:
	/*
	 * Fault in the qword loop: rcx still counts the qwords not yet
	 * stored (the faulting one included, since dec runs after the
	 * store) and eax still holds the original low 32 bits of the count.
	 */
        # convert remaining qwords back into bytes to return to caller
        shl $3, %rcx
        and $7, %eax
        add %rax,%rcx
        jmp .Lexit

.Lbytes_exception:
	# fault in the byte loop: eax holds the tail bytes not yet stored
        mov %eax,%ecx
        jmp .Lexit

        _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception)
        _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception)
SYM_FUNC_END(clear_user_original)
EXPORT_SYMBOL(clear_user_original)

/*
 * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is
 * present.
 * Input:
 * rdi destination
 * rcx count
 * rax zero (the value stored by rep stos)
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 *
 * rdx is clobbered (it holds the low 32 bits of the count so the tail byte
 * count can be recovered); rax is zero on return.
 */
SYM_FUNC_START(clear_user_rep_good)
	# less than a cacheline: tail-jump to the original routine, which
	# returns straight to our caller
	cmp $64, %rcx
	jb clear_user_original

.Lprep:
	# copy lower 32-bits for rest bytes
	mov %ecx, %edx
	# rcx = number of whole qwords
	shr $3, %rcx
	jz .Lrep_good_rest_bytes

.Lrep_good_qwords:
	rep stosq

.Lrep_good_rest_bytes:
	# edx = count % 8, the tail bytes left after the qword stores
	and $7, %edx
	jz .Lrep_good_exit

	mov %edx, %ecx
	/*
	 * The exception table is keyed on the address of the faulting
	 * instruction, so the label has to sit on the rep stosb itself and
	 * not on the register move preceding it. Otherwise a fault while
	 * clearing the tail bytes would find no fixup entry.
	 */
.Lrep_good_bytes:
	rep stosb

.Lrep_good_exit:
	/*
	 * %rax must be zero on exit: this function is called from inline asm
	 * which told the compiler that %rax holds 0. On the byte-fault path
	 * rcx already holds the number of bytes not cleared.
	 */
	xor %eax, %eax
	RET

.Lrep_good_qwords_exception:
	# convert remaining qwords back into bytes to return to caller
	shl $3, %rcx
	and $7, %edx
	add %rdx, %rcx
	jmp .Lrep_good_exit

	_ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception)
	_ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit)
SYM_FUNC_END(clear_user_rep_good)
EXPORT_SYMBOL(clear_user_rep_good)

/*
 * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is present.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 *
 * rax is zeroed on every exit path of this routine.
 */
SYM_FUNC_START(clear_user_erms)
	# call the original thing for less than a cacheline
	# (this is a jump, not a call: clear_user_original returns to our caller)
	cmp $64, %rcx
	jb clear_user_original

.Lerms_bytes:
	# the label sits directly on the instruction that may fault
	rep stosb

.Lerms_exit:
	# also the fixup target: rcx is left untouched here and is the return value
	xorl %eax,%eax
	RET

	_ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit)
SYM_FUNC_END(clear_user_erms)
EXPORT_SYMBOL(clear_user_erms)
+0 −40
Original line number Diff line number Diff line
@@ -14,46 +14,6 @@
 * Zero Userspace
 */

/*
 * __clear_user - zero @size bytes of user memory at @addr without an
 * access_ok() check.
 *
 * The asm first stores size / 8 zero qwords (loop at label 0), then
 * size & 7 zero bytes (loop at label 1). The value left in %rcx is
 * returned; it is 0 when both loops run to completion.
 *
 * NOTE(review): both loops decrement the 32-bit %ecx, so counts that do not
 * fit in 32 bits would not be handled as written -- confirm whether callers
 * can pass such sizes.
 */
unsigned long __clear_user(void __user *addr, unsigned long size)
{
	long __d0;
	might_fault();
	/* no memory constraint because it doesn't change any memory gcc knows
	   about */
	stac();
	/*
	 * Operands: [size8] is the qword count in %rcx (also the output that
	 * ends up in size), [dst] is the destination in %rdi, [size1] is the
	 * tail byte count in any register.
	 *
	 * A fault in the byte loop (1b) resumes at 2b with %rcx as it stands.
	 * NOTE(review): for a fault in the qword loop (0b) the
	 * EX_TYPE_UCOPY_LEN8 fixup presumably rebuilds the remaining byte
	 * count from %rcx * 8 + [size1] -- confirm against the extable handler.
	 */
	asm volatile(
		"	testq  %[size8],%[size8]\n"
		"	jz     4f\n"
		"	.align 16\n"
		"0:	movq $0,(%[dst])\n"
		"	addq   $8,%[dst]\n"
		"	decl %%ecx ; jnz   0b\n"
		"4:	movq  %[size1],%%rcx\n"
		"	testl %%ecx,%%ecx\n"
		"	jz     2f\n"
		"1:	movb   $0,(%[dst])\n"
		"	incq   %[dst]\n"
		"	decl %%ecx ; jnz  1b\n"
		"2:\n"

		_ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN8, %[size1])
		_ASM_EXTABLE_UA(1b, 2b)

		: [size8] "=&c"(size), [dst] "=&D" (__d0)
		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
	clac();
	return size;
}
EXPORT_SYMBOL(__clear_user);

/*
 * clear_user - checked variant of __clear_user().
 *
 * Returns @n untouched when the range fails access_ok(); otherwise returns
 * whatever __clear_user() reports.
 */
unsigned long clear_user(void __user *to, unsigned long n)
{
	/* Bail out early on a range that fails the access check. */
	if (!access_ok(to, n))
		return n;

	return __clear_user(to, n);
}
EXPORT_SYMBOL(clear_user);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * clean_cache_range - write back a cache range with CLWB
+10 −0
Original line number Diff line number Diff line
@@ -769,6 +769,8 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
		unsigned long address, struct task_struct *tsk)
{
	const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG;
	/* This is a racy snapshot, but it's better than nothing. */
	int cpu = raw_smp_processor_id();

	if (!unhandled_signal(tsk, SIGSEGV))
		return;
@@ -782,6 +784,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,

	print_vma_addr(KERN_CONT " in ", regs->ip);

	/*
	 * Dump the likely CPU where the fatal segfault happened.
	 * This can help identify faulty hardware.
	 */
	printk(KERN_CONT " likely on CPU %d (core %d, socket %d)", cpu,
	       topology_core_id(cpu), topology_physical_package_id(cpu));


	printk(KERN_CONT "\n");

	show_opcodes(regs, loglvl);
Loading