Commit aebc7b0d authored by Marco Elver's avatar Marco Elver Committed by Kees Cook
Browse files

list: Introduce CONFIG_LIST_HARDENED

Numerous production kernel configs (see [1, 2]) are choosing to enable
CONFIG_DEBUG_LIST, which is also being recommended by KSPP for hardened
configs [3]. The motivation behind this is that the option can be used
as a security hardening feature (e.g. CVE-2019-2215 and CVE-2019-2025
are mitigated by the option [4]).

The feature has never been designed with performance in mind, yet common
list manipulation is happening across hot paths all over the kernel.

Introduce CONFIG_LIST_HARDENED, which performs list pointer checking
inline, and only upon list corruption calls the reporting slow path.

To generate optimal machine code with CONFIG_LIST_HARDENED:

  1. Elide checking for pointer values which upon dereference would
     result in an immediate access fault (i.e. minimal hardening
     checks).  The trade-off is lower-quality error reports.

  2. Use the __preserve_most function attribute (available with Clang,
     but not yet with GCC) to minimize the code footprint for calling
     the reporting slow path. As a result, function size of callers is
     reduced by avoiding saving registers before calling the rarely
     called reporting slow path.

     Note that all TUs in lib/Makefile already disable function tracing,
     including list_debug.c, and __preserve_most's implied notrace has
     no effect in this case.

  3. Because the inline checks are a subset of the full set of checks in
     __list_*_valid_or_report(), always return false if the inline
     checks failed.  This avoids redundant compare and conditional
     branch right after return from the slow path.

As a side-effect of the checks being inline, if the compiler can prove
some condition to always be true, it can completely elide some checks.

Since DEBUG_LIST is functionally a superset of LIST_HARDENED, the
Kconfig variables are changed to reflect that: DEBUG_LIST selects
LIST_HARDENED, whereas LIST_HARDENED itself has no dependency on
DEBUG_LIST.

Running netperf with CONFIG_LIST_HARDENED (using a Clang compiler with
"preserve_most") shows throughput improvements, in my case of ~7% on
average (up to 20-30% on some test cases).

Link: https://r.android.com/1266735 [1]
Link: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/blob/main/config [2]
Link: https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings [3]
Link: https://googleprojectzero.blogspot.com/2019/11/bad-binder-android-in-wild-exploit.html

 [4]
Signed-off-by: default avatarMarco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20230811151847.1594958-3-elver@google.com


Signed-off-by: default avatarKees Cook <keescook@chromium.org>
parent b16c42c8
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
hyp-obj-y += $(lib-objs)

##
+2 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ static inline __must_check bool nvhe_check_data_corruption(bool v)

/* The predicates checked here are taken from lib/list_debug.c. */

__list_valid_slowpath
bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
				struct list_head *next)
{
@@ -37,6 +38,7 @@ bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
	return true;
}

__list_valid_slowpath
bool __list_del_entry_valid_or_report(struct list_head *entry)
{
	struct list_head *prev, *next;
+2 −2
Original line number Diff line number Diff line
@@ -393,7 +393,7 @@ static void lkdtm_CORRUPT_LIST_ADD(void)
		pr_err("Overwrite did not happen, but no BUG?!\n");
	else {
		pr_err("list_add() corruption not detected!\n");
		pr_expected_config(CONFIG_DEBUG_LIST);
		pr_expected_config(CONFIG_LIST_HARDENED);
	}
}

@@ -420,7 +420,7 @@ static void lkdtm_CORRUPT_LIST_DEL(void)
		pr_err("Overwrite did not happen, but no BUG?!\n");
	else {
		pr_err("list_del() corruption not detected!\n");
		pr_expected_config(CONFIG_DEBUG_LIST);
		pr_expected_config(CONFIG_LIST_HARDENED);
	}
}

+58 −6
Original line number Diff line number Diff line
@@ -38,39 +38,91 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
	WRITE_ONCE(list->prev, list);
}

#ifdef CONFIG_LIST_HARDENED

#ifdef CONFIG_DEBUG_LIST
# define __list_valid_slowpath
#else
# define __list_valid_slowpath __cold __preserve_most
#endif

/*
 * Performs the full set of list corruption checks before __list_add().
 * On list corruption reports a warning, and returns false.
 */
extern bool __list_add_valid_or_report(struct list_head *new,
extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
							     struct list_head *prev,
							     struct list_head *next);

/*
 * Performs list corruption checks before __list_add(). Returns false if a
 * corruption is detected, true otherwise.
 *
 * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
 * inline to catch non-faulting corruptions, and only if a corruption is
 * detected calls the reporting function __list_add_valid_or_report().
 */
static __always_inline bool __list_add_valid(struct list_head *new,
					     struct list_head *prev,
					     struct list_head *next)
{
	return __list_add_valid_or_report(new, prev, next);
	bool ret = true;

	if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
		/*
		 * With the hardening version, elide checking if next and prev
		 * are NULL, since the immediate dereference of them below would
		 * result in a fault if NULL.
		 *
		 * With the reduced set of checks, we can afford to inline the
		 * checks, which also gives the compiler a chance to elide some
		 * of them completely if they can be proven at compile-time. If
		 * one of the pre-conditions does not hold, the slow-path will
		 * show a report which pre-condition failed.
		 */
		if (likely(next->prev == prev && prev->next == next && new != prev && new != next))
			return true;
		ret = false;
	}

	ret &= __list_add_valid_or_report(new, prev, next);
	return ret;
}

/*
 * Performs the full set of list corruption checks before __list_del_entry().
 * On list corruption reports a warning, and returns false.
 */
extern bool __list_del_entry_valid_or_report(struct list_head *entry);
extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);

/*
 * Performs list corruption checks before __list_del_entry(). Returns false if a
 * corruption is detected, true otherwise.
 *
 * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
 * inline to catch non-faulting corruptions, and only if a corruption is
 * detected calls the reporting function __list_del_entry_valid_or_report().
 */
static __always_inline bool __list_del_entry_valid(struct list_head *entry)
{
	return __list_del_entry_valid_or_report(entry);
	bool ret = true;

	if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
		struct list_head *prev = entry->prev;
		struct list_head *next = entry->next;

		/*
		 * With the hardening version, elide checking if next and prev
		 * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate
		 * dereference of them below would result in a fault.
		 */
		if (likely(prev->next == entry && next->prev == entry))
			return true;
		ret = false;
	}

	ret &= __list_del_entry_valid_or_report(entry);
	return ret;
}
#else
static inline bool __list_add_valid(struct list_head *new,
+7 −2
Original line number Diff line number Diff line
@@ -1674,9 +1674,14 @@ menu "Debug kernel data structures"
config DEBUG_LIST
	bool "Debug linked list manipulation"
	depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION
	select LIST_HARDENED
	help
	  Enable this to turn on extended checks in the linked-list
	  walking routines.
	  Enable this to turn on extended checks in the linked-list walking
	  routines.

	  This option trades better quality error reports for performance, and
	  is more suitable for kernel debugging. If you care about performance,
	  you should only enable CONFIG_LIST_HARDENED instead.

	  If unsure, say N.

Loading