Commit 68cadad1 authored by Linus Torvalds
Pull RCU updates from Paul McKenney:

 - Documentation updates

 - Miscellaneous fixes, perhaps most notably simplifying
   SRCU_NOTIFIER_INIT() as suggested

 - RCU Tasks updates, most notably treating Tasks RCU callbacks as lazy
   while still treating synchronous grace periods as urgent. Also
   included are one fix that restores the ability to apply debug-objects
   to RCU Tasks and another that eliminates a race condition that could
   result in false-positive failures of the boot-time self-test code

 - RCU-scalability performance-test updates, most notably adding the
   ability to measure the RCU Tasks grace-period kthread's CPU
   consumption. This proved quite useful for the RCU Tasks work

 - Reference-acquisition/release performance-test updates, including a
   fix for an uninitialized wait_queue_head_t

 - Miscellaneous torture-test updates

 - Torture-test scripting updates, including removal of the
   no-longer-functional formal-verification scripts, test builds of
   individual RCU Tasks flavors, better diagnostics for loss of
   connectivity for distributed rcutorture tests, disabling of reboot
   loops in qemu/KVM-based rcutorture testing, and passing of init
   parameters to rcutorture's init program

* tag 'rcu.2023.08.21a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: (64 commits)
  rcu: Use WRITE_ONCE() for assignments to ->next for rculist_nulls
  rcu: Make the rcu_nocb_poll boot parameter usable via boot config
  rcu: Mark __rcu_irq_enter_check_tick() ->rcu_urgent_qs load
  srcu,notifier: Remove #ifdefs in favor of SRCU Tiny srcu_usage
  rcutorture: Stop right-shifting torture_random() return values
  torture: Stop right-shifting torture_random() return values
  torture: Move stutter_wait() timeouts to hrtimers
  torture: Move torture_shuffle() timeouts to hrtimers
  torture: Move torture_onoff() timeouts to hrtimers
  torture: Make torture_hrtimeout_*() use TASK_IDLE
  torture: Add lock_torture writer_fifo module parameter
  torture: Add a kthread-creation callback to _torture_create_kthread()
  rcu-tasks: Fix boot-time RCU tasks debug-only deadlock
  rcu-tasks: Permit use of debug-objects with RCU Tasks flavors
  checkpatch: Complain about unexpected uses of RCU Tasks Trace
  torture: Cause mkinitrd.sh to indicate failure on compile errors
  torture: Make init program dump command-line arguments
  torture: Switch qemu from -nographic to -display none
  torture: Add init-program support for loongarch
  torture: Avoid torture-test reboot loops
  ...
parents 727dbda1 fe24a0b6
+1 −1
@@ -10,7 +10,7 @@ misuses of the RCU API, most notably using one of the rcu_dereference()
family to access an RCU-protected pointer without the proper protection.
When such misuse is detected, a lockdep-RCU splat is emitted.

-The usual cause of a lockdep-RCU slat is someone accessing an
+The usual cause of a lockdep-RCU splat is someone accessing an
RCU-protected data structure without either (1) being in the right kind of
RCU read-side critical section or (2) holding the right update-side lock.
This problem can therefore be serious: it might result in random memory
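
As a sketch of the misuse class described above (every name below is
illustrative, not taken from any kernel source), a splat-provoking
access and its repaired form look roughly like this::

  struct foo {
    struct foo __rcu *next;
  };

  /* Splat: rcu_dereference() outside any RCU read-side critical
   * section and without the update-side lock held. */
  struct foo *broken(struct foo *f)
  {
    return rcu_dereference(f->next);
  }

  /* No splat: the access is covered by rcu_read_lock(). */
  bool fixed(struct foo *f)
  {
    bool nonempty;

    rcu_read_lock();
    nonempty = rcu_dereference(f->next) != NULL;
    rcu_read_unlock();
    return nonempty;
  }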
+27 −11
@@ -18,7 +18,16 @@ to solve following problem.

Without 'nulls', a typical RCU linked list managing objects which are
allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
-algorithms:
+algorithms.  The following examples assume 'obj' is a pointer to such
+objects, which have the type shown below.
+
+::
+
+  struct object {
+    struct hlist_node obj_node;
+    atomic_t refcnt;
+    unsigned int key;
+  };

1) Lookup algorithm
-------------------
@@ -26,11 +35,13 @@ algorithms:
::

  begin:
-  rcu_read_lock()
+  rcu_read_lock();
  obj = lockless_lookup(key);
  if (obj) {
-    if (!try_get_ref(obj)) // might fail for free objects
+    if (!try_get_ref(obj)) { // might fail for free objects
      rcu_read_unlock();
      goto begin;
+    }
    /*
    * Because a writer could delete the object, and a writer could
    * reuse these objects before the RCU grace period, we
@@ -54,7 +65,7 @@ but a version with an additional memory barrier (smp_rmb())
    struct hlist_node *node, *next;
    for (pos = rcu_dereference((head)->first);
         pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
-         ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+         ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
         pos = rcu_dereference(next))
      if (obj->key == key)
        return obj;
@@ -66,7 +77,7 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
  struct hlist_node *node;
  for (pos = rcu_dereference((head)->first);
       pos && ({ prefetch(pos->next); 1; }) &&
-       ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+       ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
       pos = rcu_dereference(pos->next))
    if (obj->key == key)
      return obj;
@@ -86,7 +97,7 @@ Quoting Corey Minyard::
2) Insertion algorithm
----------------------

-We need to make sure a reader cannot read the new 'obj->obj_next' value
+We need to make sure a reader cannot read the new 'obj->obj_node.next' value
and previous value of 'obj->key'. Otherwise, an item could be deleted
from a chain, and inserted into another chain. If new chain was empty
before the move, 'next' pointer is NULL, and lockless reader can not
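
A minimal sketch of the ordering this paragraph requires, mirroring the
insertion code this section goes on to present (not visible in this
hunk; 'cachep', 'chain_lock', and 'head' are illustrative names)::

  obj = kmem_cache_alloc(cachep, GFP_ATOMIC);
  if (!obj)
    return -ENOMEM;
  spin_lock(&chain_lock);              // serialize writers on this chain
  obj->key = key;                      // initialize before publication
  atomic_set_release(&obj->refcnt, 1); // orders ->key before the refcount
  hlist_add_head_rcu(&obj->obj_node, head); // publication point
  spin_unlock(&chain_lock);

Because hlist_add_head_rcu() publishes through rcu_assign_pointer(), a
reader that observes the new 'next' pointer is also guaranteed to
observe the initialized 'key'.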
@@ -129,8 +140,7 @@ very very fast (before the end of RCU grace period)
Avoiding extra smp_rmb()
========================

-With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
-and extra _release() in insert function.
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup().

For example, if we choose to store the slot number as the 'nulls'
end-of-list marker for each slot of the hash table, we can detect
@@ -142,6 +152,9 @@ the beginning. If the object was moved to the same chain,
then the reader doesn't care: It might occasionally
scan the list again without harm.

+Note that using hlist_nulls means the type of the 'obj_node' field of
+'struct object' becomes 'struct hlist_nulls_node'.


1) lookup algorithm
-------------------
@@ -151,7 +164,7 @@ scan the list again without harm.
  head = &table[slot];
  begin:
  rcu_read_lock();
-  hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
+  hlist_nulls_for_each_entry_rcu(obj, node, head, obj_node) {
    if (obj->key == key) {
      if (!try_get_ref(obj)) { // might fail for free objects
	rcu_read_unlock();
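
The hunk ends before the tail of this loop; in the surrounding document
the lookup concludes by re-checking the 'nulls' end-of-list marker,
along these lines::

  // If the traversal ended on a 'nulls' marker belonging to a
  // different slot, the object moved between chains; restart.
  if (get_nulls_value(node) != slot)
    goto begin;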
@@ -182,6 +195,9 @@ scan the list again without harm.
2) Insert algorithm
-------------------

+The same as the insertion algorithm above, but using
+hlist_nulls_add_head_rcu() instead of hlist_add_head_rcu().

::

  /*
+55 −1
@@ -2938,6 +2938,10 @@
	locktorture.torture_type= [KNL]
			Specify the locking implementation to test.

+	locktorture.writer_fifo= [KNL]
+			Run the write-side locktorture kthreads at
+			sched_set_fifo() real-time priority.
+
	locktorture.verbose= [KNL]
			Enable additional printk() statements.

@@ -4949,6 +4953,15 @@
			test until boot completes in order to avoid
			interference.

+	rcuscale.kfree_by_call_rcu= [KNL]
+			In kernels built with CONFIG_RCU_LAZY=y, test
+			call_rcu() instead of kfree_rcu().
+
+	rcuscale.kfree_mult= [KNL]
+			Instead of allocating an object of size kfree_obj,
+			allocate one of kfree_mult * sizeof(kfree_obj).
+			Defaults to 1.
+
	rcuscale.kfree_rcu_test= [KNL]
			Set to measure performance of kfree_rcu() flooding.

@@ -4974,6 +4987,12 @@
			Number of loops doing rcuscale.kfree_alloc_num number
			of allocations and frees.

+	rcuscale.minruntime= [KNL]
+			Set the minimum test run time in seconds.  This
+			does not affect the data-collection interval,
+			but instead allows better measurement of things
+			like CPU consumption.
+
	rcuscale.nreaders= [KNL]
			Set number of RCU readers.  The value -1 selects
			N, where N is the number of CPUs.  A value
@@ -4988,7 +5007,7 @@
			the same as for rcuscale.nreaders.
			N, where N is the number of CPUs

-	rcuscale.perf_type= [KNL]
+	rcuscale.scale_type= [KNL]
			Specify the RCU implementation to test.

	rcuscale.shutdown= [KNL]
@@ -5004,6 +5023,11 @@
			in microseconds.  The default of zero says
			no holdoff.

+	rcuscale.writer_holdoff_jiffies= [KNL]
+			Additional write-side holdoff between grace
+			periods, but in jiffies.  The default of zero
+			says no holdoff.
+
	rcutorture.fqs_duration= [KNL]
			Set duration of force_quiescent_state bursts
			in microseconds.
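
As a usage illustration only (the values are arbitrary and 'tasks' is
assumed to be a registered rcuscale.scale_type), the new rcuscale
options combine on the kernel command line like any other module
parameters::

  rcuscale.scale_type=tasks rcuscale.minruntime=30 rcuscale.writer_holdoff_jiffies=2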
@@ -5285,6 +5309,13 @@
			number avoids disturbing real-time workloads,
			but lengthens grace periods.

+	rcupdate.rcu_task_lazy_lim= [KNL]
+			Number of callbacks on a given CPU that will
+			cancel laziness on that CPU.  Use -1 to disable
+			cancellation of laziness, but be advised that
+			doing so increases the danger of OOM due to
+			callback flooding.
+
	rcupdate.rcu_task_stall_info= [KNL]
			Set initial timeout in jiffies for RCU task stall
			informational messages, which give some indication
@@ -5314,6 +5345,29 @@
			A change in value does not take effect until
			the beginning of the next grace period.

+	rcupdate.rcu_tasks_lazy_ms= [KNL]
+			Set the timeout in milliseconds for RCU Tasks
+			asynchronous callback batching for call_rcu_tasks().
+			A negative value will take the default.  A value
+			of zero will disable batching.  Batching is
+			always disabled for synchronize_rcu_tasks().
+
+	rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
+			Set the timeout in milliseconds for RCU Tasks
+			Rude asynchronous callback batching for
+			call_rcu_tasks_rude().  A negative value
+			will take the default.  A value of zero will
+			disable batching.  Batching is always disabled
+			for synchronize_rcu_tasks_rude().
+
+	rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
+			Set the timeout in milliseconds for RCU Tasks
+			Trace asynchronous callback batching for
+			call_rcu_tasks_trace().  A negative value
+			will take the default.  A value of zero will
+			disable batching.  Batching is always disabled
+			for synchronize_rcu_tasks_trace().
+
	rcupdate.rcu_self_test= [KNL]
			Run the RCU early boot self tests
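
An illustrative boot line exercising the new Tasks-RCU laziness knobs
(values arbitrary; per the text above, zero disables batching and the
limit cancels laziness once that many callbacks are queued)::

  rcupdate.rcu_tasks_lazy_ms=0 rcupdate.rcu_task_lazy_lim=32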

+0 −11
@@ -73,9 +73,7 @@ struct raw_notifier_head {

struct srcu_notifier_head {
	struct mutex mutex;
-#ifdef CONFIG_TREE_SRCU
	struct srcu_usage srcuu;
-#endif
	struct srcu_struct srcu;
	struct notifier_block __rcu *head;
};
@@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#define RAW_NOTIFIER_INIT(name)	{				\
		.head = NULL }

-#ifdef CONFIG_TREE_SRCU
#define SRCU_NOTIFIER_INIT(name, pcpu)				\
	{							\
		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
@@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
		.srcuu = __SRCU_USAGE_INIT(name.srcuu),		\
		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
	}
-#else
-#define SRCU_NOTIFIER_INIT(name, pcpu)				\
-	{							\
-		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
-		.head = NULL,					\
-		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
-	}
-#endif

#define ATOMIC_NOTIFIER_HEAD(name)				\
	struct atomic_notifier_head name =			\
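
With the #ifdefs gone, a statically defined SRCU notifier head expands
the same way under Tiny and Tree SRCU. A minimal usage sketch (the
notifier head, block, and callback names are all illustrative)::

  static int my_event_cb(struct notifier_block *nb,
                         unsigned long action, void *data)
  {
    return NOTIFY_OK; /* illustrative no-op handler */
  }

  static struct notifier_block my_nb = {
    .notifier_call = my_event_cb,
  };

  SRCU_NOTIFIER_HEAD_STATIC(my_notifier);

  static int __init my_init(void)
  {
    srcu_notifier_chain_register(&my_notifier, &my_nb);
    srcu_notifier_call_chain(&my_notifier, 0, NULL);
    return 0;
  }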
+2 −2
@@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
{
	struct hlist_nulls_node *first = h->first;

-	n->next = first;
+	WRITE_ONCE(n->next, first);
	WRITE_ONCE(n->pprev, &h->first);
	rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
	if (!is_a_nulls(first))
@@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
		last = i;

	if (last) {
-	n->next = last->next;
+	WRITE_ONCE(n->next, last->next);
		n->pprev = &last->next;
		rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
	} else {
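
These stores became marked because lockless readers can load '->next'
concurrently with insertion, so a plain store would be a C-language
data race (and a KCSAN report). A sketch of the reader-side pairing,
reusing 'struct object' from the rculist_nulls document above
('has_next' is a hypothetical helper)::

  static bool has_next(struct object *obj)
  {
    struct hlist_nulls_node *next;

    rcu_read_lock();
    /* Pairs with the writer's WRITE_ONCE(n->next, ...). */
    next = rcu_dereference(hlist_nulls_next_rcu(&obj->obj_node));
    rcu_read_unlock();
    return !is_a_nulls(next);
  }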