Commit 68cadad1 authored by Linus Torvalds
Pull RCU updates from Paul McKenney:

 - Documentation updates

 - Miscellaneous fixes, perhaps most notably simplifying
   SRCU_NOTIFIER_INIT() as suggested

 - RCU Tasks updates, most notably treating Tasks RCU callbacks as lazy
   while still treating synchronous grace periods as urgent. Also
   included are one fix that restores the ability to apply debug-objects
   to RCU Tasks and another that eliminates a race condition that could
   result in false-positive failures of the boot-time self-test code

 - RCU-scalability performance-test updates, most notably adding the
   ability to measure the RCU Tasks grace-period kthread's CPU
   consumption. This proved quite useful for the RCU Tasks work

 - Reference-acquisition/release performance-test updates, including a
   fix for an uninitialized wait_queue_head_t

 - Miscellaneous torture-test updates

 - Torture-test scripting updates, including removal of the
   no-longer-functional formal-verification scripts, test builds of
   individual RCU Tasks flavors, better diagnostics for loss of
   connectivity for distributed rcutorture tests, disabling of reboot
   loops in qemu/KVM-based rcutorture testing, and passing of init
   parameters to rcutorture's init program

* tag 'rcu.2023.08.21a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu: (64 commits)
  rcu: Use WRITE_ONCE() for assignments to ->next for rculist_nulls
  rcu: Make the rcu_nocb_poll boot parameter usable via boot config
  rcu: Mark __rcu_irq_enter_check_tick() ->rcu_urgent_qs load
  srcu,notifier: Remove #ifdefs in favor of SRCU Tiny srcu_usage
  rcutorture: Stop right-shifting torture_random() return values
  torture: Stop right-shifting torture_random() return values
  torture: Move stutter_wait() timeouts to hrtimers
  torture: Move torture_shuffle() timeouts to hrtimers
  torture: Move torture_onoff() timeouts to hrtimers
  torture: Make torture_hrtimeout_*() use TASK_IDLE
  torture: Add lock_torture writer_fifo module parameter
  torture: Add a kthread-creation callback to _torture_create_kthread()
  rcu-tasks: Fix boot-time RCU tasks debug-only deadlock
  rcu-tasks: Permit use of debug-objects with RCU Tasks flavors
  checkpatch: Complain about unexpected uses of RCU Tasks Trace
  torture: Cause mkinitrd.sh to indicate failure on compile errors
  torture: Make init program dump command-line arguments
  torture: Switch qemu from -nographic to -display none
  torture: Add init-program support for loongarch
  torture: Avoid torture-test reboot loops
  ...
parents 727dbda1 fe24a0b6
+1 −1
@@ -10,7 +10,7 @@ misuses of the RCU API, most notably using one of the rcu_dereference()
family to access an RCU-protected pointer without the proper protection.
When such misuse is detected, a lockdep-RCU splat is emitted.

-The usual cause of a lockdep-RCU slat is someone accessing an
+The usual cause of a lockdep-RCU splat is someone accessing an
RCU-protected data structure without either (1) being in the right kind of
RCU read-side critical section or (2) holding the right update-side lock.
This problem can therefore be serious: it might result in random memory
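
As a sketch of the misuse class described above (every name below is
illustrative, not taken from any kernel source), a splat-provoking
access and its repaired form look roughly like this::

  struct foo {
    struct foo __rcu *next;
  };

  /* Splat: rcu_dereference() outside any RCU read-side critical
   * section and without the update-side lock held. */
  struct foo *broken(struct foo *f)
  {
    return rcu_dereference(f->next);
  }

  /* No splat: the access is covered by rcu_read_lock(). */
  bool fixed(struct foo *f)
  {
    bool nonempty;

    rcu_read_lock();
    nonempty = rcu_dereference(f->next) != NULL;
    rcu_read_unlock();
    return nonempty;
  }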
+27 −11
@@ -18,7 +18,16 @@ to solve following problem.

Without 'nulls', a typical RCU linked list managing objects which are
allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
-algorithms:
+algorithms.  The following examples assume 'obj' is a pointer to such
+objects, which have the type shown below.
+
+::
+
+  struct object {
+    struct hlist_node obj_node;
+    atomic_t refcnt;
+    unsigned int key;
+  };

1) Lookup algorithm
-------------------
@@ -26,11 +35,13 @@ algorithms:
::

  begin:
-  rcu_read_lock()
+  rcu_read_lock();
  obj = lockless_lookup(key);
  if (obj) {
-    if (!try_get_ref(obj)) // might fail for free objects
+    if (!try_get_ref(obj)) { // might fail for free objects
      rcu_read_unlock();
      goto begin;
+    }
    /*
    * Because a writer could delete the object, and a writer could
    * reuse these objects before the RCU grace period, we
@@ -54,7 +65,7 @@ but a version with an additional memory barrier (smp_rmb())
    struct hlist_node *node, *next;
    for (pos = rcu_dereference((head)->first);
         pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
-         ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+         ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
         pos = rcu_dereference(next))
      if (obj->key == key)
        return obj;
@@ -66,7 +77,7 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
  struct hlist_node *node;
  for (pos = rcu_dereference((head)->first);
       pos && ({ prefetch(pos->next); 1; }) &&
-       ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+       ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
       pos = rcu_dereference(pos->next))
    if (obj->key == key)
      return obj;
@@ -86,7 +97,7 @@ Quoting Corey Minyard::
2) Insertion algorithm
----------------------

-We need to make sure a reader cannot read the new 'obj->obj_next' value
+We need to make sure a reader cannot read the new 'obj->obj_node.next' value
and previous value of 'obj->key'. Otherwise, an item could be deleted
from a chain, and inserted into another chain. If new chain was empty
before the move, 'next' pointer is NULL, and lockless reader can not
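
A minimal sketch of the ordering this paragraph requires, mirroring the
insertion code this section goes on to present (not visible in this
hunk; 'cachep', 'chain_lock', and 'head' are illustrative names)::

  obj = kmem_cache_alloc(cachep, GFP_ATOMIC);
  if (!obj)
    return -ENOMEM;
  spin_lock(&chain_lock);              // serialize writers on this chain
  obj->key = key;                      // initialize before publication
  atomic_set_release(&obj->refcnt, 1); // orders ->key before the refcount
  hlist_add_head_rcu(&obj->obj_node, head); // publication point
  spin_unlock(&chain_lock);

Because hlist_add_head_rcu() publishes through rcu_assign_pointer(), a
reader that observes the new 'next' pointer is also guaranteed to
observe the initialized 'key'.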
@@ -129,8 +140,7 @@ very very fast (before the end of RCU grace period)
Avoiding extra smp_rmb()
========================

-With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
-and extra _release() in insert function.
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup().

For example, if we choose to store the slot number as the 'nulls'
end-of-list marker for each slot of the hash table, we can detect
@@ -142,6 +152,9 @@ the beginning. If the object was moved to the same chain,
then the reader doesn't care: It might occasionally
scan the list again without harm.

+Note that using hlist_nulls means the type of the 'obj_node' field of
+'struct object' becomes 'struct hlist_nulls_node'.


1) lookup algorithm
-------------------
@@ -151,7 +164,7 @@ scan the list again without harm.
  head = &table[slot];
  begin:
  rcu_read_lock();
-  hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
+  hlist_nulls_for_each_entry_rcu(obj, node, head, obj_node) {
    if (obj->key == key) {
      if (!try_get_ref(obj)) { // might fail for free objects
	rcu_read_unlock();
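
The hunk ends before the tail of this loop; in the surrounding document
the lookup concludes by re-checking the 'nulls' end-of-list marker,
along these lines::

  // If the traversal ended on a 'nulls' marker belonging to a
  // different slot, the object moved between chains; restart.
  if (get_nulls_value(node) != slot)
    goto begin;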
@@ -182,6 +195,9 @@ scan the list again without harm.
2) Insert algorithm
-------------------

+The same as the insertion algorithm above, but using
+hlist_nulls_add_head_rcu() instead of hlist_add_head_rcu().

::

  /*
+55 −1
@@ -2938,6 +2938,10 @@
	locktorture.torture_type= [KNL]
			Specify the locking implementation to test.

+	locktorture.writer_fifo= [KNL]
+			Run the write-side locktorture kthreads at
+			sched_set_fifo() real-time priority.
+
	locktorture.verbose= [KNL]
			Enable additional printk() statements.

@@ -4949,6 +4953,15 @@
			test until boot completes in order to avoid
			interference.

+	rcuscale.kfree_by_call_rcu= [KNL]
+			In kernels built with CONFIG_RCU_LAZY=y, test
+			call_rcu() instead of kfree_rcu().
+
+	rcuscale.kfree_mult= [KNL]
+			Instead of allocating an object of size kfree_obj,
+			allocate one of kfree_mult * sizeof(kfree_obj).
+			Defaults to 1.
+
	rcuscale.kfree_rcu_test= [KNL]
			Set to measure performance of kfree_rcu() flooding.

@@ -4974,6 +4987,12 @@
			Number of loops doing rcuscale.kfree_alloc_num number
			of allocations and frees.

+	rcuscale.minruntime= [KNL]
+			Set the minimum test run time in seconds.  This
+			does not affect the data-collection interval,
+			but instead allows better measurement of things
+			like CPU consumption.
+
	rcuscale.nreaders= [KNL]
			Set number of RCU readers.  The value -1 selects
			N, where N is the number of CPUs.  A value
@@ -4988,7 +5007,7 @@
			the same as for rcuscale.nreaders.
			N, where N is the number of CPUs

-	rcuscale.perf_type= [KNL]
+	rcuscale.scale_type= [KNL]
			Specify the RCU implementation to test.

	rcuscale.shutdown= [KNL]
@@ -5004,6 +5023,11 @@
			in microseconds.  The default of zero says
			no holdoff.

+	rcuscale.writer_holdoff_jiffies= [KNL]
+			Additional write-side holdoff between grace
+			periods, but in jiffies.  The default of zero
+			says no holdoff.
+
	rcutorture.fqs_duration= [KNL]
			Set duration of force_quiescent_state bursts
			in microseconds.
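
As a usage illustration only (the values are arbitrary and 'tasks' is
assumed to be a registered rcuscale.scale_type), the new rcuscale
options combine on the kernel command line like any other module
parameters::

  rcuscale.scale_type=tasks rcuscale.minruntime=30 rcuscale.writer_holdoff_jiffies=2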
@@ -5285,6 +5309,13 @@
			number avoids disturbing real-time workloads,
			but lengthens grace periods.

+	rcupdate.rcu_task_lazy_lim= [KNL]
+			Number of callbacks on a given CPU that will
+			cancel laziness on that CPU.  Use -1 to disable
+			cancellation of laziness, but be advised that
+			doing so increases the danger of OOM due to
+			callback flooding.
+
	rcupdate.rcu_task_stall_info= [KNL]
			Set initial timeout in jiffies for RCU task stall
			informational messages, which give some indication
@@ -5314,6 +5345,29 @@
			A change in value does not take effect until
			the beginning of the next grace period.

+	rcupdate.rcu_tasks_lazy_ms= [KNL]
+			Set the timeout in milliseconds for RCU Tasks
+			asynchronous callback batching for call_rcu_tasks().
+			A negative value will take the default.  A value
+			of zero will disable batching.  Batching is
+			always disabled for synchronize_rcu_tasks().
+
+	rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
+			Set the timeout in milliseconds for RCU Tasks
+			Rude asynchronous callback batching for
+			call_rcu_tasks_rude().  A negative value
+			will take the default.  A value of zero will
+			disable batching.  Batching is always disabled
+			for synchronize_rcu_tasks_rude().
+
+	rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
+			Set the timeout in milliseconds for RCU Tasks
+			Trace asynchronous callback batching for
+			call_rcu_tasks_trace().  A negative value
+			will take the default.  A value of zero will
+			disable batching.  Batching is always disabled
+			for synchronize_rcu_tasks_trace().
+
	rcupdate.rcu_self_test= [KNL]
			Run the RCU early boot self tests
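
An illustrative boot line exercising the new Tasks-RCU laziness knobs
(values arbitrary; per the text above, zero disables batching and the
limit cancels laziness once that many callbacks are queued)::

  rcupdate.rcu_tasks_lazy_ms=0 rcupdate.rcu_task_lazy_lim=32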

+0 −11
@@ -73,9 +73,7 @@ struct raw_notifier_head {

struct srcu_notifier_head {
	struct mutex mutex;
-#ifdef CONFIG_TREE_SRCU
	struct srcu_usage srcuu;
-#endif
	struct srcu_struct srcu;
	struct notifier_block __rcu *head;
};
@@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#define RAW_NOTIFIER_INIT(name)	{				\
		.head = NULL }

-#ifdef CONFIG_TREE_SRCU
#define SRCU_NOTIFIER_INIT(name, pcpu)				\
	{							\
		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
@@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
		.srcuu = __SRCU_USAGE_INIT(name.srcuu),		\
		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
	}
-#else
-#define SRCU_NOTIFIER_INIT(name, pcpu)				\
-	{							\
-		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
-		.head = NULL,					\
-		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
-	}
-#endif

#define ATOMIC_NOTIFIER_HEAD(name)				\
	struct atomic_notifier_head name =			\
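
With the #ifdefs gone, a statically defined SRCU notifier head expands
the same way under Tiny and Tree SRCU. A minimal usage sketch (the
notifier head, block, and callback names are all illustrative)::

  static int my_event_cb(struct notifier_block *nb,
                         unsigned long action, void *data)
  {
    return NOTIFY_OK; /* illustrative no-op handler */
  }

  static struct notifier_block my_nb = {
    .notifier_call = my_event_cb,
  };

  SRCU_NOTIFIER_HEAD_STATIC(my_notifier);

  static int __init my_init(void)
  {
    srcu_notifier_chain_register(&my_notifier, &my_nb);
    srcu_notifier_call_chain(&my_notifier, 0, NULL);
    return 0;
  }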
+2 −2
@@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
{
	struct hlist_nulls_node *first = h->first;

-	n->next = first;
+	WRITE_ONCE(n->next, first);
	WRITE_ONCE(n->pprev, &h->first);
	rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
	if (!is_a_nulls(first))
@@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
		last = i;

	if (last) {
-	n->next = last->next;
+	WRITE_ONCE(n->next, last->next);
		n->pprev = &last->next;
		rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
	} else {
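
These stores became marked because lockless readers can load '->next'
concurrently with insertion, so a plain store would be a C-language
data race (and a KCSAN report). A sketch of the reader-side pairing,
reusing 'struct object' from the rculist_nulls document above
('has_next' is a hypothetical helper)::

  static bool has_next(struct object *obj)
  {
    struct hlist_nulls_node *next;

    rcu_read_lock();
    /* Pairs with the writer's WRITE_ONCE(n->next, ...). */
    next = rcu_dereference(hlist_nulls_next_rcu(&obj->obj_node));
    rcu_read_unlock();
    return !is_a_nulls(next);
  }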