Commit 7f6dcffb authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'sched-rt-2022-10-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull preempt RT updates from Thomas Gleixner:
 "Introduce preempt_[dis|en]able_nested() and use it to clean up various
  places which have open coded PREEMPT_RT conditionals.

  On PREEMPT_RT enabled kernels, spinlocks and rwlocks are neither
  disabling preemption nor interrupts. Though there are a few places
  which depend on the implicit preemption/interrupt disable of those
  locks, e.g. seqcount write sections, per CPU statistics updates etc.

  PREEMPT_RT added open coded CONFIG_PREEMPT_RT conditionals to
  disable/enable preemption in the related code parts all over the
  place. That's hard to read and does not really explain why this is
  necessary.

  Linus suggested to use helper functions (preempt_disable_nested() and
  preempt_enable_nested()) and use those in the affected places. On !RT
  enabled kernels these functions are NOPs, but contain a lockdep assert
  to validate that preemption is actually disabled to catch call sites
  which do not have preemption disabled.

  Clean up the affected code paths in mm, dentry and lib"

* tag 'sched-rt-2022-10-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  u64_stats: Streamline the implementation
  flex_proportions: Disable preemption entering the write section.
  mm/compaction: Get rid of RT ifdeffery
  mm/memcontrol: Replace the PREEMPT_RT conditionals
  mm/debug: Provide VM_WARN_ON_IRQS_ENABLED()
  mm/vmstat: Use preempt_[dis|en]able_nested()
  dentry: Use preempt_[dis|en]able_nested()
  preempt: Provide preempt_[dis|en]able_nested()
parents 65f109e1 44b0c295
Loading
Loading
Loading
Loading
+2 −11
Original line number Diff line number Diff line
@@ -2597,15 +2597,7 @@ EXPORT_SYMBOL(d_rehash);

static inline unsigned start_dir_add(struct inode *dir)
{
	/*
	 * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT
	 * kernels spin_lock() implicitly disables preemption, but not on
	 * PREEMPT_RT.  So for RT it has to be done explicitly to protect
	 * the sequence count write side critical section against a reader
	 * or another writer preempting, which would result in a live lock.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();
	preempt_disable_nested();
	for (;;) {
		unsigned n = dir->i_dir_seq;
		if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2618,8 +2610,7 @@ static inline void end_dir_add(struct inode *dir, unsigned int n,
			       wait_queue_head_t *d_wait)
{
	smp_store_release(&dir->i_dir_seq, n + 2);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
	preempt_enable_nested();
	wake_up_all(d_wait);
}

+6 −0
Original line number Diff line number Diff line
@@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm);
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif

/*
 * VM_WARN_ON_IRQS_ENABLED() - debug check that interrupts are disabled.
 *
 * Expands to WARN_ON_ONCE(!irqs_disabled()) when CONFIG_DEBUG_VM_IRQSOFF
 * is set and to an empty statement otherwise.
 */
#ifndef CONFIG_DEBUG_VM_IRQSOFF
#define VM_WARN_ON_IRQS_ENABLED() do { } while (0)
#else
#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled())
#endif

#ifdef CONFIG_DEBUG_VIRTUAL
#define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
#else
+42 −0
Original line number Diff line number Diff line
@@ -421,4 +421,46 @@ static inline void migrate_enable(void) { }

#endif /* CONFIG_SMP */

/**
 * preempt_disable_nested - Disable preemption inside a normally preempt disabled section
 *
 * Use for code which requires preemption protection inside a critical
 * section which has preemption disabled implicitly on non-PREEMPT_RT
 * enabled kernels, by e.g.:
 *  - holding a spinlock/rwlock
 *  - soft interrupt context
 *  - regular interrupt handlers
 *
 * On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft
 * interrupt context and regular interrupt handlers are preemptible and
 * only prevent migration. preempt_disable_nested() ensures that preemption
 * is disabled for cases which require CPU local serialization even on
 * PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP.
 *
 * The use cases are code sequences which are not serialized by a
 * particular lock instance, e.g.:
 *  - seqcount write side critical sections where the seqcount is not
 *    associated to a particular lock and therefore the automatic
 *    protection mechanism does not work. This prevents a live lock
 *    against a preempting high priority reader.
 *  - RMW per CPU variable updates like vmstat.
 */
/*
 * Kept as a macro rather than an inline function to avoid header
 * recursion hell vs. lockdep.
 *
 * !PREEMPT_RT: only assert (via lockdep) that preemption is already off.
 *  PREEMPT_RT: actually disable preemption.
 */
#define preempt_disable_nested()				\
do {								\
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))			\
		lockdep_assert_preemption_disabled();		\
	else							\
		preempt_disable();				\
} while (0)

/**
 * preempt_enable_nested - Undo the effect of preempt_disable_nested()
 *
 * Re-enables preemption on PREEMPT_RT enabled kernels, which is the only
 * configuration where preempt_disable_nested() disabled it. Does nothing
 * on non-PREEMPT_RT kernels.
 */
static __always_inline void preempt_enable_nested(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		return;

	preempt_enable();
}

#endif /* __LINUX_PREEMPT_H */
+64 −81
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@
 *
 * Key points :
 *
 * -  Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP.
 * -  Use a seqcount on 32-bit
 * -  The whole thing is a no-op on 64-bit architectures.
 *
 * Usage constraints:
@@ -20,7 +20,8 @@
 *    writer and also spin forever.
 *
 * 3) Write side must use the _irqsave() variant if other writers, or a reader,
 *    can be invoked from an IRQ context.
 *    can be invoked from an IRQ context. On 64bit systems this variant does not
 *    disable interrupts.
 *
 * 4) If reader fetches several counters, there is no guarantee the whole values
 *    are consistent w.r.t. each other (remember point #2: seqcounts are not
@@ -29,11 +30,6 @@
 * 5) Readers are allowed to sleep or be preempted/interrupted: they perform
 *    pure reads.
 *
 * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats
 *    might be updated from a hardirq or softirq context (remember point #1:
 *    seqcounts are not used for UP kernels). 32-bit UP stat readers could read
 *    corrupted 64-bit values otherwise.
 *
 * Usage :
 *
 * Stats producer (writer) should use following template granted it already got
@@ -66,7 +62,7 @@
#include <linux/seqlock.h>

struct u64_stats_sync {
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
#if BITS_PER_LONG == 32
	seqcount_t	seq;
#endif
};
@@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p)
	local64_inc(&p->v);
}

#else
/*
 * No-op variants of the synchronization primitives.
 *
 * NOTE(review): the enclosing #if is outside this hunk; these appear to
 * belong to the 64-bit branch, for which the header comment states that
 * the whole mechanism is a no-op -- confirm against the full file.
 *
 * The init and write-side hooks do nothing, __u64_stats_irqsave() returns 0
 * as the flags value without touching interrupt state, and the read side
 * reports sequence 0 and never asks for a retry.
 */
static inline void u64_stats_init(struct u64_stats_sync *syncp) { }
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { }
static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { }
static inline unsigned long __u64_stats_irqsave(void) { return 0; }
static inline void __u64_stats_irqrestore(unsigned long flags) { }
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
	return 0;
}
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
					   unsigned int start)
{
	return false;
}

#else /* 64 bit */

typedef struct {
	u64		v;
@@ -123,123 +134,95 @@ static inline void u64_stats_inc(u64_stats_t *p)
{
	p->v++;
}
#endif

#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
#define u64_stats_init(syncp)	seqcount_init(&(syncp)->seq)
#else
static inline void u64_stats_init(struct u64_stats_sync *syncp)
{
	seqcount_init(&syncp->seq);
}
#endif

static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();
	preempt_disable_nested();
	write_seqcount_begin(&syncp->seq);
#endif
}

static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
static inline void __u64_stats_update_end(struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
	write_seqcount_end(&syncp->seq);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
#endif
	preempt_enable_nested();
}

static inline unsigned long
u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
static inline unsigned long __u64_stats_irqsave(void)
{
	unsigned long flags = 0;
	unsigned long flags;

#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();
	else
	local_irq_save(flags);
	write_seqcount_begin(&syncp->seq);
#endif
	return flags;
}

static inline void
u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
				unsigned long flags)
static inline void __u64_stats_irqrestore(unsigned long flags)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
	write_seqcount_end(&syncp->seq);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
	else
	local_irq_restore(flags);
#endif
}

static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
	return read_seqcount_begin(&syncp->seq);
#else
	return 0;
#endif
}

static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
	preempt_disable();
#endif
	return __u64_stats_fetch_begin(syncp);
}

static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
					   unsigned int start)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
	return read_seqcount_retry(&syncp->seq, start);
#else
	return false;
#endif
}
#endif /* !64 bit */

/*
 * Begin a write-side update section for @syncp.
 *
 * Thin wrapper that forwards to __u64_stats_update_begin(); all of the
 * configuration specific work lives in that helper.
 */
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
{
	__u64_stats_update_begin(syncp);
}

/*
 * End a write-side update section for @syncp.
 *
 * Thin wrapper that forwards to __u64_stats_update_end(); all of the
 * configuration specific work lives in that helper.
 */
static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
{
	__u64_stats_update_end(syncp);
}

/*
 * Begin a write-side update section for @syncp, first calling
 * __u64_stats_irqsave() to obtain the flags value.
 *
 * Returns the value produced by __u64_stats_irqsave(); the caller passes
 * it back to u64_stats_update_end_irqrestore().
 */
static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
{
	unsigned long flags;

	/* Obtain the flags before entering the write section. */
	flags = __u64_stats_irqsave();
	__u64_stats_update_begin(syncp);

	return flags;
}

/*
 * End a write-side update section for @syncp and hand @flags back to
 * __u64_stats_irqrestore().
 *
 * @flags is the value returned by u64_stats_update_begin_irqsave().
 */
static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
						   unsigned long flags)
{
	/* Reverse order of the begin side: close the section, then restore. */
	__u64_stats_update_end(syncp);
	__u64_stats_irqrestore(flags);
}

/*
 * Begin a read-side section for @syncp.
 *
 * Returns the value from __u64_stats_fetch_begin(), which the caller later
 * passes as @start to u64_stats_fetch_retry().
 */
static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
	return __u64_stats_fetch_begin(syncp);
}

static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
					 unsigned int start)
{
#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
	preempt_enable();
#endif
	return __u64_stats_fetch_retry(syncp, start);
}

/*
 * In case irq handlers can update u64 counters, readers can use following helpers
 * - SMP 32bit arches use seqcount protection, irq safe.
 * - UP 32bit must disable irqs.
 * - 64bit have no problem atomically reading u64 values, irq safe.
 */
/* Obsolete interfaces */
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
	preempt_disable();
#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
	local_irq_disable();
#endif
	return __u64_stats_fetch_begin(syncp);
	return u64_stats_fetch_begin(syncp);
}

static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
					     unsigned int start)
{
#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
	preempt_enable();
#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
	local_irq_enable();
#endif
	return __u64_stats_fetch_retry(syncp, start);
	return u64_stats_fetch_retry(syncp, start);
}

#endif /* _LINUX_U64_STATS_SYNC_H */
+3 −0
Original line number Diff line number Diff line
@@ -805,6 +805,9 @@ config ARCH_HAS_DEBUG_VM_PGTABLE
	  An architecture should select this when it can successfully
	  build and run DEBUG_VM_PGTABLE.

# Promptless symbol: set automatically when DEBUG_VM is enabled and
# PREEMPT_RT is not. VM_WARN_ON_IRQS_ENABLED() only performs its check
# when this symbol is set.
config DEBUG_VM_IRQSOFF
	def_bool DEBUG_VM && !PREEMPT_RT

config DEBUG_VM
	bool "Debug VM"
	depends on DEBUG_KERNEL
Loading