Commit 29cbcd85 authored by Ahmed S. Darwish, committed by David S. Miller
Browse files

net: sched: Remove Qdisc::running sequence counter



The Qdisc::running sequence counter has two uses:

  1. Reliably reading qdisc's tc statistics while the qdisc is running
     (a seqcount read/retry loop at gnet_stats_add_basic()).

  2. As a flag, indicating whether the qdisc in question is running
     (without any retry loops).

For the first usage, the Qdisc::running sequence counter write section,
qdisc_run_begin() => qdisc_run_end(), covers a much wider area than what
is actually needed: the raw qdisc's bstats update. A u64_stats sync
point was thus introduced (in previous commits) inside the bstats
structure itself. A local u64_stats write section is then started and
stopped for the bstats updates.

Use that u64_stats sync point mechanism for the bstats read/retry loop
at gnet_stats_add_basic().

For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag,
accessed with atomic bitops, is sufficient. Using a bit flag instead of
a sequence counter at qdisc_run_begin/end() and qdisc_is_running() leads
to the SMP barriers implicitly added through raw_read_seqcount() and
write_seqcount_begin/end() getting removed. All call sites have been
surveyed though, and no required ordering was identified.

Now that the qdisc->running sequence counter is no longer used, remove
it.

Note, using u64_stats implies no sequence counter protection for 64-bit
architectures. This can lead to the qdisc tc statistics "packets" vs.
"bytes" values getting out of sync on rare occasions. The individual
values will still be valid.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 50dc9a85
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type {
 *	@sfp_bus:	attached &struct sfp_bus structure.
 *
 *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
 *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
 *
 *	@proto_down:	protocol port state information can be sent to the
 *			switch driver and used to set the phys state of the
@@ -2250,7 +2249,6 @@ struct net_device {
	struct phy_device	*phydev;
	struct sfp_bus		*sfp_bus;
	struct lock_class_key	*qdisc_tx_busylock;
	struct lock_class_key	*qdisc_running_key;
	bool			proto_down;
	unsigned		wol_enabled:1;
	unsigned		threaded:1;
@@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
#define netdev_lockdep_set_classes(dev)				\
{								\
	static struct lock_class_key qdisc_tx_busylock_key;	\
	static struct lock_class_key qdisc_running_key;		\
	static struct lock_class_key qdisc_xmit_lock_key;	\
	static struct lock_class_key dev_addr_list_lock_key;	\
	unsigned int i;						\
								\
	(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key;	\
	(dev)->qdisc_running_key = &qdisc_running_key;		\
	lockdep_set_class(&(dev)->addr_list_lock,		\
			  &dev_addr_list_lock_key);		\
	for (i = 0; i < (dev)->num_tx_queues; i++)		\
+8 −11
Original line number Diff line number Diff line
@@ -46,18 +46,15 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
				 spinlock_t *lock, struct gnet_dump *d,
				 int padattr);

int gnet_stats_copy_basic(const seqcount_t *running,
			  struct gnet_dump *d,
int gnet_stats_copy_basic(struct gnet_dump *d,
			  struct gnet_stats_basic_sync __percpu *cpu,
			  struct gnet_stats_basic_sync *b);
void gnet_stats_add_basic(const seqcount_t *running,
			  struct gnet_stats_basic_sync *bstats,
			  struct gnet_stats_basic_sync *b, bool running);
void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
			  struct gnet_stats_basic_sync __percpu *cpu,
			  struct gnet_stats_basic_sync *b);
int gnet_stats_copy_basic_hw(const seqcount_t *running,
			     struct gnet_dump *d,
			  struct gnet_stats_basic_sync *b, bool running);
int gnet_stats_copy_basic_hw(struct gnet_dump *d,
			     struct gnet_stats_basic_sync __percpu *cpu,
			     struct gnet_stats_basic_sync *b);
			     struct gnet_stats_basic_sync *b, bool running);
int gnet_stats_copy_rate_est(struct gnet_dump *d,
			     struct net_rate_estimator __rcu **ptr);
int gnet_stats_copy_queue(struct gnet_dump *d,
@@ -74,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
		      struct net_rate_estimator __rcu **rate_est,
		      spinlock_t *lock,
		      seqcount_t *running, struct nlattr *opt);
		      bool running, struct nlattr *opt);
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
			  struct net_rate_estimator __rcu **ptr,
			  spinlock_t *lock,
			  seqcount_t *running, struct nlattr *opt);
			  bool running, struct nlattr *opt);
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
			struct gnet_stats_rate_est64 *sample);
+14 −19
Original line number Diff line number Diff line
@@ -38,6 +38,10 @@ enum qdisc_state_t {
	__QDISC_STATE_DEACTIVATED,
	__QDISC_STATE_MISSED,
	__QDISC_STATE_DRAINING,
	/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
	 * Use qdisc_run_begin/end() or qdisc_is_running() instead.
	 */
	__QDISC_STATE_RUNNING,
};

#define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
@@ -108,7 +112,6 @@ struct Qdisc {
	struct sk_buff_head	gso_skb ____cacheline_aligned_in_smp;
	struct qdisc_skb_head	q;
	struct gnet_stats_basic_sync bstats;
	seqcount_t		running;
	struct gnet_stats_queue	qstats;
	unsigned long		state;
	struct Qdisc            *next_sched;
@@ -143,11 +146,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
	return NULL;
}

/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
 * root_lock section, or provide their own memory barriers -- ordering
 * against qdisc_run_begin/end() atomic bit operations.
 */
static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK)
		return spin_is_locked(&qdisc->seqlock);
	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
	return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
}

static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
@@ -167,6 +174,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
	return !READ_ONCE(qdisc->q.qlen);
}

/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
 * the qdisc root lock acquired.
 */
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
@@ -206,15 +216,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
		 * after it releases the lock at the end of qdisc_run_end().
		 */
		return spin_trylock(&qdisc->seqlock);
	} else if (qdisc_is_running(qdisc)) {
		return false;
	}
	/* Variant of write_seqcount_begin() telling lockdep a trylock
	 * was attempted.
	 */
	raw_write_seqcount_begin(&qdisc->running);
	seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
	return true;
	return test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
}

static inline void qdisc_run_end(struct Qdisc *qdisc)
@@ -226,7 +229,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
				      &qdisc->state)))
			__netif_schedule(qdisc);
	} else {
		write_seqcount_end(&qdisc->running);
		clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
	}
}

@@ -592,14 +595,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
	return qdisc_lock(root);
}

static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root_sleeping(qdisc);

	ASSERT_RTNL();
	return &root->running;
}

static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
{
	return qdisc->dev_queue->dev;
+10 −6
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@
struct net_rate_estimator {
	struct gnet_stats_basic_sync	*bstats;
	spinlock_t		*stats_lock;
	seqcount_t		*running;
	bool			running;
	struct gnet_stats_basic_sync __percpu *cpu_bstats;
	u8			ewma_log;
	u8			intvl_log; /* period : (250ms << intvl_log) */
@@ -66,7 +66,7 @@ static void est_fetch_counters(struct net_rate_estimator *e,
	if (e->stats_lock)
		spin_lock(e->stats_lock);

	gnet_stats_add_basic(e->running, b, e->cpu_bstats, e->bstats);
	gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);

	if (e->stats_lock)
		spin_unlock(e->stats_lock);
@@ -113,7 +113,9 @@ static void est_timer(struct timer_list *t)
 * @cpu_bstats: bstats per cpu
 * @rate_est: rate estimator statistics
 * @lock: lock for statistics and control path
 * @running: qdisc running seqcount
 * @running: true if @bstats represents a running qdisc, thus @bstats'
 *           internal values might change during basic reads. Only used
 *           if @cpu_bstats is NULL
 * @opt: rate estimator configuration TLV
 *
 * Creates a new rate estimator with &bstats as source and &rate_est
@@ -129,7 +131,7 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
		      struct net_rate_estimator __rcu **rate_est,
		      spinlock_t *lock,
		      seqcount_t *running,
		      bool running,
		      struct nlattr *opt)
{
	struct gnet_estimator *parm = nla_data(opt);
@@ -218,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
 * @cpu_bstats: bstats per cpu
 * @rate_est: rate estimator statistics
 * @lock: lock for statistics and control path
 * @running: qdisc running seqcount (might be NULL)
 * @running: true if @bstats represents a running qdisc, thus @bstats'
 *           internal values might change during basic reads. Only used
 *           if @cpu_bstats is NULL
 * @opt: rate estimator configuration TLV
 *
 * Replaces the configuration of a rate estimator by calling
@@ -230,7 +234,7 @@ int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
			  struct net_rate_estimator __rcu **rate_est,
			  spinlock_t *lock,
			  seqcount_t *running, struct nlattr *opt)
			  bool running, struct nlattr *opt)
{
	return gen_new_estimator(bstats, cpu_bstats, rate_est,
				 lock, running, opt);
+28 −22
Original line number Diff line number Diff line
@@ -146,42 +146,42 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
	_bstats_update(bstats, t_bytes, t_packets);
}

void gnet_stats_add_basic(const seqcount_t *running,
			  struct gnet_stats_basic_sync *bstats,
void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
			  struct gnet_stats_basic_sync __percpu *cpu,
			  struct gnet_stats_basic_sync *b)
			  struct gnet_stats_basic_sync *b, bool running)
{
	unsigned int seq;
	unsigned int start;
	u64 bytes = 0;
	u64 packets = 0;

	WARN_ON_ONCE((cpu || running) && !in_task());

	if (cpu) {
		gnet_stats_add_basic_cpu(bstats, cpu);
		return;
	}
	do {
		if (running)
			seq = read_seqcount_begin(running);
			start = u64_stats_fetch_begin_irq(&b->syncp);
		bytes = u64_stats_read(&b->bytes);
		packets = u64_stats_read(&b->packets);
	} while (running && read_seqcount_retry(running, seq));
	} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));

	_bstats_update(bstats, bytes, packets);
}
EXPORT_SYMBOL(gnet_stats_add_basic);

static int
___gnet_stats_copy_basic(const seqcount_t *running,
			 struct gnet_dump *d,
___gnet_stats_copy_basic(struct gnet_dump *d,
			 struct gnet_stats_basic_sync __percpu *cpu,
			 struct gnet_stats_basic_sync *b,
			 int type)
			 int type, bool running)
{
	struct gnet_stats_basic_sync bstats;
	u64 bstats_bytes, bstats_packets;

	gnet_stats_basic_sync_init(&bstats);
	gnet_stats_add_basic(running, &bstats, cpu, b);
	gnet_stats_add_basic(&bstats, cpu, b, running);

	bstats_bytes = u64_stats_read(&bstats.bytes);
	bstats_packets = u64_stats_read(&bstats.packets);
@@ -210,10 +210,14 @@ ___gnet_stats_copy_basic(const seqcount_t *running,

/**
 * gnet_stats_copy_basic - copy basic statistics into statistic TLV
 * @running: seqcount_t pointer
 * @d: dumping handle
 * @cpu: copy statistic per cpu
 * @b: basic statistics
 * @running: true if @b represents a running qdisc, thus @b's
 *           internal values might change during basic reads.
 *           Only used if @cpu is NULL
 *
 * Context: task; must not be run from IRQ or BH contexts
 *
 * Appends the basic statistics to the top level TLV created by
 * gnet_stats_start_copy().
@@ -222,22 +226,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
 * if the room in the socket buffer was not sufficient.
 */
int
gnet_stats_copy_basic(const seqcount_t *running,
		      struct gnet_dump *d,
gnet_stats_copy_basic(struct gnet_dump *d,
		      struct gnet_stats_basic_sync __percpu *cpu,
		      struct gnet_stats_basic_sync *b)
		      struct gnet_stats_basic_sync *b,
		      bool running)
{
	return ___gnet_stats_copy_basic(running, d, cpu, b,
					TCA_STATS_BASIC);
	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
}
EXPORT_SYMBOL(gnet_stats_copy_basic);

/**
 * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
 * @running: seqcount_t pointer
 * @d: dumping handle
 * @cpu: copy statistic per cpu
 * @b: basic statistics
 * @running: true if @b represents a running qdisc, thus @b's
 *           internal values might change during basic reads.
 *           Only used if @cpu is NULL
 *
 * Context: task; must not be run from IRQ or BH contexts
 *
 * Appends the basic statistics to the top level TLV created by
 * gnet_stats_start_copy().
@@ -246,13 +253,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
 * if the room in the socket buffer was not sufficient.
 */
int
gnet_stats_copy_basic_hw(const seqcount_t *running,
			 struct gnet_dump *d,
gnet_stats_copy_basic_hw(struct gnet_dump *d,
			 struct gnet_stats_basic_sync __percpu *cpu,
			 struct gnet_stats_basic_sync *b)
			 struct gnet_stats_basic_sync *b,
			 bool running)
{
	return ___gnet_stats_copy_basic(running, d, cpu, b,
					TCA_STATS_BASIC_HW);
	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
}
EXPORT_SYMBOL(gnet_stats_copy_basic_hw);

Loading