Commit e940eb3c authored by David S. Miller
Browse files

Merge branch 'lockless-qdisc-opts'



Yunsheng Lin says:

====================
Some optimization for lockless qdisc

Patch 1: remove unnecessary seqcount operation.
Patch 2: implement TCQ_F_CAN_BYPASS.
Patch 3: remove qdisc->empty.

Performance data for pktgen in queue_xmit mode + dummy netdev
with pfifo_fast:

 threads    unpatched           patched             delta
    1       2.60Mpps            3.21Mpps             +23%
    2       3.84Mpps            5.56Mpps             +44%
    4       5.52Mpps            5.58Mpps             +1%
    8       2.77Mpps            2.76Mpps             -0.3%
   16       2.24Mpps            2.23Mpps             -0.4%

Performance for IP forward testing: 1.05Mpps increases to
1.16Mpps, about 10% improvement.

V3: Add 'Acked-by' from Jakub and 'Tested-by' from Vladimir,
    and resend based on latest net-next.
V2: Adjust the comment and commit log according to discussion
    in V1.
V1: Drop RFC tag, add nolock_qdisc_is_empty() and do the qdisc
    empty checking without the protection of qdisc->seqlock to
    avoid doing unnecessary spin_trylock() for contention case.
RFC v4: Use STATE_MISSED and STATE_DRAINING to indicate non-empty
        qdisc, and add patch 1 and 3.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 38f75922 d3e0f575
Loading
Loading
Loading
Loading
+18 −13
Original line number Diff line number Diff line
@@ -37,8 +37,15 @@ enum qdisc_state_t {
	__QDISC_STATE_SCHED,
	__QDISC_STATE_DEACTIVATED,
	__QDISC_STATE_MISSED,
	__QDISC_STATE_DRAINING,
};

#define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
#define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)

#define QDISC_STATE_NON_EMPTY	(QDISC_STATE_MISSED | \
					QDISC_STATE_DRAINING)

struct qdisc_size_table {
	struct rcu_head		rcu;
	struct list_head	list;
@@ -110,8 +117,6 @@ struct Qdisc {
	spinlock_t		busylock ____cacheline_aligned_in_smp;
	spinlock_t		seqlock;

	/* for NOLOCK qdisc, true if there are no enqueued skbs */
	bool			empty;
	struct rcu_head		rcu;

	/* private data */
@@ -145,6 +150,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
}

/* For TCQ_F_NOLOCK qdiscs: the qdisc is considered empty when neither
 * STATE_MISSED nor STATE_DRAINING is set (see QDISC_STATE_NON_EMPTY).
 * Lockless read; callers that need a stable answer must re-check under
 * qdisc->seqlock.
 */
static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
{
	return (READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) == 0;
}

static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
{
	return q->flags & TCQ_F_CPUSTATS;
@@ -153,7 +163,7 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
	if (qdisc_is_percpu_stats(qdisc))
		return READ_ONCE(qdisc->empty);
		return nolock_qdisc_is_empty(qdisc);
	return !READ_ONCE(qdisc->q.qlen);
}

@@ -161,7 +171,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		if (spin_trylock(&qdisc->seqlock))
			goto nolock_empty;
			return true;

		/* If the MISSED flag is set, it means other thread has
		 * set the MISSED flag before second spin_trylock(), so
@@ -183,11 +193,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
		/* Retry again in case other CPU may not see the new flag
		 * after it releases the lock at the end of qdisc_run_end().
		 */
		if (!spin_trylock(&qdisc->seqlock))
			return false;

nolock_empty:
		WRITE_ONCE(qdisc->empty, false);
		return spin_trylock(&qdisc->seqlock);
	} else if (qdisc_is_running(qdisc)) {
		return false;
	}
@@ -201,15 +207,14 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)

static inline void qdisc_run_end(struct Qdisc *qdisc)
{
	write_seqcount_end(&qdisc->running);
	if (qdisc->flags & TCQ_F_NOLOCK) {
		spin_unlock(&qdisc->seqlock);

		if (unlikely(test_bit(__QDISC_STATE_MISSED,
				      &qdisc->state))) {
			clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
				      &qdisc->state)))
			__netif_schedule(qdisc);
		}
	} else {
		write_seqcount_end(&qdisc->running);
	}
}

+25 −2
Original line number Diff line number Diff line
@@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
	qdisc_calculate_pkt_len(skb, q);

	if (q->flags & TCQ_F_NOLOCK) {
		if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
		    qdisc_run_begin(q)) {
			/* Retest nolock_qdisc_is_empty() within the protection
			 * of q->seqlock to protect from racing with requeuing.
			 */
			if (unlikely(!nolock_qdisc_is_empty(q))) {
				rc = q->enqueue(skb, q, &to_free) &
					NET_XMIT_MASK;
				__qdisc_run(q);
				qdisc_run_end(q);

				goto no_lock_out;
			}

			qdisc_bstats_cpu_update(q, skb);
			if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
			    !nolock_qdisc_is_empty(q))
				__qdisc_run(q);

			qdisc_run_end(q);
			return NET_XMIT_SUCCESS;
		}

		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
		if (likely(!netif_xmit_frozen_or_stopped(txq)))
		qdisc_run(q);

no_lock_out:
		if (unlikely(to_free))
			kfree_skb_list(to_free);
		return rc;
+16 −7
Original line number Diff line number Diff line
@@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q,
	 */
	if (!netif_xmit_frozen_or_stopped(txq))
		set_bit(__QDISC_STATE_MISSED, &q->state);
	else
		set_bit(__QDISC_STATE_DRAINING, &q->state);
}

/* Main transmission queue. */
@@ -164,10 +166,14 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)

		skb = next;
	}
	if (lock)

	if (lock) {
		spin_unlock(lock);
		set_bit(__QDISC_STATE_MISSED, &q->state);
	} else {
		__netif_schedule(q);
	}
}

static void try_bulk_dequeue_skb(struct Qdisc *q,
				 struct sk_buff *skb,
@@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q)
	while (qdisc_restart(q, &packets)) {
		quota -= packets;
		if (quota <= 0) {
			if (q->flags & TCQ_F_NOLOCK)
				set_bit(__QDISC_STATE_MISSED, &q->state);
			else
				__netif_schedule(q);

			break;
		}
	}
@@ -698,13 +708,14 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
	if (likely(skb)) {
		qdisc_update_stats_at_dequeue(qdisc, skb);
	} else if (need_retry &&
		   test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
		   READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
		/* Delay clearing the STATE_MISSED here to reduce
		 * the overhead of the second spin_trylock() in
		 * qdisc_run_begin() and __netif_schedule() calling
		 * in qdisc_run_end().
		 */
		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);

		/* Make sure dequeuing happens after clearing
		 * STATE_MISSED.
@@ -714,8 +725,6 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
		need_retry = false;

		goto retry;
	} else {
		WRITE_ONCE(qdisc->empty, true);
	}

	return skb;
@@ -916,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	sch->empty = true;
	dev_hold(dev);
	refcount_set(&sch->refcnt, 1);

@@ -1222,6 +1230,7 @@ static void dev_reset_queue(struct net_device *dev,
	spin_unlock_bh(qdisc_lock(qdisc));
	if (nolock) {
		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
		spin_unlock_bh(&qdisc->seqlock);
	}
}