Commit 301f227f authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

net: piggy back on the memory barrier in bql when waking queues



Drivers call netdev_tx_completed_queue() right before
netif_txq_maybe_wake(). If BQL is enabled netdev_tx_completed_queue()
should issue a memory barrier, so we can depend on that separating
the stop check from the consumer index update, instead of adding
another barrier in netif_txq_maybe_wake().

This matters more than the barriers on the xmit path, because
the wake condition is almost always true. So we issue the
consumer side barrier often.

Wrap netdev_tx_completed_queue() in a local helper to issue
the barrier even if BQL is disabled. Keep the same semantics
as netdev_tx_completed_queue() (barrier only if bytes != 0)
to make it clear that the barrier is conditional.

Plus since macro gets pkt/byte counts as arguments now -
we can skip waking if there were no packets completed.

Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 08a09678
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -688,10 +688,10 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
		dev_kfree_skb_any(skb);
	}

	netdev_tx_completed_queue(txq, nr_pkts, tx_bytes);
	txr->tx_cons = cons;

	__netif_txq_maybe_wake(txq, bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
	__netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
				   bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
				   READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING);
}

+5 −7
Original line number Diff line number Diff line
@@ -1251,14 +1251,12 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
	if (ring_is_xdp(tx_ring))
		return !!budget;

	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
	if (total_packets && netif_carrier_ok(tx_ring->netdev) &&
	    !__netif_txq_maybe_wake(txq, ixgbe_desc_unused(tx_ring),
	if (!__netif_txq_completed_wake(txq, total_packets, total_bytes,
					ixgbe_desc_unused(tx_ring),
					TX_WAKE_THRESHOLD,
					netif_carrier_ok(tx_ring->netdev) &&
					test_bit(__IXGBE_DOWN, &adapter->state)))
		++tx_ring->tx_stats.restart_queue;

+1 −1
Original line number Diff line number Diff line
@@ -3532,7 +3532,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
	 * netdev_tx_sent_queue will miss the update and cause the queue to
	 * be stopped forever
	 */
	smp_mb();
	smp_mb(); /* NOTE: netdev_txq_completed_mb() assumes this exists */

	if (unlikely(dql_avail(&dev_queue->dql) < 0))
		return;
+29 −10
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@
		netif_tx_stop_queue(txq);				\
		/* Producer index and stop bit must be visible		\
		 * to consumer before we recheck.			\
		 * Pairs with a barrier in __netif_txq_maybe_wake().	\
		 * Pairs with a barrier in __netif_txq_completed_wake(). \
		 */							\
		smp_mb__after_atomic();					\
									\
@@ -82,10 +82,24 @@
		_res;							\
	})								\

/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
 * @bytes != 0, regardless of kernel config.
 */
static inline void
netdev_txq_completed_mb(struct netdev_queue *dev_queue,
			unsigned int pkts, unsigned int bytes)
{
	if (IS_ENABLED(CONFIG_BQL))
		netdev_tx_completed_queue(dev_queue, pkts, bytes);
	else if (bytes)
		smp_mb();
}

/**
 * __netif_txq_maybe_wake() - locklessly wake a Tx queue, if needed
 * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
 * @txq:	struct netdev_queue to stop/start
 * @pkts:	number of packets completed
 * @bytes:	number of bytes completed
 * @get_desc:	get current number of free descriptors (see requirements below!)
 * @start_thrs:	minimal number of descriptors to re-enable the queue
 * @down_cond:	down condition, predicate indicating that the queue should
@@ -94,22 +108,27 @@
 * All arguments may be evaluated multiple times.
 * @get_desc must be a formula or a function call, it must always
 * return up-to-date information when evaluated!
 * Reports completed pkts/bytes to BQL.
 *
 * Returns:
 *	 0 if the queue was woken up
 *	 1 if the queue was already enabled (or disabled but @down_cond is true)
 *	-1 if the queue was left unchanged (@start_thrs not reached)
 */
#define __netif_txq_maybe_wake(txq, get_desc, start_thrs, down_cond)	\
#define __netif_txq_completed_wake(txq, pkts, bytes,			\
				   get_desc, start_thrs, down_cond)	\
	({								\
		int _res;						\
									\
		_res = -1;						\
		if (likely(get_desc > start_thrs)) {			\
			/* Make sure that anybody stopping the queue after \
			 * this sees the new next_to_clean.		\
		/* Report to BQL and piggy back on its barrier.		\
		 * Barrier makes sure that anybody stopping the queue	\
		 * after this point sees the new consumer index.	\
		 * Pairs with barrier in netif_txq_try_stop().		\
		 */							\
			smp_mb();					\
		netdev_txq_completed_mb(txq, pkts, bytes);		\
									\
		_res = -1;						\
		if (pkts && likely(get_desc > start_thrs)) {		\
			_res = 1;					\
			if (unlikely(netif_tx_queue_stopped(txq)) &&	\
			    !(down_cond)) {				\
@@ -120,8 +139,8 @@
		_res;							\
	})

#define netif_txq_maybe_wake(txq, get_desc, start_thrs)		\
	__netif_txq_maybe_wake(txq, get_desc, start_thrs, false)
#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
	__netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)

/* subqueue variants follow */