Commit 3a9e5fff authored by Maxim Mikityanskiy, committed by Saeed Mahameed
Browse files

net/mlx5e: Optimize modulo in mlx5e_select_queue



To improve the performance of the modulo operation (%), it's replaced by
subtracting the divisor in a loop. The modulo is used to fix up an
out-of-bounds value that might be returned by netdev_pick_tx or to
convert the queue number to the channel number when num_tcs > 1. Both
situations are unlikely, because XPS is configured not to pick higher
queues (qid >= num_channels) by default, so under normal circumstances
the flow won't go inside the loop, and it will be faster than %.

num_tcs == 8 adds at most 7 iterations to the loop. PTP adds at most 1
iteration to the loop. HTB would add at most 256 iterations (when
num_channels == 1), so there is an additional boundary check in the HTB
flow, which falls back to % if more than 7 iterations are expected.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 3c87aedd
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -178,7 +178,8 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
		 * So we can return a txq_ix that matches the channel and
		 * packet UP.
		 */
		return txq_ix % selq->num_channels + up * selq->num_channels;
		return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) +
			up * selq->num_channels;
	}

	if (unlikely(selq->is_htb)) {
@@ -198,7 +199,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
		 * Driver to select these queues only at mlx5e_select_ptpsq()
		 * and mlx5e_select_htb_queue().
		 */
		return txq_ix % selq->num_channels;
		return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels);
	}

	/* PTP is enabled */
@@ -214,7 +215,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
	 * If netdev_pick_tx() picks ptp_channel, switch to a regular queue,
	 * because driver should select the PTP only at mlx5e_select_ptpsq().
	 */
	txq_ix %= selq->num_channels;
	txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels);

	if (selq->num_tcs <= 1)
		return txq_ix;
+20 −0
Original line number Diff line number Diff line
@@ -25,6 +25,26 @@ void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bo
void mlx5e_selq_apply(struct mlx5e_selq *selq);
void mlx5e_selq_cancel(struct mlx5e_selq *selq);

/* Reduce a TX queue index into the range [0, num_channels).
 *
 * Produces the same value as txq % num_channels, but gets there by
 * subtracting num_channels repeatedly instead of dividing. The loop
 * condition is annotated as unlikely, so an index that is already in range
 * is returned after a single comparison.
 *
 * num_channels must be non-zero: with zero the loop condition never
 * becomes false.
 */
static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels)
{
	u16 ch_ix;

	for (ch_ix = txq; unlikely(ch_ix >= num_channels); ch_ix -= num_channels)
		;

	return ch_ix;
}

/* Reduce a TX queue index into the range [0, num_channels), with a bound on
 * the amount of looping.
 *
 * The result equals txq % num_channels. Three cases:
 *  - txq < num_channels: returned unchanged (the expected case).
 *  - txq < 8 * num_channels: reduced by repeated subtraction, which takes
 *    at most 7 iterations.
 *  - otherwise: a real modulo is used, so the iteration count stays bounded
 *    no matter how large txq is relative to num_channels.
 *
 * num_channels must be non-zero: with zero the modulo branch would divide
 * by zero.
 */
static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels)
{
	if (unlikely(txq >= num_channels)) {
		/* Too far out of range for the subtraction loop to pay off. */
		if (unlikely(txq >= num_channels * 8))
			return txq % num_channels;

		do {
			txq -= num_channels;
		} while (txq >= num_channels);
	}

	return txq;
}

u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
		       struct net_device *sb_dev);