Commit 3db4c85c authored by Maxim Mikityanskiy, committed by Jakub Kicinski
Browse files

net/mlx5e: xsk: Use queue indices starting from 0 for XSK queues



In the initial implementation of XSK in mlx5e, XSK RQs coexisted with
regular RQs in the same channel. The main idea was to allow RSS to work
the same for regular traffic, without the need to reconfigure RSS to
exclude XSK queues.

However, this scheme didn't prove to be beneficial, mainly because of
incompatibility with other vendors. Some tools don't properly support
using higher indices for XSK queues, some tools get confused with the
double amount of RQs exposed in sysfs. Some use cases are purely XSK,
and allocating the same amount of unused regular RQs is a waste of
resources.

This commit changes the queuing scheme to the standard one, where XSK
RQs replace regular RQs on the channels where XSK sockets are open. Two
RQs still exist in the channel to allow failsafe disable of XSK, but
only one is exposed at a time. The next commit will achieve the desired
memory savings by flushing the buffers when the regular RQ is unused.

As a result of this transition:

1. It's possible to use RSS contexts over XSK RQs.

2. It's possible to dedicate all queues to XSK.

3. When XSK RQs coexist with regular RQs, the admin should make sure no
unwanted traffic goes into XSK RQs by either excluding them from RSS or
setting up the XDP program to return XDP_PASS for non-XSK traffic.

4. When using a mixed fleet of mlx5e devices and other netdevs, the same
configuration can be applied. If the application supports the fallback
to copy mode on unsupported drivers, it will work too.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent d9ba64de
Loading
Loading
Loading
Loading
+0 −7
Original line number Diff line number Diff line
@@ -181,12 +181,6 @@ do { \
/*
 * Dereference the RCU-protected pointer p through rcu_dereference_protected(),
 * passing lockdep_is_held(&priv->state_lock) as the protection condition.
 */
#define mlx5e_state_dereference(priv, p) \
	rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))

/* RQ group identifiers: regular RQs (0) and XSK RQs (1). */
enum mlx5e_rq_group {
	MLX5E_RQ_GROUP_REGULAR,
	MLX5E_RQ_GROUP_XSK,
/*
 * Expands to 1 + MLX5E_RQ_GROUP_<g>, i.e. the count of groups up to and
 * including group g (REGULAR -> 1, XSK -> 2).
 */
#define MLX5E_NUM_RQ_GROUPS(g) (1 + MLX5E_RQ_GROUP_##g)
};

static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev)
{
	if (mlx5_lag_is_lacp_owner(mdev))
@@ -1005,7 +999,6 @@ struct mlx5e_profile {
	mlx5e_stats_grp_t *stats_grps;
	const struct mlx5e_rx_handlers *rx_handlers;
	int	max_tc;
	u8	rq_groups;
	u32     features;
};

+17 −12
Original line number Diff line number Diff line
@@ -10,28 +10,33 @@ unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
	return chs->num;
}

void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix)
{
	struct mlx5e_channel *c;
	WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs));
	return chs->c[ix];
}

	WARN_ON(ix >= mlx5e_channels_get_num(chs));
	c = chs->c[ix];
bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix)
{
	struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);

	*rqn = c->rq.rqn;
	return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
}

bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
{
	struct mlx5e_channel *c;
	struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);

	WARN_ON(ix >= mlx5e_channels_get_num(chs));
	c = chs->c[ix];
	*rqn = c->rq.rqn;
}

	if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
		return false;
void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
{
	struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);

	WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state));

	*rqn = c->xskrq.rqn;
	return true;
}

bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+2 −1
Original line number Diff line number Diff line
@@ -9,8 +9,9 @@
struct mlx5e_channels;

unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix);
void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);

#endif /* __MLX5_EN_CHANNELS_H__ */
+0 −32
Original line number Diff line number Diff line
@@ -53,38 +53,6 @@ struct mlx5e_create_sq_param {
	u8                          min_inline_mode;
};

/*
 * Translate a queue ID into a channel index, provided the queue ID belongs
 * to the requested RQ group.
 *
 * Group g covers the queue IDs [num_channels * g, num_channels * (g + 1)).
 *
 * @params: supplies num_channels.
 * @qid:    queue ID to translate.
 * @group:  RQ group the queue ID is expected to belong to.
 * @ix:     output; written with the channel index only on success.
 *
 * Returns true and stores the channel index in *ix when qid falls inside the
 * group's range; returns false and leaves *ix untouched otherwise.
 */
static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
						u16 qid,
						enum mlx5e_rq_group group,
						u16 *ix)
{
	int channels = params->num_channels;
	int idx = qid - channels * group;

	if (idx >= 0 && idx < channels) {
		*ix = idx;
		return true;
	}

	return false;
}

/*
 * Split a queue ID into its channel index and RQ group.
 *
 * @params: supplies num_channels, used as the divisor.
 * @qid:    queue ID to split.
 * @ix:     output; receives qid modulo num_channels.
 * @group:  output; receives qid divided by num_channels.
 *
 * No validation is done here: the result is written unconditionally.
 */
static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
					      u16 qid,
					      u16 *ix,
					      enum mlx5e_rq_group *group)
{
	u16 channels = params->num_channels;

	*group = qid / channels;
	*ix = qid % channels;
}

/*
 * Check that a queue ID is within the range exposed by the device.
 *
 * @profile: supplies rq_groups.
 * @params:  supplies num_channels.
 * @qid:     queue ID to check.
 *
 * Returns true when qid is below num_channels * rq_groups.
 */
static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
				      struct mlx5e_params *params, u64 qid)
{
	u64 qid_limit = params->num_channels * profile->rq_groups;

	return qid < qid_limit;
}

/* Striding RQ dynamic parameters */

u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+15 −103
Original line number Diff line number Diff line
@@ -24,8 +24,6 @@ struct mlx5e_rx_res {
	struct {
		struct mlx5e_rqt direct_rqt;
		struct mlx5e_tir direct_tir;
		struct mlx5e_rqt xsk_rqt;
		struct mlx5e_tir xsk_tir;
	} *channels;

	struct {
@@ -320,48 +318,8 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res)
		mlx5e_tir_builder_clear(builder);
	}

	if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
	goto out;

	for (ix = 0; ix < res->max_nch; ix++) {
		err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
					    res->mdev, false, res->drop_rqn);
		if (err) {
			mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
				       err, ix);
			goto err_destroy_xsk_rqts;
		}
	}

	for (ix = 0; ix < res->max_nch; ix++) {
		mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
					    mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
					    inner_ft_support);
		mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
		mlx5e_tir_builder_build_direct(builder);

		err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
		if (err) {
			mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
				       err, ix);
			goto err_destroy_xsk_tirs;
		}

		mlx5e_tir_builder_clear(builder);
	}

	goto out;

err_destroy_xsk_tirs:
	while (--ix >= 0)
		mlx5e_tir_destroy(&res->channels[ix].xsk_tir);

	ix = res->max_nch;
err_destroy_xsk_rqts:
	while (--ix >= 0)
		mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);

	ix = res->max_nch;
err_destroy_direct_tirs:
	while (--ix >= 0)
		mlx5e_tir_destroy(&res->channels[ix].direct_tir);
@@ -420,12 +378,6 @@ static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
	for (ix = 0; ix < res->max_nch; ix++) {
		mlx5e_tir_destroy(&res->channels[ix].direct_tir);
		mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);

		if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
			continue;

		mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
		mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
	}

	kvfree(res->channels);
@@ -491,13 +443,6 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
	return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
}

/*
 * Return the TIR number of the XSK TIR belonging to channel ix.
 *
 * @res: RX resources holding the per-channel TIRs.
 * @ix:  channel index; not range-checked here.
 *
 * Emits a warning if res was created without MLX5E_RX_RES_FEATURE_XSK, but
 * still proceeds to read the TIR number.
 */
u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
{
	struct mlx5e_tir *xsk_tir;

	WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));

	xsk_tir = &res->channels[ix].xsk_tir;
	return mlx5e_tir_get_tirn(xsk_tir);
}

u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
{
	struct mlx5e_rss *rss = res->rss[0];
@@ -527,26 +472,14 @@ static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res,
						 struct mlx5e_channels *chs,
						 unsigned int ix)
{
	u32 rqn;
	u32 rqn = res->rss_rqns[ix];
	int err;

	mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
	err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
	if (err)
		mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
			       mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
			       rqn, ix, err);

	if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
		return;

	if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
		rqn = res->drop_rqn;
	err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
	if (err)
		mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
			       mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
			       rqn, ix, err);
}

static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
@@ -559,15 +492,6 @@ static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
		mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
			       mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
			       res->drop_rqn, ix, err);

	if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
		return;

	err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
	if (err)
		mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
			       mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
			       res->drop_rqn, ix, err);
}

void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
@@ -577,8 +501,12 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann

	nch = mlx5e_channels_get_num(chs);

	for (ix = 0; ix < chs->num; ix++)
	for (ix = 0; ix < chs->num; ix++) {
		if (mlx5e_channels_is_xsk(chs, ix))
			mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
		else
			mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
	}
	res->rss_nch = chs->num;

	mlx5e_rx_res_rss_enable(res);
@@ -621,33 +549,17 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
	}
}

int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
			      unsigned int ix)
void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
			     unsigned int ix, bool xsk)
{
	u32 rqn;
	int err;

	if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
		return -EINVAL;

	err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
	if (err)
		mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
			       mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
			       rqn, ix, err);
	return err;
}
	if (xsk)
		mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
	else
		mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);

int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
{
	int err;
	mlx5e_rx_res_rss_enable(res);

	err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
	if (err)
		mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
			       mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
			       res->drop_rqn, ix, err);
	return err;
	mlx5e_rx_res_channel_activate_direct(res, chs, ix);
}

int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
Loading