Commit 7db98396 authored by Yishai Hadas, committed by Jakub Kicinski
Browse files

net/mlx5: E-Switch, Implement devlink port function cmds to control RoCE



Implement devlink port function commands to enable / disable RoCE.
This is used to control the RoCE device capabilities.

This patch implements the infrastructure that downstream patches will use
to add additional capabilities.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Acked-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 47d0c500
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -354,6 +354,16 @@ MAC address setup
The mlx5 driver supports the devlink port function attr mechanism to set up
the MAC address. (refer to Documentation/networking/devlink/devlink-port.rst)

RoCE capability setup
---------------------
Not all mlx5 PCI devices/SFs require RoCE capability.

Disabling the RoCE capability saves 1 MB of system memory per PCI
device/SF.

The mlx5 driver supports the devlink port function attr mechanism to set up
the RoCE capability. (refer to Documentation/networking/devlink/devlink-port.rst)

SF state setup
--------------
To use the SF, the user must activate the SF using the SF function state
+2 −0
Original line number Diff line number Diff line
@@ -314,6 +314,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
	.rate_node_new = mlx5_esw_devlink_rate_node_new,
	.rate_node_del = mlx5_esw_devlink_rate_node_del,
	.rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
	.port_fn_roce_get = mlx5_devlink_port_fn_roce_get,
	.port_fn_roce_set = mlx5_devlink_port_fn_roce_set,
#endif
#ifdef CONFIG_MLX5_SF_MANAGER
	.port_new = mlx5_devlink_sf_port_new,
+35 −0
Original line number Diff line number Diff line
@@ -772,6 +772,33 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
		esw_vport_destroy_offloads_acl_tables(esw, vport);
}

/* Cache the RoCE capability of the function behind @vport.
 *
 * Queries the general HCA capabilities of the vport's function and stores
 * the "roce" bit in vport->info.roce_enabled. The query is skipped when the
 * device does not report the vhca_resource_manager capability.
 *
 * Returns 0 on success or when the query is skipped, -ENOMEM if the query
 * buffer cannot be allocated, or the value returned by
 * mlx5_vport_get_other_func_cap() on failure.
 */
static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
	void *out;
	void *caps;
	int ret;

	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
		return 0;

	out = kzalloc(MLX5_ST_SZ_BYTES(query_hca_cap_out), GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	ret = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, out,
					    MLX5_CAP_GENERAL);
	if (!ret) {
		/* Only touch the cached state when the query succeeded. */
		caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
		vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, caps, roce);
	}

	kfree(out);
	return ret;
}

static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
	u16 vport_num = vport->vport;
@@ -785,6 +812,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
	if (mlx5_esw_is_manager_vport(esw, vport_num))
		return 0;

	err = mlx5_esw_vport_caps_get(esw, vport);
	if (err)
		goto err_caps;

	mlx5_modify_vport_admin_state(esw->dev,
				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
				      vport_num, 1,
@@ -804,6 +835,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
			       vport->info.qos, flags);

	return 0;

err_caps:
	esw_vport_cleanup_acl(esw, vport);
	return err;
}

/* Don't cleanup vport->info, it's needed to restore vport configuration */
+5 −1
Original line number Diff line number Diff line
@@ -153,6 +153,7 @@ struct mlx5_vport_info {
	u8                      qos;
	u8                      spoofchk: 1;
	u8                      trusted: 1;
	u8                      roce_enabled: 1;
};

/* Vport context events */
@@ -508,7 +509,10 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
					   const u8 *hw_addr, int hw_addr_len,
					   struct netlink_ext_ack *extack);

/* devlink port function RoCE attribute callbacks.
 *
 * The getter reports the vport's cached RoCE state through @is_enabled; the
 * setter changes the RoCE HCA capability of the function behind @port.
 * Both return 0 on success or a negative errno (-EOPNOTSUPP when the port
 * is not supported or the vport is not enabled) and may set a message in
 * @extack.
 */
int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
				  struct netlink_ext_ack *extack);
int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
				  struct netlink_ext_ack *extack);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);

int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+108 −0
Original line number Diff line number Diff line
@@ -4022,3 +4022,111 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,

	return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr);
}

/* Resolve a devlink port to its eswitch vport.
 *
 * Returns ERR_PTR(-EOPNOTSUPP) when the device lacks the
 * vhca_resource_manager capability or when port functions are not supported
 * for the resolved vport number; otherwise returns whatever
 * mlx5_eswitch_get_vport() yields for that vport number.
 */
static struct mlx5_vport *
mlx5_devlink_port_fn_get_vport(struct devlink_port *port, struct mlx5_eswitch *esw)
{
	u16 num;

	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
		return ERR_PTR(-EOPNOTSUPP);

	num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
	if (is_port_function_supported(esw, num))
		return mlx5_eswitch_get_vport(esw, num);

	return ERR_PTR(-EOPNOTSUPP);
}

/* devlink callback: report whether RoCE is enabled for a port function.
 *
 * On success, stores the vport's cached roce_enabled state in @is_enabled
 * and returns 0. Returns the eswitch or vport lookup error when either
 * lookup fails (with "Invalid port" set in @extack for the latter), or
 * -EOPNOTSUPP when the vport is not enabled.
 */
int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
				  struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = mlx5_devlink_eswitch_get(port->devlink);
	struct mlx5_vport *vport;
	int ret;

	if (IS_ERR(esw))
		return PTR_ERR(esw);

	vport = mlx5_devlink_port_fn_get_vport(port, esw);
	if (IS_ERR(vport)) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid port");
		return PTR_ERR(vport);
	}

	/* The cached state is read under the eswitch state lock. */
	mutex_lock(&esw->state_lock);
	if (!vport->enabled) {
		ret = -EOPNOTSUPP;
	} else {
		*is_enabled = vport->info.roce_enabled;
		ret = 0;
	}
	mutex_unlock(&esw->state_lock);

	return ret;
}

/* devlink callback: enable or disable RoCE for a port function.
 *
 * Reads the function's current general HCA capabilities, updates the "roce"
 * bit in place and writes the capabilities back. On success the new state is
 * cached in vport->info.roce_enabled. Nothing is written to the device when
 * the cached state already equals @enable.
 *
 * Returns 0 on success, the eswitch or vport lookup error when either lookup
 * fails, -EOPNOTSUPP when the vport is not enabled, -ENOMEM when the query
 * buffer cannot be allocated, or the error from the capability get/set
 * commands. @extack receives a message on most failure paths.
 */
int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
				  struct netlink_ext_ack *extack)
{
	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	struct mlx5_eswitch *esw;
	struct mlx5_vport *vport;
	int err = -EOPNOTSUPP;
	void *query_ctx;
	void *hca_caps;
	u16 vport_num;

	esw = mlx5_devlink_eswitch_get(port->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	vport = mlx5_devlink_port_fn_get_vport(port, esw);
	if (IS_ERR(vport)) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid port");
		return PTR_ERR(vport);
	}
	vport_num = vport->vport;

	mutex_lock(&esw->state_lock);
	if (!vport->enabled) {
		NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
		goto out;
	}

	/* Already in the requested state: nothing to send to the device. */
	if (vport->info.roce_enabled == enable) {
		err = 0;
		goto out;
	}

	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
	if (!query_ctx) {
		err = -ENOMEM;
		goto out;
	}

	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
					    MLX5_CAP_GENERAL);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
		goto out_free;
	}

	/* Modify the queried capabilities in place; hca_caps points into
	 * query_ctx, so no copy is needed before flipping the roce bit.
	 */
	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
	MLX5_SET(cmd_hca_cap, hca_caps, roce, enable);

	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
					    MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA roce cap");
		goto out_free;
	}

	/* Update the cache only after the device accepted the change. */
	vport->info.roce_enabled = enable;

out_free:
	kfree(query_ctx);
out:
	mutex_unlock(&esw->state_lock);
	return err;
}
Loading