Commit e71383fb authored by Shay Drory's avatar Shay Drory Committed by Saeed Mahameed
Browse files

net/mlx5: Light probe local SFs



If a user wants to configure an SF, for example to use only the vdpa
functionality, the SF must first be fully probed, then configured,
and afterwards reloaded.

In order to save the time of the reload, local SFs will probe without
any auxiliary sub-device, so that the SFs can be configured prior to
their full probe.

The defaults of the enable_* devlink params of these SFs are set to
false.

Usage example:
Create SF:
$ devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 11
$ devlink port function set pci/0000:08:00.0/32768 \
               hw_addr 00:00:00:00:00:11 state active

Enable ETH auxiliary device:
$ devlink dev param set auxiliary/mlx5_core.sf.1 \
              name enable_eth value true cmode driverinit

Now, in order to fully probe the SF, use devlink reload:
$ devlink dev reload auxiliary/mlx5_core.sf.1

At this point the user has an SF devlink instance with an auxiliary device
for the Ethernet functionality only.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 3f908403
Loading
Loading
Loading
Loading
+20 −0
Original line number Diff line number Diff line
@@ -45,6 +45,26 @@ Following bridge VLAN functions are supported by mlx5:
Subfunction
===========

Subfunctions which are spawned over the E-switch are created only with a devlink
device, and by default all the SF auxiliary devices are disabled.
This allows the user to configure the SF before the SF has been fully probed,
which saves time.

Usage example:
Create SF:
$ devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 11
$ devlink port function set pci/0000:08:00.0/32768 \
               hw_addr 00:00:00:00:00:11 state active

Enable ETH auxiliary device:
$ devlink dev param set auxiliary/mlx5_core.sf.1 \
              name enable_eth value true cmode driverinit

Now, in order to fully probe the SF, use devlink reload:
$ devlink dev reload auxiliary/mlx5_core.sf.1

mlx5 supports devlink params for the ETH, RDMA and vDPA (vnet) auxiliary devices (see :ref:`Documentation/networking/devlink/devlink-params.rst`)

mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.

A subfunction has its own function capabilities and its own resources. This
+16 −0
Original line number Diff line number Diff line
@@ -323,6 +323,18 @@ static void del_adev(struct auxiliary_device *adev)
	auxiliary_device_uninit(adev);
}

void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev)
{
	mutex_lock(&mlx5_intf_mutex);
	dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
	mutex_unlock(&mlx5_intf_mutex);
}

/* Return true when MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV is set in the private
 * flags of @dev, false otherwise.
 */
bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev)
{
	return !!(dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV);
}

int mlx5_attach_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
@@ -457,6 +469,10 @@ static int add_drivers(struct mlx5_core_dev *dev)
		if (priv->adev[i])
			continue;

		if (mlx5_adev_devices[i].is_enabled &&
		    !(mlx5_adev_devices[i].is_enabled(dev)))
			continue;

		if (mlx5_adev_devices[i].is_supported)
			is_supported = mlx5_adev_devices[i].is_supported(dev);

+16 −4
Original line number Diff line number Diff line
@@ -141,6 +141,13 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
	bool sf_dev_allocated;
	int ret = 0;

	if (mlx5_dev_is_lightweight(dev)) {
		if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
			return -EOPNOTSUPP;
		mlx5_unload_one_light(dev);
		return 0;
	}

	sf_dev_allocated = mlx5_sf_dev_allocated(dev);
	if (sf_dev_allocated) {
		/* Reload results in deleting SF device which further results in
@@ -193,6 +200,10 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
	*actions_performed = BIT(action);
	switch (action) {
	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
		if (mlx5_dev_is_lightweight(dev)) {
			mlx5_fw_reporters_create(dev);
			return mlx5_init_one_devl_locked(dev);
		}
		ret = mlx5_load_one_devl_locked(dev, false);
		break;
	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
@@ -511,7 +522,7 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
	struct mlx5_core_dev *dev = devlink_priv(devlink);
	union devlink_param_value value;

	value.vbool = MLX5_CAP_GEN(dev, roce);
	value.vbool = MLX5_CAP_GEN(dev, roce) && !mlx5_dev_is_lightweight(dev);
	devl_param_driverinit_value_set(devlink,
					DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
					value);
@@ -561,7 +572,7 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink)
	if (err)
		return err;

	value.vbool = true;
	value.vbool = !mlx5_dev_is_lightweight(dev);
	devl_param_driverinit_value_set(devlink,
					DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
					value);
@@ -601,6 +612,7 @@ static const struct devlink_param mlx5_devlink_rdma_params[] = {

static int mlx5_devlink_rdma_params_register(struct devlink *devlink)
{
	struct mlx5_core_dev *dev = devlink_priv(devlink);
	union devlink_param_value value;
	int err;

@@ -612,7 +624,7 @@ static int mlx5_devlink_rdma_params_register(struct devlink *devlink)
	if (err)
		return err;

	value.vbool = true;
	value.vbool = !mlx5_dev_is_lightweight(dev);
	devl_param_driverinit_value_set(devlink,
					DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
					value);
@@ -647,7 +659,7 @@ static int mlx5_devlink_vnet_params_register(struct devlink *devlink)
	if (err)
		return err;

	value.vbool = true;
	value.vbool = !mlx5_dev_is_lightweight(dev);
	devl_param_driverinit_value_set(devlink,
					DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
					value);
+15 −9
Original line number Diff line number Diff line
@@ -719,7 +719,7 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD

static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
{
	struct mlx5_core_health *health = &dev->priv.health;
	struct devlink *devlink = priv_to_devlink(dev);
@@ -735,14 +735,14 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
	}

	health->fw_reporter =
		devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
		devl_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
					    0, dev);
	if (IS_ERR(health->fw_reporter))
		mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
			       PTR_ERR(health->fw_reporter));

	health->fw_fatal_reporter =
		devlink_health_reporter_create(devlink,
		devl_health_reporter_create(devlink,
					    &mlx5_fw_fatal_reporter_ops,
					    grace_period,
					    dev);
@@ -777,6 +777,7 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
{
	struct mlx5_core_health *health = &dev->priv.health;

	if (!mlx5_dev_is_lightweight(dev))
		queue_work(health->wq, &health->fatal_report_work);
}

@@ -905,10 +906,15 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev)

int mlx5_health_init(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);
	struct mlx5_core_health *health;
	char *name;

	if (!mlx5_dev_is_lightweight(dev)) {
		devl_lock(devlink);
		mlx5_fw_reporters_create(dev);
		devl_unlock(devlink);
	}
	mlx5_reporter_vnic_create(dev);

	health = &dev->priv.health;
+116 −8
Original line number Diff line number Diff line
@@ -1424,12 +1424,11 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
	mlx5_put_uars_page(dev, dev->priv.uar);
}

int mlx5_init_one(struct mlx5_core_dev *dev)
int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);
	bool light_probe = mlx5_dev_is_lightweight(dev);
	int err = 0;

	devl_lock(devlink);
	mutex_lock(&dev->intf_state_mutex);
	dev->state = MLX5_DEVICE_STATE_UP;

@@ -1443,9 +1442,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
		goto function_teardown;
	}

	/* In case of light_probe, mlx5_devlink is already registered.
	 * Hence, don't register devlink again.
	 */
	if (!light_probe) {
		err = mlx5_devlink_params_register(priv_to_devlink(dev));
		if (err)
			goto err_devlink_params_reg;
	}

	err = mlx5_load(dev);
	if (err)
@@ -1458,13 +1462,13 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
		goto err_register;

	mutex_unlock(&dev->intf_state_mutex);
	devl_unlock(devlink);
	return 0;

err_register:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
err_load:
	if (!light_probe)
		mlx5_devlink_params_unregister(priv_to_devlink(dev));
err_devlink_params_reg:
	mlx5_cleanup_once(dev);
@@ -1473,6 +1477,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
err_function:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

/* Run mlx5_init_one_devl_locked() with the devlink instance lock of @dev held.
 *
 * Returns whatever mlx5_init_one_devl_locked() returns.
 */
int mlx5_init_one(struct mlx5_core_dev *dev)
{
	struct devlink *dl = priv_to_devlink(dev);
	int ret;

	devl_lock(dl);
	ret = mlx5_init_one_devl_locked(dev);
	devl_unlock(dl);

	return ret;
}
@@ -1590,6 +1604,100 @@ void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend)
	devl_unlock(devlink);
}

/* In case of light probe, we don't need a full query of hca_caps, but only the bellow caps.
 * A full query of hca_caps will be done when the device will reload.
 */
static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev)
{
	int err;

	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
	if (err)
		return err;

	if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
		err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
		if (err)
			return err;
	}

	if (MLX5_CAP_GEN(dev, nic_flow_table) ||
	    MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
		err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE);
		if (err)
			return err;
	}

	if (MLX5_CAP_GEN_64(dev, general_obj_types) &
		MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
		err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION);
		if (err)
			return err;
	}

	return 0;
}

/* Light init of @dev: enable the function, query the reduced set of HCA caps
 * and register the devlink params (under the devlink instance lock).
 *
 * On any failure the device state is set to MLX5_DEVICE_STATE_INTERNAL_ERROR;
 * if the function was already enabled it is disabled again first.
 *
 * Returns 0 on success, or the error of the step that failed.
 */
int mlx5_init_one_light(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);
	int err;

	dev->state = MLX5_DEVICE_STATE_UP;

	err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
	if (err) {
		mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err);
		goto err_set_state;
	}

	err = mlx5_query_hca_caps_light(dev);
	if (err) {
		mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err);
		goto err_function_disable;
	}

	devl_lock(devlink);
	err = mlx5_devlink_params_register(devlink);
	devl_unlock(devlink);
	if (err) {
		mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
		goto err_function_disable;
	}

	return 0;

err_function_disable:
	mlx5_function_disable(dev, true);
err_set_state:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	return err;
}

/* Undo mlx5_init_one_light(): unregister the devlink params under the devlink
 * instance lock, then disable the function, but only if the device state is
 * still MLX5_DEVICE_STATE_UP.
 */
void mlx5_uninit_one_light(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);

	devl_lock(devlink);
	mlx5_devlink_params_unregister(devlink);
	devl_unlock(devlink);

	if (dev->state == MLX5_DEVICE_STATE_UP)
		mlx5_function_disable(dev, true);
}

/* The xxx_light() functions are used to configure the device without a full
 * init (light init). There is no point in reloading a device into the light
 * state, hence mlx5_load_one_light() isn't needed.
 */

/* Unload a light-probed device: when the device state is MLX5_DEVICE_STATE_UP,
 * disable the function and move the state to
 * MLX5_DEVICE_STATE_INTERNAL_ERROR. In any other state this does nothing.
 */
void mlx5_unload_one_light(struct mlx5_core_dev *dev)
{
	if (dev->state == MLX5_DEVICE_STATE_UP) {
		mlx5_function_disable(dev, false);
		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	}
}

static const int types[] = {
	MLX5_CAP_GENERAL,
	MLX5_CAP_GENERAL_2,
Loading