Commit ca4f3f18 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'devlink-locking'



Jakub Kicinski says:

====================
devlink: hold the instance lock in eswitch callbacks

Series number 2 in the effort to hold the devlink instance lock
in call driver callbacks. We have the following drivers using
this API:

 - bnxt, nfp, netdevsim - their own locking is removed / simplified
   by this series; all of them needed a lock to protect from changes
   to the number of VFs while switching modes, now the VF config bus
   callback takes the devlink instance lock via devl_lock();
 - ice - appears not to allow changing modes while SR-IOV is enabled,
   so nothing to do there;
 - liquidio - does not contain any locking;
 - octeontx2/af - is very special but at least doesn't have locking
   so doesn't get in the way either;
 - mlx5 has a wealth of locks - I chickened out and dropped the lock
   in the callbacks so that I can leave the driver be, for now.

The last one is obviously not ideal, but I would prefer to transition
the API already, as it may take longer.

v2: use a wrapper in mlx5 and extend the comment
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents e7dc00f3 14e426bf
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -13470,7 +13470,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

#ifdef CONFIG_BNXT_SRIOV
	init_waitqueue_head(&bp->sriov_cfg_wait);
	mutex_init(&bp->sriov_lock);
#endif
	if (BNXT_SUPPORTS_TPA(bp)) {
		bp->gro_func = bnxt_gro_func_5730x;
+0 −6
Original line number Diff line number Diff line
@@ -2072,12 +2072,6 @@ struct bnxt {
	wait_queue_head_t	sriov_cfg_wait;
	bool			sriov_cfg;
#define BNXT_SRIOV_CFG_WAIT_TMO	msecs_to_jiffies(10000)

	/* lock to protect VF-rep creation/cleanup via
	 * multiple paths such as ->sriov_configure() and
	 * devlink ->eswitch_mode_set()
	 */
	struct mutex		sriov_lock;
#endif

#if BITS_PER_LONG == 32
+2 −2
Original line number Diff line number Diff line
@@ -846,7 +846,7 @@ void bnxt_sriov_disable(struct bnxt *bp)
		return;

	/* synchronize VF and VF-rep create and destroy */
	mutex_lock(&bp->sriov_lock);
	devl_lock(bp->dl);
	bnxt_vf_reps_destroy(bp);

	if (pci_vfs_assigned(bp->pdev)) {
@@ -859,7 +859,7 @@ void bnxt_sriov_disable(struct bnxt *bp)
		/* Free the HW resources reserved for various VF's */
		bnxt_hwrm_func_vf_resource_free(bp, num_vfs);
	}
	mutex_unlock(&bp->sriov_lock);
	devl_unlock(bp->dl);

	bnxt_free_vf_resources(bp);

+6 −16
Original line number Diff line number Diff line
@@ -559,44 +559,34 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
			     struct netlink_ext_ack *extack)
{
	struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
	int rc = 0;

	mutex_lock(&bp->sriov_lock);
	if (bp->eswitch_mode == mode) {
		netdev_info(bp->dev, "already in %s eswitch mode\n",
			    mode == DEVLINK_ESWITCH_MODE_LEGACY ?
			    "legacy" : "switchdev");
		rc = -EINVAL;
		goto done;
		return -EINVAL;
	}

	switch (mode) {
	case DEVLINK_ESWITCH_MODE_LEGACY:
		bnxt_vf_reps_destroy(bp);
		break;
		return 0;

	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
		if (bp->hwrm_spec_code < 0x10803) {
			netdev_warn(bp->dev, "FW does not support SRIOV E-Switch SWITCHDEV mode\n");
			rc = -ENOTSUPP;
			goto done;
			return -ENOTSUPP;
		}

		if (pci_num_vf(bp->pdev) == 0) {
			netdev_info(bp->dev, "Enable VFs before setting switchdev mode\n");
			rc = -EPERM;
			goto done;
			return -EPERM;
		}
		rc = bnxt_vf_reps_create(bp);
		break;
		return bnxt_vf_reps_create(bp);

	default:
		rc = -EINVAL;
		goto done;
		return -EINVAL;
	}
done:
	mutex_unlock(&bp->sriov_lock);
	return rc;
}

#endif
+42 −12
Original line number Diff line number Diff line
@@ -3337,6 +3337,27 @@ static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw)
		!mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0;
}

/* FIXME: devl_unlock() followed by devl_lock() inside driver callback
 * is never correct and prone to races. It's a transitional workaround,
 * never repeat this pattern.
 *
 * This code MUST be fixed before removing devlink_mutex as it is safe
 * to do only because of that mutex.
 */
/* Entry wrapper for mlx5 eswitch devlink callbacks: releases the devlink
 * instance lock (held by the devlink core when the callback is invoked)
 * and then takes the eswitch mode_lock for writing. Must be paired with
 * mlx5_eswtich_mode_callback_exit(). Transitional workaround only — see
 * the FIXME above; safe solely because devlink_mutex still serializes
 * callbacks. (Note: "eswtich" typo is the committed identifier.)
 */
static void mlx5_eswtich_mode_callback_enter(struct devlink *devlink,
					     struct mlx5_eswitch *esw)
{
	devl_unlock(devlink);
	down_write(&esw->mode_lock);
}

/* Exit wrapper paired with mlx5_eswtich_mode_callback_enter(): drops the
 * eswitch mode_lock and re-acquires the devlink instance lock so the
 * devlink core finds it held on callback return. Same transitional
 * caveats as the enter wrapper apply.
 */
static void mlx5_eswtich_mode_callback_exit(struct devlink *devlink,
					    struct mlx5_eswitch *esw)
{
	up_write(&esw->mode_lock);
	devl_lock(devlink);
}

int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
				  struct netlink_ext_ack *extack)
{
@@ -3351,6 +3372,15 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
	if (esw_mode_from_devlink(mode, &mlx5_mode))
		return -EINVAL;

	/* FIXME: devl_unlock() followed by devl_lock() inside driver callback
	 * is never correct and prone to races. It's a transitional workaround,
	 * never repeat this pattern.
	 *
	 * This code MUST be fixed before removing devlink_mutex as it is safe
	 * to do only because of that mutex.
	 */
	devl_unlock(devlink);

	mlx5_lag_disable_change(esw->dev);
	err = mlx5_esw_try_lock(esw);
	if (err < 0) {
@@ -3381,6 +3411,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
	mlx5_esw_unlock(esw);
enable_lag:
	mlx5_lag_enable_change(esw->dev);
	devl_lock(devlink);
	return err;
}

@@ -3393,14 +3424,14 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	down_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_enter(devlink, esw);
	err = eswitch_devlink_esw_mode_check(esw);
	if (err)
		goto unlock;

	err = esw_mode_to_devlink(esw->mode, mode);
unlock:
	up_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_exit(devlink, esw);
	return err;
}

@@ -3447,7 +3478,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	down_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_enter(devlink, esw);
	err = eswitch_devlink_esw_mode_check(esw);
	if (err)
		goto out;
@@ -3484,11 +3515,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
		goto out;

	esw->offloads.inline_mode = mlx5_mode;
	up_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_exit(devlink, esw);
	return 0;

out:
	up_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_exit(devlink, esw);
	return err;
}

@@ -3501,14 +3532,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	down_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_enter(devlink, esw);
	err = eswitch_devlink_esw_mode_check(esw);
	if (err)
		goto unlock;

	err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
unlock:
	up_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_exit(devlink, esw);
	return err;
}

@@ -3524,7 +3555,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	down_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_enter(devlink, esw);
	err = eswitch_devlink_esw_mode_check(esw);
	if (err)
		goto unlock;
@@ -3570,7 +3601,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
	}

unlock:
	up_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_exit(devlink, esw);
	return err;
}

@@ -3584,15 +3615,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
	if (IS_ERR(esw))
		return PTR_ERR(esw);


	down_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_enter(devlink, esw);
	err = eswitch_devlink_esw_mode_check(esw);
	if (err)
		goto unlock;

	*encap = esw->offloads.encap;
unlock:
	up_write(&esw->mode_lock);
	mlx5_eswtich_mode_callback_exit(devlink, esw);
	return err;
}

Loading