Commit dca55da0 authored by Jiri Pirko, committed by Saeed Mahameed
Browse files

RDMA/mlx5: Track netdev to avoid deadlock during netdev notifier unregister



When removing a network namespace that contains an mlx5 devlink
instance, the following call chain is executed:

cleanup_net (takes down_read(&pernet_ops_rwsem))
devlink_pernet_pre_exit()
devlink_reload()
mlx5_devlink_reload_down()
mlx5_unload_one_devl_locked()
mlx5_detach_device()
del_adev()
mlx5r_remove()
__mlx5_ib_remove()
mlx5_ib_roce_cleanup()
mlx5_remove_netdev_notifier()
unregister_netdevice_notifier (takes down_write(&pernet_ops_rwsem))

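This deadlocks: unregister_netdevice_notifier() tries to take
pernet_ops_rwsem for writing while cleanup_net(), earlier in the same
call chain, still holds it for reading.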

Resolve this by converting to register_netdevice_notifier_dev_net(),
which does not take pernet_ops_rwsem and moves the notifier block around
according to the netdev it takes as an argument.

Use the previously introduced netdev added/removed events to track the
uplink netdev, which is then passed to
register_netdevice_notifier_dev_net().

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent c7d4e6ab
Loading
Loading
Loading
Loading
+54 −24
Original line number Diff line number Diff line
@@ -3012,26 +3012,63 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
	}
}

static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
static void mlx5_netdev_notifier_register(struct mlx5_roce *roce,
					  struct net_device *netdev)
{
	int err;

	dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event;
	err = register_netdevice_notifier(&dev->port[port_num].roce.nb);
	if (err) {
		dev->port[port_num].roce.nb.notifier_call = NULL;
		return err;
	if (roce->tracking_netdev)
		return;
	roce->tracking_netdev = netdev;
	roce->nb.notifier_call = mlx5_netdev_event;
	err = register_netdevice_notifier_dev_net(netdev, &roce->nb, &roce->nn);
	WARN_ON(err);
}

	return 0;
/*
 * Drop the netdevice notifier registered against the netdev this roce
 * instance is tracking and clear the tracking pointer. Does nothing when
 * no netdev is currently tracked, so it is safe to call repeatedly.
 */
static void mlx5_netdev_notifier_unregister(struct mlx5_roce *roce)
{
	struct net_device *tracked = roce->tracking_netdev;

	if (tracked) {
		unregister_netdevice_notifier_dev_net(tracked, &roce->nb,
						      &roce->nn);
		roce->tracking_netdev = NULL;
	}
}

/*
 * Notifier callback hooked up through roce->mdev_nb.
 *
 * Only MLX5_DRIVER_EVENT_UPLINK_NETDEV is handled: @data is then treated
 * as a struct net_device pointer. A non-NULL netdev starts netdev-notifier
 * tracking on it; a NULL netdev stops tracking.
 *
 * Returns NOTIFY_OK when the event was handled, NOTIFY_DONE for any other
 * event.
 */
static int mlx5e_mdev_notifier_event(struct notifier_block *nb,
				     unsigned long event, void *data)
{
	struct net_device *uplink = data;
	struct mlx5_roce *roce;

	if (event != MLX5_DRIVER_EVENT_UPLINK_NETDEV)
		return NOTIFY_DONE;

	roce = container_of(nb, struct mlx5_roce, mdev_nb);
	if (!uplink)
		mlx5_netdev_notifier_unregister(roce);
	else
		mlx5_netdev_notifier_register(roce, uplink);

	return NOTIFY_OK;
}

static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
static void mlx5_mdev_netdev_track(struct mlx5_ib_dev *dev, u32 port_num)
{
	if (dev->port[port_num].roce.nb.notifier_call) {
		unregister_netdevice_notifier(&dev->port[port_num].roce.nb);
		dev->port[port_num].roce.nb.notifier_call = NULL;
	struct mlx5_roce *roce = &dev->port[port_num].roce;

	roce->mdev_nb.notifier_call = mlx5e_mdev_notifier_event;
	mlx5_blocking_notifier_register(dev->mdev, &roce->mdev_nb);
	mlx5_core_uplink_netdev_event_replay(dev->mdev);
}

/*
 * Stop uplink netdev tracking for port @port_num of @dev: detach the
 * port's mdev_nb from the core device's blocking notifier chain, then
 * drop any netdev notifier that is still registered.
 */
static void mlx5_mdev_netdev_untrack(struct mlx5_ib_dev *dev, u32 port_num)
{
	struct mlx5_roce *port_roce = &dev->port[port_num].roce;

	/*
	 * Detach from the mdev chain first; presumably this keeps a late
	 * uplink-netdev event from re-registering the netdev notifier
	 * after it has been removed below.
	 */
	mlx5_blocking_notifier_unregister(dev->mdev, &port_roce->mdev_nb);
	mlx5_netdev_notifier_unregister(port_roce);
}

static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
@@ -3138,7 +3175,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
	if (mpi->mdev_events.notifier_call)
		mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
	mpi->mdev_events.notifier_call = NULL;
	mlx5_remove_netdev_notifier(ibdev, port_num);
	mlx5_mdev_netdev_untrack(ibdev, port_num);
	spin_lock(&port->mp.mpi_lock);

	comps = mpi->mdev_refcnt;
@@ -3196,12 +3233,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
	if (err)
		goto unbind;

	err = mlx5_add_netdev_notifier(ibdev, port_num);
	if (err) {
		mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
			    port_num + 1);
		goto unbind;
	}
	mlx5_mdev_netdev_track(ibdev, port_num);

	mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
	mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
@@ -3909,9 +3941,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
		port_num = mlx5_core_native_port_num(dev->mdev) - 1;

		/* Register only for native ports */
		err = mlx5_add_netdev_notifier(dev, port_num);
		if (err)
			return err;
		mlx5_mdev_netdev_track(dev, port_num);

		err = mlx5_enable_eth(dev);
		if (err)
@@ -3920,7 +3950,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)

	return 0;
cleanup:
	mlx5_remove_netdev_notifier(dev, port_num);
	mlx5_mdev_netdev_untrack(dev, port_num);
	return err;
}

@@ -3938,7 +3968,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
		mlx5_disable_eth(dev);

		port_num = mlx5_core_native_port_num(dev->mdev) - 1;
		mlx5_remove_netdev_notifier(dev, port_num);
		mlx5_mdev_netdev_untrack(dev, port_num);
	}
}

+3 −0
Original line number Diff line number Diff line
@@ -832,6 +832,9 @@ struct mlx5_roce {
	rwlock_t		netdev_lock;
	struct net_device	*netdev;
	struct notifier_block	nb;
	struct netdev_net_notifier nn;
	struct notifier_block	mdev_nb;
	struct net_device	*tracking_netdev;
	atomic_t		tx_port_affinity;
	enum ib_port_state last_port_state;
	struct mlx5_ib_dev	*dev;
+2 −0
Original line number Diff line number Diff line
@@ -424,6 +424,7 @@ int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_b

	return blocking_notifier_chain_register(&events->sw_nh, nb);
}
EXPORT_SYMBOL(mlx5_blocking_notifier_register);

int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
@@ -431,6 +432,7 @@ int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier

	return blocking_notifier_chain_unregister(&events->sw_nh, nb);
}
EXPORT_SYMBOL(mlx5_blocking_notifier_unregister);

int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
				      void *data)