Commit 5c42b66d authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlx4-aux-bus'

Petr Pavlu says:

====================
Convert mlx4 to use auxiliary bus

This series converts the mlx4 drivers to use auxiliary bus, similarly to
how mlx5 was converted [1]. The first 6 patches are preparatory changes,
the remaining 4 are the final conversion.

Initial motivation for this change was to address a problem related to
loading mlx4_en/mlx4_ib by mlx4_core using request_module_nowait(). When
doing such a load in initrd, the operation is asynchronous to any init
control and can get unexpectedly affected/interrupted by an eventual
root switch. Using an auxiliary bus leaves these module loads to udevd
which better integrates with systemd processing. [2]

General benefit is to get rid of custom interface logic and instead use
a common facility available for this task. An obvious risk is that some
new bug is introduced by the conversion.

Leon Romanovsky was kind enough to check for me that the series passes
their verification tests.

Changes since v2 [3]:
* Use 'void *' as the event param of mlx4_dispatch_event().

Changes since v1 [4]:
* Fix a missing definition of the err variable in mlx4_en_add().
* Remove not needed comments about the event type in mlx4_en_event()
  and mlx4_ib_event().

[1] https://lore.kernel.org/netdev/20201101201542.2027568-1-leon@kernel.org/
[2] https://lore.kernel.org/netdev/0a361ac2-c6bd-2b18-4841-b1b991f0635e@suse.com/
[3] https://lore.kernel.org/netdev/20230813145127.10653-1-petr.pavlu@suse.com/
[4] https://lore.kernel.org/netdev/20230804150527.6117-1-petr.pavlu@suse.com/


====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents eb660324 c138cdb8
Loading
Loading
Loading
Loading
+132 −86
Original line number Diff line number Diff line
@@ -82,6 +82,8 @@ static const char mlx4_ib_version[] =
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
						    u32 port_num);
static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
			 void *param);

static struct workqueue_struct *wq;

@@ -125,12 +127,14 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
					     u32 port_num)
{
	struct mlx4_ib_dev *ibdev = to_mdev(device);
	struct net_device *dev;
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
	for_each_netdev_rcu(&init_net, dev) {
		if (dev->dev.parent != ibdev->ib_dev.dev.parent ||
		    dev->dev_port + 1 != port_num)
			continue;

	if (dev) {
		if (mlx4_is_bonded(ibdev->dev)) {
			struct net_device *upper = NULL;

@@ -143,11 +147,14 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
					dev = active;
			}
		}
	}

		dev_hold(dev);
		ret = dev;
		break;
	}

	rcu_read_unlock();
	return dev;
	return ret;
}

static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
@@ -2319,61 +2326,53 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
	mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}

static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev,
				struct net_device *dev,
				unsigned long event)

{
	struct mlx4_ib_iboe *iboe;
	int update_qps_port = -1;
	int port;
	struct mlx4_ib_iboe *iboe = &ibdev->iboe;

	ASSERT_RTNL();

	iboe = &ibdev->iboe;
	if (dev->dev.parent != ibdev->ib_dev.dev.parent)
		return;

	spin_lock_bh(&iboe->lock);
	mlx4_foreach_ib_transport_port(port, ibdev->dev) {

		iboe->netdevs[port - 1] =
			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);

		if (dev == iboe->netdevs[port - 1] &&
		    (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
		     event == NETDEV_UP || event == NETDEV_CHANGE))
			update_qps_port = port;
	iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? dev : NULL;

		if (dev == iboe->netdevs[port - 1] &&
		    (event == NETDEV_UP || event == NETDEV_DOWN)) {
	if (event == NETDEV_UP || event == NETDEV_DOWN) {
		enum ib_port_state port_state;
		struct ib_event ibev = { };

			if (ib_get_cached_port_state(&ibdev->ib_dev, port,
		if (ib_get_cached_port_state(&ibdev->ib_dev, dev->dev_port + 1,
					     &port_state))
				continue;
			goto iboe_out;

		if (event == NETDEV_UP &&
		    (port_state != IB_PORT_ACTIVE ||
			     iboe->last_port_state[port - 1] != IB_PORT_DOWN))
				continue;
		     iboe->last_port_state[dev->dev_port] != IB_PORT_DOWN))
			goto iboe_out;
		if (event == NETDEV_DOWN &&
		    (port_state != IB_PORT_DOWN ||
			     iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
				continue;
			iboe->last_port_state[port - 1] = port_state;
		     iboe->last_port_state[dev->dev_port] != IB_PORT_ACTIVE))
			goto iboe_out;
		iboe->last_port_state[dev->dev_port] = port_state;

		ibev.device = &ibdev->ib_dev;
			ibev.element.port_num = port;
		ibev.element.port_num = dev->dev_port + 1;
		ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
						  IB_EVENT_PORT_ERR;
		ib_dispatch_event(&ibev);
	}

	}
iboe_out:
	spin_unlock_bh(&iboe->lock);

	if (update_qps_port > 0)
		mlx4_ib_update_qps(ibdev, dev, update_qps_port);
	if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
	    event == NETDEV_UP || event == NETDEV_CHANGE)
		mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1);
}

static int mlx4_ib_netdev_event(struct notifier_block *this,
@@ -2386,7 +2385,7 @@ static int mlx4_ib_netdev_event(struct notifier_block *this,
		return NOTIFY_DONE;

	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
	mlx4_ib_scan_netdevs(ibdev, dev, event);
	mlx4_ib_scan_netdev(ibdev, dev, event);

	return NOTIFY_DONE;
}
@@ -2610,8 +2609,11 @@ static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
	.destroy_flow = mlx4_ib_destroy_flow,
};

static void *mlx4_ib_add(struct mlx4_dev *dev)
static int mlx4_ib_probe(struct auxiliary_device *adev,
			 const struct auxiliary_device_id *id)
{
	struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
	struct mlx4_dev *dev = madev->mdev;
	struct mlx4_ib_dev *ibdev;
	int num_ports = 0;
	int i, j;
@@ -2631,27 +2633,31 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)

	/* No point in registering a device with no ports... */
	if (num_ports == 0)
		return NULL;
		return -ENODEV;

	ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
	if (!ibdev) {
		dev_err(&dev->persist->pdev->dev,
			"Device struct alloc failed\n");
		return NULL;
		return -ENOMEM;
	}

	iboe = &ibdev->iboe;

	if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
	err = mlx4_pd_alloc(dev, &ibdev->priv_pdn);
	if (err)
		goto err_dealloc;

	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
	err = mlx4_uar_alloc(dev, &ibdev->priv_uar);
	if (err)
		goto err_pd;

	ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
				 PAGE_SIZE);
	if (!ibdev->uar_map)
	if (!ibdev->uar_map) {
		err = -ENOMEM;
		goto err_uar;
	}
	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);

	ibdev->dev = dev;
@@ -2695,7 +2701,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)

	spin_lock_init(&iboe->lock);

	if (init_node_data(ibdev))
	err = init_node_data(ibdev);
	if (err)
		goto err_map;
	mlx4_init_sl2vl_tbl(ibdev);

@@ -2727,6 +2734,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
		new_counter_index = kmalloc(sizeof(*new_counter_index),
					    GFP_KERNEL);
		if (!new_counter_index) {
			err = -ENOMEM;
			if (allocated)
				mlx4_counter_free(ibdev->dev, counter_index);
			goto err_counter;
@@ -2744,8 +2752,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
			new_counter_index =
					kmalloc(sizeof(struct counter_index),
						GFP_KERNEL);
			if (!new_counter_index)
			if (!new_counter_index) {
				err = -ENOMEM;
				goto err_counter;
			}
			new_counter_index->index = counter_index;
			new_counter_index->allocated = 0;
			list_add_tail(&new_counter_index->list,
@@ -2774,8 +2784,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)

		ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count,
							GFP_KERNEL);
		if (!ibdev->ib_uc_qpns_bitmap)
		if (!ibdev->ib_uc_qpns_bitmap) {
			err = -ENOMEM;
			goto err_steer_qp_release;
		}

		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
			bitmap_zero(ibdev->ib_uc_qpns_bitmap,
@@ -2795,17 +2807,21 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
	for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
		atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);

	if (mlx4_ib_alloc_diag_counters(ibdev))
	err = mlx4_ib_alloc_diag_counters(ibdev);
	if (err)
		goto err_steer_free_bitmap;

	if (ib_register_device(&ibdev->ib_dev, "mlx4_%d",
			       &dev->persist->pdev->dev))
	err = ib_register_device(&ibdev->ib_dev, "mlx4_%d",
				 &dev->persist->pdev->dev);
	if (err)
		goto err_diag_counters;

	if (mlx4_ib_mad_init(ibdev))
	err = mlx4_ib_mad_init(ibdev);
	if (err)
		goto err_reg;

	if (mlx4_ib_init_sriov(ibdev))
	err = mlx4_ib_init_sriov(ibdev);
	if (err)
		goto err_mad;

	if (!iboe->nb.notifier_call) {
@@ -2839,7 +2855,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
				do_slave_init(ibdev, j, 1);
		}
	}
	return ibdev;

	/* register mlx4 core notifier */
	ibdev->mlx_nb.notifier_call = mlx4_ib_event;
	err = mlx4_register_event_notifier(dev, &ibdev->mlx_nb);
	WARN(err, "failed to register mlx4 event notifier (%d)", err);

	auxiliary_set_drvdata(adev, ibdev);
	return 0;

err_notif:
	if (ibdev->iboe.nb.notifier_call) {
@@ -2883,7 +2906,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
err_dealloc:
	ib_dealloc_device(&ibdev->ib_dev);

	return NULL;
	return err;
}

int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
@@ -2950,12 +2973,16 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
	return err;
}

static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
static void mlx4_ib_remove(struct auxiliary_device *adev)
{
	struct mlx4_ib_dev *ibdev = ibdev_ptr;
	struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
	struct mlx4_dev *dev = madev->mdev;
	struct mlx4_ib_dev *ibdev = auxiliary_get_drvdata(adev);
	int p;
	int i;

	mlx4_unregister_event_notifier(dev, &ibdev->mlx_nb);

	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
		devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
	ibdev->ib_active = false;
@@ -3176,11 +3203,13 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
	}
}

static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
			  enum mlx4_dev_event event, unsigned long param)
static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
			 void *param)
{
	struct mlx4_ib_dev *ibdev =
		container_of(this, struct mlx4_ib_dev, mlx_nb);
	struct mlx4_dev *dev = ibdev->dev;
	struct ib_event ibev;
	struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
	struct mlx4_eqe *eqe = NULL;
	struct ib_event_work *ew;
	int p = 0;
@@ -3190,22 +3219,28 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
	    (event == MLX4_DEV_EVENT_PORT_DOWN))) {
		ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
		if (!ew)
			return;
			return NOTIFY_DONE;
		INIT_WORK(&ew->work, handle_bonded_port_state_event);
		ew->ib_dev = ibdev;
		queue_work(wq, &ew->work);
		return;
		return NOTIFY_DONE;
	}

	if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
	switch (event) {
	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
		break;
	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
		eqe = (struct mlx4_eqe *)param;
	else
		p = (int) param;
		break;
	default:
		p = *(int *)param;
		break;
	}

	switch (event) {
	case MLX4_DEV_EVENT_PORT_UP:
		if (p > ibdev->num_ports)
			return;
			return NOTIFY_DONE;
		if (!mlx4_is_slave(dev) &&
		    rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
			IB_LINK_LAYER_INFINIBAND) {
@@ -3220,7 +3255,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,

	case MLX4_DEV_EVENT_PORT_DOWN:
		if (p > ibdev->num_ports)
			return;
			return NOTIFY_DONE;
		ibev.event = IB_EVENT_PORT_ERR;
		break;

@@ -3233,7 +3268,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
		ew = kmalloc(sizeof *ew, GFP_ATOMIC);
		if (!ew)
			return;
			return NOTIFY_DONE;

		INIT_WORK(&ew->work, handle_port_mgmt_change_event);
		memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
@@ -3243,7 +3278,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
			queue_work(wq, &ew->work);
		else
			handle_port_mgmt_change_event(&ew->work);
		return;
		return NOTIFY_DONE;

	case MLX4_DEV_EVENT_SLAVE_INIT:
		/* here, p is the slave id */
@@ -3259,7 +3294,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
								       1);
			}
		}
		return;
		return NOTIFY_DONE;

	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
		if (mlx4_is_master(dev)) {
@@ -3275,22 +3310,33 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
		}
		/* here, p is the slave id */
		do_slave_init(ibdev, p, 0);
		return;
		return NOTIFY_DONE;

	default:
		return;
		return NOTIFY_DONE;
	}

	ibev.device	      = ibdev_ptr;
	ibev.device	      = &ibdev->ib_dev;
	ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;

	ib_dispatch_event(&ibev);
	return NOTIFY_DONE;
}

static struct mlx4_interface mlx4_ib_interface = {
	.add		= mlx4_ib_add,
static const struct auxiliary_device_id mlx4_ib_id_table[] = {
	{ .name = MLX4_ADEV_NAME ".ib" },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx4_ib_id_table);

static struct mlx4_adrv mlx4_ib_adrv = {
	.adrv = {
		.name	= "ib",
		.probe	= mlx4_ib_probe,
		.remove	= mlx4_ib_remove,
	.event		= mlx4_ib_event,
		.id_table = mlx4_ib_id_table,
	},
	.protocol	= MLX4_PROT_IB_IPV6,
	.flags		= MLX4_INTFF_BONDING
};
@@ -3315,7 +3361,7 @@ static int __init mlx4_ib_init(void)
	if (err)
		goto clean_cm;

	err = mlx4_register_interface(&mlx4_ib_interface);
	err = mlx4_register_auxiliary_driver(&mlx4_ib_adrv);
	if (err)
		goto clean_mcg;

@@ -3337,7 +3383,7 @@ static int __init mlx4_ib_init(void)

static void __exit mlx4_ib_cleanup(void)
{
	mlx4_unregister_interface(&mlx4_ib_interface);
	mlx4_unregister_auxiliary_driver(&mlx4_ib_adrv);
	mlx4_ib_mcg_destroy();
	mlx4_ib_cm_destroy();
	mlx4_ib_qp_event_cleanup();
+2 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/notifier.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -644,6 +645,7 @@ struct mlx4_ib_dev {
	spinlock_t		reset_flow_resource_lock;
	struct list_head		qp_list;
	struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
	struct notifier_block	mlx_nb;
};

struct ib_event_work {
+1 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ config MLX4_EN_DCB
config MLX4_CORE
	tristate
	depends on PCI
	select AUXILIARY_BUS
	select NET_DEVLINK
	default n

+1 −1
Original line number Diff line number Diff line
@@ -194,7 +194,7 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
	mutex_unlock(&persist->device_state_mutex);

	/* At that step HW was already reset, now notify clients */
	mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
	mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, NULL);
	mlx4_cmd_wake_completions(dev);
	return;

+2 −2
Original line number Diff line number Diff line
@@ -2113,7 +2113,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
		if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
			goto inform_slave_state;

		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, &slave);

		/* write the version in the event field */
		reply |= mlx4_comm_get_version();
@@ -2152,7 +2152,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
		if (mlx4_master_activate_admin_state(priv, slave))
				goto reset_slave;
		slave_state[slave].active = true;
		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, &slave);
		break;
	case MLX4_COMM_CMD_VHCR_POST:
		if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
Loading