Commit 93f82444 authored by Leon Romanovsky's avatar Leon Romanovsky
Browse files

RDMA/mlx5: Convert mlx5_ib to use auxiliary bus



The conversion to the auxiliary bus solves a long-standing issue with the
existing mlx5_ib<->mlx5_core coupling: both modules had to be present in
the initramfs if either one of them was needed for boot.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
parent 912cebf4
Loading
Loading
Loading
Loading
+58 −18
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
	const struct mlx5_ib_profile *profile;
	struct mlx5_ib_dev *ibdev;
	int vport_index;
	int ret;

	if (rep->vport == MLX5_VPORT_UPLINK)
		profile = &raw_eth_profile;
@@ -46,8 +47,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
			      GFP_KERNEL);
	if (!ibdev->port) {
		ib_dealloc_device(&ibdev->ib_dev);
		return -ENOMEM;
		ret = -ENOMEM;
		goto fail_port;
	}

	ibdev->is_rep = true;
@@ -58,12 +59,19 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
	ibdev->mdev = dev;
	ibdev->num_ports = num_ports;

	if (!__mlx5_ib_add(ibdev, profile))
		return -EINVAL;
	ret = __mlx5_ib_add(ibdev, profile);
	if (ret)
		goto fail_add;

	rep->rep_data[REP_IB].priv = ibdev;

	return 0;

fail_add:
	kfree(ibdev->port);
fail_port:
	ib_dealloc_device(&ibdev->ib_dev);
	return ret;
}

static void
@@ -94,20 +102,6 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
	.get_proto_dev = mlx5_ib_vport_get_proto_dev,
};

void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev)
{
	struct mlx5_eswitch *esw = mdev->priv.eswitch;

	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
}

void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev)
{
	struct mlx5_eswitch *esw = mdev->priv.eswitch;

	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}

u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
{
	return mlx5_eswitch_mode(esw);
@@ -154,3 +148,49 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
	return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport,
						   sq->base.mqp.qpn);
}

/*
 * Auxiliary-bus probe callback for the IB representor driver.
 *
 * Recovers the mlx5_adev wrapper that embeds @adev, takes the eswitch of
 * its core device and hands rep_ops to mlx5_eswitch_register_vport_reps()
 * for the REP_IB slot. @id is not used. Always returns 0.
 */
static int mlx5r_rep_probe(struct auxiliary_device *adev,
			   const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);

	mlx5_eswitch_register_vport_reps(madev->mdev->priv.eswitch, &rep_ops,
					 REP_IB);
	return 0;
}

/*
 * Auxiliary-bus remove callback for the IB representor driver.
 *
 * Recovers the mlx5_adev wrapper that embeds @adev and calls
 * mlx5_eswitch_unregister_vport_reps() for the REP_IB slot on the eswitch
 * of its core device.
 */
static void mlx5r_rep_remove(struct auxiliary_device *adev)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);

	mlx5_eswitch_unregister_vport_reps(madev->mdev->priv.eswitch, REP_IB);
}

/*
 * Match table referenced by mlx5r_rep_driver: a single auxiliary device
 * name, MLX5_ADEV_NAME ".rdma-rep", followed by an empty terminating entry.
 */
static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);

/*
 * Auxiliary driver for IB representors: ties mlx5r_rep_probe() and
 * mlx5r_rep_remove() to the device names listed in mlx5r_rep_id_table.
 */
static struct auxiliary_driver mlx5r_rep_driver = {
	.name = "rep",
	.probe = mlx5r_rep_probe,
	.remove = mlx5r_rep_remove,
	.id_table = mlx5r_rep_id_table,
};

/*
 * Register mlx5r_rep_driver on the auxiliary bus.
 *
 * Returns the result of auxiliary_driver_register() unchanged.
 */
int mlx5r_rep_init(void)
{
	return auxiliary_driver_register(&mlx5r_rep_driver);
}

/* Unregister mlx5r_rep_driver from the auxiliary bus. */
void mlx5r_rep_cleanup(void)
{
	auxiliary_driver_unregister(&mlx5r_rep_driver);
}
+4 −4
Original line number Diff line number Diff line
@@ -18,8 +18,8 @@ struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
					   u16 vport_num);
void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev);
void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev);
int mlx5r_rep_init(void);
void mlx5r_rep_cleanup(void);
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
						   struct mlx5_ib_sq *sq,
						   u16 port);
@@ -51,8 +51,8 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
	return NULL;
}

static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {}
static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {}
static inline int mlx5r_rep_init(void) { return 0; }
static inline void mlx5r_rep_cleanup(void) {}
static inline
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
						   struct mlx5_ib_sq *sq,
+97 −56
Original line number Diff line number Diff line
@@ -4593,7 +4593,7 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
	ib_dealloc_device(&dev->ib_dev);
}

void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
int __mlx5_ib_add(struct mlx5_ib_dev *dev,
		  const struct mlx5_ib_profile *profile)
{
	int err;
@@ -4610,13 +4610,16 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
	}

	dev->ib_active = true;

	return dev;
	return 0;

err_out:
	__mlx5_ib_remove(dev, profile, i);

	return NULL;
	/* Clean up stages which were initialized */
	while (i) {
		i--;
		if (profile->stage[i].cleanup)
			profile->stage[i].cleanup(dev);
	}
	return -ENOMEM;
}

static const struct mlx5_ib_profile pf_profile = {
@@ -4739,8 +4742,11 @@ const struct mlx5_ib_profile raw_eth_profile = {
		     NULL),
};

static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
static int mlx5r_mp_probe(struct auxiliary_device *adev,
			  const struct auxiliary_device_id *id)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_ib_multiport_info *mpi;
	struct mlx5_ib_dev *dev;
	bool bound = false;
@@ -4748,15 +4754,14 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)

	mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
	if (!mpi)
		return NULL;
		return -ENOMEM;

	mpi->mdev = mdev;

	err = mlx5_query_nic_vport_system_image_guid(mdev,
						     &mpi->sys_image_guid);
	if (err) {
		kfree(mpi);
		return NULL;
		return err;
	}

	mutex_lock(&mlx5_ib_multiport_mutex);
@@ -4777,40 +4782,46 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
	}
	mutex_unlock(&mlx5_ib_multiport_mutex);

	return mpi;
	dev_set_drvdata(&adev->dev, mpi);
	return 0;
}

static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
/*
 * Auxiliary-bus remove callback for the multiport driver.
 *
 * Takes the mlx5_ib_multiport_info stored as driver data on @adev and,
 * while holding mlx5_ib_multiport_mutex, unbinds it from its IB device
 * (only when mpi->ibdev is set) and unlinks it from its list. The
 * structure is freed once the mutex has been released.
 */
static void mlx5r_mp_remove(struct auxiliary_device *adev)
{
	struct mlx5_ib_multiport_info *mpi = dev_get_drvdata(&adev->dev);

	mutex_lock(&mlx5_ib_multiport_mutex);
	if (mpi->ibdev)
		mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
	/*
	 * NOTE(review): list_del() runs on both the bound and unbound path;
	 * this assumes mpi->list is linked in either case - confirm against
	 * the probe and unbind code.
	 */
	list_del(&mpi->list);
	mutex_unlock(&mlx5_ib_multiport_mutex);

	kfree(mpi);
}

static int mlx5r_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	const struct mlx5_ib_profile *profile;
	int port_type_cap, num_ports, ret;
	enum rdma_link_layer ll;
	struct mlx5_ib_dev *dev;
	int port_type_cap;
	int num_ports;

	if (MLX5_ESWITCH_MANAGER(mdev) &&
	    mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
		if (!mlx5_core_mp_enabled(mdev))
			mlx5_ib_register_vport_reps(mdev);
		return mdev;
	}

	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);

	if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
		return mlx5_ib_add_slave_port(mdev);

	num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
			MLX5_CAP_GEN(mdev, num_vhca_ports));
	dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
	if (!dev)
		return NULL;
		return -ENOMEM;
	dev->port = kcalloc(num_ports, sizeof(*dev->port),
			     GFP_KERNEL);
	if (!dev->port) {
		ib_dealloc_device(&dev->ib_dev);
		return NULL;
		return -ENOMEM;
	}

	dev->mdev = mdev;
@@ -4821,38 +4832,50 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
	else
		profile = &pf_profile;

	return __mlx5_ib_add(dev, profile);
	ret = __mlx5_ib_add(dev, profile);
	if (ret) {
		kfree(dev->port);
		ib_dealloc_device(&dev->ib_dev);
		return ret;
	}

	dev_set_drvdata(&adev->dev, dev);
	return 0;
}

static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
static void mlx5r_remove(struct auxiliary_device *adev)
{
	struct mlx5_ib_multiport_info *mpi;
	struct mlx5_ib_dev *dev;

	if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) {
		mlx5_ib_unregister_vport_reps(mdev);
		return;
	dev = dev_get_drvdata(&adev->dev);
	__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}

	if (mlx5_core_is_mp_slave(mdev)) {
		mpi = context;
		mutex_lock(&mlx5_ib_multiport_mutex);
		if (mpi->ibdev)
			mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
		list_del(&mpi->list);
		mutex_unlock(&mlx5_ib_multiport_mutex);
		kfree(mpi);
		return;
	}
/*
 * Match table referenced by mlx5r_mp_driver: a single auxiliary device
 * name, MLX5_ADEV_NAME ".multiport", followed by an empty terminating entry.
 */
static const struct auxiliary_device_id mlx5r_mp_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".multiport", },
	{},
};

	dev = context;
	__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
/*
 * Match table referenced by mlx5r_driver: a single auxiliary device name,
 * MLX5_ADEV_NAME ".rdma", followed by an empty terminating entry.
 */
static const struct auxiliary_device_id mlx5r_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma", },
	{},
};

static struct mlx5_interface mlx5_ib_interface = {
	.add            = mlx5_ib_add,
	.remove         = mlx5_ib_remove,
	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
MODULE_DEVICE_TABLE(auxiliary, mlx5r_mp_id_table);
MODULE_DEVICE_TABLE(auxiliary, mlx5r_id_table);

/*
 * Auxiliary driver for multiport devices: ties mlx5r_mp_probe() and
 * mlx5r_mp_remove() to the device names listed in mlx5r_mp_id_table.
 */
static struct auxiliary_driver mlx5r_mp_driver = {
	.name = "multiport",
	.probe = mlx5r_mp_probe,
	.remove = mlx5r_mp_remove,
	.id_table = mlx5r_mp_id_table,
};

/*
 * Auxiliary driver for regular RDMA devices: ties mlx5r_probe() and
 * mlx5r_remove() to the device names listed in mlx5r_id_table.
 */
static struct auxiliary_driver mlx5r_driver = {
	.name = "rdma",
	.probe = mlx5r_probe,
	.remove = mlx5r_remove,
	.id_table = mlx5r_id_table,
};

unsigned long mlx5_ib_get_xlt_emergency_page(void)
@@ -4868,7 +4891,7 @@ void mlx5_ib_put_xlt_emergency_page(void)

static int __init mlx5_ib_init(void)
{
	int err;
	int ret;

	xlt_emergency_page = __get_free_page(GFP_KERNEL);
	if (!xlt_emergency_page)
@@ -4883,15 +4906,33 @@ static int __init mlx5_ib_init(void)
	}

	mlx5_ib_odp_init();
	ret = mlx5r_rep_init();
	if (ret)
		goto rep_err;
	ret = auxiliary_driver_register(&mlx5r_mp_driver);
	if (ret)
		goto mp_err;
	ret = auxiliary_driver_register(&mlx5r_driver);
	if (ret)
		goto drv_err;
	return 0;

	err = mlx5_register_interface(&mlx5_ib_interface);

	return err;
drv_err:
	auxiliary_driver_unregister(&mlx5r_mp_driver);
mp_err:
	mlx5r_rep_cleanup();
rep_err:
	destroy_workqueue(mlx5_ib_event_wq);
	free_page((unsigned long)xlt_emergency_page);
	return ret;
}

static void __exit mlx5_ib_cleanup(void)
{
	mlx5_unregister_interface(&mlx5_ib_interface);
	auxiliary_driver_unregister(&mlx5r_driver);
	auxiliary_driver_unregister(&mlx5r_mp_driver);
	mlx5r_rep_cleanup();

	destroy_workqueue(mlx5_ib_event_wq);
	mutex_destroy(&xlt_emergency_page_mutex);
	free_page(xlt_emergency_page);
+2 −2
Original line number Diff line number Diff line
@@ -1317,7 +1317,7 @@ extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
		      const struct mlx5_ib_profile *profile,
		      int stage);
void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
int __mlx5_ib_add(struct mlx5_ib_dev *dev,
		  const struct mlx5_ib_profile *profile);

int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
+66 −0
Original line number Diff line number Diff line
@@ -144,16 +144,82 @@ static bool is_vnet_supported(struct mlx5_core_dev *dev)
	return true;
}

/*
 * Decide whether the "rdma-rep" auxiliary device applies to @dev.
 *
 * True only when all of the following hold, checked in this order:
 * CONFIG_MLX5_INFINIBAND is enabled, MLX5_PRIV_FLAGS_DISABLE_IB_ADEV is
 * not set in dev->priv.flags, is_eth_rep_supported() accepts the device,
 * the device is an eswitch manager, its eswitch mode is
 * MLX5_ESWITCH_OFFLOADS, and mlx5_core_mp_enabled() is false.
 */
static bool is_ib_rep_supported(struct mlx5_core_dev *dev)
{
	return IS_ENABLED(CONFIG_MLX5_INFINIBAND) &&
	       !(dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) &&
	       is_eth_rep_supported(dev) &&
	       MLX5_ESWITCH_MANAGER(dev) &&
	       mlx5_eswitch_mode(dev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS &&
	       !mlx5_core_mp_enabled(dev);
}

/*
 * Decide whether the "multiport" auxiliary device applies to @dev.
 *
 * True only when all of the following hold, checked in this order:
 * CONFIG_MLX5_INFINIBAND is enabled, MLX5_PRIV_FLAGS_DISABLE_IB_ADEV is
 * not set in dev->priv.flags, is_ib_rep_supported() rejects the device,
 * the port_type capability equals MLX5_CAP_PORT_TYPE_ETH, and
 * mlx5_core_is_mp_slave() is true.
 */
static bool is_mp_supported(struct mlx5_core_dev *dev)
{
	return IS_ENABLED(CONFIG_MLX5_INFINIBAND) &&
	       !(dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) &&
	       !is_ib_rep_supported(dev) &&
	       MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
	       mlx5_core_is_mp_slave(dev);
}

/*
 * Decide whether the plain "rdma" auxiliary device applies to @dev.
 *
 * True only when CONFIG_MLX5_INFINIBAND is enabled,
 * MLX5_PRIV_FLAGS_DISABLE_IB_ADEV is not set in dev->priv.flags, and
 * neither is_ib_rep_supported() nor is_mp_supported() claims the device.
 * The checks run in that order.
 */
static bool is_ib_supported(struct mlx5_core_dev *dev)
{
	return IS_ENABLED(CONFIG_MLX5_INFINIBAND) &&
	       !(dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) &&
	       !is_ib_rep_supported(dev) &&
	       !is_mp_supported(dev);
}

/*
 * Table of auxiliary device kinds, indexed by MLX5_INTERFACE_PROTOCOL_*.
 *
 * Each entry carries a name suffix and a predicate that reports whether
 * that kind of device is supported on a given mlx5_core_dev.
 * NOTE(review): the suffixes line up with the MLX5_ADEV_NAME ".<suffix>"
 * strings used in the driver id tables, so presumably the device name is
 * built from MLX5_ADEV_NAME plus the suffix - confirm at the use site.
 */
static const struct mlx5_adev_device {
	const char *suffix;
	bool (*is_supported)(struct mlx5_core_dev *dev);
} mlx5_adev_devices[] = {
	[MLX5_INTERFACE_PROTOCOL_VDPA] = { .suffix = "vnet",
					   .is_supported = &is_vnet_supported },
	[MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma",
					 .is_supported = &is_ib_supported },
	[MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
					  .is_supported = &is_eth_supported },
	[MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
					   .is_supported = &is_eth_rep_supported },
	[MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
					   .is_supported = &is_ib_rep_supported },
	[MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport",
					   .is_supported = &is_mp_supported },
};

int mlx5_adev_idx_alloc(void)
Loading