Commit c80a0c52 authored by Leon Romanovsky, committed by Jason Gunthorpe
Browse files

RDMA/cma: Add missing error handling of listen_id

Don't silently continue if rdma_listen() fails; instead, destroy the
previously created CM_ID and return an error to the caller.

Fixes: d02d1f53 ("RDMA/cma: Fix deadlock destroying listen requests")
Link: https://lore.kernel.org/r/20201104144008.3808124-5-leon@kernel.org


Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 0413755c
Loading
Loading
Loading
Loading
+101 −74
Original line number Diff line number Diff line
@@ -2495,7 +2495,7 @@ static int cma_listen_handler(struct rdma_cm_id *id,
	return id_priv->id.event_handler(id, event);
}

static void cma_listen_on_dev(struct rdma_id_private *id_priv,
static int cma_listen_on_dev(struct rdma_id_private *id_priv,
			     struct cma_device *cma_dev)
{
	struct rdma_id_private *dev_id_priv;
@@ -2505,13 +2505,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
	lockdep_assert_held(&lock);

	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
		return;
		return 0;

	dev_id_priv =
		__rdma_create_id(net, cma_listen_handler, id_priv,
				 id_priv->id.ps, id_priv->id.qp_type, id_priv);
	if (IS_ERR(dev_id_priv))
		return;
		return PTR_ERR(dev_id_priv);

	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
	memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
@@ -2527,19 +2527,34 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,

	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
	if (ret)
		dev_warn(&cma_dev->device->dev,
			 "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
		goto err_listen;
	return 0;
err_listen:
	list_del(&id_priv->listen_list);
	dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret);
	rdma_destroy_id(&dev_id_priv->id);
	return ret;
}

static void cma_listen_on_all(struct rdma_id_private *id_priv)
static int cma_listen_on_all(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev;
	int ret;

	mutex_lock(&lock);
	list_add_tail(&id_priv->list, &listen_any_list);
	list_for_each_entry(cma_dev, &dev_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	list_for_each_entry(cma_dev, &dev_list, list) {
		ret = cma_listen_on_dev(id_priv, cma_dev);
		if (ret)
			goto err_listen;
	}
	mutex_unlock(&lock);
	return 0;

err_listen:
	list_del(&id_priv->list);
	mutex_unlock(&lock);
	return ret;
}

void rdma_set_service_type(struct rdma_cm_id *id, int tos)
@@ -3692,8 +3707,11 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
			ret = -ENOSYS;
			goto err;
		}
	} else
		cma_listen_on_all(id_priv);
	} else {
		ret = cma_listen_on_all(id_priv);
		if (ret)
			goto err;
	}

	return 0;
err:
@@ -4745,69 +4763,6 @@ static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

/*
 * cma_add_one() - set up CMA per-device state for a newly added ib_device.
 * @device: the ib_device being added.
 *
 * Allocates a struct cma_device together with its per-port default GID type
 * and default RoCE ToS arrays, stores it as this client's data on @device,
 * links it on dev_list and calls cma_listen_on_dev() for every id found on
 * listen_any_list.
 *
 * Return: 0 on success, -ENOMEM if any of the allocations fails.
 */
static int cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;
	int ret;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return -ENOMEM;

	cma_dev->device = device;
	/* One zero-initialized slot per physical port. */
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type) {
		ret = -ENOMEM;
		goto free_cma_dev;
	}

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos) {
		ret = -ENOMEM;
		goto free_gid_type;
	}

	/*
	 * Per-port defaults: arrays are indexed from 0, hence the
	 * "i - rdma_start_port(device)" offset. Use the preferred RoCE GID
	 * type when the port's support mask has it, otherwise fall back to
	 * the lowest set bit of the mask.
	 */
	rdma_for_each_port (device, i) {
		supported_gids = roce_gid_type_mask_support(device, i);
		WARN_ON(!supported_gids);
		if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				CMA_PREFERRED_ROCE_GID_TYPE;
		else
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				find_first_bit(&supported_gids, BITS_PER_LONG);
		cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
	}

	init_completion(&cma_dev->comp);
	refcount_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	/*
	 * Publish the device and start listening on it for every id on
	 * listen_any_list, all under the global lock. The result of
	 * cma_listen_on_dev() is not examined here.
	 */
	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	trace_cm_add_one(device);
	return 0;

	/* Error unwind: release allocations in reverse order. */
free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);
	return ret;
}

static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -4870,6 +4825,78 @@ static void cma_process_remove(struct cma_device *cma_dev)
	wait_for_completion(&cma_dev->comp);
}

/*
 * cma_add_one() - set up CMA per-device state for a newly added ib_device.
 * @device: the ib_device being added.
 *
 * Allocates a struct cma_device and its per-port default GID type / RoCE ToS
 * arrays, stores it as this client's data on @device, links it on dev_list
 * and starts a listener on it for every id on listen_any_list.
 *
 * If any cma_listen_on_dev() call fails, the device is unlinked from
 * dev_list, cma_process_remove() is run on it and all allocations made here
 * are released.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, or the error returned
 * by cma_listen_on_dev().
 */
static int cma_add_one(struct ib_device *device)
{
	struct rdma_id_private *listener;
	unsigned long gid_mask = 0;
	struct cma_device *cma_dev;
	unsigned int port;
	int ret;

	cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL);
	if (!cma_dev)
		return -ENOMEM;
	cma_dev->device = device;

	/* Per-port tables, one zeroed slot for each physical port. */
	cma_dev->default_gid_type =
		kcalloc(device->phys_port_cnt,
			sizeof(*cma_dev->default_gid_type), GFP_KERNEL);
	if (!cma_dev->default_gid_type) {
		ret = -ENOMEM;
		goto free_cma_dev;
	}

	cma_dev->default_roce_tos =
		kcalloc(device->phys_port_cnt,
			sizeof(*cma_dev->default_roce_tos), GFP_KERNEL);
	if (!cma_dev->default_roce_tos) {
		ret = -ENOMEM;
		goto free_gid_type;
	}

	rdma_for_each_port (device, port) {
		unsigned int idx;

		gid_mask = roce_gid_type_mask_support(device, port);
		WARN_ON(!gid_mask);

		/* Tables are 0-based while port numbers start at rdma_start_port(). */
		idx = port - rdma_start_port(device);

		/* Prefer the preferred RoCE GID type, else the lowest supported one. */
		if (gid_mask & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
			cma_dev->default_gid_type[idx] =
				CMA_PREFERRED_ROCE_GID_TYPE;
		else
			cma_dev->default_gid_type[idx] =
				find_first_bit(&gid_mask, BITS_PER_LONG);
		cma_dev->default_roce_tos[idx] = 0;
	}

	init_completion(&cma_dev->comp);
	refcount_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	/*
	 * Publish the device and attach every wildcard listener to it. The
	 * device is taken back off dev_list before the lock is dropped if
	 * any listener could not be set up.
	 */
	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	ret = 0;
	list_for_each_entry(listener, &listen_any_list, list) {
		ret = cma_listen_on_dev(listener, cma_dev);
		if (ret)
			break;
	}
	if (ret)
		list_del(&cma_dev->list);
	mutex_unlock(&lock);

	if (ret)
		goto undo_listen;

	trace_cm_add_one(device);
	return 0;

undo_listen:
	cma_process_remove(cma_dev);
	kfree(cma_dev->default_roce_tos);
free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);
	return ret;
}

static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;