Commit 4df22ca8 authored by Linus Torvalds
Pull rdma fixes from Jason Gunthorpe:
 "A few recent regressions in rxe's multicast code, and some old driver
  bugs:

   - Error case unwind bug in rxe for rkeys

   - Do not call netdev functions under a spinlock in rxe multicast
     code

   - Use the proper BH lock type in rxe multicast code (see the sketch
     after this message)

   - Fix irdma deadlock and crash

   - Add a missing flush to drain irdma QPs when in error

   - Fix high userspace latency in irdma during destroy due to
     synchronize_rcu()

   - Rare race in siw MPA processing"
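
As background for the BH-lock item above: a lock taken from both process
context and softirq (packet-processing) context needs the _bh variant,
while _irqsave is only required when hardirq context can contend for the
same lock. A minimal sketch of the pattern, with illustrative names
(demo_* is not rxe code):

	#include <linux/spinlock.h>
	#include <linux/list.h>

	static DEFINE_SPINLOCK(demo_lock);
	static LIST_HEAD(demo_list);

	/* process context: disable softirqs so a packet handler that
	 * takes the same lock cannot deadlock against us on this CPU */
	static void demo_add(struct list_head *item)
	{
		spin_lock_bh(&demo_lock);
		list_add(item, &demo_list);
		spin_unlock_bh(&demo_lock);
	}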

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/rxe: Change mcg_lock to a _bh lock
  RDMA/rxe: Do not call dev_mc_add/del() under a spinlock
  RDMA/siw: Fix a condition race issue in MPA request processing
  RDMA/irdma: Fix possible crash due to NULL netdev in notifier
  RDMA/irdma: Reduce iWARP QP destroy time
  RDMA/irdma: Flush iWARP QP if modified to ERR from RTR state
  RDMA/rxe: Recheck the MR in when generating a READ reply
  RDMA/irdma: Fix deadlock in irdma_cleanup_cm_core()
  RDMA/rxe: Fix "Replace mr by rkey in responder resources"
parents 64267926 bfdc0edd
drivers/infiniband/hw/irdma/cm.c (+10 −23)
@@ -2308,10 +2308,8 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
 	return NULL;
 }
 
-static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
+static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
 {
-	struct irdma_cm_node *cm_node =
-			    container_of(rcu_head, struct irdma_cm_node, rcu_head);
 	struct irdma_cm_core *cm_core = cm_node->cm_core;
 	struct irdma_qp *iwqp;
 	struct irdma_cm_info nfo;
@@ -2359,7 +2357,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
 	}
 
 	cm_core->cm_free_ah(cm_node);
-	kfree(cm_node);
 }
 
 /**
@@ -2387,8 +2384,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
 
 	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 
-	/* wait for all list walkers to exit their grace period */
-	call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb);
+	irdma_destroy_connection(cm_node);
+
+	kfree_rcu(cm_node, rcu_head);
 }
 
 /**
@@ -3246,15 +3244,10 @@ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
  */
 void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
 {
-	unsigned long flags;
-
 	if (!cm_core)
 		return;
 
-	spin_lock_irqsave(&cm_core->ht_lock, flags);
-	if (timer_pending(&cm_core->tcp_timer))
-		del_timer_sync(&cm_core->tcp_timer);
-	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	del_timer_sync(&cm_core->tcp_timer);
 
 	destroy_workqueue(cm_core->event_wq);
 	cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
@@ -3467,12 +3460,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
 	}
 
 	cm_id = iwqp->cm_id;
-	/* make sure we havent already closed this connection */
-	if (!cm_id) {
-		spin_unlock_irqrestore(&iwqp->lock, flags);
-		return;
-	}
-
 	original_hw_tcp_state = iwqp->hw_tcp_state;
 	original_ibqp_state = iwqp->ibqp_state;
 	last_ae = iwqp->last_aeq;
@@ -3494,11 +3481,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
 			disconn_status = -ECONNRESET;
 	}
 
-	if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+	if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
 	    original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
 	    last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
 	    last_ae == IRDMA_AE_BAD_CLOSE ||
-	     last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
+	    last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
 		issue_close = 1;
 		iwqp->cm_id = NULL;
 		qp->term_flags = 0;
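
The irdma_rem_ref_cm_node() change above is the classic cure for slow
RCU-deferred destruction: do the teardown inline and defer only the
kfree() past the grace period. A minimal sketch of the pattern, with
illustrative names (demo_node/demo_node_put are not irdma symbols):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct demo_node {
		int id;
		struct rcu_head rcu_head;	/* storage for the deferred free */
	};

	static void demo_node_teardown(struct demo_node *node)
	{
		/* heavy cleanup runs right here, in the caller's context */
	}

	static void demo_node_put(struct demo_node *node)
	{
		demo_node_teardown(node);
		/*
		 * RCU readers still walking the list only need the memory
		 * to stay valid, so defer just the kfree(); nobody waits
		 * in synchronize_rcu() or in a long call_rcu() callback.
		 */
		kfree_rcu(node, rcu_head);
	}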
drivers/infiniband/hw/irdma/utils.c (+9 −12)
@@ -258,18 +258,16 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
 	u32 local_ipaddr[4] = {};
 	bool ipv4 = true;
 
-	real_dev = rdma_vlan_dev_real_dev(netdev);
-	if (!real_dev)
-		real_dev = netdev;
-
-	ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
-	if (!ibdev)
-		return NOTIFY_DONE;
-
-	iwdev = to_iwdev(ibdev);
-
 	switch (event) {
 	case NETEVENT_NEIGH_UPDATE:
+		real_dev = rdma_vlan_dev_real_dev(netdev);
+		if (!real_dev)
+			real_dev = netdev;
+
+		ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+		if (!ibdev)
+			return NOTIFY_DONE;
+
+		iwdev = to_iwdev(ibdev);
 		p = (__be32 *)neigh->primary_key;
 		if (neigh->tbl->family == AF_INET6) {
 			ipv4 = false;
@@ -290,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
 			irdma_manage_arp_cache(iwdev->rf, neigh->ha,
 					       local_ipaddr, ipv4,
 					       IRDMA_ARP_DELETE);
+		ib_device_put(ibdev);
 		break;
 	default:
 		break;
 	}
 
-	ib_device_put(ibdev);
-
 	return NOTIFY_DONE;
 }
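
The reshuffle above scopes the device lookup to the one event that
actually carries a neighbour. A sketch of the fixed shape (demo_net_event
is illustrative; the payload type per event follows net/netevent.h):

	#include <linux/notifier.h>
	#include <net/netevent.h>
	#include <net/neighbour.h>
	#include <rdma/ib_verbs.h>

	static int demo_net_event(struct notifier_block *nb,
				  unsigned long event, void *ptr)
	{
		struct ib_device *ibdev;

		switch (event) {
		case NETEVENT_NEIGH_UPDATE: {
			struct neighbour *neigh = ptr;

			/* take the device reference inside the case that
			 * needs it, and drop it before leaving the case */
			ibdev = ib_device_get_by_netdev(neigh->dev,
							RDMA_DRIVER_IRDMA);
			if (!ibdev)
				return NOTIFY_DONE;
			/* ... update the driver's ARP cache here ... */
			ib_device_put(ibdev);
			break;
		}
		default:
			/* other netevents carry different payloads; never
			 * interpret ptr as a neighbour here */
			break;
		}

		return NOTIFY_DONE;
	}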

drivers/infiniband/hw/irdma/verbs.c (+2 −2)
@@ -1618,13 +1618,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 
 	if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
 		if (dont_wait) {
-			if (iwqp->cm_id && iwqp->hw_tcp_state) {
+			if (iwqp->hw_tcp_state) {
 				spin_lock_irqsave(&iwqp->lock, flags);
 				iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
 				iwqp->last_aeq = IRDMA_AE_RESET_SENT;
 				spin_unlock_irqrestore(&iwqp->lock, flags);
-				irdma_cm_disconn(iwqp);
 			}
+			irdma_cm_disconn(iwqp);
 		} else {
 			int close_timer_started;

drivers/infiniband/sw/rxe/rxe_mcast.c (+35 −46)
@@ -38,13 +38,13 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
 }
 
 /**
- * rxe_mcast_delete - delete multicast address from rxe device
+ * rxe_mcast_del - delete multicast address from rxe device
  * @rxe: rxe device object
  * @mgid: multicast address as a gid
  *
  * Returns 0 on success else an error
  */
-static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
 {
 	unsigned char ll_addr[ETH_ALEN];

@@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
 struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
 	struct rxe_mcg *mcg;
-	unsigned long flags;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	mcg = __rxe_lookup_mcg(rxe, mgid);
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 
 	return mcg;
 }
@@ -159,17 +158,10 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
  * @mcg: new mcg object
  *
  * Context: caller should hold rxe->mcg lock
- * Returns: 0 on success else an error
  */
-static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
+static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 			   struct rxe_mcg *mcg)
 {
-	int err;
-
-	err = rxe_mcast_add(rxe, mgid);
-	if (unlikely(err))
-		return err;
-
 	kref_init(&mcg->ref_cnt);
 	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
 	INIT_LIST_HEAD(&mcg->qp_list);
@@ -184,8 +176,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 	 */
 	kref_get(&mcg->ref_cnt);
 	__rxe_insert_mcg(mcg);
-
-	return 0;
 }
 
 /**
@@ -198,7 +188,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
 	struct rxe_mcg *mcg, *tmp;
-	unsigned long flags;
 	int err;
 
 	if (rxe->attr.max_mcast_grp == 0)
@@ -209,36 +198,38 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 	if (mcg)
 		return mcg;
 
+	/* check to see if we have reached limit */
+	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
+		err = -ENOMEM;
+		goto err_dec;
+	}
+
 	/* speculative alloc of new mcg */
 	mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
 	if (!mcg)
 		return ERR_PTR(-ENOMEM);
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	/* re-check to see if someone else just added it */
 	tmp = __rxe_lookup_mcg(rxe, mgid);
 	if (tmp) {
+		spin_unlock_bh(&rxe->mcg_lock);
+		atomic_dec(&rxe->mcg_num);
 		kfree(mcg);
-		mcg = tmp;
-		goto out;
+		return tmp;
 	}
 
-	/* check to see if we have reached limit */
-	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
-		err = -ENOMEM;
-		goto err_dec;
-	}
-
-	err = __rxe_init_mcg(rxe, mgid, mcg);
-	if (err)
-		goto err_dec;
-out:
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-	return mcg;
+	__rxe_init_mcg(rxe, mgid, mcg);
+	spin_unlock_bh(&rxe->mcg_lock);
+
+	/* add mcast address outside of lock */
+	err = rxe_mcast_add(rxe, mgid);
+	if (!err)
+		return mcg;
 
+	kfree(mcg);
 err_dec:
 	atomic_dec(&rxe->mcg_num);
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-	kfree(mcg);
 	return ERR_PTR(err);
 }

@@ -268,7 +259,6 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
 	__rxe_remove_mcg(mcg);
 	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
 
-	rxe_mcast_delete(mcg->rxe, &mcg->mgid);
 	atomic_dec(&rxe->mcg_num);
 }

@@ -280,11 +270,12 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
  */
 static void rxe_destroy_mcg(struct rxe_mcg *mcg)
 {
-	unsigned long flags;
+	/* delete mcast address outside of lock */
+	rxe_mcast_del(mcg->rxe, &mcg->mgid);
 
-	spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
+	spin_lock_bh(&mcg->rxe->mcg_lock);
 	__rxe_destroy_mcg(mcg);
-	spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
+	spin_unlock_bh(&mcg->rxe->mcg_lock);
 }
 
 /**
@@ -339,25 +330,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
 	struct rxe_dev *rxe = mcg->rxe;
 	struct rxe_mca *mca, *tmp;
-	unsigned long flags;
 	int err;
 
 	/* check to see if the qp is already a member of the group */
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
 		if (mca->qp == qp) {
-			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+			spin_unlock_bh(&rxe->mcg_lock);
 			return 0;
 		}
 	}
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 
 	/* speculative alloc new mca without using GFP_ATOMIC */
 	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
 	if (!mca)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	/* re-check to see if someone else just attached qp */
 	list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
 		if (tmp->qp == qp) {
@@ -371,7 +361,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 	if (err)
 		kfree(mca);
 out:
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 	return err;
 }

@@ -405,9 +395,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
 	struct rxe_dev *rxe = mcg->rxe;
 	struct rxe_mca *mca, *tmp;
-	unsigned long flags;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
 		if (mca->qp == qp) {
 			__rxe_cleanup_mca(mca, mcg);
@@ -421,13 +410,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 			if (atomic_read(&mcg->qp_num) <= 0)
 				__rxe_destroy_mcg(mcg);
 
-			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+			spin_unlock_bh(&rxe->mcg_lock);
 			return 0;
 		}
 	}
 
 	/* we didn't find the qp on the list */
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 	return -EINVAL;
 }
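
Two patterns recur throughout the rxe_mcast.c diff: sleepable work
(GFP_KERNEL allocation, dev_mc_add/del) moves outside the spinlock, and
the lookup is repeated under the lock to catch a racing inserter. A
condensed sketch with illustrative names (demo_* is not rxe code):

	#include <linux/err.h>
	#include <linux/list.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);
	static LIST_HEAD(demo_objs);

	struct demo_obj {
		struct list_head list;
		int key;
	};

	static struct demo_obj *__demo_find(int key)
	{
		struct demo_obj *obj;

		list_for_each_entry(obj, &demo_objs, list)
			if (obj->key == key)
				return obj;
		return NULL;
	}

	static struct demo_obj *demo_get(int key)
	{
		struct demo_obj *obj, *tmp;

		/* speculative alloc outside the lock: may sleep, so no
		 * GFP_ATOMIC needed */
		obj = kzalloc(sizeof(*obj), GFP_KERNEL);
		if (!obj)
			return ERR_PTR(-ENOMEM);

		spin_lock_bh(&demo_lock);
		/* re-check: someone may have inserted while we slept */
		tmp = __demo_find(key);
		if (tmp) {
			spin_unlock_bh(&demo_lock);
			kfree(obj);		/* lost the race */
			return tmp;
		}
		obj->key = key;
		list_add(&obj->list, &demo_objs);
		spin_unlock_bh(&demo_lock);

		return obj;
	}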

drivers/infiniband/sw/rxe/rxe_resp.c (+25 −10)
@@ -680,6 +680,11 @@ static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp,
  * It is assumed that the access permissions if originally good
  * are OK and the mappings to be unchanged.
  *
+ * TODO: If someone reregisters an MR to change its size or
+ * access permissions during the processing of an RDMA read
+ * we should kill the responder resource and complete the
+ * operation with an error.
+ *
  * Return: mr on success else NULL
  */
 static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
@@ -690,23 +695,27 @@ static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
 
 	if (rkey_is_mw(rkey)) {
 		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
-		if (!mw || mw->rkey != rkey)
+		if (!mw)
 			return NULL;
 
-		if (mw->state != RXE_MW_STATE_VALID) {
+		mr = mw->mr;
+		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
+		    !mr || mr->state != RXE_MR_STATE_VALID) {
 			rxe_put(mw);
 			return NULL;
 		}
 
-		mr = mw->mr;
 		rxe_get(mr);
 		rxe_put(mw);
-	} else {
-		mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
-		if (!mr || mr->rkey != rkey)
-			return NULL;
-	}
+
+		return mr;
+	}
+
+	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+	if (!mr)
+		return NULL;
 
-	if (mr->state != RXE_MR_STATE_VALID) {
+	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
 		rxe_put(mr);
 		return NULL;
 	}
@@ -736,8 +745,14 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	}
 
 	if (res->state == rdatm_res_state_new) {
-		mr = qp->resp.mr;
-		qp->resp.mr = NULL;
+		if (!res->replay) {
+			mr = qp->resp.mr;
+			qp->resp.mr = NULL;
+		} else {
+			mr = rxe_recheck_mr(qp, res->read.rkey);
+			if (!mr)
+				return RESPST_ERR_RKEY_VIOLATION;
+		}
 
 		if (res->read.resid <= mtu)
 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
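
The ordering in rxe_recheck_mr() above matters: mw->mr is only stable
while the MW reference is held, so the MR must be validated and pinned
before the MW is released. A stripped-down sketch of that handover with
kref and illustrative types (demo_* is not rxe code):

	#include <linux/container_of.h>
	#include <linux/kref.h>
	#include <linux/slab.h>

	struct demo_mr { struct kref ref; };
	struct demo_mw { struct kref ref; struct demo_mr *mr; };

	static void demo_mw_release(struct kref *ref)
	{
		kfree(container_of(ref, struct demo_mw, ref));
	}

	/* returns the MR with its own reference held, or NULL */
	static struct demo_mr *demo_mr_from_mw(struct demo_mw *mw)
	{
		struct demo_mr *mr = mw->mr;

		if (!mr) {
			kref_put(&mw->ref, demo_mw_release);
			return NULL;
		}
		/* pin the MR while the MW reference still keeps it alive;
		 * putting the MW first would let mw->mr vanish under us */
		kref_get(&mr->ref);
		kref_put(&mw->ref, demo_mw_release);
		return mr;
	}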