Commit 16fa29ae authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'smc-fixes'



Dust Li says:

====================
net/smc: fix kernel panic caused by race of smc_sock

This patchset fixes the race between smc_release triggered by
close(2) and cdc_handle triggered by the underlying RDMA device.

The race is caused because the smc_connection may have been released
before the pending tx CDC messages got their CQEs. In order to fix
this, I add a counter to track how many pending WRs we have posted
through the smc_connection, and only release the smc_connection
after there are no pending WRs on the connection.

The first patch prevents posting a WR on a QP that is not in RTS
state. This patch is needed because if we post a WR on a QP that
is not in RTS state, ib_post_send() may succeed but no CQE will
be returned, and that will confuse the counter tracking the pending
WRs.

The second patch adds a counter to track how many WRs were posted
through the smc_connection, and avoids resetting the QP on link
destruction to prevent leaking the counter.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 1b9dadba 349d4312
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -180,6 +180,11 @@ struct smc_connection {
	u16			tx_cdc_seq;	/* sequence # for CDC send */
	u16			tx_cdc_seq_fin;	/* sequence # - tx completed */
	spinlock_t		send_lock;	/* protect wr_sends */
	atomic_t		cdc_pend_tx_wr; /* number of pending tx CDC wqe
						 * - inc when post wqe,
						 * - dec on polled tx cqe
						 */
	wait_queue_head_t	cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
	struct delayed_work	tx_work;	/* retry of smc_cdc_msg_send */
	u32			tx_off;		/* base offset in peer rmb */

+24 −28
Original line number Diff line number Diff line
@@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
	struct smc_sock *smc;
	int diff;

	if (!conn)
		/* already dismissed */
		return;

	smc = container_of(conn, struct smc_sock, conn);
	bh_lock_sock(&smc->sk);
	if (!wc_status) {
@@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
			      conn);
		conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
	}

	if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) &&
	    unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
		wake_up(&conn->cdc_pend_tx_wq);
	WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0);

	smc_tx_sndbuf_nonfull(smc);
	bh_unlock_sock(&smc->sk);
}
@@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
	conn->tx_cdc_seq++;
	conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
	smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed);

	atomic_inc(&conn->cdc_pend_tx_wr);
	smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */

	rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
	if (!rc) {
		smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
@@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
	} else {
		conn->tx_cdc_seq--;
		conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
		atomic_dec(&conn->cdc_pend_tx_wr);
	}

	return rc;
@@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn,
	peer->token = htonl(local->token);
	peer->prod_flags.failover_validation = 1;

	/* We need to set pend->conn here to make sure smc_cdc_tx_handler()
	 * can handle properly
	 */
	smc_cdc_add_pending_send(conn, pend);

	atomic_inc(&conn->cdc_pend_tx_wr);
	smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */

	rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
	if (unlikely(rc))
		atomic_dec(&conn->cdc_pend_tx_wr);

	return rc;
}

@@ -193,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
	return rc;
}

static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
			      unsigned long data)
void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn)
{
	struct smc_connection *conn = (struct smc_connection *)data;
	struct smc_cdc_tx_pend *cdc_pend =
		(struct smc_cdc_tx_pend *)tx_pend;

	return cdc_pend->conn == conn;
}

static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
{
	struct smc_cdc_tx_pend *cdc_pend =
		(struct smc_cdc_tx_pend *)tx_pend;

	cdc_pend->conn = NULL;
}

void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
{
	struct smc_link *link = conn->lnk;

	smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
				smc_cdc_tx_filter, smc_cdc_tx_dismisser,
				(unsigned long)conn);
	wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr));
}

/* Send a SMC-D CDC header.
+1 −1
Original line number Diff line number Diff line
@@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
			  struct smc_wr_buf **wr_buf,
			  struct smc_rdma_wr **wr_rdma_buf,
			  struct smc_cdc_tx_pend **pend);
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
		     struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
+21 −6
Original line number Diff line number Diff line
@@ -647,7 +647,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_usable(lnk))
		if (smc_link_sendable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_all(&lgr->llc_msg_waiter);
@@ -1127,7 +1127,7 @@ void smc_conn_free(struct smc_connection *conn)
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		smc_cdc_wait_pend_tx_wr(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
@@ -1204,7 +1204,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_ib_modify_qp_error(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
@@ -1336,7 +1336,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		smc_cdc_wait_pend_tx_wr(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
@@ -1459,11 +1459,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 *
 * We must wait here for QPs to be destroyed before we destroy the CQs,
 * or we won't receive any CQEs and cdc_pend_tx_wr cannot reach 0, thus
 * the smc_sock cannot be released.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	LIST_HEAD(lgr_linkdown_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
@@ -1475,7 +1480,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
					list_move_tail(&lgr->list, &lgr_linkdown_list);
			}
		}
	}
@@ -1487,6 +1492,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
		__smc_lgr_terminate(lgr, false);
	}

	list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].smcibdev == smcibdev) {
				mutex_lock(&lgr->llc_conf_mutex);
				smcr_link_down_cond(&lgr->lnk[i]);
				mutex_unlock(&lgr->llc_conf_mutex);
			}
		}
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
@@ -1586,7 +1601,6 @@ static void smcr_link_down(struct smc_link *lnk)
	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
@@ -1824,6 +1838,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	init_waitqueue_head(&conn->cdc_pend_tx_wq);
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
+6 −0
Original line number Diff line number Diff line
@@ -415,6 +415,12 @@ static inline bool smc_link_usable(struct smc_link *lnk)
	return true;
}

/* True when WRs may be posted on this link: the link must be usable AND
 * its QP must be in RTS state. Posting a WR on a QP that is not in RTS
 * may succeed without ever producing a CQE, which would desynchronize
 * the cdc_pend_tx_wr counter tracking pending tx CDC WRs.
 */
static inline bool smc_link_sendable(struct smc_link *lnk)
{
	return smc_link_usable(lnk) &&
		lnk->qp_attr.cur_qp_state == IB_QPS_RTS;
}

static inline bool smc_link_active(struct smc_link *lnk)
{
	return lnk->state == SMC_LNK_ACTIVE;
Loading