Commit 7dad816c authored by Wen Gu's avatar Wen Gu Committed by Litao Jiao
Browse files

anolis: net/smc: Resolve the race between SMC-R link access and clear

anolis inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I79GVV
CVE: NA

Reference: https://gitee.com/anolis/cloud-kernel/commit/22c9ca2d0f66980961a49698a2a223be4913e24a



--------------------------------

ANBZ: #264

We encountered some crashes caused by the race between SMC-R
link access and link clear triggered by link group termination
in abnormal case, like port error.

Here are some of panic stacks we met:

1) Race between smc_llc_flow_initiate() and smcr_link_clear()

 BUG: kernel NULL pointer dereference, address: 0000000000000000
 Workqueue: smc_hs_wq smc_listen_work [smc]
 RIP: 0010:smc_llc_flow_initiate+0x44/0x190 [smc]
 Call Trace:
  <TASK>
  ? __smc_buf_create+0x75a/0x950 [smc]
  smcr_lgr_reg_rmbs+0x2a/0xbf [smc]
  smc_listen_work+0xf72/0x1230 [smc]
  ? process_one_work+0x25c/0x600
  process_one_work+0x25c/0x600
  worker_thread+0x4f/0x3a0
  ? process_one_work+0x600/0x600
  kthread+0x15d/0x1a0
  ? set_kthread_struct+0x40/0x40
  ret_from_fork+0x1f/0x30
  </TASK>

smc_listen_work()                       __smc_lgr_terminate()
---------------------------------------------------------------
                                       | smc_lgr_free()
                                       |     |- smcr_link_clear()
                                       |            |- memset(lnk, 0)
smc_listen_rdma_reg()                  |
  |- smcr_lgr_reg_rmbs()               |
       |- smc_llc_flow_initiate()      |
            |- access lnk->lgr (panic) |

2) Race between smc_wr_tx_dismiss_slots() and smcr_link_clear()

 BUG: kernel NULL pointer dereference, address: 0000000000000000
 RIP: 0010:_find_first_bit+0x8/0x50
 Call Trace:
  <TASK>
  smc_wr_tx_dismiss_slots+0x34/0xc0 [smc]
  ? smc_cdc_tx_filter+0x10/0x10 [smc]
  smc_conn_free+0xd8/0x100 [smc]
  __smc_release+0xf1/0x140 [smc]
  smc_release+0x89/0x1b0 [smc]
  __sock_release+0x37/0xb0
  sock_close+0x14/0x20
  __fput+0xa9/0x260
  task_work_run+0x6b/0xb0
  do_exit+0x3ef/0xd40
  do_group_exit+0x47/0xb0
  __x64_sys_exit_group+0x14/0x20
  do_syscall_64+0x34/0x90
  entry_SYSCALL_64_after_hwframe+0x44/0xae
  </TASK>

smc_conn_free()                           __smc_lgr_terminate()
----------------------------------------------------------------
                                         | smc_lgr_free()
                                         |  |- smcr_link_clear()
                                         |      |- smc_wr_free_link_mem()
                                         |          |- lnk->wr_tx_mask = NULL;
smc_wr_tx_dismiss_slots()                |
  |- for_each_set_bit(link->wr_tx_mask)  |
            |- (panic)                   |

These crashes are caused by clearing SMC-R link resources
when someone is still accessing to them. So this patch tries
to fix it by introducing reference count of SMC-R links and
ensuring that the sensitive resources of links are not
cleared until reference count is zero.

The operation to the SMC-R link reference count can be concluded
as follows:

object          [hold or initialized as 1]         [put]
--------------------------------------------------------------------
links           smcr_link_init()                   smcr_link_clear()
connections     smcr_lgr_conn_assign_link()        smc_conn_free()

Through this way, the clear of SMC-R links is later than the
free of all the smc connections above it, thus avoiding the
unsafe reference to SMC-R links.

Signed-off-by: default avatarWen Gu <guwen@linux.alibaba.com>
Acked-by: default avatarTony Lu <tonylu@linux.alibaba.com>
Signed-off-by: default avatarGengbiao Shen <shengengbiao@sangfor.com.cn>
parent cbc137d2
Loading
Loading
Loading
Loading
+35 −8
Original line number Diff line number Diff line
@@ -140,6 +140,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	atomic_inc(&conn->lnk->conn_cnt);
	smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
	return 0;
}

@@ -311,6 +312,8 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
	lnk->clearing = 0;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
@@ -536,8 +539,12 @@ void smc_switch_link_and_count(struct smc_connection *conn,
			       struct smc_link *to_lnk)
{
	atomic_dec(&conn->lnk->conn_cnt);
	/* put old link, hold in smcr_lgr_conn_assign_link() */
	smcr_link_put(conn->lnk);
	conn->lnk = to_lnk;
	atomic_inc(&conn->lnk->conn_cnt);
	/* hold new link, put in smc_conn_free() */
	smcr_link_hold(conn->lnk);
}

struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
@@ -675,9 +682,9 @@ void smc_conn_free(struct smc_connection *conn)
		/* smc connection wasn't registered to a link group
		 * or has already been freed before.
		 *
		 * Judge these to ensure that lgr refcnt will be put
		 * only once if connection has been registered to a
		 * link group successfully.
		 * Judge these to ensure that lgr/link refcnt will be
		 * put only once if connection has been registered to
		 * a link group successfully.
		 */
		return;

@@ -702,6 +709,8 @@ void smc_conn_free(struct smc_connection *conn)
	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
lgr_put:
	if (!lgr->is_smcd)
		smcr_link_put(conn->lnk); /* link_hold in smcr_lgr_conn_assign_link() */
	smc_lgr_put(lgr); /* lgr_hold in smc_lgr_register_conn() */
}

@@ -758,13 +767,23 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
	}
}

void __smcr_link_clear(struct smc_link *lnk)
{
	smc_wr_free_link_mem(lnk);
	smc_lgr_put(lnk->lgr);  /* lgr_hold in smcr_link_init() */
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
	if (lnk->clearing || !lnk->lgr ||
	    lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->clearing = 1;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
@@ -773,14 +792,22 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	smc_lgr_put(lnk->lgr); /* lgr_hold in smcr_link_init() */
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
	smcr_link_put(lnk); /* theoretically last link_put */
}

void smcr_link_hold(struct smc_link *lnk)
{
	refcount_inc(&lnk->refcnt);
}

void smcr_link_put(struct smc_link *lnk)
{
	if (refcount_dec_and_test(&lnk->refcnt))
		__smcr_link_clear(lnk);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
+4 −0
Original line number Diff line number Diff line
@@ -124,6 +124,8 @@ struct smc_link {
	u8			peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
	u8			link_idx;	/* index in lgr link array */
	u8			link_is_asym;	/* is link asymmetric? */
	u8			clearing : 1;	/* link is being cleared */
	refcount_t		refcnt;		/* link reference count */
	struct smc_link_group	*lgr;		/* parent link group */
	struct work_struct	link_down_wrk;	/* wrk to bring link down */

@@ -410,6 +412,8 @@ void smc_core_exit(void);
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini);
void smcr_link_clear(struct smc_link *lnk, bool log);
void smcr_link_hold(struct smc_link *lnk);
void smcr_link_put(struct smc_link *lnk);
void smc_switch_link_and_count(struct smc_connection *conn,
			       struct smc_link *to_lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);