Commit 4e6e167b authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'smc-rv23'

Karsten Graul says:

====================
net/smc: introduce SMC-Rv2 support

Please apply the following patch series for smc to netdev's net-next tree.

SMC-Rv2 support (see https://www.ibm.com/support/pages/node/6326337

)
provides routable RoCE support for SMC-R, eliminating the current
same-subnet restriction, by exploiting the UDP encapsulation feature
of the RoCE adapter hardware.

v2: resend of the v1 patch series, and CC linux-rdma this time
v3: rebase after net tree was merged into net-next
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 24bcbe1c 29397e34
Loading
Loading
Loading
Loading
+15 −2
Original line number Diff line number Diff line
@@ -84,17 +84,28 @@ enum {
	SMC_NLA_SYS_IS_ISM_V2,		/* u8 */
	SMC_NLA_SYS_LOCAL_HOST,		/* string */
	SMC_NLA_SYS_SEID,		/* string */
	SMC_NLA_SYS_IS_SMCR_V2,		/* u8 */
	__SMC_NLA_SYS_MAX,
	SMC_NLA_SYS_MAX = __SMC_NLA_SYS_MAX - 1
};

/* SMC_NLA_LGR_V2 nested attributes */
/* SMC_NLA_LGR_D_V2_COMMON and SMC_NLA_LGR_R_V2_COMMON nested attributes */
enum {
	SMC_NLA_LGR_V2_VER,		/* u8 */
	SMC_NLA_LGR_V2_REL,		/* u8 */
	SMC_NLA_LGR_V2_OS,		/* u8 */
	SMC_NLA_LGR_V2_NEG_EID,		/* string */
	SMC_NLA_LGR_V2_PEER_HOST,	/* string */
	__SMC_NLA_LGR_V2_MAX,
	SMC_NLA_LGR_V2_MAX = __SMC_NLA_LGR_V2_MAX - 1
};

/* SMC_NLA_LGR_R_V2 nested attributes */
enum {
	SMC_NLA_LGR_R_V2_UNSPEC,
	SMC_NLA_LGR_R_V2_DIRECT,	/* u8 */
	__SMC_NLA_LGR_R_V2_MAX,
	SMC_NLA_LGR_R_V2_MAX = __SMC_NLA_LGR_R_V2_MAX - 1
};

/* SMC_GEN_LGR_SMCR attributes */
@@ -106,6 +117,8 @@ enum {
	SMC_NLA_LGR_R_PNETID,		/* string */
	SMC_NLA_LGR_R_VLAN_ID,		/* u8 */
	SMC_NLA_LGR_R_CONNS_NUM,	/* u32 */
	SMC_NLA_LGR_R_V2_COMMON,	/* nest */
	SMC_NLA_LGR_R_V2,		/* nest */
	__SMC_NLA_LGR_R_MAX,
	SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
};
@@ -138,7 +151,7 @@ enum {
	SMC_NLA_LGR_D_PNETID,		/* string */
	SMC_NLA_LGR_D_CHID,		/* u16 */
	SMC_NLA_LGR_D_PAD,		/* flag */
	SMC_NLA_LGR_V2,			/* nest */
	SMC_NLA_LGR_D_V2_COMMON,	/* nest */
	__SMC_NLA_LGR_D_MAX,
	SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1
};
+311 −120
Original line number Diff line number Diff line
@@ -439,6 +439,47 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
	return 0;
}

static bool smc_isascii(char *hostname)
{
	int i;

	for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
		if (!isascii(hostname[i]))
			return false;
	return true;
}

static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
					struct smc_clc_msg_accept_confirm *clc)
{
	struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
		(struct smc_clc_msg_accept_confirm_v2 *)clc;
	struct smc_clc_first_contact_ext *fce;
	int clc_v2_len;

	if (clc->hdr.version == SMC_V1 ||
	    !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
		return;

	if (smc->conn.lgr->is_smcd) {
		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
		       SMC_MAX_EID_LEN);
		clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
					 d1);
	} else {
		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
		       SMC_MAX_EID_LEN);
		clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
					 r1);
	}
	fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
	smc->conn.lgr->peer_os = fce->os_type;
	smc->conn.lgr->peer_smc_release = fce->release;
	if (smc_isascii(fce->hostname))
		memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
		       SMC_MAX_HOSTNAME_LEN);
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
@@ -451,16 +492,6 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc,
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static bool smc_isascii(char *hostname)
{
	int i;

	for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
		if (!isascii(hostname[i]))
			return false;
	return true;
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
@@ -472,22 +503,6 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc,
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
	if (clc->hdr.version > SMC_V1 &&
	    (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) {
		struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
			(struct smc_clc_msg_accept_confirm_v2 *)clc;
		struct smc_clc_first_contact_ext *fce =
			(struct smc_clc_first_contact_ext *)
				(((u8 *)clc_v2) + sizeof(*clc_v2));

		memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid,
		       SMC_MAX_EID_LEN);
		smc->conn.lgr->peer_os = fce->os_type;
		smc->conn.lgr->peer_smc_release = fce->release;
		if (smc_isascii(fce->hostname))
			memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
			       SMC_MAX_HOSTNAME_LEN);
	}
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -497,14 +512,16 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
	smc_conn_save_peer_info_fce(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
				    struct smc_clc_msg_accept_confirm *clc,
				    struct smc_init_info *ini)
{
	link->peer_qpn = ntoh24(clc->r0.qpn);
	memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac));
	memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->r0.psn);
	link->peer_mtu = clc->r0.qp_mtu;
}
@@ -608,7 +625,9 @@ static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!ini->ib_dev)
	if (!ini->check_smcrv2 && !ini->ib_dev)
		return SMC_CLC_DECL_NOSMCRDEV;
	if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
		return SMC_CLC_DECL_NOSMCRDEV;
	return 0;
}
@@ -692,27 +711,42 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
	int rc = 0;

	/* check if there is an ism device available */
	if (ini->smcd_version & SMC_V1) {
		if (smc_find_ism_device(smc, ini) ||
		    smc_connect_ism_vlan_setup(smc, ini)) {
			if (ini->smc_type_v1 == SMC_TYPE_B)
				ini->smc_type_v1 = SMC_TYPE_R;
			else
				ini->smc_type_v1 = SMC_TYPE_N;
		} /* else ISM V1 is supported for this connection */
		if (smc_find_rdma_device(smc, ini)) {
			if (ini->smc_type_v1 == SMC_TYPE_B)
				ini->smc_type_v1 = SMC_TYPE_D;
			else
				ini->smc_type_v1 = SMC_TYPE_N;
		} /* else RDMA is supported for this connection */
	}
	if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini))
		ini->smc_type_v2 = SMC_TYPE_N;
	if (!(ini->smcd_version & SMC_V1) ||
	    smc_find_ism_device(smc, ini) ||
	    smc_connect_ism_vlan_setup(smc, ini))
		ini->smcd_version &= ~SMC_V1;
	/* else ISM V1 is supported for this connection */

	/* check if there is an rdma device available */
	if (!(ini->smcr_version & SMC_V1) ||
	    smc_find_rdma_device(smc, ini))
		ini->smcr_version &= ~SMC_V1;
	/* else RDMA is supported for this connection */

	ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
					      ini->smcr_version & SMC_V1);

	/* check if there is an ism v2 device available */
	if (!(ini->smcd_version & SMC_V2) ||
	    !smc_ism_is_v2_capable() ||
	    smc_find_ism_v2_device_clnt(smc, ini))
		ini->smcd_version &= ~SMC_V2;

	/* check if there is an rdma v2 device available */
	ini->check_smcrv2 = true;
	ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
	if (!(ini->smcr_version & SMC_V2) ||
	    smc->clcsock->sk->sk_family != AF_INET ||
	    !smc_clc_ueid_count() ||
	    smc_find_rdma_device(smc, ini))
		ini->smcr_version &= ~SMC_V2;
	ini->check_smcrv2 = false;

	ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
					      ini->smcr_version & SMC_V2);

	/* if neither ISM nor RDMA are supported, fallback */
	if (!smcr_indicated(ini->smc_type_v1) &&
	    ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
	if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
		rc = SMC_CLC_DECL_NOSMCDEV;

	return rc;
@@ -752,6 +786,64 @@ static int smc_connect_clc(struct smc_sock *smc,
				SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}

void smc_fill_gid_list(struct smc_link_group *lgr,
		       struct smc_gidlist *gidlist,
		       struct smc_ib_device *known_dev, u8 *known_gid)
{
	struct smc_init_info *alt_ini = NULL;

	memset(gidlist, 0, sizeof(*gidlist));
	memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);

	alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
	if (!alt_ini)
		goto out;

	alt_ini->vlan_id = lgr->vlan_id;
	alt_ini->check_smcrv2 = true;
	alt_ini->smcrv2.saddr = lgr->saddr;
	smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);

	if (!alt_ini->smcrv2.ib_dev_v2)
		goto out;

	memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
	       SMC_GID_SIZE);

out:
	kfree(alt_ini);
}

static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
				       struct smc_clc_msg_accept_confirm *aclc,
				       struct smc_init_info *ini)
{
	struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
		(struct smc_clc_msg_accept_confirm_v2 *)aclc;
	struct smc_clc_first_contact_ext *fce =
		(struct smc_clc_first_contact_ext *)
			(((u8 *)clc_v2) + sizeof(*clc_v2));

	if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
		return 0;

	if (fce->v2_direct) {
		memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
		ini->smcrv2.uses_gateway = false;
	} else {
		if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
				      smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
				      ini->smcrv2.nexthop_mac,
				      &ini->smcrv2.uses_gateway))
			return SMC_CLC_DECL_NOROUTE;
		if (!ini->smcrv2.uses_gateway) {
			/* mismatch: peer claims indirect, but its direct */
			return SMC_CLC_DECL_NOINDIRECT;
		}
	}
	return 0;
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
@@ -759,11 +851,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
{
	int i, reason_code = 0;
	struct smc_link *link;
	u8 *eid = NULL;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->r0.lcl;
	ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
	memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
	memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);

	reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
	if (reason_code)
		return reason_code;

	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
@@ -785,8 +884,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
			if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
			    !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
				    SMC_GID_SIZE) &&
			    (aclc->hdr.version > SMC_V1 ||
			     !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
				    sizeof(l->peer_mac))) {
				     sizeof(l->peer_mac)))) {
				link = l;
				break;
			}
@@ -805,7 +905,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
	}

	if (ini->first_contact_local)
		smc_link_save_peer_info(link, aclc);
		smc_link_save_peer_info(link, aclc, ini);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
@@ -828,8 +928,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	if (aclc->hdr.version > SMC_V1) {
		struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
			(struct smc_clc_msg_accept_confirm_v2 *)aclc;

		eid = clc_v2->r1.eid;
		if (ini->first_contact_local)
			smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
					  link->smcibdev, link->gid);
	}

	reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
					   SMC_V1, NULL);
					   aclc->hdr.version, eid, ini);
	if (reason_code)
		goto connect_abort;

@@ -869,7 +979,7 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
	int i;

	for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
		if (ini->ism_chid[i] == ntohs(aclc->chid)) {
		if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
			ini->ism_selected = i;
			return 0;
		}
@@ -923,11 +1033,11 @@ static int smc_connect_ism(struct smc_sock *smc,
		struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
			(struct smc_clc_msg_accept_confirm_v2 *)aclc;

		eid = clc_v2->eid;
		eid = clc_v2->d1.eid;
	}

	rc = smc_clc_send_confirm(smc, ini->first_contact_local,
				  aclc->hdr.version, eid);
				  aclc->hdr.version, eid, NULL);
	if (rc)
		goto connect_abort;
	mutex_unlock(&smc_server_lgr_pending);
@@ -950,16 +1060,23 @@ static int smc_connect_ism(struct smc_sock *smc,
static int smc_connect_check_aclc(struct smc_init_info *ini,
				  struct smc_clc_msg_accept_confirm *aclc)
{
	if (aclc->hdr.typev1 != SMC_TYPE_R &&
	    aclc->hdr.typev1 != SMC_TYPE_D)
		return SMC_CLC_DECL_MODEUNSUPP;

	if (aclc->hdr.version >= SMC_V2) {
		if ((aclc->hdr.typev1 == SMC_TYPE_R &&
		     !smcr_indicated(ini->smc_type_v2)) ||
		    (aclc->hdr.typev1 == SMC_TYPE_D &&
		     !smcd_indicated(ini->smc_type_v2)))
			return SMC_CLC_DECL_MODEUNSUPP;
	} else {
		if ((aclc->hdr.typev1 == SMC_TYPE_R &&
		     !smcr_indicated(ini->smc_type_v1)) ||
		    (aclc->hdr.typev1 == SMC_TYPE_D &&
	     ((!smcd_indicated(ini->smc_type_v1) &&
	       !smcd_indicated(ini->smc_type_v2)) ||
	      (aclc->hdr.version == SMC_V1 &&
	       !smcd_indicated(ini->smc_type_v1)) ||
	      (aclc->hdr.version == SMC_V2 &&
	       !smcd_indicated(ini->smc_type_v2)))))
		     !smcd_indicated(ini->smc_type_v1)))
			return SMC_CLC_DECL_MODEUNSUPP;
	}

	return 0;
}
@@ -991,14 +1108,15 @@ static int __smc_connect(struct smc_sock *smc)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
						    version);

	ini->smcd_version = SMC_V1;
	ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0;
	ini->smcd_version = SMC_V1 | SMC_V2;
	ini->smcr_version = SMC_V1 | SMC_V2;
	ini->smc_type_v1 = SMC_TYPE_B;
	ini->smc_type_v2 = smc_ism_is_v2_capable() ? SMC_TYPE_D : SMC_TYPE_N;
	ini->smc_type_v2 = SMC_TYPE_B;

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
		ini->smcd_version &= ~SMC_V1;
		ini->smcr_version = 0;
		ini->smc_type_v1 = SMC_TYPE_N;
		if (!ini->smcd_version) {
			rc = SMC_CLC_DECL_GETVLANERR;
@@ -1026,15 +1144,17 @@ static int __smc_connect(struct smc_sock *smc)
	/* check if smc modes and versions of CLC proposal and accept match */
	rc = smc_connect_check_aclc(ini, aclc);
	version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
	ini->smcd_version = version;
	if (rc)
		goto vlan_cleanup;

	/* depending on previous steps, connect using rdma or ism */
	if (aclc->hdr.typev1 == SMC_TYPE_R)
	if (aclc->hdr.typev1 == SMC_TYPE_R) {
		ini->smcr_version = version;
		rc = smc_connect_rdma(smc, aclc, ini);
	else if (aclc->hdr.typev1 == SMC_TYPE_D)
	} else if (aclc->hdr.typev1 == SMC_TYPE_D) {
		ini->smcd_version = version;
		rc = smc_connect_ism(smc, aclc, ini);
	}
	if (rc)
		goto vlan_cleanup;

@@ -1315,7 +1435,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* initial contact - try to establish second link */
	smc_llc_srv_add_link(link);
	smc_llc_srv_add_link(link, NULL);
	return 0;
}

@@ -1395,33 +1515,48 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,

	ini->smc_type_v1 = pclc->hdr.typev1;
	ini->smc_type_v2 = pclc->hdr.typev2;
	ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0;
	if (pclc->hdr.version > SMC_V1)
		ini->smcd_version |=
				ini->smc_type_v2 != SMC_TYPE_N ? SMC_V2 : 0;
	if (!(ini->smcd_version & SMC_V2)) {
	ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
	ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
	if (pclc->hdr.version > SMC_V1) {
		if (smcd_indicated(ini->smc_type_v2))
			ini->smcd_version |= SMC_V2;
		if (smcr_indicated(ini->smc_type_v2))
			ini->smcr_version |= SMC_V2;
	}
	if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
		rc = SMC_CLC_DECL_PEERNOSMC;
		goto out;
	}
	if (!smc_ism_is_v2_capable()) {
		ini->smcd_version &= ~SMC_V2;
		rc = SMC_CLC_DECL_NOISM2SUPP;
		goto out;
	}
	pclc_v2_ext = smc_get_clc_v2_ext(pclc);
	if (!pclc_v2_ext) {
		ini->smcd_version &= ~SMC_V2;
		ini->smcr_version &= ~SMC_V2;
		rc = SMC_CLC_DECL_NOV2EXT;
		goto out;
	}
	pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
	if (!pclc_smcd_v2_ext) {
	if (ini->smcd_version & SMC_V2) {
		if (!smc_ism_is_v2_capable()) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOISM2SUPP;
		} else if (!pclc_smcd_v2_ext) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOV2DEXT;
		} else if (!pclc_v2_ext->hdr.eid_cnt &&
			   !pclc_v2_ext->hdr.flag.seid) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOUEID;
		}
	}
	if (ini->smcr_version & SMC_V2) {
		if (!pclc_v2_ext->hdr.eid_cnt) {
			ini->smcr_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOUEID;
		}
	}

out:
	if (!ini->smcd_version)
	if (!ini->smcd_version && !ini->smcr_version)
		return rc;

	return 0;
@@ -1541,10 +1676,6 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	smc_v2_ext = smc_get_clc_v2_ext(pclc);
	smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
	if (!smcd_v2_ext) {
		smc_find_ism_store_rc(SMC_CLC_DECL_NOV2DEXT, ini);
		goto not_found;
	}

	mutex_lock(&smcd_dev_list.mutex);
	if (pclc_smcd->ism.chid)
@@ -1562,8 +1693,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
	}
	mutex_unlock(&smcd_dev_list.mutex);

	if (!ini->ism_dev[0])
	if (!ini->ism_dev[0]) {
		smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
		goto not_found;
	}

	smc_ism_get_system_eid(&eid);
	if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
@@ -1616,6 +1749,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,

not_found:
	smc_find_ism_store_rc(rc, ini);
	ini->smcd_version &= ~SMC_V1;
	ini->ism_dev[0] = NULL;
	ini->is_smcd = false;
}
@@ -1634,24 +1768,69 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
	return 0;
}

static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
					 struct smc_clc_msg_proposal *pclc,
					 struct smc_init_info *ini)
{
	struct smc_clc_v2_extension *smc_v2_ext;
	u8 smcr_version;
	int rc;

	if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
		goto not_found;

	smc_v2_ext = smc_get_clc_v2_ext(pclc);
	if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
		goto not_found;

	/* prepare RDMA check */
	memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
	ini->check_smcrv2 = true;
	ini->smcrv2.clc_sk = new_smc->clcsock->sk;
	ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
	ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
	rc = smc_find_rdma_device(new_smc, ini);
	if (rc) {
		smc_find_ism_store_rc(rc, ini);
		goto not_found;
	}
	if (!ini->smcrv2.uses_gateway)
		memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);

	smcr_version = ini->smcr_version;
	ini->smcr_version = SMC_V2;
	rc = smc_listen_rdma_init(new_smc, ini);
	if (!rc)
		rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
	if (!rc)
		return;
	ini->smcr_version = smcr_version;
	smc_find_ism_store_rc(rc, ini);

not_found:
	ini->smcr_version &= ~SMC_V2;
	ini->check_smcrv2 = false;
}

static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	int rc;

	if (!smcr_indicated(ini->smc_type_v1))
	if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
		return SMC_CLC_DECL_NOSMCDEV;

	/* prepare RDMA check */
	ini->ib_lcl = &pclc->lcl;
	memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
	rc = smc_find_rdma_device(new_smc, ini);
	if (rc) {
		/* no RDMA device found */
		if (ini->smc_type_v1 == SMC_TYPE_B)
			/* neither ISM nor RDMA device found */
			rc = SMC_CLC_DECL_NOSMCDEV;
		return rc;
		return SMC_CLC_DECL_NOSMCDEV;
	}
	rc = smc_listen_rdma_init(new_smc, ini);
	if (rc)
@@ -1664,51 +1843,60 @@ static int smc_listen_find_device(struct smc_sock *new_smc,
				  struct smc_clc_msg_proposal *pclc,
				  struct smc_init_info *ini)
{
	int rc;
	int prfx_rc;

	/* check for ISM device matching V2 proposed device */
	smc_find_ism_v2_device_serv(new_smc, pclc, ini);
	if (ini->ism_dev[0])
		return 0;

	if (!(ini->smcd_version & SMC_V1))
		return ini->rc ?: SMC_CLC_DECL_NOSMCD2DEV;

	/* check for matching IP prefix and subnet length */
	rc = smc_listen_prfx_check(new_smc, pclc);
	if (rc)
		return ini->rc ?: rc;
	/* check for matching IP prefix and subnet length (V1) */
	prfx_rc = smc_listen_prfx_check(new_smc, pclc);
	if (prfx_rc)
		smc_find_ism_store_rc(prfx_rc, ini);

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
		return ini->rc ?: SMC_CLC_DECL_GETVLANERR;

	/* check for ISM device matching V1 proposed device */
	if (!prfx_rc)
		smc_find_ism_v1_device_serv(new_smc, pclc, ini);
	if (ini->ism_dev[0])
		return 0;

	if (pclc->hdr.typev1 == SMC_TYPE_D)
	if (!smcr_indicated(pclc->hdr.typev1) &&
	    !smcr_indicated(pclc->hdr.typev2))
		/* skip RDMA and decline */
		return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;

	/* check if RDMA is available */
	/* check if RDMA V2 is available */
	smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
	if (ini->smcrv2.ib_dev_v2)
		return 0;

	/* check if RDMA V1 is available */
	if (!prfx_rc) {
		int rc;

		rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
		smc_find_ism_store_rc(rc, ini);

		return (!rc) ? 0 : ini->rc;
	}
	return SMC_CLC_DECL_NOSMCDEV;
}

/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  bool local_first)
				  bool local_first,
				  struct smc_init_info *ini)
{
	struct smc_link *link = new_smc->conn.lnk;
	int reason_code = 0;

	if (local_first)
		smc_link_save_peer_info(link, cclc);
		smc_link_save_peer_info(link, cclc, ini);

	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
		return SMC_CLC_DECL_ERR_RTOK;
@@ -1729,12 +1917,13 @@ static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm *cclc;
	struct smc_clc_msg_proposal_area *buf;
	struct smc_clc_msg_proposal *pclc;
	struct smc_init_info *ini = NULL;
	u8 proposal_version = SMC_V1;
	u8 accept_version;
	int rc = 0;

	if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
@@ -1765,7 +1954,9 @@ static void smc_listen_work(struct work_struct *work)
			      SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (rc)
		goto out_decl;
	version = pclc->hdr.version == SMC_V1 ? SMC_V1 : version;

	if (pclc->hdr.version > SMC_V1)
		proposal_version = SMC_V2;

	/* IPSec connections opt out of SMC optimizations */
	if (using_ipsec(new_smc)) {
@@ -1795,9 +1986,9 @@ static void smc_listen_work(struct work_struct *work)
		goto out_unlock;

	/* send SMC Accept CLC message */
	accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
	rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
				 ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1,
				 ini->negotiated_eid);
				 accept_version, ini->negotiated_eid);
	if (rc)
		goto out_unlock;

@@ -1819,7 +2010,7 @@ static void smc_listen_work(struct work_struct *work)
	/* finish worker */
	if (!ini->is_smcd) {
		rc = smc_listen_rdma_finish(new_smc, cclc,
					    ini->first_contact_local);
					    ini->first_contact_local, ini);
		if (rc)
			goto out_unlock;
		mutex_unlock(&smc_server_lgr_pending);
@@ -1833,7 +2024,7 @@ static void smc_listen_work(struct work_struct *work)
	mutex_unlock(&smc_server_lgr_pending);
out_decl:
	smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
			   version);
			   proposal_version);
out_free:
	kfree(ini);
	kfree(buf);
+19 −1
Original line number Diff line number Diff line
@@ -56,7 +56,20 @@ enum smc_state { /* possible states of an SMC socket */
struct smc_link_group;

struct smc_wr_rx_hdr {	/* common prefix part of LLC and CDC to demultiplex */
	union {
		u8 type;
#if defined(__BIG_ENDIAN_BITFIELD)
		struct {
			u8 llc_version:4,
			   llc_type:4;
		};
#elif defined(__LITTLE_ENDIAN_BITFIELD)
		struct {
			u8 llc_type:4,
			   llc_version:4;
		};
#endif
	};
} __aligned(1);

struct smc_cdc_conn_state_flags {
@@ -286,7 +299,12 @@ static inline bool using_ipsec(struct smc_sock *smc)
}
#endif

struct smc_gidlist;

struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk);
void smc_fill_gid_list(struct smc_link_group *lgr,
		       struct smc_gidlist *gidlist,
		       struct smc_ib_device *known_dev, u8 *known_gid);

#endif	/* __SMC_H */
+118 −29

File changed.

Preview size limit exceeded, changes collapsed.

+51 −4
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@
#define SMC_CLC_DECL_NOV2DEXT	0x03030005  /* peer sent no clc SMC-Dv2 ext.  */
#define SMC_CLC_DECL_NOSEID	0x03030006  /* peer sent no SEID	      */
#define SMC_CLC_DECL_NOSMCD2DEV	0x03030007  /* no SMC-Dv2 device found	      */
#define SMC_CLC_DECL_NOUEID	0x03030008  /* peer sent no UEID	      */
#define SMC_CLC_DECL_MODEUNSUPP	0x03040000  /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC	0x03050000  /* peer has eyecatcher in RMBE    */
#define SMC_CLC_DECL_OPTUNSUPP	0x03060000  /* fastopen sockopt not supported */
@@ -54,6 +55,8 @@
#define SMC_CLC_DECL_NOSRVLINK	0x030b0000  /* SMC-R link from srv not found  */
#define SMC_CLC_DECL_VERSMISMAT	0x030c0000  /* SMC version mismatch	      */
#define SMC_CLC_DECL_MAX_DMB	0x030d0000  /* SMC-D DMB limit exceeded       */
#define SMC_CLC_DECL_NOROUTE	0x030e0000  /* SMC-Rv2 conn. no route to peer */
#define SMC_CLC_DECL_NOINDIRECT	0x030f0000  /* SMC-Rv2 conn. indirect mismatch*/
#define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */
#define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR	0x09990000  /* internal error		      */
@@ -213,11 +216,14 @@ struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */
#define SMC_CLC_OS_AIX		3

struct smc_clc_first_contact_ext {
	u8 reserved1;
#if defined(__BIG_ENDIAN_BITFIELD)
	u8 v2_direct : 1,
	   reserved  : 7;
	u8 os_type : 4,
	   release : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8 reserved  : 7,
	   v2_direct : 1;
	u8 release : 4,
	   os_type : 4;
#endif
@@ -225,6 +231,13 @@ struct smc_clc_first_contact_ext {
	u8 hostname[SMC_MAX_HOSTNAME_LEN];
};

struct smc_clc_fce_gid_ext {
	u8 reserved[16];
	u8 gid_cnt;
	u8 reserved2[3];
	u8 gid[][SMC_GID_SIZE];
};

struct smc_clc_msg_accept_confirm {	/* clc accept / confirm message */
	struct smc_clc_msg_hdr hdr;
	union {
@@ -239,13 +252,17 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_accept_confirm_v2 {	/* clc accept / confirm message */
	struct smc_clc_msg_hdr hdr;
	union {
		struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
		struct { /* SMC-R */
			struct smcr_clc_msg_accept_confirm r0;
			u8 eid[SMC_MAX_EID_LEN];
			u8 reserved6[8];
		} r1;
		struct { /* SMC-D */
			struct smcd_clc_msg_accept_confirm_common d0;
			__be16 chid;
			u8 eid[SMC_MAX_EID_LEN];
			u8 reserved5[8];
		};
		} d1;
	};
};

@@ -264,6 +281,24 @@ struct smc_clc_msg_decline { /* clc decline message */
	struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);

#define SMC_DECL_DIAG_COUNT_V2	4 /* no. of additional peer diagnosis codes */

struct smc_clc_msg_decline_v2 {	/* clc decline message */
	struct smc_clc_msg_hdr hdr;
	u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */
	__be32 peer_diagnosis;	/* diagnosis information */
#if defined(__BIG_ENDIAN_BITFIELD)
	u8 os_type  : 4,
	   reserved : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8 reserved : 4,
	   os_type  : 4;
#endif
	u8 reserved2[3];
	__be32 peer_diagnosis_v2[SMC_DECL_DIAG_COUNT_V2];
	struct smc_clc_msg_trail trl; /* eye catcher "SMCD" or "SMCR" EBCDIC */
} __aligned(4);

/* determine start of the prefix area within the proposal message */
static inline struct smc_clc_msg_proposal_prefix *
smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
@@ -282,6 +317,17 @@ static inline bool smcd_indicated(int smc_type)
	return smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B;
}

static inline u8 smc_indicated_type(int is_smcd, int is_smcr)
{
	if (is_smcd && is_smcr)
		return SMC_TYPE_B;
	if (is_smcd)
		return SMC_TYPE_D;
	if (is_smcr)
		return SMC_TYPE_R;
	return SMC_TYPE_N;
}

/* get SMC-D info from proposal message */
static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
@@ -334,7 +380,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
			 u8 version, u8 *eid);
			 u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
			u8 version, u8 *negotiated_eid);
void smc_clc_init(void) __init;
@@ -343,6 +389,7 @@ void smc_clc_get_hostname(u8 **host);
bool smc_clc_match_eid(u8 *negotiated_eid,
		       struct smc_clc_v2_extension *smc_v2_ext,
		       u8 *peer_eid, u8 *local_eid);
int smc_clc_ueid_count(void);
int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb);
int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info);
int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info);
Loading