Commit fe1fd0cc authored by Jakub Kicinski
Browse files

Merge branch 'mptcp-fixes-for-6-1'

Mat Martineau says:

====================
mptcp: Fixes for 6.1

Patch 1 fixes an issue with assigning subflow IDs in cases where an
incoming MP_JOIN is processed before accept() completes on the MPTCP
socket.

Patches 2 and 3 fix a deadlock issue with fastopen code (new for 6.1) at
connection time.
====================

Link: https://lore.kernel.org/r/20221021225856.88119-1-mathew.j.martineau@linux.intel.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 4a4b6848 fa9e5746
Loading
Loading
Loading
Loading
+102 −80
Original line number Diff line number Diff line
@@ -1673,6 +1673,37 @@ static void mptcp_set_nospace(struct sock *sk)
	set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
}

/* Perform a TCP Fast Open style sendmsg()+connect on the first subflow @ssk
 * on behalf of the MPTCP socket @sk.  On return, *copied_syn holds the number
 * of bytes queued with the SYN.  Returns 0 or a negative errno (-EINPROGRESS
 * for a non-blocking connect still in flight).
 */
static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
				  size_t len, int *copied_syn)
{
	unsigned int saved_flags = msg->msg_flags;
	struct mptcp_sock *msk = mptcp_sk(sk);
	int ret;

	lock_sock(ssk);
	/* Force the initial connect attempt to be non-blocking while the ssk
	 * lock is held; any blocking wait happens below, after release_sock(),
	 * which is the deadlock fix this function exists for (see the commit
	 * message: "deadlock issue with fastopen code ... at connection time").
	 */
	msg->msg_flags |= MSG_DONTWAIT;
	msk->connect_flags = O_NONBLOCK;
	/* is_sendmsg tells mptcp_connect() that the caller already holds the
	 * subflow socket lock, so it must use __inet_stream_connect() directly.
	 */
	msk->is_sendmsg = 1;
	ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
	msk->is_sendmsg = 0;
	msg->msg_flags = saved_flags;	/* restore the caller-visible flags */
	release_sock(ssk);

	/* do the blocking bits of inet_stream_connect outside the ssk socket lock */
	if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) {
		ret = __inet_stream_connect(sk->sk_socket, msg->msg_name,
					    msg->msg_namelen, msg->msg_flags, 1);

		/* Keep the same behaviour of plain TCP: zero the copied bytes in
		 * case of any error, except timeout or signal
		 */
		if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
			*copied_syn = 0;
	}

	return ret;
}

static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1693,24 +1724,15 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

	ssock = __mptcp_nmpc_socket(msk);
	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
		struct sock *ssk = ssock->sk;
		int copied_syn = 0;

		lock_sock(ssk);

		ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL);
		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
		copied += copied_syn;
		if (ret == -EINPROGRESS && copied_syn > 0) {
			/* reflect the new state on the MPTCP socket */
			inet_sk_state_store(sk, inet_sk_state_load(ssk));
			release_sock(ssk);
		if (ret == -EINPROGRESS && copied_syn > 0)
			goto out;
		} else if (ret) {
			release_sock(ssk);
		else if (ret)
			goto do_error;
	}
		release_sock(ssk);
	}

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

@@ -2952,7 +2974,7 @@ static void mptcp_close(struct sock *sk, long timeout)
	sock_put(sk);
}

static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
@@ -3507,10 +3529,73 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
	return put_user(answ, (int __user *)arg);
}

/* Abandon MPTCP for this connection before the handshake starts: clear the
 * subflow's MPTCP request flag and put the msk into plain-TCP fallback mode.
 * Called from the connect path when MD5SIG is enabled or token allocation
 * fails.
 */
static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
					 struct mptcp_subflow_context *subflow)
{
	subflow->request_mptcp = 0;
	__mptcp_do_fallback(msk);
}

/* connect() handler wired into mptcp_prot (.connect): obtain the first
 * subflow socket and start the TCP-level connect on it, falling back to
 * plain TCP early where MPTCP cannot be used.  Returns 0, -EINPROGRESS for
 * an in-flight non-blocking connect, or a negative errno.
 */
static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct socket *ssock;
	int err = -EINVAL;

	ssock = __mptcp_nmpc_socket(msk);
	if (!ssock)
		return -EINVAL;

	/* NOTE(review): presumably discards any token left from a previous
	 * connection attempt before entering SYN_SENT — confirm against
	 * mptcp_token_destroy().
	 */
	mptcp_token_destroy(msk);
	inet_sk_state_store(sk, TCP_SYN_SENT);
	subflow = mptcp_subflow_ctx(ssock->sk);
#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
		mptcp_subflow_early_fallback(msk, subflow);
#endif
	/* Failure to allocate a connection token also forces a TCP fallback. */
	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
		MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
		mptcp_subflow_early_fallback(msk, subflow);
	}
	if (likely(!__mptcp_check_fallback(msk)))
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE);

	/* if reaching here via the fastopen/sendmsg path, the caller already
	 * acquired the subflow socket lock, too.
	 */
	if (msk->is_sendmsg)
		err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
	else
		err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
	inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;

	/* on successful connect, the msk state will be moved to established by
	 * subflow_finish_connect()
	 */
	if (unlikely(err && err != -EINPROGRESS)) {
		/* Mirror the subflow's state onto the msk on hard failure. */
		inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
		return err;
	}

	mptcp_copy_inaddrs(sk, ssock->sk);

	/* unblocking connect, mptcp-level inet_stream_connect will error out
	 * without changing the socket state, update it here.
	 */
	if (err == -EINPROGRESS)
		sk->sk_socket->state = ssock->state;
	return err;
}

static struct proto mptcp_prot = {
	.name		= "MPTCP",
	.owner		= THIS_MODULE,
	.init		= mptcp_init_sock,
	.connect	= mptcp_connect,
	.disconnect	= mptcp_disconnect,
	.close		= mptcp_close,
	.accept		= mptcp_accept,
@@ -3562,78 +3647,16 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
	return err;
}

/* Abandon MPTCP for this connection before the handshake starts: clear the
 * subflow's MPTCP request flag and put the msk into plain-TCP fallback mode.
 *
 * NOTE(review): this is the pre-patch copy shown as removed by the diff; the
 * patch moves the identical definition earlier in the file, next to the new
 * mptcp_connect().
 */
static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
					 struct mptcp_subflow_context *subflow)
{
	subflow->request_mptcp = 0;
	__mptcp_do_fallback(msk);
}

/* Socket-level (struct socket) connect() entry point for MPTCP.
 *
 * NOTE(review): this span is a diff rendering that interleaves the removed
 * pre-patch body and the added post-patch body WITHOUT +/- markers: both the
 * old inline connect logic (explicit uaddr validation, __mptcp_nmpc_socket()
 * setup, ssock->ops->connect()) and the new delegation through
 * __inet_stream_connect() appear, including two return statements
 * ("return err;" and "return ret;") and unbalanced braces.  It is not
 * compilable as-is; consult the underlying patch for the real before/after
 * text.  Only comments are added here; no token is altered.
 */
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
				int addr_len, int flags)
{
	struct mptcp_sock *msk = mptcp_sk(sock->sk);
	struct mptcp_subflow_context *subflow;
	struct socket *ssock;
	int err = -EINVAL;
	int ret;

	lock_sock(sock->sk);
	if (uaddr) {
		if (addr_len < sizeof(uaddr->sa_family))
			goto unlock;

		if (uaddr->sa_family == AF_UNSPEC) {
			err = mptcp_disconnect(sock->sk, flags);
			sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
			goto unlock;
		}
	}

	if (sock->state != SS_UNCONNECTED && msk->subflow) {
		/* pending connection or invalid state, let existing subflow
		 * cope with that
		 */
		ssock = msk->subflow;
		goto do_connect;
	}

	ssock = __mptcp_nmpc_socket(msk);
	if (!ssock)
		goto unlock;

	mptcp_token_destroy(msk);
	inet_sk_state_store(sock->sk, TCP_SYN_SENT);
	subflow = mptcp_subflow_ctx(ssock->sk);
#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
		mptcp_subflow_early_fallback(msk, subflow);
#endif
	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
		MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
		mptcp_subflow_early_fallback(msk, subflow);
	}
	if (likely(!__mptcp_check_fallback(msk)))
		MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);

do_connect:
	err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
	inet_sk(sock->sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
	sock->state = ssock->state;

	/* on successful connect, the msk state will be moved to established by
	 * subflow_finish_connect()
	 */
	if (!err || err == -EINPROGRESS)
		mptcp_copy_inaddrs(sock->sk, ssock->sk);
	else
		inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));

unlock:
	/* NOTE(review): post-patch body — the function now just records the
	 * flags and defers to the generic __inet_stream_connect(), which
	 * reaches mptcp_connect() via mptcp_prot.connect.
	 */
	mptcp_sk(sock->sk)->connect_flags = flags;
	ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
	release_sock(sock->sk);
	return err;
	return ret;
}

static int mptcp_listen(struct socket *sock, int backlog)
@@ -3699,7 +3722,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
		if (mptcp_is_fully_established(newsk))
			mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);

		mptcp_copy_inaddrs(newsk, msk->first);
		mptcp_rcv_space_init(msk, msk->first);
		mptcp_propagate_sndbuf(newsk, msk->first);

+4 −1
Original line number Diff line number Diff line
@@ -285,7 +285,9 @@ struct mptcp_sock {
	u8		mpc_endpoint_id;
	u8		recvmsg_inq:1,
			cork:1,
			nodelay:1;
			nodelay:1,
			is_sendmsg:1;
	int		connect_flags;
	struct work_struct work;
	struct sk_buff  *ooo_last_skb;
	struct rb_root  out_of_order_queue;
@@ -599,6 +601,7 @@ int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
+7 −0
Original line number Diff line number Diff line
@@ -723,6 +723,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
				goto dispose_child;
			}

			if (new_msk)
				mptcp_copy_inaddrs(new_msk, child);
			subflow_drop_ctx(child);
			goto out;
		}
@@ -750,6 +752,11 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
			ctx->conn = new_msk;
			new_msk = NULL;

			/* set msk addresses early to ensure mptcp_pm_get_local_id()
			 * uses the correct data
			 */
			mptcp_copy_inaddrs(ctx->conn, child);

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */