Commit bdda7dfa authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mptcp-fixes'



Mat Martineau says:

====================
mptcp: Fixes for v5.12

These patches from the MPTCP tree fix a few multipath TCP issues:

Patches 1 and 5 clear some stale pointers when subflows close.

Patches 2, 4, and 9 plug some memory leaks.

Patch 3 fixes a memory accounting error identified by syzkaller.

Patches 6 and 7 fix a race condition that slowed data transmission.

Patch 8 adds missing wakeups when write buffer space is freed.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents bfc25605 9238e900
Loading
Loading
Loading
Loading
+104 −61
Original line number Diff line number Diff line
@@ -1061,6 +1061,12 @@ static void __mptcp_clean_una(struct sock *sk)
	}
}

/* Reclaim acknowledged data via __mptcp_clean_una(), then notify any
 * writers blocked on send-buffer space.  Per the commit message, the
 * explicit mptcp_write_space() call adds wakeups that were previously
 * missing when write buffer space was freed.
 */
static void __mptcp_clean_una_wakeup(struct sock *sk)
{
	__mptcp_clean_una(sk);
	mptcp_write_space(sk);
}

static void mptcp_enter_memory_pressure(struct sock *sk)
{
	struct mptcp_subflow_context *subflow;
@@ -1189,6 +1195,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size,
			 */
			while (skbs->qlen > 1) {
				skb = __skb_dequeue_tail(skbs);
				*total_ts -= skb->truesize;
				__kfree_skb(skb);
			}
			return skbs->qlen > 0;
@@ -1444,7 +1451,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
	release_sock(ssk);
}

static void mptcp_push_pending(struct sock *sk, unsigned int flags)
static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
	struct sock *prev_ssk = NULL, *ssk = NULL;
	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1696,14 +1703,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

wait_for_memory:
		mptcp_set_nospace(sk);
		mptcp_push_pending(sk, msg->msg_flags);
		__mptcp_push_pending(sk, msg->msg_flags);
		ret = sk_stream_wait_memory(sk, &timeo);
		if (ret)
			goto out;
	}

	if (copied)
		mptcp_push_pending(sk, msg->msg_flags);
		__mptcp_push_pending(sk, msg->msg_flags);

out:
	release_sock(sk);
@@ -2115,6 +2122,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
	return backup;
}

/* Release the msk's reference on the initial (in-kernel) subflow socket,
 * if one is still attached, and clear the pointer so the disposal is
 * performed at most once.
 */
static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
{
	struct socket *ssock = msk->subflow;

	if (!ssock)
		return;

	msk->subflow = NULL;
	iput(SOCK_INODE(ssock));
}

/* subflow sockets can be either outgoing (connect) or incoming
 * (accept).
 *
@@ -2126,6 +2141,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
			      struct mptcp_subflow_context *subflow)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	list_del(&subflow->node);

	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
@@ -2154,6 +2171,18 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
	release_sock(ssk);

	sock_put(ssk);

	if (ssk == msk->last_snd)
		msk->last_snd = NULL;

	if (ssk == msk->ack_hint)
		msk->ack_hint = NULL;

	if (ssk == msk->first)
		msk->first = NULL;

	if (msk->subflow && ssk == msk->subflow->sk)
		mptcp_dispose_initial_subflow(msk);
}

void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2238,14 +2267,58 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
	mptcp_close_wake_up(sk);
}

static void mptcp_worker(struct work_struct *work)
static void __mptcp_retrans(struct sock *sk)
{
	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
	struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_sendmsg_info info = {};
	struct mptcp_data_frag *dfrag;
	size_t copied = 0;
	int state, ret;
	struct sock *ssk;
	int ret;

	__mptcp_clean_una_wakeup(sk);
	dfrag = mptcp_rtx_head(sk);
	if (!dfrag)
		return;

	ssk = mptcp_subflow_get_retrans(msk);
	if (!ssk)
		goto reset_timer;

	lock_sock(ssk);

	/* limit retransmission to the bytes already sent on some subflows */
	info.sent = 0;
	info.limit = dfrag->already_sent;
	while (info.sent < dfrag->already_sent) {
		if (!mptcp_alloc_tx_skb(sk, ssk))
			break;

		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
		if (ret <= 0)
			break;

		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
		copied += ret;
		info.sent += ret;
	}
	if (copied)
		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
			 info.size_goal);

	mptcp_set_timeout(sk, ssk);
	release_sock(ssk);

reset_timer:
	if (!mptcp_timer_pending(sk))
		mptcp_reset_timer(sk);
}

static void mptcp_worker(struct work_struct *work)
{
	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
	struct sock *sk = &msk->sk.icsk_inet.sk;
	int state;

	lock_sock(sk);
	state = sk->sk_state;
@@ -2280,45 +2353,8 @@ static void mptcp_worker(struct work_struct *work)
	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
		__mptcp_close_subflow(msk);

	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
		goto unlock;

	__mptcp_clean_una(sk);
	dfrag = mptcp_rtx_head(sk);
	if (!dfrag)
		goto unlock;

	ssk = mptcp_subflow_get_retrans(msk);
	if (!ssk)
		goto reset_unlock;

	lock_sock(ssk);

	/* limit retransmission to the bytes already sent on some subflows */
	info.sent = 0;
	info.limit = dfrag->already_sent;
	while (info.sent < dfrag->already_sent) {
		if (!mptcp_alloc_tx_skb(sk, ssk))
			break;

		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
		if (ret <= 0)
			break;

		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
		copied += ret;
		info.sent += ret;
	}
	if (copied)
		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
			 info.size_goal);

	mptcp_set_timeout(sk, ssk);
	release_sock(ssk);

reset_unlock:
	if (!mptcp_timer_pending(sk))
		mptcp_reset_timer(sk);
	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
		__mptcp_retrans(sk);

unlock:
	release_sock(sk);
@@ -2523,12 +2559,6 @@ static void __mptcp_destroy_sock(struct sock *sk)

	might_sleep();

	/* dispose the ancillary tcp socket, if any */
	if (msk->subflow) {
		iput(SOCK_INODE(msk->subflow));
		msk->subflow = NULL;
	}

	/* be sure to always acquire the join list lock, to sync vs
	 * mptcp_finish_join().
	 */
@@ -2553,6 +2583,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
	sk_stream_kill_queues(sk);
	xfrm_sk_free_policy(sk);
	sk_refcnt_debug_release(sk);
	mptcp_dispose_initial_subflow(msk);
	sock_put(sk);
}

@@ -2934,13 +2965,14 @@ static void mptcp_release_cb(struct sock *sk)
{
	unsigned long flags, nflags;

	/* push_pending may touch wmem_reserved, do it before the later
	 * cleanup
	 */
	if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
		__mptcp_clean_una(sk);
	if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) {
		/* mptcp_push_pending() acquires the subflow socket lock
	for (;;) {
		flags = 0;
		if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
			flags |= MPTCP_PUSH_PENDING;
		if (!flags)
			break;

		/* the following actions acquire the subflow socket lock
		 *
		 * 1) can't be invoked in atomic scope
		 * 2) must avoid ABBA deadlock with msk socket spinlock: the RX
@@ -2949,13 +2981,21 @@ static void mptcp_release_cb(struct sock *sk)
		 */

		spin_unlock_bh(&sk->sk_lock.slock);
		mptcp_push_pending(sk, 0);
		if (flags & MPTCP_PUSH_PENDING)
			__mptcp_push_pending(sk, 0);

		cond_resched();
		spin_lock_bh(&sk->sk_lock.slock);
	}

	if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
		__mptcp_clean_una_wakeup(sk);
	if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
		__mptcp_error_report(sk);

	/* clear any wmem reservation and errors */
	/* push_pending may touch wmem_reserved, ensure we do the cleanup
	 * later
	 */
	__mptcp_update_wmem(sk);
	__mptcp_update_rmem(sk);

@@ -3285,6 +3325,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
		/* PM/worker can now acquire the first subflow socket
		 * lock without racing with listener queue cleanup,
		 * we can notify it, if needed.
		 *
		 * Even if remote has reset the initial subflow by now
		 * the refcnt is still at least one.
		 */
		subflow = mptcp_subflow_ctx(msk->first);
		list_add(&subflow->node, &msk->conn_list);
+8 −6
Original line number Diff line number Diff line
@@ -687,11 +687,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;

			if (subflow_use_different_sport(owner, sk)) {
				pr_debug("ack inet_sport=%d %d",
@@ -699,10 +694,16 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					 ntohs(inet_sk((struct sock *)owner)->inet_sport));
				if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
					SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
					goto out;
					goto dispose_child;
				}
				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
			}

			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

@@ -1297,6 +1298,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
	spin_lock_bh(&msk->join_list_lock);
	list_del(&subflow->node);
	spin_unlock_bh(&msk->join_list_lock);
	sock_put(mptcp_subflow_tcp_sock(subflow));

failed:
	subflow->disposable = 1;