Commit fde56eea authored by Paolo Abeni, committed by David S. Miller
Browse files

mptcp: refine mptcp_cleanup_rbuf



The current cleanup rbuf tries a bit too hard to avoid acquiring
the subflow socket lock. We may end up delaying the needed ack,
or skipping the ack on a blocked subflow.

Address the above by extending the conditions used to trigger the cleanup
to reflect more closely what TCP does, and by invoking tcp_cleanup_rbuf()
on all the active subflows.

Note that we can't replicate the exact tests implemented in
tcp_cleanup_rbuf(), as MPTCP lacks some of the required info - e.g.
ping-pong mode.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d8e336f7
Loading
Loading
Loading
Loading
+23 −33
Original line number Diff line number Diff line
@@ -442,51 +442,48 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
	}
}

static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
	bool slow;
	int ret;

	slow = lock_sock_fast(ssk);
	ret = tcp_can_send_ack(ssk);
	if (ret)
	if (tcp_can_send_ack(ssk))
		tcp_cleanup_rbuf(ssk, 1);
	unlock_sock_fast(ssk, slow);
	return ret;
}

/* Lockless hint: could @ssk have an ACK worth sending right now?
 *
 * @ssk:      subflow TCP socket to inspect; it is only read, never modified.
 * @rx_empty: true when the caller found no data pending in its receive
 *            buffer.
 *
 * All fields are sampled with READ_ONCE() and no socket lock is taken here,
 * so the result is only a hint.
 *
 * Returns true when an ACK is scheduled on @ssk and either
 *  - more than one rcv_mss worth of data was received since rcv_wup, or
 *  - @rx_empty is set and one of the PUSHED/PUSHED2 flags is pending.
 */
static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
{
	const struct inet_connection_sock *icsk = inet_csk(ssk);
	/* Keep the whole flag set: a bool would collapse it to 0/1, and the
	 * PUSHED/PUSHED2 mask test below could then never match.
	 */
	unsigned int ack_pending = READ_ONCE(icsk->icsk_ack.pending);
	const struct tcp_sock *tp = tcp_sk(ssk);

	/* no ACK scheduled, nothing to flush */
	if (!(ack_pending & ICSK_ACK_SCHED))
		return false;

	/* more than one MSS of data received since rcv_wup */
	if (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
	    READ_ONCE(icsk->icsk_ack.rcv_mss))
		return true;

	/* receive buffer drained and the peer pushed data */
	return rx_empty &&
	       (ack_pending & (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED));
}

static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
	struct sock *ack_hint = READ_ONCE(msk->ack_hint);
	int old_space = READ_ONCE(msk->old_wspace);
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	bool cleanup;
	int space =  __mptcp_space(sk);
	bool cleanup, rx_empty;

	/* this is a simple superset of what tcp_cleanup_rbuf() implements
	 * so that we don't have to acquire the ssk socket lock most of the time
	 * to do actually nothing
	 */
	cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
	if (!cleanup)
		return;
	cleanup = (space > 0) && (space >= (old_space << 1));
	rx_empty = !atomic_read(&sk->sk_rmem_alloc);

	/* if the hinted ssk is still active, try to use it */
	if (likely(ack_hint)) {
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

			if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
				return;
		if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
			mptcp_subflow_cleanup_rbuf(ssk);
	}
}

	/* otherwise pick the first active subflow */
	mptcp_for_each_subflow(msk, subflow)
		if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
			return;
}

static bool mptcp_check_data_fin(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -629,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
			break;
		}
	} while (more_data_avail);
	WRITE_ONCE(msk->ack_hint, ssk);

	*bytes += moved;
	return done;
@@ -1910,7 +1906,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
		__mptcp_update_rmem(sk);
		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
		mptcp_data_unlock(sk);
		tcp_cleanup_rbuf(ssk, moved);

		if (unlikely(ssk->sk_err))
			__mptcp_error_report(sk);
@@ -1926,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
		ret |= __mptcp_ofo_queue(msk);
		__mptcp_splice_receive_queue(sk);
		mptcp_data_unlock(sk);
		mptcp_cleanup_rbuf(msk);
	}
	if (ret)
		mptcp_check_data_fin((struct sock *)msk);
@@ -2175,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
	if (ssk == msk->last_snd)
		msk->last_snd = NULL;

	if (ssk == msk->ack_hint)
		msk->ack_hint = NULL;

	if (ssk == msk->first)
		msk->first = NULL;

@@ -2392,7 +2383,6 @@ static int __mptcp_init_sock(struct sock *sk)
	msk->rmem_released = 0;
	msk->tx_pending_data = 0;

	msk->ack_hint = NULL;
	msk->first = NULL;
	inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+0 −1
Original line number Diff line number Diff line
@@ -243,7 +243,6 @@ struct mptcp_sock {
	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
	bool		csum_enabled;
	spinlock_t	join_list_lock;
	struct sock	*ack_hint;
	struct work_struct work;
	struct sk_buff  *ooo_last_skb;
	struct rb_root  out_of_order_queue;