Commit e3859603 authored by Paolo Abeni, committed by David S. Miller
Browse files

mptcp: better msk receive window updates



Move mptcp_cleanup_rbuf() related checks inside the mentioned
helper and extend them to mirror TCP checks more closely.

Additionally, drop the 'rmem_pending' hack: since commit 87952603
("mptcp: protect the rx path with the msk socket spinlock") we
can use 'rmem_released' instead.

Fixes: ea4ca586 ("mptcp: refine MPTCP-level ack scheduling")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d8b59efa
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -498,8 +498,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	u64 snd_data_fin_enable, ack_seq;
	unsigned int dss_size = 0;
	u64 snd_data_fin_enable;
	struct mptcp_ext *mpext;
	unsigned int ack_size;
	bool ret = false;
@@ -531,13 +531,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
		return ret;
	}

	ack_seq = READ_ONCE(msk->ack_seq);
	if (READ_ONCE(msk->use_64bit_ack)) {
		ack_size = TCPOLEN_MPTCP_DSS_ACK64;
		opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
		opts->ext_copy.data_ack = ack_seq;
		opts->ext_copy.ack64 = 1;
	} else {
		ack_size = TCPOLEN_MPTCP_DSS_ACK32;
		opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq);
		opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
		opts->ext_copy.ack64 = 0;
	}
	opts->ext_copy.use_ack = 1;
+22 −16
Original line number Diff line number Diff line
@@ -457,7 +457,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
	struct sock *ack_hint = READ_ONCE(msk->ack_hint);
	int old_space = READ_ONCE(msk->old_wspace);
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	bool cleanup;

	/* this is a simple superset of what tcp_cleanup_rbuf() implements
	 * so that we don't have to acquire the ssk socket lock most of the time
	 * to do actually nothing
	 */
	cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
	if (!cleanup)
		return;

	/* if the hinted ssk is still active, try to use it */
	if (likely(ack_hint)) {
@@ -1865,7 +1876,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk)
	skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
}

static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
static bool __mptcp_move_skbs(struct mptcp_sock *msk)
{
	struct sock *sk = (struct sock *)msk;
	unsigned int moved = 0;
@@ -1885,13 +1896,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)

		slowpath = lock_sock_fast(ssk);
		mptcp_data_lock(sk);
		__mptcp_update_rmem(sk);
		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
		mptcp_data_unlock(sk);
		if (moved && rcv) {
			WRITE_ONCE(msk->rmem_pending, min(rcv, moved));
			tcp_cleanup_rbuf(ssk, 1);
			WRITE_ONCE(msk->rmem_pending, 0);
		}
		tcp_cleanup_rbuf(ssk, moved);
		unlock_sock_fast(ssk, slowpath);
	} while (!done);

@@ -1904,6 +1912,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
		ret |= __mptcp_ofo_queue(msk);
		__mptcp_splice_receive_queue(sk);
		mptcp_data_unlock(sk);
		mptcp_cleanup_rbuf(msk);
	}
	if (ret)
		mptcp_check_data_fin((struct sock *)msk);
@@ -1933,7 +1942,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	while (copied < len) {
		int bytes_read, old_space;
		int bytes_read;

		bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
		if (unlikely(bytes_read < 0)) {
@@ -1944,15 +1953,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,

		copied += bytes_read;

		if (skb_queue_empty(&msk->receive_queue) &&
		    __mptcp_move_skbs(msk, len - copied))
			continue;

		/* be sure to advertise window change */
		old_space = READ_ONCE(msk->old_wspace);
		if ((tcp_space(sk) - old_space) >= old_space)
		mptcp_cleanup_rbuf(msk);

		if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
			continue;

		/* only the master socket status is relevant here. The exit
		 * conditions mirror closely tcp_recvmsg()
		 */
@@ -1979,7 +1985,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
				/* race breaker: the shutdown could be after the
				 * previous receive queue check
				 */
				if (__mptcp_move_skbs(msk, len - copied))
				if (__mptcp_move_skbs(msk))
					continue;
				break;
			}
@@ -2012,7 +2018,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		/* .. race-breaker: ssk might have gotten new data
		 * after last __mptcp_move_skbs() returned false.
		 */
		if (unlikely(__mptcp_move_skbs(msk, 0)))
		if (unlikely(__mptcp_move_skbs(msk)))
			set_bit(MPTCP_DATA_READY, &msk->flags);
	} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
		/* data to read but mptcp_wait_data() cleared DATA_READY */
+1 −2
Original line number Diff line number Diff line
@@ -234,7 +234,6 @@ struct mptcp_sock {
	u64		wnd_end;
	unsigned long	timer_ival;
	u32		token;
	int		rmem_pending;
	int		rmem_released;
	unsigned long	flags;
	bool		can_ack;
@@ -293,7 +292,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)

/* Receive space available at the MPTCP level: the TCP-level space for the
 * msk plus 'rmem_released' (memory already freed by the msk receive path
 * but not yet charged back to the subflows — NOTE(review): semantics per
 * the commit message; confirm against struct mptcp_sock).
 */
static inline int __mptcp_space(const struct sock *sk)
{
	return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
}

static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)