Commit 5f3e915c authored by Jakub Kicinski

Merge branch 'mptcp-avoid-workqueue-usage-for-data'

Paolo Abeni says:

====================
mptcp: avoid workqueue usage for data

The current locking schema used to protect the MPTCP data-path
requires the MPTCP workqueue to process incoming data whenever
the trylock on the msk socket lock fails.

This limits scalability and introduces random delays
in MPTCP-level acks.

With this series we use a single spinlock to protect the MPTCP
data-path, removing the need for the workqueue and for delayed acks.

This additionally reduces the number of atomic operations required
per packet and considerably cleans up the poll/wake-up code.
====================

Link: https://lore.kernel.org/r/cover.1606413118.git.pabeni@redhat.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents be572424 6e628cd3
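
For readers skimming the diffs below, the essence of the change is to replace the "trylock the msk socket, else defer to the workqueue" pattern with an unconditional spinlock acquisition in the softirq path. The following is a minimal sketch of the two schemes, not the series' actual code: names prefixed demo_ are hypothetical, and the old flow is condensed to its trylock-or-defer shape.

#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct demo_msk {
	spinlock_t		data_lock;	/* protects data-path state */
	u64			snd_una;
	struct work_struct	work;		/* old scheme only */
};

/* Old scheme: from softirq context, contention forces the update to be
 * deferred to the workqueue, so the MPTCP-level ack is delayed by
 * scheduling latency.
 */
static void demo_old_ack_update(struct demo_msk *msk, u64 new_una)
{
	if (!spin_trylock_bh(&msk->data_lock)) {
		schedule_work(&msk->work);	/* processed "later" */
		return;
	}
	msk->snd_una = new_una;
	spin_unlock_bh(&msk->data_lock);
}

/* New scheme: always take the spinlock; the critical section is short,
 * so the update (and the ack) happens inline, with no work item.
 */
static void demo_new_ack_update(struct demo_msk *msk, u64 new_una)
{
	spin_lock_bh(&msk->data_lock);
	msk->snd_una = new_una;
	spin_unlock_bh(&msk->data_lock);
}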
include/net/sock.h: +1 −0
@@ -1590,6 +1590,7 @@ static inline void lock_sock(struct sock *sk)
 	lock_sock_nested(sk, 0);
 }
 
+void __lock_sock(struct sock *sk);
 void __release_sock(struct sock *sk);
 void release_sock(struct sock *sk);
net/core/sock.c: +1 −1
@@ -2486,7 +2486,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
 
-static void __lock_sock(struct sock *sk)
+void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
 	__acquires(&sk->sk_lock.slock)
 {
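
__lock_sock() is made non-static here so that MPTCP can open-code its own lock_sock() variant, doing extra work while sk_lock.slock is still held. A rough sketch of the pattern, modeled on lock_sock_nested(): the lockdep annotations are omitted and the callback hook is purely illustrative, not the series' actual helper.

#include <net/sock.h>

/* Sketch only: acquire the owner lock the way lock_sock_nested() does,
 * but run a caller-supplied callback while still under sk_lock.slock.
 */
static void demo_lock_sock_cb(struct sock *sk, void (*cb)(struct sock *sk))
{
	might_sleep();

	spin_lock_bh(&sk->sk_lock.slock);
	if (cb)
		cb(sk);			/* runs under the spinlock, BHs off */
	if (sk->sk_lock.owned)
		__lock_sock(sk);	/* now callable here: sleeps until
					 * the current owner releases */
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	local_bh_enable();
}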
net/mptcp/mptcp_diag.c: +1 −1
@@ -140,7 +140,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 	info->mptcpi_flags = flags;
 	info->mptcpi_token = READ_ONCE(msk->token);
 	info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
-	info->mptcpi_snd_una = atomic64_read(&msk->snd_una);
+	info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
 	info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
 	unlock_sock_fast(sk, slow);
 }
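
The diag change follows from the new locking: once msk->snd_una becomes a plain u64 written only under the msk data lock, lockless readers such as the diag code just need READ_ONCE() to pair with the writer and avoid torn or re-fetched loads. A sketch of that pairing, with hypothetical demo_ names:

#include <linux/types.h>
#include <linux/compiler.h>	/* READ_ONCE / WRITE_ONCE */

struct demo_msk {
	u64	snd_una;	/* written only under the data lock */
};

/* Writer side: called with the msk data lock held, so a single marked
 * store suffices; no atomic64_t is needed anymore.
 */
static void demo_set_snd_una(struct demo_msk *msk, u64 val)
{
	WRITE_ONCE(msk->snd_una, val);
}

/* Reader side: diag runs without the data lock, so READ_ONCE()
 * documents the lockless access and prevents compiler tearing.
 */
static u64 demo_get_snd_una(const struct demo_msk *msk)
{
	return READ_ONCE(msk->snd_una);
}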
net/mptcp/options.c: +25 −22
@@ -830,18 +830,20 @@ static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
 }
 
 static void ack_update_msk(struct mptcp_sock *msk,
-			   const struct sock *ssk,
+			   struct sock *ssk,
 			   struct mptcp_options_received *mp_opt)
 {
-	u64 new_snd_una, snd_una, old_snd_una = atomic64_read(&msk->snd_una);
-	u64 new_wnd_end, wnd_end, old_wnd_end = atomic64_read(&msk->wnd_end);
-	u64 snd_nxt = READ_ONCE(msk->snd_nxt);
+	u64 new_wnd_end, new_snd_una, snd_nxt = READ_ONCE(msk->snd_nxt);
 	struct sock *sk = (struct sock *)msk;
+	u64 old_snd_una;
+
+	mptcp_data_lock(sk);
 
 	/* avoid ack expansion on update conflict, to reduce the risk of
 	 * wrongly expanding to a future ack sequence number, which is way
 	 * more dangerous than missing an ack
 	 */
+	old_snd_una = msk->snd_una;
 	new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
 
 	/* ACK for data not even sent yet? Ignore. */
@@ -850,26 +852,16 @@ static void ack_update_msk(struct mptcp_sock *msk,
 
 	new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
 
-	while (after64(new_wnd_end, old_wnd_end)) {
-		wnd_end = old_wnd_end;
-		old_wnd_end = atomic64_cmpxchg(&msk->wnd_end, wnd_end,
-					       new_wnd_end);
-		if (old_wnd_end == wnd_end) {
-			if (mptcp_send_head(sk))
-				mptcp_schedule_work(sk);
-			break;
-		}
+	if (after64(new_wnd_end, msk->wnd_end)) {
+		msk->wnd_end = new_wnd_end;
+		__mptcp_wnd_updated(sk, ssk);
 	}
 
-	while (after64(new_snd_una, old_snd_una)) {
-		snd_una = old_snd_una;
-		old_snd_una = atomic64_cmpxchg(&msk->snd_una, snd_una,
-					       new_snd_una);
-		if (old_snd_una == snd_una) {
-			mptcp_data_acked(sk);
-			break;
-		}
+	if (after64(new_snd_una, old_snd_una)) {
+		msk->snd_una = new_snd_una;
+		__mptcp_data_acked(sk);
 	}
+	mptcp_data_unlock(sk);
 }
 
 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
@@ -922,8 +914,19 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 	struct mptcp_options_received mp_opt;
 	struct mptcp_ext *mpext;
 
-	if (__mptcp_check_fallback(msk))
+	if (__mptcp_check_fallback(msk)) {
+		/* Keep it simple and unconditionally trigger send data cleanup and
+		 * pending queue spooling. We will need to acquire the data lock
+		 * for more accurate checks, and once the lock is acquired, such
+		 * helpers are cheap.
+		 */
+		mptcp_data_lock(subflow->conn);
+		if (mptcp_send_head(subflow->conn))
+			__mptcp_wnd_updated(subflow->conn, sk);
+		__mptcp_data_acked(subflow->conn);
+		mptcp_data_unlock(subflow->conn);
 		return;
+	}
 
 	mptcp_get_options(skb, &mp_opt);
 	if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
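
The net effect in ack_update_msk() above: the lock-free atomic64_cmpxchg() retry loops become simple compare-and-assign updates, because mptcp_data_lock() now serializes all writers. A condensed before/after sketch with hypothetical demo_ helpers; demo_after64() mirrors the kernel's 64-bit sequence comparison, (s64)(a - b) > 0.

#include <linux/types.h>
#include <linux/atomic.h>

static inline bool demo_after64(u64 seq1, u64 seq2)
{
	return (s64)(seq1 - seq2) > 0;
}

/* Before: no lock, so racing updaters loop on cmpxchg until one of
 * them wins; losers re-read the fresher value and retry.
 */
static void demo_update_cmpxchg(atomic64_t *snd_una, u64 new_una)
{
	u64 old = atomic64_read(snd_una);

	while (demo_after64(new_una, old)) {
		u64 prev = atomic64_cmpxchg(snd_una, old, new_una);

		if (prev == old)
			break;		/* our update went in */
		old = prev;		/* lost the race: retry */
	}
}

/* After: the caller holds the msk data lock, so a single test and a
 * plain store suffice; fewer atomic operations per packet.
 */
static void demo_update_locked(u64 *snd_una, u64 new_una)
{
	if (demo_after64(new_una, *snd_una))
		*snd_una = new_una;
}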
One more file changed: +539 −194 (preview size limit exceeded; diff collapsed).