Commit c5b4297d authored by Geliang Tang, committed by Jakub Kicinski
Browse files

mptcp: refactor push_pending logic



To support redundant packet schedulers more easily, this patch refactors
__mptcp_push_pending() logic from:

For each dfrag:
	While sends succeed:
		Call the scheduler (selects subflow and msk->snd_burst)
		Update subflow locks (push/release/acquire as needed)
		Send the dfrag data with mptcp_sendmsg_frag()
		Update already_sent, snd_nxt, snd_burst
	Update msk->first_pending
Push/release on final subflow

->

While first_pending isn't empty:
	Call the scheduler (selects subflow and msk->snd_burst)
	Update subflow locks (push/release/acquire as needed)
	For each pending dfrag:
		While sends succeed:
			Send the dfrag data with mptcp_sendmsg_frag()
			Update already_sent, snd_nxt, snd_burst
		Update msk->first_pending
		Break if required by msk->snd_burst / etc
	Push/release on final subflow

Refactors __mptcp_subflow_push_pending logic from:

For each dfrag:
	While sends succeed:
		Call the scheduler (selects subflow and msk->snd_burst)
		Send the dfrag data with mptcp_subflow_delegate(), break
		Send the dfrag data with mptcp_sendmsg_frag()
		Update dfrag->already_sent, msk->snd_nxt, msk->snd_burst
	Update msk->first_pending

->

While first_pending isn't empty:
	Call the scheduler (selects subflow and msk->snd_burst)
	Send the dfrag data with mptcp_subflow_delegate(), break
	Send the dfrag data with mptcp_sendmsg_frag()
	For each pending dfrag:
		While sends succeed:
			Send the dfrag data with mptcp_sendmsg_frag()
			Update already_sent, snd_nxt, snd_burst
		Update msk->first_pending
		Break if required by msk->snd_burst / etc

Move the duplicate code from __mptcp_push_pending() and
__mptcp_subflow_push_pending() into a new helper function, named
__subflow_push_pending(). Simplify __mptcp_push_pending() and
__mptcp_subflow_push_pending() by invoking this helper.

Also move the burst check conditions out of mptcp_subflow_get_send()
and check them in __subflow_push_pending(), in the inner
"for each pending dfrag" loop.

Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <martineau@kernel.org>
Link: https://lore.kernel.org/r/20230821-upstream-net-next-20230818-v1-1-0c860fb256a8@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 98173633
Loading
Loading
Loading
Loading
+81 −72
Original line number Diff line number Diff line
@@ -1386,14 +1386,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
		       sk_stream_memory_free(msk->first) ? msk->first : NULL;
	}

	/* re-use last subflow, if the burst allow that */
	if (msk->last_snd && msk->snd_burst > 0 &&
	    sk_stream_memory_free(msk->last_snd) &&
	    mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
		mptcp_set_timeout(sk);
		return msk->last_snd;
	}

	/* pick the subflow with the lower wmem/wspace ratio */
	for (i = 0; i < SSK_MODE_MAX; ++i) {
		send_info[i].ssk = NULL;
@@ -1499,6 +1491,48 @@ void mptcp_check_and_set_pending(struct sock *sk)
		mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
}

/* Push the pending data fragments of @sk through the subflow @ssk.
 *
 * For every fragment returned by mptcp_send_head(), mptcp_sendmsg_frag()
 * is called until the part not yet sent (data_len - already_sent) has been
 * consumed; msk->first_pending is then advanced to mptcp_send_next().
 * The walk ends early once msk->snd_burst is used up, @ssk reports no free
 * stream memory, or the subflow is no longer active.
 *
 * @info: send state shared with the caller; ->sent and ->limit are
 *        rewritten for each fragment.
 *
 * Returns the sum of the positive values returned by mptcp_sendmsg_frag().
 * When a call fails before anything was accumulated, that call's
 * non-positive result is returned instead.
 */
static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
				  struct mptcp_sendmsg_info *info)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	int total = 0;

	for (;;) {
		struct mptcp_data_frag *dfrag = mptcp_send_head(sk);
		int remaining;

		if (!dfrag)
			break;

		info->sent = dfrag->already_sent;
		info->limit = dfrag->data_len;

		for (remaining = dfrag->data_len - dfrag->already_sent;
		     remaining > 0; ) {
			int sent = mptcp_sendmsg_frag(sk, ssk, dfrag, info);

			/* report partial progress in preference to the error */
			if (sent <= 0)
				return total ? total : sent;

			info->sent += sent;
			total += sent;
			remaining -= sent;

			mptcp_update_post_push(msk, dfrag, sent);
		}

		/* this fragment is done: move on to the next pending one */
		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));

		/* keep using this subflow only while the burst allows it */
		if (!(msk->snd_burst > 0 &&
		      sk_stream_memory_free(ssk) &&
		      mptcp_subflow_active(mptcp_subflow_ctx(ssk))))
			break;

		mptcp_set_timeout(sk);
	}

	return total;
}

void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
	struct sock *prev_ssk = NULL, *ssk = NULL;
@@ -1507,14 +1541,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
				.flags = flags,
	};
	bool do_check_data_fin = false;
	struct mptcp_data_frag *dfrag;
	int len;

	while ((dfrag = mptcp_send_head(sk))) {
		info.sent = dfrag->already_sent;
		info.limit = dfrag->data_len;
		len = dfrag->data_len - dfrag->already_sent;
		while (len > 0) {
	while (mptcp_send_head(sk)) {
		int ret = 0;

		prev_ssk = ssk;
@@ -1535,21 +1563,14 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
		if (ssk != prev_ssk)
			lock_sock(ssk);

			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
		ret = __subflow_push_pending(sk, ssk, &info);
		if (ret <= 0) {
			if (ret == -EAGAIN)
				continue;
			mptcp_push_release(ssk, &info);
			goto out;
		}

		do_check_data_fin = true;
			info.sent += ret;
			len -= ret;

			mptcp_update_post_push(msk, dfrag, ret);
		}
		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
	}

	/* at this point we held the socket lock for the last subflow we used */
@@ -1570,16 +1591,11 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
	struct mptcp_sendmsg_info info = {
		.data_lock_held = true,
	};
	struct mptcp_data_frag *dfrag;
	struct sock *xmit_ssk;
	int len, copied = 0;
	int copied = 0;

	info.flags = 0;
	while ((dfrag = mptcp_send_head(sk))) {
		info.sent = dfrag->already_sent;
		info.limit = dfrag->data_len;
		len = dfrag->data_len - dfrag->already_sent;
		while (len > 0) {
	while (mptcp_send_head(sk)) {
		int ret = 0;

		/* check for a different subflow usage only after
@@ -1594,18 +1610,11 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
			goto out;
		}

			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
		ret = __subflow_push_pending(sk, ssk, &info);
		first = false;
		if (ret <= 0)
				goto out;

			info.sent += ret;
			break;
		copied += ret;
			len -= ret;
			first = false;

			mptcp_update_post_push(msk, dfrag, ret);
		}
		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
	}

out: