Commit 0fa1b378, authored by Geliang Tang and committed by Jakub Kicinski
Browse files

mptcp: use get_send wrapper



This patch adds multiple-subflow support to __mptcp_push_pending() and
__mptcp_subflow_push_pending(): both now call the mptcp_sched_get_send()
wrapper instead of calling mptcp_subflow_get_send() directly.

Check each subflow's scheduled flag to determine which subflow or subflows
were picked by the scheduler, and use those subflows to send data.

Move the msk_owned_by_me() and fallback checks out of
mptcp_subflow_get_send() and into the get_send() wrapper.

This commit allows the scheduler to set the subflow->scheduled bit in
multiple subflows, but it does not allow for sending redundant data.
Multiple scheduled subflows will send sequential data on each subflow.

Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <martineau@kernel.org>
Link: https://lore.kernel.org/r/20230821-upstream-net-next-20230818-v1-8-0c860fb256a8@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 07336a87
Loading
Loading
Loading
Loading
+68 −45
Original line number Diff line number Diff line
@@ -1377,15 +1377,6 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
	u64 linger_time;
	long tout = 0;

	msk_owned_by_me(msk);

	if (__mptcp_check_fallback(msk)) {
		if (!msk->first)
			return NULL;
		return __tcp_can_send(msk->first) &&
		       sk_stream_memory_free(msk->first) ? msk->first : NULL;
	}

	/* pick the subflow with the lower wmem/wspace ratio */
	for (i = 0; i < SSK_MODE_MAX; ++i) {
		send_info[i].ssk = NULL;
@@ -1538,43 +1529,56 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
				.flags = flags,
	};
	bool do_check_data_fin = false;
	int push_count = 1;

	while (mptcp_send_head(sk)) {
	while (mptcp_send_head(sk) && (push_count > 0)) {
		struct mptcp_subflow_context *subflow;
		int ret = 0;

		prev_ssk = ssk;
		ssk = mptcp_subflow_get_send(msk);
		if (mptcp_sched_get_send(msk))
			break;

		push_count = 0;

		mptcp_for_each_subflow(msk, subflow) {
			if (READ_ONCE(subflow->scheduled)) {
				mptcp_subflow_set_scheduled(subflow, false);

				prev_ssk = ssk;
				ssk = mptcp_subflow_tcp_sock(subflow);
				if (ssk != prev_ssk) {
					/* First check. If the ssk has changed since
					 * the last round, release prev_ssk
					 */
		if (ssk != prev_ssk && prev_ssk)
					if (prev_ssk)
						mptcp_push_release(prev_ssk, &info);
		if (!ssk)
			goto out;

					/* Need to lock the new subflow only if different
					 * from the previous one, otherwise we are still
					 * holding the relevant lock
					 */
		if (ssk != prev_ssk)
					lock_sock(ssk);
				}

				push_count++;

				ret = __subflow_push_pending(sk, ssk, &info);
				if (ret <= 0) {
			if (ret == -EAGAIN)
					if (ret != -EAGAIN ||
					    (1 << ssk->sk_state) &
					     (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSE))
						push_count--;
					continue;
			mptcp_push_release(ssk, &info);
			goto out;
				}
				do_check_data_fin = true;
			}
		}
	}

	/* at this point we held the socket lock for the last subflow we used */
	if (ssk)
		mptcp_push_release(ssk, &info);

out:
	/* ensure the rtx timer is running */
	if (!mptcp_timer_pending(sk))
		mptcp_reset_timer(sk);
@@ -1588,30 +1592,49 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
	struct mptcp_sendmsg_info info = {
		.data_lock_held = true,
	};
	bool keep_pushing = true;
	struct sock *xmit_ssk;
	int copied = 0;

	info.flags = 0;
	while (mptcp_send_head(sk)) {
	while (mptcp_send_head(sk) && keep_pushing) {
		struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
		int ret = 0;

		/* check for a different subflow usage only after
		 * spooling the first chunk of data
		 */
		xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
		if (!xmit_ssk)
			goto out;
		if (xmit_ssk != ssk) {
			mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
					       MPTCP_DELEGATE_SEND);
			goto out;
		}

		if (first) {
			mptcp_subflow_set_scheduled(subflow, false);
			ret = __subflow_push_pending(sk, ssk, &info);
			first = false;
			if (ret <= 0)
				break;
			copied += ret;
			continue;
		}

		if (mptcp_sched_get_send(msk))
			goto out;

		if (READ_ONCE(subflow->scheduled)) {
			mptcp_subflow_set_scheduled(subflow, false);
			ret = __subflow_push_pending(sk, ssk, &info);
			if (ret <= 0)
				keep_pushing = false;
			copied += ret;
		}

		mptcp_for_each_subflow(msk, subflow) {
			if (READ_ONCE(subflow->scheduled)) {
				xmit_ssk = mptcp_subflow_tcp_sock(subflow);
				if (xmit_ssk != ssk) {
					mptcp_subflow_delegate(subflow,
							       MPTCP_DELEGATE_SEND);
					keep_pushing = false;
				}
			}
		}
	}

out:
+13 −0
Original line number Diff line number Diff line
@@ -99,6 +99,19 @@ int mptcp_sched_get_send(struct mptcp_sock *msk)
	struct mptcp_subflow_context *subflow;
	struct mptcp_sched_data data;

	msk_owned_by_me(msk);

	/* the following check is moved out of mptcp_subflow_get_send */
	if (__mptcp_check_fallback(msk)) {
		if (msk->first &&
		    __tcp_can_send(msk->first) &&
		    sk_stream_memory_free(msk->first)) {
			mptcp_subflow_set_scheduled(mptcp_subflow_ctx(msk->first), true);
			return 0;
		}
		return -EINVAL;
	}

	mptcp_for_each_subflow(msk, subflow) {
		if (READ_ONCE(subflow->scheduled))
			return 0;