Commit 6fc5f5bc authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mptcp-cleanups'

Matthieu Baerts says:

====================
mptcp: a couple of cleanups and improvements

Patch 1 removes an unneeded address copy in subflow_syn_recv_sock().

Patch 2 simplifies subflow_syn_recv_sock() to postpone some actions and
to avoid a bunch of conditionals.

Patch 3 stops reporting limits that are not taken into account when the
userspace PM is used.

Patch 4 adds a new test to validate that the 'subflows' field reported
by the kernel is correct. Such info can be retrieved via Netlink (e.g.
with ss) or getsockopt(SOL_MPTCP, MPTCP_INFO).

---
Changes in v2:
- Patch 3/4's commit message has been updated to use the correct SHA
- Rebased on latest net-next
- Link to v1: https://lore.kernel.org/r/20230324-upstream-net-next-20230324-misc-features-v1-0-5a29154592bd@tessares.net


====================

Signed-off-by: default avatarMatthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 9380d891 9095ce97
Loading
Loading
Loading
Loading
+13 −7
Original line number Diff line number Diff line
@@ -885,7 +885,6 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
	u32 flags = 0;
	u8 val;

	memset(info, 0, sizeof(*info));

@@ -893,12 +892,19 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
	info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
	val = mptcp_pm_get_add_addr_signal_max(msk);
	info->mptcpi_add_addr_signal_max = val;
	val = mptcp_pm_get_add_addr_accept_max(msk);
	info->mptcpi_add_addr_accepted_max = val;
	info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);

	/* The following limits only make sense for the in-kernel PM */
	if (mptcp_pm_is_kernel(msk)) {
		info->mptcpi_subflows_max =
			mptcp_pm_get_subflows_max(msk);
		info->mptcpi_add_addr_signal_max =
			mptcp_pm_get_add_addr_signal_max(msk);
		info->mptcpi_add_addr_accepted_max =
			mptcp_pm_get_add_addr_accept_max(msk);
		info->mptcpi_local_addr_max =
			mptcp_pm_get_local_addr_max(msk);
	}

	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
		flags |= MPTCP_INFO_FLAG_FALLBACK;
	if (READ_ONCE(msk->can_ack))
+13 −30
Original line number Diff line number Diff line
@@ -696,14 +696,6 @@ static bool subflow_hmac_valid(const struct request_sock *req,
	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_force_close(struct sock *sk)
{
	/* the msk is not yet exposed to user-space, and refcount is 2 */
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
	sock_put(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
@@ -755,7 +747,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct mptcp_sock *owner;
	struct sock *child;

@@ -784,14 +775,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
		 * options.
		 */
		mptcp_get_options(skb, &mp_opt);
		if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
		if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC))
			fallback = true;
			goto create_child;
		}

		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
@@ -820,23 +806,23 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
				subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
				goto dispose_child;
			}

			if (new_msk)
				mptcp_copy_inaddrs(new_msk, child);
			mptcp_subflow_drop_ctx(child);
			goto out;
			goto fallback;
		}

		/* ssk inherits options of listener sk */
		ctx->setsockopt_seq = listener->setsockopt_seq;

		if (ctx->mp_capable) {
			owner = mptcp_sk(new_msk);
			ctx->conn = mptcp_sk_clone(listener->conn, &mp_opt, req);
			if (!ctx->conn)
				goto fallback;

			owner = mptcp_sk(ctx->conn);

			/* this can't race with mptcp_close(), as the msk is
			 * not yet exposted to user-space
			 */
			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
			inet_sk_state_store(ctx->conn, TCP_ESTABLISHED);

			/* record the newly created socket as the first msk
			 * subflow, but don't link it yet into conn_list
@@ -846,11 +832,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
			owner->setsockopt_seq = ctx->setsockopt_seq;
			mptcp_pm_new_connection(owner, child, 1);
			mptcp_token_accept(subflow_req, owner);
			ctx->conn = new_msk;
			new_msk = NULL;

			/* set msk addresses early to ensure mptcp_pm_get_local_id()
			 * uses the correct data
@@ -900,11 +884,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just help
	 * catching eariler subtle bugs
	 */
@@ -922,6 +901,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,

	/* The last child reference will be released by the caller */
	return child;

fallback:
	mptcp_subflow_drop_ctx(child);
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
+46 −1
Original line number Diff line number Diff line
@@ -1719,6 +1719,46 @@ chk_subflow_nr()
	fi
}

chk_mptcp_info()
{
	local nr_info=$1
	local info
	local cnt1
	local cnt2
	local dump_stats

	if [[ $nr_info = "subflows_"* ]]; then
		info="subflows"
		nr_info=${nr_info:9}
	else
		echo "[fail] unsupported argument: $nr_info"
		fail_test
		return 1
	fi

	printf "%-${nr_blank}s %-30s" " " "mptcp_info $info=$nr_info"

	cnt1=$(ss -N $ns1 -inmHM | grep "$info:" |
		sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q')
	[ -z "$cnt1" ] && cnt1=0
	cnt2=$(ss -N $ns2 -inmHM | grep "$info:" |
		sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q')
	[ -z "$cnt2" ] && cnt2=0
	if [ "$cnt1" != "$nr_info" ] || [ "$cnt2" != "$nr_info" ]; then
		echo "[fail] got $cnt1:$cnt2 $info expected $nr_info"
		fail_test
		dump_stats=1
	else
		echo "[ ok ]"
	fi

	if [ "$dump_stats" = 1 ]; then
		ss -N $ns1 -inmHM
		ss -N $ns2 -inmHM
		dump_stats
	fi
}

chk_link_usage()
{
	local ns=$1
@@ -3118,13 +3158,18 @@ endpoint_tests()
		run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null &

		wait_mpj $ns2
		chk_subflow_nr needtitle "before delete" 2
		chk_mptcp_info subflows_1

		pm_nl_del_endpoint $ns2 2 10.0.2.2
		sleep 0.5
		chk_subflow_nr needtitle "after delete" 1
		chk_subflow_nr "" "after delete" 1
		chk_mptcp_info subflows_0

		pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
		wait_mpj $ns2
		chk_subflow_nr "" "after re-add" 2
		chk_mptcp_info subflows_1
		kill_tests_wait
	fi
}