Commit 98e95872 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'mptcp-expose-more-info-and-small-improvements'

Matthieu Baerts says:

====================
mptcp: expose more info and small improvements

Patch 1-3/9 track and expose some aggregated data counters at the MPTCP
level: the number of retransmissions and the bytes that have been
transferred. The first patch prepares the work by moving where snd_una
is updated for fallback sockets while the last patch adds some tests to
cover the new code.

Patch 4-6/9 introduce a new getsockopt for SOL_MPTCP: MPTCP_FULL_INFO.
This new socket option allows to combine info from MPTCP_INFO,
MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options into one. It can be
needed to have all the info in one call because the path-manager can close
and re-create subflows between getsockopt() calls, fooling the accounting. The
first patch introduces a unique subflow ID to easily detect when
subflows are being re-created with the same 5-tuple while the last patch
adds some tests to cover the new code.

Please note that patch 5/9 ("mptcp: introduce MPTCP_FULL_INFO getsockopt")
can reveal a bug that has been there for a while, see [1]. A fix has
recently been sent to netdev for the -net tree: "mptcp: ensure listener
is unhashed before updating the sk status", see [2]. There are no
conflicts between the two patches, but it might be better to apply this
series after the one for -net and after having merged "net" into
"net-next".

Patch 7/9 is similar to commit 47867f0a ("selftests: mptcp: join:
skip check if MIB counter not supported") recently applied in the -net
tree but here it adapts the new code that is only in net-next (and it
fixes a merge conflict resolution which didn't have any impact).

Patches 8 and 9/9 are two simple refactorings. One to consolidate the
transition to TCP_CLOSE in mptcp_do_fastclose() and avoid duplicated
code. The other one reduces the scope of an argument passed to
mptcp_pm_alloc_anno_list() function.

Link: https://github.com/multipath-tcp/mptcp_net-next/issues/407 [1]
Link: https://lore.kernel.org/netdev/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net/ [2]
====================

Link: https://lore.kernel.org/r/20230620-upstream-net-next-20230620-mptcp-expose-more-info-and-misc-v1-0-62b9444bfd48@tessares.net


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 5dfbbaa2 528cb5f2
Loading
Loading
Loading
Loading
+29 −0
Original line number Diff line number Diff line
@@ -123,6 +123,11 @@ struct mptcp_info {
	__u8	mptcpi_local_addr_used;
	__u8	mptcpi_local_addr_max;
	__u8	mptcpi_csum_enabled;
	__u32	mptcpi_retransmits;
	__u64	mptcpi_bytes_retrans;
	__u64	mptcpi_bytes_sent;
	__u64	mptcpi_bytes_received;
	__u64	mptcpi_bytes_acked;
};

/*
@@ -244,9 +249,33 @@ struct mptcp_subflow_addrs {
	};
};

/* Per-subflow entry exposed via the subflow_info array of MPTCP_FULL_INFO. */
struct mptcp_subflow_info {
	__u32				id;	/* unique subflow id; lets userspace detect a
						 * subflow re-created with the same 5-tuple
						 */
	struct mptcp_subflow_addrs	addrs;	/* local/remote addresses of this subflow */
};

/* Argument for the MPTCP_FULL_INFO getsockopt: combines the data exposed by
 * the MPTCP_INFO, MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options in a
 * single call, so the subflow set cannot change between separate queries.
 */
struct mptcp_full_info {
	__u32		size_tcpinfo_kernel;	/* must be 0, set by kernel */
	__u32		size_tcpinfo_user;	/* in: per-entry size userspace expects at tcp_info */
	__u32		size_sfinfo_kernel;	/* must be 0, set by kernel */
	__u32		size_sfinfo_user;	/* in: per-entry size userspace expects at subflow_info */
	__u32		num_subflows;		/* must be 0, set by kernel (real subflow count) */
	__u32		size_arrays_user;	/* max subflows that userspace is interested in;
						 * the buffers at subflow_info/tcp_info
						 * are respectively at least:
						 *  size_arrays * size_sfinfo_user
						 *  size_arrays * size_tcpinfo_user
						 * bytes wide
						 */
	__aligned_u64		subflow_info;	/* user pointer to struct mptcp_subflow_info array */
	__aligned_u64		tcp_info;	/* user pointer to struct tcp_info array */
	struct mptcp_info	mptcp_info;	/* MPTCP-level counters, filled by kernel */
};

/* MPTCP socket options */
#define MPTCP_INFO		1
#define MPTCP_TCPINFO		2
#define MPTCP_SUBFLOW_ADDRS	3
#define MPTCP_FULL_INFO		4

#endif /* _UAPI_MPTCP_H */
+13 −1
Original line number Diff line number Diff line
@@ -1026,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
	return cur_seq;
}

/* Advance the msk-level snd_una and account the newly acked bytes
 * (tracked for the mptcpi_bytes_acked counter). Both call sites run
 * under the msk data lock, so the read-modify-write is safe.
 */
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
{
	msk->bytes_acked += new_snd_una - msk->snd_una;
	msk->snd_una = new_snd_una;
}

static void ack_update_msk(struct mptcp_sock *msk,
			   struct sock *ssk,
			   struct mptcp_options_received *mp_opt)
@@ -1057,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
		__mptcp_check_push(sk, ssk);

	if (after64(new_snd_una, old_snd_una)) {
		msk->snd_una = new_snd_una;
		__mptcp_snd_una_update(msk, new_snd_una);
		__mptcp_data_acked(sk);
	}
	mptcp_data_unlock(sk);
@@ -1119,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
		mptcp_data_lock(subflow->conn);
		if (sk_stream_memory_free(sk))
			__mptcp_check_push(subflow->conn, sk);

		/* on fallback we just need to ignore the msk-level snd_una, as
		 * this is really plain TCP
		 */
		__mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt));

		__mptcp_data_acked(subflow->conn);
		mptcp_data_unlock(subflow->conn);
		return true;
+4 −4
Original line number Diff line number Diff line
@@ -341,7 +341,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
}

bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
			      const struct mptcp_pm_addr_entry *entry)
			      const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_add_entry *add_entry = NULL;
	struct sock *sk = (struct sock *)msk;
@@ -349,7 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,

	lockdep_assert_held(&msk->pm.lock);

	add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
	add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);

	if (add_entry) {
		if (mptcp_pm_is_kernel(msk))
@@ -366,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,

	list_add(&add_entry->list, &msk->pm.anno_list);

	add_entry->addr = entry->addr;
	add_entry->addr = *addr;
	add_entry->sock = msk;
	add_entry->retrans_times = 0;

@@ -576,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
			return;

		if (local) {
			if (mptcp_pm_alloc_anno_list(msk, local)) {
			if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
				__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
				msk->pm.add_addr_signaled++;
				mptcp_pm_announce_addr(msk, &local->addr, false);
+1 −1
Original line number Diff line number Diff line
@@ -193,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
	lock_sock((struct sock *)msk);
	spin_lock_bh(&msk->pm.lock);

	if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
	if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
		msk->pm.add_addr_signaled++;
		mptcp_pm_announce_addr(msk, &addr_val.addr, false);
		mptcp_pm_nl_addr_send_ack(msk);
+19 −12
Original line number Diff line number Diff line
@@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
	list_add(&subflow->node, &msk->conn_list);
	sock_hold(ssock->sk);
	subflow->request_mptcp = 1;
	subflow->subflow_id = msk->subflow_id++;

	/* This is the first subflow, always with id 0 */
	subflow->local_id_valid = 1;
@@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,

	if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
		/* in sequence */
		msk->bytes_received += copy_len;
		WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail && mptcp_try_coalesce(sk, tail, skb))
@@ -760,6 +762,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
			MPTCP_SKB_CB(skb)->map_seq += delta;
			__skb_queue_tail(&sk->sk_receive_queue, skb);
		}
		msk->bytes_received += end_seq - msk->ack_seq;
		msk->ack_seq = end_seq;
		moved = true;
	}
@@ -845,6 +848,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
	if (sk->sk_socket && !ssk->sk_socket)
		mptcp_sock_graft(ssk, sk->sk_socket);

	mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
	mptcp_sockopt_sync_locked(msk, ssk);
	mptcp_subflow_joined(msk, ssk);
	return true;
@@ -1004,12 +1008,6 @@ static void __mptcp_clean_una(struct sock *sk)
	struct mptcp_data_frag *dtmp, *dfrag;
	u64 snd_una;

	/* on fallback we just need to ignore snd_una, as this is really
	 * plain TCP
	 */
	if (__mptcp_check_fallback(msk))
		msk->snd_una = READ_ONCE(msk->snd_nxt);

	snd_una = msk->snd_una;
	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
		if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -1537,9 +1535,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
	 * that has been handed to the subflow for transmission
	 * and skip update in case it was old dfrag.
	 */
	if (likely(after64(snd_nxt_new, msk->snd_nxt)))
	if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
		msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
		msk->snd_nxt = snd_nxt_new;
	}
}

void mptcp_check_and_set_pending(struct sock *sk)
{
@@ -2596,6 +2596,7 @@ static void __mptcp_retrans(struct sock *sk)
	}
	if (copied) {
		dfrag->already_sent = max(dfrag->already_sent, info.sent);
		msk->bytes_retrans += copied;
		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
			 info.size_goal);
		WRITE_ONCE(msk->allow_infinite_fallback, false);
@@ -2654,6 +2655,7 @@ static void mptcp_do_fastclose(struct sock *sk)
	struct mptcp_subflow_context *subflow, *tmp;
	struct mptcp_sock *msk = mptcp_sk(sk);

	inet_sk_state_store(sk, TCP_CLOSE);
	mptcp_for_each_subflow_safe(msk, subflow, tmp)
		__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
				  subflow, MPTCP_CF_FASTCLOSE);
@@ -2691,10 +2693,9 @@ static void mptcp_worker(struct work_struct *work)
	 * even if it is orphaned and in FIN_WAIT2 state
	 */
	if (sock_flag(sk, SOCK_DEAD)) {
		if (mptcp_should_close(sk)) {
			inet_sk_state_store(sk, TCP_CLOSE);
		if (mptcp_should_close(sk))
			mptcp_do_fastclose(sk);
		}

		if (sk->sk_state == TCP_CLOSE) {
			__mptcp_destroy_sock(sk);
			goto unlock;
@@ -2733,6 +2734,7 @@ static int __mptcp_init_sock(struct sock *sk)
	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
	WRITE_ONCE(msk->allow_infinite_fallback, true);
	msk->recovery = false;
	msk->subflow_id = 1;

	mptcp_pm_data_init(msk);

@@ -2936,7 +2938,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
void __mptcp_unaccepted_force_close(struct sock *sk)
{
	sock_set_flag(sk, SOCK_DEAD);
	inet_sk_state_store(sk, TCP_CLOSE);
	mptcp_do_fastclose(sk);
	__mptcp_destroy_sock(sk);
}
@@ -2978,7 +2979,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
		/* If the msk has read data, or the caller explicitly ask it,
		 * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
		 */
		inet_sk_state_store(sk, TCP_CLOSE);
		mptcp_do_fastclose(sk);
		timeout = 0;
	} else if (mptcp_close_state(sk)) {
@@ -3108,6 +3108,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
	mptcp_pm_data_reset(msk);
	mptcp_ca_reset(sk);
	msk->bytes_acked = 0;
	msk->bytes_received = 0;
	msk->bytes_sent = 0;
	msk->bytes_retrans = 0;

	WRITE_ONCE(sk->sk_shutdown, 0);
	sk_error_report(sk);
@@ -3157,6 +3161,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;

	/* passive msk is created after the first/MPC subflow */
	msk->subflow_id = 2;

	sock_reset_flag(nsk, SOCK_RCU_FREE);
	security_inet_csk_clone(nsk, req);

Loading