Commit 7f0c940b authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'mptcp-msg_fastopen-and-tfo-listener-side-support'

Matthieu Baerts says:

====================
mptcp: MSG_FASTOPEN and TFO listener side support

Before this series, only the initiator of a connection was able to combine
both TCP FastOpen and MPTCP when using TCP_FASTOPEN_CONNECT socket option.

These new patches here add (in theory) the full support of TFO with MPTCP,
which means:

 - MSG_FASTOPEN sendmsg flag support (patch 1/8)
 - TFO support for the listener side (patches 2-5/8)
 - TCP_FASTOPEN socket option (patch 6/8)
 - TCP_FASTOPEN_KEY socket option (patch 7/8)

To support TFO for the server side, a few preparation patches are needed
(patches 2 to 5/8). Some of them were inspired by a previous work from
Benjamin Hesmans.

Note that TFO support with MPTCP has been validated with selftests
(patch 8/8) but also with Packetdrill tests running with a modified
but still very WIP version supporting MPTCP. Both the modified tool
and the tests are available online:

  https://github.com/multipath-tcp/packetdrill/
====================

Link: https://lore.kernel.org/r/20221125222958.958636-1-matthieu.baerts@tessares.net


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents f2bb566f ca7ae891
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o

mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
	   mib.o pm_netlink.o sockopt.o pm_userspace.o
	   mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o

obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o

net/mptcp/fastopen.c

0 → 100644
+73 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* MPTCP Fast Open Mechanism
 *
 * Copyright (c) 2021-2022, Dmytro SHYTYI
 */

#include "protocol.h"

void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
					      struct request_sock *req)
{
	struct sock *ssk = subflow->tcp_sock;
	struct sock *sk = subflow->conn;
	struct sk_buff *skb;
	struct tcp_sock *tp;

	tp = tcp_sk(ssk);

	subflow->is_mptfo = 1;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (WARN_ON_ONCE(!skb))
		return;

	/* dequeue the skb from sk receive queue */
	__skb_unlink(skb, &ssk->sk_receive_queue);
	skb_ext_reset(skb);
	skb_orphan(skb);

	/* We copy the fastopen data, but that don't belong to the mptcp sequence
	 * space, need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset()
	 */
	tp->copied_seq += skb->len;
	subflow->ssn_offset += skb->len;

	/* initialize a dummy sequence number, we will update it at MPC
	 * completion, if needed
	 */
	MPTCP_SKB_CB(skb)->map_seq = -skb->len;
	MPTCP_SKB_CB(skb)->end_seq = 0;
	MPTCP_SKB_CB(skb)->offset = 0;
	MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;

	mptcp_data_lock(sk);

	mptcp_set_owner_r(skb, sk);
	__skb_queue_tail(&sk->sk_receive_queue, skb);

	sk->sk_data_ready(sk);

	mptcp_data_unlock(sk);
}

void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
				   const struct mptcp_options_received *mp_opt)
{
	struct sock *sk = (struct sock *)msk;
	struct sk_buff *skb;

	mptcp_data_lock(sk);
	skb = skb_peek_tail(&sk->sk_receive_queue);
	if (skb) {
		WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
		pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk,
			 MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
			 MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
		MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
		MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
	}

	pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
	mptcp_data_unlock(sk);
}
+17 −8
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
{
	u8 subtype = *ptr >> 4;
	int expected_opsize;
	u16 subopt;
	u8 version;
	u8 flags;
	u8 i;
@@ -38,11 +39,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
				expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
			else
				expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
			subopt = OPTION_MPTCP_MPC_ACK;
		} else {
			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) {
				expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
			else
				subopt = OPTION_MPTCP_MPC_SYNACK;
			} else {
				expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
				subopt = OPTION_MPTCP_MPC_SYN;
			}
		}

		/* Cfr RFC 8684 Section 3.3.0:
@@ -85,7 +90,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,

		mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);

		mp_opt->suboptions |= OPTIONS_MPTCP_MPC;
		mp_opt->suboptions |= subopt;
		if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
			mp_opt->sndr_key = get_unaligned_be64(ptr);
			ptr += 8;
@@ -934,7 +939,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
		    subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
		    !subflow->request_join)
			tcp_send_ack(ssk);
		goto fully_established;
		goto check_notify;
	}

	/* we must process OoO packets before the first subflow is fully
@@ -945,17 +950,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
	if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
		if (subflow->mp_join)
			goto reset;
		if (subflow->is_mptfo && mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
			goto set_fully_established;
		return subflow->mp_capable;
	}

	if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
	    ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) {
	if (subflow->remote_key_valid &&
	    (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
	     ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) {
		/* subflows are fully established as soon as we get any
		 * additional ack, including ADD_ADDR.
		 */
		subflow->fully_established = 1;
		WRITE_ONCE(msk->fully_established, true);
		goto fully_established;
		goto check_notify;
	}

	/* If the first established packet does not contain MP_CAPABLE + data
@@ -974,11 +982,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
	if (mp_opt->deny_join_id0)
		WRITE_ONCE(msk->pm.remote_deny_join_id0, true);

set_fully_established:
	if (unlikely(!READ_ONCE(msk->pm.server_side)))
		pr_warn_once("bogus mpc option on established client sk");
	mptcp_subflow_fully_established(subflow, mp_opt);

fully_established:
check_notify:
	/* if the subflow is not already linked into the conn_list, we can't
	 * notify the PM: this subflow is still on the listener queue
	 * and the PM possibly acquiring the subflow lock could race with
+5 −34
Original line number Diff line number Diff line
@@ -36,15 +36,6 @@ struct mptcp6_sock {
};
#endif

struct mptcp_skb_cb {
	u64 map_seq;
	u64 end_seq;
	u32 offset;
	u8  has_rxtstamp:1;
};

#define MPTCP_SKB_CB(__skb)	((struct mptcp_skb_cb *)&((__skb)->cb[0]))

enum {
	MPTCP_CMSG_TS = BIT(0),
	MPTCP_CMSG_INQ = BIT(1),
@@ -200,7 +191,7 @@ static void mptcp_rfree(struct sk_buff *skb)
	mptcp_rmem_uncharge(sk, len);
}

static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->sk = sk;
@@ -1711,17 +1702,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	int ret = 0;
	long timeo;

	/* we don't support FASTOPEN yet */
	if (msg->msg_flags & MSG_FASTOPEN)
		return -EOPNOTSUPP;

	/* silently ignore everything else */
	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN;

	lock_sock(sk);

	ssock = __mptcp_nmpc_socket(msk);
	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
	if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
			       msg->msg_flags & MSG_FASTOPEN))) {
		int copied_syn = 0;

		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
@@ -3048,7 +3036,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
	struct mptcp_sock *msk;
	u64 ack_seq;

	if (!nsk)
		return NULL;
@@ -3074,15 +3061,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;

	if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
		msk->can_ack = true;
		msk->remote_key = mp_opt->sndr_key;
		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
		ack_seq++;
		WRITE_ONCE(msk->ack_seq, ack_seq);
		atomic64_set(&msk->rcv_wnd_sent, ack_seq);
	}

	sock_reset_flag(nsk, SOCK_RCU_FREE);
	/* will be fully established after successful MPC subflow creation */
	inet_sk_state_store(nsk, TCP_SYN_RECV);
@@ -3355,7 +3333,6 @@ void mptcp_finish_connect(struct sock *ssk)
	struct mptcp_subflow_context *subflow;
	struct mptcp_sock *msk;
	struct sock *sk;
	u64 ack_seq;

	subflow = mptcp_subflow_ctx(ssk);
	sk = subflow->conn;
@@ -3363,22 +3340,16 @@ void mptcp_finish_connect(struct sock *ssk)

	pr_debug("msk=%p, token=%u", sk, subflow->token);

	mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
	ack_seq++;
	subflow->map_seq = ack_seq;
	subflow->map_seq = subflow->iasn;
	subflow->map_subflow_seq = 1;

	/* the socket is not connected yet, no msk/subflow ops can access/race
	 * accessing the field below
	 */
	WRITE_ONCE(msk->remote_key, subflow->remote_key);
	WRITE_ONCE(msk->local_key, subflow->local_key);
	WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
	WRITE_ONCE(msk->snd_nxt, msk->write_seq);
	WRITE_ONCE(msk->ack_seq, ack_seq);
	WRITE_ONCE(msk->can_ack, 1);
	WRITE_ONCE(msk->snd_una, msk->write_seq);
	atomic64_set(&msk->rcv_wnd_sent, ack_seq);

	mptcp_pm_new_connection(msk, ssk, 0);

+24 −4
Original line number Diff line number Diff line
@@ -126,6 +126,15 @@
#define MPTCP_CONNECTED		6
#define MPTCP_RESET_SCHEDULER	7

struct mptcp_skb_cb {
	u64 map_seq;
	u64 end_seq;
	u32 offset;
	u8  has_rxtstamp:1;
};

#define MPTCP_SKB_CB(__skb)	((struct mptcp_skb_cb *)&((__skb)->cb[0]))

static inline bool before64(__u64 seq1, __u64 seq2)
{
	return (__s64)(seq1 - seq2) < 0;
@@ -467,17 +476,22 @@ struct mptcp_subflow_context {
		send_fastclose : 1,
		send_infinite_map : 1,
		rx_eof : 1,
		can_ack : 1,        /* only after processing the remote a key */
		remote_key_valid : 1,        /* received the peer key from */
		disposable : 1,	    /* ctx can be free at ulp release time */
		stale : 1,	    /* unable to snd/rcv data, do not use for xmit */
		local_id_valid : 1, /* local_id is correctly initialized */
		valid_csum_seen : 1;        /* at least one csum validated */
		valid_csum_seen : 1,        /* at least one csum validated */
		is_mptfo : 1,	    /* subflow is doing TFO */
		__unused : 8;
	enum mptcp_data_avail data_avail;
	u32	remote_nonce;
	u64	thmac;
	u32	local_nonce;
	u32	remote_token;
	u8	hmac[MPTCPOPT_HMAC_LEN];
	union {
		u8	hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
		u64	iasn;	    /* initial ack sequence number, MPC subflows only */
	};
	u8	local_id;
	u8	remote_id;
	u8	reset_seen:1;
@@ -603,7 +617,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt);
				     const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -619,6 +633,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);

bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
			   const struct mptcp_addr_info *b, bool use_port);
@@ -826,6 +841,11 @@ void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);

void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
				   const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
					      struct request_sock *req);

static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.addr_signal) &
Loading