Commit 124de271 authored by David S. Miller

Merge branch 'mptcp-MP_FAIL-timeout'



Mat Martineau says:

====================
mptcp: Timeout for MP_FAIL response

When one peer sends an infinite mapping to coordinate fallback from
MPTCP to regular TCP, the other peer is expected to send a packet with
the MPTCP MP_FAIL option to acknowledge the infinite mapping. Rather
than leave the connection in some half-fallback state, this series adds
a timeout after which the infinite mapping sender will reset the
connection.

Patch 1 adds a fallback self test.

Patches 2-5 arm the MPTCP socket's sk_timer (for TCP_RTO_MAX) and reset the
MPTCP connection if no MP_FAIL response was received before the timer fires.

Patches 6 and 7 extend the self test to check MP_FAIL-related MIBs.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents b1190d51 53f368bf
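
The flow described in the cover letter can be illustrated with a small,
self-contained userspace sketch (illustrative only, not part of this
commit): arm a timeout when an MP_FAIL response becomes expected, cancel
it when the response or the infinite mapping arrives, and reset the
connection if it fires first. All names and the tick-based clock below
are hypothetical; the kernel patches in the diff use the msk's sk_timer
armed for TCP_RTO_MAX, the subflow's mp_fail_response_expect flag and
mptcp_subflow_reset().

/* Illustrative sketch of the MP_FAIL response timeout (userspace, hypothetical names). */
#include <stdbool.h>
#include <stdio.h>

enum conn_state { ESTABLISHED, FALLBACK_DONE, CONN_RESET };

struct conn {
	enum conn_state state;
	bool mp_fail_response_expect;	/* mirrors subflow->mp_fail_response_expect */
	bool timer_armed;
	unsigned long timer_expiry;
};

/* Bad data seen on the only subflow: MP_FAIL was sent, so expect the
 * peer's MP_FAIL response and arm the timeout. */
static void expect_mp_fail_response(struct conn *c, unsigned long now,
				    unsigned long rto_max)
{
	c->mp_fail_response_expect = true;
	c->timer_armed = true;
	c->timer_expiry = now + rto_max;
}

/* The peer's MP_FAIL response (or its infinite mapping) arrived in time:
 * stop the timer and finish the fallback to regular TCP. */
static void mp_fail_response_received(struct conn *c)
{
	c->mp_fail_response_expect = false;
	c->timer_armed = false;
	c->state = FALLBACK_DONE;
}

/* Timer expiry: nothing arrived, so reset the connection instead of
 * leaving it in a half-fallback state. */
static void check_mp_fail_timeout(struct conn *c, unsigned long now)
{
	if (c->timer_armed && now >= c->timer_expiry &&
	    c->mp_fail_response_expect) {
		c->timer_armed = false;
		c->state = CONN_RESET;
	}
}

int main(void)
{
	struct conn timed_out = { .state = ESTABLISHED };
	struct conn answered = { .state = ESTABLISHED };

	/* pretend the timeout is 120 ticks, standing in for TCP_RTO_MAX */
	expect_mp_fail_response(&timed_out, 0, 120);
	check_mp_fail_timeout(&timed_out, 121);		/* no response -> reset */

	expect_mp_fail_response(&answered, 0, 120);
	mp_fail_response_received(&answered);		/* response in time */
	check_mp_fail_timeout(&answered, 121);

	printf("timed_out=%d (2 = reset), answered=%d (1 = fallback done)\n",
	       timed_out.state, answered.state);
	return 0;
}

In the actual patches the expiry path is indirect: the sk_timer callback
(mptcp_timeout_timer) only flags MPTCP_FAIL_NO_RESPONSE and schedules the
MPTCP worker, and the worker then resets the offending subflow via
mptcp_mp_fail_no_response().
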
+17 −1
@@ -287,11 +287,27 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	struct sock *s = (struct sock *)msk;

	pr_debug("fail_seq=%llu", fail_seq);

-	if (!mptcp_has_another_subflow(sk) && READ_ONCE(msk->allow_infinite_fallback))
+	if (mptcp_has_another_subflow(sk) || !READ_ONCE(msk->allow_infinite_fallback))
		return;

	if (!READ_ONCE(subflow->mp_fail_response_expect)) {
		pr_debug("send MP_FAIL response and infinite map");

		subflow->send_mp_fail = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
		subflow->send_infinite_map = 1;
	} else if (s && inet_sk_state_load(s) != TCP_CLOSE) {
		pr_debug("MP_FAIL response received");

		mptcp_data_lock(s);
		if (inet_sk_state_load(s) != TCP_CLOSE)
			sk_stop_timer(s, &s->sk_timer);
		mptcp_data_unlock(s);
	}
}

/* path manager helpers */
+62 −2
@@ -1605,8 +1605,10 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)

out:
	/* ensure the rtx timer is running */
	mptcp_data_lock(sk);
	if (!mptcp_timer_pending(sk))
		mptcp_reset_timer(sk);
	mptcp_data_unlock(sk);
	if (copied)
		__mptcp_check_send_data_fin(sk);
}
@@ -2167,10 +2169,38 @@ static void mptcp_retransmit_timer(struct timer_list *t)
	sock_put(sk);
}

static struct mptcp_subflow_context *
mp_fail_response_expect_subflow(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow, *ret = NULL;

	mptcp_for_each_subflow(msk, subflow) {
		if (READ_ONCE(subflow->mp_fail_response_expect)) {
			ret = subflow;
			break;
		}
	}

	return ret;
}

static void mptcp_check_mp_fail_response(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;

	bh_lock_sock(sk);
	subflow = mp_fail_response_expect_subflow(msk);
	if (subflow)
		__set_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags);
	bh_unlock_sock(sk);
}

static void mptcp_timeout_timer(struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);

	mptcp_check_mp_fail_response(mptcp_sk(sk));
	mptcp_schedule_work(sk);
	sock_put(sk);
}
@@ -2491,8 +2521,27 @@ static void __mptcp_retrans(struct sock *sk)
reset_timer:
	mptcp_check_and_set_pending(sk);

	mptcp_data_lock(sk);
	if (!mptcp_timer_pending(sk))
		mptcp_reset_timer(sk);
	mptcp_data_unlock(sk);
}

static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;
	struct sock *ssk;
	bool slow;

	subflow = mp_fail_response_expect_subflow(msk);
	if (subflow) {
		pr_debug("MP_FAIL doesn't respond, reset the subflow");

		ssk = mptcp_subflow_tcp_sock(subflow);
		slow = lock_sock_fast(ssk);
		mptcp_subflow_reset(ssk);
		unlock_sock_fast(ssk, slow);
	}
}

static void mptcp_worker(struct work_struct *work)
@@ -2535,6 +2584,9 @@ static void mptcp_worker(struct work_struct *work)
	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
		__mptcp_retrans(sk);

	if (test_and_clear_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags))
		mptcp_mp_fail_no_response(msk);

unlock:
	release_sock(sk);
	sock_put(sk);
@@ -2651,8 +2703,10 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
		} else {
			pr_debug("Sending DATA_FIN on subflow %p", ssk);
			tcp_send_ack(ssk);
			mptcp_data_lock(sk);
			if (!mptcp_timer_pending(sk))
				mptcp_reset_timer(sk);
			mptcp_data_unlock(sk);
		}
		break;
	}
@@ -2753,8 +2807,10 @@ static void __mptcp_destroy_sock(struct sock *sk)
	/* join list will be eventually flushed (with rst) at sock lock release time*/
	list_splice_init(&msk->conn_list, &conn_list);

	sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
	mptcp_data_lock(sk);
	mptcp_stop_timer(sk);
	sk_stop_timer(sk, &sk->sk_timer);
	mptcp_data_unlock(sk);
	msk->pm.status = 0;

	/* clears msk->subflow, allowing the following loop to close
@@ -2816,7 +2872,9 @@ static void mptcp_close(struct sock *sk, long timeout)
		__mptcp_destroy_sock(sk);
		do_cancel_work = true;
	} else {
		mptcp_data_lock(sk);
		sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN);
		mptcp_data_unlock(sk);
	}
	release_sock(sk);
	if (do_cancel_work)
@@ -2861,8 +2919,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
		__mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
	}

	sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
	mptcp_data_lock(sk);
	mptcp_stop_timer(sk);
	sk_stop_timer(sk, &sk->sk_timer);
	mptcp_data_unlock(sk);

	if (mptcp_sk(sk)->token)
		mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+2 −0
@@ -116,6 +116,7 @@
#define MPTCP_WORK_EOF		3
#define MPTCP_FALLBACK_DONE	4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
#define MPTCP_FAIL_NO_RESPONSE	6

/* MPTCP socket release cb flags */
#define MPTCP_PUSH_PENDING	1
@@ -448,6 +449,7 @@ struct mptcp_subflow_context {
		stale : 1,	    /* unable to snd/rcv data, do not use for xmit */
		local_id_valid : 1; /* local_id is correctly initialized */
	enum mptcp_data_avail data_avail;
	bool	mp_fail_response_expect;
	u32	remote_nonce;
	u64	thmac;
	u32	local_nonce;
+13 −0
@@ -968,6 +968,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	bool csum_reqd = READ_ONCE(msk->csum_enabled);
	struct sock *sk = (struct sock *)msk;
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
@@ -1009,6 +1010,12 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
		pr_debug("infinite mapping received");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		subflow->map_data_len = 0;
		if (sk && inet_sk_state_load(sk) != TCP_CLOSE) {
			mptcp_data_lock(sk);
			if (inet_sk_state_load(sk) != TCP_CLOSE)
				sk_stop_timer(sk, &sk->sk_timer);
			mptcp_data_unlock(sk);
		}
		return MAPPING_INVALID;
	}

@@ -1217,6 +1224,12 @@ static bool subflow_check_data_avail(struct sock *ssk)
				tcp_send_active_reset(ssk, GFP_ATOMIC);
				while ((skb = skb_peek(&ssk->sk_receive_queue)))
					sk_eat_skb(ssk, skb);
			} else {
				WRITE_ONCE(subflow->mp_fail_response_expect, true);
				/* The data lock is acquired in __mptcp_move_skbs() */
				sk_reset_timer((struct sock *)msk,
					       &((struct sock *)msk)->sk_timer,
					       jiffies + TCP_RTO_MAX);
			}
			WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
			return true;
+8 −0
@@ -12,6 +12,9 @@ CONFIG_NF_TABLES=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
@@ -19,3 +22,8 @@ CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_CLS_FW=m
CONFIG_NET_SCH_INGRESS=m