Commit 6850ec97 authored by Jakub Kicinski
Browse files

Merge branch 'mptcp-fixes-for-5-13'

Mat Martineau says:

====================
mptcp: Fixes for 5.13

These patches address two issues in MPTCP.

Patch 1 fixes a locking issue affecting MPTCP-level retransmissions.

Patches 2-4 improve handling of out-of-order packet arrival early
in a connection, so it falls back to TCP rather than forcing a
reset. Includes a selftest.
====================

Link: https://lore.kernel.org/r/20210527233140.182728-1-mathew.j.martineau@linux.intel.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 44991d61 69ca3d29
Loading
Loading
Loading
Loading
+15 −1
Original line number Diff line number Diff line
@@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

#ifdef CONFIG_LOCKDEP
	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
#endif

	if (!msk->wmem_reserved)
		return;

@@ -1085,10 +1089,20 @@ static void __mptcp_clean_una(struct sock *sk)

/* Reclaim acked data and wake writers; caller must hold the msk data lock.
 * Lock-requiring variant of mptcp_clean_una_wakeup(): callers that do not
 * already hold sk_lock.slock must use the locked wrapper instead.
 */
static void __mptcp_clean_una_wakeup(struct sock *sk)
{
#ifdef CONFIG_LOCKDEP
	/* Assert the locking contract under lockdep builds: this function
	 * touches state protected by the msk data lock (sk_lock.slock).
	 */
	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
#endif
	/* Drop fully-acked data first, then notify writers that write
	 * space may have become available.
	 */
	__mptcp_clean_una(sk);
	mptcp_write_space(sk);
}

/* Locked wrapper around __mptcp_clean_una_wakeup() for contexts that do not
 * already hold the msk data lock (e.g. the retransmit worker path, which
 * previously called the unlocked variant directly — the locking bug this
 * commit fixes).
 */
static void mptcp_clean_una_wakeup(struct sock *sk)
{
	mptcp_data_lock(sk);
	__mptcp_clean_una_wakeup(sk);
	mptcp_data_unlock(sk);
}

static void mptcp_enter_memory_pressure(struct sock *sk)
{
	struct mptcp_subflow_context *subflow;
@@ -2299,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk)
	struct sock *ssk;
	int ret;

	__mptcp_clean_una_wakeup(sk);
	mptcp_clean_una_wakeup(sk);
	dfrag = mptcp_rtx_head(sk);
	if (!dfrag) {
		if (mptcp_data_fin_enabled(msk)) {
+40 −39
Original line number Diff line number Diff line
@@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
		/* we can receive and accept an in-window, out-of-order pkt,
		 * which may not carry the MP_CAPABLE opt even on mptcp enabled
		 * paths: always try to extract the peer key, and fallback
		 * for packets missing it.
		 * Even OoO DSS packets coming legitly after dropped or
		 * reordered MPC will cause fallback, but we don't have other
		 * options.
		 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
@@ -1012,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk)

		status = get_mapping_status(ssk, msk);
		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			return true;
		}
		if (unlikely(status == MAPPING_INVALID))
			goto fallback;

		if (unlikely(status == MAPPING_DUMMY))
			goto fallback;

		if (status != MAPPING_OK)
			goto no_data;
@@ -1039,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			if (!subflow->mpc_map)
				goto fallback;
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
@@ -1070,10 +1057,14 @@ static bool subflow_check_data_avail(struct sock *ssk)
no_data:
	subflow_sched_work_if_closed(msk, ssk);
	return false;
fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();

fallback:
	/* RFC 8684 section 3.7. */
	if (subflow->mp_join || subflow->fully_established) {
		/* fatal protocol error, close the socket.
		 * subflow_error_report() will introduce the appropriate barriers
		 */
		ssk->sk_err = EBADMSG;
		ssk->sk_error_report(ssk);
		tcp_set_state(ssk, TCP_CLOSE);
		subflow->reset_transient = 0;
@@ -1083,6 +1074,16 @@ static bool subflow_check_data_avail(struct sock *ssk)
		return false;
	}

	__mptcp_do_fallback(msk);
	skb = skb_peek(&ssk->sk_receive_queue);
	subflow->map_valid = 1;
	subflow->map_seq = READ_ONCE(msk->ack_seq);
	subflow->map_data_len = skb->len;
	subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
	subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
	return true;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+9 −4
Original line number Diff line number Diff line
@@ -501,6 +501,7 @@ do_transfer()
	local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
	local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
	local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
	local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")

	expect_synrx=$((stat_synrx_last_l))
	expect_ackrx=$((stat_ackrx_last_l))
@@ -518,10 +519,14 @@ do_transfer()
			"${stat_synrx_now_l}" "${expect_synrx}" 1>&2
		retc=1
	fi
	if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
	if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
		if [ ${stat_ooo_now} -eq 0 ]; then
			printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
				"${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
			rets=1
		else
			printf "[ Note ] fallback due to TCP OoO"
		fi
	fi

	if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then