Commit efaa9990 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mptcp-rtx-timer'



Paolo Abeni says:

====================
mptcp: fix 3rd ack rtx timer

Eric noted that the MPTCP code do the wrong thing to schedule
the MPJ 3rd ack timer. He also provided a patch to address the
issues (patch 1/2).

To fix for good the MPJ 3rd ack retransmission timer, we additionally
need to set it after the current ack is transmitted (patch 2/2)

Note that the bug went unnotice so far because all the related
tests required some running data transfer, and that causes
MPTCP-level ack even on the opening MPJ subflow. We now have
explicit packet drill coverage for this code path.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 262ae1f9 bcd97734
Loading
Loading
Loading
Loading
+9 −23
Original line number Diff line number Diff line
@@ -422,28 +422,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
	return false;
}

/* MP_JOIN client subflow must wait for 4th ack before sending any data:
 * TCP can't schedule delack timer before the subflow is fully established.
 * MPTCP uses the delack timer to do 3rd ack retransmissions
 */
static void schedule_3rdack_retransmission(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long timeout;

	/* reschedule with a timeout above RTT, as we must look only for drop */
	if (tp->srtt_us)
		timeout = tp->srtt_us << 1;
	else
		timeout = TCP_TIMEOUT_INIT;

	WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}

static void clear_3rdack_retransmission(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -526,7 +504,15 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
		*size = TCPOLEN_MPTCP_MPJ_ACK;
		pr_debug("subflow=%p", subflow);

		schedule_3rdack_retransmission(sk);
		/* we can use the full delegate action helper only from BH context
		 * If we are in process context - sk is flushing the backlog at
		 * socket lock release time - just set the appropriate flag, will
		 * be handled by the release callback
		 */
		if (sock_owned_by_user(sk))
			set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status);
		else
			mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK);
		return true;
	}
	return false;
+42 −9
Original line number Diff line number Diff line
@@ -1596,7 +1596,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
			if (!xmit_ssk)
				goto out;
			if (xmit_ssk != ssk) {
				mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
				mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
						       MPTCP_DELEGATE_SEND);
				goto out;
			}

@@ -2943,7 +2944,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
		if (xmit_ssk == ssk)
			__mptcp_subflow_push_pending(sk, ssk);
		else if (xmit_ssk)
			mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
			mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
	} else {
		set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
	}
@@ -2993,18 +2994,50 @@ static void mptcp_release_cb(struct sock *sk)
	__mptcp_update_rmem(sk);
}

/* MP_JOIN client subflow must wait for 4th ack before sending any data:
 * TCP can't schedule delack timer before the subflow is fully established.
 * MPTCP uses the delack timer to do 3rd ack retransmissions
 */
static void schedule_3rdack_retransmission(struct sock *ssk)
{
	struct inet_connection_sock *icsk = inet_csk(ssk);
	struct tcp_sock *tp = tcp_sk(ssk);
	unsigned long timeout;

	if (mptcp_subflow_ctx(ssk)->fully_established)
		return;

	/* reschedule with a timeout above RTT, as we must look only for drop */
	if (tp->srtt_us)
		timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1));
	else
		timeout = TCP_TIMEOUT_INIT;
	timeout += jiffies;

	WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
}

void mptcp_subflow_process_delegated(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;

	if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
		mptcp_data_lock(sk);
		if (!sock_owned_by_user(sk))
			__mptcp_subflow_push_pending(sk, ssk);
		else
			set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
		mptcp_data_unlock(sk);
	mptcp_subflow_delegated_done(subflow);
		mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
	}
	if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
		schedule_3rdack_retransmission(ssk);
		mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
	}
}

static int mptcp_hash(struct sock *sk)
+9 −8
Original line number Diff line number Diff line
@@ -387,6 +387,7 @@ struct mptcp_delegated_action {
DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);

#define MPTCP_DELEGATE_SEND		0
#define MPTCP_DELEGATE_ACK		1

/* MPTCP subflow context */
struct mptcp_subflow_context {
@@ -492,23 +493,23 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,

void mptcp_subflow_process_delegated(struct sock *ssk);

static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
{
	struct mptcp_delegated_action *delegated;
	bool schedule;

	/* the caller held the subflow bh socket lock */
	lockdep_assert_in_softirq();

	/* The implied barrier pairs with mptcp_subflow_delegated_done(), and
	 * ensures the below list check sees list updates done prior to status
	 * bit changes
	 */
	if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
	if (!test_and_set_bit(action, &subflow->delegated_status)) {
		/* still on delegated list from previous scheduling */
		if (!list_empty(&subflow->delegated_node))
			return;

		/* the caller held the subflow bh socket lock */
		lockdep_assert_in_softirq();

		delegated = this_cpu_ptr(&mptcp_delegated_actions);
		schedule = list_empty(&delegated->head);
		list_add_tail(&subflow->delegated_node, &delegated->head);
@@ -533,16 +534,16 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)

static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
{
	return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
	return !!READ_ONCE(subflow->delegated_status);
}

static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
{
	/* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
	 * touching the status bit
	 */
	smp_wmb();
	clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
	clear_bit(action, &subflow->delegated_status);
}

int mptcp_is_enabled(const struct net *net);