Commit 24aa160d authored by David S. Miller
Browse files

Merge branch 'mptcp-fixes'



Mat Martineau says:

====================
mptcp: Miscellaneous fixes

Here are five changes we've collected and tested in the mptcp-tree:

Patch 1 changes handling of the MPTCP-level snd_next value during the
recovery phase after a subflow link failure.

Patches 2 and 3 are some small refactoring changes to replace some
open-coded bits.

Patch 4 removes an unused field in struct mptcp_sock.

Patch 5 restarts the MPTCP retransmit timer when there is
not-yet-transmitted data to send and all previously sent data has been
acknowledged. This prevents some sending stalls.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8765de69 3241a9c0
Loading
Loading
Loading
Loading
+5 −10
Original line number Original line Diff line number Diff line
@@ -748,9 +748,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
	/* can't send MP_PRIO with MPC, as they share the same option space:
	/* can't send MP_PRIO with MPC, as they share the same option space:
	 * 'backup'. Also it makes no sense at all
	 * 'backup'. Also it makes no sense at all
	 */
	 */
	if (!subflow->send_mp_prio ||
	if (!subflow->send_mp_prio || (opts->suboptions & OPTIONS_MPTCP_MPC))
	    ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
	      OPTION_MPTCP_MPC_ACK) & opts->suboptions))
		return false;
		return false;


	/* account for the trailing 'nop' option */
	/* account for the trailing 'nop' option */
@@ -1019,11 +1017,9 @@ static void ack_update_msk(struct mptcp_sock *msk,
	old_snd_una = msk->snd_una;
	old_snd_una = msk->snd_una;
	new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
	new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);


	/* ACK for data not even sent yet and even above recovery bound? Ignore.*/
	/* ACK for data not even sent yet? Ignore.*/
	if (unlikely(after64(new_snd_una, snd_nxt))) {
	if (unlikely(after64(new_snd_una, snd_nxt)))
		if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
		new_snd_una = old_snd_una;
		new_snd_una = old_snd_una;
	}


	new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
	new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;


@@ -1329,8 +1325,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
						   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
						   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
			}
			}
		}
		}
	} else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
	} else if (OPTIONS_MPTCP_MPC & opts->suboptions) {
		    OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
		u8 len, flag = MPTCP_CAP_HMAC_SHA256;
		u8 len, flag = MPTCP_CAP_HMAC_SHA256;


		if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
		if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
+51 −24
Original line number Original line Diff line number Diff line
@@ -956,9 +956,7 @@ static void __mptcp_update_wmem(struct sock *sk)
{
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_sock *msk = mptcp_sk(sk);


#ifdef CONFIG_LOCKDEP
	lockdep_assert_held_once(&sk->sk_lock.slock);
	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
#endif


	if (!msk->wmem_reserved)
	if (!msk->wmem_reserved)
		return;
		return;
@@ -1107,7 +1105,8 @@ static void __mptcp_clean_una(struct sock *sk)
	if (cleaned && tcp_under_memory_pressure(sk))
	if (cleaned && tcp_under_memory_pressure(sk))
		__mptcp_mem_reclaim_partial(sk);
		__mptcp_mem_reclaim_partial(sk);


	if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
	if (snd_una == READ_ONCE(msk->snd_nxt) &&
	    snd_una == READ_ONCE(msk->write_seq)) {
		if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
		if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
			mptcp_stop_timer(sk);
			mptcp_stop_timer(sk);
	} else {
	} else {
@@ -1117,9 +1116,8 @@ static void __mptcp_clean_una(struct sock *sk)


static void __mptcp_clean_una_wakeup(struct sock *sk)
static void __mptcp_clean_una_wakeup(struct sock *sk)
{
{
#ifdef CONFIG_LOCKDEP
	lockdep_assert_held_once(&sk->sk_lock.slock);
	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));

#endif
	__mptcp_clean_una(sk);
	__mptcp_clean_una(sk);
	mptcp_write_space(sk);
	mptcp_write_space(sk);
}
}
@@ -1525,6 +1523,38 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
	release_sock(ssk);
	release_sock(ssk);
}
}


/* Account for @sent bytes of @dfrag that were just pushed out.
 *
 * @msk:   MPTCP socket whose send state is updated
 * @dfrag: data fragment the bytes were taken from
 * @sent:  number of bytes pushed by the caller
 *
 * Bumps dfrag->already_sent, decrements msk->snd_burst by the same
 * amount and moves msk->snd_nxt forward only when the end of the sent
 * range lies beyond the current msk->snd_nxt.
 */
static void mptcp_update_post_push(struct mptcp_sock *msk,
				   struct mptcp_data_frag *dfrag,
				   u32 sent)
{
	/* start from the sequence number of the first byte of the fragment */
	u64 snd_nxt_new = dfrag->data_seq;

	dfrag->already_sent += sent;

	msk->snd_burst -= sent;

	/* already_sent now includes @sent, so this is the sequence number
	 * right after the last byte of this fragment pushed so far
	 */
	snd_nxt_new += dfrag->already_sent;

	/* snd_nxt_new can be smaller than snd_nxt in case mptcp
	 * is recovering after a failover. In that event, this re-sends
	 * old segments.
	 *
	 * Thus compute snd_nxt_new candidate based on
	 * the dfrag->data_seq that was sent and the data
	 * that has been handed to the subflow for transmission
	 * and skip update in case it was old dfrag.
	 */
	if (likely(after64(snd_nxt_new, msk->snd_nxt)))
		msk->snd_nxt = snd_nxt_new;
}

/* Set the MPTCP_PUSH_PENDING flag on the msk owning @sk when
 * mptcp_send_head() reports something for @sk and the flag is not
 * already set.
 */
static void mptcp_check_and_set_pending(struct sock *sk)
{
	/* NOTE(review): the test_bit() check before set_bit() presumably
	 * avoids a redundant atomic write when the flag is already set -
	 * confirm; the test and the set are two separate operations.
	 */
	if (mptcp_send_head(sk) &&
	    !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
		set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
}

void __mptcp_push_pending(struct sock *sk, unsigned int flags)
void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
{
	struct sock *prev_ssk = NULL, *ssk = NULL;
	struct sock *prev_ssk = NULL, *ssk = NULL;
@@ -1568,12 +1598,10 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
			}
			}


			info.sent += ret;
			info.sent += ret;
			dfrag->already_sent += ret;
			msk->snd_nxt += ret;
			msk->snd_burst -= ret;
			msk->tx_pending_data -= ret;
			copied += ret;
			copied += ret;
			len -= ret;
			len -= ret;

			mptcp_update_post_push(msk, dfrag, ret);
		}
		}
		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
	}
	}
@@ -1626,13 +1654,11 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
				goto out;
				goto out;


			info.sent += ret;
			info.sent += ret;
			dfrag->already_sent += ret;
			msk->snd_nxt += ret;
			msk->snd_burst -= ret;
			msk->tx_pending_data -= ret;
			copied += ret;
			copied += ret;
			len -= ret;
			len -= ret;
			first = false;
			first = false;

			mptcp_update_post_push(msk, dfrag, ret);
		}
		}
		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
	}
	}
@@ -1742,7 +1768,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
		frag_truesize += psize;
		frag_truesize += psize;
		pfrag->offset += frag_truesize;
		pfrag->offset += frag_truesize;
		WRITE_ONCE(msk->write_seq, msk->write_seq + psize);
		WRITE_ONCE(msk->write_seq, msk->write_seq + psize);
		msk->tx_pending_data += psize;


		/* charge data on mptcp pending queue to the msk socket
		/* charge data on mptcp pending queue to the msk socket
		 * Note: we charge such data both to sk and ssk
		 * Note: we charge such data both to sk and ssk
@@ -2230,15 +2255,11 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
		return false;
		return false;
	}
	}


	/* will accept ack for reijected data before re-sending them */
	if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
	msk->recovery_snd_nxt = msk->snd_nxt;
	msk->recovery_snd_nxt = msk->snd_nxt;
	msk->recovery = true;
	msk->recovery = true;
	mptcp_data_unlock(sk);
	mptcp_data_unlock(sk);


	msk->first_pending = rtx_head;
	msk->first_pending = rtx_head;
	msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
	msk->snd_nxt = rtx_head->data_seq;
	msk->snd_burst = 0;
	msk->snd_burst = 0;


	/* be sure to clear the "sent status" on all re-injected fragments */
	/* be sure to clear the "sent status" on all re-injected fragments */
@@ -2401,6 +2422,9 @@ static void __mptcp_retrans(struct sock *sk)
	int ret;
	int ret;


	mptcp_clean_una_wakeup(sk);
	mptcp_clean_una_wakeup(sk);

	/* first check ssk: need to kick "stale" logic */
	ssk = mptcp_subflow_get_retrans(msk);
	dfrag = mptcp_rtx_head(sk);
	dfrag = mptcp_rtx_head(sk);
	if (!dfrag) {
	if (!dfrag) {
		if (mptcp_data_fin_enabled(msk)) {
		if (mptcp_data_fin_enabled(msk)) {
@@ -2413,10 +2437,12 @@ static void __mptcp_retrans(struct sock *sk)
			goto reset_timer;
			goto reset_timer;
		}
		}


		if (!mptcp_send_head(sk))
			return;
			return;

		goto reset_timer;
	}
	}


	ssk = mptcp_subflow_get_retrans(msk);
	if (!ssk)
	if (!ssk)
		goto reset_timer;
		goto reset_timer;


@@ -2443,6 +2469,8 @@ static void __mptcp_retrans(struct sock *sk)
	release_sock(ssk);
	release_sock(ssk);


reset_timer:
reset_timer:
	mptcp_check_and_set_pending(sk);

	if (!mptcp_timer_pending(sk))
	if (!mptcp_timer_pending(sk))
		mptcp_reset_timer(sk);
		mptcp_reset_timer(sk);
}
}
@@ -2509,7 +2537,6 @@ static int __mptcp_init_sock(struct sock *sk)
	msk->first_pending = NULL;
	msk->first_pending = NULL;
	msk->wmem_reserved = 0;
	msk->wmem_reserved = 0;
	WRITE_ONCE(msk->rmem_released, 0);
	WRITE_ONCE(msk->rmem_released, 0);
	msk->tx_pending_data = 0;
	msk->timer_ival = TCP_RTO_MIN;
	msk->timer_ival = TCP_RTO_MIN;


	msk->first = NULL;
	msk->first = NULL;
+0 −1
Original line number Original line Diff line number Diff line
@@ -254,7 +254,6 @@ struct mptcp_sock {
	struct sk_buff  *ooo_last_skb;
	struct sk_buff  *ooo_last_skb;
	struct rb_root  out_of_order_queue;
	struct rb_root  out_of_order_queue;
	struct sk_buff_head receive_queue;
	struct sk_buff_head receive_queue;
	int		tx_pending_data;
	struct list_head conn_list;
	struct list_head conn_list;
	struct list_head rtx_queue;
	struct list_head rtx_queue;
	struct mptcp_data_frag *first_pending;
	struct mptcp_data_frag *first_pending;