Commit 1a77de09 authored by David S. Miller
Browse files

Merge branch 'mptcp-optimizations'



Mat Martineau says:

====================
mptcp: A few optimizations

Here is a set of patches that we've accumulated and tested in the MPTCP
tree.

Patch 1 removes the MPTCP-level tx skb cache that added complexity but
did not provide a meaningful benefit.

Patch 2 uses the fast socket lock in more places.

Patch 3 improves handling of a data-ready flag.

Patch 4 deletes an unnecessary and racy connection state check.

Patch 5 adds a MIB counter for one type of invalid MPTCP header.

Patch 6 improves self test failure output.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 070258ef a4debc47
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
	SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
	SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
	SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
	SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
	SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
	SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
	SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
+1 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
	MPTCP_MIB_JOINACKMAC,		/* HMAC was wrong on ACK + MP_JOIN */
	MPTCP_MIB_DSSNOMATCH,		/* Received a new mapping that did not match the previous one */
	MPTCP_MIB_INFINITEMAPRX,	/* Received an infinite mapping */
	MPTCP_MIB_DSSTCPMISMATCH,	/* DSS-mapping did not map with TCP's sequence numbers */
	MPTCP_MIB_DATACSUMERR,		/* The data checksum fail */
	MPTCP_MIB_OFOQUEUETAIL,	/* Segments inserted into OoO queue tail */
	MPTCP_MIB_OFOQUEUE,		/* Segments inserted into OoO queue */
+6 −4
Original line number Diff line number Diff line
@@ -540,6 +540,7 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
	subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
	if (subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow;

		spin_unlock_bh(&msk->pm.lock);
		pr_debug("send ack for %s%s%s",
@@ -547,9 +548,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
			 mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
			 mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");

		lock_sock(ssk);
		slow = lock_sock_fast(ssk);
		tcp_send_ack(ssk);
		release_sock(ssk);
		unlock_sock_fast(ssk, slow);
		spin_lock_bh(&msk->pm.lock);
	}
}
@@ -566,6 +567,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct sock *sk = (struct sock *)msk;
		struct mptcp_addr_info local;
		bool slow;

		local_address((struct sock_common *)ssk, &local);
		if (!addresses_equal(&local, addr, addr->port))
@@ -578,9 +580,9 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,

		spin_unlock_bh(&msk->pm.lock);
		pr_debug("send ack for mp_prio");
		lock_sock(ssk);
		slow = lock_sock_fast(ssk);
		tcp_send_ack(ssk);
		release_sock(ssk);
		unlock_sock_fast(ssk, slow);
		spin_lock_bh(&msk->pm.lock);

		return 0;
+15 −98
Original line number Diff line number Diff line
@@ -433,23 +433,25 @@ static void mptcp_send_ack(struct mptcp_sock *msk)

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow;

		lock_sock(ssk);
		slow = lock_sock_fast(ssk);
		if (tcp_can_send_ack(ssk))
			tcp_send_ack(ssk);
		release_sock(ssk);
		unlock_sock_fast(ssk, slow);
	}
}

static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
	bool slow;
	int ret;

	lock_sock(ssk);
	slow = lock_sock_fast(ssk);
	ret = tcp_can_send_ack(ssk);
	if (ret)
		tcp_cleanup_rbuf(ssk, 1);
	release_sock(ssk);
	unlock_sock_fast(ssk, slow);
	return ret;
}

@@ -684,9 +686,6 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
	struct sock *sk = (struct sock *)msk;
	unsigned int moved = 0;

	if (inet_sk_state_load(sk) == TCP_CLOSE)
		return false;

	__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
	__mptcp_ofo_queue(msk);
	if (unlikely(ssk->sk_err)) {
@@ -902,22 +901,14 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
		df->data_seq + df->data_len == msk->write_seq;
}

static int mptcp_wmem_with_overhead(struct sock *sk, int size)
static int mptcp_wmem_with_overhead(int size)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	int ret, skbs;

	ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
	skbs = (msk->tx_pending_data + size) / msk->size_goal_cache;
	if (skbs < msk->skb_tx_cache.qlen)
		return ret;

	return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER);
	return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
}

static void __mptcp_wmem_reserve(struct sock *sk, int size)
{
	int amount = mptcp_wmem_with_overhead(sk, size);
	int amount = mptcp_wmem_with_overhead(size);
	struct mptcp_sock *msk = mptcp_sk(sk);

	WARN_ON_ONCE(msk->wmem_reserved);
@@ -1212,49 +1203,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
	return NULL;
}

static bool mptcp_tx_cache_refill(struct sock *sk, int size,
				  struct sk_buff_head *skbs, int *total_ts)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sk_buff *skb;
	int space_needed;

	if (unlikely(tcp_under_memory_pressure(sk))) {
		mptcp_mem_reclaim_partial(sk);

		/* under pressure pre-allocate at most a single skb */
		if (msk->skb_tx_cache.qlen)
			return true;
		space_needed = msk->size_goal_cache;
	} else {
		space_needed = msk->tx_pending_data + size -
			       msk->skb_tx_cache.qlen * msk->size_goal_cache;
	}

	while (space_needed > 0) {
		skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation);
		if (unlikely(!skb)) {
			/* under memory pressure, try to pass the caller a
			 * single skb to allow forward progress
			 */
			while (skbs->qlen > 1) {
				skb = __skb_dequeue_tail(skbs);
				*total_ts -= skb->truesize;
				__kfree_skb(skb);
			}
			return skbs->qlen > 0;
		}

		*total_ts += skb->truesize;
		__skb_queue_tail(skbs, skb);
		space_needed -= msk->size_goal_cache;
	}
	return true;
}

static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sk_buff *skb;

	if (ssk->sk_tx_skb_cache) {
@@ -1265,22 +1215,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
		return true;
	}

	skb = skb_peek(&msk->skb_tx_cache);
	if (skb) {
		if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
			skb = __skb_dequeue(&msk->skb_tx_cache);
			if (WARN_ON_ONCE(!skb))
				return false;

			mptcp_wmem_uncharge(sk, skb->truesize);
			ssk->sk_tx_skb_cache = skb;
			return true;
		}

		/* over memory limit, no point to try to allocate a new skb */
		return false;
	}

	skb = __mptcp_do_alloc_tx_skb(sk, gfp);
	if (!skb)
		return false;
@@ -1296,7 +1230,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
{
	return !ssk->sk_tx_skb_cache &&
	       !skb_peek(&mptcp_sk(sk)->skb_tx_cache) &&
	       tcp_under_memory_pressure(sk);
}

@@ -1339,7 +1272,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
	/* compute send limit */
	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
	avail_size = info->size_goal;
	msk->size_goal_cache = info->size_goal;
	skb = tcp_write_queue_tail(ssk);
	if (skb) {
		/* Limit the write to the size available in the
@@ -1688,7 +1620,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	while (msg_data_left(msg)) {
		int total_ts, frag_truesize = 0;
		struct mptcp_data_frag *dfrag;
		struct sk_buff_head skbs;
		bool dfrag_collapsed;
		size_t psize, offset;

@@ -1721,16 +1652,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
		psize = pfrag->size - offset;
		psize = min_t(size_t, psize, msg_data_left(msg));
		total_ts = psize + frag_truesize;
		__skb_queue_head_init(&skbs);
		if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts))
			goto wait_for_memory;

		if (!mptcp_wmem_alloc(sk, total_ts)) {
			__skb_queue_purge(&skbs);
		if (!mptcp_wmem_alloc(sk, total_ts))
			goto wait_for_memory;
		}

		skb_queue_splice_tail(&skbs, &msk->skb_tx_cache);
		if (copy_page_from_iter(dfrag->page, offset, psize,
					&msg->msg_iter) != psize) {
			mptcp_wmem_uncharge(sk, psize + frag_truesize);
@@ -1787,7 +1712,7 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);

	sk_wait_event(sk, timeo,
		      test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait);
		      test_bit(MPTCP_DATA_READY, &msk->flags), &wait);

	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
@@ -2111,10 +2036,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		 */
		if (unlikely(__mptcp_move_skbs(msk)))
			set_bit(MPTCP_DATA_READY, &msk->flags);
	} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
		/* data to read but mptcp_wait_data() cleared DATA_READY */
		set_bit(MPTCP_DATA_READY, &msk->flags);
	}

out_err:
	if (cmsg_flags && copied >= 0) {
		if (cmsg_flags & MPTCP_CMSG_TS)
@@ -2326,13 +2249,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)

	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
		bool slow;

		lock_sock(tcp_sk);
		slow = lock_sock_fast(tcp_sk);
		if (tcp_sk->sk_state != TCP_CLOSE) {
			tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
			tcp_set_state(tcp_sk, TCP_CLOSE);
		}
		release_sock(tcp_sk);
		unlock_sock_fast(tcp_sk, slow);
	}

	inet_sk_state_store(sk, TCP_CLOSE);
@@ -2462,13 +2386,11 @@ static int __mptcp_init_sock(struct sock *sk)
	INIT_LIST_HEAD(&msk->rtx_queue);
	INIT_WORK(&msk->work, mptcp_worker);
	__skb_queue_head_init(&msk->receive_queue);
	__skb_queue_head_init(&msk->skb_tx_cache);
	msk->out_of_order_queue = RB_ROOT;
	msk->first_pending = NULL;
	msk->wmem_reserved = 0;
	msk->rmem_released = 0;
	msk->tx_pending_data = 0;
	msk->size_goal_cache = TCP_BASE_MSS;

	msk->ack_hint = NULL;
	msk->first = NULL;
@@ -2525,15 +2447,10 @@ static void __mptcp_clear_xmit(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_data_frag *dtmp, *dfrag;
	struct sk_buff *skb;

	WRITE_ONCE(msk->first_pending, NULL);
	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
		dfrag_clear(sk, dfrag);
	while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) {
		sk->sk_forward_alloc += skb->truesize;
		kfree_skb(skb);
	}
}

static void mptcp_cancel_work(struct sock *sk)
+0 −2
Original line number Diff line number Diff line
@@ -245,9 +245,7 @@ struct mptcp_sock {
	struct sk_buff  *ooo_last_skb;
	struct rb_root  out_of_order_queue;
	struct sk_buff_head receive_queue;
	struct sk_buff_head skb_tx_cache;	/* this is wmem accounted */
	int		tx_pending_data;
	int		size_goal_cache;
	struct list_head conn_list;
	struct list_head rtx_queue;
	struct mptcp_data_frag *first_pending;
Loading