Commit 38e3bfa8 authored by David S. Miller
Browse files

Merge branch 'mptcp-improve-backup-subflows'



Mat Martineau says:

====================
mptcp: Improve use of backup subflows

Multipath TCP combines multiple TCP subflows into one stream, and the
MPTCP-level socket must decide which subflow to use when sending (or
resending) chunks of data. The choice of the "best" subflow to transmit
on can vary depending on the priority (normal or backup) for each
subflow and how well the subflow is performing.

In order to improve MPTCP performance when some subflows are failing,
this patch set changes how backup subflows are utilized and introduces
tracking of "stale" subflows that are still connected but not making
progress.

Patch 1 adjusts MPTCP-level retransmit timeouts to use data from all
subflows.

Patch 2 makes MPTCP-level retransmissions less aggressive to avoid
resending data that's still queued at the TCP level.

Patch 3 changes the way pending data is handled when subflows are
closed. Unacked MPTCP-level data still in the subflow tx queue is
immediately moved to another subflow for transmission instead of waiting
for MPTCP-level timeouts to trigger retransmission.

Patch 4 has some sysctl code cleanup.

Patches 5 and 6 add tracking of "stale" subflows, so only underlying TCP
subflow connections that appear to be making progress are considered
when selecting a subflow to (re)transmit data. How fast a subflow goes
stale is configurable with a per-namespace sysctl. Related MIB counters
are added too.

Patch 7 makes sure the backup flag is always correctly recorded when the
MP_JOIN SYN/ACK is received for an added subflow.

Patch 8 adds more test cases for backup subflows and stale subflows.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents e5f31552 7d1e6f16
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -45,3 +45,15 @@ allow_join_initial_addr_port - BOOLEAN
	This is a per-namespace sysctl.

	Default: 1

stale_loss_cnt - INTEGER
	The number of MPTCP-level retransmission intervals with no traffic and
	pending outstanding data on a given subflow required to declare it stale.
	The packet scheduler ignores stale subflows.
	A low stale_loss_cnt value allows for fast active-backup switch-over,
	while a high value maximizes link utilization in edge scenarios, e.g.
	a lossy link with high BER or a peer pausing data processing.

	This is a per-namespace sysctl.

	Default: 4
+20 −6
Original line number Diff line number Diff line
@@ -21,43 +21,50 @@ struct mptcp_pernet {
	struct ctl_table_header *ctl_table_hdr;
#endif

	u8 mptcp_enabled;
	unsigned int add_addr_timeout;
	unsigned int stale_loss_cnt;
	u8 mptcp_enabled;
	u8 checksum_enabled;
	u8 allow_join_initial_addr_port;
};

static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
/* Fetch the MPTCP per-namespace data registered under mptcp_pernet_id
 * for @net.
 */
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
{
	struct mptcp_pernet *pernet = net_generic(net, mptcp_pernet_id);

	return pernet;
}

int mptcp_is_enabled(struct net *net)
int mptcp_is_enabled(const struct net *net)
{
	return mptcp_get_pernet(net)->mptcp_enabled;
}

unsigned int mptcp_get_add_addr_timeout(struct net *net)
unsigned int mptcp_get_add_addr_timeout(const struct net *net)
{
	return mptcp_get_pernet(net)->add_addr_timeout;
}

int mptcp_is_checksum_enabled(struct net *net)
int mptcp_is_checksum_enabled(const struct net *net)
{
	return mptcp_get_pernet(net)->checksum_enabled;
}

int mptcp_allow_join_id0(struct net *net)
int mptcp_allow_join_id0(const struct net *net)
{
	return mptcp_get_pernet(net)->allow_join_initial_addr_port;
}

unsigned int mptcp_stale_loss_cnt(const struct net *net)
{
	return mptcp_get_pernet(net)->stale_loss_cnt;
}

/* Initialise every tunable in @pernet to its default value:
 * mptcp_enabled = 1, add_addr_timeout = TCP_RTO_MAX, checksum_enabled = 0,
 * allow_join_initial_addr_port = 1, stale_loss_cnt = 4.
 */
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
	/* unsigned int tunables */
	pernet->add_addr_timeout = TCP_RTO_MAX;
	pernet->stale_loss_cnt = 4;

	/* u8 on/off style tunables */
	pernet->mptcp_enabled = 1;
	pernet->checksum_enabled = 0;
	pernet->allow_join_initial_addr_port = 1;
}

#ifdef CONFIG_SYSCTL
@@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
		.extra1       = SYSCTL_ZERO,
		.extra2       = SYSCTL_ONE
	},
	{
		.procname = "stale_loss_cnt",
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_douintvec_minmax,
	},
	{}
};

@@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
	table[1].data = &pernet->add_addr_timeout;
	table[2].data = &pernet->checksum_enabled;
	table[3].data = &pernet->allow_join_initial_addr_port;
	table[4].data = &pernet->stale_loss_cnt;

	hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
	if (!hdr)
+2 −0
Original line number Diff line number Diff line
@@ -45,6 +45,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
	SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
	SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
	SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
	SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
	SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
	SNMP_MIB_SENTINEL
};

+2 −0
Original line number Diff line number Diff line
@@ -38,6 +38,8 @@ enum linux_mptcp_mib_field {
	MPTCP_MIB_MPPRIOTX,		/* Transmit a MP_PRIO */
	MPTCP_MIB_MPPRIORX,		/* Received a MP_PRIO */
	MPTCP_MIB_RCVPRUNED,		/* Incoming packet dropped due to memory limit */
	MPTCP_MIB_SUBFLOWSTALE,		/* Subflows entered 'stale' status */
	MPTCP_MIB_SUBFLOWRECOVER,	/* Subflows returned to active status after being stale */
	__MPTCP_MIB_MAX
};

+5 −3
Original line number Diff line number Diff line
@@ -975,9 +975,11 @@ static void ack_update_msk(struct mptcp_sock *msk,
	old_snd_una = msk->snd_una;
	new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);

	/* ACK for data not even sent yet? Ignore. */
	if (after64(new_snd_una, snd_nxt))
	/* ACK for data not even sent yet and even above recovery bound? Ignore.*/
	if (unlikely(after64(new_snd_una, snd_nxt))) {
		if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
			new_snd_una = old_snd_una;
	}

	new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;

Loading