Commit 4994d4fa authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'mptcp-path-manager-mode-selection'

Mat Martineau says:

====================
mptcp: Path manager mode selection

MPTCP already has an in-kernel path manager (PM) to add and remove TCP
subflows associated with a given MPTCP connection. This in-kernel PM has
been designed to handle typical server-side use cases, but is not very
flexible or configurable for client devices that may have more
complicated policies to implement.

This patch series from the MPTCP tree is the first step toward adding a
generic-netlink-based API for MPTCP path management, which a privileged
userspace daemon will be able to use to control subflow
establishment. These patches add a per-namespace sysctl to select the
default PM type (in-kernel or userspace) for new MPTCP sockets. New
self-tests confirm expected behavior when userspace PM is selected but
there is no daemon available to handle existing MPTCP PM events.

Subsequent patch series (already staged in the MPTCP tree) will add the
generic netlink path management API.
====================

Link: https://lore.kernel.org/r/20220427225002.231996-1-mathew.j.martineau@linux.intel.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents a41c653d 5ac1d2d6
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -46,6 +46,24 @@ allow_join_initial_addr_port - BOOLEAN

	Default: 1

pm_type - INTEGER

	Set the default path manager type to use for each new MPTCP
	socket. In-kernel path management will control subflow
	connections and address advertisements according to
	per-namespace values configured over the MPTCP netlink
	API. Userspace path management puts per-MPTCP-connection subflow
	connection decisions and address advertisements under control of
	a privileged userspace program, at the cost of more netlink
	traffic to propagate all of the related events and commands.

	This is a per-namespace sysctl.

	* 0 - In-kernel path manager
	* 1 - Userspace path manager

	Default: 0

stale_loss_cnt - INTEGER
	The number of MPTCP-level retransmission intervals with no traffic and
	pending outstanding data on a given subflow required to declare it stale.
+21 −0
Original line number Diff line number Diff line
@@ -16,6 +16,11 @@
#define MPTCP_SYSCTL_PATH "net/mptcp"

static int mptcp_pernet_id;

#ifdef CONFIG_SYSCTL
static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
#endif

struct mptcp_pernet {
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *ctl_table_hdr;
@@ -26,6 +31,7 @@ struct mptcp_pernet {
	u8 mptcp_enabled;
	u8 checksum_enabled;
	u8 allow_join_initial_addr_port;
	u8 pm_type;
};

static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -58,6 +64,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net)
	return mptcp_get_pernet(net)->stale_loss_cnt;
}

int mptcp_get_pm_type(const struct net *net)
{
	return mptcp_get_pernet(net)->pm_type;
}

static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
	pernet->mptcp_enabled = 1;
@@ -65,6 +76,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
	pernet->checksum_enabled = 0;
	pernet->allow_join_initial_addr_port = 1;
	pernet->stale_loss_cnt = 4;
	pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
}

#ifdef CONFIG_SYSCTL
@@ -108,6 +120,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
		.mode = 0644,
		.proc_handler = proc_douintvec_minmax,
	},
	{
		.procname = "pm_type",
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.extra1       = SYSCTL_ZERO,
		.extra2       = &mptcp_pm_type_max
	},
	{}
};

@@ -128,6 +148,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
	table[2].data = &pernet->checksum_enabled;
	table[3].data = &pernet->allow_join_initial_addr_port;
	table[4].data = &pernet->stale_loss_cnt;
	table[5].data = &pernet->pm_type;

	hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
	if (!hdr)
+35 −15
Original line number Diff line number Diff line
@@ -208,7 +208,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,

	spin_lock_bh(&pm->lock);

	if (!READ_ONCE(pm->accept_addr)) {
	if (!READ_ONCE(pm->accept_addr) || mptcp_pm_is_userspace(msk)) {
		mptcp_pm_announce_addr(msk, addr, true);
		mptcp_pm_add_addr_send_ack(msk);
	} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
@@ -415,21 +415,41 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)

void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
	msk->pm.add_addr_signaled = 0;
	msk->pm.add_addr_accepted = 0;
	msk->pm.local_addr_used = 0;
	msk->pm.subflows = 0;
	msk->pm.rm_list_tx.nr = 0;
	msk->pm.rm_list_rx.nr = 0;
	WRITE_ONCE(msk->pm.work_pending, false);
	WRITE_ONCE(msk->pm.addr_signal, 0);
	WRITE_ONCE(msk->pm.accept_addr, false);
	WRITE_ONCE(msk->pm.accept_subflow, false);
	WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
	msk->pm.status = 0;
	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
	struct mptcp_pm_data *pm = &msk->pm;

	pm->add_addr_signaled = 0;
	pm->add_addr_accepted = 0;
	pm->local_addr_used = 0;
	pm->subflows = 0;
	pm->rm_list_tx.nr = 0;
	pm->rm_list_rx.nr = 0;
	WRITE_ONCE(pm->pm_type, pm_type);

	mptcp_pm_nl_data_init(msk);
	if (pm_type == MPTCP_PM_TYPE_KERNEL) {
		bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);

		/* pm->work_pending must be only be set to 'true' when
		 * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
		 */
		WRITE_ONCE(pm->work_pending,
			   (!!mptcp_pm_get_local_addr_max(msk) &&
			    subflows_allowed) ||
			   !!mptcp_pm_get_add_addr_signal_max(msk));
		WRITE_ONCE(pm->accept_addr,
			   !!mptcp_pm_get_add_addr_accept_max(msk) &&
			   subflows_allowed);
		WRITE_ONCE(pm->accept_subflow, subflows_allowed);
	} else {
		WRITE_ONCE(pm->work_pending, 0);
		WRITE_ONCE(pm->accept_addr, 0);
		WRITE_ONCE(pm->accept_subflow, 0);
	}

	WRITE_ONCE(pm->addr_signal, 0);
	WRITE_ONCE(pm->remote_deny_join_id0, false);
	pm->status = 0;
	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}

void mptcp_pm_data_init(struct mptcp_sock *msk)
+12 −18
Original line number Diff line number Diff line
@@ -1061,18 +1061,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
	return ret;
}

void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;
	bool subflows;

	subflows = !!mptcp_pm_get_subflows_max(msk);
	WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) ||
		   !!mptcp_pm_get_add_addr_signal_max(msk));
	WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows);
	WRITE_ONCE(pm->accept_subflow, subflows);
}

#define MPTCP_PM_CMD_GRP_OFFSET       0
#define MPTCP_PM_EV_GRP_OFFSET        1

@@ -1232,7 +1220,8 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (!READ_ONCE(msk->fully_established))
		if (!READ_ONCE(msk->fully_established) ||
		    mptcp_pm_is_userspace(msk))
			goto next;

		lock_sock(sk);
@@ -1375,6 +1364,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
		struct sock *sk = (struct sock *)msk;
		bool remove_subflow;

		if (mptcp_pm_is_userspace(msk))
			goto next;

		if (list_empty(&msk->conn_list)) {
			mptcp_pm_remove_anno_addr(msk, addr, false);
			goto next;
@@ -1409,7 +1401,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
		struct sock *sk = (struct sock *)msk;
		struct mptcp_addr_info msk_local;

		if (list_empty(&msk->conn_list))
		if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
			goto next;

		local_address((struct sock_common *)msk, &msk_local);
@@ -1516,9 +1508,11 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (!mptcp_pm_is_userspace(msk)) {
			lock_sock(sk);
			mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
			release_sock(sk);
		}

		sock_put(sk);
		cond_resched();
@@ -1791,7 +1785,7 @@ static int mptcp_nl_set_flags(struct net *net,
	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (list_empty(&msk->conn_list))
		if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
			goto next;

		lock_sock(sk);
+15 −1
Original line number Diff line number Diff line
@@ -184,6 +184,14 @@ enum mptcp_pm_status {
					 */
};

enum mptcp_pm_type {
	MPTCP_PM_TYPE_KERNEL = 0,
	MPTCP_PM_TYPE_USERSPACE,

	__MPTCP_PM_TYPE_NR,
	__MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
};

/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)

@@ -212,6 +220,7 @@ struct mptcp_pm_data {
	u8		add_addr_signaled;
	u8		add_addr_accepted;
	u8		local_addr_used;
	u8		pm_type;
	u8		subflows;
	u8		status;
	DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
@@ -576,6 +585,7 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -796,6 +806,11 @@ static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
	return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
}

static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
{
	return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
}

static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
{
	u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
@@ -828,7 +843,6 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);

void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
				     const struct mptcp_rm_list *rm_list);
Loading