Commit 0a2f6b32 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mptcp-genl-events'



Mat Martineau says:

====================
mptcp: Add genl events for connection info

This series from the MPTCP tree adds genl multicast events that are
important for implementing a userspace path manager. In MPTCP, a path
manager is responsible for adding or removing additional subflows on
each MPTCP connection. The in-kernel path manager (already part of the
kernel) is a better fit for many server use cases, but the additional
flexibility of userspace path managers is often useful for client
devices.

Patches 1, 2, 4, 5, and 6 do some refactoring to streamline the netlink
event implementation in the final patch.

Patch 3 improves the timeliness of subflow destruction to ensure the
'subflow closed' event will be sent soon enough.

Patch 7 allows use of the GENL_UNS_ADMIN_PERM flag on genl mcast groups
to mandate CAP_NET_ADMIN, which is important to protect token information
in the MPTCP events. This is a genetlink change.

Patch 8 adds the MPTCP netlink events.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 0a82c37e b911c97c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
 */
struct genl_multicast_group {
	char			name[GENL_NAMSIZ];
	u8			flags;
};

struct genl_ops;
+74 −0
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@ enum {
/* netlink interface */
#define MPTCP_PM_NAME		"mptcp_pm"
#define MPTCP_PM_CMD_GRP_NAME	"mptcp_pm_cmds"
#define MPTCP_PM_EV_GRP_NAME	"mptcp_pm_events"
#define MPTCP_PM_VER		0x1

/*
@@ -104,4 +105,77 @@ struct mptcp_info {
	__u64	mptcpi_rcv_nxt;
};

/*
 * MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
 *                      sport, dport
 * A new MPTCP connection has been created. It is the good time to allocate
 * memory and send ADD_ADDR if needed. Depending on the traffic-patterns
 * it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent.
 *
 * MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
 *			    sport, dport
 * A MPTCP connection is established (can start new subflows).
 *
 * MPTCP_EVENT_CLOSED: token
 * A MPTCP connection has stopped.
 *
 * MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport]
 * A new address has been announced by the peer.
 *
 * MPTCP_EVENT_REMOVED: token, rem_id
 * An address has been lost by the peer.
 *
 * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6,
 *                              daddr4 | daddr6, sport, dport, backup,
 *                              if_idx [, error]
 * A new subflow has been established. 'error' should not be set.
 *
 * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6,
 *                         sport, dport, backup, if_idx [, error]
 * A subflow has been closed. An error (copy of sk_err) could be set if an
 * error has been detected for this subflow.
 *
 * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
 *                           sport, dport, backup, if_idx [, error]
 *       The priority of a subflow has changed. 'error' should not be set.
 */
enum mptcp_event_type {
	MPTCP_EVENT_UNSPEC = 0,
	MPTCP_EVENT_CREATED = 1,
	MPTCP_EVENT_ESTABLISHED = 2,
	MPTCP_EVENT_CLOSED = 3,

	MPTCP_EVENT_ANNOUNCED = 6,
	MPTCP_EVENT_REMOVED = 7,

	MPTCP_EVENT_SUB_ESTABLISHED = 10,
	MPTCP_EVENT_SUB_CLOSED = 11,

	MPTCP_EVENT_SUB_PRIORITY = 13,
};

enum mptcp_event_attr {
	MPTCP_ATTR_UNSPEC = 0,

	MPTCP_ATTR_TOKEN,	/* u32 */
	MPTCP_ATTR_FAMILY,	/* u16 */
	MPTCP_ATTR_LOC_ID,	/* u8 */
	MPTCP_ATTR_REM_ID,	/* u8 */
	MPTCP_ATTR_SADDR4,	/* be32 */
	MPTCP_ATTR_SADDR6,	/* struct in6_addr */
	MPTCP_ATTR_DADDR4,	/* be32 */
	MPTCP_ATTR_DADDR6,	/* struct in6_addr */
	MPTCP_ATTR_SPORT,	/* be16 */
	MPTCP_ATTR_DPORT,	/* be16 */
	MPTCP_ATTR_BACKUP,	/* u8 */
	MPTCP_ATTR_ERROR,	/* u8 */
	MPTCP_ATTR_FLAGS,	/* u16 */
	MPTCP_ATTR_TIMEOUT,	/* u32 */
	MPTCP_ATTR_IF_IDX,	/* s32 */

	__MPTCP_ATTR_AFTER_LAST
};

#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)

#endif /* _UAPI_MPTCP_H */
+1 −1
Original line number Diff line number Diff line
@@ -867,7 +867,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
		clear_3rdack_retransmission(ssk);
		mptcp_pm_subflow_established(msk, subflow);
	} else {
		mptcp_pm_fully_established(msk);
		mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
	}
	return true;

+17 −7
Original line number Diff line number Diff line
@@ -68,13 +68,14 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id)

/* path manager event handlers */

void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side)
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);

	WRITE_ONCE(pm->server_side, server_side);
	mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}

bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
@@ -119,16 +120,13 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
	return true;
}

void mptcp_pm_fully_established(struct mptcp_sock *msk)
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
{
	struct mptcp_pm_data *pm = &msk->pm;
	bool announce = false;

	pr_debug("msk=%p", msk);

	/* try to avoid acquiring the lock below */
	if (!READ_ONCE(pm->work_pending))
		return;

	spin_lock_bh(&pm->lock);

	/* mptcp_pm_fully_established() can be invoked by multiple
@@ -138,9 +136,15 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk)
	if (READ_ONCE(pm->work_pending) &&
	    !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
		mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
	msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);

	if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
		announce = true;

	msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
	spin_unlock_bh(&pm->lock);

	if (announce)
		mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
}

void mptcp_pm_connection_closed(struct mptcp_sock *msk)
@@ -179,6 +183,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
	pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
		 READ_ONCE(pm->accept_addr));

	mptcp_event_addr_announced(msk, addr);

	spin_lock_bh(&pm->lock);

	if (!READ_ONCE(pm->accept_addr)) {
@@ -205,6 +211,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)

	pr_debug("msk=%p remote_id=%d", msk, rm_id);

	mptcp_event_addr_removed(msk, rm_id);

	spin_lock_bh(&pm->lock);
	mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
	pm->rm_id = rm_id;
@@ -217,6 +225,8 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)

	pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
	subflow->backup = bkup;

	mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}

/* path manager helpers */
+302 −8
Original line number Diff line number Diff line
@@ -56,6 +56,8 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX	8
#define ADD_ADDR_RETRANS_MAX	3

static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);

static bool addresses_equal(const struct mptcp_addr_info *a,
			    struct mptcp_addr_info *b, bool use_port)
{
@@ -448,17 +450,17 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
	}
}

void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
{
	mptcp_pm_create_subflow_or_signal_addr(msk);
}

void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
{
	mptcp_pm_create_subflow_or_signal_addr(msk);
}

void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
	struct sock *sk = (struct sock *)msk;
	unsigned int add_addr_accept_max;
@@ -498,7 +500,7 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
	mptcp_pm_nl_add_addr_send_ack(msk);
}

void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;

@@ -568,7 +570,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
	return -EINVAL;
}

void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow, *tmp;
	struct sock *sk = (struct sock *)msk;
@@ -592,7 +594,7 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)

		spin_unlock_bh(&msk->pm.lock);
		mptcp_subflow_shutdown(sk, ssk, how);
		__mptcp_close_ssk(sk, ssk, subflow);
		mptcp_close_ssk(sk, ssk, subflow);
		spin_lock_bh(&msk->pm.lock);

		msk->pm.add_addr_accepted--;
@@ -605,6 +607,39 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
	}
}

void mptcp_pm_nl_work(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;

	msk_owned_by_me(msk);

	spin_lock_bh(&msk->pm.lock);

	pr_debug("msk=%p status=%x", msk, pm->status);
	if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
		pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
		mptcp_pm_nl_add_addr_received(msk);
	}
	if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
		pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
		mptcp_pm_nl_add_addr_send_ack(msk);
	}
	if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
		pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
		mptcp_pm_nl_rm_addr_received(msk);
	}
	if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
		pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
		mptcp_pm_nl_fully_established(msk);
	}
	if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
		pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
		mptcp_pm_nl_subflow_established(msk);
	}

	spin_unlock_bh(&msk->pm.lock);
}

void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
{
	struct mptcp_subflow_context *subflow, *tmp;
@@ -629,7 +664,7 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)

		spin_unlock_bh(&msk->pm.lock);
		mptcp_subflow_shutdown(sk, ssk, how);
		__mptcp_close_ssk(sk, ssk, subflow);
		mptcp_close_ssk(sk, ssk, subflow);
		spin_lock_bh(&msk->pm.lock);

		msk->pm.local_addr_used--;
@@ -826,9 +861,13 @@ void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
}

#define MPTCP_PM_CMD_GRP_OFFSET       0
#define MPTCP_PM_EV_GRP_OFFSET        1

static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
	[MPTCP_PM_CMD_GRP_OFFSET]	= { .name = MPTCP_PM_CMD_GRP_NAME, },
	[MPTCP_PM_EV_GRP_OFFSET]        = { .name = MPTCP_PM_EV_GRP_NAME,
					    .flags = GENL_UNS_ADMIN_PERM,
					  },
};

static const struct nla_policy
@@ -1447,6 +1486,261 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
	return 0;
}

static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
{
	genlmsg_multicast_netns(&mptcp_genl_family, net,
				nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
}

static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
{
	const struct inet_sock *issk = inet_sk(ssk);
	const struct mptcp_subflow_context *sf;

	if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family))
		return -EMSGSIZE;

	switch (ssk->sk_family) {
	case AF_INET:
		if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr))
			return -EMSGSIZE;
		if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr))
			return -EMSGSIZE;
		break;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	case AF_INET6: {
		const struct ipv6_pinfo *np = inet6_sk(ssk);

		if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr))
			return -EMSGSIZE;
		if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr))
			return -EMSGSIZE;
		break;
	}
#endif
	default:
		WARN_ON_ONCE(1);
		return -EMSGSIZE;
	}

	if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport))
		return -EMSGSIZE;
	if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport))
		return -EMSGSIZE;

	sf = mptcp_subflow_ctx(ssk);
	if (WARN_ON_ONCE(!sf))
		return -EINVAL;

	if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
		return -EMSGSIZE;

	if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
		return -EMSGSIZE;

	return 0;
}

static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
					 const struct mptcp_sock *msk,
					 const struct sock *ssk)
{
	const struct sock *sk = (const struct sock *)msk;
	const struct mptcp_subflow_context *sf;
	u8 sk_err;

	if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
		return -EMSGSIZE;

	if (mptcp_event_add_subflow(skb, ssk))
		return -EMSGSIZE;

	sf = mptcp_subflow_ctx(ssk);
	if (WARN_ON_ONCE(!sf))
		return -EINVAL;

	if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup))
		return -EMSGSIZE;

	if (ssk->sk_bound_dev_if &&
	    nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
		return -EMSGSIZE;

	sk_err = ssk->sk_err;
	if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
	    nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
		return -EMSGSIZE;

	return 0;
}

static int mptcp_event_sub_established(struct sk_buff *skb,
				       const struct mptcp_sock *msk,
				       const struct sock *ssk)
{
	return mptcp_event_put_token_and_ssk(skb, msk, ssk);
}

static int mptcp_event_sub_closed(struct sk_buff *skb,
				  const struct mptcp_sock *msk,
				  const struct sock *ssk)
{
	if (mptcp_event_put_token_and_ssk(skb, msk, ssk))
		return -EMSGSIZE;

	return 0;
}

static int mptcp_event_created(struct sk_buff *skb,
			       const struct mptcp_sock *msk,
			       const struct sock *ssk)
{
	int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);

	if (err)
		return err;

	return mptcp_event_add_subflow(skb, ssk);
}

void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
{
	struct net *net = sock_net((const struct sock *)msk);
	struct nlmsghdr *nlh;
	struct sk_buff *skb;

	if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
		return;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return;

	nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED);
	if (!nlh)
		goto nla_put_failure;

	if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
		goto nla_put_failure;

	if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id))
		goto nla_put_failure;

	genlmsg_end(skb, nlh);
	mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
	return;

nla_put_failure:
	kfree_skb(skb);
}

void mptcp_event_addr_announced(const struct mptcp_sock *msk,
				const struct mptcp_addr_info *info)
{
	struct net *net = sock_net((const struct sock *)msk);
	struct nlmsghdr *nlh;
	struct sk_buff *skb;

	if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
		return;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return;

	nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0,
			  MPTCP_EVENT_ANNOUNCED);
	if (!nlh)
		goto nla_put_failure;

	if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
		goto nla_put_failure;

	if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
		goto nla_put_failure;

	if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port))
		goto nla_put_failure;

	switch (info->family) {
	case AF_INET:
		if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr))
			goto nla_put_failure;
		break;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	case AF_INET6:
		if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6))
			goto nla_put_failure;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		goto nla_put_failure;
	}

	genlmsg_end(skb, nlh);
	mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
	return;

nla_put_failure:
	kfree_skb(skb);
}

void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
		 const struct sock *ssk, gfp_t gfp)
{
	struct net *net = sock_net((const struct sock *)msk);
	struct nlmsghdr *nlh;
	struct sk_buff *skb;

	if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
		return;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!skb)
		return;

	nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type);
	if (!nlh)
		goto nla_put_failure;

	switch (type) {
	case MPTCP_EVENT_UNSPEC:
		WARN_ON_ONCE(1);
		break;
	case MPTCP_EVENT_CREATED:
	case MPTCP_EVENT_ESTABLISHED:
		if (mptcp_event_created(skb, msk, ssk) < 0)
			goto nla_put_failure;
		break;
	case MPTCP_EVENT_CLOSED:
		if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
			goto nla_put_failure;
		break;
	case MPTCP_EVENT_ANNOUNCED:
	case MPTCP_EVENT_REMOVED:
		/* call mptcp_event_addr_announced()/removed instead */
		WARN_ON_ONCE(1);
		break;
	case MPTCP_EVENT_SUB_ESTABLISHED:
	case MPTCP_EVENT_SUB_PRIORITY:
		if (mptcp_event_sub_established(skb, msk, ssk) < 0)
			goto nla_put_failure;
		break;
	case MPTCP_EVENT_SUB_CLOSED:
		if (mptcp_event_sub_closed(skb, msk, ssk) < 0)
			goto nla_put_failure;
		break;
	}

	genlmsg_end(skb, nlh);
	mptcp_nl_mcast_send(net, skb, gfp);
	return;

nla_put_failure:
	kfree_skb(skb);
}

static const struct genl_small_ops mptcp_pm_ops[] = {
	{
		.cmd    = MPTCP_PM_CMD_ADD_ADDR,
Loading