Commit 983e59a2 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mptcp-next'



Mat Martineau says:

====================
mptcp: Add SOL_MPTCP getsockopt support

Here's the first new MPTCP feature for the v5.16 cycle, and I'll defer
to Florian's helpful description of the series implementing some new
MPTCP socket options:

========

This adds the MPTCP_INFO, MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS
mptcp getsockopt optnames.

MPTCP_INFO exposes the mptcp_info struct as an alternative to the
existing netlink diag interface.

MPTCP_TCPINFO exposes the tcp_info struct.
Unlike SOL_TCP/TCP_INFO, this returns one struct for each active
subflow.

MPTCP_SUBFLOW_ADDRS allows userspace to discover the ip addresses/ports
used by the local and remote endpoints, one for each active tcp subflow.

MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS share the same meta-header that
needs to be pre-filled by userspace with the size of the data structures
it expects.  This is done to allow extension of the involved structs
later on, without breaking backwards compatibility.

The meta-structure can also be used to discover the required space
to obtain all information, as kernel will fill in the number of
active subflows even if there is not enough room for the requested info
itself.

More information is available in the individual patches.
Last patch adds test cases for the three optnames.

========
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 95dca2d5 ce997912
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -364,6 +364,7 @@ struct ucred {
#define SOL_KCM		281
#define SOL_TLS		282
#define SOL_XDP		283
#define SOL_MPTCP	284

/* IPX options */
#define IPX_TYPE	1
+4 −0
Original line number Diff line number Diff line
@@ -12,6 +12,8 @@
#include <linux/tcp.h>
#include <linux/types.h>

struct mptcp_info;
struct mptcp_sock;
struct seq_file;

/* MPTCP sk_buff extension data */
@@ -121,6 +123,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
			 struct mptcp_out_options *opts);

void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info);

/* move the skb extension owership, with the assumption that 'to' is
 * newly allocated
 */
+35 −0
Original line number Diff line number Diff line
@@ -4,6 +4,13 @@

#include <linux/const.h>
#include <linux/types.h>
#include <linux/in.h>		/* for sockaddr_in			*/
#include <linux/in6.h>		/* for sockaddr_in6			*/
#include <linux/socket.h>	/* for sockaddr_storage and sa_family	*/

#ifndef __KERNEL__
#include <sys/socket.h>		/* for struct sockaddr			*/
#endif

#define MPTCP_SUBFLOW_FLAG_MCAP_REM		_BITUL(0)
#define MPTCP_SUBFLOW_FLAG_MCAP_LOC		_BITUL(1)
@@ -193,4 +200,32 @@ enum mptcp_event_attr {
#define MPTCP_RST_EBADPERF	5
#define MPTCP_RST_EMIDDLEBOX	6

struct mptcp_subflow_data {
	__u32		size_subflow_data;		/* size of this structure in userspace */
	__u32		num_subflows;			/* must be 0, set by kernel */
	__u32		size_kernel;			/* must be 0, set by kernel */
	__u32		size_user;			/* size of one element in data[] */
} __attribute__((aligned(8)));

struct mptcp_subflow_addrs {
	union {
		__kernel_sa_family_t sa_family;
		struct sockaddr sa_local;
		struct sockaddr_in sin_local;
		struct sockaddr_in6 sin6_local;
		struct __kernel_sockaddr_storage ss_local;
	};
	union {
		struct sockaddr sa_remote;
		struct sockaddr_in sin_remote;
		struct sockaddr_in6 sin6_remote;
		struct __kernel_sockaddr_storage ss_remote;
	};
};

/* MPTCP socket options */
#define MPTCP_INFO		1
#define MPTCP_TCPINFO		2
#define MPTCP_SUBFLOW_ADDRS	3

#endif /* _UAPI_MPTCP_H */
+1 −25
Original line number Diff line number Diff line
@@ -113,37 +113,13 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_info *info = _info;
	u32 flags = 0;
	bool slow;
	u8 val;

	r->idiag_rqueue = sk_rmem_alloc_get(sk);
	r->idiag_wqueue = sk_wmem_alloc_get(sk);
	if (!info)
		return;

	slow = lock_sock_fast(sk);
	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
	info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
	val = mptcp_pm_get_add_addr_signal_max(msk);
	info->mptcpi_add_addr_signal_max = val;
	val = mptcp_pm_get_add_addr_accept_max(msk);
	info->mptcpi_add_addr_accepted_max = val;
	info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
		flags |= MPTCP_INFO_FLAG_FALLBACK;
	if (READ_ONCE(msk->can_ack))
		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
	info->mptcpi_flags = flags;
	info->mptcpi_token = READ_ONCE(msk->token);
	info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
	info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
	info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
	unlock_sock_fast(sk, slow);
	mptcp_diag_fill_info(msk, info);
}

static const struct inet_diag_handler mptcp_diag_handler = {
+276 −0
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@
#include <net/mptcp.h>
#include "protocol.h"

#define MIN_INFO_OPTLEN_SIZE	16

static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
	sock_owned_by_me((const struct sock *)msk);
@@ -670,6 +672,263 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
	return ret;
}

void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
	struct sock *sk = &msk->sk.icsk_inet.sk;
	u32 flags = 0;
	bool slow;
	u8 val;

	memset(info, 0, sizeof(*info));

	slow = lock_sock_fast(sk);

	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
	info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
	val = mptcp_pm_get_add_addr_signal_max(msk);
	info->mptcpi_add_addr_signal_max = val;
	val = mptcp_pm_get_add_addr_accept_max(msk);
	info->mptcpi_add_addr_accepted_max = val;
	info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
		flags |= MPTCP_INFO_FLAG_FALLBACK;
	if (READ_ONCE(msk->can_ack))
		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
	info->mptcpi_flags = flags;
	info->mptcpi_token = READ_ONCE(msk->token);
	info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
	info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
	info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);

	unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);

static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen)
{
	struct mptcp_info m_info;
	int len;

	if (get_user(len, optlen))
		return -EFAULT;

	len = min_t(unsigned int, len, sizeof(struct mptcp_info));

	mptcp_diag_fill_info(msk, &m_info);

	if (put_user(len, optlen))
		return -EFAULT;

	if (copy_to_user(optval, &m_info, len))
		return -EFAULT;

	return 0;
}

static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
				  char __user *optval,
				  u32 copied,
				  int __user *optlen)
{
	u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd));

	if (copied)
		copied += sfd->size_subflow_data;
	else
		copied = copylen;

	if (put_user(copied, optlen))
		return -EFAULT;

	if (copy_to_user(optval, sfd, copylen))
		return -EFAULT;

	return 0;
}

static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
				  char __user *optval, int __user *optlen)
{
	int len, copylen;

	if (get_user(len, optlen))
		return -EFAULT;

	/* if mptcp_subflow_data size is changed, need to adjust
	 * this function to deal with programs using old version.
	 */
	BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE);

	if (len < MIN_INFO_OPTLEN_SIZE)
		return -EINVAL;

	memset(sfd, 0, sizeof(*sfd));

	copylen = min_t(unsigned int, len, sizeof(*sfd));
	if (copy_from_user(sfd, optval, copylen))
		return -EFAULT;

	/* size_subflow_data is u32, but len is signed */
	if (sfd->size_subflow_data > INT_MAX ||
	    sfd->size_user > INT_MAX)
		return -EINVAL;

	if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
	    sfd->size_subflow_data > len)
		return -EINVAL;

	if (sfd->num_subflows || sfd->size_kernel)
		return -EINVAL;

	return len - sfd->size_subflow_data;
}

static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
				    int __user *optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = &msk->sk.icsk_inet.sk;
	unsigned int sfcount = 0, copied = 0;
	struct mptcp_subflow_data sfd;
	char __user *infoptr;
	int len;

	len = mptcp_get_subflow_data(&sfd, optval, optlen);
	if (len < 0)
		return len;

	sfd.size_kernel = sizeof(struct tcp_info);
	sfd.size_user = min_t(unsigned int, sfd.size_user,
			      sizeof(struct tcp_info));

	infoptr = optval + sfd.size_subflow_data;

	lock_sock(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		++sfcount;

		if (len && len >= sfd.size_user) {
			struct tcp_info info;

			tcp_get_info(ssk, &info);

			if (copy_to_user(infoptr, &info, sfd.size_user)) {
				release_sock(sk);
				return -EFAULT;
			}

			infoptr += sfd.size_user;
			copied += sfd.size_user;
			len -= sfd.size_user;
		}
	}

	release_sock(sk);

	sfd.num_subflows = sfcount;

	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
		return -EFAULT;

	return 0;
}

static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
{
	struct inet_sock *inet = inet_sk(sk);

	memset(a, 0, sizeof(*a));

	if (sk->sk_family == AF_INET) {
		a->sin_local.sin_family = AF_INET;
		a->sin_local.sin_port = inet->inet_sport;
		a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr;

		if (!a->sin_local.sin_addr.s_addr)
			a->sin_local.sin_addr.s_addr = inet->inet_saddr;

		a->sin_remote.sin_family = AF_INET;
		a->sin_remote.sin_port = inet->inet_dport;
		a->sin_remote.sin_addr.s_addr = inet->inet_daddr;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (sk->sk_family == AF_INET6) {
		const struct ipv6_pinfo *np = inet6_sk(sk);

		a->sin6_local.sin6_family = AF_INET6;
		a->sin6_local.sin6_port = inet->inet_sport;

		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
			a->sin6_local.sin6_addr = np->saddr;
		else
			a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr;

		a->sin6_remote.sin6_family = AF_INET6;
		a->sin6_remote.sin6_port = inet->inet_dport;
		a->sin6_remote.sin6_addr = sk->sk_v6_daddr;
#endif
	}
}

static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval,
					  int __user *optlen)
{
	struct sock *sk = &msk->sk.icsk_inet.sk;
	struct mptcp_subflow_context *subflow;
	unsigned int sfcount = 0, copied = 0;
	struct mptcp_subflow_data sfd;
	char __user *addrptr;
	int len;

	len = mptcp_get_subflow_data(&sfd, optval, optlen);
	if (len < 0)
		return len;

	sfd.size_kernel = sizeof(struct mptcp_subflow_addrs);
	sfd.size_user = min_t(unsigned int, sfd.size_user,
			      sizeof(struct mptcp_subflow_addrs));

	addrptr = optval + sfd.size_subflow_data;

	lock_sock(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		++sfcount;

		if (len && len >= sfd.size_user) {
			struct mptcp_subflow_addrs a;

			mptcp_get_sub_addrs(ssk, &a);

			if (copy_to_user(addrptr, &a, sfd.size_user)) {
				release_sock(sk);
				return -EFAULT;
			}

			addrptr += sfd.size_user;
			copied += sfd.size_user;
			len -= sfd.size_user;
		}
	}

	release_sock(sk);

	sfd.num_subflows = sfcount;

	if (mptcp_put_subflow_data(&sfd, optval, copied, optlen))
		return -EFAULT;

	return 0;
}

static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    char __user *optval, int __user *optlen)
{
@@ -684,6 +943,21 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
	return -EOPNOTSUPP;
}

static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
				      char __user *optval, int __user *optlen)
{
	switch (optname) {
	case MPTCP_INFO:
		return mptcp_getsockopt_info(msk, optval, optlen);
	case MPTCP_TCPINFO:
		return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
	case MPTCP_SUBFLOW_ADDRS:
		return mptcp_getsockopt_subflow_addrs(msk, optval, optlen);
	}

	return -EOPNOTSUPP;
}

int mptcp_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *option)
{
@@ -706,6 +980,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,

	if (level == SOL_TCP)
		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
	if (level == SOL_MPTCP)
		return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option);
	return -EOPNOTSUPP;
}

Loading