Commit 59d58d93 authored by Jakub Kicinski
Browse files

Merge branch 'mptcp-new-features-for-mptcp-sockets-and-netlink-pm'

Mat Martineau says:

====================
mptcp: New features for MPTCP sockets and netlink PM

This collection of patches adds MPTCP socket support for a few socket
options, ioctls, and one ancillary data type (specifics for each are
listed below). There's also a patch modifying the netlink MPTCP path
manager API to allow setting the backup flag on a configured interface
using the endpoint ID instead of the full IP address.

Patches 1 & 2: TCP_INQ cmsg and selftests.

Patches 3 & 4: SIOCINQ, OUTQ, and OUTQNSD ioctls and selftests.

Patch 5: Change backup flag using endpoint ID.

Patches 6 & 7: IP_TOS socket option and selftests.

Patches 8-10: TCP_CORK and TCP_NODELAY socket options. Includes a tcp
change to expose __tcp_sock_set_cork() and __tcp_sock_set_nodelay() for
use by MPTCP.
====================

Link: https://lore.kernel.org/r/20211203223541.69364-1-mathew.j.martineau@linux.intel.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents c0e5e11a 4f6e14bd
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -512,11 +512,13 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
		  int shiftlen);

/* Helpers for setting TCP socket options from kernel code.
 *
 * The double-underscore variants (__tcp_sock_set_cork() and
 * __tcp_sock_set_nodelay()) used to be static in the TCP code and are
 * declared here so that MPTCP can call them.
 * NOTE(review): presumably the __ variants expect the caller to already
 * hold the socket lock - confirm against their definitions.
 */
void __tcp_sock_set_cork(struct sock *sk, bool on);
void tcp_sock_set_cork(struct sock *sk, bool on);
int tcp_sock_set_keepcnt(struct sock *sk, int val);
int tcp_sock_set_keepidle_locked(struct sock *sk, int val);
int tcp_sock_set_keepidle(struct sock *sk, int val);
int tcp_sock_set_keepintvl(struct sock *sk, int val);
void __tcp_sock_set_nodelay(struct sock *sk, bool on);
void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
+2 −2
Original line number Diff line number Diff line
@@ -3207,7 +3207,7 @@ static void tcp_enable_tx_delay(void)
 * TCP_CORK can be set together with TCP_NODELAY and it is stronger than
 * TCP_NODELAY.
 */
static void __tcp_sock_set_cork(struct sock *sk, bool on)
void __tcp_sock_set_cork(struct sock *sk, bool on)
{
	struct tcp_sock *tp = tcp_sk(sk);

@@ -3235,7 +3235,7 @@ EXPORT_SYMBOL(tcp_sock_set_cork);
 * However, when TCP_NODELAY is set we make an explicit push, which overrides
 * even TCP_CORK for currently queued segments.
 */
static void __tcp_sock_set_nodelay(struct sock *sk, bool on)
void __tcp_sock_set_nodelay(struct sock *sk, bool on)
{
	if (on) {
		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
+10 −4
Original line number Diff line number Diff line
@@ -1702,22 +1702,28 @@ static int mptcp_nl_addr_backup(struct net *net,

static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
	struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry addr, *entry;
	struct net *net = sock_net(skb->sk);
	u8 bkup = 0;
	u8 bkup = 0, lookup_by_id = 0;
	int ret;

	ret = mptcp_pm_parse_addr(attr, info, true, &addr);
	ret = mptcp_pm_parse_addr(attr, info, false, &addr);
	if (ret < 0)
		return ret;

	if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
		bkup = 1;
	if (addr.addr.family == AF_UNSPEC) {
		lookup_by_id = 1;
		if (!addr.addr.id)
			return -EOPNOTSUPP;
	}

	list_for_each_entry(entry, &pernet->local_addr_list, list) {
		if (addresses_equal(&entry->addr, &addr.addr, true)) {
		if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
		    (lookup_by_id && entry->addr.id == addr.addr.id)) {
			mptcp_nl_addr_backup(net, &entry->addr, bkup);

			if (bkup)
+89 −2
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#endif
#include <net/mptcp.h>
#include <net/xfrm.h>
#include <asm/ioctls.h>
#include "protocol.h"
#include "mib.h"

@@ -46,6 +47,7 @@ struct mptcp_skb_cb {

/* Bit flags accumulated in mptcp_recvmsg()'s cmsg_flags; each set bit
 * selects one piece of ancillary data to attach to the received message.
 */
enum {
	/* emit receive timestamps via tcp_recv_timestamp() */
	MPTCP_CMSG_TS = BIT(0),
	/* emit a TCP_CM_INQ cmsg carrying mptcp_inq_hint(); requested when
	 * msk->recvmsg_inq is set
	 */
	MPTCP_CMSG_INQ = BIT(1),
};

static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
@@ -738,6 +740,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
				 MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq,
				 delta);
			MPTCP_SKB_CB(skb)->offset += delta;
			MPTCP_SKB_CB(skb)->map_seq += delta;
			__skb_queue_tail(&sk->sk_receive_queue, skb);
		}
		msk->ack_seq = end_seq;
@@ -1499,7 +1502,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
		msk->snd_nxt = snd_nxt_new;
}

static void mptcp_check_and_set_pending(struct sock *sk)
void mptcp_check_and_set_pending(struct sock *sk)
{
	if (mptcp_send_head(sk) &&
	    !test_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
@@ -1784,8 +1787,10 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
		copied += count;

		if (count < data_len) {
			if (!(flags & MSG_PEEK))
			if (!(flags & MSG_PEEK)) {
				MPTCP_SKB_CB(skb)->offset += count;
				MPTCP_SKB_CB(skb)->map_seq += count;
			}
			break;
		}

@@ -1965,6 +1970,27 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
	return !skb_queue_empty(&msk->receive_queue);
}

/* Compute the "bytes in queue" hint reported through the TCP_CM_INQ cmsg
 * and the SIOCINQ ioctl.
 *
 * When the msk receive queue holds data, the hint is the distance between
 * msk->ack_seq and the map_seq of the first queued skb, clamped to INT_MAX.
 * With an empty queue the hint is 1 if the socket is closed or its receive
 * side has been shut down, and 0 otherwise.
 * NOTE(review): the non-zero value on shutdown presumably lets readers
 * notice a pending EOF - confirm against the TCP equivalent.
 */
static unsigned int mptcp_inq_hint(const struct sock *sk)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);
	const struct sk_buff *head = skb_peek(&msk->receive_queue);
	u64 pending;

	if (!head) {
		/* nothing queued: only closed/shut-down sockets report non-zero */
		if (sk->sk_state == TCP_CLOSE)
			return 1;
		return (sk->sk_shutdown & RCV_SHUTDOWN) ? 1 : 0;
	}

	/* map_seq of the head skb marks the first not-yet-consumed byte */
	pending = msk->ack_seq - MPTCP_SKB_CB(head)->map_seq;

	return pending >= INT_MAX ? INT_MAX : (unsigned int)pending;
}

static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			 int nonblock, int flags, int *addr_len)
{
@@ -1989,6 +2015,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
	len = min_t(size_t, len, INT_MAX);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(msk->recvmsg_inq))
		cmsg_flags = MPTCP_CMSG_INQ;

	while (copied < len) {
		int bytes_read;

@@ -2062,6 +2091,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
	if (cmsg_flags && copied >= 0) {
		if (cmsg_flags & MPTCP_CMSG_TS)
			tcp_recv_timestamp(msg, sk, &tss);

		if (cmsg_flags & MPTCP_CMSG_INQ) {
			unsigned int inq = mptcp_inq_hint(sk);

			put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
		}
	}

	pr_debug("msk=%p rx queue empty=%d:%d copied=%d",
@@ -3177,6 +3212,57 @@ static int mptcp_forward_alloc_get(const struct sock *sk)
	return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
}

/* Compute the answer for the SIOCOUTQ / SIOCOUTQNSD ioctls.
 *
 * @msk: MPTCP socket being queried
 * @v:   reference sequence number subtracted from msk->write_seq
 *
 * Returns -EINVAL for a listening socket, 0 while the socket is still in
 * SYN_SENT or SYN_RECV, and otherwise msk->write_seq - @v clamped to
 * INT_MAX.
 */
static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
{
	const struct sock *sk = (void *)msk;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	if (!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
		u64 outstanding = msk->write_seq - v;

		return outstanding > INT_MAX ? INT_MAX : (int)outstanding;
	}

	/* handshake not completed yet: nothing is accounted as queued */
	return 0;
}

static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	bool slow;
	int answ;

	switch (cmd) {
	case SIOCINQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		lock_sock(sk);
		__mptcp_move_skbs(msk);
		answ = mptcp_inq_hint(sk);
		release_sock(sk);
		break;
	case SIOCOUTQ:
		slow = lock_sock_fast(sk);
		answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
		unlock_sock_fast(sk, slow);
		break;
	case SIOCOUTQNSD:
		slow = lock_sock_fast(sk);
		answ = mptcp_ioctl_outq(msk, msk->snd_nxt);
		unlock_sock_fast(sk, slow);
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return put_user(answ, (int __user *)arg);
}

static struct proto mptcp_prot = {
	.name		= "MPTCP",
	.owner		= THIS_MODULE,
@@ -3189,6 +3275,7 @@ static struct proto mptcp_prot = {
	.shutdown	= mptcp_shutdown,
	.destroy	= mptcp_destroy,
	.sendmsg	= mptcp_sendmsg,
	.ioctl		= mptcp_ioctl,
	.recvmsg	= mptcp_recvmsg,
	.release_cb	= mptcp_release_cb,
	.hash		= mptcp_hash,
+4 −0
Original line number Diff line number Diff line
@@ -249,6 +249,9 @@ struct mptcp_sock {
	bool		rcv_fastclose;
	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
	bool		csum_enabled;
	u8		recvmsg_inq:1,
			cork:1,
			nodelay:1;
	spinlock_t	join_list_lock;
	struct work_struct work;
	struct sk_buff  *ooo_last_skb;
@@ -554,6 +557,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
Loading