Commit 2c9e7765 authored by Florian Westphal's avatar Florian Westphal Committed by Jakub Kicinski
Browse files

mptcp: add TCP_INQ cmsg support

Support the TCP_INQ setsockopt.

This is a boolean that tells recvmsg path to include the remaining
in-sequence bytes in the cmsg data.

v2: do not use CB(skb)->offset, increment map_seq instead (Paolo Abeni)
v3: adjust CB(skb)->map_seq when taking skb from ofo queue (Paolo Abeni)

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/224


Signed-off-by: default avatarFlorian Westphal <fw@strlen.de>
Signed-off-by: default avatarMat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent c0e5e11a
Loading
Loading
Loading
Loading
+35 −1
Original line number Diff line number Diff line
@@ -46,6 +46,7 @@ struct mptcp_skb_cb {

enum {
	MPTCP_CMSG_TS = BIT(0),
	MPTCP_CMSG_INQ = BIT(1),
};

static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
@@ -738,6 +739,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
				 MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq,
				 delta);
			MPTCP_SKB_CB(skb)->offset += delta;
			MPTCP_SKB_CB(skb)->map_seq += delta;
			__skb_queue_tail(&sk->sk_receive_queue, skb);
		}
		msk->ack_seq = end_seq;
@@ -1784,8 +1786,10 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
		copied += count;

		if (count < data_len) {
			if (!(flags & MSG_PEEK))
			if (!(flags & MSG_PEEK)) {
				MPTCP_SKB_CB(skb)->offset += count;
				MPTCP_SKB_CB(skb)->map_seq += count;
			}
			break;
		}

@@ -1965,6 +1969,27 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
	return !skb_queue_empty(&msk->receive_queue);
}

static unsigned int mptcp_inq_hint(const struct sock *sk)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);
	const struct sk_buff *skb;

	skb = skb_peek(&msk->receive_queue);
	if (skb) {
		u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;

		if (hint_val >= INT_MAX)
			return INT_MAX;

		return (unsigned int)hint_val;
	}

	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
		return 1;

	return 0;
}

static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			 int nonblock, int flags, int *addr_len)
{
@@ -1989,6 +2014,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
	len = min_t(size_t, len, INT_MAX);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(msk->recvmsg_inq))
		cmsg_flags = MPTCP_CMSG_INQ;

	while (copied < len) {
		int bytes_read;

@@ -2062,6 +2090,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
	if (cmsg_flags && copied >= 0) {
		if (cmsg_flags & MPTCP_CMSG_TS)
			tcp_recv_timestamp(msg, sk, &tss);

		if (cmsg_flags & MPTCP_CMSG_INQ) {
			unsigned int inq = mptcp_inq_hint(sk);

			put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
		}
	}

	pr_debug("msk=%p rx queue empty=%d:%d copied=%d",
+1 −0
Original line number Diff line number Diff line
@@ -249,6 +249,7 @@ struct mptcp_sock {
	bool		rcv_fastclose;
	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
	bool		csum_enabled;
	u8		recvmsg_inq:1;
	spinlock_t	join_list_lock;
	struct work_struct work;
	struct sk_buff  *ooo_last_skb;
+37 −1
Original line number Diff line number Diff line
@@ -557,6 +557,7 @@ static bool mptcp_supported_sockopt(int level, int optname)
		case TCP_TIMESTAMP:
		case TCP_NOTSENT_LOWAT:
		case TCP_TX_DELAY:
		case TCP_INQ:
			return true;
		}

@@ -568,7 +569,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
		/* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE,
		 * are not supported fastopen is currently unsupported
		 */
		/* TCP_INQ is currently unsupported, needs some recvmsg work */
	}
	return false;
}
@@ -698,7 +698,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (void *)msk;
	int ret, val;

	switch (optname) {
	case TCP_INQ:
		ret = mptcp_get_int_option(msk, optval, optlen, &val);
		if (ret)
			return ret;
		if (val < 0 || val > 1)
			return -EINVAL;

		lock_sock(sk);
		msk->recvmsg_inq = !!val;
		release_sock(sk);
		return 0;
	case TCP_ULP:
		return -EOPNOTSUPP;
	case TCP_CONGESTION:
@@ -1032,6 +1046,26 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
	return 0;
}

static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
				int __user *optlen, int val)
{
	int len;

	if (get_user(len, optlen))
		return -EFAULT;

	len = min_t(unsigned int, len, sizeof(int));
	if (len < 0)
		return -EINVAL;

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    char __user *optval, int __user *optlen)
{
@@ -1042,6 +1076,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
	case TCP_CC_INFO:
		return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
						      optval, optlen);
	case TCP_INQ:
		return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq);
	}
	return -EOPNOTSUPP;
}