Commit ee7f1e13 authored by Martin KaFai Lau, committed by Alexei Starovoitov
Browse files

bpf: Change bpf_setsockopt(SOL_IP) to reuse do_ip_setsockopt()



After the prep work in the previous patches,
this patch removes the dup code from bpf_setsockopt(SOL_IP)
and reuses the implementation in do_ip_setsockopt().

The existing optname white-list is refactored into a new
function sol_ip_setsockopt().

NOTE,
the current bpf_setsockopt(IP_TOS) is quite different from
do_ip_setsockopt(IP_TOS).  For example, it does not take
the INET_ECN_MASK into account for tcp and also does not adjust
sk->sk_priority.  It looks like the current bpf_setsockopt(IP_TOS)
was referencing the IPV6_TCLASS implementation instead of IP_TOS.
This patch tries to rectify that by using the do_ip_setsockopt(IP_TOS).
While this is a behavior change,  the do_ip_setsockopt(IP_TOS) behavior
is arguably what the user is expecting.  At least, the INET_ECN_MASK bits
should be masked out for tcp.

Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/r/20220817061826.4180990-1-kafai@fb.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 0c751f70
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -743,6 +743,8 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
		 struct ipcm_cookie *ipc, bool allow_ipv6);
DECLARE_STATIC_KEY_FALSE(ip4_min_ttl);
/*
 * Set an IP-level socket option on @sk, taking the option value as a
 * sockptr_t.  Declared here (no longer file-local) so that code outside
 * the defining file, such as the BPF setsockopt path, can call it directly.
 */
int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
		     unsigned int optlen);
int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
		  unsigned int optlen);
int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
+20 −20
Original line number Diff line number Diff line
@@ -5114,6 +5114,25 @@ static int sol_tcp_setsockopt(struct sock *sk, int optname,
				 KERNEL_SOCKPTR(optval), optlen);
}

/*
 * SOL_IP handler for bpf_setsockopt().
 *
 * Acts as an allow-list in front of do_ip_setsockopt(): the request is
 * forwarded only when the socket is AF_INET, the option is IP_TOS and
 * the value is exactly one int wide.  Anything else yields -EINVAL.
 *
 * @sk:      socket the option is applied to
 * @optname: IP-level option name; only IP_TOS is accepted
 * @optval:  kernel buffer holding the option value
 * @optlen:  size of @optval in bytes; must equal sizeof(int)
 *
 * Returns -EINVAL for a rejected request, otherwise whatever
 * do_ip_setsockopt() returns.
 */
static int sol_ip_setsockopt(struct sock *sk, int optname,
			     char *optval, int optlen)
{
	/*
	 * Short-circuit evaluation keeps the checks in the order
	 * family -> option name -> value size.
	 */
	if (sk->sk_family != AF_INET ||
	    optname != IP_TOS ||
	    optlen != sizeof(int))
		return -EINVAL;

	/* optval is kernel memory, so hand it over as a kernel sockptr. */
	return do_ip_setsockopt(sk, SOL_IP, optname,
				KERNEL_SOCKPTR(optval), optlen);
}

static int __bpf_setsockopt(struct sock *sk, int level, int optname,
			    char *optval, int optlen)
{
@@ -5125,26 +5144,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
	if (level == SOL_SOCKET) {
		return sol_socket_setsockopt(sk, optname, optval, optlen);
	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) {
		if (optlen != sizeof(int) || sk->sk_family != AF_INET)
			return -EINVAL;

		val = *((int *)optval);
		/* Only some options are supported */
		switch (optname) {
		case IP_TOS:
			if (val < -1 || val > 0xff) {
				ret = -EINVAL;
			} else {
				struct inet_sock *inet = inet_sk(sk);

				if (val == -1)
					val = 0;
				inet->tos = val;
			}
			break;
		default:
			ret = -EINVAL;
		}
		return sol_ip_setsockopt(sk, optname, optval, optlen);
	} else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) {
		if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
			return -EINVAL;
+2 −2
Original line number Diff line number Diff line
@@ -888,7 +888,7 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname,

DEFINE_STATIC_KEY_FALSE(ip4_min_ttl);

static int do_ip_setsockopt(struct sock *sk, int level, int optname,
int do_ip_setsockopt(struct sock *sk, int level, int optname,
		     sockptr_t optval, unsigned int optlen)
{
	struct inet_sock *inet = inet_sk(sk);