Commit 1e0ab707 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'sock_map: clean up and refactor code for BPF_SK_SKB_VERDICT'

Cong Wang says:

====================

From: Cong Wang <cong.wang@bytedance.com>

This patchset is the first series of patches separated out from
the original large patchset, to make reviews easier. This patchset
does not add any new feature or change any functionality but merely
cleans up the existing sockmap and skmsg code and refactors it, to
prepare for the patches followed up. This passed all BPF selftests.

To see the big picture, the original whole patchset is available
on github: https://github.com/congwang/linux/tree/sockmap

and this patchset is also available on github:
https://github.com/congwang/linux/tree/sockmap1


---
v7: add 1 trivial cleanup patch
    define a mask for sk_redir
    fix CONFIG_BPF_SYSCALL in include/net/udp.h
    make sk_psock_done_strp() static
    move skb_bpf_redirect_clear() to sk_psock_backlog()

v6: fix !CONFIG_INET case

v5: improve CONFIG_BPF_SYSCALL dependency
    add 3 trivial cleanup patches

v4: reuse skb dst instead of skb ext
    fix another Kconfig error

v3: fix a few Kconfig compile errors
    remove an unused variable
    add a comment for bpf_convert_data_end_access()

v2: split the original patchset
    compute data_end with bpf_convert_data_end_access()
    get rid of psock->bpf_running
    reduce the scope of CONFIG_BPF_STREAM_PARSER
    do not add CONFIG_BPF_SOCK_MAP
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents a83586a7 ff9614b8
Loading
Loading
Loading
Loading
+9 −20
Original line number Diff line number Diff line
@@ -1778,22 +1778,24 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_BPF_STREAM_PARSER)
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
			 struct bpf_prog *old, u32 which);
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);

void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
				       void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 map_flags);
#else
static inline int sock_map_prog_update(struct bpf_map *map,
				       struct bpf_prog *prog,
				       struct bpf_prog *old, u32 which)
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
	return -EOPNOTSUPP;
}

#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
				       struct bpf_prog *prog)
{
@@ -1811,20 +1813,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_STREAM_PARSER */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
				       void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}

#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
						     void *key, void *value)
{
+2 −4
Original line number Diff line number Diff line
@@ -103,10 +103,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#if defined(CONFIG_BPF_STREAM_PARSER)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
#endif
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
#endif
@@ -116,6 +112,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
#endif
#ifdef CONFIG_INET
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
+3 −0
Original line number Diff line number Diff line
@@ -755,6 +755,9 @@ struct sk_buff {
			void		(*destructor)(struct sk_buff *skb);
		};
		struct list_head	tcp_tsorted_anchor;
#ifdef CONFIG_NET_SOCK_MSG
		unsigned long		_sk_redir;
#endif
	};

#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+65 −17
Original line number Diff line number Diff line
@@ -56,8 +56,8 @@ struct sk_msg {

struct sk_psock_progs {
	struct bpf_prog			*msg_parser;
	struct bpf_prog			*skb_parser;
	struct bpf_prog			*skb_verdict;
	struct bpf_prog			*stream_parser;
	struct bpf_prog			*stream_verdict;
};

enum sk_psock_state_bits {
@@ -70,12 +70,6 @@ struct sk_psock_link {
	void				*link_raw;
};

struct sk_psock_parser {
	struct strparser		strp;
	bool				enabled;
	void (*saved_data_ready)(struct sock *sk);
};

struct sk_psock_work_state {
	struct sk_buff			*skb;
	u32				len;
@@ -90,7 +84,9 @@ struct sk_psock {
	u32				eval;
	struct sk_msg			*cork;
	struct sk_psock_progs		progs;
	struct sk_psock_parser		parser;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
	struct strparser		strp;
#endif
	struct sk_buff_head		ingress_skb;
	struct list_head		ingress_msg;
	unsigned long			state;
@@ -100,6 +96,7 @@ struct sk_psock {
	void (*saved_unhash)(struct sock *sk);
	void (*saved_close)(struct sock *sk, long timeout);
	void (*saved_write_space)(struct sock *sk);
	void (*saved_data_ready)(struct sock *sk);
	struct proto			*sk_proto;
	struct sk_psock_work_state	work_state;
	struct work_struct		work;
@@ -305,9 +302,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)

struct sk_psock *sk_psock_init(struct sock *sk, int node);

#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
#else
static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
	return -EOPNOTSUPP;
}

static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
}

static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
{
}
#endif

void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);

@@ -327,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)

struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);

void __sk_psock_purge_ingress_msg(struct sk_psock *psock);

static inline void sk_psock_cork_free(struct sk_psock *psock)
{
	if (psock->cork) {
@@ -389,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
	return psock;
}

void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);

static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
@@ -400,8 +410,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)

static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
	if (psock->parser.enabled)
		psock->parser.saved_data_ready(sk);
	if (psock->saved_data_ready)
		psock->saved_data_ready(sk);
	else
		sk->sk_data_ready(sk);
}
@@ -430,8 +440,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog,
static inline void psock_progs_drop(struct sk_psock_progs *progs)
{
	psock_set_prog(&progs->msg_parser, NULL);
	psock_set_prog(&progs->skb_parser, NULL);
	psock_set_prog(&progs->skb_verdict, NULL);
	psock_set_prog(&progs->stream_parser, NULL);
	psock_set_prog(&progs->stream_verdict, NULL);
}

int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
@@ -440,6 +450,44 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
{
	if (!psock)
		return false;
	return psock->parser.enabled;
	return !!psock->saved_data_ready;
}

#if IS_ENABLED(CONFIG_NET_SOCK_MSG)

/* We only have one bit so far. */
#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)

static inline bool skb_bpf_ingress(const struct sk_buff *skb)
{
	unsigned long sk_redir = skb->_sk_redir;

	return sk_redir & BPF_F_INGRESS;
}

static inline void skb_bpf_set_ingress(struct sk_buff *skb)
{
	skb->_sk_redir |= BPF_F_INGRESS;
}

static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
				     bool ingress)
{
	skb->_sk_redir = (unsigned long)sk_redir;
	if (ingress)
		skb->_sk_redir |= BPF_F_INGRESS;
}

static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
{
	unsigned long sk_redir = skb->_sk_redir;

	return (struct sock *)(sk_redir & BPF_F_PTR_MASK);
}

static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
{
	skb->_sk_redir = 0;
}
#endif /* CONFIG_NET_SOCK_MSG */
#endif /* _LINUX_SKMSG_H */
+9 −32
Original line number Diff line number Diff line
@@ -883,36 +883,11 @@ struct tcp_skb_cb {
			struct inet6_skb_parm	h6;
#endif
		} header;	/* For incoming skbs */
		struct {
			__u32 flags;
			struct sock *sk_redir;
			void *data_end;
		} bpf;
	};
};

#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))

static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
}

static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
{
	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
}

static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
{
	return TCP_SKB_CB(skb)->bpf.sk_redir;
}

static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
}

extern const struct inet_connection_sock_af_ops ipv4_specific;

#if IS_ENABLED(CONFIG_IPV6)
@@ -2222,25 +2197,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
	__MODULE_INFO(alias, alias_userspace, name);		\
	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)

#ifdef CONFIG_NET_SOCK_MSG
struct sk_msg;
struct sk_psock;

#ifdef CONFIG_BPF_STREAM_PARSER
#ifdef CONFIG_BPF_SYSCALL
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#else
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */

#ifdef CONFIG_NET_SOCK_MSG
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
			  int flags);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
		      struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */

#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif

#ifdef CONFIG_CGROUP_BPF
static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
				      struct sk_buff *skb,
Loading