Commit 82a1ffe5 authored by David S. Miller's avatar David S. Miller
Browse files


Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-07-15

The following pull-request contains BPF updates for your *net-next* tree.

We've added 45 non-merge commits during the last 15 day(s) which contain
a total of 52 files changed, 3122 insertions(+), 384 deletions(-).

The main changes are:

1) Introduce bpf timers, from Alexei.

2) Add sockmap support for unix datagram socket, from Cong.

3) Fix potential memleak and UAF in the verifier, from He.

4) Add bpf_get_func_ip helper, from Jiri.

5) Improvements to generic XDP mode, from Kumar.

6) Support for passing xdp_md to XDP programs in bpf_prog_run, from Zvi.
===================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 87117baf c50524ec
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -10387,6 +10387,7 @@ F: net/core/skmsg.c
F:	net/core/sock_map.c
F:	net/ipv4/tcp_bpf.c
F:	net/ipv4/udp_bpf.c
F:	net/unix/unix_bpf.c
LANDLOCK SECURITY MODULE
M:	Mickaël Salaün <mic@digikod.net>
+19 −0
Original line number Diff line number Diff line
@@ -1954,6 +1954,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
	if (flags & BPF_TRAMP_F_CALL_ORIG)
		stack_size += 8; /* room for return value of orig_call */

	if (flags & BPF_TRAMP_F_IP_ARG)
		stack_size += 8; /* room for IP address argument */

	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		/* skip patched call instruction and point orig_call to actual
		 * body of the kernel function.
@@ -1967,6 +1970,22 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
	EMIT1(0x53);		 /* push rbx */

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* Store IP address of the traced function:
		 * mov rax, QWORD PTR [rbp + 8]
		 * sub rax, X86_PATCH_SIZE
		 * mov QWORD PTR [rbp - stack_size], rax
		 */
		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
		EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE);
		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size);

		/* Continue with stack_size for regs storage, stack will
		 * be correctly restored with 'leave' instruction.
		 */
		stack_size -= 8;
	}

	save_regs(m, &prog, nr_args, stack_size);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+50 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@

#include <asm/types.h>
#include <linux/bits.h>
#include <linux/typecheck.h>

#include <uapi/linux/kernel.h>

@@ -253,6 +254,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
		__clear_bit(nr, addr);
}

/**
 * __ptr_set_bit - Set bit in a pointer's value
 * @nr: the bit to set
 * @addr: the address of the pointer variable
 *
 * Example:
 *	void *p = foo();
 *	__ptr_set_bit(bit, &p);
 */
#define __ptr_set_bit(nr, addr)                         \
	({                                              \
		typecheck_pointer(*(addr));             \
		__set_bit(nr, (unsigned long *)(addr)); \
	})

/**
 * __ptr_clear_bit - Clear bit in a pointer's value
 * @nr: the bit to clear
 * @addr: the address of the pointer variable
 *
 * Example:
 *	void *p = foo();
 *	__ptr_clear_bit(bit, &p);
 */
#define __ptr_clear_bit(nr, addr)                         \
	({                                                \
		typecheck_pointer(*(addr));               \
		__clear_bit(nr, (unsigned long *)(addr)); \
	})

/**
 * __ptr_test_bit - Test bit in a pointer's value
 * @nr: the bit to test
 * @addr: the address of the pointer variable
 *
 * Example:
 *	void *p = foo();
 *	if (__ptr_test_bit(bit, &p)) {
 *	        ...
 *	} else {
 *		...
 *	}
 */
#define __ptr_test_bit(nr, addr)                       \
	({                                             \
		typecheck_pointer(*(addr));            \
		test_bit(nr, (unsigned long *)(addr)); \
	})

#ifdef __KERNEL__

#ifndef set_mask_bits
+69 −31
Original line number Diff line number Diff line
@@ -168,6 +168,7 @@ struct bpf_map {
	u32 max_entries;
	u32 map_flags;
	int spin_lock_off; /* >=0 valid offset, <0 error */
	int timer_off; /* >=0 valid offset, <0 error */
	u32 id;
	int numa_node;
	u32 btf_key_type_id;
@@ -197,30 +198,53 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map)
	return map->spin_lock_off >= 0;
}

static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
static inline bool map_value_has_timer(const struct bpf_map *map)
{
	if (likely(!map_value_has_spin_lock(map)))
		return;
	return map->timer_off >= 0;
}

static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
	if (unlikely(map_value_has_spin_lock(map)))
		*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
			(struct bpf_spin_lock){};
	if (unlikely(map_value_has_timer(map)))
		*(struct bpf_timer *)(dst + map->timer_off) =
			(struct bpf_timer){};
}

/* copy everything but bpf_spin_lock */
/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
	if (unlikely(map_value_has_spin_lock(map))) {
		u32 off = map->spin_lock_off;
	u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;

		memcpy(dst, src, off);
		memcpy(dst + off + sizeof(struct bpf_spin_lock),
		       src + off + sizeof(struct bpf_spin_lock),
		       map->value_size - off - sizeof(struct bpf_spin_lock));
	if (unlikely(map_value_has_spin_lock(map))) {
		s_off = map->spin_lock_off;
		s_sz = sizeof(struct bpf_spin_lock);
	} else if (unlikely(map_value_has_timer(map))) {
		t_off = map->timer_off;
		t_sz = sizeof(struct bpf_timer);
	}

	if (unlikely(s_sz || t_sz)) {
		if (s_off < t_off || !s_sz) {
			swap(s_off, t_off);
			swap(s_sz, t_sz);
		}
		memcpy(dst, src, t_off);
		memcpy(dst + t_off + t_sz,
		       src + t_off + t_sz,
		       s_off - t_off - t_sz);
		memcpy(dst + s_off + s_sz,
		       src + s_off + s_sz,
		       map->value_size - s_off - s_sz);
	} else {
		memcpy(dst, src, map->value_size);
	}
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);

struct bpf_offload_dev;
@@ -314,6 +338,7 @@ enum bpf_arg_type {
	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
	__BPF_ARG_TYPE_MAX,
};

@@ -554,6 +579,11 @@ struct btf_func_model {
 */
#define BPF_TRAMP_F_SKIP_FRAME		BIT(2)

/* Store IP address of the caller on the trampoline stack,
 * so it's available for trampoline's programs.
 */
#define BPF_TRAMP_F_IP_ARG		BIT(3)

/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
 * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
 */
@@ -1509,12 +1539,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
			   struct bpf_prog *xdp_prog, struct bpf_map *map,
			   bool exclude_ingress);
bool dev_map_can_have_prog(struct bpf_map *map);

void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
		    struct net_device *dev_rx);
bool cpu_map_prog_allowed(struct bpf_map *map);
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
			     struct sk_buff *skb);

/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1711,6 +1741,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
	return 0;
}

static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
					   struct sk_buff *skb)
{
	return -EOPNOTSUPP;
}

static inline bool cpu_map_prog_allowed(struct bpf_map *map)
{
	return false;
@@ -1852,6 +1888,12 @@ void bpf_map_offload_map_free(struct bpf_map *map);
int bpf_prog_test_run_syscall(struct bpf_prog *prog,
			      const union bpf_attr *kattr,
			      union bpf_attr __user *uattr);

int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
					union bpf_attr *attr)
@@ -1884,24 +1926,6 @@ static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
{
	return -ENOTSUPP;
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);

void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
				       void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}

#ifdef CONFIG_BPF_SYSCALL
static inline int sock_map_get_from_fd(const union bpf_attr *attr,
@@ -1921,7 +1945,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
				       void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 map_flags);
#else
static inline void bpf_sk_reuseport_detach(struct sock *sk)
{
}

#ifdef CONFIG_BPF_SYSCALL
static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
						     void *key, void *value)
{
+17 −2
Original line number Diff line number Diff line
@@ -53,7 +53,14 @@ struct bpf_reg_state {
		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
		 *   PTR_TO_MAP_VALUE_OR_NULL
		 */
		struct {
			struct bpf_map *map_ptr;
			/* To distinguish map lookups from outer map
			 * the map_uid is non-zero for registers
			 * pointing to inner maps.
			 */
			u32 map_uid;
		};

		/* for PTR_TO_BTF_ID */
		struct {
@@ -201,12 +208,19 @@ struct bpf_func_state {
	 * zero == main subprog
	 */
	u32 subprogno;
	/* Every bpf_timer_start will increment async_entry_cnt.
	 * It's used to distinguish:
	 * void foo(void) { for(;;); }
	 * void foo(void) { bpf_timer_set_callback(,foo); }
	 */
	u32 async_entry_cnt;
	bool in_callback_fn;
	bool in_async_callback_fn;

	/* The following fields should be last. See copy_func_state() */
	int acquired_refs;
	struct bpf_reference_state *refs;
	int allocated_stack;
	bool in_callback_fn;
	struct bpf_stack_state *stack;
};

@@ -392,6 +406,7 @@ struct bpf_subprog_info {
	bool has_tail_call;
	bool tail_call_reachable;
	bool has_ld_abs;
	bool is_async_cb;
};

/* single container for all structs
Loading