Commit d39e8b92 authored by Jakub Kicinski
Browse files
Andrii Nakryiko says:

====================
bpf-next 2021-07-30

We've added 64 non-merge commits during the last 15 day(s) which contain
a total of 83 files changed, 5027 insertions(+), 1808 deletions(-).

The main changes are:

1) BTF-guided binary data dumping libbpf API, from Alan.

2) Internal factoring out of libbpf CO-RE relocation logic, from Alexei.

3) Ambient BPF run context and cgroup storage cleanup, from Andrii.

4) Few small API additions for libbpf 1.0 effort, from Evgeniy and Hengqi.

5) bpf_program__attach_kprobe_opts() fixes in libbpf, from Jiri.

6) bpf_{get,set}sockopt() support in BPF iterators, from Martin.

7) BPF map pinning improvements in libbpf, from Martynas.

8) Improved module BTF support in libbpf and bpftool, from Quentin.

9) Bpftool cleanups and documentation improvements, from Quentin.

10) Libbpf improvements for supporting CO-RE on old kernels, from Shuyi.

11) Increased maximum cgroup storage size, from Stanislav.

12) Small fixes and improvements to BPF tests and samples, from various folks.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (64 commits)
  tools: bpftool: Complete metrics list in "bpftool prog profile" doc
  tools: bpftool: Document and add bash completion for -L, -B options
  selftests/bpf: Update bpftool's consistency script for checking options
  tools: bpftool: Update and synchronise option list in doc and help msg
  tools: bpftool: Complete and synchronise attach or map types
  selftests/bpf: Check consistency between bpftool source, doc, completion
  tools: bpftool: Slightly ease bash completion updates
  unix_bpf: Fix a potential deadlock in unix_dgram_bpf_recvmsg()
  libbpf: Add btf__load_vmlinux_btf/btf__load_module_btf
  tools: bpftool: Support dumping split BTF by id
  libbpf: Add split BTF support for btf__load_from_kernel_by_id()
  tools: Replace btf__get_from_id() with btf__load_from_kernel_by_id()
  tools: Free BTF objects at various locations
  libbpf: Rename btf__get_from_id() as btf__load_from_kernel_by_id()
  libbpf: Rename btf__load() as btf__load_into_kernel()
  libbpf: Return non-null error on failures in libbpf_find_prog_btf_id()
  bpf: Emit better log message if bpf_iter ctx arg btf_id == 0
  tools/resolve_btfids: Emit warnings and patch zero id for missing symbols
  bpf: Increase supported cgroup storage value size
  libbpf: Fix race when pinning maps in parallel
  ...
====================

Link: https://lore.kernel.org/r/20210730225606.1897330-1-andrii@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents d2e11fd2 ab0720ce
Loading
Loading
Loading
Loading
+16 −7
Original line number Diff line number Diff line
@@ -320,13 +320,6 @@ Examples for low-level BPF:
  ret #-1
  drop: ret #0

**(Accelerated) VLAN w/ id 10**::

  ld vlan_tci
  jneq #10, drop
  ret #-1
  drop: ret #0

**icmp random packet sampling, 1 in 4**::

  ldh [12]
@@ -358,6 +351,22 @@ Examples for low-level BPF:
  bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
  good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */

Examples for low-level BPF extension:

**Packet for interface index 13**::

  ld ifidx
  jneq #13, drop
  ret #-1
  drop: ret #0

**(Accelerated) VLAN w/ id 10**::

  ld vlan_tci
  jneq #10, drop
  ret #-1
  drop: ret #0

The above example code can be placed into a file (here called "foo"), and
then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
and cls_bpf understands and can directly be loaded with. Example with above
+0 −54
Original line number Diff line number Diff line
@@ -27,19 +27,6 @@ struct task_struct;
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])

#define BPF_CGROUP_STORAGE_NEST_MAX	8

struct bpf_cgroup_storage_info {
	struct task_struct *task;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};

/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
 * to use bpf cgroup storage simultaneously.
 */
DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);

#define for_each_cgroup_storage_type(stype) \
	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)

@@ -172,44 +159,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
	return BPF_CGROUP_STORAGE_SHARED;
}

static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
					 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
	enum bpf_cgroup_storage_type stype;
	int i, err = 0;

	preempt_disable();
	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
			continue;

		this_cpu_write(bpf_cgroup_storage_info[i].task, current);
		for_each_cgroup_storage_type(stype)
			this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
				       storage[stype]);
		goto out;
	}
	err = -EBUSY;
	WARN_ON_ONCE(1);

out:
	preempt_enable();
	return err;
}

static inline void bpf_cgroup_storage_unset(void)
{
	int i;

	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
			continue;

		this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
		return;
	}
}

struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
		      void *key, bool locked);
@@ -487,9 +436,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
	return -EINVAL;
}

static inline int bpf_cgroup_storage_set(
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
					    struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
+42 −20
Original line number Diff line number Diff line
@@ -1142,38 +1142,40 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
			struct bpf_prog *include_prog,
			struct bpf_prog_array **new_array);

struct bpf_run_ctx {};

struct bpf_cg_run_ctx {
	struct bpf_run_ctx run_ctx;
	struct bpf_prog_array_item *prog_item;
};

/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE			(1 << 0)
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN						(1 << 0)

/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
 * if bpf_cgroup_storage_set() failed, the rest of programs
 * will not execute. This should be a really rare scenario
 * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
 * preemptions all between bpf_cgroup_storage_set() and
 * bpf_cgroup_storage_unset() on the same cpu.
 */
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
	({								\
		struct bpf_prog_array_item *_item;			\
		struct bpf_prog *_prog;					\
		struct bpf_prog_array *_array;				\
		struct bpf_run_ctx *old_run_ctx;			\
		struct bpf_cg_run_ctx run_ctx;				\
		u32 _ret = 1;						\
		u32 func_ret;						\
		migrate_disable();					\
		rcu_read_lock();					\
		_array = rcu_dereference(array);			\
		_item = &_array->items[0];				\
		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);	\
		while ((_prog = READ_ONCE(_item->prog))) {		\
			if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
				break;					\
			run_ctx.prog_item = _item;			\
			func_ret = func(_prog, ctx);			\
			_ret &= (func_ret & 1);				\
			*(ret_flags) |= (func_ret >> 1);		\
			bpf_cgroup_storage_unset();			\
			_item++;					\
		}							\
		bpf_reset_run_ctx(old_run_ctx);				\
		rcu_read_unlock();					\
		migrate_enable();					\
		_ret;							\
@@ -1184,6 +1186,8 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
		struct bpf_prog_array_item *_item;	\
		struct bpf_prog *_prog;			\
		struct bpf_prog_array *_array;		\
		struct bpf_run_ctx *old_run_ctx;	\
		struct bpf_cg_run_ctx run_ctx;		\
		u32 _ret = 1;				\
		migrate_disable();			\
		rcu_read_lock();			\
@@ -1191,17 +1195,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
		if (unlikely(check_non_null && !_array))\
			goto _out;			\
		_item = &_array->items[0];		\
		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
		while ((_prog = READ_ONCE(_item->prog))) {	\
			if (!set_cg_storage) {			\
			run_ctx.prog_item = _item;	\
			_ret &= func(_prog, ctx);	\
			} else {				\
				if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
					break;			\
				_ret &= func(_prog, ctx);	\
				bpf_cgroup_storage_unset();	\
			}				\
			_item++;			\
		}					\
		bpf_reset_run_ctx(old_run_ctx);		\
_out:							\
		rcu_read_unlock();			\
		migrate_enable();			\
@@ -1284,6 +1284,20 @@ static inline void bpf_enable_instrumentation(void)
	migrate_enable();
}

static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
{
	struct bpf_run_ctx *old_ctx;

	old_ctx = current->bpf_ctx;
	current->bpf_ctx = new_ctx;
	return old_ctx;
}

static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
{
	current->bpf_ctx = old_ctx;
}

extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
@@ -1428,6 +1442,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
					struct seq_file *seq);
typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
					 struct bpf_link_info *info);
typedef const struct bpf_func_proto *
(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
			     const struct bpf_prog *prog);

enum bpf_iter_feature {
	BPF_ITER_RESCHED	= BIT(0),
@@ -1440,6 +1457,7 @@ struct bpf_iter_reg {
	bpf_iter_detach_target_t detach_target;
	bpf_iter_show_fdinfo_t show_fdinfo;
	bpf_iter_fill_link_info_t fill_link_info;
	bpf_iter_get_func_proto_t get_func_proto;
	u32 ctx_arg_info_size;
	u32 feature;
	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
@@ -1462,6 +1480,8 @@ struct bpf_iter__bpf_map_elem {
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
int bpf_iter_new_fd(struct bpf_link *link);
bool bpf_link_is_iter(struct bpf_link *link);
@@ -2036,6 +2056,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;

const struct bpf_func_proto *bpf_tracing_func_proto(
	enum bpf_func_id func_id, const struct bpf_prog *prog);
+3 −0
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@ struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
struct bpf_run_ctx;
struct capture_control;
struct cfs_rq;
struct fs_struct;
@@ -1379,6 +1380,8 @@ struct task_struct {
#ifdef CONFIG_BPF_SYSCALL
	/* Used by BPF task local storage */
	struct bpf_local_storage __rcu	*bpf_storage;
	/* Used for BPF run context */
	struct bpf_run_ctx		*bpf_ctx;
#endif

#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+6 −0
Original line number Diff line number Diff line
@@ -160,6 +160,12 @@ struct inet_hashinfo {
					____cacheline_aligned_in_smp;
};

#define inet_lhash2_for_each_icsk_continue(__icsk) \
	hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)

#define inet_lhash2_for_each_icsk(__icsk, list) \
	hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)

#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
	hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)

Loading