Commit 1612cc4b authored by David S. Miller
Alexei Starovoitov says:

====================

The following pull-request contains BPF updates for your *net* tree.

We've added 21 non-merge commits during the last 8 day(s) which contain
a total of 21 files changed, 450 insertions(+), 36 deletions(-).

The main changes are:

1) Adjust bpf_mem_alloc buckets to match ksize(), from Hou Tao.

2) Check whether override is allowed in kprobe multi, from Jiri Olsa.

3) Fix btf_id symbol generation with ld.lld, from Jiri and Nick.

4) Fix potential deadlock when using queue and stack maps from NMI, from Toke Høiland-Jørgensen.
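
The queue/stack map fix in item 4 is not among the hunks shown below, so here is a
hedged sketch of the trylock pattern such a fix relies on: when running in NMI
context, never spin on a map lock that the interrupted code may already hold, and
fail the operation with -EBUSY instead. Struct, field and function names are
illustrative, not the exact upstream patch.

static long queue_stack_push_sketch(struct bpf_queue_stack *qs, void *value)
{
	unsigned long flags;

	if (in_nmi()) {
		/* The NMI may have fired inside the locked region on this CPU,
		 * so spinning here could deadlock; give up instead.
		 */
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	/* ... copy *value into the map while holding qs->lock ... */

	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return 0;
}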

Please consider pulling these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git



Thanks a lot!

Also thanks to reporters, reviewers and testers of commits in this pull-request:

Alan Maguire, Biju Das, Björn Töpel, Dan Carpenter, Daniel Borkmann,
Eduard Zingerman, Hsin-Wei Hung, Marcus Seyfarth, Nathan Chancellor,
Satya Durga Srinivasu Prabhala, Song Liu, Stephen Rothwell
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 615efed8 c0bb9fb0
+1 −1
@@ -49,7 +49,7 @@ word \
	____BTF_ID(symbol, word)

#define __ID(prefix) \
-	__PASTE(prefix, __COUNTER__)
+	__PASTE(__PASTE(prefix, __COUNTER__), __LINE__)

/*
 * The BTF_ID defines unique symbol for each ID pointing
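
For readers unfamiliar with the double-paste trick above, here is a minimal
user-space sketch of the effect; the CONCAT helpers are simplified stand-ins for
the kernel's __PASTE and the expanded names are only examples. Pasting __LINE__
on top of __COUNTER__ makes the generated local symbol names far less likely to
collide, which is the point of the ld.lld fix listed in the cover letter.

#define CONCAT_(a, b)      a##b
#define CONCAT(a, b)       CONCAT_(a, b)
#define UNIQUE_ID(prefix)  CONCAT(CONCAT(prefix, __COUNTER__), __LINE__)

/* With __COUNTER__ == 0 on line 14 this expands to 'sym_014';
 * the next use gets __COUNTER__ == 1 and its own line number.
 */
static int UNIQUE_ID(sym_) = 1;
static int UNIQUE_ID(sym_) = 2;
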
+3 −1
@@ -1962,7 +1962,9 @@ union bpf_attr {
 * 		performed again, if the helper is used in combination with
 * 		direct packet access.
 * 	Return
- * 		0 on success, or a negative error in case of failure.
+ * 		0 on success, or a negative error in case of failure. Positive
+ * 		error indicates a potential drop or congestion in the target
+ * 		device. The particular positive error codes are not defined.
 *
 * u64 bpf_get_current_pid_tgid(void)
 * 	Description
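
A hedged sketch of a tc classifier consuming the clarified return contract above,
assuming this hunk belongs to the bpf_clone_redirect() description (the helper
documented just before bpf_get_current_pid_tgid()): a negative return is a hard
failure, while a positive value only hints at drop or congestion on the target
device. The ifindex value and the drop policy are illustrative.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int clone_to_mirror(struct __sk_buff *skb)
{
	long ret = bpf_clone_redirect(skb, 2 /* example ifindex */, 0);

	if (ret < 0)
		return TC_ACT_SHOT;	/* clone/redirect failed, drop */
	/* ret > 0: the clone may be dropped downstream; still accept. */
	return TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";
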
+1 −1
@@ -8501,7 +8501,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
	tname = btf_name_by_offset(btf, walk_type->name_off);

	ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
-	if (ret < 0)
+	if (ret >= sizeof(safe_tname))
		return false;

	safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info));
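
The one-line fix above relies on the standard snprintf() contract: the return
value is the length the formatted string would have had, and it is only negative
on an output error, so truncation has to be detected by comparing against the
buffer size. A small user-space sketch (the strings are just examples):

#include <stdio.h>

int main(void)
{
	char buf[16];
	int ret = snprintf(buf, sizeof(buf), "%s%s", "task_struct", "__safe_trusted");

	if (ret >= (int)sizeof(buf))
		printf("truncated: needed %d bytes, buffer holds only %zu\n",
		       ret, sizeof(buf));
	else
		printf("ok: \"%s\"\n", buf);
	return 0;
}
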
+7 −6
@@ -785,7 +785,8 @@ static void replace_effective_prog(struct cgroup *cgrp,
 *                          to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @link: A link for which to replace BPF program
- * @type: Type of attach operation
+ * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
+ *            incremented
 *
 * Must be called with cgroup_mutex held.
 */
@@ -1334,7 +1335,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
 *
 * If no socket is passed, or the socket is not of type INET or INET6,
 * this function does nothing and returns 0.
@@ -1424,7 +1425,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
 *
 * socket is passed is expected to be of type INET or INET6.
 *
@@ -1449,7 +1450,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 *                                       provided by user sockaddr
 * @sk: sock struct that will use sockaddr
 * @uaddr: sockaddr struct provided by user
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
 * @t_ctx: Pointer to attach type specific context
 * @flags: Pointer to u32 which contains higher bits of BPF program
 *         return value (OR'ed together).
@@ -1496,7 +1497,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 * sk with connection information (IP addresses, etc.) May not contain
 * cgroup info if it is a req sock.
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
 *
 * socket passed is expected to be of type INET or INET6.
 *
@@ -1670,7 +1671,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
 * @ppos: value-result argument: value is position at which read from or write
 *	to sysctl is happening, result is new position if program overrode it,
 *	initial value otherwise
- * @type: type of program to be executed
+ * @atype: type of program to be executed
 *
 * Program is run when sysctl is being accessed, either read or written, and
 * can allow or deny such access.
+90 −4
@@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
 * Typical case will be between 11K and 116K closer to 11K.
 * bpf progs can and should share bpf_mem_cache when possible.
 */
-
-static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+static void init_refill_work(struct bpf_mem_cache *c)
{
	init_irq_work(&c->refill_work, bpf_mem_refill);
	if (c->unit_size <= 256) {
@@ -476,7 +475,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
		c->high_watermark = max(96 * 256 / c->unit_size, 3);
	}
	c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1);
+}

+static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+{
	/* To avoid consuming memory assume that 1st run of bpf
	 * prog won't be doing more than 4 map_update_elem from
	 * irq disabled region
@@ -484,6 +486,31 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
}

static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
{
	struct llist_node *first;
	unsigned int obj_size;

	/* For per-cpu allocator, the size of free objects in free list doesn't
	 * match with unit_size and now there is no way to get the size of
	 * per-cpu pointer saved in free object, so just skip the checking.
	 */
	if (c->percpu_size)
		return 0;

	first = c->free_llist.first;
	if (!first)
		return 0;

	obj_size = ksize(first);
	if (obj_size != c->unit_size) {
		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
			  idx, obj_size, c->unit_size);
		return -EINVAL;
	}
	return 0;
}

/* When size != 0 bpf_mem_cache for each cpu.
 * This is typical bpf hash map use case when all elements have equal size.
 *
@@ -494,10 +521,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
{
	static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+	int cpu, i, err, unit_size, percpu_size = 0;
	struct bpf_mem_caches *cc, __percpu *pcc;
	struct bpf_mem_cache *c, __percpu *pc;
	struct obj_cgroup *objcg = NULL;
-	int cpu, i, unit_size, percpu_size = 0;

	if (size) {
		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -521,6 +548,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
			c->objcg = objcg;
			c->percpu_size = percpu_size;
			c->tgt = c;
+			init_refill_work(c);
			prefill_mem_cache(c, cpu);
		}
		ma->cache = pc;
@@ -534,6 +562,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
	if (!pcc)
		return -ENOMEM;
+	err = 0;
#ifdef CONFIG_MEMCG_KMEM
	objcg = get_obj_cgroup_from_current();
#endif
@@ -544,11 +573,30 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
			c->unit_size = sizes[i];
			c->objcg = objcg;
			c->tgt = c;
+
+			init_refill_work(c);
+			/* Another bpf_mem_cache will be used when allocating
+			 * c->unit_size in bpf_mem_alloc(), so doesn't prefill
+			 * for the bpf_mem_cache because these free objects will
+			 * never be used.
+			 */
+			if (i != bpf_mem_cache_idx(c->unit_size))
+				continue;
			prefill_mem_cache(c, cpu);
+			err = check_obj_size(c, i);
+			if (err)
+				goto out;
		}
	}
+
+out:
	ma->caches = pcc;
-	return 0;
+	/* refill_work is either zeroed or initialized, so it is safe to
+	 * call irq_work_sync().
+	 */
+	if (err)
+		bpf_mem_alloc_destroy(ma);
+	return err;
}

static void drain_mem_cache(struct bpf_mem_cache *c)
@@ -916,3 +964,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)

	return !ret ? NULL : ret + LLIST_NODE_SZ;
}

/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
static __init int bpf_mem_cache_adjust_size(void)
{
	unsigned int size, index;

	/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
	 * up-to 256-bytes.
	 */
	size = KMALLOC_MIN_SIZE;
	if (size <= 192)
		index = size_index[(size - 1) / 8];
	else
		index = fls(size - 1) - 1;
	for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
		size_index[(size - 1) / 8] = index;

	/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
	 * use 128-bytes cache instead.
	 */
	if (KMALLOC_MIN_SIZE >= 64) {
		index = size_index[(128 - 1) / 8];
		for (size = 64 + 8; size <= 96; size += 8)
			size_index[(size - 1) / 8] = index;
	}

	/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
	 * use 256-bytes cache instead.
	 */
	if (KMALLOC_MIN_SIZE >= 128) {
		index = fls(256 - 1) - 1;
		for (size = 128 + 8; size <= 192; size += 8)
			size_index[(size - 1) / 8] = index;
	}

	return 0;
}
subsys_initcall(bpf_mem_cache_adjust_size);
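
A user-space sketch of the size rounding that check_obj_size() and
bpf_mem_cache_adjust_size() above deal with: when KMALLOC_MIN_SIZE is large
enough to disable the odd-sized 96-byte (and possibly 192-byte) kmalloc caches,
those requests are served from the next power-of-two cache, so ksize() of the
object no longer matches the requested unit_size. fls() is modelled here with a
GCC builtin and the printed sizes are only an illustration.

#include <stdio.h>

static unsigned int fls32(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

int main(void)
{
	const unsigned int requests[] = { 64, 96, 128, 192 };
	unsigned int i;

	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
		unsigned int sz = requests[i];
		/* Power-of-two bucket that serves the request once the
		 * 96/192-byte caches are disabled.
		 */
		unsigned int bucket = 1u << fls32(sz - 1);

		printf("request %3u bytes -> served from a %3u-byte object\n",
		       sz, bucket);
	}
	return 0;
}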