Commit ab86cf33 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf, mm: introduce cgroup.memory=nobpf'

Yafang Shao says:

====================

The bpf memory accouting has some known problems in contianer
environment,

- The container memory usage is not consistent if there's pinned bpf
  program
  After the container restart, the leftover bpf programs won't account
  to the new generation, so the memory usage of the container is not
  consistent. This issue can be resolved by introducing selectable
  memcg, but we don't have an agreement on the solution yet. See also
  the discussions at https://lwn.net/Articles/905150/ .

- The leftover non-preallocated bpf map can't be limited
  The leftover bpf map will be reparented, and thus it will be limited by
  the parent, rather than the container itself. Furthermore, if the
  parent is destroyed, it be will limited by its parent's parent, and so
  on. It can also be resolved by introducing selectable memcg.

- The memory dynamically allocated in bpf prog is charged into root memcg
  only
  Nowdays the bpf prog can dynamically allocate memory, for example via
  bpf_obj_new(), but it only allocate from the global bpf_mem_alloc
  pool, so it will charge into root memcg only. That needs to be
  addressed by a new proposal.

So let's give the container user an option to disable bpf memory accouting.

The idea of "cgroup.memory=nobpf" is originally by Tejun[1].

[1]. https://lwn.net/ml/linux-mm/YxjOawzlgE458ezL@slm.duckdns.org/



Changes,
v1->v2:
- squash patches (Roman)
- commit log improvement in patch #2. (Johannes)
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 7e2a9ebe bf396508
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -557,6 +557,7 @@
			Format: <string>
			nosocket -- Disable socket memory accounting.
			nokmem -- Disable kernel memory accounting.
			nobpf -- Disable BPF memory accounting.

	checkreqprot=	[SELINUX] Set initial checkreqprot flag value.
			Format: { "0" | "1" }
+16 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/static_call.h>
#include <linux/memcontrol.h>

struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -1886,6 +1887,8 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
			   int node);
void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
		       gfp_t flags);
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
				    size_t align, gfp_t flags);
#else
@@ -1902,6 +1905,12 @@ bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
	return kzalloc(size, flags);
}

static inline void *
bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags)
{
	return kvcalloc(n, size, flags);
}

static inline void __percpu *
bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
		     gfp_t flags)
@@ -2925,4 +2934,11 @@ static inline bool type_is_alloc(u32 type)
	return type & MEM_ALLOC;
}

static inline gfp_t bpf_memcg_flags(gfp_t flags)
{
	if (memcg_bpf_enabled())
		return flags | __GFP_ACCOUNT;
	return flags;
}

#endif /* _LINUX_BPF_H */
+11 −0
Original line number Diff line number Diff line
@@ -1754,6 +1754,12 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page);
int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);

extern struct static_key_false memcg_bpf_enabled_key;
static inline bool memcg_bpf_enabled(void)
{
	return static_branch_likely(&memcg_bpf_enabled_key);
}

extern struct static_key_false memcg_kmem_enabled_key;

static inline bool memcg_kmem_enabled(void)
@@ -1832,6 +1838,11 @@ static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
	return NULL;
}

static inline bool memcg_bpf_enabled(void)
{
	return false;
}

static inline bool memcg_kmem_enabled(void)
{
	return false;
+2 −2
Original line number Diff line number Diff line
@@ -568,8 +568,8 @@ static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_att
	nbuckets = max_t(u32, 2, nbuckets);
	smap->bucket_log = ilog2(nbuckets);

	smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
				 GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
	smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
					 nbuckets, GFP_USER | __GFP_NOWARN);
	if (!smap->buckets) {
		bpf_map_area_free(smap);
		return ERR_PTR(-ENOMEM);
+7 −6
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#include <linux/bpf_verifier.h>
#include <linux/nodemask.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/memcontrol.h>

#include <asm/barrier.h>
#include <asm/unaligned.h>
@@ -87,7 +88,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns

struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
	struct bpf_prog_aux *aux;
	struct bpf_prog *fp;

@@ -96,12 +97,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
	if (fp == NULL)
		return NULL;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
	aux = kzalloc(sizeof(*aux), bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
	if (aux == NULL) {
		vfree(fp);
		return NULL;
	}
	fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
	fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
	if (!fp->active) {
		vfree(fp);
		kfree(aux);
@@ -126,7 +127,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag

struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
	struct bpf_prog *prog;
	int cpu;

@@ -159,7 +160,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)

	prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
					  sizeof(*prog->aux->jited_linfo),
					  GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
					  bpf_memcg_flags(GFP_KERNEL | __GFP_NOWARN));
	if (!prog->aux->jited_linfo)
		return -ENOMEM;

@@ -234,7 +235,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
				  gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
	struct bpf_prog *fp;
	u32 pages;

Loading