Commit a8c49af3 authored by Yosry Ahmed, committed by Linus Torvalds
Browse files

memcg: add per-memcg total kernel memory stat

Currently memcg stats show several types of kernel memory: kernel stack,
page tables, sock, vmalloc, and slab.  However, there are other
allocations with __GFP_ACCOUNT (or supersets such as GFP_KERNEL_ACCOUNT)
that are not accounted in any of those stats; a few examples are:

 - various kvm allocations (e.g. allocated pages to create vcpus)
 - io_uring
 - tmp_page in pipes during pipe_write()
 - bpf ringbuffers
 - unix sockets

Keeping track of the total kernel memory is essential for the ease of
migration from cgroup v1 to v2 as there are large discrepancies between
v1's kmem.usage_in_bytes and the sum of the available kernel memory
stats in v2.  Adding separate memcg stats for all __GFP_ACCOUNT kernel
allocations is an impractical maintenance burden as there are a lot of those
all over the kernel code, with more use cases likely to show up in the
future.

Therefore, add a "kernel" memcg stat that is analogous to kmem page
counter, with added benefits such as using rstat infrastructure which
aggregates stats more efficiently.  Additionally, this provides a
lighter alternative in case the legacy kmem is deprecated in the future.

[yosryahmed@google.com: v2]
  Link: https://lkml.kernel.org/r/20220203193856.972500-1-yosryahmed@google.com

Link: https://lkml.kernel.org/r/20220201200823.3283171-1-yosryahmed@google.com


Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 086f694a
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back.
		Amount of memory used to cache filesystem data,
		including tmpfs and shared memory.

	  kernel (npn)
		Amount of total kernel memory, including
		(kernel_stack, pagetables, percpu, vmalloc, slab) in
		addition to other kernel memory use cases.

	  kernel_stack
		Amount of memory allocated to kernel stacks.

+1 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ enum memcg_stat_item {
	MEMCG_SOCK,
	MEMCG_PERCPU_B,
	MEMCG_VMALLOC,
	MEMCG_KMEM,
	MEMCG_NR_STAT,
};

+21 −6
Original line number Diff line number Diff line
@@ -1371,6 +1371,7 @@ struct memory_stat {
static const struct memory_stat memory_stats[] = {
	{ "anon",			NR_ANON_MAPPED			},
	{ "file",			NR_FILE_PAGES			},
	{ "kernel",			MEMCG_KMEM			},
	{ "kernel_stack",		NR_KERNEL_STACK_KB		},
	{ "pagetables",			NR_PAGETABLE			},
	{ "percpu",			MEMCG_PERCPU_B			},
@@ -2114,6 +2115,7 @@ static DEFINE_MUTEX(percpu_charge_mutex);
static void drain_obj_stock(struct obj_stock *stock);
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
				     struct mem_cgroup *root_memcg);
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages);

#else
static inline void drain_obj_stock(struct obj_stock *stock)
@@ -2124,6 +2126,9 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
{
	return false;
}
/*
 * No-op variant of memcg_account_kmem() provided on the #else side of the
 * surrounding preprocessor conditional: both arguments are ignored and
 * nothing is accounted.
 * NOTE(review): the guarding config symbol is not visible in this hunk —
 * presumably CONFIG_MEMCG_KMEM; confirm against the full file.
 */
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
{
}
#endif

/**
@@ -2979,6 +2984,18 @@ static void memcg_free_cache_id(int id)
	ida_simple_remove(&memcg_cache_ida, id);
}

/*
 * memcg_account_kmem: apply a kernel-memory page delta to a memcg
 * @memcg: memory cgroup whose accounting is updated
 * @nr_pages: signed number of pages; a positive value charges, any other
 *            value uncharges -@nr_pages
 *
 * The MEMCG_KMEM stat is always adjusted by @nr_pages.  The kmem page
 * counter is only touched when the memory controller is not on the
 * default hierarchy.
 */
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
{
	mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);

	/* On the default hierarchy only the stat is maintained. */
	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
		return;

	if (nr_pages > 0) {
		page_counter_charge(&memcg->kmem, nr_pages);
		return;
	}

	/* The page counter takes a magnitude, so flip the sign. */
	page_counter_uncharge(&memcg->kmem, -nr_pages);
}


/*
 * obj_cgroup_uncharge_pages: uncharge a number of kernel pages from an objcg
 * @objcg: object cgroup to uncharge
@@ -2991,8 +3008,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,

	memcg = get_mem_cgroup_from_objcg(objcg);

	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
		page_counter_uncharge(&memcg->kmem, nr_pages);
	memcg_account_kmem(memcg, -nr_pages);
	refill_stock(memcg, nr_pages);

	css_put(&memcg->css);
@@ -3018,8 +3034,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
	if (ret)
		goto out;

	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
		page_counter_charge(&memcg->kmem, nr_pages);
	memcg_account_kmem(memcg, nr_pages);
out:
	css_put(&memcg->css);

@@ -6801,8 +6816,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)
		page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);
		if (do_memsw_account())
			page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
		if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem)
			page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem);
		if (ug->nr_kmem)
			memcg_account_kmem(ug->memcg, -ug->nr_kmem);
		memcg_oom_recover(ug->memcg);
	}