Unverified Commit df5fdb96 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!4405 mm: improve performance of accounted kernel memory allocations

Merge Pull Request from: @ci-robot 
 
PR sync from: Liu Shixin <liushixin2@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/TC7IKPDQEMG3V5CHJKI5T2PN7R7DRBGM/ 
This patchset improves the performance of accounted kernel memory allocations
by ~30% as measured by a micro-benchmark [1]. The benchmark is very
straightforward: 1M kmalloc() allocations of 64 bytes each.

Below are the results with kernel memory accounting disabled, with the
original state, and with this patchset applied.

|-------------+---------------+----------+---------+--------|

As we can see, the patchset removes the majority of the overhead when there is
no actual accounting (a task belongs to the root memory cgroup) and almost
halves the accounting overhead otherwise.

The main idea is to get rid of unnecessary memcg to objcg conversions and switch
to a scope-based protection of objcgs, which eliminates extra operations with
objcg reference counters under a rcu read lock. More details are provided in
individual commit descriptions.

Roman Gushchin (7):
  mm: kmem: optimize get_obj_cgroup_from_current()
  mm: kmem: add direct objcg pointer to task_struct
  mm: kmem: make memcg keep a reference to the original objcg
  mm: kmem: scoped objcg protection
  percpu: scoped objcg protection
  mm: kmem: reimplement get_obj_cgroup_from_current()
  mm: kmem: properly initialize local objcg variable in
    current_obj_cgroup()


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8YU7J 
 
Link:https://gitee.com/openeuler/kernel/pulls/4405

 

Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 8d4e9be5 de75cb47
Loading
Loading
Loading
Loading
+26 −2
Original line number Diff line number Diff line
@@ -363,7 +363,13 @@ struct mem_cgroup {
#endif
#ifdef CONFIG_MEMCG_KMEM
	int kmemcg_id;
	/*
	 * memcg->objcg is wiped out as a part of the objcg reparenting
	 * process. memcg->orig_objcg preserves a pointer (and a reference)
	 * to the original objcg until the end of life of memcg.
	 */
	struct obj_cgroup __rcu	*objcg;
	struct obj_cgroup	*orig_objcg;
	/* list of inherited objcgs, protected by objcg_lock */
	struct list_head objcg_list;
#endif
@@ -1909,9 +1915,27 @@ bool mem_cgroup_kmem_disabled(void);
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);

struct obj_cgroup *get_obj_cgroup_from_current(void);
/*
 * The returned objcg pointer is safe to use without additional
 * protection within a scope. The scope is defined either by
 * the current task (similar to the "current" global variable)
 * or by set_active_memcg() pair.
 * Please, use obj_cgroup_get() to get a reference if the pointer
 * needs to be used outside of the local scope.
 */
struct obj_cgroup *current_obj_cgroup(void);
struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio);

static inline struct obj_cgroup *get_obj_cgroup_from_current(void)
{
	struct obj_cgroup *objcg = current_obj_cgroup();

	if (objcg)
		obj_cgroup_get(objcg);

	return objcg;
}

int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);

+4 −0
Original line number Diff line number Diff line
@@ -1476,6 +1476,10 @@ struct task_struct {
	struct mem_cgroup		*active_memcg;
#endif

#ifdef CONFIG_MEMCG_KMEM
	struct obj_cgroup		*objcg;
#endif

#ifdef CONFIG_BLK_CGROUP
	struct gendisk			*throttle_disk;
#endif
+4 −0
Original line number Diff line number Diff line
@@ -403,6 +403,10 @@ DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
 * given memcg.
 *
 * Please, make sure that caller has a reference to the passed memcg structure,
 * so its lifetime is guaranteed to exceed the scope between two
 * set_active_memcg() calls.
 *
 * NOTE: This function can nest. Users must save the return value and
 * reset the previous value after their own charging scope is over.
 */
+159 −28
Original line number Diff line number Diff line
@@ -275,6 +275,9 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
	return container_of(vmpr, struct mem_cgroup, vmpressure);
}

/*
 * Bit 0 of task_struct->objcg doubles as an "update pending" marker.
 * mem_cgroup_fork() and mem_cgroup_kmem_attach() set it, current_obj_cgroup()
 * tests it, and current_objcg_update() masks it off before using the value
 * as a pointer.
 */
#define CURRENT_OBJCG_UPDATE_BIT 0
#define CURRENT_OBJCG_UPDATE_FLAG (1UL << CURRENT_OBJCG_UPDATE_BIT)

#ifdef CONFIG_MEMCG_KMEM
static DEFINE_SPINLOCK(objcg_lock);

@@ -1113,19 +1116,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
}
EXPORT_SYMBOL(get_mem_cgroup_from_mm);

/*
 * Decide whether kmem accounting has to be skipped for the current context.
 * Returns true when no memcg to charge can be determined, false otherwise.
 */
static __always_inline bool memcg_kmem_bypass(void)
{
	/* An active memcg permits remote charging from any context. */
	if (unlikely(active_memcg()))
		return false;

	/*
	 * Without an active memcg the charge target cannot be determined
	 * outside of task context, for tasks without an mm, or for kernel
	 * threads.
	 */
	return !in_task() || !current->mm || (current->flags & PF_KTHREAD);
}

/**
 * mem_cgroup_iter - iterate over memory cgroup hierarchy
 * @root: hierarchy root
@@ -3128,28 +3118,105 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)

	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
		objcg = rcu_dereference(memcg->objcg);
		if (objcg && obj_cgroup_tryget(objcg))
		if (likely(objcg && obj_cgroup_tryget(objcg)))
			break;
		objcg = NULL;
	}
	return objcg;
}

__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
static struct obj_cgroup *current_objcg_update(void)
{
	struct obj_cgroup *objcg = NULL;
	struct mem_cgroup *memcg;
	struct obj_cgroup *old, *objcg = NULL;

	if (memcg_kmem_bypass())
	do {
		/* Atomically drop the update bit. */
		old = xchg(&current->objcg, NULL);
		if (old) {
			old = (struct obj_cgroup *)
				((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
			if (old)
				obj_cgroup_put(old);

			old = NULL;
		}

		/* If new objcg is NULL, no reason for the second atomic update. */
		if (!current->mm || (current->flags & PF_KTHREAD))
			return NULL;

		/*
		 * Release the objcg pointer from the previous iteration,
		 * if try_cmpxchg() below fails.
		 */
		if (unlikely(objcg)) {
			obj_cgroup_put(objcg);
			objcg = NULL;
		}

		/*
		 * Obtain the new objcg pointer. The current task can be
		 * asynchronously moved to another memcg and the previous
		 * memcg can be offlined. So let's get the memcg pointer
		 * and try get a reference to objcg under a rcu read lock.
		 */

		rcu_read_lock();
	if (unlikely(active_memcg()))
		memcg = active_memcg();
	else
		memcg = mem_cgroup_from_task(current);
		objcg = __get_obj_cgroup_from_memcg(memcg);
		rcu_read_unlock();

		/*
		 * Try set up a new objcg pointer atomically. If it
		 * fails, it means the update flag was set concurrently, so
		 * the whole procedure should be repeated.
		 */
	} while (!try_cmpxchg(&current->objcg, &old, objcg));

	return objcg;
}

/*
 * Return the objcg to charge in the current scope, without taking a
 * reference on it.
 *
 * In task context an active memcg on the task takes precedence; otherwise
 * the pointer cached in current->objcg is returned, refreshed first through
 * current_objcg_update() when its update flag is set. Outside of task
 * context only the per-CPU int_active_memcg is consulted. Returns NULL when
 * there is nothing to charge.
 */
__always_inline struct obj_cgroup *current_obj_cgroup(void)
{
	struct obj_cgroup *objcg = NULL;
	struct mem_cgroup *memcg;

	if (!in_task()) {
		memcg = this_cpu_read(int_active_memcg);
		if (likely(!memcg))
			return NULL;
	} else {
		memcg = current->active_memcg;
		if (likely(!memcg)) {
			objcg = READ_ONCE(current->objcg);
			if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG))
				objcg = current_objcg_update();
			/*
			 * The task itself holds the objcg reference, so the
			 * current task may use the pointer as is.
			 */
			return objcg;
		}
	}

	/*
	 * An active memcg was found. Walk towards the root and pick the
	 * first memcg that still has an objcg. The memcg pointer is
	 * protected by scope (see set_active_memcg()) and pins the
	 * corresponding objcg, so no additional protection is needed while
	 * inside that scope.
	 */
	while (!mem_cgroup_is_root(memcg)) {
		objcg = rcu_dereference_check(memcg->objcg, 1);
		if (likely(objcg))
			break;
		memcg = parent_mem_cgroup(memcg);
	}

	return objcg;
}

@@ -3247,15 +3314,15 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
	struct obj_cgroup *objcg;
	int ret = 0;

	objcg = get_obj_cgroup_from_current();
	objcg = current_obj_cgroup();
	if (objcg) {
		ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
		if (!ret) {
			obj_cgroup_get(objcg);
			page->memcg_data = (unsigned long)objcg |
				MEMCG_DATA_KMEM;
			return 0;
		}
		obj_cgroup_put(objcg);
	}
	return ret;
}
@@ -3899,6 +3966,8 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)

	objcg->memcg = memcg;
	rcu_assign_pointer(memcg->objcg, objcg);
	obj_cgroup_get(objcg);
	memcg->orig_objcg = objcg;

	static_branch_enable(&memcg_kmem_online_key);

@@ -6362,6 +6431,9 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
{
	int node;

	if (memcg->orig_objcg)
		obj_cgroup_put(memcg->orig_objcg);

	for_each_node(node)
		free_mem_cgroup_per_node_info(memcg, node);
	kfree(memcg->vmstats);
@@ -7478,6 +7550,7 @@ static void mem_cgroup_move_task(void)
		mem_cgroup_clear_mc();
	}
}

#else	/* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
@@ -7491,8 +7564,39 @@ static void mem_cgroup_move_task(void)
}
#endif

#ifdef CONFIG_MEMCG_KMEM
/*
 * Fork callback: initialize task->objcg to the bare update flag, with no
 * pointer bits set. current_obj_cgroup() sees the flag on first use and
 * resolves the real objcg lazily via current_objcg_update().
 *
 * A plain store is used. Per the original author's note, no synchronization
 * is needed because this, like current_objcg_update(), is always performed
 * on the current task.
 */
static void mem_cgroup_fork(struct task_struct *task)
{
	unsigned long pending = CURRENT_OBJCG_UPDATE_FLAG;

	task->objcg = (struct obj_cgroup *)pending;
}

/*
 * Exit callback: drop the objcg reference cached in task->objcg, if any,
 * and clear the field.
 */
static void mem_cgroup_exit(struct task_struct *task)
{
	unsigned long raw = (unsigned long)task->objcg;
	struct obj_cgroup *cached;

	/* Mask off the update flag to recover the actual pointer. */
	cached = (struct obj_cgroup *)(raw & ~CURRENT_OBJCG_UPDATE_FLAG);
	if (cached)
		obj_cgroup_put(cached);

	/*
	 * Kernel allocations may still happen after this point; they are
	 * deliberately ignored. Per the original author's note, a plain store
	 * needs no synchronization because this, like current_objcg_update(),
	 * is always performed on the current task.
	 */
	task->objcg = NULL;
}
#endif

#ifdef CONFIG_LRU_GEN
static void mem_cgroup_attach(struct cgroup_taskset *tset)
static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *css;
@@ -7510,10 +7614,31 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
	task_unlock(task);
}
#else
static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
#endif /* CONFIG_LRU_GEN */

#ifdef CONFIG_MEMCG_KMEM
static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *css;

	cgroup_taskset_for_each(task, css, tset) {
		/* atomically set the update bit */
		set_bit(CURRENT_OBJCG_UPDATE_BIT, (unsigned long *)&task->objcg);
	}
}
#else
static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
#endif /* CONFIG_MEMCG_KMEM */

#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
/*
 * Attach handler installed as memory_cgrp_subsys.attach: runs the LRU-gen
 * attach step and then the kmem attach step on the same taskset. Either
 * helper is an empty stub when its config option is disabled.
 */
static void mem_cgroup_attach(struct cgroup_taskset *tset)
{
	mem_cgroup_lru_gen_attach(tset);
	mem_cgroup_kmem_attach(tset);
}
#endif /* CONFIG_LRU_GEN */
#endif

static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
@@ -7982,9 +8107,15 @@ struct cgroup_subsys memory_cgrp_subsys = {
	.css_reset = mem_cgroup_css_reset,
	.css_rstat_flush = mem_cgroup_css_rstat_flush,
	.can_attach = mem_cgroup_can_attach,
#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
	.attach = mem_cgroup_attach,
#endif
	.cancel_attach = mem_cgroup_cancel_attach,
	.post_attach = mem_cgroup_move_task,
#ifdef CONFIG_MEMCG_KMEM
	.fork = mem_cgroup_fork,
	.exit = mem_cgroup_exit,
#endif
	.dfl_cftypes = memory_files,
	.legacy_cftypes = mem_cgroup_legacy_files,
	.early_init = 0,
+3 −5
Original line number Diff line number Diff line
@@ -1628,14 +1628,12 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
	if (!memcg_kmem_online() || !(gfp & __GFP_ACCOUNT))
		return true;

	objcg = get_obj_cgroup_from_current();
	objcg = current_obj_cgroup();
	if (!objcg)
		return true;

	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size))) {
		obj_cgroup_put(objcg);
	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
		return false;
	}

	*objcgp = objcg;
	return true;
@@ -1649,6 +1647,7 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
		return;

	if (likely(chunk && chunk->obj_cgroups)) {
		obj_cgroup_get(objcg);
		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;

		rcu_read_lock();
@@ -1657,7 +1656,6 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
		rcu_read_unlock();
	} else {
		obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
		obj_cgroup_put(objcg);
	}
}

Loading