Commit 04f94e3f authored by Dan Schatzberg, committed by Linus Torvalds

mm: charge active memcg when no mm is set

set_active_memcg() worked for kernel allocations but was silently ignored
for user pages.

This patch establishes a precedence order for who gets charged:

1. If there is a memcg associated with the page already, that memcg is
   charged. This happens during swapin.

2. If an explicit mm is passed, mm->memcg is charged. This happens
   during page faults, which can be triggered in remote VMs (e.g. gup).

3. Otherwise consult the current process context. If there is an
   active_memcg, use that. Otherwise, current->mm->memcg.

Previously, if a NULL mm was passed to mem_cgroup_charge (case 3), it would
always charge the root cgroup.  Now it looks up the active_memcg first
(falling back to charging the root cgroup if none is set).
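
For illustration only (not part of this patch): the caller-side pattern that
this change makes effective for user pages looks roughly like the sketch
below. issue_backing_io() and do_backing_io() are hypothetical stand-ins for
a driver (such as loop) doing I/O on behalf of another cgroup;
set_active_memcg() and struct mem_cgroup are the existing interfaces from
linux/sched/mm.h and linux/memcontrol.h.

#include <linux/memcontrol.h>
#include <linux/sched/mm.h>

/* Hypothetical worker: would populate the page cache (user pages) here. */
static int do_backing_io(void)
{
        return 0;
}

static int issue_backing_io(struct mem_cgroup *memcg)
{
        struct mem_cgroup *old_memcg;
        int ret;

        /*
         * Route subsequent charges to @memcg. With this patch that now
         * includes user pages charged with a NULL mm, e.g. via
         * mem_cgroup_charge(page, NULL, gfp) in __add_to_page_cache_locked().
         */
        old_memcg = set_active_memcg(memcg);
        ret = do_backing_io();
        set_active_memcg(old_memcg);    /* restore the previous active memcg */

        return ret;
}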

Link: https://lkml.kernel.org/r/20210610173944.1203706-3-schatzberg.dan@gmail.com


Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Chris Down <chris@chrisdown.name>
Acked-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 87579e9b
mm/filemap.c (+1 −1)
@@ -872,7 +872,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
	page->index = offset;

	if (!huge) {
-		error = mem_cgroup_charge(page, current->mm, gfp);
+		error = mem_cgroup_charge(page, NULL, gfp);
		if (error)
			goto error;
		charged = true;
mm/memcontrol.c (+27 −14)
@@ -897,13 +897,24 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
}
EXPORT_SYMBOL(mem_cgroup_from_task);

+static __always_inline struct mem_cgroup *active_memcg(void)
+{
+	if (in_interrupt())
+		return this_cpu_read(int_active_memcg);
+	else
+		return current->active_memcg;
+}

/**
 * get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
+ * @mm: mm from which memcg should be extracted. It can be NULL.
 *
- * Obtain a reference on mm->memcg and returns it if successful. Otherwise
- * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
- * returned.
+ * Obtain a reference on mm->memcg and returns it if successful. If mm
+ * is NULL, then the memcg is chosen as follows:
+ * 1) The active memcg, if set.
+ * 2) current->mm->memcg, if available
+ * 3) root memcg
+ * If mem_cgroup is disabled, NULL is returned.
 */
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
{
@@ -921,8 +932,17 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
	 * counting is disabled on the root level in the
	 * cgroup core. See CSS_NO_REF.
	 */
-	if (unlikely(!mm))
-		return root_mem_cgroup;
+	if (unlikely(!mm)) {
+		memcg = active_memcg();
+		if (unlikely(memcg)) {
+			/* remote memcg must hold a ref */
+			css_get(&memcg->css);
+			return memcg;
+		}
+		mm = current->mm;
+		if (unlikely(!mm))
+			return root_mem_cgroup;
+	}

	rcu_read_lock();
	do {
@@ -935,14 +955,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
}
EXPORT_SYMBOL(get_mem_cgroup_from_mm);

-static __always_inline struct mem_cgroup *active_memcg(void)
-{
-	if (in_interrupt())
-		return this_cpu_read(int_active_memcg);
-	else
-		return current->active_memcg;
-}

static __always_inline bool memcg_kmem_bypass(void)
{
	/* Allow remote memcg charging from any context. */
@@ -6711,7 +6723,8 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg,
 * @gfp_mask: reclaim mode
 *
 * Try to charge @page to the memcg that @mm belongs to, reclaiming
- * pages according to @gfp_mask if necessary.
+ * pages according to @gfp_mask if necessary. if @mm is NULL, try to
+ * charge to the active memcg.
 *
 * Do not use this for pages allocated for swapin.
 *
mm/shmem.c (+2 −2)
@@ -1695,7 +1695,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
+	struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
	struct swap_info_struct *si;
	struct page *page = NULL;
	swp_entry_t swap;
@@ -1828,7 +1828,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
	}

	sbinfo = SHMEM_SB(inode->i_sb);
-	charge_mm = vma ? vma->vm_mm : current->mm;
+	charge_mm = vma ? vma->vm_mm : NULL;

	page = pagecache_get_page(mapping, index,
					FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);