Unverified Commit d68969ba authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!4582 cgroup/hugetlb: hugetlb accounting

Merge Pull Request from: @chenridong 
 
Add hugetlb accounting for cgroup v2 
 
Link:https://gitee.com/openeuler/kernel/pulls/4582

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Lu Jialin <lujialin4@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents c3ecca33 95b67895
Loading
Loading
Loading
Loading
+29 −0
Original line number Diff line number Diff line
@@ -210,6 +210,35 @@ cgroup v2 currently supports the following mount options.
        relying on the original semantics (e.g. specifying bogusly
        high 'bypass' protection values at higher tree levels).

  memory_hugetlb_accounting
        Count HugeTLB memory usage towards the cgroup's overall
        memory usage for the memory controller (for the purpose of
        statistics reporting and memory protection). This is a new
        behavior that could regress existing setups, so it must be
        explicitly opted in with this mount option.

        A few caveats to keep in mind:

        * There is no HugeTLB pool management involved in the memory
          controller. The pre-allocated pool does not belong to anyone.
          Specifically, when a new HugeTLB folio is allocated to
          the pool, it is not accounted for from the perspective of the
          memory controller. It is only charged to a cgroup when it is
          actually used (e.g. at page fault time). Host memory
          overcommit management has to consider this when configuring
          hard limits. In general, HugeTLB pool management should be
          done via other mechanisms (such as the HugeTLB controller).
        * Failure to charge a HugeTLB folio to the memory controller
          results in SIGBUS. This could happen even if the HugeTLB pool
          still has pages available (but the cgroup limit is hit and
          reclaim attempt fails).
        * Charging HugeTLB memory towards the memory controller affects
          memory protection and reclaim dynamics. Any userspace tuning
          (e.g. of low, min limits) needs to take this into account.
        * HugeTLB pages utilized while this option is not selected
          will not be tracked by the memory controller (even if cgroup
          v2 is remounted later on).


Organizing Processes and Threads
--------------------------------
+5 −0
Original line number Diff line number Diff line
@@ -116,6 +116,11 @@ enum {
	 * Enable recursive subtree protection
	 */
	CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18),

	/*
	 * Enable hugetlb accounting for the memory controller.
	 */
	 CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
};

/* cftype->flags */
+37 −0
Original line number Diff line number Diff line
@@ -781,6 +781,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
		page_counter_read(&memcg->memory);
}

void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg);

int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);

/**
@@ -805,6 +807,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
	return __mem_cgroup_charge(folio, mm, gfp);
}

int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
		long nr_pages);

int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
				  gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -832,6 +837,10 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
	__mem_cgroup_uncharge_list(page_list);
}

void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages);

void mem_cgroup_replace_folio(struct folio *old, struct folio *new);

void mem_cgroup_migrate(struct folio *old, struct folio *new);

/**
@@ -888,6 +897,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);

struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);

struct mem_cgroup *get_mem_cgroup_from_current(void);

struct lruvec *folio_lruvec_lock(struct folio *folio);
struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
@@ -1388,12 +1399,23 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
	return false;
}

/*
 * Stub form of mem_cgroup_commit_charge(): the body is empty, so both
 * arguments are ignored and no charge is committed.
 * NOTE(review): presumably the variant built when the memory controller
 * is compiled out (!CONFIG_MEMCG) -- confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_commit_charge(struct folio *folio,
		struct mem_cgroup *memcg)
{
}

/*
 * Stub form of mem_cgroup_charge(): ignores folio, mm and gfp and always
 * returns 0, so callers never observe a charge failure from this variant.
 * NOTE(review): presumably the !CONFIG_MEMCG counterpart of the inline
 * wrapper around __mem_cgroup_charge() -- confirm against the #ifdef.
 */
static inline int mem_cgroup_charge(struct folio *folio,
		struct mm_struct *mm, gfp_t gfp)
{
	return 0;
}

/*
 * Stub form of mem_cgroup_hugetlb_try_charge(): ignores memcg, gfp and
 * nr_pages and always returns 0.
 * NOTE(review): 0 is presumably "charge succeeded"; the real
 * implementation may use other return codes that this stub never
 * produces -- confirm against the definition in mm/memcontrol.c.
 */
static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg,
		gfp_t gfp, long nr_pages)
{
	return 0;
}

static inline int mem_cgroup_swapin_charge_folio(struct folio *folio,
			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
{
@@ -1412,6 +1434,16 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}

/*
 * Stub form of mem_cgroup_cancel_charge(): the body is empty, so memcg
 * and nr_pages are ignored and nothing is uncharged.
 * NOTE(review): presumably paired with the no-op charge stubs in the
 * same (!CONFIG_MEMCG) section -- confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
		unsigned int nr_pages)
{
}

/*
 * Stub form of mem_cgroup_replace_folio(): the body is empty, so neither
 * the old nor the new folio is touched.
 * NOTE(review): presumably the !CONFIG_MEMCG variant of the helper that
 * replace_page_cache_folio() calls -- confirm against the enclosing #ifdef.
 */
static inline void mem_cgroup_replace_folio(struct folio *old,
		struct folio *new)
{
}

/*
 * Stub form of mem_cgroup_migrate(): the body is empty, so neither the
 * old nor the new folio is touched.
 * NOTE(review): presumably the !CONFIG_MEMCG variant -- confirm against
 * the enclosing #ifdef.
 */
static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
{
}
@@ -1449,6 +1481,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
	return NULL;
}

/*
 * Stub form of get_mem_cgroup_from_current(): always returns NULL, so
 * callers of this variant never receive a memcg pointer.
 * NOTE(review): presumably the !CONFIG_MEMCG variant; callers must be
 * prepared for NULL -- confirm against the enclosing #ifdef and call sites.
 */
static inline struct mem_cgroup *get_mem_cgroup_from_current(void)
{
	return NULL;
}

static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
+14 −1
Original line number Diff line number Diff line
@@ -1906,6 +1906,7 @@ enum cgroup2_param {
	Opt_favordynmods,
	Opt_memory_localevents,
	Opt_memory_recursiveprot,
	Opt_memory_hugetlb_accounting,
	nr__cgroup2_params
};

@@ -1914,6 +1915,7 @@ static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
	fsparam_flag("favordynmods",		Opt_favordynmods),
	fsparam_flag("memory_localevents",	Opt_memory_localevents),
	fsparam_flag("memory_recursiveprot",	Opt_memory_recursiveprot),
	fsparam_flag("memory_hugetlb_accounting", Opt_memory_hugetlb_accounting),
	{}
};

@@ -1940,6 +1942,9 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param
	case Opt_memory_recursiveprot:
		ctx->flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT;
		return 0;
	case Opt_memory_hugetlb_accounting:
		ctx->flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
		return 0;
	}
	return -EINVAL;
}
@@ -1964,6 +1969,11 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
			cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_RECURSIVE_PROT;

		if (root_flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING)
			cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING;
	}
}

@@ -1977,6 +1987,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
		seq_puts(seq, ",memory_localevents");
	if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
		seq_puts(seq, ",memory_recursiveprot");
	if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING)
		seq_puts(seq, ",memory_hugetlb_accounting");
	return 0;
}

@@ -7163,7 +7175,8 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
			"nsdelegate\n"
			"favordynmods\n"
			"memory_localevents\n"
			"memory_recursiveprot\n");
			"memory_recursiveprot\n"
			"memory_hugetlb_accounting\n");
}
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);

+1 −1
Original line number Diff line number Diff line
@@ -820,7 +820,7 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
	new->mapping = mapping;
	new->index = offset;

	mem_cgroup_migrate(old, new);
	mem_cgroup_replace_folio(old, new);

	xas_lock_irq(&xas);
	xas_store(&xas, new);
Loading