Commit d5ea6f5f authored by Liu Shixin's avatar Liu Shixin
Browse files

mm/dynamic_pool: check resv for HugeTLB allocation from dpool

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8S9BY


CVE: NA

--------------------------------

The HugeTLB system uses resv_huge_pages to avoid SIGBUS. Introduce a
simplified version of that mechanism to dpool to protect HugeTLB allocation.

Signed-off-by: default avatarLiu Shixin <liushixin2@huawei.com>
parent 94095df4
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#include <linux/magic.h>
#include <linux/migrate.h>
#include <linux/uio.h>
#include <linux/dynamic_pool.h>

#include <linux/uaccess.h>
#include <linux/sched/mm.h>
@@ -1294,6 +1295,9 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
	 */
	mpol_shared_policy_init(&p->policy, NULL);

	/* Initialize hpool here in case of a quick call to destroy */
	dynamic_pool_bind_file(p, sbinfo->hstate);

	return &p->vfs_inode;
}

@@ -1306,6 +1310,7 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
{
	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
	dynamic_pool_unbind_file(HUGETLBFS_I(inode));
}

static const struct address_space_operations hugetlbfs_aops = {
+34 −0
Original line number Diff line number Diff line
@@ -74,10 +74,22 @@ static inline bool page_from_dynamic_pool(struct page *page)
	return PagePool(page);
}

static inline bool file_in_dynamic_pool(struct hugetlbfs_inode_info *p)
{
	/* A file belongs to a dpool only when the feature is enabled and
	 * the hugetlbfs inode has been bound to a pool.
	 */
	return dpool_enabled && p && p->dpool;
}

int dynamic_pool_can_attach(struct task_struct *tsk, struct mem_cgroup *memcg);
struct page *dynamic_pool_alloc_page(gfp_t gfp, unsigned int order,
				     unsigned int alloc_flags);
void dynamic_pool_free_page(struct page *page);
void dynamic_pool_bind_file(struct hugetlbfs_inode_info *p, struct hstate *h);
void dynamic_pool_unbind_file(struct hugetlbfs_inode_info *p);
int dynamic_pool_hugetlb_acct_memory(struct hstate *h, long delta,
				     struct hugetlbfs_inode_info *p);

void dynamic_pool_inherit(struct mem_cgroup *memcg);
int dynamic_pool_destroy(struct cgroup *cgrp, bool *clear_css_online);
@@ -115,6 +127,28 @@ static inline void dynamic_pool_free_page(struct page *page)
{
}

#ifdef CONFIG_HUGETLBFS
/* !CONFIG_DYNAMIC_POOL stubs: hugetlbfs files never belong to a dpool. */
static inline bool file_in_dynamic_pool(struct hugetlbfs_inode_info *p)
{
	return false;
}

/* No-op: there is no pool to bind the hugetlbfs inode to. */
static inline void dynamic_pool_bind_file(struct hugetlbfs_inode_info *p,
					  struct hstate *h)
{
}

/* No-op counterpart of the bind stub above. */
static inline void dynamic_pool_unbind_file(struct hugetlbfs_inode_info *p)
{
}

/*
 * NOTE(review): returns -ENOMEM, but callers appear to reach this only
 * when file_in_dynamic_pool() is true, which the stub above makes
 * impossible — the value should therefore be unreachable. TODO confirm
 * against all call sites.
 */
static inline int dynamic_pool_hugetlb_acct_memory(struct hstate *h, long delta,
						   struct hugetlbfs_inode_info *p)
{
	return -ENOMEM;
}
#endif

static inline void dynamic_pool_inherit(struct mem_cgroup *memcg)
{
}
+3 −0
Original line number Diff line number Diff line
@@ -596,6 +596,9 @@ struct hugetlbfs_inode_info {
	struct shared_policy policy;
	struct inode vfs_inode;
	unsigned int seals;
#ifdef CONFIG_DYNAMIC_POOL
	struct dynamic_pool *dpool;
#endif
};

static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
+62 −0
Original line number Diff line number Diff line
@@ -731,6 +731,68 @@ void dynamic_pool_free_page(struct page *page)
	dpool_put(dpool);
}

/*
 * Bind a hugetlbfs inode to the current task's dynamic pool.
 * Only PMD- and PUD-sized hstates are backed by a dpool; any other
 * page size leaves the inode unbound (p->dpool == NULL).
 */
void dynamic_pool_bind_file(struct hugetlbfs_inode_info *p, struct hstate *h)
{
	unsigned long hpage_size;

	if (!dpool_enabled || !p)
		return;

	hpage_size = huge_page_size(h);
	p->dpool = (hpage_size == PMD_SIZE || hpage_size == PUD_SIZE) ?
		   dpool_get_from_task(current) : NULL;
}

/*
 * Drop the inode's reference on its dynamic pool, if any.
 * Clears p->dpool before releasing the reference.
 */
void dynamic_pool_unbind_file(struct hugetlbfs_inode_info *p)
{
	struct dynamic_pool *dpool;

	if (!dpool_enabled || !p)
		return;

	dpool = p->dpool;
	if (!dpool)
		return;

	p->dpool = NULL;
	dpool_put(dpool);
}

/*
 * Account a hugetlb reservation change against the file's dynamic pool.
 * @delta > 0 reserves pages; @delta < 0 releases reservations.
 * Returns 0 on success, -ENOMEM if not enough unreserved free pages.
 * Files not bound to a dpool are accounted by the generic path (return 0).
 */
int dynamic_pool_hugetlb_acct_memory(struct hstate *h, long delta,
				     struct hugetlbfs_inode_info *p)
{
	struct dynamic_pool *dpool;
	struct pages_pool *pool;
	unsigned long flags;
	int type;
	int ret = -ENOMEM;

	if (!dpool_enabled || !p || !p->dpool)
		return 0;

	dpool = p->dpool;
	spin_lock_irqsave(&dpool->lock, flags);

	/* Pick the sub-pool matching the hstate's page size. */
	if (hstate_is_gigantic(h))
		type = PAGES_POOL_1G;
	else
		type = PAGES_POOL_2M;
	pool = &dpool->pool[type];

	if (delta > 0) {
		/* Reserve only if enough unreserved free pages exist. */
		if (delta <= pool->free_huge_pages - pool->resv_huge_pages) {
			pool->resv_huge_pages += delta;
			ret = 0;
		}
	} else {
		unsigned long unreserve = (unsigned long)(-delta);

		/*
		 * The (unsigned long) cast indicates resv_huge_pages is an
		 * unsigned counter, so a "resv_huge_pages < 0" check after
		 * the subtraction can never fire.  Detect underflow before
		 * subtracting instead.
		 */
		WARN_ON(pool->resv_huge_pages < unreserve);
		pool->resv_huge_pages -= unreserve;
		ret = 0;
	}
	spin_unlock_irqrestore(&dpool->lock, flags);

	return ret;
}

/* === dynamic pool function ========================================== */

static void dpool_dump_child_memcg(struct mem_cgroup *memcg, void *message)
+41 −23
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include <linux/memory.h>
#include <linux/mm_inline.h>
#include <linux/share_pool.h>
#include <linux/dynamic_pool.h>

#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -92,7 +93,8 @@ static int num_fault_mutexes;
struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;

/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
static int hugetlb_acct_memory(struct hstate *h, long delta,
			       struct hugetlbfs_inode_info *info);
static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
@@ -123,7 +125,7 @@ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
	if (subpool_is_free(spool)) {
		if (spool->min_hpages != -1)
			hugetlb_acct_memory(spool->hstate,
						-spool->min_hpages);
						-spool->min_hpages, NULL);
		kfree(spool);
	}
}
@@ -143,7 +145,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
	spool->hstate = h;
	spool->min_hpages = min_hpages;

	if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) {
	if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages, NULL)) {
		kfree(spool);
		return NULL;
	}
@@ -171,13 +173,16 @@ void hugepage_put_subpool(struct hugepage_subpool *spool)
 * a subpool minimum size must be maintained.
 */
static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
				      long delta)
				      long delta, struct hugetlbfs_inode_info *info)
{
	long ret = delta;

	if (!spool)
		return ret;

	if (file_in_dynamic_pool(info))
		return ret;

	spin_lock_irq(&spool->lock);

	if (spool->max_hpages != -1) {		/* maximum size accounting */
@@ -216,7 +221,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
 * in the case where a subpool minimum size must be maintained.
 */
static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
				       long delta)
				       long delta, struct hugetlbfs_inode_info *info)
{
	long ret = delta;
	unsigned long flags;
@@ -224,6 +229,9 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
	if (!spool)
		return delta;

	if (file_in_dynamic_pool(info))
		return ret;

	spin_lock_irqsave(&spool->lock, flags);

	if (spool->max_hpages != -1)		/* maximum size accounting */
@@ -935,14 +943,15 @@ static long region_del(struct resv_map *resv, long f, long t)
void hugetlb_fix_reserve_counts(struct inode *inode)
{
	struct hugepage_subpool *spool = subpool_inode(inode);
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
	long rsv_adjust;
	bool reserved = false;

	rsv_adjust = hugepage_subpool_get_pages(spool, 1);
	rsv_adjust = hugepage_subpool_get_pages(spool, 1, info);
	if (rsv_adjust > 0) {
		struct hstate *h = hstate_inode(inode);

		if (!hugetlb_acct_memory(h, 1))
		if (!hugetlb_acct_memory(h, 1, info))
			reserved = true;
	} else if (!rsv_adjust) {
		reserved = true;
@@ -1928,7 +1937,7 @@ void free_huge_folio(struct folio *folio)
		 * after page is free.  Therefore, force restore_reserve
		 * operation.
		 */
		if (hugepage_subpool_put_pages(spool, 1) == 0)
		if (hugepage_subpool_put_pages(spool, 1, NULL) == 0)
			restore_reserve = true;
	}

@@ -3118,6 +3127,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
				    unsigned long addr, int avoid_reserve)
{
	struct hugepage_subpool *spool = subpool_vma(vma);
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(file_inode(vma->vm_file));
	struct hstate *h = hstate_vma(vma);
	struct folio *folio;
	long map_chg, map_commit;
@@ -3144,7 +3154,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
	 * checked against any subpool limit.
	 */
	if (map_chg || avoid_reserve) {
		gbl_chg = hugepage_subpool_get_pages(spool, 1);
		gbl_chg = hugepage_subpool_get_pages(spool, 1, info);
		if (gbl_chg < 0) {
			vma_end_reservation(h, vma, addr);
			return ERR_PTR(-ENOSPC);
@@ -3224,8 +3234,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
		 */
		long rsv_adjust;

		rsv_adjust = hugepage_subpool_put_pages(spool, 1);
		hugetlb_acct_memory(h, -rsv_adjust);
		rsv_adjust = hugepage_subpool_put_pages(spool, 1, info);
		hugetlb_acct_memory(h, -rsv_adjust, info);
		if (deferred_reserve)
			hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
					pages_per_huge_page(h), folio);
@@ -3240,7 +3250,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
						    h_cg);
out_subpool_put:
	if (map_chg || avoid_reserve)
		hugepage_subpool_put_pages(spool, 1);
		hugepage_subpool_put_pages(spool, 1, info);
	vma_end_reservation(h, vma, addr);
	return ERR_PTR(-ENOSPC);
}
@@ -4927,13 +4937,17 @@ unsigned long hugetlb_total_pages(void)
	return nr_total_pages;
}

static int hugetlb_acct_memory(struct hstate *h, long delta)
static int hugetlb_acct_memory(struct hstate *h, long delta,
			       struct hugetlbfs_inode_info *info)
{
	int ret = -ENOMEM;

	if (!delta)
		return 0;

	if (file_in_dynamic_pool(info))
		return dynamic_pool_hugetlb_acct_memory(h, delta, info);

	spin_lock_irq(&hugetlb_lock);
	/*
	 * When cpuset is configured, it breaks the strict hugetlb page
@@ -5020,6 +5034,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
	struct hstate *h = hstate_vma(vma);
	struct resv_map *resv;
	struct hugepage_subpool *spool = subpool_vma(vma);
	struct hugetlbfs_inode_info *info;
	unsigned long reserve, start, end;
	long gbl_reserve;

@@ -5039,8 +5054,9 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
		 * Decrement reserve counts.  The global reserve count may be
		 * adjusted if the subpool has a minimum size.
		 */
		gbl_reserve = hugepage_subpool_put_pages(spool, reserve);
		hugetlb_acct_memory(h, -gbl_reserve);
		info = HUGETLBFS_I(file_inode(vma->vm_file));
		gbl_reserve = hugepage_subpool_put_pages(spool, reserve, info);
		hugetlb_acct_memory(h, -gbl_reserve, info);
	}

	kref_put(&resv->refs, resv_map_release);
@@ -6864,6 +6880,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
	long chg = -1, add = -1;
	struct hstate *h = hstate_inode(inode);
	struct hugepage_subpool *spool = subpool_inode(inode);
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
	struct resv_map *resv_map;
	struct hugetlb_cgroup *h_cg = NULL;
	long gbl_reserve, regions_needed = 0;
@@ -6934,7 +6951,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
	 * the subpool has a minimum size, there may be some global
	 * reservations already in place (gbl_reserve).
	 */
	gbl_reserve = hugepage_subpool_get_pages(spool, chg);
	gbl_reserve = hugepage_subpool_get_pages(spool, chg, info);
	if (gbl_reserve < 0)
		goto out_uncharge_cgroup;

@@ -6942,7 +6959,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
	 * Check enough hugepages are available for the reservation.
	 * Hand the pages back to the subpool if there are not
	 */
	if (hugetlb_acct_memory(h, gbl_reserve) < 0)
	if (hugetlb_acct_memory(h, gbl_reserve, info) < 0)
		goto out_put_pages;

	/*
@@ -6960,7 +6977,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
		add = region_add(resv_map, from, to, regions_needed, h, h_cg);

		if (unlikely(add < 0)) {
			hugetlb_acct_memory(h, -gbl_reserve);
			hugetlb_acct_memory(h, -gbl_reserve, info);
			goto out_put_pages;
		} else if (unlikely(chg > add)) {
			/*
@@ -6981,8 +6998,8 @@ bool hugetlb_reserve_pages(struct inode *inode,
				(chg - add) * pages_per_huge_page(h), h_cg);

			rsv_adjust = hugepage_subpool_put_pages(spool,
								chg - add);
			hugetlb_acct_memory(h, -rsv_adjust);
								chg - add, info);
			hugetlb_acct_memory(h, -rsv_adjust, info);
		} else if (h_cg) {
			/*
			 * The file_regions will hold their own reference to
@@ -6997,7 +7014,7 @@ bool hugetlb_reserve_pages(struct inode *inode,

out_put_pages:
	/* put back original number of pages, chg */
	(void)hugepage_subpool_put_pages(spool, chg);
	(void)hugepage_subpool_put_pages(spool, chg, info);
out_uncharge_cgroup:
	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
					    chg * pages_per_huge_page(h), h_cg);
@@ -7023,6 +7040,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
	struct resv_map *resv_map = inode_resv_map(inode);
	long chg = 0;
	struct hugepage_subpool *spool = subpool_inode(inode);
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
	long gbl_reserve;

	/*
@@ -7051,8 +7069,8 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
	 * Note that !resv_map implies freed == 0. So (chg - freed)
	 * won't go negative.
	 */
	gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed));
	hugetlb_acct_memory(h, -gbl_reserve);
	gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed), info);
	hugetlb_acct_memory(h, -gbl_reserve, info);

	return 0;
}