Commit c1434668 authored by Lijun Fang, committed by Zheng Zengkai
Browse files

mm: Allow allocating CDM node memory for MPOL_BIND

ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4JMLR


CVE: NA
-----------------

CDM nodes should not be part of mems_allowed. However, allocation from a
CDM node must still be allowed when mpol->mode is MPOL_BIND.

Signed-off-by: Lijun Fang <fanglijun3@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parent a9850de3
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -283,10 +283,6 @@ extern unsigned int kobjsize(const void *objp);
#define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
#define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */

#ifdef CONFIG_COHERENT_DEVICE
#define VM_CDM		0x00800000	/* Contains coherent device memory */
#endif

#define VM_SYNC		0x00800000	/* Synchronous page faults */
#define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
#define VM_WIPEONFORK	0x02000000	/* Wipe VMA contents in child. */
@@ -303,6 +299,10 @@ extern unsigned int kobjsize(const void *objp);
#define VM_NOHUGEPAGE	0x40000000	/* MADV_NOHUGEPAGE marked this vma */
#define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */

#ifdef CONFIG_COHERENT_DEVICE
/*
 * VMA flag marking a mapping that contains coherent device memory (CDM).
 *
 * NOTE(review): 0x100000000 is bit 32, so the value does not fit in 32 bits,
 * and it falls inside the bit[32:36] range that the VM_USWAP comment below
 * describes as the Intel protection key. Confirm that no other high VM_*
 * flag uses bit 32 on the configurations where CONFIG_COHERENT_DEVICE can
 * be enabled.
 */
#define VM_CDM		0x100000000	/* Contains coherent device memory */
#endif

#ifdef CONFIG_USERSWAP
/* bit[32:36] is the protection key of intel, so use a large value for VM_USWAP */
#define VM_USWAP      0x2000000000000000
+12 −4
Original line number Diff line number Diff line
@@ -1091,13 +1091,20 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
}

static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
		nodemask_t *nmask)
		nodemask_t *nmask, struct mempolicy *mpol)
{
	unsigned int cpuset_mems_cookie;
	struct zonelist *zonelist;
	struct zone *zone;
	struct zoneref *z;
	int node = NUMA_NO_NODE;
	bool mbind_cdmnode = false;

#ifdef CONFIG_COHERENT_DEVICE
	if (is_cdm_node(nid) && ((mpol != NULL && mpol->mode == MPOL_BIND) ||
							(gfp_mask & __GFP_THISNODE)))
		mbind_cdmnode = true;
#endif

	zonelist = node_zonelist(nid, gfp_mask);

@@ -1106,7 +1113,8 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
		struct page *page;

		if (!cpuset_zone_allowed(zone, gfp_mask))
		if (!cpuset_zone_allowed(zone, gfp_mask) &&
		    mbind_cdmnode == false)
			continue;
		/*
		 * no need to ask again on the same node. Pool is node rather than
@@ -1152,7 +1160,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,

	gfp_mask = htlb_alloc_mask(h);
	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask, mpol);
	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
		SetHPageRestoreReserve(page);
		h->resv_huge_pages--;
@@ -2032,7 +2040,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
	if (h->free_huge_pages - h->resv_huge_pages > 0) {
		struct page *page;

		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask, NULL);
		if (page) {
			spin_unlock_irq(&hugetlb_lock);
			return page;
+3 −0
Original line number Diff line number Diff line
@@ -593,6 +593,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
#else
#define ALLOC_NOFRAGMENT	  0x0
#endif
#ifdef CONFIG_COHERENT_DEVICE
/*
 * Allocation flag for coherent device memory (CDM) nodes. In this change,
 * the zone loop does not skip a zone rejected by __cpuset_zone_allowed()
 * when ALLOC_CDM is set in alloc_flags.
 *
 * NOTE(review): the visible hunk sets ALLOC_CDM unconditionally in the same
 * branch that sets ALLOC_CPUSET, which turns the cpuset zone check into a
 * no-op for those allocations. Confirm this is intended rather than being
 * limited to requests that actually target a CDM node (e.g. MPOL_BIND).
 */
#define ALLOC_CDM		0x200
#endif
#define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */

enum ttu_flags;
+5 −1
Original line number Diff line number Diff line
@@ -274,6 +274,9 @@ static int mpol_set_nodemask(struct mempolicy *pol,
	nodes_and(nsc->mask1,
		  cpuset_current_mems_allowed, node_states[N_MEMORY]);

#ifdef CONFIG_COHERENT_DEVICE
	nodes_or(nsc->mask1, cdmmask, nsc->mask1);
#endif
	VM_BUG_ON(!nodes);
	if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes))
		nodes = NULL;	/* explicit local allocation */
@@ -1915,7 +1918,8 @@ nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
	/* Lower zones don't get a nodemask applied for MPOL_BIND */
	if (unlikely(policy->mode == MPOL_BIND) &&
			apply_policy_zone(policy, gfp_zone(gfp)) &&
			cpuset_nodemask_valid_mems_allowed(&policy->v.nodes))
			(cpuset_nodemask_valid_mems_allowed(&policy->v.nodes) ||
			nodemask_has_cdm(policy->v.nodes)))
		return &policy->v.nodes;

	return NULL;
+10 −2
Original line number Diff line number Diff line
@@ -3829,7 +3829,11 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,

		if (cpusets_enabled() &&
			(alloc_flags & ALLOC_CPUSET) &&
			!__cpuset_zone_allowed(zone, gfp_mask))
			!__cpuset_zone_allowed(zone, gfp_mask)
#ifdef CONFIG_COHERENT_DEVICE
			&& !(alloc_flags & ALLOC_CDM)
#endif
		)
				continue;
		/*
		 * When allocating a page cache page for writing, we
@@ -4908,8 +4912,12 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
		 */
		if (!in_interrupt() && !ac->nodemask)
			ac->nodemask = &cpuset_current_mems_allowed;
		else
		else {
			*alloc_flags |= ALLOC_CPUSET;
#ifdef CONFIG_COHERENT_DEVICE
			*alloc_flags |= ALLOC_CDM;
#endif
		}
	}

	fs_reclaim_acquire(gfp_mask);