Commit 7766cf7a authored by Aneesh Kumar K.V's avatar Aneesh Kumar K.V Committed by Andrew Morton
Browse files

mm/demotion: add pg_data_t member to track node memory tier details

Also update different helpes to use NODE_DATA()->memtier.  Since node
specific memtier can change based on the reassignment of NUMA node to a
different memory tiers, accessing NODE_DATA()->memtier needs to happen
under an rcu read lock or memory_tier_lock.

Link: https://lkml.kernel.org/r/20220818131042.113280-7-aneesh.kumar@linux.ibm.com


Signed-off-by: default avatarAneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: default avatar"Huang, Ying" <ying.huang@intel.com>
Acked-by: default avatarWei Xu <weixugc@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hesham Almatary <hesham.almatary@huawei.com>
Cc: Jagdish Gediya <jvgediya.oss@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 6c542ab7
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -1246,6 +1246,9 @@ typedef struct pglist_data {
	/* Per-node vmstats */
	struct per_cpu_nodestat __percpu *per_cpu_nodestats;
	atomic_long_t		vm_stat[NR_VM_NODE_STAT_ITEMS];
#ifdef CONFIG_NUMA
	struct memory_tier __rcu *memtier;
#endif
} pg_data_t;

#define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
+35 −5
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/memory.h>
#include <linux/mmzone.h>
#include <linux/memory-tiers.h>

#include "internal.h"
@@ -141,12 +142,18 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty

static struct memory_tier *__node_get_memory_tier(int node)
{
	struct memory_dev_type *memtype;
	pg_data_t *pgdat;

	memtype = node_memory_types[node];
	if (memtype && node_isset(node, memtype->nodes))
		return memtype->memtier;
	pgdat = NODE_DATA(node);
	if (!pgdat)
		return NULL;
	/*
	 * Since we hold memory_tier_lock, we can avoid
	 * RCU read locks when accessing the details. No
	 * parallel updates are possible here.
	 */
	return rcu_dereference_check(pgdat->memtier,
				     lockdep_is_held(&memory_tier_lock));
}

#ifdef CONFIG_MIGRATION
@@ -309,6 +316,8 @@ static struct memory_tier *set_node_memory_tier(int node)
{
	struct memory_tier *memtier;
	struct memory_dev_type *memtype;
	pg_data_t *pgdat = NODE_DATA(node);


	lockdep_assert_held_once(&memory_tier_lock);

@@ -320,24 +329,45 @@ static struct memory_tier *set_node_memory_tier(int node)
	memtype = node_memory_types[node].memtype;
	node_set(node, memtype->nodes);
	memtier = find_create_memory_tier(memtype);
	if (!IS_ERR(memtier))
		rcu_assign_pointer(pgdat->memtier, memtier);
	return memtier;
}

static void destroy_memory_tier(struct memory_tier *memtier)
{
	list_del(&memtier->list);
	/*
	 * synchronize_rcu in clear_node_memory_tier makes sure
	 * we don't have rcu access to this memory tier.
	 */
	kfree(memtier);
}

static bool clear_node_memory_tier(int node)
{
	bool cleared = false;
	pg_data_t *pgdat;
	struct memory_tier *memtier;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return false;

	/*
	 * Make sure that anybody looking at NODE_DATA who finds
	 * a valid memtier finds memory_dev_types with nodes still
	 * linked to the memtier. We achieve this by waiting for
	 * rcu read section to finish using synchronize_rcu.
	 * This also enables us to free the destroyed memory tier
	 * with kfree instead of kfree_rcu
	 */
	memtier = __node_get_memory_tier(node);
	if (memtier) {
		struct memory_dev_type *memtype;

		rcu_assign_pointer(pgdat->memtier, NULL);
		synchronize_rcu();
		memtype = node_memory_types[node].memtype;
		node_clear(node, memtype->nodes);
		if (nodes_empty(memtype->nodes)) {