Commit 0695e18b authored by Ze Zuo
Browse files

mm: Add PMU based memory sampling abstract layer

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9GZAQ


CVE: NA

--------------------------------

Add the mem_sampling abstraction layer to provide hardware memory
access information to kernel features, e.g., NUMA balancing or DAMON.

The mem_sampling abstraction layer provides an interface to start
hardware PMU sampling on the current CPU, and lets users register
callbacks to subscribe to access information (e.g., for NUMA
balancing in subsequent patches). Internally, mem_sampling
registers a callback in the specific PMU driver, which forwards
the captured records to the higher-level users through their
registered callbacks. Sampling actions are also managed by the
hw_pmu layer. CONFIG_MEM_SAMPLING is added to enable the
mem_sampling layer.

For now, mem_sampling only supports the SPE driver. Support for new
hardware PMUs can be added in mem_sampling without adjusting the
higher-level kernel feature code.

Signed-off-by: Ze Zuo <zuoze1@huawei.com>
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Signed-off-by: Shuang Yan <yanshuang7@huawei.com>
parent f4d2c234
Loading
Loading
Loading
Loading
+19 −2
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <linux/of_irq.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <linux/mem_sampling.h>

#include "spe-decoder/arm-spe-decoder.h"
#include "spe-decoder/arm-spe-pkt-decoder.h"
@@ -31,6 +32,12 @@ static enum cpuhp_state arm_spe_online;

DEFINE_PER_CPU(struct arm_spe_buf, per_cpu_spe_buf);

/*
 * Consumer callback for decoded SPE records. The SPE interrupt handler
 * invokes it, when non-NULL, with the per-CPU record buffer and the number
 * of records it holds.
 */
mem_sampling_cb_type arm_spe_sampling_cb;

/*
 * arm_spe_record_capture_callback_register() - install the record consumer.
 * @cb: function to receive batches of records; it replaces any callback
 *      stored earlier. Storing NULL makes the interrupt handler skip the
 *      call.
 *
 * NOTE(review): this is a plain pointer store; no locking or
 * READ_ONCE()/WRITE_ONCE() pairing with the interrupt handler is visible
 * here -- confirm that registration cannot race with a running handler.
 */
void arm_spe_record_capture_callback_register(mem_sampling_cb_type cb)
{
	arm_spe_sampling_cb = cb;
}

static inline int arm_spe_per_buffer_alloc(int cpu)
{
	struct arm_spe_buf *spe_buf = &per_cpu(per_cpu_spe_buf, cpu);
@@ -376,6 +383,16 @@ static irqreturn_t arm_spe_irq_handler(int irq, void *dev)
	case SPE_PMU_BUF_FAULT_ACT_OK:
		spe_buf->nr_records = 0;
		arm_spe_decode_buf(spe_buf->cur, spe_buf->size);

		/*
		 * Callback function processing record data.
		 * Call one: arm_spe_sampling_cb - mem_sampling layer.
		 * TODO: use per CPU workqueue to process data and reduce
		 * interrupt processing time
		 */
		if (arm_spe_sampling_cb)
			arm_spe_sampling_cb((struct mem_sampling_record *)spe_buf->record_base,
						   spe_buf->nr_records);
		break;

	case SPE_PMU_BUF_FAULT_ACT_SPURIOUS:
@@ -663,7 +680,7 @@ static void arm_spe_sample_para_init(void)
void arm_spe_record_enqueue(struct arm_spe_record *record)
{
	struct arm_spe_buf *spe_buf = this_cpu_ptr(&per_cpu_spe_buf);
	struct arm_spe_record *record_tail;
	struct mem_sampling_record *record_tail;

	if (spe_buf->nr_records >= SPE_RECORD_BUFFER_MAX_RECORDS) {
		pr_err("nr_records exceeded!\n");
@@ -672,7 +689,7 @@ void arm_spe_record_enqueue(struct arm_spe_record *record)

	record_tail = spe_buf->record_base +
			spe_buf->nr_records * SPE_RECORD_ENTRY_SIZE;
	*record_tail = *(struct arm_spe_record *)record;
	*record_tail = *(struct mem_sampling_record *)record;
	spe_buf->nr_records++;

}
+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@
#define SPE_SAMPLE_PERIOD		1024

#define SPE_RECORD_BUFFER_MAX_RECORDS	(100)
#define SPE_RECORD_ENTRY_SIZE		sizeof(struct arm_spe_record)
#define SPE_RECORD_ENTRY_SIZE		sizeof(struct mem_sampling_record)

#define SPE_PMU_FEAT_FILT_EVT		(1UL << 0)
#define SPE_PMU_FEAT_FILT_TYP		(1UL << 1)
+97 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * mem_sampling.h: declare the mem_sampling abstract layer and provide
 * unified pmu sampling for NUMA, DAMON, etc.
 *
 * Sample records are converted to mem_sampling_record, and the callbacks
 * registered through mem_sampling_record_cb_register() are then invoked
 * to pass the record on.
 *
 * Copyright (c) 2024-2025, Huawei Technologies Ltd.
 */
#ifndef __MEM_SAMPLING_H
#define __MEM_SAMPLING_H

/*
 * Kinds of event a sample can be tagged with. Every constant owns one
 * distinct bit (bits 0-7), written here as a mask.
 */
enum mem_sampling_sample_type {
	MEM_SAMPLING_L1D_ACCESS		= 0x01,
	MEM_SAMPLING_L1D_MISS		= 0x02,
	MEM_SAMPLING_LLC_ACCESS		= 0x04,
	MEM_SAMPLING_LLC_MISS		= 0x08,
	MEM_SAMPLING_TLB_ACCESS		= 0x10,
	MEM_SAMPLING_TLB_MISS		= 0x20,
	MEM_SAMPLING_BRANCH_MISS	= 0x40,
	MEM_SAMPLING_REMOTE_ACCESS	= 0x80,
};

/*
 * Operation kinds, one bit each.
 * NOTE(review): LD/ST presumably stand for load and store -- confirm.
 */
enum mem_sampling_op_type {
	MEM_SAMPLING_LD	= 0x1,
	MEM_SAMPLING_ST	= 0x2,
};

/*
 * One hardware sample in the PMU-independent form that is passed to
 * mem_sampling callbacks.
 *
 * NOTE(review): the ARM SPE driver stores records by copying a
 * struct arm_spe_record through a cast to this type, which presumably
 * requires both structures to share the same layout -- confirm before
 * adding, removing or reordering members.
 *
 * The per-member notes below are inferred from the member names only;
 * verify them against the driver that produces the records.
 */
struct mem_sampling_record {
	enum mem_sampling_sample_type	type;		/* event kind(s) of the sample */
	int				err;
	u32				op;		/* presumably enum mem_sampling_op_type bits -- confirm */
	u32				latency;	/* unit not visible here -- TODO confirm */
	u64				from_ip;
	u64				to_ip;
	u64				timestamp;
	u64				virt_addr;	/* presumably the sampled virtual address */
	u64				phys_addr;	/* presumably the sampled physical address */
	u64				context_id;
	u16				source;
};

/*
 * Per-record consumer callbacks.
 *
 * Users such as NUMA balancing or DAMON register a callback with
 * mem_sampling_record_cb_register() during their initialisation and
 * remove it again with mem_sampling_record_cb_unregister().
 * Registered callbacks are invoked when new hardware PMU records are
 * captured; each invocation receives a single record.
 */
typedef void (*mem_sampling_record_cb_type)(struct mem_sampling_record *record);
void mem_sampling_record_cb_register(mem_sampling_record_cb_type cb);
void mem_sampling_record_cb_unregister(mem_sampling_record_cb_type cb);

/*
 * Only pointers to struct task_struct are used below, so a forward
 * declaration is enough. Without it the tag would first appear inside a
 * parameter list, giving it prototype scope: the compiler warns and the
 * parameter type is incompatible with the real struct task_struct when
 * this header is included before <linux/sched.h>.
 */
struct task_struct;

/*
 * mem_sampling_sched_in() - hook taking the previous and the current task.
 * @prev: task being switched away from (as suggested by the name)
 * @curr: task being switched to (as suggested by the name)
 *
 * NOTE(review): presumably called from the scheduler on context switch;
 * the caller is not visible from this header.
 *
 * With CONFIG_MEM_SAMPLING disabled this is an empty inline, so callers
 * need no #ifdef of their own.
 */
#ifdef CONFIG_MEM_SAMPLING
void mem_sampling_sched_in(struct task_struct *prev, struct task_struct *curr);
#else
static inline void mem_sampling_sched_in(struct task_struct *prev, struct task_struct *curr) { }
#endif

/*
 * Batch callback invoked by a specific hardware sampling driver (the ARM
 * SPE driver calls it from its interrupt handler).
 * @record_base: first record of the batch
 * @n_records:   number of records available starting at @record_base
 */
typedef void (*mem_sampling_cb_type)(struct mem_sampling_record *record_base,
				     int n_records);

/*
 * Operations used to control sampling on the underlying hardware PMU.
 *
 * NOTE(review): presumably filled with arm_spe_start(), arm_spe_stop() and
 * arm_spe_continue() when the SPE backend is used; the definition of
 * mem_sampling_ops is not part of this header -- confirm.
 */
struct mem_sampling_ops_struct {
	int (*sampling_start)(void);	/* return convention not visible here -- TODO confirm */
	void (*sampling_stop)(void);
	void (*sampling_continue)(void);
};
/* Single global instance; defined outside this header. */
extern struct mem_sampling_ops_struct mem_sampling_ops;

/*
 * Hardware sampling backends known to mem_sampling. ARM SPE is the only
 * one listed; MEM_SAMPLING_UNSUPPORTED follows it as the last value.
 */
enum mem_sampling_type_enum {
	MEM_SAMPLING_ARM_SPE		= 0,
	MEM_SAMPLING_UNSUPPORTED	= 1
};

/*
 * ARM SPE backend entry points used by mem_sampling.
 *
 * When CONFIG_ARM_SPE_MEM_SAMPLING is not set, every entry point is
 * replaced by an inline stub: the void ones do nothing and the int ones
 * return 0.
 */
#ifdef CONFIG_ARM_SPE_MEM_SAMPLING
int arm_spe_start(void);
void arm_spe_stop(void);
void arm_spe_continue(void);
int arm_spe_enabled(void);
void arm_spe_record_capture_callback_register(mem_sampling_cb_type cb);
#else
static inline int arm_spe_start(void) { return 0; }
static inline void arm_spe_stop(void) { }
static inline void arm_spe_continue(void) { }
static inline int arm_spe_enabled(void) { return 0; }
static inline void arm_spe_record_capture_callback_register(mem_sampling_cb_type cb) { }
#endif /* CONFIG_ARM_SPE_MEM_SAMPLING */
#endif	/* __MEM_SAMPLING_H */
+11 −0
Original line number Diff line number Diff line
@@ -1014,6 +1014,17 @@ config EXTEND_HUGEPAGE_MAPPING
	help
	  Introduce vmalloc/vmap/remap interfaces that handle only hugepages.

# Hardware-PMU based memory access sampling for in-kernel users.
# ARM SPE is the only backend; the "if ARM64" on the select is kept so the
# select stays architecture-scoped once other backends are added.
# NOTE(review): "select" forces ARM_SPE_MEM_SAMPLING on without checking
# that symbol's own dependencies -- confirm they are implied by ARM64.
config MEM_SAMPLING
	bool "Use hardware memory sampling for kernel features(NUMA, DAMON, etc.)"
	depends on ARM64
	select ARM_SPE_MEM_SAMPLING if ARM64
	help
	  Memory sampling is primarily based on specific hardware capabilities,
	  which enable hardware PMUs to sample memory access for use by kernel
	  features. It requires at least one hardware pmu (e.g.
	  ARM_SPE_MEM_SAMPLING) to be enabled.

source "mm/damon/Kconfig"

endmenu
+1 −0
Original line number Diff line number Diff line
@@ -133,3 +133,4 @@ obj-$(CONFIG_MEMORY_RELIABLE) += mem_reliable.o
obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o
obj-$(CONFIG_PAGE_CACHE_LIMIT) += page_cache_limit.o
obj-$(CONFIG_CLEAR_FREELIST_PAGE) += clear_freelist_page.o
obj-$(CONFIG_MEM_SAMPLING) += mem_sampling.o
Loading