Commit ee96dd96 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull libnvdimm updates from Dan Williams:
 "The update for this cycle includes the deprecation of block-aperture
  mode and a new perf events interface for the papr_scm nvdimm driver.

  The perf events approach was acked by PeterZ.

   - Add perf support for nvdimm events, initially only for 'papr_scm'
     devices.

   - Deprecate the 'block aperture' support in libnvdimm, it only ever
     existed in the specification, not in shipping product"

* tag 'libnvdimm-for-5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  nvdimm/blk: Fix title level
  MAINTAINERS: remove section LIBNVDIMM BLK: MMIO-APERTURE DRIVER
  powerpc/papr_scm: Fix build failure when
  drivers/nvdimm: Fix build failure when CONFIG_PERF_EVENTS is not set
  nvdimm/region: Delete nd_blk_region infrastructure
  ACPI: NFIT: Remove block aperture support
  nvdimm/namespace: Delete nd_namespace_blk
  nvdimm/namespace: Delete blk namespace consideration in shared paths
  nvdimm/blk: Delete the block-aperture window driver
  nvdimm/region: Fix default alignment for small regions
  docs: ABI: sysfs-bus-nvdimm: Document sysfs event format entries for nvdimm pmu
  powerpc/papr_scm: Add perf interface support
  drivers/nvdimm: Add perf interface to expose nvdimm performance stats
  drivers/nvdimm: Add nvdimm pmu structure
parents d888c83f ada8d8d3
Loading
Loading
Loading
Loading
+35 −0
Original line number Diff line number Diff line
@@ -6,3 +6,38 @@ Description:

The libnvdimm sub-system implements a common sysfs interface for
platform nvdimm resources. See Documentation/driver-api/nvdimm/.

What:           /sys/bus/event_source/devices/nmemX/format
Date:           February 2022
KernelVersion:  5.18
Contact:        Kajol Jain <kjain@linux.ibm.com>
Description:	(RO) Attribute group to describe the magic bits
		that go into perf_event_attr.config for a particular pmu.
		(See ABI/testing/sysfs-bus-event_source-devices-format).

		Each attribute under this group defines a bit range of the
		perf_event_attr.config. The supported attribute is listed
		below::
		  event  = "config:0-4"  - event ID

		For example::
			ctl_res_cnt = "event=0x1"

What:           /sys/bus/event_source/devices/nmemX/events
Date:           February 2022
KernelVersion:  5.18
Contact:        Kajol Jain <kjain@linux.ibm.com>
Description:	(RO) Attribute group to describe performance monitoring events
                for the nvdimm memory device. Each attribute in this group
                describes a single performance monitoring event supported by
                this nvdimm pmu.  The name of the file is the name of the event.
                (See ABI/testing/sysfs-bus-event_source-devices-events). A
                listing of the events supported by a given nvdimm provider type
                can be found in Documentation/driver-api/nvdimm/$provider.

What:          /sys/bus/event_source/devices/nmemX/cpumask
Date:          February 2022
KernelVersion:  5.18
Contact:        Kajol Jain <kjain@linux.ibm.com>
Description:	(RO) This sysfs file exposes the cpumask which is designated
		to retrieve nvdimm pmu event counter data.
+88 −318

File changed.

Preview size limit exceeded, changes collapsed.

+0 −11
Original line number Diff line number Diff line
@@ -11121,17 +11121,6 @@ F: drivers/ata/
F:	include/linux/ata.h
F:	include/linux/libata.h
LIBNVDIMM BLK: MMIO-APERTURE DRIVER
M:	Dan Williams <dan.j.williams@intel.com>
M:	Vishal Verma <vishal.l.verma@intel.com>
M:	Dave Jiang <dave.jiang@intel.com>
L:	nvdimm@lists.linux.dev
S:	Supported
Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
P:	Documentation/nvdimm/maintainer-entry-profile.rst
F:	drivers/nvdimm/blk.c
F:	drivers/nvdimm/region_devs.c
LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
M:	Vishal Verma <vishal.l.verma@intel.com>
M:	Dan Williams <dan.j.williams@intel.com>
+5 −0
Original line number Diff line number Diff line
@@ -48,6 +48,11 @@ struct dev_archdata {

/*
 * Architecture-specific data attached to a platform device (accessed as
 * pdev->archdata by the papr_scm driver).
 */
struct pdev_archdata {
	u64 dma_mask;
	/*
	 * Pointer to nvdimm_pmu structure, to handle the unregistering
	 * of pmu device. The papr_scm driver stores the nvdimm_pmu here
	 * after a successful register_nvdimm_pmu() and passes it to
	 * unregister_nvdimm_pmu() on remove; NULL when no pmu is
	 * registered.
	 */
	void *priv;
};

#endif /* _ASM_POWERPC_DEVICE_H */
+229 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include <asm/papr_pdsm.h>
#include <asm/mce.h>
#include <asm/unaligned.h>
#include <linux/perf_event.h>

#define BIND_ANY_ADDR (~0ul)

@@ -124,6 +125,8 @@ struct papr_scm_priv {
	/* The bits which needs to be overridden */
	u64 health_bitmap_inject_mask;

	 /* array to have event_code and stat_id mappings */
	char **nvdimm_events_map;
};

static int papr_scm_pmem_flush(struct nd_region *nd_region,
@@ -344,6 +347,225 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
	return 0;
}

#ifdef CONFIG_PERF_EVENTS
/* Map a pointer to the embedded 'pmu' member back to its enclosing struct nvdimm_pmu. */
#define to_nvdimm_pmu(_pmu)	container_of(_pmu, struct nvdimm_pmu, pmu)

/*
 * papr_scm_pmu_get_value() - query the current value of a single nvdimm stat.
 * @event: perf event; event->attr.config is used as the index into
 *         p->nvdimm_events_map to find the stat-id to query
 * @dev:   device whose driver_data points to the struct papr_scm_priv
 * @count: on success, receives the stat value converted from big-endian
 *
 * Return: 0 on success; -EINVAL if the private data or events map is missing
 * or event->attr.config does not name an entry of the map; -ENOMEM if the
 * request buffer cannot be allocated; otherwise the negative error returned
 * by drc_pmem_query_stats().
 */
static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
{
	struct papr_scm_perf_stat *stat;
	struct papr_scm_perf_stats *stats;
	struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data;
	const char *stat_id;
	u64 nr_events;
	int rc, size;

	if (!p || !p->nvdimm_events_map)
		return -EINVAL;

	/*
	 * The events map is sized from stat_buffer_len (one slot per
	 * papr_scm_perf_stat that fits after the header), so reject any
	 * config value that would index beyond it.
	 */
	if (p->stat_buffer_len < sizeof(struct papr_scm_perf_stats))
		return -EINVAL;

	nr_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats))
			/ sizeof(struct papr_scm_perf_stat);
	if (event->attr.config >= nr_events)
		return -EINVAL;

	stat_id = p->nvdimm_events_map[event->attr.config];
	if (!stat_id)
		return -EINVAL;

	/* Allocate request buffer enough to hold single performance stat */
	size = sizeof(struct papr_scm_perf_stats) +
		sizeof(struct papr_scm_perf_stat);

	stats = kzalloc(size, GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	stat = &stats->scm_statistic[0];
	/*
	 * Copy at most sizeof(stat_id) bytes and never read past the end of
	 * the stored string; the buffer is zeroed, so a shorter id stays
	 * NUL padded.
	 */
	memcpy(&stat->stat_id, stat_id,
	       strnlen(stat_id, sizeof(stat->stat_id)));
	stat->stat_val = 0;

	rc = drc_pmem_query_stats(p, stats, 1);
	if (rc < 0) {
		kfree(stats);
		return rc;
	}

	*count = be64_to_cpu(stat->stat_val);
	kfree(stats);
	return 0;
}

/*
 * papr_scm_pmu_event_init() - validate a perf event for this nvdimm pmu.
 * @event: perf event being initialised
 *
 * Return: 0 if the event is acceptable; -ENOENT if the event type is not
 * this pmu's type; -EOPNOTSUPP for sampling or branch-stack events; -EINVAL
 * if the pmu or its driver data is missing, or if attr.config is outside
 * the range 1..16.
 */
static int papr_scm_pmu_event_init(struct perf_event *event)
{
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);

	if (!nd_pmu)
		return -EINVAL;

	/* Not ours: let the perf core try the next pmu */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* Counting only: neither sampling nor branch sampling is supported */
	if (is_sampling_event(event) || has_branch_stack(event))
		return -EOPNOTSUPP;

	/* The pmu callbacks need the papr_scm private data of the device */
	if (!nd_pmu->dev->driver_data)
		return -EINVAL;

	/* Only event codes 1 through 16 are accepted */
	if (!(event->attr.config >= 1 && event->attr.config <= 16))
		return -EINVAL;

	return 0;
}

/*
 * papr_scm_pmu_add() - pmu 'add' callback.
 * @event: perf event being added
 * @flags: PERF_EF_* flags; only PERF_EF_START is examined
 *
 * When PERF_EF_START is set, the current stat value is read and stored as
 * the event's previous count so later reads can compute a delta.
 *
 * Return: 0 on success, -EINVAL if the pmu is missing, or the error
 * returned by papr_scm_pmu_get_value().
 */
static int papr_scm_pmu_add(struct perf_event *event, int flags)
{
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
	u64 baseline;
	int err;

	if (!nd_pmu)
		return -EINVAL;

	/* Nothing to snapshot unless the event is started right away */
	if (!(flags & PERF_EF_START))
		return 0;

	err = papr_scm_pmu_get_value(event, nd_pmu->dev, &baseline);
	if (err)
		return err;

	local64_set(&event->hw.prev_count, baseline);
	return 0;
}

/*
 * papr_scm_pmu_read() - pmu 'read' callback.
 * @event: perf event to update
 *
 * Fetches the latest stat value, swaps it in as the event's previous count
 * and adds the difference to the event count. If the pmu is missing or the
 * value cannot be fetched, the event is left untouched.
 */
static void papr_scm_pmu_read(struct perf_event *event)
{
	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
	u64 latest, previous;

	if (!nd_pmu)
		return;

	if (papr_scm_pmu_get_value(event, nd_pmu->dev, &latest))
		return;

	previous = local64_xchg(&event->hw.prev_count, latest);
	local64_add(latest - previous, &event->count);
}

/*
 * papr_scm_pmu_del() - pmu 'del' callback.
 * @event: perf event being removed
 * @flags: unused
 *
 * Performs one last read so the event count is updated before removal.
 */
static void papr_scm_pmu_del(struct perf_event *event, int flags)
{
	/* Fold the final delta into the event count */
	papr_scm_pmu_read(event);
}

/*
 * papr_scm_pmu_check_events() - build the event-index -> stat-id map.
 * @p:      papr_scm private data; p->stat_buffer_len sizes the query buffer
 *          and, on success, p->nvdimm_events_map receives the new map
 * @nd_pmu: unused
 *
 * Queries the list of available stats via drc_pmem_query_stats() and stores
 * a NUL-terminated copy of every stat-id in a NULL-terminated array of
 * strings. p->nvdimm_events_map is only assigned on success, so a failure
 * never leaves it pointing at freed memory.
 *
 * Return: 0 on success; -ENOENT if the stat buffer cannot hold even the
 * stats header; -ENOMEM on allocation failure; otherwise the error returned
 * by drc_pmem_query_stats().
 */
static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu)
{
	struct papr_scm_perf_stat *stat;
	struct papr_scm_perf_stats *stats;
	char **events_map;
	char *statid;
	int rc;
	u32 available_events, index = 0;

	/* Also covers stat_buffer_len == 0 and avoids an unsigned underflow */
	if (p->stat_buffer_len < sizeof(struct papr_scm_perf_stats))
		return -ENOENT;

	available_events = (p->stat_buffer_len  - sizeof(struct papr_scm_perf_stats))
			/ sizeof(struct papr_scm_perf_stat);

	/* Allocate the buffer for phyp where stats are written */
	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	/* One slot per event plus one for the terminating NULL entry */
	events_map = kcalloc(available_events + 1, sizeof(*events_map), GFP_KERNEL);
	if (!events_map) {
		rc = -ENOMEM;
		goto out_stats;
	}

	/* Called to get list of events supported */
	rc = drc_pmem_query_stats(p, stats, 0);
	if (rc)
		goto out_events_map;

	for (stat = stats->scm_statistic; index < available_events; index++, ++stat) {
		/*
		 * stat_id is a fixed-size field that need not be NUL
		 * terminated: allocate room for the whole field plus a
		 * terminator and bound the copy by the field size.
		 */
		statid = kzalloc(sizeof(stat->stat_id) + 1, GFP_KERNEL);
		if (!statid) {
			rc = -ENOMEM;
			goto out_events_map;
		}

		memcpy(statid, stat->stat_id,
		       strnlen((const char *)stat->stat_id, sizeof(stat->stat_id)));
		events_map[index] = statid;
	}
	/* events_map[available_events] is already NULL thanks to kcalloc() */

	p->nvdimm_events_map = events_map;
	kfree(stats);
	return 0;

out_events_map:
	/* Release the stat-id strings copied so far, then the map itself */
	while (index--)
		kfree(events_map[index]);
	kfree(events_map);
out_stats:
	kfree(stats);
	return rc;
}

static void papr_scm_pmu_register(struct papr_scm_priv *p)
{
	struct nvdimm_pmu *nd_pmu;
	int rc, nodeid;

	nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
	if (!nd_pmu) {
		rc = -ENOMEM;
		goto pmu_err_print;
	}

	rc = papr_scm_pmu_check_events(p, nd_pmu);
	if (rc)
		goto pmu_check_events_err;

	nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
	nd_pmu->pmu.name = nvdimm_name(p->nvdimm);
	nd_pmu->pmu.event_init = papr_scm_pmu_event_init;
	nd_pmu->pmu.read = papr_scm_pmu_read;
	nd_pmu->pmu.add = papr_scm_pmu_add;
	nd_pmu->pmu.del = papr_scm_pmu_del;

	nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
				PERF_PMU_CAP_NO_EXCLUDE;

	/*updating the cpumask variable */
	nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev));
	nd_pmu->arch_cpumask = *cpumask_of_node(nodeid);

	rc = register_nvdimm_pmu(nd_pmu, p->pdev);
	if (rc)
		goto pmu_register_err;

	/*
	 * Set archdata.priv value to nvdimm_pmu structure, to handle the
	 * unregistering of pmu device.
	 */
	p->pdev->archdata.priv = nd_pmu;
	return;

pmu_register_err:
	kfree(p->nvdimm_events_map);
pmu_check_events_err:
	kfree(nd_pmu);
pmu_err_print:
	dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc);
}

#else
/* No-op stub used when CONFIG_PERF_EVENTS is not enabled. */
static void papr_scm_pmu_register(struct papr_scm_priv *p) { }
#endif

/*
 * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
 * health information.
@@ -1320,6 +1542,7 @@ static int papr_scm_probe(struct platform_device *pdev)
		goto err2;

	platform_set_drvdata(pdev, p);
	papr_scm_pmu_register(p);

	return 0;

@@ -1338,6 +1561,12 @@ static int papr_scm_remove(struct platform_device *pdev)

	nvdimm_bus_unregister(p->bus);
	drc_pmem_unbind(p);

	if (pdev->archdata.priv)
		unregister_nvdimm_pmu(pdev->archdata.priv);

	pdev->archdata.priv = NULL;
	kfree(p->nvdimm_events_map);
	kfree(p->bus_desc.provider_name);
	kfree(p);

Loading