Commit 2e5fd489 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull libnvdimm updates from Dan Williams:

 - Fix a race condition in the teardown path of raw mode pmem
   namespaces.

 - Cleanup the code that filesystems use to detect filesystem-dax
   capabilities of their underlying block device.

* tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  dax: remove bdev_dax_supported
  xfs: factor out a xfs_buftarg_is_dax helper
  dax: stub out dax_supported for !CONFIG_FS_DAX
  dax: remove __generic_fsdax_supported
  dax: move the dax_read_lock() locking into dax_supported
  dax: mark dax_get_by_host static
  dm: use fs_dax_get_by_bdev instead of dax_get_by_host
  dax: stop using bdevname
  fsdax: improve the FS_DAX Kconfig description and help text
  libnvdimm/pmem: Fix crash triggered when I/O in-flight during unbind
parents 4b105f4a 3fc37253
Loading
Loading
Loading
Loading
+73 −118
Original line number Diff line number Diff line
@@ -17,6 +17,24 @@
#include <linux/fs.h>
#include "dax-private.h"

/**
 * struct dax_device - anchor object for dax services
 * @list: node in the dax_host_list hash chain, walked by dax_get_by_host()
 * @inode: core vfs
 * @cdev: optional character interface for "device dax"
 * @host: optional name for lookups where the device path is not available
 * @private: dax driver private data
 * @flags: state and boolean properties
 * @ops: operations vector supplied by the dax driver (used for the
 *       dax_supported() callback, among others)
 */
struct dax_device {
	struct hlist_node list;
	struct inode inode;
	struct cdev cdev;
	const char *host;
	void *private;
	unsigned long flags;
	const struct dax_operations *ops;
};

static dev_t dax_devt;
DEFINE_STATIC_SRCU(dax_srcu);
static struct vfsmount *dax_mnt;
@@ -40,6 +58,42 @@ void dax_read_unlock(int id)
}
EXPORT_SYMBOL_GPL(dax_read_unlock);

/* Map a host name onto a dax_host_list bucket index. */
static int dax_host_hash(const char *host)
{
	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
}

/**
 * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
 * @host: alternate name for the device registered by a dax driver
 *
 * Walks the dax_host_list hash bucket for @host and returns the first
 * live dax_device whose ->host matches, with an inode reference taken
 * via igrab(); returns NULL on no match (or if @host is NULL).  The
 * caller must drop the reference when done — presumably via put_dax();
 * verify against callers.
 */
static struct dax_device *dax_get_by_host(const char *host)
{
	struct dax_device *dax_dev, *found = NULL;
	int hash, id;

	if (!host)
		return NULL;

	hash = dax_host_hash(host);

	/* SRCU read section keeps the dax_alive() check stable during the walk */
	id = dax_read_lock();
	spin_lock(&dax_host_lock);
	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
		if (!dax_alive(dax_dev)
				|| strcmp(host, dax_dev->host) != 0)
			continue;

		/* igrab() can fail if the inode is being evicted; then NULL is returned */
		if (igrab(&dax_dev->inode))
			found = dax_dev;
		break;
	}
	spin_unlock(&dax_host_lock);
	dax_read_unlock(id);

	return found;
}

#ifdef CONFIG_BLOCK
#include <linux/blkdev.h>

@@ -65,15 +119,13 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
	return dax_get_by_host(bdev->bd_disk->disk_name);
}
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif

bool __generic_fsdax_supported(struct dax_device *dax_dev,
bool generic_fsdax_supported(struct dax_device *dax_dev,
		struct block_device *bdev, int blocksize, sector_t start,
		sector_t sectors)
{
	bool dax_enabled = false;
	pgoff_t pgoff, pgoff_end;
	char buf[BDEVNAME_SIZE];
	void *kaddr, *end_kaddr;
	pfn_t pfn, end_pfn;
	sector_t last_page;
@@ -81,29 +133,25 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
	int err, id;

	if (blocksize != PAGE_SIZE) {
		pr_info("%s: error: unsupported blocksize for dax\n",
				bdevname(bdev, buf));
		pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
		return false;
	}

	if (!dax_dev) {
		pr_debug("%s: error: dax unsupported by block device\n",
				bdevname(bdev, buf));
		pr_debug("%pg: error: dax unsupported by block device\n", bdev);
		return false;
	}

	err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
	if (err) {
		pr_info("%s: error: unaligned partition for dax\n",
				bdevname(bdev, buf));
		pr_info("%pg: error: unaligned partition for dax\n", bdev);
		return false;
	}

	last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
	err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
	if (err) {
		pr_info("%s: error: unaligned partition for dax\n",
				bdevname(bdev, buf));
		pr_info("%pg: error: unaligned partition for dax\n", bdev);
		return false;
	}

@@ -112,8 +160,8 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
	len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);

	if (len < 1 || len2 < 1) {
		pr_info("%s: error: dax access failed (%ld)\n",
				bdevname(bdev, buf), len < 1 ? len : len2);
		pr_info("%pg: error: dax access failed (%ld)\n",
				bdev, len < 1 ? len : len2);
		dax_read_unlock(id);
		return false;
	}
@@ -147,57 +195,32 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
	dax_read_unlock(id);

	if (!dax_enabled) {
		pr_info("%s: error: dax support not enabled\n",
				bdevname(bdev, buf));
		pr_info("%pg: error: dax support not enabled\n", bdev);
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
EXPORT_SYMBOL_GPL(generic_fsdax_supported);

/**
 * __bdev_dax_supported() - Check if the device supports dax for filesystem
 * @bdev: block device to check
 * @blocksize: The block size of the device
 *
 * This is a library function for filesystems to check if the block device
 * can be mounted with dax option.
 *
 * Return: true if supported, false if unsupported
 */
bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
		int blocksize, sector_t start, sector_t len)
{
	struct dax_device *dax_dev;
	struct request_queue *q;
	char buf[BDEVNAME_SIZE];
	bool ret;
	bool ret = false;
	int id;

	q = bdev_get_queue(bdev);
	if (!q || !blk_queue_dax(q)) {
		pr_debug("%s: error: request queue doesn't support dax\n",
				bdevname(bdev, buf));
		return false;
	}

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev) {
		pr_debug("%s: error: device does not support dax\n",
				bdevname(bdev, buf));
	if (!dax_dev)
		return false;
	}

	id = dax_read_lock();
	ret = dax_supported(dax_dev, bdev, blocksize, 0,
			i_size_read(bdev->bd_inode) / 512);
	if (dax_alive(dax_dev) && dax_dev->ops->dax_supported)
		ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize,
						  start, len);
	dax_read_unlock(id);

	put_dax(dax_dev);

	return ret;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
#endif
EXPORT_SYMBOL_GPL(dax_supported);
#endif /* CONFIG_FS_DAX */
#endif /* CONFIG_BLOCK */

enum dax_device_flags {
	/* !alive + rcu grace period == no new operations / mappings */
@@ -208,24 +231,6 @@ enum dax_device_flags {
	DAXDEV_SYNC,
};

/**
 * struct dax_device - anchor object for dax services
 * @list: node in the dax_host_list hash chain used for host-name lookups
 * @inode: core vfs
 * @cdev: optional character interface for "device dax"
 * @host: optional name for lookups where the device path is not available
 * @private: dax driver private data
 * @flags: state and boolean properties
 * @ops: operations vector supplied by the dax driver (used for the
 *       dax_supported() callback, among others)
 */
struct dax_device {
	struct hlist_node list;
	struct inode inode;
	struct cdev cdev;
	const char *host;
	void *private;
	unsigned long flags;
	const struct dax_operations *ops;
};

static ssize_t write_cache_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
@@ -323,19 +328,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
}
EXPORT_SYMBOL_GPL(dax_direct_access);

/*
 * Ask the driver whether the bdev range can be used for filesystem-dax.
 * Returns false for a missing or dead dax_device, otherwise defers to the
 * driver's ->dax_supported() operation.
 *
 * NOTE(review): the dax_alive() check here is not itself protected by
 * dax_read_lock(); this version appears to rely on callers holding the
 * lock — confirm against call sites.
 */
bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
		int blocksize, sector_t start, sector_t len)
{
	if (!dax_dev)
		return false;

	if (!dax_alive(dax_dev))
		return false;

	return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
}
EXPORT_SYMBOL_GPL(dax_supported);

size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i)
{
@@ -423,11 +415,6 @@ bool dax_alive(struct dax_device *dax_dev)
}
EXPORT_SYMBOL_GPL(dax_alive);

/* Map a host name onto a dax_host_list bucket index. */
static int dax_host_hash(const char *host)
{
	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
}

/*
 * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
 * that any fault handlers or operations that might have seen
@@ -624,38 +611,6 @@ void put_dax(struct dax_device *dax_dev)
}
EXPORT_SYMBOL_GPL(put_dax);

/**
 * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
 * @host: alternate name for the device registered by a dax driver
 *
 * Walks the dax_host_list hash bucket for @host and returns the first
 * live dax_device whose ->host matches, with an inode reference taken
 * via igrab(); returns NULL on no match (or if @host is NULL).  The
 * caller must drop the reference when done — presumably via put_dax();
 * verify against callers.
 */
struct dax_device *dax_get_by_host(const char *host)
{
	struct dax_device *dax_dev, *found = NULL;
	int hash, id;

	if (!host)
		return NULL;

	hash = dax_host_hash(host);

	/* SRCU read section keeps the dax_alive() check stable during the walk */
	id = dax_read_lock();
	spin_lock(&dax_host_lock);
	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
		if (!dax_alive(dax_dev)
				|| strcmp(host, dax_dev->host) != 0)
			continue;

		/* igrab() can fail if the inode is being evicted; then NULL is returned */
		if (igrab(&dax_dev->inode))
			found = dax_dev;
		break;
	}
	spin_unlock(&dax_host_lock);
	dax_read_unlock(id);

	return found;
}
EXPORT_SYMBOL_GPL(dax_get_by_host);

/**
 * inode_dax: convert a public inode into its dax_dev
 * @inode: An inode with i_cdev pointing to a dax_dev
+2 −7
Original line number Diff line number Diff line
@@ -809,14 +809,9 @@ EXPORT_SYMBOL_GPL(dm_table_set_type);
int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
			sector_t start, sector_t len, void *data)
{
	int blocksize = *(int *) data, id;
	bool rc;
	int blocksize = *(int *) data;

	id = dax_read_lock();
	rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
	dax_read_unlock(id);

	return rc;
	return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
}

/* Check devices support synchronous DAX */
+1 −1
Original line number Diff line number Diff line
@@ -654,7 +654,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
	}

	td->dm_dev.bdev = bdev;
	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev);
	return 0;
}

+2 −2
Original line number Diff line number Diff line
@@ -450,11 +450,11 @@ static int pmem_attach_disk(struct device *dev,
		pmem->pfn_flags |= PFN_MAP;
		bb_range = pmem->pgmap.range;
	} else {
		addr = devm_memremap(dev, pmem->phys_addr,
				pmem->size, ARCH_MEMREMAP_PMEM);
		if (devm_add_action_or_reset(dev, pmem_release_queue,
					&pmem->pgmap))
			return -ENOMEM;
		addr = devm_memremap(dev, pmem->phys_addr,
				pmem->size, ARCH_MEMREMAP_PMEM);
		bb_range.start =  res->start;
		bb_range.end = res->end;
	}
+18 −3
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ source "fs/f2fs/Kconfig"
source "fs/zonefs/Kconfig"

config FS_DAX
	bool "Direct Access (DAX) support"
	bool "File system based Direct Access (DAX) support"
	depends on MMU
	depends on !(ARM || MIPS || SPARC)
	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
@@ -53,8 +53,23 @@ config FS_DAX
	  Direct Access (DAX) can be used on memory-backed block devices.
	  If the block device supports DAX and the filesystem supports DAX,
	  then you can avoid using the pagecache to buffer I/Os.  Turning
	  on this option will compile in support for DAX; you will need to
	  mount the filesystem using the -o dax option.
	  on this option will compile in support for DAX.

	  For a DAX device to support file system access it needs to have
	  struct pages.  For the nfit based NVDIMMs this can be enabled
	  using the ndctl utility:

		# ndctl create-namespace --force --reconfig=namespace0.0 \
			--mode=fsdax --map=mem

	  See the 'create-namespace' man page for details on the overhead of
	  --map=mem:
	  https://docs.pmem.io/ndctl-user-guide/ndctl-man-pages/ndctl-create-namespace

	  For ndctl to work CONFIG_DEV_DAX needs to be enabled as well. For most
	  file systems DAX support needs to be manually enabled globally or
	  per-inode using a mount option as well.  See the file documentation in
	  Documentation/filesystems/dax.rst for details.

	  If you do not have a block device that is capable of using this,
	  or if unsure, say N.  Saying Y will increase the size of the kernel
Loading