Commit 6faddda6, authored by Chuck Lever, committed by Christian Brauner
Browse files

libfs: Add directory operations for stable offsets



Create a vector of directory operations in fs/libfs.c that handles
directory seeks and readdir via stable offsets instead of the
current cursor-based mechanism.

For the moment these are unused.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Message-Id: <168814732984.530310.11190772066786107220.stgit@manet.1015granger.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent 509f0069
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -85,13 +85,14 @@ prototypes::
			    struct dentry *dentry, struct fileattr *fa);
	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
	struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);

locking rules:
	all may block

==============	=============================================
==============	==================================================
ops		i_rwsem(inode)
==============	=============================================
==============	==================================================
lookup:		shared
create:		exclusive
link:		exclusive (both)
@@ -115,7 +116,8 @@ atomic_open: shared (exclusive if O_CREAT is set in open flags)
tmpfile:	no
fileattr_get:	no or exclusive
fileattr_set:	exclusive
==============	=============================================
get_offset_ctx  no
==============	==================================================


	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
+5 −1
Original line number Diff line number Diff line
@@ -515,6 +515,7 @@ As of kernel 2.6.22, the following members are defined:
		int (*fileattr_set)(struct mnt_idmap *idmap,
				    struct dentry *dentry, struct fileattr *fa);
		int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
		struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
	};

Again, all methods are called without any locks being held, unless
@@ -675,7 +676,10 @@ otherwise noted.
	called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
	change miscellaneous file flags and attributes.  Callers hold
	i_rwsem exclusive.  If unset, then fall back to f_op->ioctl().

``get_offset_ctx``
	called to get the offset context for a directory inode. A
	filesystem must define this operation to use
	simple_offset_dir_operations.

The Address Space Object
========================
+248 −0
Original line number Diff line number Diff line
@@ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = {
};
EXPORT_SYMBOL(simple_dir_inode_operations);

/*
 * Record @offset as @dentry's directory offset. The value is kept in the
 * dentry's d_fsdata pointer field, widened through uintptr_t.
 */
static void offset_set(struct dentry *dentry, u32 offset)
{
	uintptr_t cookie = offset;

	dentry->d_fsdata = (void *)cookie;
}

static u32 dentry2offset(struct dentry *dentry)
{
	return (u32)((uintptr_t)(dentry->d_fsdata));
}

/**
 * simple_offset_init - prepare an offset_ctx for use
 * @octx: directory offset map to be set up
 *
 * Initializes the xarray with XA_FLAGS_ALLOC1 and points the allocation
 * cursor at the first offset available to a real directory entry.
 */
void simple_offset_init(struct offset_ctx *octx)
{
	/* Offsets 0 and 1 stand for '.' and '..'; entries begin at 2 */
	octx->next_offset = 2;

	xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
}

/**
 * simple_offset_add - Add an entry to a directory's offset map
 * @octx: directory offset ctx to be updated
 * @dentry: new dentry being added
 *
 * Returns zero on success. @so_ctx and the dentry offset are updated.
 * Otherwise, a negative errno value is returned.
 */
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
{
	static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
	u32 offset;
	int ret;

	if (dentry2offset(dentry) != 0)
		return -EBUSY;

	ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
			      &octx->next_offset, GFP_KERNEL);
	if (ret < 0)
		return ret;

	offset_set(dentry, offset);
	return 0;
}

/**
 * simple_offset_remove - Remove an entry from a directory's offset map
 * @octx: directory offset ctx to be updated
 * @dentry: dentry being removed
 *
 * A dentry whose recorded offset is zero is left untouched. Otherwise its
 * slot in @octx is erased and its recorded offset is reset to zero.
 */
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
{
	u32 slot = dentry2offset(dentry);

	if (slot != 0) {
		xa_erase(&octx->xa, slot);
		offset_set(dentry, 0);
	}
}

/*
 * Put @dentry back into @octx at index @offset.
 *
 * The dentry's recorded offset is updated only after the store has
 * succeeded. A failed store therefore never leaves the dentry claiming an
 * index it does not occupy, which would let a later simple_offset_remove()
 * erase whatever entry had been allocated that index in the meantime.
 *
 * Returns zero on success, or a negative errno from the xarray store.
 */
static int offset_restore(struct offset_ctx *octx, struct dentry *dentry,
			  u32 offset)
{
	int ret;

	ret = xa_err(xa_store(&octx->xa, offset, dentry, GFP_KERNEL));
	if (ret)
		return ret;

	offset_set(dentry, offset);
	return 0;
}

/**
 * simple_offset_rename_exchange - exchange rename with directory offsets
 * @old_dir: parent of dentry being moved
 * @old_dentry: dentry being moved
 * @new_dir: destination parent
 * @new_dentry: destination dentry
 *
 * Both dentries are removed from their parents' offset maps, re-added to
 * the opposite parent's map, and then exchanged via
 * simple_rename_exchange().
 *
 * Returns zero on success. Otherwise a negative errno is returned and the
 * offset maps are rolled back on a best-effort basis: a dentry whose
 * original slot cannot be re-stored is left with no offset rather than
 * with an offset that is absent from its parent's map.
 */
int simple_offset_rename_exchange(struct inode *old_dir,
				  struct dentry *old_dentry,
				  struct inode *new_dir,
				  struct dentry *new_dentry)
{
	struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
	struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
	/* Remember the original slots so a failure can put them back */
	u32 old_index = dentry2offset(old_dentry);
	u32 new_index = dentry2offset(new_dentry);
	int ret;

	simple_offset_remove(old_ctx, old_dentry);
	simple_offset_remove(new_ctx, new_dentry);

	ret = simple_offset_add(new_ctx, old_dentry);
	if (ret)
		goto out_restore;

	ret = simple_offset_add(old_ctx, new_dentry);
	if (ret) {
		simple_offset_remove(new_ctx, old_dentry);
		goto out_restore;
	}

	ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
	if (ret) {
		simple_offset_remove(new_ctx, old_dentry);
		simple_offset_remove(old_ctx, new_dentry);
		goto out_restore;
	}
	return 0;

out_restore:
	/*
	 * Every path that reaches this label has already cleared both
	 * dentries' offsets. The error that brought us here is the one the
	 * caller needs, so a failure to restore is deliberately not reported.
	 */
	(void)offset_restore(old_ctx, old_dentry, old_index);
	(void)offset_restore(new_ctx, new_dentry, new_index);
	return ret;
}

/**
 * simple_offset_destroy - Tear down a directory's offset map
 * @octx: directory offset ctx that is about to be destroyed
 *
 * Entries may still be present in the map when the filesystem is being
 * torn down (eg. umount). Whatever remains is dropped by xa_destroy().
 */
void simple_offset_destroy(struct offset_ctx *octx)
{
	struct xarray *map = &octx->xa;

	xa_destroy(map);
}

/**
 * offset_dir_llseek - Move the read position of a directory descriptor
 * @file: an open directory whose position is to be updated
 * @offset: a byte offset
 * @whence: enumerator describing the starting position for this update
 *
 * Only SEEK_SET and SEEK_CUR are accepted. SEEK_END, SEEK_DATA, and
 * SEEK_HOLE are not supported for directories.
 *
 * Returns the updated read position if successful; otherwise a
 * negative errno is returned and the read position remains unchanged.
 */
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
{
	if (whence != SEEK_SET && whence != SEEK_CUR)
		return -EINVAL;

	/* A relative seek is resolved against the current position */
	if (whence == SEEK_CUR)
		offset += file->f_pos;

	if (offset < 0)
		return -EINVAL;

	return vfs_setpos(file, offset, U32_MAX);
}

/*
 * Advance @xas to its next entry (bounded by U32_MAX) and, if that entry
 * is simple_positive(), return it with a reference taken under its d_lock.
 *
 * Returns NULL when no entry is found or when the entry found is not
 * positive. The caller releases a non-NULL result with dput().
 */
static struct dentry *offset_find_next(struct xa_state *xas)
{
	struct dentry *result = NULL;
	struct dentry *candidate;

	rcu_read_lock();
	candidate = xas_next_entry(xas, U32_MAX);
	if (candidate) {
		spin_lock_nested(&candidate->d_lock, DENTRY_D_LOCK_NESTED);
		if (simple_positive(candidate))
			result = dget_dlock(candidate);
		spin_unlock(&candidate->d_lock);
	}
	rcu_read_unlock();

	return result;
}

/*
 * Hand one directory entry to @ctx's actor: the dentry's name, its stored
 * directory offset, and the inode number and d_type of its inode.
 *
 * Returns whatever the actor returns.
 */
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
{
	struct inode *target = d_inode(dentry);

	return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len,
			  dentry2offset(dentry), target->i_ino,
			  fs_umode_to_dtype(target->i_mode));
}

static void offset_iterate_dir(struct dentry *dir, struct dir_context *ctx)
{
	struct inode *inode = d_inode(dir);
	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
	XA_STATE(xas, &so_ctx->xa, ctx->pos);
	struct dentry *dentry;

	while (true) {
		spin_lock(&dir->d_lock);
		dentry = offset_find_next(&xas);
		spin_unlock(&dir->d_lock);
		if (!dentry)
			break;

		if (!offset_dir_emit(ctx, dentry)) {
			dput(dentry);
			break;
		}

		dput(dentry);
		ctx->pos = xas.xa_index + 1;
	}
}

/**
 * offset_readdir - Emit entries starting at offset @ctx->pos
 * @file: an open directory to iterate over
 * @ctx: directory iteration context
 *
 * Caller must hold @file's i_rwsem to prevent insertion or removal of
 * entries during this call.
 *
 * On entry, @ctx->pos contains an offset that represents the first entry
 * to be read from the directory.
 *
 * The '.' and '..' entries are emitted first. The remaining entries are
 * then emitted until none are left, or until the ctx->actor indicates
 * there is no more space in the caller's output buffer.
 *
 * On return, @ctx->pos contains an offset that will read the next entry
 * in this directory when offset_readdir() is called again with @ctx.
 *
 * Return values:
 *   %0 - Complete
 */
static int offset_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *parent = file->f_path.dentry;

	lockdep_assert_held(&d_inode(parent)->i_rwsem);

	/* Only move on to real entries once the dot entries were accepted */
	if (dir_emit_dots(file, ctx))
		offset_iterate_dir(parent, ctx);

	return 0;
}

/*
 * Directory file operations backed by the stable offset map: seeking and
 * iteration use the offset_* helpers, the rest are generic helpers.
 */
const struct file_operations simple_offset_dir_operations = {
	.read		= generic_read_dir,
	.llseek		= offset_dir_llseek,
	.iterate_shared	= offset_readdir,
	.fsync		= noop_fsync,
};

static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{
	struct dentry *child = NULL;
+18 −0
Original line number Diff line number Diff line
@@ -1770,6 +1770,7 @@ struct dir_context {

struct iov_iter;
struct io_uring_cmd;
struct offset_ctx;

struct file_operations {
	struct module *owner;
@@ -1857,6 +1858,7 @@ struct inode_operations {
	int (*fileattr_set)(struct mnt_idmap *idmap,
			    struct dentry *dentry, struct fileattr *fa);
	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;

static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -2971,6 +2973,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
		const void __user *from, size_t count);

/*
 * Per-directory state for stable directory offsets.
 */
struct offset_ctx {
	struct xarray		xa;		/* maps a directory offset to its dentry */
	u32			next_offset;	/* allocation cursor passed to xa_alloc_cyclic() */
};

/*
 * Stable directory offset helpers. A filesystem using
 * simple_offset_dir_operations must provide ->get_offset_ctx() in its
 * inode_operations so these can find the directory's offset_ctx.
 */
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
int simple_offset_rename_exchange(struct inode *old_dir,
				  struct dentry *old_dentry,
				  struct inode *new_dir,
				  struct dentry *new_dentry);
void simple_offset_destroy(struct offset_ctx *octx);

/* Directory file_operations using stable offsets for llseek and readdir */
extern const struct file_operations simple_offset_dir_operations;

extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_file_fsync(struct file *, loff_t, loff_t, int);