Commit 75b96f0e authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull fuse updates from Miklos Szeredi:

 - Allow mounting an active fuse device. Previously the fuse device
   would always be mounted during initialization, and sharing a fuse
   superblock was only possible through mount or namespace cloning

 - Fix data flushing in syncfs (virtiofs only)

 - Fix data flushing in copy_file_range()

 - Fix a possible deadlock in atomic O_TRUNC

 - Misc fixes and cleanups

* tag 'fuse-update-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: remove unused arg in fuse_write_file_get()
  fuse: wait for writepages in syncfs
  fuse: flush extending writes
  fuse: truncate pagecache on atomic_o_trunc
  fuse: allow sharing existing sb
  fuse: move fget() to fuse_get_tree()
  fuse: move option checking into fuse_fill_super()
  fuse: name fs_context consistently
  fuse: fix use after free in fuse_read_interrupt()
parents 996fe061 a9667ac8
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -328,7 +328,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
	drop_nlink(d_inode(fuse_control_sb->s_root));
}

static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	static const struct tree_descr empty_descr = {""};
	struct fuse_conn *fc;
@@ -354,18 +354,18 @@ static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
	return 0;
}

static int fuse_ctl_get_tree(struct fs_context *fc)
static int fuse_ctl_get_tree(struct fs_context *fsc)
{
	return get_tree_single(fc, fuse_ctl_fill_super);
	return get_tree_single(fsc, fuse_ctl_fill_super);
}

static const struct fs_context_operations fuse_ctl_context_ops = {
	.get_tree	= fuse_ctl_get_tree,
};

static int fuse_ctl_init_fs_context(struct fs_context *fc)
static int fuse_ctl_init_fs_context(struct fs_context *fsc)
{
	fc->ops = &fuse_ctl_context_ops;
	fsc->ops = &fuse_ctl_context_ops;
	return 0;
}

+2 −2
Original line number Diff line number Diff line
@@ -288,10 +288,10 @@ void fuse_request_end(struct fuse_req *req)

	/*
	 * test_and_set_bit() implies smp_mb() between bit
	 * changing and below intr_entry check. Pairs with
	 * changing and below FR_INTERRUPTED check. Pairs with
	 * smp_mb() from queue_interrupt().
	 */
	if (!list_empty(&req->intr_entry)) {
	if (test_bit(FR_INTERRUPTED, &req->flags)) {
		spin_lock(&fiq->lock);
		list_del_init(&req->intr_entry);
		spin_unlock(&fiq->lock);
+33 −12
Original line number Diff line number Diff line
@@ -198,12 +198,11 @@ void fuse_finish_open(struct inode *inode, struct file *file)
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (!(ff->open_flags & FOPEN_KEEP_CACHE))
		invalidate_inode_pages2(inode->i_mapping);
	if (ff->open_flags & FOPEN_STREAM)
		stream_open(inode, file);
	else if (ff->open_flags & FOPEN_NONSEEKABLE)
		nonseekable_open(inode, file);

	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
		struct fuse_inode *fi = get_fuse_inode(inode);

@@ -211,10 +210,14 @@ void fuse_finish_open(struct inode *inode, struct file *file)
		fi->attr_version = atomic64_inc_return(&fc->attr_version);
		i_size_write(inode, 0);
		spin_unlock(&fi->lock);
		truncate_pagecache(inode, 0);
		fuse_invalidate_attr(inode);
		if (fc->writeback_cache)
			file_update_time(file);
	} else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
		invalidate_inode_pages2(inode->i_mapping);
	}

	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
		fuse_link_write_file(file);
}
@@ -389,6 +392,7 @@ struct fuse_writepage_args {
	struct list_head queue_entry;
	struct fuse_writepage_args *next;
	struct inode *inode;
	struct fuse_sync_bucket *bucket;
};

static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
@@ -1608,6 +1612,9 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
	struct fuse_args_pages *ap = &wpa->ia.ap;
	int i;

	if (wpa->bucket)
		fuse_sync_bucket_dec(wpa->bucket);

	for (i = 0; i < ap->num_pages; i++)
		__free_page(ap->pages[i]);

@@ -1813,8 +1820,7 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
	fuse_writepage_free(wpa);
}

static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
					       struct fuse_inode *fi)
static struct fuse_file *__fuse_write_file_get(struct fuse_inode *fi)
{
	struct fuse_file *ff = NULL;

@@ -1829,22 +1835,20 @@ static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
	return ff;
}

static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
					     struct fuse_inode *fi)
static struct fuse_file *fuse_write_file_get(struct fuse_inode *fi)
{
	struct fuse_file *ff = __fuse_write_file_get(fc, fi);
	struct fuse_file *ff = __fuse_write_file_get(fi);
	WARN_ON(!ff);
	return ff;
}

int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_file *ff;
	int err;

	ff = __fuse_write_file_get(fc, fi);
	ff = __fuse_write_file_get(fi);
	err = fuse_flush_times(inode, ff);
	if (ff)
		fuse_file_put(ff, false, false);
@@ -1871,6 +1875,20 @@ static struct fuse_writepage_args *fuse_writepage_args_alloc(void)

}

static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
					 struct fuse_writepage_args *wpa)
{
	if (!fc->sync_fs)
		return;

	rcu_read_lock();
	/* Prevent resurrection of dead bucket in unlikely race with syncfs */
	do {
		wpa->bucket = rcu_dereference(fc->curr_bucket);
	} while (unlikely(!atomic_inc_not_zero(&wpa->bucket->count)));
	rcu_read_unlock();
}

static int fuse_writepage_locked(struct page *page)
{
	struct address_space *mapping = page->mapping;
@@ -1894,10 +1912,11 @@ static int fuse_writepage_locked(struct page *page)
		goto err_free;

	error = -EIO;
	wpa->ia.ff = fuse_write_file_get(fc, fi);
	wpa->ia.ff = fuse_write_file_get(fi);
	if (!wpa->ia.ff)
		goto err_nofile;

	fuse_writepage_add_to_bucket(fc, wpa);
	fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);

	copy_highpage(tmp_page, page);
@@ -2113,7 +2132,7 @@ static int fuse_writepages_fill(struct page *page,

	if (!data->ff) {
		err = -EIO;
		data->ff = fuse_write_file_get(fc, fi);
		data->ff = fuse_write_file_get(fi);
		if (!data->ff)
			goto out_unlock;
	}
@@ -2148,6 +2167,8 @@ static int fuse_writepages_fill(struct page *page,
			__free_page(tmp_page);
			goto out_unlock;
		}
		fuse_writepage_add_to_bucket(fc, wpa);

		data->max_pages = 1;

		ap = &wpa->ia.ap;
@@ -2881,7 +2902,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)

static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
{
	int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
	int err = filemap_write_and_wait_range(inode->i_mapping, start, -1);

	if (!err)
		fuse_sync_writes(inode);
+20 −0
Original line number Diff line number Diff line
@@ -482,6 +482,7 @@ struct fuse_dev {

struct fuse_fs_context {
	int fd;
	struct file *file;
	unsigned int rootmode;
	kuid_t user_id;
	kgid_t group_id;
@@ -508,6 +509,13 @@ struct fuse_fs_context {
	void **fudptr;
};

struct fuse_sync_bucket {
	/* count is a possible scalability bottleneck */
	atomic_t count;
	wait_queue_head_t waitq;
	struct rcu_head rcu;
};

/**
 * A Fuse connection.
 *
@@ -800,6 +808,9 @@ struct fuse_conn {

	/** List of filesystems using this connection */
	struct list_head mounts;

	/* New writepages go into this bucket */
	struct fuse_sync_bucket __rcu *curr_bucket;
};

/*
@@ -903,6 +914,15 @@ static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
		descs[i].length = PAGE_SIZE - descs[i].offset;
}

static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
{
	/* Need RCU protection to prevent use after free after the decrement */
	rcu_read_lock();
	if (atomic_dec_and_test(&bucket->count))
		wake_up(&bucket->waitq);
	rcu_read_unlock();
}

/** Device operations */
extern const struct file_operations fuse_dev_operations;

+148 −55
Original line number Diff line number Diff line
@@ -137,12 +137,12 @@ static void fuse_evict_inode(struct inode *inode)
	}
}

static int fuse_reconfigure(struct fs_context *fc)
static int fuse_reconfigure(struct fs_context *fsc)
{
	struct super_block *sb = fc->root->d_sb;
	struct super_block *sb = fsc->root->d_sb;

	sync_filesystem(sb);
	if (fc->sb_flags & SB_MANDLOCK)
	if (fsc->sb_flags & SB_MANDLOCK)
		return -EINVAL;

	return 0;
@@ -505,6 +505,57 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
	return err;
}

static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
{
	struct fuse_sync_bucket *bucket;

	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
	if (bucket) {
		init_waitqueue_head(&bucket->waitq);
		/* Initial active count */
		atomic_set(&bucket->count, 1);
	}
	return bucket;
}

static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
	struct fuse_sync_bucket *bucket, *new_bucket;
	int count;

	new_bucket = fuse_sync_bucket_alloc();
	spin_lock(&fc->lock);
	bucket = rcu_dereference_protected(fc->curr_bucket, 1);
	count = atomic_read(&bucket->count);
	WARN_ON(count < 1);
	/* No outstanding writes? */
	if (count == 1) {
		spin_unlock(&fc->lock);
		kfree(new_bucket);
		return;
	}

	/*
	 * Completion of new bucket depends on completion of this bucket, so add
	 * one more count.
	 */
	atomic_inc(&new_bucket->count);
	rcu_assign_pointer(fc->curr_bucket, new_bucket);
	spin_unlock(&fc->lock);
	/*
	 * Drop initial active count.  At this point if all writes in this and
	 * ancestor buckets complete, the count will go to zero and this task
	 * will be woken up.
	 */
	atomic_dec(&bucket->count);

	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);

	/* Drop temp count on descendant bucket */
	fuse_sync_bucket_dec(new_bucket);
	kfree_rcu(bucket, rcu);
}

static int fuse_sync_fs(struct super_block *sb, int wait)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
@@ -527,6 +578,8 @@ static int fuse_sync_fs(struct super_block *sb, int wait)
	if (!fc->sync_fs)
		return 0;

	fuse_sync_fs_writes(fc);

	memset(&inarg, 0, sizeof(inarg));
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
@@ -572,38 +625,38 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
	{}
};

static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fc->fs_private;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;

	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
	if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
		if (fc->oldapi)
		if (fsc->oldapi)
			return 0;

		return invalfc(fc, "No changes allowed in reconfigure");
		return invalfc(fsc, "No changes allowed in reconfigure");
	}

	opt = fs_parse(fc, fuse_fs_parameters, param, &result);
	opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
		if (fc->source)
			return invalfc(fc, "Multiple sources specified");
		fc->source = param->string;
		if (fsc->source)
			return invalfc(fsc, "Multiple sources specified");
		fsc->source = param->string;
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
			return invalfc(fc, "Multiple subtypes specified");
			return invalfc(fsc, "Multiple subtypes specified");
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;
@@ -615,22 +668,22 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
			return invalfc(fc, "Invalid rootmode");
			return invalfc(fsc, "Invalid rootmode");
		ctx->rootmode = result.uint_32;
		ctx->rootmode_present = true;
		break;

	case OPT_USER_ID:
		ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
		ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
		if (!uid_valid(ctx->user_id))
			return invalfc(fc, "Invalid user_id");
			return invalfc(fsc, "Invalid user_id");
		ctx->user_id_present = true;
		break;

	case OPT_GROUP_ID:
		ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
		ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
		if (!gid_valid(ctx->group_id))
			return invalfc(fc, "Invalid group_id");
			return invalfc(fsc, "Invalid group_id");
		ctx->group_id_present = true;
		break;

@@ -648,7 +701,7 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
			return invalfc(fc, "blksize only supported for fuseblk");
			return invalfc(fsc, "blksize only supported for fuseblk");
		ctx->blksize = result.uint_32;
		break;

@@ -659,9 +712,9 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
	return 0;
}

static void fuse_free_fc(struct fs_context *fc)
static void fuse_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fc->fs_private;
	struct fuse_fs_context *ctx = fsc->fs_private;

	if (ctx) {
		kfree(ctx->subtype);
@@ -762,6 +815,7 @@ void fuse_conn_put(struct fuse_conn *fc)
{
	if (refcount_dec_and_test(&fc->count)) {
		struct fuse_iqueue *fiq = &fc->iq;
		struct fuse_sync_bucket *bucket;

		if (IS_ENABLED(CONFIG_FUSE_DAX))
			fuse_dax_conn_free(fc);
@@ -769,6 +823,11 @@ void fuse_conn_put(struct fuse_conn *fc)
			fiq->ops->release(fiq);
		put_pid_ns(fc->pid_ns);
		put_user_ns(fc->user_ns);
		bucket = rcu_dereference_protected(fc->curr_bucket, 1);
		if (bucket) {
			WARN_ON(atomic_read(&bucket->count) != 1);
			kfree(bucket);
		}
		fc->release(fc);
	}
}
@@ -1417,6 +1476,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
	if (sb->s_flags & SB_MANDLOCK)
		goto err;

	rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
	fuse_sb_defaults(sb);

	if (ctx->is_bdev) {
@@ -1508,34 +1568,33 @@ EXPORT_SYMBOL_GPL(fuse_fill_super_common);
static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct file *file;
	int err;
	struct fuse_conn *fc;
	struct fuse_mount *fm;

	err = -EINVAL;
	file = fget(ctx->fd);
	if (!file)
		goto err;
	if (!ctx->file || !ctx->rootmode_present ||
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;

	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
	if ((file->f_op != &fuse_dev_operations) ||
	    (file->f_cred->user_ns != sb->s_user_ns))
		goto err_fput;
	ctx->fudptr = &file->private_data;
	err = -EINVAL;
	if ((ctx->file->f_op != &fuse_dev_operations) ||
	    (ctx->file->f_cred->user_ns != sb->s_user_ns))
		goto err;
	ctx->fudptr = &ctx->file->private_data;

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	err = -ENOMEM;
	if (!fc)
		goto err_fput;
		goto err;

	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm) {
		kfree(fc);
		goto err_fput;
		goto err;
	}

	fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
@@ -1546,12 +1605,8 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
	err = fuse_fill_super_common(sb, ctx);
	if (err)
		goto err_put_conn;
	/*
	 * atomic_dec_and_test() in fput() provides the necessary
	 * memory barrier for file->private_data to be visible on all
	 * CPUs after this
	 */
	fput(file);
	/* file->private_data shall be visible on all CPUs after this */
	smp_mb();
	fuse_send_init(get_fuse_mount_super(sb));
	return 0;

@@ -1559,30 +1614,68 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
	fuse_conn_put(fc);
	kfree(fm);
	sb->s_fs_info = NULL;
 err_fput:
	fput(file);
 err:
	return err;
}

static int fuse_get_tree(struct fs_context *fc)
/*
 * This is the path where user supplied an already initialized fuse dev.  In
 * this case never create a new super if the old one is gone.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fc->fs_private;
	return -ENOTCONN;
}

	if (!ctx->fd_present || !ctx->rootmode_present ||
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;
static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
{

#ifdef CONFIG_BLOCK
	if (ctx->is_bdev)
		return get_tree_bdev(fc, fuse_fill_super);
#endif
	return fsc->sget_key == get_fuse_conn_super(sb);
}

	return get_tree_nodev(fc, fuse_fill_super);
static int fuse_get_tree(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct fuse_dev *fud;
	struct super_block *sb;
	int err;

	if (ctx->fd_present)
		ctx->file = fget(ctx->fd);

	if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
		err = get_tree_bdev(fsc, fuse_fill_super);
		goto out_fput;
	}
	/*
	 * While block dev mount can be initialized with a dummy device fd
	 * (found by device name), normal fuse mounts can't
	 */
	if (!ctx->file)
		return -EINVAL;

	/*
	 * Allow creating a fuse mount with an already initialized fuse
	 * connection
	 */
	fud = READ_ONCE(ctx->file->private_data);
	if (ctx->file->f_op == &fuse_dev_operations && fud) {
		fsc->sget_key = fud->fc;
		sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
		err = PTR_ERR_OR_ZERO(sb);
		if (!IS_ERR(sb))
			fsc->root = dget(sb->s_root);
	} else {
		err = get_tree_nodev(fsc, fuse_fill_super);
	}
out_fput:
	if (ctx->file)
		fput(ctx->file);
	return err;
}

static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fc,
	.free		= fuse_free_fsc,
	.parse_param	= fuse_parse_param,
	.reconfigure	= fuse_reconfigure,
	.get_tree	= fuse_get_tree,
@@ -1591,7 +1684,7 @@ static const struct fs_context_operations fuse_context_ops = {
/*
 * Set up the filesystem mount context.
 */
static int fuse_init_fs_context(struct fs_context *fc)
static int fuse_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

@@ -1604,14 +1697,14 @@ static int fuse_init_fs_context(struct fs_context *fc)
	ctx->legacy_opts_show = true;

#ifdef CONFIG_BLOCK
	if (fc->fs_type == &fuseblk_fs_type) {
	if (fsc->fs_type == &fuseblk_fs_type) {
		ctx->is_bdev = true;
		ctx->destroy = true;
	}
#endif

	fc->fs_private = ctx;
	fc->ops = &fuse_context_ops;
	fsc->fs_private = ctx;
	fsc->ops = &fuse_context_ops;
	return 0;
}

Loading