Commit aeb6e6ac authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - pNFS/flexfiles: Fix infinite looping when the RDMA connection
     errors out

  Bugfixes:
   - NFS: fix port value parsing
   - SUNRPC: Reinitialise the backchannel request buffers before reuse
   - SUNRPC: fix expiry of auth creds
   - NFSv4: Fix races in the legacy idmapper upcall
   - NFS: O_DIRECT fixes from Jeff Layton
   - NFSv4.1: Fix OP_SEQUENCE error handling
   - SUNRPC: Fix an RPC/RDMA performance regression
   - NFS: Fix case insensitive renames
   - NFSv4/pnfs: Fix a use-after-free bug in open
   - NFSv4.1: RECLAIM_COMPLETE must handle EACCES

  Features:
   - NFSv4.1: session trunking enhancements
   - NFSv4.2: READ_PLUS performance optimisations
   - NFS: relax the rules for rsize/wsize mount options
   - NFS: don't unhash dentry during unlink/rename
   - SUNRPC: Fail faster on bad verifier
   - NFS/SUNRPC: Various tracing improvements"

* tag 'nfs-for-5.20-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (46 commits)
  NFS: Improve readpage/writepage tracing
  NFS: Improve O_DIRECT tracing
  NFS: Improve write error tracing
  NFS: don't unhash dentry during unlink/rename
  NFSv4/pnfs: Fix a use-after-free bug in open
  NFS: nfs_async_write_reschedule_io must not recurse into the writeback code
  SUNRPC: Don't reuse bvec on retransmission of the request
  SUNRPC: Reinitialise the backchannel request buffers before reuse
  NFSv4.1: RECLAIM_COMPLETE must handle EACCES
  NFSv4.1 probe offline transports for trunking on session creation
  SUNRPC create a function that probes only offline transports
  SUNRPC export xprt_iter_rewind function
  SUNRPC restructure rpc_clnt_setup_test_and_add_xprt
  NFSv4.1 remove xprt from xprt_switch if session trunking test fails
  SUNRPC create an rpc function that allows xprt removal from rpc_clnt
  SUNRPC enable back offline transports in trunking discovery
  SUNRPC create an iterator to list only OFFLINE xprts
  NFSv4.1 offline trunkable transports on DESTROY_SESSION
  SUNRPC add function to offline remove trunkable transports
  SUNRPC expose functions for offline remote xprt functionality
  ...
parents f4144564 3fa5cbdc
Loading
Loading
Loading
Loading
+11 −31
Original line number Diff line number Diff line
@@ -301,18 +301,14 @@ bl_validate_designator(struct pnfs_block_volume *v)
	}
}

/*
 * Try to open the udev path for the WWN.  At least on Debian the udev
 * by-id path will always point to the dm-multipath device if one exists.
 */
static struct block_device *
bl_open_udev_path(struct pnfs_block_volume *v)
bl_open_path(struct pnfs_block_volume *v, const char *prefix)
{
	struct block_device *bdev;
	const char *devname;

	devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN",
				v->scsi.designator_len, v->scsi.designator);
	devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN",
			prefix, v->scsi.designator_len, v->scsi.designator);
	if (!devname)
		return ERR_PTR(-ENOMEM);

@@ -326,28 +322,6 @@ bl_open_udev_path(struct pnfs_block_volume *v)
	return bdev;
}

/*
 * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the
 * wwn- links will only point to the first discovered SCSI device there.
 */
static struct block_device *
bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v)
{
	struct block_device *bdev;
	const char *devname;

	/* Build "/dev/disk/by-id/dm-uuid-mpath-<type><designator hex>".
	 * %*phN prints designator_len bytes of the designator as hex with
	 * no separators; kasprintf allocates the string (freed below).
	 */
	devname = kasprintf(GFP_KERNEL,
			"/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
			v->scsi.designator_type,
			v->scsi.designator_len, v->scsi.designator);
	if (!devname)
		return ERR_PTR(-ENOMEM);

	/* On success the caller owns the bdev reference; on failure an
	 * ERR_PTR is returned. The path string is no longer needed either way.
	 */
	bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
	kfree(devname);
	return bdev;
}

static int
bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -360,9 +334,15 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
	if (!bl_validate_designator(v))
		return -EINVAL;

	bdev = bl_open_dm_mpath_udev_path(v);
	/*
	 * Try to open the RH/Fedora specific dm-mpath udev path first, as the
	 * wwn- links will only point to the first discovered SCSI device there.
	 * On other distributions like Debian, the default SCSI by-id path will
	 * point to the dm-multipath device if one exists.
	 */
	bdev = bl_open_path(v, "dm-uuid-mpath-0x");
	if (IS_ERR(bdev))
		bdev = bl_open_udev_path(v);
		bdev = bl_open_path(v, "wwn-0x");
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	d->bdev = bdev;
+7 −6
Original line number Diff line number Diff line
@@ -708,9 +708,9 @@ static int nfs_init_server(struct nfs_server *server,
	}

	if (ctx->rsize)
		server->rsize = nfs_block_size(ctx->rsize, NULL);
		server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto);
	if (ctx->wsize)
		server->wsize = nfs_block_size(ctx->wsize, NULL);
		server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto);

	server->acregmin = ctx->acregmin * HZ;
	server->acregmax = ctx->acregmax * HZ;
@@ -755,18 +755,19 @@ static int nfs_init_server(struct nfs_server *server,
static void nfs_server_set_fsinfo(struct nfs_server *server,
				  struct nfs_fsinfo *fsinfo)
{
	struct nfs_client *clp = server->nfs_client;
	unsigned long max_rpc_payload, raw_max_rpc_payload;

	/* Work out a lot of parameters */
	if (server->rsize == 0)
		server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
		server->rsize = nfs_io_size(fsinfo->rtpref, clp->cl_proto);
	if (server->wsize == 0)
		server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
		server->wsize = nfs_io_size(fsinfo->wtpref, clp->cl_proto);

	if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
		server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
		server->rsize = nfs_io_size(fsinfo->rtmax, clp->cl_proto);
	if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
		server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
		server->wsize = nfs_io_size(fsinfo->wtmax, clp->cl_proto);

	raw_max_rpc_payload = rpc_max_payload(server->client);
	max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL);
+60 −20
Original line number Diff line number Diff line
@@ -1084,7 +1084,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
	struct nfs_cache_array *array;
	unsigned int i;

	array = kmap(desc->page);
	array = kmap_local_page(desc->page);
	for (i = desc->cache_entry_index; i < array->size; i++) {
		struct nfs_cache_array_entry *ent;

@@ -1110,7 +1110,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
	if (array->page_is_eof)
		desc->eof = !desc->eob;

	kunmap(desc->page);
	kunmap_local(array);
	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
			(unsigned long long)desc->dir_cookie);
}
@@ -1739,6 +1739,10 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
		goto out_bad;
	}

	if ((flags & LOOKUP_RENAME_TARGET) && d_count(dentry) < 2 &&
	    nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
		goto out_bad;

	if (nfs_verifier_is_delegated(dentry))
		return nfs_lookup_revalidate_delegated(dir, dentry, inode);

@@ -1778,6 +1782,8 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
	int ret;

	if (flags & LOOKUP_RCU) {
		if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
			return -ECHILD;
		parent = READ_ONCE(dentry->d_parent);
		dir = d_inode_rcu(parent);
		if (!dir)
@@ -1786,6 +1792,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
		if (parent != READ_ONCE(dentry->d_parent))
			return -ECHILD;
	} else {
		/* Wait for unlink to complete */
		wait_var_event(&dentry->d_fsdata,
			       dentry->d_fsdata != NFS_FSDATA_BLOCKED);
		parent = dget_parent(dentry);
		ret = reval(d_inode(parent), dentry, flags);
		dput(parent);
@@ -2454,7 +2463,6 @@ static int nfs_safe_remove(struct dentry *dentry)
int nfs_unlink(struct inode *dir, struct dentry *dentry)
{
	int error;
	int need_rehash = 0;

	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
		dir->i_ino, dentry);
@@ -2469,15 +2477,25 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
		error = nfs_sillyrename(dir, dentry);
		goto out;
	}
	if (!d_unhashed(dentry)) {
		__d_drop(dentry);
		need_rehash = 1;
	}
	/* We must prevent any concurrent open until the unlink
	 * completes.  ->d_revalidate will wait for ->d_fsdata
	 * to clear.  We set it here to ensure no lookup succeeds until
	 * the unlink is complete on the server.
	 */
	error = -ETXTBSY;
	if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
	    WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED))
		goto out;
	if (dentry->d_fsdata)
		/* old devname */
		kfree(dentry->d_fsdata);
	dentry->d_fsdata = NFS_FSDATA_BLOCKED;

	spin_unlock(&dentry->d_lock);
	error = nfs_safe_remove(dentry);
	nfs_dentry_remove_handle_error(dir, dentry, error);
	if (need_rehash)
		d_rehash(dentry);
	dentry->d_fsdata = NULL;
	wake_up_var(&dentry->d_fsdata);
out:
	trace_nfs_unlink_exit(dir, dentry, error);
	return error;
@@ -2584,6 +2602,15 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(nfs_link);

/*
 * Rename-completion callback: clear the NFS_FSDATA_BLOCKED marker that
 * nfs_rename() set on the target dentry's ->d_fsdata, then wake any
 * tasks sleeping in wait_var_event() on &new_dentry->d_fsdata
 * (see __nfs_lookup_revalidate), allowing lookups of the name to
 * proceed again.  @task is unused here; the signature matches the
 * nfs_async_rename() completion-callback contract.
 */
static void
nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
{
	struct dentry *new_dentry = data->new_dentry;

	new_dentry->d_fsdata = NULL;
	wake_up_var(&new_dentry->d_fsdata);
}

/*
 * RENAME
 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2614,8 +2641,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
{
	struct inode *old_inode = d_inode(old_dentry);
	struct inode *new_inode = d_inode(new_dentry);
	struct dentry *dentry = NULL, *rehash = NULL;
	struct dentry *dentry = NULL;
	struct rpc_task *task;
	bool must_unblock = false;
	int error = -EBUSY;

	if (flags)
@@ -2633,18 +2661,27 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
	 * the new target.
	 */
	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
		/*
		 * To prevent any new references to the target during the
		 * rename, we unhash the dentry in advance.
		/* We must prevent any concurrent open until the unlink
		 * completes.  ->d_revalidate will wait for ->d_fsdata
		 * to clear.  We set it here to ensure no lookup succeeds until
		 * the unlink is complete on the server.
		 */
		if (!d_unhashed(new_dentry)) {
			d_drop(new_dentry);
			rehash = new_dentry;
		error = -ETXTBSY;
		if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
		    WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
			goto out;
		if (new_dentry->d_fsdata) {
			/* old devname */
			kfree(new_dentry->d_fsdata);
			new_dentry->d_fsdata = NULL;
		}

		spin_lock(&new_dentry->d_lock);
		if (d_count(new_dentry) > 2) {
			int err;

			spin_unlock(&new_dentry->d_lock);

			/* copy the target dentry's name */
			dentry = d_alloc(new_dentry->d_parent,
					 &new_dentry->d_name);
@@ -2657,14 +2694,19 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
				goto out;

			new_dentry = dentry;
			rehash = NULL;
			new_inode = NULL;
		} else {
			new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
			must_unblock = true;
			spin_unlock(&new_dentry->d_lock);
		}

	}

	if (S_ISREG(old_inode->i_mode))
		nfs_sync_inode(old_inode);
	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
				must_unblock ? nfs_unblock_rename : NULL);
	if (IS_ERR(task)) {
		error = PTR_ERR(task);
		goto out;
@@ -2688,8 +2730,6 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
		spin_unlock(&old_inode->i_lock);
	}
out:
	if (rehash)
		d_rehash(rehash);
	trace_nfs_rename_exit(old_dir, old_dentry,
			new_dir, new_dentry, error);
	if (!error) {
+15 −35
Original line number Diff line number Diff line
@@ -60,44 +60,12 @@
#include "iostat.h"
#include "pnfs.h"
#include "fscache.h"
#include "nfstrace.h"

#define NFSDBG_FACILITY		NFSDBG_VFS

static struct kmem_cache *nfs_direct_cachep;

/* Per-request state for an NFS O_DIRECT read or write. */
struct nfs_direct_req {
	struct kref		kref;		/* release manager */

	/* I/O parameters */
	struct nfs_open_context	*ctx;		/* file open context info */
	struct nfs_lock_context *l_ctx;		/* Lock context info */
	struct kiocb *		iocb;		/* controlling i/o request */
	struct inode *		inode;		/* target file of i/o */

	/* completion state */
	atomic_t		io_count;	/* i/os we're waiting for */
	spinlock_t		lock;		/* protect completion state */

	loff_t			io_start;	/* Start offset for I/O */
	ssize_t			count,		/* bytes actually processed */
				max_count,	/* max expected count */
				bytes_left,	/* bytes left to be sent */
				error;		/* any reported error */
	struct completion	completion;	/* wait for i/o completion */

	/* commit state */
	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo */
	struct pnfs_ds_commit_info ds_cinfo;	/* Storage for cinfo */
	struct work_struct	work;
	int			flags;
	/* for write */
#define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
	/* for read */
#define NFS_ODIRECT_SHOULD_DIRTY	(3)	/* dirty user-space page after read */
#define NFS_ODIRECT_DONE		INT_MAX	/* request finished/aborted; NOTE(review): old comment said "write verification failed" — looks copy-pasted from RESCHED_WRITES, confirm */
};

static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq);
@@ -594,14 +562,17 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
	struct nfs_page *req;
	int status = data->task.tk_status;

	trace_nfs_direct_commit_complete(dreq);

	if (status < 0) {
		/* Errors in commit are fatal */
		dreq->error = status;
		dreq->max_count = 0;
		dreq->count = 0;
		dreq->flags = NFS_ODIRECT_DONE;
	} else if (dreq->flags == NFS_ODIRECT_DONE)
	} else {
		status = dreq->error;
	}

	nfs_init_cinfo_from_dreq(&cinfo, dreq);

@@ -630,6 +601,8 @@ static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
{
	struct nfs_direct_req *dreq = cinfo->dreq;

	trace_nfs_direct_resched_write(dreq);

	spin_lock(&dreq->lock);
	if (dreq->flags != NFS_ODIRECT_DONE)
		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
@@ -694,6 +667,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)

static void nfs_direct_write_complete(struct nfs_direct_req *dreq)
{
	trace_nfs_direct_write_complete(dreq);
	queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */
}

@@ -704,6 +678,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
	struct nfs_page *req = nfs_list_entry(hdr->pages.next);
	int flags = NFS_ODIRECT_DONE;

	trace_nfs_direct_write_completion(dreq);

	nfs_init_cinfo_from_dreq(&cinfo, dreq);

	spin_lock(&dreq->lock);
@@ -713,7 +689,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
	}

	nfs_direct_count_bytes(dreq, hdr);
	if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) {
	if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) {
		if (!dreq->flags)
			dreq->flags = NFS_ODIRECT_DO_COMMIT;
		flags = dreq->flags;
@@ -758,6 +734,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
{
	struct nfs_direct_req *dreq = hdr->dreq;

	trace_nfs_direct_write_reschedule_io(dreq);

	spin_lock(&dreq->lock);
	if (dreq->error == 0) {
		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
@@ -798,6 +776,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
	size_t requested_bytes = 0;
	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);

	trace_nfs_direct_write_schedule_iovec(dreq);

	nfs_pageio_init_write(&desc, inode, ioflags, false,
			      &nfs_direct_write_completion_ops);
	desc.pg_dreq = dreq;
+0 −2
Original line number Diff line number Diff line
@@ -661,8 +661,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
		result = filemap_fdatawait_range(file->f_mapping,
						 iocb->ki_pos - written,
						 iocb->ki_pos - 1);
		if (result < 0)
			goto out;
	}
	result = generic_write_sync(iocb, written);
	if (result < 0)
Loading