Commit 6d29d7fe authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull nfsd updates from Chuck Lever:
 "We introduce 'courteous server' in this release. Previously NFSD would
  purge open and lock state for an unresponsive client after one lease
  period (typically 90 seconds). Now, after one lease period, another
  client can open and lock those files and the unresponsive client's
  lease is purged; otherwise if the unresponsive client's open and lock
  state is uncontended, the server retains that open and lock state for
  up to 24 hours, allowing the client's workload to resume after a
  lengthy network partition.

  A longstanding issue with NFSv4 file creation is also addressed.
  Previously a file creation can fail internally, returning an error to
  the client, but leave the newly created file in place as an artifact.
  The file creation code path has been reorganized so that internal
  failures and race conditions are less likely to result in an unwanted
  file creation.

  A fault injector has been added to help exercise paths that are run
  during kernel metadata cache invalidation. These caches contain
  information maintained by user space about exported filesystems. Many
  of our test workloads do not trigger cache invalidation.

  There is one patch that is needed to support PREEMPT_RT and a fix for
  an ancient 'sleep while spin-locked' splat that seems to have become
  easier to hit since v5.18-rc3"

* tag 'nfsd-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (36 commits)
  NFSD: nfsd_file_put() can sleep
  NFSD: Add documenting comment for nfsd4_release_lockowner()
  NFSD: Modernize nfsd4_release_lockowner()
  NFSD: Fix possible sleep during nfsd4_release_lockowner()
  nfsd: destroy percpu stats counters after reply cache shutdown
  nfsd: Fix null-ptr-deref in nfsd_fill_super()
  nfsd: Unregister the cld notifier when laundry_wq create failed
  SUNRPC: Use RMW bitops in single-threaded hot paths
  NFSD: Clean up the show_nf_flags() macro
  NFSD: Trace filecache opens
  NFSD: Move documenting comment for nfsd4_process_open2()
  NFSD: Fix whitespace
  NFSD: Remove dprintk call sites from tail of nfsd4_open()
  NFSD: Instantiate a struct file when creating a regular NFSv4 file
  NFSD: Clean up nfsd_open_verified()
  NFSD: Remove do_nfsd_create()
  NFSD: Refactor NFSv4 OPEN(CREATE)
  NFSD: Refactor NFSv3 CREATE
  NFSD: Refactor nfsd_create_setattr()
  NFSD: Avoid calling fh_drop_write() twice in do_nfsd_create()
  ...
parents 7f50d4df 08af54b3
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -434,6 +434,8 @@ prototypes::
	void (*lm_break)(struct file_lock *); /* break_lease callback */
	int (*lm_change)(struct file_lock **, int);
	bool (*lm_breaker_owns_lease)(struct file_lock *);
        bool (*lm_lock_expirable)(struct file_lock *);
        void (*lm_expire_lock)(void);

locking rules:

@@ -445,6 +447,8 @@ lm_grant: no no no
lm_break:		yes		no			no
lm_change		yes		no			no
lm_breaker_owns_lease:	yes     	no			no
lm_lock_expirable	yes		no			no
lm_expire_lock		no		no			yes
======================	=============	=================	=========

buffer_head
+58 −3
Original line number Diff line number Diff line
@@ -300,6 +300,34 @@ void locks_release_private(struct file_lock *fl)
}
EXPORT_SYMBOL_GPL(locks_release_private);

/**
 * locks_owner_has_blockers - Check for blocking lock requests
 * @flctx: file lock context
 * @owner: lock owner
 *
 * Return values:
 *   %true: @owner has at least one blocker
 *   %false: @owner has no blockers
 */
bool locks_owner_has_blockers(struct file_lock_context *flctx,
		fl_owner_t owner)
{
	struct file_lock *fl;

	spin_lock(&flctx->flc_lock);
	list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
		if (fl->fl_owner != owner)
			continue;
		if (!list_empty(&fl->fl_blocked_requests)) {
			spin_unlock(&flctx->flc_lock);
			return true;
		}
	}
	spin_unlock(&flctx->flc_lock);
	return false;
}
EXPORT_SYMBOL_GPL(locks_owner_has_blockers);

/* Free a lock which is not in use. */
void locks_free_lock(struct file_lock *fl)
{
@@ -874,6 +902,8 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
	struct file_lock *cfl;
	struct file_lock_context *ctx;
	struct inode *inode = locks_inode(filp);
	void *owner;
	void (*func)(void);

	ctx = smp_load_acquire(&inode->i_flctx);
	if (!ctx || list_empty_careful(&ctx->flc_posix)) {
@@ -881,13 +911,24 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
		return;
	}

retry:
	spin_lock(&ctx->flc_lock);
	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
		if (posix_locks_conflict(fl, cfl)) {
		if (!posix_locks_conflict(fl, cfl))
			continue;
		if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
			&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
			owner = cfl->fl_lmops->lm_mod_owner;
			func = cfl->fl_lmops->lm_expire_lock;
			__module_get(owner);
			spin_unlock(&ctx->flc_lock);
			(*func)();
			module_put(owner);
			goto retry;
		}
		locks_copy_conflock(fl, cfl);
		goto out;
	}
	}
	fl->fl_type = F_UNLCK;
out:
	spin_unlock(&ctx->flc_lock);
@@ -1060,6 +1101,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
	int error;
	bool added = false;
	LIST_HEAD(dispose);
	void *owner;
	void (*func)(void);

	ctx = locks_get_lock_context(inode, request->fl_type);
	if (!ctx)
@@ -1078,6 +1121,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
		new_fl2 = locks_alloc_lock();
	}

retry:
	percpu_down_read(&file_rwsem);
	spin_lock(&ctx->flc_lock);
	/*
@@ -1089,6 +1133,17 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
			if (!posix_locks_conflict(request, fl))
				continue;
			if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
				&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
				owner = fl->fl_lmops->lm_mod_owner;
				func = fl->fl_lmops->lm_expire_lock;
				__module_get(owner);
				spin_unlock(&ctx->flc_lock);
				percpu_up_read(&file_rwsem);
				(*func)();
				module_put(owner);
				goto retry;
			}
			if (conflock)
				locks_copy_conflock(conflock, fl);
			error = -EAGAIN;
+47 −7
Original line number Diff line number Diff line
@@ -303,6 +303,8 @@ nfsd_file_put_noref(struct nfsd_file *nf)
void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
		nfsd_file_flush(nf);
@@ -899,9 +901,9 @@ nfsd_file_is_cached(struct inode *inode)
	return ret;
}

__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
static __be32
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
@@ -996,10 +998,14 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
	if (nf->nf_mark) {
		if (open) {
			status = nfsd_open_verified(rqstp, fhp, may_flags,
						    &nf->nf_file);
			trace_nfsd_file_open(nf, status);
		} else
			status = nfs_ok;
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
@@ -1019,6 +1025,40 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
	goto out;
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
}

/**
 * nfsd_file_create - Get a struct nfsd_file, do not open
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file just created
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		 unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
+2 −0
Original line number Diff line number Diff line
@@ -59,5 +59,7 @@ void nfsd_file_close_inode_sync(struct inode *inode);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **nfp);
__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **nfp);
int	nfsd_file_cache_stats_open(struct inode *, struct file *);
#endif /* _FS_NFSD_FILECACHE_H */
+122 −19
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/magic.h>
#include <linux/namei.h>

#include "cache.h"
#include "xdr3.h"
@@ -220,17 +221,132 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
}

/*
 * With NFSv3, CREATE processing is a lot easier than with NFSv2.
 * At least in theory; we'll see how it fares in practice when the
 * first reports about SunOS compatibility problems start to pour in...
 * Implement NFSv3's unchecked, guarded, and exclusive CREATE
 * semantics for regular files. Except for the created file,
 * this operation is stateless on the server.
 *
 * Upon return, caller must release @fhp and @resfhp.
 */
static __be32
nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  struct svc_fh *resfhp, struct nfsd3_createargs *argp)
{
	struct iattr *iap = &argp->attrs;
	struct dentry *parent, *child;
	__u32 v_mtime, v_atime;
	struct inode *inode;
	__be32 status;
	int host_err;

	if (isdotent(argp->name, argp->len))
		return nfserr_exist;
	if (!(iap->ia_valid & ATTR_MODE))
		iap->ia_mode = 0;

	status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
	if (status != nfs_ok)
		return status;

	parent = fhp->fh_dentry;
	inode = d_inode(parent);

	host_err = fh_want_write(fhp);
	if (host_err)
		return nfserrno(host_err);

	fh_lock_nested(fhp, I_MUTEX_PARENT);

	child = lookup_one_len(argp->name, parent, argp->len);
	if (IS_ERR(child)) {
		status = nfserrno(PTR_ERR(child));
		goto out;
	}

	if (d_really_is_negative(child)) {
		status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
		if (status != nfs_ok)
			goto out;
	}

	status = fh_compose(resfhp, fhp->fh_export, child, fhp);
	if (status != nfs_ok)
		goto out;

	v_mtime = 0;
	v_atime = 0;
	if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
		u32 *verifier = (u32 *)argp->verf;

		/*
		 * Solaris 7 gets confused (bugid 4218508) if these have
		 * the high bit set, as do xfs filesystems without the
		 * "bigtime" feature. So just clear the high bits.
		 */
		v_mtime = verifier[0] & 0x7fffffff;
		v_atime = verifier[1] & 0x7fffffff;
	}

	if (d_really_is_positive(child)) {
		status = nfs_ok;

		switch (argp->createmode) {
		case NFS3_CREATE_UNCHECKED:
			if (!d_is_reg(child))
				break;
			iap->ia_valid &= ATTR_SIZE;
			goto set_attr;
		case NFS3_CREATE_GUARDED:
			status = nfserr_exist;
			break;
		case NFS3_CREATE_EXCLUSIVE:
			if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
			    d_inode(child)->i_atime.tv_sec == v_atime &&
			    d_inode(child)->i_size == 0) {
				break;
			}
			status = nfserr_exist;
		}
		goto out;
	}

	if (!IS_POSIXACL(inode))
		iap->ia_mode &= ~current_umask();

	host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
	if (host_err < 0) {
		status = nfserrno(host_err);
		goto out;
	}

	/* A newly created file already has a file size of zero. */
	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
		iap->ia_valid &= ~ATTR_SIZE;
	if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
		iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
				ATTR_MTIME_SET | ATTR_ATIME_SET;
		iap->ia_mtime.tv_sec = v_mtime;
		iap->ia_atime.tv_sec = v_atime;
		iap->ia_mtime.tv_nsec = 0;
		iap->ia_atime.tv_nsec = 0;
	}

set_attr:
	status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);

out:
	fh_unlock(fhp);
	if (child && !IS_ERR(child))
		dput(child);
	fh_drop_write(fhp);
	return status;
}

static __be32
nfsd3_proc_create(struct svc_rqst *rqstp)
{
	struct nfsd3_createargs *argp = rqstp->rq_argp;
	struct nfsd3_diropres *resp = rqstp->rq_resp;
	svc_fh		*dirfhp, *newfhp = NULL;
	struct iattr	*attr;
	svc_fh *dirfhp, *newfhp;

	dprintk("nfsd: CREATE(3)   %s %.*s\n",
				SVCFH_fmt(&argp->fh),
@@ -239,21 +355,8 @@ nfsd3_proc_create(struct svc_rqst *rqstp)

	dirfhp = fh_copy(&resp->dirfh, &argp->fh);
	newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
	attr   = &argp->attrs;

	/* Unfudge the mode bits */
	attr->ia_mode &= ~S_IFMT;
	if (!(attr->ia_valid & ATTR_MODE)) { 
		attr->ia_valid |= ATTR_MODE;
		attr->ia_mode = S_IFREG;
	} else {
		attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
	}

	/* Now create the file and set attributes */
	resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
				      attr, newfhp, argp->createmode,
				      (u32 *)argp->verf, NULL, NULL);
	resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
	return rpc_success;
}

Loading