Commit f35d1706 authored by Linus Torvalds
Browse files
Pull nfsd updates from Chuck Lever:
 "I'm thrilled to announce that the Linux in-kernel NFS server now
  offers NFSv4 write delegations. A write delegation enables a client to
  cache data and metadata for a single file more aggressively, reducing
  network round trips and server workload. Many thanks to Dai Ngo for
  contributing this facility, and to Jeff Layton and Neil Brown for
  reviewing and testing it.

  This release also sees the removal of all support for DES- and
  triple-DES-based Kerberos encryption types in the kernel's SunRPC
  implementation. These encryption types have been deprecated by the
  Internet community for years and are considered insecure. This change
  affects both the in-kernel NFS client and server.

  The server's UDP and TCP socket transports have now fully adopted
  David Howells' new bio_vec iterator so that no more than one sendmsg()
  call is needed to transmit each RPC message. In particular, this helps
  kTLS optimize record boundaries when sending RPC-with-TLS replies, and
  it takes the server a baby step closer to handling file I/O via
  folios.

  We've begun work on overhauling the SunRPC thread scheduler to remove
  a costly linked-list walk when looking for an idle RPC service thread
  to wake. The pre-requisites are included in this release. Thanks to
  Neil Brown for his ongoing work on this improvement"

* tag 'nfsd-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (56 commits)
  Documentation: Add missing documentation for EXPORT_OP flags
  SUNRPC: Remove unused declaration rpc_modcount()
  SUNRPC: Remove unused declarations
  NFSD: da_addr_body field missing in some GETDEVICEINFO replies
  SUNRPC: Remove return value of svc_pool_wake_idle_thread()
  SUNRPC: make rqst_should_sleep() idempotent()
  SUNRPC: Clean up svc_set_num_threads
  SUNRPC: Count ingress RPC messages per svc_pool
  SUNRPC: Deduplicate thread wake-up code
  SUNRPC: Move trace_svc_xprt_enqueue
  SUNRPC: Add enum svc_auth_status
  SUNRPC: change svc_xprt::xpt_flags bits to enum
  SUNRPC: change svc_rqst::rq_flags bits to enum
  SUNRPC: change svc_pool::sp_flags bits to enum
  SUNRPC: change cache_head.flags bits to enum
  SUNRPC: remove timeout arg from svc_recv()
  SUNRPC: change svc_recv() to return void.
  SUNRPC: call svc_process() from svc_recv().
  nfsd: separate nfsd_last_thread() from nfsd_put()
  nfsd: Simplify code around svc_exit_thread() call in nfsd()
  ...
parents 8ae5d298 b38a6023
Loading
Loading
Loading
Loading
+26 −0
Original line number Diff line number Diff line
@@ -215,3 +215,29 @@ following flags are defined:
    This flag causes nfsd to close any open files for this inode _before_
    calling into the vfs to do an unlink or a rename that would replace
    an existing file.

  EXPORT_OP_REMOTE_FS - Backing storage for this filesystem is remote
    PF_LOCAL_THROTTLE exists for loopback NFSD, where a thread needs to
    write to one bdi (the final bdi) in order to free up writes queued
    to another bdi (the client bdi). Such threads get a private balance
    of dirty pages so that dirty pages for the client bdi do not impact
    the daemon writing to the final bdi. For filesystems whose durable
    storage is not local (such as exported NFS filesystems), this
    constraint has negative consequences. EXPORT_OP_REMOTE_FS enables
    an export to disable writeback throttling.

  EXPORT_OP_NOATOMIC_ATTR - Filesystem does not update attributes atomically
    EXPORT_OP_NOATOMIC_ATTR indicates that the exported filesystem
    cannot provide the semantics required by the "atomic" boolean in
    NFSv4's change_info4. This boolean indicates to a client whether the
    returned before and after change attributes were obtained atomically
    with respect to the requested metadata operation (UNLINK,
    OPEN/CREATE, MKDIR, etc).

  EXPORT_OP_FLUSH_ON_CLOSE - Filesystem flushes file data on close(2)
    On most filesystems, inodes can remain under writeback after the
    file is closed. NFSD relies on client activity or local flusher
    threads to handle writeback. Certain filesystems, such as NFS, flush
    all of an inode's dirty data on last close. Exports that behave this
    way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip
    waiting for writeback when closing such files.
+1 −0
Original line number Diff line number Diff line
@@ -386,6 +386,7 @@ static int export_encode_fh(struct inode *inode, struct fid *fid,
 * @inode:   the object to encode
 * @fid:     where to store the file handle fragment
 * @max_len: maximum length to store there
 * @parent:  parent directory inode, if wanted
 * @flags:   properties of the requested file handle
 *
 * Returns an enum fid_type or a negative errno.
+3 −0
Original line number Diff line number Diff line
@@ -276,6 +276,9 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
{
	struct nsm_handle *new;

	if (!hostname)
		return NULL;

	new = kzalloc(sizeof(*new) + hostname_len + 1, GFP_KERNEL);
	if (unlikely(new == NULL))
		return NULL;
+10 −42
Original line number Diff line number Diff line
@@ -45,7 +45,6 @@

#define NLMDBG_FACILITY		NLMDBG_SVC
#define LOCKD_BUFSIZE		(1024 + NLMSVC_XDRSIZE)
#define ALLOWED_SIGS		(sigmask(SIGKILL))

static struct svc_program	nlmsvc_program;

@@ -57,6 +56,12 @@ static unsigned int nlmsvc_users;
static struct svc_serv		*nlmsvc_serv;
unsigned long			nlmsvc_timeout;

/*
 * Expiry callback for the nlmsvc_retry timer.
 *
 * Calls svc_wake_up() on nlmsvc_serv. The timer_list argument @tl is
 * not used.
 *
 * NOTE(review): presumably the wake-up lets a lockd service thread run
 * nlmsvc_retry_blocked() again once the retry timeout has elapsed --
 * confirm against svc_wake_up() and the lockd() main loop.
 */
static void nlmsvc_request_retry(struct timer_list *tl)
{
	svc_wake_up(nlmsvc_serv);
}
/* Statically defined timer whose expiry function is nlmsvc_request_retry(). */
DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry);

unsigned int lockd_net_id;

/*
@@ -111,26 +116,12 @@ static void set_grace_period(struct net *net)
	schedule_delayed_work(&ln->grace_period_end, grace_period);
}

/*
 * Restart the grace period for the initial network namespace (init_net).
 *
 * Does nothing unless nlmsvc_ops is set. Otherwise it cancels the
 * grace_period_end delayed work, calls locks_end_grace() on the
 * per-net lockd manager, calls nlmsvc_invalidate_all(), and finally
 * calls set_grace_period() for init_net.
 */
static void restart_grace(void)
{
	if (nlmsvc_ops) {
		struct net *net = &init_net;
		/* Per-net lockd state, looked up by lockd_net_id. */
		struct lockd_net *ln = net_generic(net, lockd_net_id);

		/* Cancel the delayed work before a new grace period is set below. */
		cancel_delayed_work_sync(&ln->grace_period_end);
		locks_end_grace(&ln->lockd_manager);
		nlmsvc_invalidate_all();
		set_grace_period(net);
	}
}

/*
 * This is the lockd kernel thread
 */
static int
lockd(void *vrqstp)
{
	int		err = 0;
	struct svc_rqst *rqstp = vrqstp;
	struct net *net = &init_net;
	struct lockd_net *ln = net_generic(net, lockd_net_id);
@@ -138,9 +129,6 @@ lockd(void *vrqstp)
	/* try_to_freeze() is called from svc_recv() */
	set_freezable();

	/* Allow SIGKILL to tell lockd to drop all of its locks */
	allow_signal(SIGKILL);

	dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");

	/*
@@ -148,33 +136,12 @@ lockd(void *vrqstp)
	 * NFS mount or NFS daemon has gone away.
	 */
	while (!kthread_should_stop()) {
		long timeout = MAX_SCHEDULE_TIMEOUT;
		RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);

		/* update sv_maxconn if it has changed */
		rqstp->rq_server->sv_maxconn = nlm_max_connections;

		if (signalled()) {
			flush_signals(current);
			restart_grace();
			continue;
		}

		timeout = nlmsvc_retry_blocked();

		/*
		 * Find a socket with data available and call its
		 * recvfrom routine.
		 */
		err = svc_recv(rqstp, timeout);
		if (err == -EAGAIN || err == -EINTR)
			continue;
		dprintk("lockd: request from %s\n",
				svc_print_addr(rqstp, buf, sizeof(buf)));

		svc_process(rqstp);
		nlmsvc_retry_blocked();
		svc_recv(rqstp);
	}
	flush_signals(current);
	if (nlmsvc_ops)
		nlmsvc_invalidate_all();
	nlm_shutdown_hosts();
@@ -407,6 +374,7 @@ static void lockd_put(void)
#endif

	svc_set_num_threads(nlmsvc_serv, NULL, 0);
	timer_delete_sync(&nlmsvc_retry);
	nlmsvc_serv = NULL;
	dprintk("lockd_down: service destroyed\n");
}
@@ -538,7 +506,7 @@ static inline int is_callback(u32 proc)
}


static int lockd_authenticate(struct svc_rqst *rqstp)
static enum svc_auth_status lockd_authenticate(struct svc_rqst *rqstp)
{
	rqstp->rq_client = NULL;
	switch (rqstp->rq_authop->flavour) {
+15 −3
Original line number Diff line number Diff line
@@ -131,12 +131,14 @@ static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
static inline void
nlmsvc_remove_block(struct nlm_block *block)
{
	if (!list_empty(&block->b_list)) {
	spin_lock(&nlm_blocked_lock);
	if (!list_empty(&block->b_list)) {
		list_del_init(&block->b_list);
		spin_unlock(&nlm_blocked_lock);
		nlmsvc_release_block(block);
		return;
	}
	spin_unlock(&nlm_blocked_lock);
}

/*
@@ -152,6 +154,7 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
				file, lock->fl.fl_pid,
				(long long)lock->fl.fl_start,
				(long long)lock->fl.fl_end, lock->fl.fl_type);
	spin_lock(&nlm_blocked_lock);
	list_for_each_entry(block, &nlm_blocked, b_list) {
		fl = &block->b_call->a_args.lock.fl;
		dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
@@ -161,9 +164,11 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
				nlmdbg_cookie2a(&block->b_call->a_args.cookie));
		if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
			kref_get(&block->b_count);
			spin_unlock(&nlm_blocked_lock);
			return block;
		}
	}
	spin_unlock(&nlm_blocked_lock);

	return NULL;
}
@@ -185,16 +190,19 @@ nlmsvc_find_block(struct nlm_cookie *cookie)
{
	struct nlm_block *block;

	spin_lock(&nlm_blocked_lock);
	list_for_each_entry(block, &nlm_blocked, b_list) {
		if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie))
			goto found;
	}
	spin_unlock(&nlm_blocked_lock);

	return NULL;

found:
	dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block);
	kref_get(&block->b_count);
	spin_unlock(&nlm_blocked_lock);
	return block;
}

@@ -317,6 +325,7 @@ void nlmsvc_traverse_blocks(struct nlm_host *host,

restart:
	mutex_lock(&file->f_mutex);
	spin_lock(&nlm_blocked_lock);
	list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) {
		if (!match(block->b_host, host))
			continue;
@@ -325,11 +334,13 @@ void nlmsvc_traverse_blocks(struct nlm_host *host,
		if (list_empty(&block->b_list))
			continue;
		kref_get(&block->b_count);
		spin_unlock(&nlm_blocked_lock);
		mutex_unlock(&file->f_mutex);
		nlmsvc_unlink_block(block);
		nlmsvc_release_block(block);
		goto restart;
	}
	spin_unlock(&nlm_blocked_lock);
	mutex_unlock(&file->f_mutex);
}

@@ -1008,7 +1019,7 @@ retry_deferred_block(struct nlm_block *block)
 * picks up locks that can be granted, or grant notifications that must
 * be retransmitted.
 */
unsigned long
void
nlmsvc_retry_blocked(void)
{
	unsigned long	timeout = MAX_SCHEDULE_TIMEOUT;
@@ -1038,5 +1049,6 @@ nlmsvc_retry_blocked(void)
	}
	spin_unlock(&nlm_blocked_lock);

	return timeout;
	if (timeout < MAX_SCHEDULE_TIMEOUT)
		mod_timer(&nlmsvc_retry, jiffies + timeout);
}
Loading