Commit 20325960 authored by David Howells
Browse files

afs: Reorganise volume and server trees to be rooted on the cell



Reorganise afs_volume objects such that they're in a tree keyed on volume
ID, rooted on an afs_cell object rather than being in multiple trees,
each of which is rooted on an afs_server object.

afs_server structs become per-cell and acquire a pointer to the cell.

The process of breaking a callback then starts with finding the server by
its network address, following that to the cell and then looking up each
volume ID in the volume tree.

This is simpler than the afs_vol_interest/afs_cb_interest N:M mapping web
and allows those structs and the code for maintaining them to be simplified
or removed.

It does make a couple of things a bit more tricky, though:

 (1) Operations now start with a volume, not a server, so there can be more
     than one answer as to whether or not the server we'll end up using
     supports the FS.InlineBulkStatus RPC.

 (2) CB RPC operations that specify the server UUID.  There's still a tree
     of servers by UUID on the afs_net struct, but the UUIDs in it aren't
     guaranteed unique.

Signed-off-by: David Howells <dhowells@redhat.com>
parent cca37d45
Loading
Loading
Loading
Loading
+43 −243
Original line number Diff line number Diff line
@@ -20,185 +20,6 @@
#include <linux/sched.h>
#include "internal.h"

/*
 * Create volume and callback interests on a server.
 *
 * Allocates a callback-interest record for @vnode's superblock and attaches
 * it to the volume-interest record for the vnode's volume ID in
 * server->cb_volumes, inserting a new volume-interest record if the tree does
 * not already hold one for that ID.
 *
 * Returns the new callback interest with its usage count set to 1 and its
 * ->server obtained through afs_get_server(), or NULL if either allocation
 * fails.
 */
static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
						   struct afs_vnode *vnode)
{
	struct afs_vol_interest *new_vi, *vi;
	struct afs_cb_interest *new;
	struct rb_node *parent, **pp;

	/* Allocate both records up front, before the lock is taken.  The
	 * volume interest is discarded again at the end if the tree turns out
	 * to contain one for this volume ID already.
	 */
	new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL);
	if (!new_vi)
		return NULL;

	new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL);
	if (!new) {
		kfree(new_vi);
		return NULL;
	}

	new_vi->usage = 1;
	new_vi->vid = vnode->volume->vid;
	INIT_HLIST_HEAD(&new_vi->cb_interests);

	refcount_set(&new->usage, 1);
	new->sb = vnode->vfs_inode.i_sb;
	new->server = afs_get_server(server, afs_server_trace_get_new_cbi);
	INIT_HLIST_NODE(&new->cb_vlink);

	write_seqlock(&server->cb_break_lock);

	/* Search the server's tree of volume interests for this volume ID.
	 * Note the ordering used here: a node whose ID is smaller than the
	 * one being inserted sends the walk to the left.
	 */
	pp = &server->cb_volumes.rb_node;
	while ((parent = *pp)) {
		vi = rb_entry(parent, struct afs_vol_interest, srv_node);
		if (vi->vid < new_vi->vid) {
			pp = &(*pp)->rb_left;
		} else if (vi->vid > new_vi->vid) {
			pp = &(*pp)->rb_right;
		} else {
			/* Share the existing record; new_vi stays set so that
			 * it gets freed once the lock has been dropped.
			 */
			vi->usage++;
			goto found_vi;
		}
	}

	/* No match: insert the preallocated record.  Clearing new_vi turns
	 * the kfree() at the end into a no-op.
	 */
	vi = new_vi;
	new_vi = NULL;
	rb_link_node_rcu(&vi->srv_node, parent, pp);
	rb_insert_color(&vi->srv_node, &server->cb_volumes);

found_vi:
	/* Hang the callback interest off the volume interest. */
	new->vol_interest = vi;
	hlist_add_head(&new->cb_vlink, &vi->cb_interests);

	write_sequnlock(&server->cb_break_lock);
	kfree(new_vi);
	return new;
}

/*
 * Set up an interest-in-callbacks record for a volume on a server and
 * register it with the server.
 * - Called with vnode->io_lock held.
 *
 * @vnode: The vnode whose callback interest is to be set.
 * @slist: The server list containing the server to be used.
 * @index: The index in @slist->servers[] of that server.
 *
 * On return the vnode's cb_interest and the server list entry's cb_interest
 * agree on an interest record for the selected server, creating one if
 * neither side has one yet.
 *
 * Returns 0 on success or -ENOMEM if a new interest record was needed and
 * could not be allocated.
 */
int afs_register_server_cb_interest(struct afs_vnode *vnode,
				    struct afs_server_list *slist,
				    unsigned int index)
{
	struct afs_server_entry *entry = &slist->servers[index];
	struct afs_cb_interest *cbi, *vcbi, *new, *old;
	struct afs_server *server = entry->server;

again:
	/* Fast path: the vnode already uses the entry's interest. */
	vcbi = rcu_dereference_protected(vnode->cb_interest,
					 lockdep_is_held(&vnode->io_lock));
	if (vcbi && likely(vcbi == entry->cb_interest))
		return 0;

	/* Take a counted snapshot of the entry's interest (may be NULL). */
	read_lock(&slist->lock);
	cbi = afs_get_cb_interest(entry->cb_interest);
	read_unlock(&slist->lock);

	if (vcbi) {
		if (vcbi == cbi) {
			afs_put_cb_interest(afs_v2net(vnode), cbi);
			return 0;
		}

		/* Use a new interest in the server list for the same server
		 * rather than an old one that's still attached to a vnode.
		 * The reference taken on cbi above passes to the vnode.
		 */
		if (cbi && vcbi->server == cbi->server) {
			write_seqlock(&vnode->cb_lock);
			old = rcu_dereference_protected(vnode->cb_interest,
							lockdep_is_held(&vnode->cb_lock.lock));
			rcu_assign_pointer(vnode->cb_interest, cbi);
			write_sequnlock(&vnode->cb_lock);
			afs_put_cb_interest(afs_v2net(vnode), old);
			return 0;
		}

		/* Re-use the one attached to the vnode. */
		if (!cbi && vcbi->server == server) {
			write_lock(&slist->lock);
			if (entry->cb_interest) {
				/* Someone else filled the slot in the
				 * meantime; start again.  cbi is NULL here,
				 * so there is no reference to drop.
				 */
				write_unlock(&slist->lock);
				goto again;
			}

			/* Publish the vnode's interest in the server list,
			 * giving the entry its own reference.  cbi is NULL on
			 * this path, so storing it would leave the slot empty
			 * and the interest would never actually be re-used.
			 */
			entry->cb_interest = afs_get_cb_interest(vcbi);
			write_unlock(&slist->lock);
			return 0;
		}
	}

	if (!cbi) {
		new = afs_create_interest(server, vnode);
		if (!new)
			return -ENOMEM;

		/* Install the new record unless another thread got there
		 * first, in which case use theirs and discard ours.
		 */
		write_lock(&slist->lock);
		if (!entry->cb_interest) {
			entry->cb_interest = afs_get_cb_interest(new);
			cbi = new;
			new = NULL;
		} else {
			cbi = afs_get_cb_interest(entry->cb_interest);
		}
		write_unlock(&slist->lock);
		afs_put_cb_interest(afs_v2net(vnode), new);
	}

	ASSERT(cbi);

	/* Change the server the vnode is using.  This entails scrubbing any
	 * interest the vnode had in the previous server it was using.
	 */
	write_seqlock(&vnode->cb_lock);

	old = rcu_dereference_protected(vnode->cb_interest,
					lockdep_is_held(&vnode->cb_lock.lock));
	rcu_assign_pointer(vnode->cb_interest, cbi);
	vnode->cb_s_break = cbi->server->cb_s_break;
	vnode->cb_v_break = vnode->volume->cb_v_break;
	clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);

	write_sequnlock(&vnode->cb_lock);
	afs_put_cb_interest(afs_v2net(vnode), old);
	return 0;
}

/*
 * Remove an interest on a server.
 *
 * Drops one reference on @cbi; a NULL @cbi is accepted and ignored.  When the
 * last reference goes and the interest is still linked to a volume interest,
 * it is unlinked under the server's cb_break_lock, the volume interest's
 * usage count is decremented (erasing it from server->cb_volumes and freeing
 * it via kfree_rcu() if that reaches zero) and afs_put_server() is called on
 * the server.  @cbi itself is then freed via kfree_rcu().
 */
void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
{
	struct afs_vol_interest *vi;

	if (cbi && refcount_dec_and_test(&cbi->usage)) {
		if (!hlist_unhashed(&cbi->cb_vlink)) {
			write_seqlock(&cbi->server->cb_break_lock);

			hlist_del_init(&cbi->cb_vlink);
			vi = cbi->vol_interest;
			cbi->vol_interest = NULL;
			if (--vi->usage == 0)
				rb_erase(&vi->srv_node, &cbi->server->cb_volumes);
			else
				/* Still used by other interests: must not be
				 * freed below.
				 */
				vi = NULL;

			write_sequnlock(&cbi->server->cb_break_lock);
			/* Free the volume interest outside the lock, if this
			 * was its last user.
			 */
			if (vi)
				kfree_rcu(vi, rcu);
			afs_put_server(net, cbi->server, afs_server_trace_put_cbi);
		}
		kfree_rcu(cbi, rcu);
	}
}

/*
 * allow the fileserver to request callback state (re-)initialisation
 */
@@ -236,12 +57,12 @@ void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason
}

/*
 * Look up a volume interest by volume ID under RCU conditions.
 * Look up a volume by volume ID under RCU conditions.
 */
static struct afs_vol_interest *afs_lookup_vol_interest_rcu(struct afs_server *server,
static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
						afs_volid_t vid)
{
	struct afs_vol_interest *vi = NULL;
	struct afs_volume *volume = NULL;
	struct rb_node *p;
	int seq = 0;

@@ -250,28 +71,25 @@ static struct afs_vol_interest *afs_lookup_vol_interest_rcu(struct afs_server *s
		 * under just the RCU read lock, so we have to check for
		 * changes.
		 */
		read_seqbegin_or_lock(&server->cb_break_lock, &seq);
		read_seqbegin_or_lock(&cell->volume_lock, &seq);

		p = rcu_dereference_raw(server->cb_volumes.rb_node);
		p = rcu_dereference_raw(cell->volumes.rb_node);
		while (p) {
			vi = rb_entry(p, struct afs_vol_interest, srv_node);
			volume = rb_entry(p, struct afs_volume, cell_node);

			if (vi->vid < vid)
			if (volume->vid < vid)
				p = rcu_dereference_raw(p->rb_left);
			else if (vi->vid > vid)
			else if (volume->vid > vid)
				p = rcu_dereference_raw(p->rb_right);
			else
				break;
			/* We want to repeat the search, this time with the
			 * lock properly locked.
			 */
			vi = NULL;
			volume = NULL;
		}

	} while (need_seqretry(&server->cb_break_lock, seq));
	} while (need_seqretry(&cell->volume_lock, seq));

	done_seqretry(&server->cb_break_lock, seq);
	return vi;
	done_seqretry(&cell->volume_lock, seq);
	return volume;
}

/*
@@ -280,35 +98,32 @@ static struct afs_vol_interest *afs_lookup_vol_interest_rcu(struct afs_server *s
 *   - the backing file is changed
 *   - a lock is released
 */
static void afs_break_one_callback(struct afs_server *server,
				   struct afs_fid *fid,
				   struct afs_vol_interest *vi)
static void afs_break_one_callback(struct afs_volume *volume,
				   struct afs_fid *fid)
{
	struct afs_cb_interest *cbi;
	struct super_block *sb;
	struct afs_vnode *vnode;
	struct inode *inode;

	/* Step through all interested superblocks.  There may be more than one
	 * because of cell aliasing.
	 */
	hlist_for_each_entry_rcu(cbi, &vi->cb_interests, cb_vlink) {
	if (fid->vnode == 0 && fid->unique == 0) {
		/* The callback break applies to an entire volume. */
			struct afs_super_info *as = AFS_FS_S(cbi->sb);
			struct afs_volume *volume = as->volume;

		write_lock(&volume->cb_v_break_lock);
		volume->cb_v_break++;
		trace_afs_cb_break(fid, volume->cb_v_break,
				   afs_cb_break_for_volume_callback, false);
		write_unlock(&volume->cb_v_break_lock);
		} else {
			/* See if we can find a matching inode - even an I_NEW
			 * inode needs to be marked as it can have its callback
			 * broken before we finish setting up the local inode.
		return;
	}

	/* See if we can find a matching inode - even an I_NEW inode needs to
	 * be marked as it can have its callback broken before we finish
	 * setting up the local inode.
	 */
			inode = find_inode_rcu(cbi->sb, fid->vnode,
					       afs_ilookup5_test_by_fid, fid);
	sb = rcu_dereference(volume->sb);
	if (!sb)
		return;

	inode = find_inode_rcu(sb, fid->vnode, afs_ilookup5_test_by_fid, fid);
	if (inode) {
		vnode = AFS_FS_I(inode);
		afs_break_callback(vnode, afs_cb_break_for_callback);
@@ -316,19 +131,17 @@ static void afs_break_one_callback(struct afs_server *server,
		trace_afs_cb_miss(fid, afs_cb_break_for_callback);
	}
}
	}
}

static void afs_break_some_callbacks(struct afs_server *server,
				     struct afs_callback_break *cbb,
				     size_t *_count)
{
	struct afs_callback_break *residue = cbb;
	struct afs_vol_interest *vi;
	struct afs_volume *volume;
	afs_volid_t vid = cbb->fid.vid;
	size_t i;

	vi = afs_lookup_vol_interest_rcu(server, vid);
	volume = afs_lookup_volume_rcu(server->cell, vid);

	/* TODO: Find all matching volumes if we couldn't match the server and
	 * break them anyway.
@@ -341,8 +154,8 @@ static void afs_break_some_callbacks(struct afs_server *server,
			       cbb->fid.vnode,
			       cbb->fid.unique);
			--*_count;
			if (vi)
				afs_break_one_callback(server, &cbb->fid, vi);
			if (volume)
				afs_break_one_callback(volume, &cbb->fid);
		} else {
			*residue++ = *cbb;
		}
@@ -367,16 +180,3 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
	rcu_read_unlock();
	return;
}

/*
 * Release the callback interest held by every entry of a server list and
 * leave each entry's pointer set to NULL.
 */
void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist)
{
	int n = 0;

	while (n < slist->nr_servers) {
		afs_put_cb_interest(net, slist->servers[n].cb_interest);
		slist->servers[n].cb_interest = NULL;
		n++;
	}
}
+5 −2
Original line number Diff line number Diff line
@@ -161,8 +161,11 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,

	atomic_set(&cell->usage, 2);
	INIT_WORK(&cell->manager, afs_manage_cell);
	INIT_LIST_HEAD(&cell->proc_volumes);
	rwlock_init(&cell->proc_lock);
	cell->volumes = RB_ROOT;
	INIT_HLIST_HEAD(&cell->proc_volumes);
	seqlock_init(&cell->volume_lock);
	cell->fs_servers = RB_ROOT;
	seqlock_init(&cell->fs_lock);
	rwlock_init(&cell->vl_servers_lock);
	cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);

+33 −12
Original line number Diff line number Diff line
@@ -702,6 +702,37 @@ static const struct afs_operation_ops afs_fetch_status_operation = {
	.success	= afs_do_lookup_success,
};

/*
 * Work out whether the server a directory is expected to use is already
 * known not to support FS.InlineBulkStatus.
 *
 * Returns false only if the volume has been flagged as possibly having such
 * a server and the directory's cb_server is found in the volume's server
 * list with its no-ibulk flag set; returns true in every other case.
 */
static bool afs_server_supports_ibulk(struct afs_vnode *dvnode)
{
	struct afs_volume *volume = dvnode->volume;
	struct afs_server_list *slist;
	struct afs_server *candidate;
	bool supported = true;
	int idx = 0;

	/* Nothing on this volume has ever rejected the RPC. */
	if (!test_bit(AFS_VOLUME_MAYBE_NO_IBULK, &volume->flags))
		return true;

	rcu_read_lock();
	slist = rcu_dereference(volume->servers);

	while (idx < slist->nr_servers) {
		candidate = slist->servers[idx++].server;
		if (candidate != dvnode->cb_server)
			continue;

		/* Found the server in use; its flag decides the answer. */
		if (test_bit(AFS_SERVER_FL_NO_IBULK, &candidate->flags))
			supported = false;
		break;
	}

	rcu_read_unlock();
	return supported;
}

/*
 * Do a lookup in a directory.  We make use of bulk lookup to query a slew of
 * files in one go and create inodes for them.  The inode of the file we were
@@ -711,10 +742,8 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
				   struct key *key)
{
	struct afs_lookup_cookie *cookie;
	struct afs_cb_interest *dcbi;
	struct afs_vnode_param *vp;
	struct afs_operation *op;
	struct afs_server *server;
	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
	struct inode *inode = NULL, *ti;
	afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
@@ -734,16 +763,8 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
	cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want
			      * and slot 1 for the directory */

	read_seqlock_excl(&dvnode->cb_lock);
	dcbi = rcu_dereference_protected(dvnode->cb_interest,
					 lockdep_is_held(&dvnode->cb_lock.lock));
	if (dcbi) {
		server = dcbi->server;
		if (server &&
		    test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags))
	if (!afs_server_supports_ibulk(dvnode))
		cookie->one_only = true;
	}
	read_sequnlock_excl(&dvnode->cb_lock);

	/* search the directory */
	ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
+2 −9
Original line number Diff line number Diff line
@@ -143,12 +143,6 @@ bool afs_begin_vnode_operation(struct afs_operation *op)
		if (!afs_get_io_locks(op))
			return false;

	read_seqlock_excl(&vnode->cb_lock);
	op->cbi = afs_get_cb_interest(
		rcu_dereference_protected(vnode->cb_interest,
					  lockdep_is_held(&vnode->cb_lock.lock)));
	read_sequnlock_excl(&vnode->cb_lock);

	afs_prepare_vnode(op, &op->file[0], 0);
	afs_prepare_vnode(op, &op->file[1], 1);
	op->cb_v_break = op->volume->cb_v_break;
@@ -183,8 +177,8 @@ void afs_wait_for_operation(struct afs_operation *op)
	_enter("");

	while (afs_select_fileserver(op)) {
		op->cb_s_break = op->cbi->server->cb_s_break;
		if (test_bit(AFS_SERVER_FL_IS_YFS, &op->cbi->server->flags) &&
		op->cb_s_break = op->server->cb_s_break;
		if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) &&
		    op->ops->issue_yfs_rpc)
			op->ops->issue_yfs_rpc(op);
		else
@@ -231,7 +225,6 @@ int afs_put_operation(struct afs_operation *op)
	}

	afs_end_cursor(&op->ac);
	afs_put_cb_interest(op->net, op->cbi);
	afs_put_serverlist(op->net, op->server_list);
	afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op);
	kfree(op);
+5 −2
Original line number Diff line number Diff line
@@ -1893,8 +1893,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
static void afs_done_fs_inline_bulk_status(struct afs_call *call)
{
	if (call->error == -ECONNABORTED &&
	    call->abort_code == RX_INVALID_OPERATION)
	    call->abort_code == RX_INVALID_OPERATION) {
		set_bit(AFS_SERVER_FL_NO_IBULK, &call->server->flags);
		if (call->op)
			set_bit(AFS_VOLUME_MAYBE_NO_IBULK, &call->op->volume->flags);
	}
}

/*
@@ -1919,7 +1922,7 @@ void afs_fs_inline_bulk_status(struct afs_operation *op)
	__be32 *bp;
	int i;

	if (test_bit(AFS_SERVER_FL_NO_IBULK, &op->cbi->server->flags)) {
	if (test_bit(AFS_SERVER_FL_NO_IBULK, &op->server->flags)) {
		op->error = -ENOTSUPP;
		return;
	}
Loading