Commit f642404a authored by David Howells's avatar David Howells
Browse files

afs: Make vnode->cb_interest RCU safe



Use RCU-based freeing for afs_cb_interest struct objects and use RCU on
vnode->cb_interest.  Use that change to allow afs_check_validity() to use
read_seqbegin_or_lock() instead of read_seqlock_excl().

This also requires the caller of afs_check_validity() to hold the RCU read
lock across the call.

Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
parent c925bd0a
Loading
Loading
Loading
Loading
+12 −9
Original line number Diff line number Diff line
@@ -94,15 +94,15 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode,
	struct afs_server *server = entry->server;

again:
	if (vnode->cb_interest &&
	    likely(vnode->cb_interest == entry->cb_interest))
	vcbi = rcu_dereference_protected(vnode->cb_interest,
					 lockdep_is_held(&vnode->io_lock));
	if (vcbi && likely(vcbi == entry->cb_interest))
		return 0;

	read_lock(&slist->lock);
	cbi = afs_get_cb_interest(entry->cb_interest);
	read_unlock(&slist->lock);

	vcbi = vnode->cb_interest;
	if (vcbi) {
		if (vcbi == cbi) {
			afs_put_cb_interest(afs_v2net(vnode), cbi);
@@ -114,8 +114,9 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode,
		 */
		if (cbi && vcbi->server == cbi->server) {
			write_seqlock(&vnode->cb_lock);
			old = vnode->cb_interest;
			vnode->cb_interest = cbi;
			old = rcu_dereference_protected(vnode->cb_interest,
							lockdep_is_held(&vnode->cb_lock.lock));
			rcu_assign_pointer(vnode->cb_interest, cbi);
			write_sequnlock(&vnode->cb_lock);
			afs_put_cb_interest(afs_v2net(vnode), old);
			return 0;
@@ -160,8 +161,9 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode,
	 */
	write_seqlock(&vnode->cb_lock);

	old = vnode->cb_interest;
	vnode->cb_interest = cbi;
	old = rcu_dereference_protected(vnode->cb_interest,
					lockdep_is_held(&vnode->cb_lock.lock));
	rcu_assign_pointer(vnode->cb_interest, cbi);
	vnode->cb_s_break = cbi->server->cb_s_break;
	vnode->cb_v_break = vnode->volume->cb_v_break;
	clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
@@ -191,10 +193,11 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
				vi = NULL;

			write_unlock(&cbi->server->cb_break_lock);
			kfree(vi);
			if (vi)
				kfree_rcu(vi, rcu);
			afs_put_server(net, cbi->server);
		}
		kfree(cbi);
		kfree_rcu(cbi, rcu);
	}
}

+10 −5
Original line number Diff line number Diff line
@@ -638,11 +638,12 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
				   struct key *key)
{
	struct afs_lookup_cookie *cookie;
	struct afs_cb_interest *cbi = NULL;
	struct afs_cb_interest *dcbi, *cbi = NULL;
	struct afs_super_info *as = dir->i_sb->s_fs_info;
	struct afs_status_cb *scb;
	struct afs_iget_data data;
	struct afs_fs_cursor fc;
	struct afs_server *server;
	struct afs_vnode *dvnode = AFS_FS_I(dir);
	struct inode *inode = NULL;
	int ret, i;
@@ -658,10 +659,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
	cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */

	read_seqlock_excl(&dvnode->cb_lock);
	if (dvnode->cb_interest &&
	    dvnode->cb_interest->server &&
	    test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags))
	dcbi = rcu_dereference_protected(dvnode->cb_interest,
					 lockdep_is_held(&dvnode->cb_lock.lock));
	if (dcbi) {
		server = dcbi->server;
		if (server &&
		    test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags))
			cookie->one_only = true;
	}
	read_sequnlock_excl(&dvnode->cb_lock);

	for (i = 0; i < 50; i++)
+50 −31
Original line number Diff line number Diff line
@@ -139,9 +139,10 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
		vnode->cb_expires_at = ktime_get_real_seconds();
	} else {
		vnode->cb_expires_at = scb->callback.expires_at;
		old_cbi = vnode->cb_interest;
		old_cbi = rcu_dereference_protected(vnode->cb_interest,
						    lockdep_is_held(&vnode->cb_lock.lock));
		if (cbi != old_cbi)
			vnode->cb_interest = afs_get_cb_interest(cbi);
			rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(cbi));
		else
			old_cbi = NULL;
		set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
@@ -245,9 +246,10 @@ static void afs_apply_callback(struct afs_fs_cursor *fc,

	if (!afs_cb_is_broken(cb_break, vnode, fc->cbi)) {
		vnode->cb_expires_at	= cb->expires_at;
		old = vnode->cb_interest;
		old = rcu_dereference_protected(vnode->cb_interest,
						lockdep_is_held(&vnode->cb_lock.lock));
		if (old != fc->cbi) {
			vnode->cb_interest = afs_get_cb_interest(fc->cbi);
			rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(fc->cbi));
			afs_put_cb_interest(afs_v2net(vnode), old);
		}
		set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
@@ -565,20 +567,28 @@ void afs_zap_data(struct afs_vnode *vnode)
 */
bool afs_check_validity(struct afs_vnode *vnode)
{
	struct afs_cb_interest *cbi;
	struct afs_server *server;
	struct afs_volume *volume = vnode->volume;
	time64_t now = ktime_get_real_seconds();
	bool valid;
	unsigned int cb_break, cb_s_break, cb_v_break;
	int seq = 0;

	/* Quickly check the callback state.  Ideally, we'd use read_seqbegin
	 * here, but we have no way to pass the net namespace to the RCU
	 * cleanup for the server record.
	 */
	read_seqlock_excl(&vnode->cb_lock);
	do {
		read_seqbegin_or_lock(&vnode->cb_lock, &seq);
		cb_v_break = READ_ONCE(volume->cb_v_break);
		cb_break = vnode->cb_break;

		if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
		if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
		    vnode->cb_v_break != vnode->volume->cb_v_break) {
			vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
			vnode->cb_v_break = vnode->volume->cb_v_break;
			cbi = rcu_dereference(vnode->cb_interest);
			server = rcu_dereference(cbi->server);
			cb_s_break = READ_ONCE(server->cb_s_break);

			if (vnode->cb_s_break != cb_s_break ||
			    vnode->cb_v_break != cb_v_break) {
				vnode->cb_s_break = cb_s_break;
				vnode->cb_v_break = cb_v_break;
				valid = false;
			} else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
				valid = false;
@@ -590,11 +600,13 @@ bool afs_check_validity(struct afs_vnode *vnode)
		} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
			valid = true;
		} else {
		vnode->cb_v_break = vnode->volume->cb_v_break;
			vnode->cb_v_break = cb_v_break;
			valid = false;
		}

	read_sequnlock_excl(&vnode->cb_lock);
	} while (need_seqretry(&vnode->cb_lock, seq));

	done_seqretry(&vnode->cb_lock, seq);
	return valid;
}

@@ -616,7 +628,9 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
	       vnode->fid.vid, vnode->fid.vnode, vnode->flags,
	       key_serial(key));

	rcu_read_lock();
	valid = afs_check_validity(vnode);
	rcu_read_unlock();

	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
		clear_nlink(&vnode->vfs_inode);
@@ -703,6 +717,7 @@ int afs_drop_inode(struct inode *inode)
 */
void afs_evict_inode(struct inode *inode)
{
	struct afs_cb_interest *cbi;
	struct afs_vnode *vnode;

	vnode = AFS_FS_I(inode);
@@ -719,10 +734,14 @@ void afs_evict_inode(struct inode *inode)
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);

	if (vnode->cb_interest) {
		afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
		vnode->cb_interest = NULL;
	write_seqlock(&vnode->cb_lock);
	cbi = rcu_dereference_protected(vnode->cb_interest,
					lockdep_is_held(&vnode->cb_lock.lock));
	if (cbi) {
		afs_put_cb_interest(afs_i2net(inode), cbi);
		rcu_assign_pointer(vnode->cb_interest, NULL);
	}
	write_sequnlock(&vnode->cb_lock);

	while (!list_empty(&vnode->wb_keys)) {
		struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
+9 −3
Original line number Diff line number Diff line
@@ -554,7 +554,10 @@ struct afs_server {
struct afs_vol_interest {
	struct hlist_node	srv_link;	/* Link in server->cb_volumes */
	struct hlist_head	cb_interests;	/* List of callback interests on the server */
	union {
		struct rcu_head	rcu;
		afs_volid_t	vid;		/* Volume ID to match */
	};
	unsigned int		usage;
};

@@ -566,7 +569,10 @@ struct afs_cb_interest {
	struct afs_vol_interest	*vol_interest;
	struct afs_server	*server;	/* Server on which this interest resides */
	struct super_block	*sb;		/* Superblock on which inodes reside */
	union {
		struct rcu_head	rcu;
		afs_volid_t	vid;		/* Volume ID to match */
	};
	refcount_t		usage;
};

@@ -676,7 +682,7 @@ struct afs_vnode {
	afs_lock_type_t		lock_type : 8;

	/* outstanding callback notification on this file */
	struct afs_cb_interest	*cb_interest;	/* Server on which this resides */
	struct afs_cb_interest __rcu *cb_interest; /* Server on which this resides */
	unsigned int		cb_s_break;	/* Mass break counter on ->server */
	unsigned int		cb_v_break;	/* Mass break counter on ->volume */
	unsigned int		cb_break;	/* Break counter on vnode */
+12 −6
Original line number Diff line number Diff line
@@ -66,7 +66,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
	fc->untried = (1UL << fc->server_list->nr_servers) - 1;
	fc->index = READ_ONCE(fc->server_list->preferred);

	cbi = vnode->cb_interest;
	cbi = rcu_dereference_protected(vnode->cb_interest,
					lockdep_is_held(&vnode->io_lock));
	if (cbi) {
		/* See if the vnode's preferred record is still available */
		for (i = 0; i < fc->server_list->nr_servers; i++) {
@@ -87,8 +88,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,

		/* Note that the callback promise is effectively broken */
		write_seqlock(&vnode->cb_lock);
		ASSERTCMP(cbi, ==, vnode->cb_interest);
		vnode->cb_interest = NULL;
		ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest));
		rcu_assign_pointer(vnode->cb_interest, NULL);
		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
			vnode->cb_break++;
		write_sequnlock(&vnode->cb_lock);
@@ -417,7 +418,9 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
	if (error < 0)
		goto failed_set_error;

	fc->cbi = afs_get_cb_interest(vnode->cb_interest);
	fc->cbi = afs_get_cb_interest(
		rcu_dereference_protected(vnode->cb_interest,
					  lockdep_is_held(&vnode->io_lock)));

	read_lock(&server->fs_lock);
	alist = rcu_dereference_protected(server->addresses,
@@ -487,12 +490,15 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_vnode *vnode = fc->vnode;
	struct afs_cb_interest *cbi = vnode->cb_interest;
	struct afs_cb_interest *cbi;
	struct afs_addr_list *alist;
	int error = fc->ac.error;

	_enter("");

	cbi = rcu_dereference_protected(vnode->cb_interest,
					lockdep_is_held(&vnode->io_lock));

	switch (error) {
	case SHRT_MAX:
		if (!cbi) {
@@ -501,7 +507,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
			return false;
		}

		fc->cbi = afs_get_cb_interest(vnode->cb_interest);
		fc->cbi = afs_get_cb_interest(cbi);

		read_lock(&cbi->server->fs_lock);
		alist = rcu_dereference_protected(cbi->server->addresses,
Loading