Commit eb7f20a8 authored by Zizhi Wo's avatar Zizhi Wo
Browse files

fscache: Add the synchronous waiting mechanism for the volume unhash in erofs ondemand mode

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB5UKT



--------------------------------

This patch adds the synchronous waiting mechanism for the volume unhash in
erofs ondemand mode. There are 2 main reasons:

1) Currently volume's unhash relies on its own reference count to be 0,
partly depending on the user state release fd. That is, if the user state
fails to turn off fd properly, then the volume cannot be unhashed. Next
time when mount the specified volume with the same name, the system will
waits for the release of the previous volume. However, the write semaphore
of the corresponding superblock is not released. When traversing the sb,
hung task will occur because the read semaphore cannot be obtained:

 [mount]					[sync]
vfs_get_super                                 ksys_sync
  sget_fc                                       iterate_supers
    alloc_super
      down_write_nested ----pin sb->s_umount
    list_add_tail(&s->s_list, &super_blocks)
  erofs_fc_fill_super                             super_lock
    ...                                             down_read ----hungtask
    fscache_hash_volume ----wait for volume

2) During the umount process, object generates the cache directory entry
(cachefiles_commit_tmpfile), but it is not synchronized. After umount is
complete, the user may see that the cache directory is not generated. When
inuse() is called in user mode, the cache directory cannot be found.

The solution:
1) For the erofs on-demand loading scenario, "n_hash_cookies" has been
introduced in "struct fscache_volume", increased whenever there is a child
cookie hashed and decreased if the cookie has unhashed. When it returns
zero, the volume is awakened with unhash. FSCACHE_CACHE_SYNC_VOLUME_UNHASH
flag is introduced in "struct fscache_cache", which is used to indicate
whether this feature is enabled.
2) cachefiles_free_volume() need to be called to ensure the next mount
successful, otherwise -ENODATA will be returned because cache_priv will not
be created in new volume and the object may not be initialized. To prevent
use-after-free issue caused by the kfree(volume)/kfree(cache), "ref" and
"lock" have been introduced in "struct cachefiles_volume".

There are three benefits to this:
1) The unhash of volume does not depend on whether the fd is handled
correctly by the userland. If fd is not closed after umount, it does not
matter if it continues to be used, because object->file is already NULL as
cachefiles_clean_up_object() is called before fscache_unhash_cookie().
2) The cache directory can be guaranteed to be generated before the umount
completes unless the process is interruped. This is because
cachefiles_commit_tmpfile() is called before fscache_unhash_cookie().
3) Before this patch, it is possible that after umount, calling inuse() on
a cache entry may still shows -EBUSY, because the order of umount and
cachefiles_put_directory() is indeterminate. Now thanks to volume->lock,
calling cull/inuse after umount is finished will not return an error.

Fixes: 62ab6335 ("fscache: Implement volume registration")
Signed-off-by: default avatarZizhi Wo <wozizhi@huawei.com>
parent bcbc151d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -367,6 +367,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)
				continue;
			}
			list_del_init(&volume->cache_link);
			cachefiles_get_volume(volume);
		}
		spin_unlock(&cache->object_list_lock);
		if (!volume)
@@ -374,6 +375,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache)

		cachefiles_withdraw_volume(volume);
		fscache_put_volume(vcookie, fscache_volume_put_withdraw);
		cachefiles_put_volume(volume);
	}

	_leave("");
+11 −1
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ static
struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie)
{
	struct fscache_volume *vcookie = cookie->volume;
	struct cachefiles_volume *volume = vcookie->cache_priv;
	struct cachefiles_volume *volume = READ_ONCE(vcookie->cache_priv);
	struct cachefiles_object *object;

	_enter("{%s},%x,", vcookie->key, cookie->debug_id);
@@ -38,6 +38,15 @@ struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie)

	refcount_set(&object->ref, 1);

	/*
	 * After enabling sync_volume_unhash, fscache_relinquish_volume() may
	 * release cachefiles_volume while fscache_volume->ref is non-zero.
	 * However, cachefiles_object may still access cachefiles_cache through
	 * object->volume->cache (e.g., in cachefiles_ondemand_fd_release).
	 * Therefore, we need to pin cachefiles_volume through the object.
	 */
	cachefiles_get_volume(volume);

	spin_lock_init(&object->lock);
	INIT_LIST_HEAD(&object->cache_link);
	object->volume = volume;
@@ -96,6 +105,7 @@ void cachefiles_put_object(struct cachefiles_object *object,
		cachefiles_ondemand_deinit_obj_info(object);
		cache = object->volume->cache->cache;
		fscache_put_cookie(object->cookie, fscache_cookie_put_object);
		cachefiles_put_volume(object->volume);
		object->cookie = NULL;
		kmem_cache_free(cachefiles_object_jar, object);
		fscache_uncount_object(cache);
+4 −0
Original line number Diff line number Diff line
@@ -37,9 +37,11 @@ enum cachefiles_content {
 * Cached volume representation.
 */
struct cachefiles_volume {
	refcount_t			ref;
	struct cachefiles_cache		*cache;
	struct list_head		cache_link;	/* Link in cache->volumes */
	struct fscache_volume		*vcookie;	/* The netfs's representation */
	struct mutex			lock;		/* Lock for cachefiles_volume */
	bool				dir_has_put;	/* Indicates cache dir has been put */
	struct dentry			*dentry;	/* The volume dentry */
	struct dentry			*fanout[256];	/* Fanout subdirs */
@@ -406,6 +408,8 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
/*
 * volume.c
 */
void cachefiles_get_volume(struct cachefiles_volume *volume);
void cachefiles_put_volume(struct cachefiles_volume *volume);
void cachefiles_acquire_volume(struct fscache_volume *volume);
void cachefiles_free_volume(struct fscache_volume *volume);
void cachefiles_withdraw_volume(struct cachefiles_volume *volume);
+45 −6
Original line number Diff line number Diff line
@@ -10,6 +10,19 @@
#include "internal.h"
#include <trace/events/fscache.h>

void cachefiles_get_volume(struct cachefiles_volume *volume)
{
	refcount_inc(&volume->ref);
}

void cachefiles_put_volume(struct cachefiles_volume *volume)
{
	if (refcount_dec_and_test(&volume->ref)) {
		mutex_destroy(&volume->lock);
		kfree(volume);
	}
}

/*
 * Allocate and set up a volume representation.  We make sure all the fanout
 * directories are created and pinned.
@@ -33,6 +46,7 @@ void cachefiles_acquire_volume(struct fscache_volume *vcookie)
	volume->vcookie = vcookie;
	volume->cache = cache;
	INIT_LIST_HEAD(&volume->cache_link);
	mutex_init(&volume->lock);

	cachefiles_begin_secure(cache, &saved_cred);

@@ -77,7 +91,17 @@ void cachefiles_acquire_volume(struct fscache_volume *vcookie)

	cachefiles_end_secure(cache, saved_cred);

	vcookie->cache_priv = volume;
	/*
	 * The purpose of introducing volume->ref is twofold:
	 * 1) To allow cachefiles_object to pin cachefiles_volume.
	 * 2) To handle the concurrency between cachefiles_free_volume() and
	 * cachefiles_withdraw_volume() introduced by enabling sync_unhash
	 * volume, preventing the former from releasing cachefiles_volume and
	 * causing a use-after-free in the latter.
	 */
	refcount_set(&volume->ref, 1);
	/* Prevent writing vcookie->cache_priv before writing volume->ref. */
	smp_store_release(&vcookie->cache_priv, volume);
	n_accesses = atomic_inc_return(&vcookie->n_accesses); /* Stop wakeups on dec-to-0 */
	trace_fscache_access_volume(vcookie->debug_id, 0,
				    refcount_read(&vcookie->ref),
@@ -98,6 +122,7 @@ void cachefiles_acquire_volume(struct fscache_volume *vcookie)
error_name:
	kfree(name);
error_vol:
	mutex_destroy(&volume->lock);
	kfree(volume);
	cachefiles_end_secure(cache, saved_cred);
}
@@ -111,26 +136,40 @@ static void __cachefiles_free_volume(struct cachefiles_volume *volume)

	_enter("");

	if (volume->dir_has_put)
	mutex_lock(&volume->lock);
	if (volume->dir_has_put) {
		mutex_unlock(&volume->lock);
		return;
	}

	volume->dir_has_put = true;

	for (i = 0; i < 256; i++)
		cachefiles_put_directory(volume->fanout[i]);
	cachefiles_put_directory(volume->dentry);
	kfree(volume);
	mutex_unlock(&volume->lock);
}

void cachefiles_free_volume(struct fscache_volume *vcookie)
{
	struct cachefiles_volume *volume = vcookie->cache_priv;

	/*
	 * Prevents access to the cachefiles_cache that has been freed caused
	 * by the concurrency between cachefiles_free_volume() and
	 * cachefiles_daemon_release(), the later may kree(cache).
	 */
	mutex_lock(&volume->lock);
	if (!volume->dir_has_put) {
		spin_lock(&volume->cache->object_list_lock);
		list_del_init(&volume->cache_link);
		spin_unlock(&volume->cache->object_list_lock);
	}
	mutex_unlock(&volume->lock);

	vcookie->cache_priv = NULL;
	__cachefiles_free_volume(volume);
	cachefiles_put_volume(volume);
}

void cachefiles_withdraw_volume(struct cachefiles_volume *volume)
+4 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include <linux/fscache-cache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
@@ -366,6 +367,9 @@ static int erofs_fscache_register_volume(struct super_block *sb)
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	} else {
		/* enable synchronous unhashing for the associated volumes */
		fscache_set_sync_volume_unhash(volume->cache);
	}

	sbi->volume = volume;
Loading