Commit bdb2fd7f authored by Tejun Heo's avatar Tejun Heo Committed by Greg Kroah-Hartman
Browse files

kernfs: Skip kernfs_drain_open_files() more aggressively



Track the number of mmapped files and files that need to be released and
skip kernfs_drain_open_file() if both are zero, which are the precise
conditions which require draining open_files. The early exit test is
factored into kernfs_should_drain_open_files() which is now tested by
kernfs_drain_open_files()'s caller - kernfs_drain().

This isn't a meaningful optimization on its own but will enable future
stand-alone kernfs_deactivate() implementation.

v2: Chengming noticed that on->nr_to_release was leaking after ->open()
    failure. Fix it by telling kernfs_unlink_open_file() that it's called
    from the ->open() fail path and should dec the counter. Use kzalloc() to
    allocate kernfs_open_node so that the tracking fields are correctly
    initialized.

Cc: Chengming Zhou <zhouchengming@bytedance.com>
Tested-by: default avatarChengming Zhou <zhouchengming@bytedance.com>
Reviewed-by: default avatarChengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20220828050440.734579-5-tj@kernel.org


Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent cf2dc9db
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -489,6 +489,7 @@ static void kernfs_drain(struct kernfs_node *kn)
		rwsem_release(&kn->dep_map, _RET_IP_);
	}

	if (kernfs_should_drain_open_files(kn))
		kernfs_drain_open_files(kn);

	down_write(&root->kernfs_rwsem);
+45 −20
Original line number Diff line number Diff line
@@ -23,6 +23,8 @@ struct kernfs_open_node {
	atomic_t		event;
	wait_queue_head_t	poll;
	struct list_head	files; /* goes through kernfs_open_file.list */
	unsigned int		nr_mmapped;
	unsigned int		nr_to_release;
};

/*
@@ -527,6 +529,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)

	rc = 0;
	of->mmapped = true;
	of_on(of)->nr_mmapped++;
	of->vm_ops = vma->vm_ops;
	vma->vm_ops = &kernfs_vm_ops;
out_put:
@@ -562,7 +565,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,

	if (!on) {
		/* not there, initialize a new one */
		on = kmalloc(sizeof(*on), GFP_KERNEL);
		on = kzalloc(sizeof(*on), GFP_KERNEL);
		if (!on) {
			mutex_unlock(mutex);
			return -ENOMEM;
@@ -574,6 +577,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
	}

	list_add_tail(&of->list, &on->files);
	if (kn->flags & KERNFS_HAS_RELEASE)
		on->nr_to_release++;

	mutex_unlock(mutex);
	return 0;
@@ -584,6 +589,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
 *
 *	@kn: target kernfs_node
 *	@of: associated kernfs_open_file
 *	@open_failed: ->open() failed, cancel ->release()
 *
 *	Unlink @of from list of @kn's associated open files. If list of
 *	associated open files becomes empty, disassociate and free
@@ -593,7 +599,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
 *	None.
 */
static void kernfs_unlink_open_file(struct kernfs_node *kn,
				 struct kernfs_open_file *of)
				    struct kernfs_open_file *of,
				    bool open_failed)
{
	struct kernfs_open_node *on;
	struct mutex *mutex;
@@ -606,8 +613,16 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,
		return;
	}

	if (of)
	if (of) {
		if (kn->flags & KERNFS_HAS_RELEASE) {
			WARN_ON_ONCE(of->released == open_failed);
			if (open_failed)
				on->nr_to_release--;
		}
		if (of->mmapped)
			on->nr_mmapped--;
		list_del(&of->list);
	}

	if (list_empty(&on->files)) {
		rcu_assign_pointer(kn->attr.open, NULL);
@@ -734,7 +749,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
	return 0;

err_put_node:
	kernfs_unlink_open_file(kn, of);
	kernfs_unlink_open_file(kn, of, true);
err_seq_release:
	seq_release(inode, file);
err_free:
@@ -766,6 +781,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
		 */
		kn->attr.ops->release(of);
		of->released = true;
		of_on(of)->nr_to_release--;
	}
}

@@ -782,7 +798,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
		mutex_unlock(mutex);
	}

	kernfs_unlink_open_file(kn, of);
	kernfs_unlink_open_file(kn, of, false);
	seq_release(inode, filp);
	kfree(of->prealloc_buf);
	kfree(of);
@@ -790,25 +806,30 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
	return 0;
}

void kernfs_drain_open_files(struct kernfs_node *kn)
bool kernfs_should_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	struct kernfs_open_file *of;
	struct mutex *mutex;

	if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
		return;
	bool ret;

	/*
	 * lockless opportunistic check is safe below because no one is adding to
	 * ->attr.open at this point of time. This check allows early bail out
	 * if ->attr.open is already NULL. kernfs_unlink_open_file makes
	 * ->attr.open NULL only while holding kernfs_open_file_mutex so below
	 * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
	 * ->attr.open became NULL while waiting for the mutex.
	 * @kn being deactivated guarantees that @kn->attr.open can't change
	 * beneath us making the lockless test below safe.
	 */
	if (!rcu_access_pointer(kn->attr.open))
		return;
	WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);

	rcu_read_lock();
	on = rcu_dereference(kn->attr.open);
	ret = on && (on->nr_mmapped || on->nr_to_release);
	rcu_read_unlock();

	return ret;
}

void kernfs_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	struct kernfs_open_file *of;
	struct mutex *mutex;

	mutex = kernfs_open_file_mutex_lock(kn);
	on = kernfs_deref_open_node_locked(kn);
@@ -820,13 +841,17 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
	list_for_each_entry(of, &on->files, list) {
		struct inode *inode = file_inode(of->file);

		if (kn->flags & KERNFS_HAS_MMAP)
		if (of->mmapped) {
			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
			of->mmapped = false;
			on->nr_mmapped--;
		}

		if (kn->flags & KERNFS_HAS_RELEASE)
			kernfs_release_file(kn, of);
	}

	WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
	mutex_unlock(mutex);
}

+1 −0
Original line number Diff line number Diff line
@@ -157,6 +157,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 */
extern const struct file_operations kernfs_file_fops;

bool kernfs_should_drain_open_files(struct kernfs_node *kn);
void kernfs_drain_open_files(struct kernfs_node *kn);

/*