Commit bbd2c8de authored by Jeff Layton, committed by Jinjie Ruan
Browse files

fs: try an opportunistic lookup for O_CREAT opens too

mainline inclusion
from mainline-v6.12-rc1
commit e747e15156b79efeea0ad056df8de14b93d318c2
category: performance
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IB1S01

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e747e15156b79efeea0ad056df8de14b93d318c2



--------------------------------

Today, when opening a file we'll typically do a fast lookup, but if
O_CREAT is set, the kernel always takes the exclusive inode lock. I
assume this was done with the expectation that O_CREAT means that we
always expect to do the create, but that's often not the case. Many
programs set O_CREAT even in scenarios where the file already exists.

This patch rearranges the pathwalk-for-open code to also attempt a
fast_lookup in certain O_CREAT cases. If a positive dentry is found, the
inode_lock can be avoided altogether, and if auditing isn't enabled, it
can stay in rcuwalk mode for the last step_into.

One notable exception that is hopefully temporary: if we're doing an
rcuwalk and auditing is enabled, skip the lookup_fast. Legitimizing the
dentry in that case is more expensive than taking the i_rwsem for now.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20240807-openfast-v3-1-040d132d2559@kernel.org


Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Conflicts:
	fs/namei.c
[Context conflict]
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
parent 18112d46
Loading
Loading
Loading
Loading
+64 −10
Original line number Diff line number Diff line
@@ -3503,6 +3503,49 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
	return ERR_PTR(error);
}

/*
 * Report whether the byte immediately following the last path component
 * (nd->last.name[nd->last.len]) is something other than the NUL
 * terminator. Callers treat a true result as "the pathname had trailing
 * slashes".
 */
static inline bool trailing_slashes(struct nameidata *nd)
{
	return nd->last.name[nd->last.len] != '\0';
}

/*
 * Opportunistic lookup_fast() of the final component on behalf of an open.
 *
 * @nd:        walk state; nd->flags may gain LOOKUP_FOLLOW | LOOKUP_DIRECTORY
 *             when the last component is followed by trailing slashes.
 * @open_flag: the open(2) flags; only O_CREAT and O_EXCL are inspected here.
 *
 * Returns whatever lookup_fast() produced (a dentry, NULL, or an ERR_PTR),
 * except that for O_CREAT opens:
 *   - no lookup is attempted at all (NULL is returned) for O_EXCL, or when
 *     in LOOKUP_RCU mode with a non-dummy audit context;
 *   - a negative dentry is discarded and NULL is returned instead.
 */
static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
{
	const int creating = open_flag & O_CREAT;
	struct dentry *found;

	/* An exclusive create gains nothing from a fast lookup. */
	if (creating && (open_flag & O_EXCL))
		return NULL;

	/*
	 * FIXME: with auditing enabled we would have to unlazy in order to
	 * use the dentry, which merely moves the contention from the
	 * parent's i_rwsem to its d_lockref spinlock. Skip the fast path
	 * for now; revisit once dentry refcounting copes better with heavy
	 * contention.
	 */
	if (creating && (nd->flags & LOOKUP_RCU) && !audit_dummy_context())
		return NULL;

	if (trailing_slashes(nd))
		nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;

	found = lookup_fast(nd);
	if (IS_ERR_OR_NULL(found))
		return found;

	/* Non-create opens, and creates that hit a positive dentry. */
	if (!creating || found->d_inode)
		return found;

	/*
	 * Negative dentry on a create: the inode_lock is needed to do the
	 * create, so throw the dentry away. dput() is only issued when we
	 * are not in LOOKUP_RCU mode.
	 */
	if (!(nd->flags & LOOKUP_RCU))
		dput(found);
	return NULL;
}

static const char *open_last_lookups(struct nameidata *nd,
		   struct file *file, const struct open_flags *op)
{
@@ -3520,28 +3563,39 @@ static const char *open_last_lookups(struct nameidata *nd,
		return handle_dots(nd, nd->last_type);
	}

	if (!(open_flag & O_CREAT)) {
		if (nd->last.name[nd->last.len])
			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
		/* we _can_ be in RCU mode here */
		dentry = lookup_fast(nd);
	/* We _can_ be in RCU mode here */
	dentry = lookup_fast_for_open(nd, open_flag);
	if (IS_ERR(dentry))
		return ERR_CAST(dentry);

	if (!(open_flag & O_CREAT)) {
		if (likely(dentry))
			goto finish_lookup;

		BUG_ON(nd->flags & LOOKUP_RCU);
	} else {
		/* create side of things */
		if (nd->flags & LOOKUP_RCU) {
			if (!try_to_unlazy(nd))
			bool unlazied;

			/* can stay in rcuwalk if not auditing */
			if (dentry && audit_dummy_context()) {
				if (trailing_slashes(nd))
					return ERR_PTR(-EISDIR);
				goto finish_lookup;
			}
			unlazied = dentry ? try_to_unlazy_next(nd, dentry) :
					    try_to_unlazy(nd);
			if (!unlazied)
				return ERR_PTR(-ECHILD);
		}
		audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
		/* trailing slashes? */
		if (unlikely(nd->last.name[nd->last.len]))
		if (trailing_slashes(nd)) {
			dput(dentry);
			return ERR_PTR(-EISDIR);
		}
		if (dentry)
			goto finish_lookup;
	}

	if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
		got_write = !mnt_want_write(nd->path.mnt);