Commit 2c0042e8 authored by Amir Goldstein's avatar Amir Goldstein Committed by Baokun Li
Browse files

fsnotify: clear PARENT_WATCHED flags lazily

stable inclusion
from stable-v5.10.226
commit 3f3ef1d9f66b93913ce2171120d9226b55acd41d
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAVU3A
CVE: CVE-2024-47660

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=3f3ef1d9f66b93913ce2171120d9226b55acd41d



--------------------------------

[ Upstream commit 172e422ffea20a89bfdc672741c1aad6fbb5044e ]

In some setups directories can have many (usually negative) dentries.
Hence __fsnotify_update_child_dentry_flags() function can take a
significant amount of time. Since the bulk of this function happens
under inode->i_lock this causes a significant contention on the lock
when we remove the watch from the directory as the
__fsnotify_update_child_dentry_flags() call from fsnotify_recalc_mask()
races with __fsnotify_update_child_dentry_flags() calls from
__fsnotify_parent() happening on children. This can lead upto softlockup
reports reported by users.

Fix the problem by calling fsnotify_update_children_dentry_flags() to
set PARENT_WATCHED flags only when parent starts watching children.

When parent stops watching children, clear false positive PARENT_WATCHED
flags lazily in __fsnotify_parent() for each accessed child.

Suggested-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarAmir Goldstein <amir73il@gmail.com>
Signed-off-by: default avatarStephen Brennan <stephen.s.brennan@oracle.com>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>

Conflicts:
	fs/notify/mark.c
[Context differences because there is no commit c3638b5b
 ("fsnotify: allow adding an inode mark without pinning inode").]
Signed-off-by: default avatarBaokun Li <libaokun1@huawei.com>
parent 3415fa79
Loading
Loading
Loading
Loading
+21 −10
Original line number Diff line number Diff line
@@ -105,17 +105,13 @@ void fsnotify_sb_delete(struct super_block *sb)
 * parent cares.  Thus when an event happens on a child it can quickly tell
 * if there is a need to find a parent and send the event to the parent.
 */
void __fsnotify_update_child_dentry_flags(struct inode *inode)
void fsnotify_set_children_dentry_flags(struct inode *inode)
{
	struct dentry *alias;
	int watched;

	if (!S_ISDIR(inode->i_mode))
		return;

	/* determine if the children should tell inode about their events */
	watched = fsnotify_inode_watches_children(inode);

	spin_lock(&inode->i_lock);
	/* run all of the dentries associated with this inode.  Since this is a
	 * directory, there damn well better only be one item on this list */
@@ -131,10 +127,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
				continue;

			spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
			if (watched)
			child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
			else
				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
			spin_unlock(&child->d_lock);
		}
		spin_unlock(&alias->d_lock);
@@ -142,6 +135,24 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
	spin_unlock(&inode->i_lock);
}

/*
 * Lazily clear false positive PARENT_WATCHED flag for child whose parent had
 * stopped watching children.
 */
static void fsnotify_clear_child_dentry_flag(struct inode *pinode,
					     struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	/*
	 * d_lock is a sufficient barrier to prevent observing a non-watched
	 * parent state from before the fsnotify_set_children_dentry_flags()
	 * or fsnotify_update_flags() call that had set PARENT_WATCHED.
	 */
	if (!fsnotify_inode_watches_children(pinode))
		dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
	spin_unlock(&dentry->d_lock);
}

/* Are inode/sb/mount interested in parent and name info with this event? */
static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
					__u32 mask)
@@ -210,7 +221,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
	p_inode = parent->d_inode;
	p_mask = fsnotify_inode_watches_children(p_inode);
	if (unlikely(parent_watched && !p_mask))
		__fsnotify_update_child_dentry_flags(p_inode);
		fsnotify_clear_child_dentry_flag(p_inode, dentry);

	/*
	 * Include parent/name in notification either if some notification
+1 −1
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
 * update the dentry->d_flags of all of inode's children to indicate if inode cares
 * about events that happen to its children.
 */
extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
extern void fsnotify_set_children_dentry_flags(struct inode *inode);

extern struct kmem_cache *fsnotify_mark_connector_cachep;

+29 −3
Original line number Diff line number Diff line
@@ -132,6 +132,24 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
	*fsnotify_conn_mask_p(conn) = new_mask;
}

static bool fsnotify_conn_watches_children(
					struct fsnotify_mark_connector *conn)
{
	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
		return false;

	return fsnotify_inode_watches_children(fsnotify_conn_inode(conn));
}

static void fsnotify_conn_set_children_dentry_flags(
					struct fsnotify_mark_connector *conn)
{
	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
		return;

	fsnotify_set_children_dentry_flags(fsnotify_conn_inode(conn));
}

/*
 * Calculate mask of events for a list of marks. The caller must make sure
 * connector and connector->obj cannot disappear under us.  Callers achieve
@@ -140,15 +158,23 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	bool update_children;

	if (!conn)
		return;

	spin_lock(&conn->lock);
	update_children = !fsnotify_conn_watches_children(conn);
	__fsnotify_recalc_mask(conn);
	update_children &= fsnotify_conn_watches_children(conn);
	spin_unlock(&conn->lock);
	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
		__fsnotify_update_child_dentry_flags(
					fsnotify_conn_inode(conn));
	/*
	 * Set children's PARENT_WATCHED flags only if parent started watching.
	 * When parent stops watching, we clear false positive PARENT_WATCHED
	 * flags lazily in __fsnotify_parent().
	 */
	if (update_children)
		fsnotify_conn_set_children_dentry_flags(conn);
}

/* Free all connectors queued for freeing once SRCU period ends */
+5 −3
Original line number Diff line number Diff line
@@ -440,12 +440,14 @@ static inline __u32 fsnotify_parent_needed_mask(__u32 mask)

static inline int fsnotify_inode_watches_children(struct inode *inode)
{
	__u32 parent_mask = READ_ONCE(inode->i_fsnotify_mask);

	/* FS_EVENT_ON_CHILD is set if the inode may care */
	if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD))
	if (!(parent_mask & FS_EVENT_ON_CHILD))
		return 0;
	/* this inode might care about child events, does it care about the
	 * specific set of events that can happen on a child? */
	return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD;
	return parent_mask & FS_EVENTS_POSS_ON_CHILD;
}

/*
@@ -459,7 +461,7 @@ static inline void fsnotify_update_flags(struct dentry *dentry)
	/*
	 * Serialisation of setting PARENT_WATCHED on the dentries is provided
	 * by d_lock. If inotify_inode_watched changes after we have taken
	 * d_lock, the following __fsnotify_update_child_dentry_flags call will
	 * d_lock, the following fsnotify_set_children_dentry_flags call will
	 * find our entry, so it will spin until we complete here, and update
	 * us with the new state.
	 */