Commit df820f8d authored by Miklos Szeredi's avatar Miklos Szeredi
Browse files

ovl: make private mounts longterm



Overlayfs is using clone_private_mount() to create internal mounts for
underlying layers.  These are used for operations requiring a path, such as
dentry_open().

Since these private mounts are not in any namespace they are treated as
short term, "detached" mounts and mntput() involves taking the global
mount_lock, which can result in serious cacheline pingpong.

Make these private mounts longterm instead, which trade the penalty on
mntput() for a slightly longer shutdown time due to an added RCU grace
period when putting these mounts.

Introduce a new helper kern_unmount_many() that can take care of multiple
longterm mounts with a single RCU grace period.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: default avatarMiklos Szeredi <mszeredi@redhat.com>
parent b8e42a65
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -858,3 +858,10 @@ be misspelled d_alloc_anon().
[should've been added in 2016] stale comment in finish_open() nonwithstanding,
failure exits in ->atomic_open() instances should *NOT* fput() the file,
no matter what.  Everything is handled by the caller.

---

**mandatory**

clone_private_mount() returns a longterm mount now, so the proper destructor of
its result is kern_unmount() or kern_unmount_array().
+16 −0
Original line number Diff line number Diff line
@@ -1879,6 +1879,9 @@ struct vfsmount *clone_private_mount(const struct path *path)
	if (IS_ERR(new_mnt))
		return ERR_CAST(new_mnt);

	/* Longterm mount to be removed by kern_unmount*() */
	new_mnt->mnt_ns = MNT_NS_INTERNAL;

	return &new_mnt->mnt;
}
EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -3804,6 +3807,19 @@ void kern_unmount(struct vfsmount *mnt)
}
EXPORT_SYMBOL(kern_unmount);

void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
{
	unsigned int i;

	for (i = 0; i < num; i++)
		if (mnt[i])
			real_mount(mnt[i])->mnt_ns = NULL;
	synchronize_rcu_expedited();
	for (i = 0; i < num; i++)
		mntput(mnt[i]);
}
EXPORT_SYMBOL(kern_unmount_array);

bool our_mnt(struct vfsmount *mnt)
{
	return check_mnt(real_mount(mnt));
+6 −1
Original line number Diff line number Diff line
@@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode)

static void ovl_free_fs(struct ovl_fs *ofs)
{
	struct vfsmount **mounts;
	unsigned i;

	iput(ofs->workbasedir_trap);
@@ -224,10 +225,14 @@ static void ovl_free_fs(struct ovl_fs *ofs)
	dput(ofs->workbasedir);
	if (ofs->upperdir_locked)
		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);

	/* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
	mounts = (struct vfsmount **) ofs->layers;
	for (i = 0; i < ofs->numlayer; i++) {
		iput(ofs->layers[i].trap);
		mntput(ofs->layers[i].mnt);
		mounts[i] = ofs->layers[i].mnt;
	}
	kern_unmount_array(mounts, ofs->numlayer);
	kfree(ofs->layers);
	for (i = 0; i < ofs->numfs; i++)
		free_anon_bdev(ofs->fs[i].pseudo_dev);
+2 −0
Original line number Diff line number Diff line
@@ -109,4 +109,6 @@ extern unsigned int sysctl_mount_max;

extern bool path_is_mountpoint(const struct path *path);

extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);

#endif /* _LINUX_MOUNT_H */