Merge tag 'ovl-update-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs (d652502e) · Commits · EulixOS / Software / Kernel

Documentation/filesystems/overlayfs.rst

+12 −14

Original line number	Diff line number	Diff line
		@@ -40,17 +40,17 @@ On 64bit systems, even if all overlay layers are not on the same
		underlying filesystem, the same compliant behavior could be achieved
		with the "xino" feature. The "xino" feature composes a unique object
		identifier from the real object st_ino and an underlying fsid index.

		If all underlying filesystems support NFS file handles and export file
		handles with 32bit inode number encoding (e.g. ext4), overlay filesystem
		will use the high inode number bits for fsid. Even when the underlying
		filesystem uses 64bit inode numbers, users can still enable the "xino"
		feature with the "-o xino=on" overlay mount option. That is useful for the
		case of underlying filesystems like xfs and tmpfs, which use 64bit inode
		numbers, but are very unlikely to use the high inode number bits. In case
		The "xino" feature uses the high inode number bits for fsid, because the
		underlying filesystems rarely use the high inode number bits. In case
		the underlying inode number does overflow into the high xino bits, overlay
		filesystem will fall back to the non xino behavior for that inode.

		The "xino" feature can be enabled with the "-o xino=on" overlay mount option.
		If all underlying filesystems support NFS file handles, the value of st_ino
		for overlay filesystem objects is not only unique, but also persistent over
		the lifetime of the filesystem. The "-o xino=auto" overlay mount option
		enables the "xino" feature only if the persistent st_ino requirement is met.

		The following table summarizes what can be expected in different overlay
		configurations.

		@@ -66,14 +66,13 @@ Inode properties
		| All layers | Y | Y | Y | Y | Y | Y | Y | Y |
		| on same fs | | | | | | | | |
		+--------------+-----+------+-----+------+--------+--------+--------+-------+
		| Layers not | N | Y | Y | N | N | Y | N | Y |
		| Layers not | N | N | Y | N | N | Y | N | Y |
		| on same fs, | | | | | | | | |
		| xino=off | | | | | | | | |
		+--------------+-----+------+-----+------+--------+--------+--------+-------+
		| xino=on/auto | Y | Y | Y | Y | Y | Y | Y | Y |
		| | | | | | | | | |
		+--------------+-----+------+-----+------+--------+--------+--------+-------+
		| xino=on/auto,| N | Y | Y | N | N | Y | N | Y |
		| xino=on/auto,| N | N | Y | N | N | Y | N | Y |
		| ino overflow | | | | | | | | |
		+--------------+-----+------+-----+------+--------+--------+--------+-------+

		@@ -81,7 +80,6 @@ Inode properties
		/proc files, such as /proc/locks and /proc/self/fdinfo/<fd> of an inotify
		file descriptor.


		Upper and Lower
		---------------

		@@ -461,7 +459,7 @@ enough free bits in the inode number, then overlayfs will not be able to
		guarantee that the values of st_ino and st_dev returned by stat(2) and the
		value of d_ino returned by readdir(3) will act like on a normal filesystem.
		E.g. the value of st_dev may be different for two objects in the same
		overlay filesystem and the value of st_ino for directory objects may not be
		overlay filesystem and the value of st_ino for filesystem objects may not be
		persistent and could change even while the overlay filesystem is mounted, as
		summarized in the `Inode properties`_ table above.

		@@ -476,7 +474,7 @@ a crash or deadlock.

		Offline changes, when the overlay is not mounted, are allowed to the
		upper tree. Offline changes to the lower tree are only allowed if the
		"metadata only copy up", "inode index", and "redirect_dir" features
		"metadata only copy up", "inode index", "xino" and "redirect_dir" features
		have not been used. If the lower tree is modified and any of these
		features has been used, the behavior of the overlay is undefined,
		though it will not result in a crash or deadlock.

fs/overlayfs/copy_up.c

+2 −1

Original line number	Diff line number	Diff line
		@@ -932,7 +932,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
		static int ovl_copy_up_flags(struct dentry *dentry, int flags)
		{
		int err = 0;
		const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
		const struct cred *old_cred;
		bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);

		/*
		@@ -943,6 +943,7 @@ static int ovl_copy_up_flags(struct dentry *dentry, int flags)
		if (WARN_ON(disconnected && d_is_dir(dentry)))
		return -EIO;

		old_cred = ovl_override_creds(dentry->d_sb);
		while (!err) {
		struct dentry *next;
		struct dentry *parent = NULL;

fs/overlayfs/file.c

+21 −0

Original line number	Diff line number	Diff line
		@@ -571,6 +571,26 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
		remap_flags, op);
		}

		/*
		 * ->flush handler for overlayfs files (installed in ovl_file_operations).
		 *
		 * Looks up the real underlying file with ovl_real_fdget() and, if that
		 * file's f_op provides a ->flush method, forwards the call to it with the
		 * same fl_owner_t id.
		 *
		 * Returns the error from ovl_real_fdget() if the lookup fails, otherwise
		 * the result of the real file's ->flush, or 0 when the real file has no
		 * ->flush method (err is still 0 from the successful lookup).
		 */
		static int ovl_flush(struct file *file, fl_owner_t id)
		{
		struct fd real;
		const struct cred *old_cred;
		int err;

		err = ovl_real_fdget(file, &real);
		if (err)
		return err;

		if (real.file->f_op->flush) {
		/*
		 * Call the real ->flush under the credentials installed by
		 * ovl_override_creds() for this superblock, then restore the
		 * caller's credentials.
		 * NOTE(review): presumably these are the mounter's creds - confirm
		 * against ovl_override_creds().
		 */
		old_cred = ovl_override_creds(file_inode(file)->i_sb);
		err = real.file->f_op->flush(real.file, id);
		revert_creds(old_cred);
		}
		/* Release the reference taken by ovl_real_fdget() on every path. */
		fdput(real);

		return err;
		}

		const struct file_operations ovl_file_operations = {
		.open = ovl_open,
		.release = ovl_release,
		@@ -581,6 +601,7 @@ const struct file_operations ovl_file_operations = {
		.mmap = ovl_mmap,
		.fallocate = ovl_fallocate,
		.fadvise = ovl_fadvise,
		.flush = ovl_flush,
		.splice_read = generic_file_splice_read,
		.splice_write = iter_file_splice_write,

fs/overlayfs/inode.c

+7 −11

Original line number	Diff line number	Diff line
		@@ -97,7 +97,7 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
		return err;
		}

		static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
		static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
		{
		bool samefs = ovl_same_fs(dentry->d_sb);
		unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
		@@ -110,21 +110,21 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
		* which is friendly to du -x.
		*/
		stat->dev = dentry->d_sb->s_dev;
		return 0;
		return;
		} else if (xinobits) {
		/*
		* All inode numbers of underlying fs should not be using the
		* high xinobits, so we use high xinobits to partition the
		* overlay st_ino address space. The high bits holds the fsid
		* (upper fsid is 0). The lowest xinobit is reserved for mapping
		* the non-peresistent inode numbers range in case of overflow.
		* the non-persistent inode numbers range in case of overflow.
		* This way all overlay inode numbers are unique and use the
		* overlay st_dev.
		*/
		if (likely(!(stat->ino >> xinoshift))) {
		stat->ino |= ((u64)fsid) << (xinoshift + 1);
		stat->dev = dentry->d_sb->s_dev;
		return 0;
		return;
		} else if (ovl_xino_warn(dentry->d_sb)) {
		pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
		dentry, stat->ino, xinobits);
		@@ -153,8 +153,6 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
		*/
		stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev;
		}

		return 0;
		}

		int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
		@@ -253,9 +251,7 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
		}
		}

		err = ovl_map_dev_ino(dentry, stat, fsid);
		if (err)
		goto out;
		ovl_map_dev_ino(dentry, stat, fsid);

		/*
		* It's probably not worth it to count subdirs to get the
		@@ -410,7 +406,7 @@ static bool ovl_can_list(struct super_block *sb, const char *s)
		if (ovl_is_private_xattr(sb, s))
		return false;

		/* List all non-trusted xatts */
		/* List all non-trusted xattrs */
		if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
		return true;

		@@ -615,7 +611,7 @@ static const struct address_space_operations ovl_aops = {
		* stackable i_mutex locks according to stack level of the super
		* block instance. An overlayfs instance can never be in stack
		* depth 0 (there is always a real fs below it). An overlayfs
		* inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
		* inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
		*
		* For example, here is a snip from /proc/lockdep_chains after
		* dir_iterate of nested overlayfs:

fs/overlayfs/namei.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -919,6 +919,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
		continue;

		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
		dput(this);
		err = -EPERM;
		pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
		goto out_put;