Commit 4a6bff11 authored by Linus Torvalds
Pull erofs updates from Gao Xiang:
 "In this cycle, large folios are now enabled in the iomap/fscache mode
  for uncompressed files first. In order to do that, we've also cleaned
  up the interfaces between erofs and fscache, which have been acked by
  fscache/netfs folks and are included in this pull request.

  Other than that, there are assorted fixes for erofs over fscache and
  for crafted images found by syzbot, plus minor cleanups and
  documentation updates.

  Summary:

   - Enable large folios for iomap/fscache mode

   - Avoid sysfs warning due to mounting twice with the same fsid and
     domain_id in fscache mode

   - Refine fscache interface among erofs, fscache, and cachefiles

   - Use kmap_local_page() only for metabuf

   - Fixes around crafted images found by syzbot

   - Minor cleanups and documentation updates"

* tag 'erofs-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: validate the extent length for uncompressed pclusters
  erofs: fix missing unmap if z_erofs_get_extent_compressedlen() fails
  erofs: Fix pcluster memleak when its block address is zero
  erofs: use kmap_local_page() only for erofs_bread()
  erofs: enable large folios for fscache mode
  erofs: support large folios for fscache mode
  erofs: switch to prepare_ondemand_read() in fscache mode
  fscache,cachefiles: add prepare_ondemand_read() callback
  erofs: clean up cached I/O strategies
  erofs: update documentation
  erofs: check the uniqueness of fsid in shared domain in advance
  erofs: enable large folios for iomap mode
parents ad0d9da1 c505feba
+25 −13
@@ -30,12 +30,18 @@ It is implemented to be a better choice for the following scenarios:
   especially for those embedded devices with limited memory and high-density
   hosts with numerous containers.

Here is the main features of EROFS:
Here are the main features of EROFS:

 - Little endian on-disk design;

 - 4KiB block size and 32-bit block addresses, therefore 16TiB address space
   at most for now;
 - Block-based distribution and file-based distribution over fscache are
   supported;

 - Support multiple devices to refer to external blobs, which can be used
   for container images;

 - 4KiB block size and 32-bit block addresses for each device, therefore
   16TiB address space at most for now;

 - Two inode layouts for different requirements:

@@ -50,28 +56,31 @@ Here is the main features of EROFS:
   Metadata reserved      8 bytes       18 bytes
   =====================  ============  ======================================

 - Metadata and data could be mixed as an option;

 - Support extended attributes (xattrs) as an option;
 - Support extended attributes as an option;

 - Support tailpacking data and xattr inline compared to byte-addressed
   unaligned metadata or smaller block size alternatives;

 - Support POSIX.1e ACLs by using xattrs;
 - Support POSIX.1e ACLs by using extended attributes;

 - Support transparent data compression as an option:
   LZ4 and MicroLZMA algorithms can be used on a per-file basis; In addition,
   inplace decompression is also supported to avoid bounce compressed buffers
   and page cache thrashing.

 - Support chunk-based data deduplication and rolling-hash compressed data
   deduplication;

 - Support tailpacking inline compared to byte-addressed unaligned metadata
   or smaller block size alternatives;

 - Support merging tail-end data into a special inode as fragments.

 - Support large folios for uncompressed files.

 - Support direct I/O on uncompressed files to avoid double caching for loop
   devices;

 - Support FSDAX on uncompressed images for secure containers and ramdisks in
   order to get rid of unnecessary page cache.

 - Support multiple devices for multi blob container images;

 - Support file-based on-demand loading with the Fscache infrastructure.

The following git tree provides the file system user-space tools under
@@ -259,7 +268,7 @@ By the way, chunk-based files are all uncompressed for now.

Data compression
----------------
EROFS implements LZ4 fixed-sized output compression which generates fixed-sized
EROFS implements fixed-sized output compression which generates fixed-sized
compressed data blocks from variable-sized input in contrast to other existing
fixed-sized input solutions. Relatively higher compression ratios can be gotten
by using fixed-sized output compression since nowadays popular data compression
@@ -314,3 +323,6 @@ to understand its delta0 is constantly 1, as illustrated below::

If another HEAD follows a HEAD lcluster, there is no room to record CBLKCNT,
but it's easy to know the size of such pcluster is 1 lcluster as well.

Since Linux v6.1, each pcluster can be used for multiple variable-sized extents,
therefore it can be used for compressed data deduplication.
+50 −27
@@ -385,38 +385,35 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
				  term_func, term_func_priv);
}

/*
 * Prepare a read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
						      loff_t i_size)
static inline enum netfs_io_source
cachefiles_do_prepare_read(struct netfs_cache_resources *cres,
			   loff_t start, size_t *_len, loff_t i_size,
			   unsigned long *_flags, ino_t netfs_ino)
{
	enum cachefiles_prepare_read_trace why;
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct cachefiles_object *object;
	struct cachefiles_object *object = NULL;
	struct cachefiles_cache *cache;
	struct fscache_cookie *cookie = fscache_cres_cookie(cres);
	const struct cred *saved_cred;
	struct file *file = cachefiles_cres_file(cres);
	enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
	size_t len = *_len;
	loff_t off, to;
	ino_t ino = file ? file_inode(file)->i_ino : 0;
	int rc;

	_enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
	_enter("%zx @%llx/%llx", len, start, i_size);

	if (subreq->start >= i_size) {
	if (start >= i_size) {
		ret = NETFS_FILL_WITH_ZEROES;
		why = cachefiles_trace_read_after_eof;
		goto out_no_object;
	}

	if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
		__set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
		__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
		why = cachefiles_trace_read_no_data;
		if (!test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags))
		if (!test_bit(NETFS_SREQ_ONDEMAND, _flags))
			goto out_no_object;
	}

@@ -437,7 +434,7 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *
retry:
	off = cachefiles_inject_read_error();
	if (off == 0)
		off = vfs_llseek(file, subreq->start, SEEK_DATA);
		off = vfs_llseek(file, start, SEEK_DATA);
	if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
		if (off == (loff_t)-ENXIO) {
			why = cachefiles_trace_read_seek_nxio;
@@ -449,21 +446,22 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *
		goto out;
	}

	if (off >= subreq->start + subreq->len) {
	if (off >= start + len) {
		why = cachefiles_trace_read_found_hole;
		goto download_and_store;
	}

	if (off > subreq->start) {
	if (off > start) {
		off = round_up(off, cache->bsize);
		subreq->len = off - subreq->start;
		len = off - start;
		*_len = len;
		why = cachefiles_trace_read_found_part;
		goto download_and_store;
	}

	to = cachefiles_inject_read_error();
	if (to == 0)
		to = vfs_llseek(file, subreq->start, SEEK_HOLE);
		to = vfs_llseek(file, start, SEEK_HOLE);
	if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), to,
					  cachefiles_trace_seek_error);
@@ -471,12 +469,13 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *
		goto out;
	}

	if (to < subreq->start + subreq->len) {
		if (subreq->start + subreq->len >= i_size)
	if (to < start + len) {
		if (start + len >= i_size)
			to = round_up(to, cache->bsize);
		else
			to = round_down(to, cache->bsize);
		subreq->len = to - subreq->start;
		len = to - start;
		*_len = len;
	}

	why = cachefiles_trace_read_have_data;
@@ -484,12 +483,11 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *
	goto out;

download_and_store:
	__set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
	if (test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) {
		rc = cachefiles_ondemand_read(object, subreq->start,
					      subreq->len);
	__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
	if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) {
		rc = cachefiles_ondemand_read(object, start, len);
		if (!rc) {
			__clear_bit(NETFS_SREQ_ONDEMAND, &subreq->flags);
			__clear_bit(NETFS_SREQ_ONDEMAND, _flags);
			goto retry;
		}
		ret = NETFS_INVALID_READ;
@@ -497,10 +495,34 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *
out:
	cachefiles_end_secure(cache, saved_cred);
out_no_object:
	trace_cachefiles_prep_read(subreq, ret, why, ino);
	trace_cachefiles_prep_read(object, start, len, *_flags, ret, why, ino, netfs_ino);
	return ret;
}

/*
 * Prepare a read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
						    loff_t i_size)
{
	return cachefiles_do_prepare_read(&subreq->rreq->cache_resources,
					  subreq->start, &subreq->len, i_size,
					  &subreq->flags, subreq->rreq->inode->i_ino);
}

/*
 * Prepare an on-demand read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source
cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
				 loff_t start, size_t *_len, loff_t i_size,
				 unsigned long *_flags, ino_t ino)
{
	return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, ino);
}

/*
 * Prepare for a write to occur.
 */
@@ -621,6 +643,7 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
	.write			= cachefiles_write,
	.prepare_read		= cachefiles_prepare_read,
	.prepare_write		= cachefiles_prepare_write,
	.prepare_ondemand_read	= cachefiles_prepare_ondemand_read,
	.query_occupancy	= cachefiles_query_occupancy,
};

+4 −6
@@ -13,9 +13,7 @@
void erofs_unmap_metabuf(struct erofs_buf *buf)
{
	if (buf->kmap_type == EROFS_KMAP)
		kunmap(buf->page);
	else if (buf->kmap_type == EROFS_KMAP_ATOMIC)
		kunmap_atomic(buf->base);
		kunmap_local(buf->base);
	buf->base = NULL;
	buf->kmap_type = EROFS_NO_KMAP;
}
@@ -54,9 +52,7 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
	}
	if (buf->kmap_type == EROFS_NO_KMAP) {
		if (type == EROFS_KMAP)
			buf->base = kmap(page);
		else if (type == EROFS_KMAP_ATOMIC)
			buf->base = kmap_atomic(page);
			buf->base = kmap_local_page(page);
		buf->kmap_type = type;
	} else if (buf->kmap_type != type) {
		DBG_BUGON(1);
@@ -403,6 +399,8 @@ const struct address_space_operations erofs_raw_access_aops = {
	.readahead = erofs_readahead,
	.bmap = erofs_bmap,
	.direct_IO = noop_direct_IO,
	.release_folio = iomap_release_folio,
	.invalidate_folio = iomap_invalidate_folio,
};

#ifdef CONFIG_FS_DAX
+185 −223

File changed (preview size limit exceeded, changes collapsed).

+2 −0
@@ -268,6 +268,7 @@ static int erofs_fill_inode(struct inode *inode)
	case S_IFDIR:
		inode->i_op = &erofs_dir_iops;
		inode->i_fop = &erofs_dir_fops;
		inode_nohighmem(inode);
		break;
	case S_IFLNK:
		err = erofs_fill_symlink(inode, kaddr, ofs);
@@ -295,6 +296,7 @@ static int erofs_fill_inode(struct inode *inode)
		goto out_unlock;
	}
	inode->i_mapping->a_ops = &erofs_raw_access_aops;
	mapping_set_large_folios(inode->i_mapping);
#ifdef CONFIG_EROFS_FS_ONDEMAND
	if (erofs_is_fscache_mode(inode->i_sb))
		inode->i_mapping->a_ops = &erofs_fscache_access_aops;