Commit 9149fe8b authored by Linus Torvalds
Browse files
Pull erofs updates from Gao Xiang:
 "In this cycle, tail-packing data inline for compressed files is now
  supported so that tail pcluster can be stored and read together with
  inode metadata in order to save data I/O and storage space.

  In addition to that, to prepare for the upcoming subpage, folio and
  fscache features, we also introduce meta buffers to get rid of
  erofs_get_meta_page() since it was too close to the page itself.

  In addition, in order to show supported kernel features and control
  sync decompression strategy, new sysfs nodes are introduced in this
  cycle as well.

  Summary:

   - add sysfs interface and a sysfs node to control sync decompression

   - add tail-packing inline support for compressed files

   - get rid of erofs_get_meta_page()"

* tag 'erofs-for-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: use meta buffers for zmap operations
  erofs: use meta buffers for xattr operations
  erofs: use meta buffers for super operations
  erofs: use meta buffers for inode operations
  erofs: introduce meta buffer operations
  erofs: add on-disk compressed tail-packing inline support
  erofs: support inline data decompression
  erofs: support unaligned data decompression
  erofs: introduce z_erofs_fixup_insize
  erofs: tidy up z_erofs_lz4_decompress
  erofs: clean up erofs_map_blocks tracepoints
  erofs: Replace zero-length array with flexible-array member
  erofs: add sysfs node to control sync decompression strategy
  erofs: add sysfs interface
  erofs: rename lz4_0padding to zero_padding
parents 579f3a6d 09c54379
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
What:		/sys/fs/erofs/features/
Date:		November 2021
Contact:	"Huang Jianan" <huangjianan@oppo.com>
Description:	Shows all enabled kernel features.
		Supported features:
		zero_padding, compr_cfgs, big_pcluster, chunked_file,
		device_table, compr_head2, sb_chksum.

What:		/sys/fs/erofs/<disk>/sync_decompress
Date:		November 2021
Contact:	"Huang Jianan" <huangjianan@oppo.com>
Description:	Control strategy of sync decompression
		- 0 (default, auto): enable for readpage, and enable for
				     readahead on atomic contexts only.
		- 1 (force on): enable for readpage and readahead.
		- 2 (force off): disable for all situations.
+8 −0
Original line number Diff line number Diff line
@@ -93,6 +93,14 @@ dax A legacy option which is an alias for ``dax=always``.
device=%s              Specify a path to an extra device to be used together.
===================    =========================================================

Sysfs Entries
=============

Information about mounted erofs file systems can be found in /sys/fs/erofs.
Each mounted filesystem will have a directory in /sys/fs/erofs based on its
device name (e.g., /sys/fs/erofs/sda).
(see also Documentation/ABI/testing/sysfs-fs-erofs)

On-disk details
===============

+1 −1
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0-only

obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
+3 −1
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ struct z_erofs_decompress_req {
	struct super_block *sb;
	struct page **in, **out;

	unsigned short pageofs_out;
	unsigned short pageofs_in, pageofs_out;
	unsigned int inputsize, outputsize;

	/* indicates the algorithm that will be used for decompression */
@@ -87,6 +87,8 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
	return page->mapping == MNGD_MAPPING(sbi);
}

int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
			 unsigned int padbufsize);
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
		       struct page **pagepool);

+88 −50
Original line number Diff line number Diff line
@@ -9,37 +9,71 @@
#include <linux/dax.h>
#include <trace/events/erofs.h>

struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr)
/*
 * Tear down whatever kernel mapping @buf currently holds and mark the
 * buffer as unmapped.  The page reference itself is left untouched.
 */
void erofs_unmap_metabuf(struct erofs_buf *buf)
{
	switch (buf->kmap_type) {
	case EROFS_KMAP:
		/* kmap() mappings are released through the page */
		kunmap(buf->page);
		break;
	case EROFS_KMAP_ATOMIC:
		/* atomic mappings are released through the mapped address */
		kunmap_atomic(buf->base);
		break;
	default:
		/* nothing is mapped */
		break;
	}
	buf->kmap_type = EROFS_NO_KMAP;
	buf->base = NULL;
}

/*
 * Release the page held by @buf, if any: drop its mapping first, then the
 * page reference, and finally forget the page.  A buffer without a page is
 * left as is.
 */
void erofs_put_metabuf(struct erofs_buf *buf)
{
	struct page *held = buf->page;

	if (held) {
		erofs_unmap_metabuf(buf);
		put_page(held);
		buf->page = NULL;
	}
}

/*
 * Return a pointer to the metadata at block @blkaddr, reusing the page
 * already held in @buf when it has the wanted page index.
 *
 * @buf:     meta buffer holding the current page and its mapping state
 * @sb:      super block whose block device mapping is read
 * @blkaddr: block address of the metadata to access
 * @type:    requested mapping type (EROFS_KMAP, EROFS_KMAP_ATOMIC or
 *           EROFS_NO_KMAP)
 *
 * Returns the mapped base address plus the in-page offset of @blkaddr,
 * NULL when @type is EROFS_NO_KMAP, the error pointer produced by
 * read_cache_page_gfp() when reading the page fails, or ERR_PTR(-EFAULT)
 * when @buf is already mapped with a different type.
 */
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
	struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
	struct page *page;
	erofs_off_t offset = blknr_to_addr(blkaddr);
	pgoff_t index = offset >> PAGE_SHIFT;
	struct page *page = buf->page;

	page = read_cache_page_gfp(mapping, blkaddr,
	if (!page || page->index != index) {
		erofs_put_metabuf(buf);
		page = read_cache_page_gfp(mapping, index,
				mapping_gfp_constraint(mapping, ~__GFP_FS));
	/* should already be PageUptodate */
	if (!IS_ERR(page))
		lock_page(page);
		if (IS_ERR(page))
			return page;
		/* should already be PageUptodate, no need to lock page */
		buf->page = page;
	}
	/* map only if the buffer is not mapped yet; a type mismatch is an error */
	if (buf->kmap_type == EROFS_NO_KMAP) {
		if (type == EROFS_KMAP)
			buf->base = kmap(page);
		else if (type == EROFS_KMAP_ATOMIC)
			buf->base = kmap_atomic(page);
		buf->kmap_type = type;
	} else if (buf->kmap_type != type) {
		DBG_BUGON(1);
		return ERR_PTR(-EFAULT);
	}
	/* caller asked for the page only, so there is no address to hand back */
	if (type == EROFS_NO_KMAP)
		return NULL;
	/* offset of the block within its page, added to the mapped base */
	return buf->base + (offset & ~PAGE_MASK);
}

static int erofs_map_blocks_flatmode(struct inode *inode,
				     struct erofs_map_blocks *map,
				     int flags)
{
	int err = 0;
	erofs_blk_t nblocks, lastblk;
	u64 offset = map->m_la;
	struct erofs_inode *vi = EROFS_I(inode);
	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);

	trace_erofs_map_blocks_flatmode_enter(inode, map, flags);

	nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
	nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
	lastblk = nblocks - tailendpacking;

	/* there is no hole in flatmode */
	map->m_flags = EROFS_MAP_MAPPED;

	if (offset < blknr_to_addr(lastblk)) {
		map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
		map->m_plen = blknr_to_addr(lastblk) - offset;
@@ -51,30 +85,23 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
			vi->xattr_isize + erofs_blkoff(map->m_la);
		map->m_plen = inode->i_size - offset;

		/* inline data should be located in one meta block */
		if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
		/* inline data should be located in the same meta block */
		if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
			erofs_err(inode->i_sb,
				  "inline data cross block boundary @ nid %llu",
				  vi->nid);
			DBG_BUGON(1);
			err = -EFSCORRUPTED;
			goto err_out;
			return -EFSCORRUPTED;
		}

		map->m_flags |= EROFS_MAP_META;
	} else {
		erofs_err(inode->i_sb,
			  "internal error @ nid: %llu (size %llu), m_la 0x%llx",
			  vi->nid, inode->i_size, map->m_la);
		DBG_BUGON(1);
		err = -EIO;
		goto err_out;
		return -EIO;
	}

	map->m_llen = map->m_plen;
err_out:
	trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
	return err;
	return 0;
}

static int erofs_map_blocks(struct inode *inode,
@@ -83,12 +110,14 @@ static int erofs_map_blocks(struct inode *inode,
	struct super_block *sb = inode->i_sb;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_inode_chunk_index *idx;
	struct page *page;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	u64 chunknr;
	unsigned int unit;
	erofs_off_t pos;
	void *kaddr;
	int err = 0;

	trace_erofs_map_blocks_enter(inode, map, flags);
	map->m_deviceid = 0;
	if (map->m_la >= inode->i_size) {
		/* leave out-of-bound access unmapped */
@@ -97,8 +126,10 @@ static int erofs_map_blocks(struct inode *inode,
		goto out;
	}

	if (vi->datalayout != EROFS_INODE_CHUNK_BASED)
		return erofs_map_blocks_flatmode(inode, map, flags);
	if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
		err = erofs_map_blocks_flatmode(inode, map, flags);
		goto out;
	}

	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(*idx);			/* chunk index */
@@ -109,17 +140,18 @@ static int erofs_map_blocks(struct inode *inode,
	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
		    vi->xattr_isize, unit) + unit * chunknr;

	page = erofs_get_meta_page(inode->i_sb, erofs_blknr(pos));
	if (IS_ERR(page))
		return PTR_ERR(page);

	kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
	if (IS_ERR(kaddr)) {
		err = PTR_ERR(kaddr);
		goto out;
	}
	map->m_la = chunknr << vi->chunkbits;
	map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
			    roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));

	/* handle block map */
	if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
		__le32 *blkaddr = page_address(page) + erofs_blkoff(pos);
		__le32 *blkaddr = kaddr + erofs_blkoff(pos);

		if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
			map->m_flags = 0;
@@ -130,7 +162,7 @@ static int erofs_map_blocks(struct inode *inode,
		goto out_unlock;
	}
	/* parse chunk indexes */
	idx = page_address(page) + erofs_blkoff(pos);
	idx = kaddr + erofs_blkoff(pos);
	switch (le32_to_cpu(idx->blkaddr)) {
	case EROFS_NULL_ADDR:
		map->m_flags = 0;
@@ -143,10 +175,11 @@ static int erofs_map_blocks(struct inode *inode,
		break;
	}
out_unlock:
	unlock_page(page);
	put_page(page);
	erofs_put_metabuf(&buf);
out:
	if (!err)
		map->m_llen = map->m_plen;
	trace_erofs_map_blocks_exit(inode, map, flags, 0);
	return err;
}

@@ -231,16 +264,16 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
	}

	if (map.m_flags & EROFS_MAP_META) {
		struct page *ipage;
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ipage = erofs_get_meta_page(inode->i_sb,
					    erofs_blknr(mdev.m_pa));
		if (IS_ERR(ipage))
			return PTR_ERR(ipage);
		iomap->inline_data = page_address(ipage) +
					erofs_blkoff(mdev.m_pa);
		iomap->private = ipage;
		ptr = erofs_read_metabuf(&buf, inode->i_sb,
					 erofs_blknr(mdev.m_pa), EROFS_KMAP);
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = mdev.m_pa;
@@ -251,12 +284,17 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	struct page *ipage = iomap->private;
	void *ptr = iomap->private;

	if (ptr) {
		struct erofs_buf buf = {
			.page = kmap_to_page(ptr),
			.base = ptr,
			.kmap_type = EROFS_KMAP,
		};

	if (ipage) {
		DBG_BUGON(iomap->type != IOMAP_INLINE);
		unlock_page(ipage);
		put_page(ipage);
		erofs_put_metabuf(&buf);
	} else {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
	}
Loading