Commit 622ceadd authored by Gao Xiang's avatar Gao Xiang
Browse files

erofs: lzma compression support

Add MicroLZMA support in order to maximize compression ratios for
specific scenarios. For example, it's useful for low-end embedded
boards and as a secondary algorithm in a file for specific access
patterns.

MicroLZMA is a new container format for raw LZMA1, which was created
by Lasse Collin aiming to minimize old LZMA headers and get rid of
unnecessary EOPM (end of payload marker) as well as to enable
fixed-sized output compression, especially for 4KiB pclusters.

Similar to LZ4, inplace I/O approach is used to minimize runtime
memory footprint when dealing with I/O. Overlapped decompression is
handled with 1) bounced buffer for data under processing or 2) extra
short-lived pages from the on-stack pagepool which will be shared in
the same read request (128KiB for example).

Link: https://lore.kernel.org/r/20211010213145.17462-8-xiang@kernel.org


Acked-by: default avatarChao Yu <chao@kernel.org>
Signed-off-by: default avatarGao Xiang <hsiangkao@linux.alibaba.com>
parent 966edfb0
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -82,3 +82,19 @@ config EROFS_FS_ZIP
	  Enable fixed-sized output compression for EROFS.

	  If you don't want to enable compression feature, say N.

config EROFS_FS_ZIP_LZMA
	bool "EROFS LZMA compressed data support"
	depends on EROFS_FS_ZIP
	select XZ_DEC
	select XZ_DEC_MICROLZMA
	help
	  Saying Y here includes support for reading EROFS file systems
	  containing LZMA compressed data, specifically called microLZMA. it
	  gives better compression ratios than the LZ4 algorithm, at the
	  expense of more CPU overhead.

	  LZMA support is an experimental feature for now and so most file
	  systems will be readable without selecting this option.

	  If unsure, say N.
+1 −0
Original line number Diff line number Diff line
@@ -4,3 +4,4 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
+16 −0
Original line number Diff line number Diff line
@@ -20,6 +20,12 @@ struct z_erofs_decompress_req {
	bool inplace_io, partial_decoding;
};

struct z_erofs_decompressor {
	int (*decompress)(struct z_erofs_decompress_req *rq,
			  struct list_head *pagepool);
	char *name;
};

/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
@@ -75,7 +81,17 @@ static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
	return true;
}

#define MNGD_MAPPING(sbi)	((sbi)->managed_cache->i_mapping)
static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
					 struct page *page)
{
	return page->mapping == MNGD_MAPPING(sbi);
}

int z_erofs_decompress(struct z_erofs_decompress_req *rq,
		       struct list_head *pagepool);

/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
			    struct list_head *pagepool);
#endif
+6 −6
Original line number Diff line number Diff line
@@ -16,12 +16,6 @@
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize)  (((srcsize) >> 8) + 32)
#endif

struct z_erofs_decompressor {
	int (*decompress)(struct z_erofs_decompress_req *rq,
			  struct list_head *pagepool);
	char *name;
};

int z_erofs_load_lz4_config(struct super_block *sb,
			    struct erofs_super_block *dsb,
			    struct z_erofs_lz4_cfgs *lz4, int size)
@@ -349,6 +343,12 @@ static struct z_erofs_decompressor decompressors[] = {
		.decompress = z_erofs_lz4_decompress,
		.name = "lz4"
	},
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
	[Z_EROFS_COMPRESSION_LZMA] = {
		.decompress = z_erofs_lzma_decompress,
		.name = "lzma"
	},
#endif
};

int z_erofs_decompress(struct z_erofs_decompress_req *rq,
+290 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/xz.h>
#include <linux/module.h>
#include "compress.h"

struct z_erofs_lzma {
	struct z_erofs_lzma *next;
	struct xz_dec_microlzma *state;
	struct xz_buf buf;
	u8 bounce[PAGE_SIZE];
};

/* considering the LZMA performance, no need to use a lockless list for now */
static DEFINE_SPINLOCK(z_erofs_lzma_lock);
static unsigned int z_erofs_lzma_max_dictsize;
static unsigned int z_erofs_lzma_nstrms, z_erofs_lzma_avail_strms;
static struct z_erofs_lzma *z_erofs_lzma_head;
static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq);

module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444);

void z_erofs_lzma_exit(void)
{
	/* there should be no running fs instance */
	while (z_erofs_lzma_avail_strms) {
		struct z_erofs_lzma *strm;

		spin_lock(&z_erofs_lzma_lock);
		strm = z_erofs_lzma_head;
		if (!strm) {
			spin_unlock(&z_erofs_lzma_lock);
			DBG_BUGON(1);
			return;
		}
		z_erofs_lzma_head = NULL;
		spin_unlock(&z_erofs_lzma_lock);

		while (strm) {
			struct z_erofs_lzma *n = strm->next;

			if (strm->state)
				xz_dec_microlzma_end(strm->state);
			kfree(strm);
			--z_erofs_lzma_avail_strms;
			strm = n;
		}
	}
}

int z_erofs_lzma_init(void)
{
	unsigned int i;

	/* by default, use # of possible CPUs instead */
	if (!z_erofs_lzma_nstrms)
		z_erofs_lzma_nstrms = num_possible_cpus();

	for (i = 0; i < z_erofs_lzma_nstrms; ++i) {
		struct z_erofs_lzma *strm = kzalloc(sizeof(*strm), GFP_KERNEL);

		if (!strm) {
			z_erofs_lzma_exit();
			return -ENOMEM;
		}
		spin_lock(&z_erofs_lzma_lock);
		strm->next = z_erofs_lzma_head;
		z_erofs_lzma_head = strm;
		spin_unlock(&z_erofs_lzma_lock);
		++z_erofs_lzma_avail_strms;
	}
	return 0;
}

int z_erofs_load_lzma_config(struct super_block *sb,
			     struct erofs_super_block *dsb,
			     struct z_erofs_lzma_cfgs *lzma, int size)
{
	static DEFINE_MUTEX(lzma_resize_mutex);
	unsigned int dict_size, i;
	struct z_erofs_lzma *strm, *head = NULL;
	int err;

	if (!lzma || size < sizeof(struct z_erofs_lzma_cfgs)) {
		erofs_err(sb, "invalid lzma cfgs, size=%u", size);
		return -EINVAL;
	}
	if (lzma->format) {
		erofs_err(sb, "unidentified lzma format %x, please check kernel version",
			  le16_to_cpu(lzma->format));
		return -EINVAL;
	}
	dict_size = le32_to_cpu(lzma->dict_size);
	if (dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE || dict_size < 4096) {
		erofs_err(sb, "unsupported lzma dictionary size %u",
			  dict_size);
		return -EINVAL;
	}

	erofs_info(sb, "EXPERIMENTAL MicroLZMA in use. Use at your own risk!");

	/* in case 2 z_erofs_load_lzma_config() race to avoid deadlock */
	mutex_lock(&lzma_resize_mutex);

	if (z_erofs_lzma_max_dictsize >= dict_size) {
		mutex_unlock(&lzma_resize_mutex);
		return 0;
	}

	/* 1. collect/isolate all streams for the following check */
	for (i = 0; i < z_erofs_lzma_avail_strms; ++i) {
		struct z_erofs_lzma *last;

again:
		spin_lock(&z_erofs_lzma_lock);
		strm = z_erofs_lzma_head;
		if (!strm) {
			spin_unlock(&z_erofs_lzma_lock);
			wait_event(z_erofs_lzma_wq,
				   READ_ONCE(z_erofs_lzma_head));
			goto again;
		}
		z_erofs_lzma_head = NULL;
		spin_unlock(&z_erofs_lzma_lock);

		for (last = strm; last->next; last = last->next)
			++i;
		last->next = head;
		head = strm;
	}

	err = 0;
	/* 2. walk each isolated stream and grow max dict_size if needed */
	for (strm = head; strm; strm = strm->next) {
		if (strm->state)
			xz_dec_microlzma_end(strm->state);
		strm->state = xz_dec_microlzma_alloc(XZ_PREALLOC, dict_size);
		if (!strm->state)
			err = -ENOMEM;
	}

	/* 3. push back all to the global list and update max dict_size */
	spin_lock(&z_erofs_lzma_lock);
	DBG_BUGON(z_erofs_lzma_head);
	z_erofs_lzma_head = head;
	spin_unlock(&z_erofs_lzma_lock);

	z_erofs_lzma_max_dictsize = dict_size;
	mutex_unlock(&lzma_resize_mutex);
	return err;
}

int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
			    struct list_head *pagepool)
{
	const unsigned int nrpages_out =
		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
	const unsigned int nrpages_in =
		PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
	unsigned int inputmargin, inlen, outlen, pageofs;
	struct z_erofs_lzma *strm;
	u8 *kin;
	bool bounced = false;
	int no, ni, j, err = 0;

	/* 1. get the exact LZMA compressed size */
	kin = kmap(*rq->in);
	inputmargin = 0;
	while (!kin[inputmargin & ~PAGE_MASK])
		if (!(++inputmargin & ~PAGE_MASK))
			break;

	if (inputmargin >= PAGE_SIZE) {
		kunmap(*rq->in);
		return -EFSCORRUPTED;
	}
	rq->inputsize -= inputmargin;

	/* 2. get an available lzma context */
again:
	spin_lock(&z_erofs_lzma_lock);
	strm = z_erofs_lzma_head;
	if (!strm) {
		spin_unlock(&z_erofs_lzma_lock);
		wait_event(z_erofs_lzma_wq, READ_ONCE(z_erofs_lzma_head));
		goto again;
	}
	z_erofs_lzma_head = strm->next;
	spin_unlock(&z_erofs_lzma_lock);

	/* 3. multi-call decompress */
	inlen = rq->inputsize;
	outlen = rq->outputsize;
	xz_dec_microlzma_reset(strm->state, inlen, outlen,
			       !rq->partial_decoding);
	pageofs = rq->pageofs_out;
	strm->buf.in = kin + inputmargin;
	strm->buf.in_pos = 0;
	strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin);
	inlen -= strm->buf.in_size;
	strm->buf.out = NULL;
	strm->buf.out_pos = 0;
	strm->buf.out_size = 0;

	for (ni = 0, no = -1;;) {
		enum xz_ret xz_err;

		if (strm->buf.out_pos == strm->buf.out_size) {
			if (strm->buf.out) {
				kunmap(rq->out[no]);
				strm->buf.out = NULL;
			}

			if (++no >= nrpages_out || !outlen) {
				erofs_err(rq->sb, "decompressed buf out of bound");
				err = -EFSCORRUPTED;
				break;
			}
			strm->buf.out_pos = 0;
			strm->buf.out_size = min_t(u32, outlen,
						   PAGE_SIZE - pageofs);
			outlen -= strm->buf.out_size;
			if (rq->out[no])
				strm->buf.out = kmap(rq->out[no]) + pageofs;
			pageofs = 0;
		} else if (strm->buf.in_pos == strm->buf.in_size) {
			kunmap(rq->in[ni]);

			if (++ni >= nrpages_in || !inlen) {
				erofs_err(rq->sb, "compressed buf out of bound");
				err = -EFSCORRUPTED;
				break;
			}
			strm->buf.in_pos = 0;
			strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE);
			inlen -= strm->buf.in_size;
			kin = kmap(rq->in[ni]);
			strm->buf.in = kin;
			bounced = false;
		}

		/*
		 * Handle overlapping: Use bounced buffer if the compressed
		 * data is under processing; Otherwise, Use short-lived pages
		 * from the on-stack pagepool where pages share with the same
		 * request.
		 */
		if (!bounced && rq->out[no] == rq->in[ni]) {
			memcpy(strm->bounce, strm->buf.in, strm->buf.in_size);
			strm->buf.in = strm->bounce;
			bounced = true;
		}
		for (j = ni + 1; j < nrpages_in; ++j) {
			struct page *tmppage;

			if (rq->out[no] != rq->in[j])
				continue;

			DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
							rq->in[j]));
			tmppage = erofs_allocpage(pagepool,
						  GFP_KERNEL | __GFP_NOFAIL);
			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
			copy_highpage(tmppage, rq->in[j]);
			rq->in[j] = tmppage;
		}
		xz_err = xz_dec_microlzma_run(strm->state, &strm->buf);
		DBG_BUGON(strm->buf.out_pos > strm->buf.out_size);
		DBG_BUGON(strm->buf.in_pos > strm->buf.in_size);

		if (xz_err != XZ_OK) {
			if (xz_err == XZ_STREAM_END && !outlen)
				break;
			erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]",
				  xz_err, rq->inputsize, rq->outputsize);
			err = -EFSCORRUPTED;
			break;
		}
	}
	if (no < nrpages_out && strm->buf.out)
		kunmap(rq->in[no]);
	if (ni < nrpages_in)
		kunmap(rq->in[ni]);
	/* 4. push back LZMA stream context to the global list */
	spin_lock(&z_erofs_lzma_lock);
	strm->next = z_erofs_lzma_head;
	z_erofs_lzma_head = strm;
	spin_unlock(&z_erofs_lzma_lock);
	wake_up(&z_erofs_lzma_wq);
	return err;
}
Loading