Commit cecf864d authored by Yue Hu's avatar Yue Hu Committed by Gao Xiang
Browse files

erofs: support inline data decompression

Currently, we already support tail-packing inline for
uncompressed files; let's also implement this for compressed
files to save I/Os and storage space.

Different from normal pclusters, compressed data is available
in advance because of other metadata I/Os. Therefore, they
directly move into the bypass queue without extra I/O submission.

It's the last compression feature before folio/subpage support.

Link: https://lore.kernel.org/r/20211228232919.21413-1-xiang@kernel.org


Reviewed-by: default avatarChao Yu <chao@kernel.org>
Signed-off-by: default avatarYue Hu <huyue2@yulong.com>
Signed-off-by: default avatarGao Xiang <hsiangkao@linux.alibaba.com>
parent ab749bad
Loading
Loading
Loading
Loading
+97 −42
Original line number Diff line number Diff line
@@ -82,12 +82,13 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)

static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
{
	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
	int i;

	for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
		struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;

		if (pcl->pclusterpages > pcs->maxpages)
		if (pclusterpages > pcs->maxpages)
			continue;

		kmem_cache_free(pcs->slab, pcl);
@@ -298,6 +299,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
		container_of(grp, struct z_erofs_pcluster, obj);
	int i;

	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
	/*
	 * refcount of workgroup is now freezed as 1,
	 * therefore no need to worry about available decompression users.
@@ -331,6 +333,7 @@ int erofs_try_to_free_cached_page(struct page *page)
	if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
		unsigned int i;

		DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
		for (i = 0; i < pcl->pclusterpages; ++i) {
			if (pcl->compressed_pages[i] == page) {
				WRITE_ONCE(pcl->compressed_pages[i], NULL);
@@ -458,6 +461,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
				       struct inode *inode,
				       struct erofs_map_blocks *map)
{
	bool ztailpacking = map->m_flags & EROFS_MAP_META;
	struct z_erofs_pcluster *pcl;
	struct z_erofs_collection *cl;
	struct erofs_workgroup *grp;
@@ -469,12 +473,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
	}

	/* no available pcluster, let's allocate one */
	pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
	pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 :
				     map->m_plen >> PAGE_SHIFT);
	if (IS_ERR(pcl))
		return PTR_ERR(pcl);

	atomic_set(&pcl->obj.refcount, 1);
	pcl->obj.index = map->m_pa >> PAGE_SHIFT;
	pcl->algorithmformat = map->m_algorithmformat;
	pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
		(map->m_flags & EROFS_MAP_FULL_MAPPED ?
@@ -494,6 +498,13 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
	mutex_init(&cl->lock);
	DBG_BUGON(!mutex_trylock(&cl->lock));

	if (ztailpacking) {
		pcl->obj.index = 0;	/* which indicates ztailpacking */
		pcl->pageofs_in = erofs_blkoff(map->m_pa);
		pcl->tailpacking_size = map->m_plen;
	} else {
		pcl->obj.index = map->m_pa >> PAGE_SHIFT;

		grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
		if (IS_ERR(grp)) {
			err = PTR_ERR(grp);
@@ -501,10 +512,12 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
		}

		if (grp != &pcl->obj) {
		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
			clt->pcl = container_of(grp,
					struct z_erofs_pcluster, obj);
			err = -EEXIST;
			goto err_out;
		}
	}
	/* used to check tail merging loop due to corrupted images */
	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
		clt->tailpcl = pcl;
@@ -532,17 +545,20 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);

	if (!PAGE_ALIGNED(map->m_pa)) {
	if (map->m_flags & EROFS_MAP_META) {
		if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
			DBG_BUGON(1);
		return -EINVAL;
			return -EFSCORRUPTED;
		}
		goto tailpacking;
	}

	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
	if (grp) {
		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
	} else {
tailpacking:
		ret = z_erofs_register_collection(clt, inode, map);

		if (!ret)
			goto out;
		if (ret != -EEXIST)
@@ -558,9 +574,9 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
out:
	z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
				  clt->cl->pagevec, clt->cl->vcnt);

	/* since file-backed online pages are traversed in reverse order */
	clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages;
	clt->icpage_ptr = clt->pcl->compressed_pages +
			z_erofs_pclusterpages(clt->pcl);
	return 0;
}

@@ -681,14 +697,33 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
	if (err)
		goto err_out;

	/* preload all compressed pages (maybe downgrade role if necessary) */
	if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la))
	if (z_erofs_is_inline_pcluster(clt->pcl)) {
		struct page *mpage;

		mpage = erofs_get_meta_page(inode->i_sb,
					    erofs_blknr(map->m_pa));
		if (IS_ERR(mpage)) {
			err = PTR_ERR(mpage);
			erofs_err(inode->i_sb,
				  "failed to get inline page, err %d", err);
			goto err_out;
		}
		/* TODO: new subpage feature will get rid of it */
		unlock_page(mpage);

		WRITE_ONCE(clt->pcl->compressed_pages[0], mpage);
		clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
	} else {
		/* preload all compressed pages (can change mode if needed) */
		if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
					       map->m_la))
			cache_strategy = TRYALLOC;
		else
			cache_strategy = DONTALLOC;

		preload_compressed_pages(clt, MNGD_MAPPING(sbi),
					 cache_strategy, pagepool);
	}

hitted:
	/*
@@ -844,6 +879,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
				       struct page **pagepool)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
	struct z_erofs_pagevec_ctor ctor;
	unsigned int i, inputsize, outputsize, llen, nr_pages;
	struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
@@ -925,15 +961,20 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
	overlapped = false;
	compressed_pages = pcl->compressed_pages;

	for (i = 0; i < pcl->pclusterpages; ++i) {
	for (i = 0; i < pclusterpages; ++i) {
		unsigned int pagenr;

		page = compressed_pages[i];

		/* all compressed pages ought to be valid */
		DBG_BUGON(!page);
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		if (z_erofs_is_inline_pcluster(pcl)) {
			if (!PageUptodate(page))
				err = -EIO;
			continue;
		}

		DBG_BUGON(z_erofs_page_is_invalidated(page));
		if (!z_erofs_is_shortlived_page(page)) {
			if (erofs_page_is_managed(sbi, page)) {
				if (!PageUptodate(page))
@@ -978,11 +1019,16 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
		partial = true;
	}

	inputsize = pcl->pclusterpages * PAGE_SIZE;
	if (z_erofs_is_inline_pcluster(pcl))
		inputsize = pcl->tailpacking_size;
	else
		inputsize = pclusterpages * PAGE_SIZE;

	err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
					.sb = sb,
					.in = compressed_pages,
					.out = pages,
					.pageofs_in = pcl->pageofs_in,
					.pageofs_out = cl->pageofs,
					.inputsize = inputsize,
					.outputsize = outputsize,
@@ -992,8 +1038,13 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
				 }, pagepool);

out:
	/* must handle all compressed pages before ending pages */
	for (i = 0; i < pcl->pclusterpages; ++i) {
	/* must handle all compressed pages before actual file pages */
	if (z_erofs_is_inline_pcluster(pcl)) {
		page = compressed_pages[0];
		WRITE_ONCE(compressed_pages[0], NULL);
		put_page(page);
	} else {
		for (i = 0; i < pclusterpages; ++i) {
			page = compressed_pages[i];

			if (erofs_page_is_managed(sbi, page))
@@ -1001,9 +1052,9 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,

			/* recycle all individual short-lived pages */
			(void)z_erofs_put_shortlivedpage(pagepool, page);

			WRITE_ONCE(compressed_pages[i], NULL);
		}
	}

	for (i = 0; i < nr_pages; ++i) {
		page = pages[i];
@@ -1288,6 +1339,14 @@ static void z_erofs_submit_queue(struct super_block *sb,

		pcl = container_of(owned_head, struct z_erofs_pcluster, next);

		/* close the main owned chain at first */
		owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
				     Z_EROFS_PCLUSTER_TAIL_CLOSED);
		if (z_erofs_is_inline_pcluster(pcl)) {
			move_to_bypass_jobqueue(pcl, qtail, owned_head);
			continue;
		}

		/* no device id here, thus it will always succeed */
		mdev = (struct erofs_map_dev) {
			.m_pa = blknr_to_addr(pcl->obj.index),
@@ -1297,10 +1356,6 @@ static void z_erofs_submit_queue(struct super_block *sb,
		cur = erofs_blknr(mdev.m_pa);
		end = cur + pcl->pclusterpages;

		/* close the main owned chain at first */
		owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
				     Z_EROFS_PCLUSTER_TAIL_CLOSED);

		do {
			struct page *page;

+22 −2
Original line number Diff line number Diff line
@@ -62,9 +62,17 @@ struct z_erofs_pcluster {
	/* A: lower limit of decompressed length and if full length or not */
	unsigned int length;

	/* I: page offset of inline compressed data */
	unsigned short pageofs_in;

	union {
		/* I: physical cluster size in pages */
		unsigned short pclusterpages;

		/* I: tailpacking inline compressed size */
		unsigned short tailpacking_size;
	};

	/* I: compression algorithm format */
	unsigned char algorithmformat;

@@ -94,6 +102,18 @@ struct z_erofs_decompressqueue {
	} u;
};

/* An inline (ztailpacking) pcluster is marked by a zero workgroup index. */
static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
{
	return pcl->obj.index == 0;
}

/*
 * Number of compressed pages backing this pcluster; an inline
 * (ztailpacking) pcluster always occupies exactly one page.
 */
static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
{
	return z_erofs_is_inline_pcluster(pcl) ? 1 : pcl->pclusterpages;
}

#define Z_EROFS_ONLINEPAGE_COUNT_BITS   2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK   ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT  (Z_EROFS_ONLINEPAGE_COUNT_BITS)