Commit 098c5dd9 authored by Linus Torvalds
Browse files
Pull erofs updates from Gao Xiang:
 "No outstanding new feature for this cycle.

  Most of these commits are decompression cleanups which are part of the
  ongoing development for subpage/folio compression support as well as
  xattr cleanups for the upcoming xattr bloom filter optimization [1].

  In addition, there are bugfixes to address some corner cases of
  compressed images due to global data de-duplication and arm64 16k
  pages.

  Summary:

   - Fix rare I/O hang on deduplicated compressed images due to loop
     hooked chains

   - Fix compact compression layout of 16k blocks on arm64 devices

   - Fix atomic context detection of async decompression

   - Decompression/Xattr code cleanups"

Link: https://lore.kernel.org/r/20230621083209.116024-1-jefflexu@linux.alibaba.com [1]

* tag 'erofs-for-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: clean up zmap.c
  erofs: remove unnecessary goto
  erofs: Fix detection of atomic context
  erofs: use separate xattr parsers for listxattr/getxattr
  erofs: unify inline/shared xattr iterators for listxattr/getxattr
  erofs: make the size of read data stored in buffer_ofs
  erofs: unify xattr_iter structures
  erofs: use absolute position in xattr iterator
  erofs: fix compact 4B support for 16k block size
  erofs: convert erofs_read_metabuf() to erofs_bread() for xattr
  erofs: use poison pointer to replace the hard-coded address
  erofs: use struct lockref to replace handcrafted approach
  erofs: adapt managed inode operations into folios
  erofs: kill hooked chains to avoid loops on deduplicated compressed images
  erofs: avoid on-stack pagepool directly passed by arguments
  erofs: allocate extra bvec pages directly instead of retrying
  erofs: clean up z_erofs_pcluster_readmore()
  erofs: remove the member readahead from struct z_erofs_decompress_frontend
  erofs: fold in z_erofs_decompress()
parents 74774e24 8241fdd3
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -89,8 +89,7 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,

int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
			 unsigned int padbufsize);
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
		       struct page **pagepool);
extern const struct z_erofs_decompressor erofs_decompressors[];

/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
+1 −7
Original line number Diff line number Diff line
@@ -363,7 +363,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
	return 0;
}

static struct z_erofs_decompressor decompressors[] = {
const struct z_erofs_decompressor erofs_decompressors[] = {
	[Z_EROFS_COMPRESSION_SHIFTED] = {
		.decompress = z_erofs_transform_plain,
		.name = "shifted"
@@ -383,9 +383,3 @@ static struct z_erofs_decompressor decompressors[] = {
	},
#endif
};

int z_erofs_decompress(struct z_erofs_decompress_req *rq,
		       struct page **pagepool)
{
	return decompressors[rq->alg].decompress(rq, pagepool);
}
+4 −37
Original line number Diff line number Diff line
@@ -208,46 +208,12 @@ enum {
	EROFS_ZIP_CACHE_READAROUND
};

#define EROFS_LOCKED_MAGIC     (INT_MIN | 0xE0F510CCL)

/* basic unit of the workstation of a super_block */
struct erofs_workgroup {
	/* the workgroup index in the workstation */
	pgoff_t index;

	/* overall workgroup reference count */
	atomic_t refcount;
	struct lockref lockref;
};

static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
						 int val)
{
	preempt_disable();
	if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
		preempt_enable();
		return false;
	}
	return true;
}

static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
					    int orig_val)
{
	/*
	 * other observers should notice all modifications
	 * in the freezing period.
	 */
	smp_mb();
	atomic_set(&grp->refcount, orig_val);
	preempt_enable();
}

static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
{
	return atomic_cond_read_relaxed(&grp->refcount,
					VAL != EROFS_LOCKED_MAGIC);
}

enum erofs_kmap_type {
	EROFS_NO_KMAP,		/* don't map the buffer */
	EROFS_KMAP,		/* use kmap_local_page() to map the buffer */
@@ -486,7 +452,7 @@ static inline void erofs_pagepool_add(struct page **pagepool, struct page *page)
void erofs_release_pages(struct page **pagepool);

#ifdef CONFIG_EROFS_FS_ZIP
int erofs_workgroup_put(struct erofs_workgroup *grp);
void erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index);
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
@@ -500,7 +466,6 @@ int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void);
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
				       struct erofs_workgroup *egrp);
int erofs_try_to_free_cached_page(struct page *page);
int z_erofs_load_lz4_config(struct super_block *sb,
			    struct erofs_super_block *dsb,
			    struct z_erofs_lz4_cfgs *lz4, int len);
@@ -511,6 +476,7 @@ void erofs_put_pcpubuf(void *ptr);
int erofs_pcpubuf_growsize(unsigned int nrpages);
void __init erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
int erofs_init_managed_cache(struct super_block *sb);
#else
static inline void erofs_shrinker_register(struct super_block *sb) {}
static inline void erofs_shrinker_unregister(struct super_block *sb) {}
@@ -530,6 +496,7 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
}
static inline void erofs_pcpubuf_init(void) {}
static inline void erofs_pcpubuf_exit(void) {}
static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif	/* !CONFIG_EROFS_FS_ZIP */

#ifdef CONFIG_EROFS_FS_ZIP_LZMA
+2 −67
Original line number Diff line number Diff line
@@ -599,68 +599,6 @@ static int erofs_fc_parse_param(struct fs_context *fc,
	return 0;
}

#ifdef CONFIG_EROFS_FS_ZIP
static const struct address_space_operations managed_cache_aops;

static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp)
{
	bool ret = true;
	struct address_space *const mapping = folio->mapping;

	DBG_BUGON(!folio_test_locked(folio));
	DBG_BUGON(mapping->a_ops != &managed_cache_aops);

	if (folio_test_private(folio))
		ret = erofs_try_to_free_cached_page(&folio->page);

	return ret;
}

/*
 * It will be called only on inode eviction. In case that there are still some
 * decompression requests in progress, wait with rescheduling for a bit here.
 * We could introduce an extra locking instead but it seems unnecessary.
 */
static void erofs_managed_cache_invalidate_folio(struct folio *folio,
					       size_t offset, size_t length)
{
	const size_t stop = length + offset;

	DBG_BUGON(!folio_test_locked(folio));

	/* Check for potential overflow in debug mode */
	DBG_BUGON(stop > folio_size(folio) || stop < length);

	if (offset == 0 && stop == folio_size(folio))
		while (!erofs_managed_cache_release_folio(folio, GFP_NOFS))
			cond_resched();
}

static const struct address_space_operations managed_cache_aops = {
	.release_folio = erofs_managed_cache_release_folio,
	.invalidate_folio = erofs_managed_cache_invalidate_folio,
};

static int erofs_init_managed_cache(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct inode *const inode = new_inode(sb);

	if (!inode)
		return -ENOMEM;

	set_nlink(inode, 1);
	inode->i_size = OFFSET_MAX;

	inode->i_mapping->a_ops = &managed_cache_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
	sbi->managed_cache = inode;
	return 0;
}
#else
static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif

static struct inode *erofs_nfs_get_inode(struct super_block *sb,
					 u64 ino, u32 generation)
{
@@ -1016,10 +954,8 @@ static int __init erofs_module_init(void)
					       sizeof(struct erofs_inode), 0,
					       SLAB_RECLAIM_ACCOUNT,
					       erofs_inode_init_once);
	if (!erofs_inode_cachep) {
		err = -ENOMEM;
		goto icache_err;
	}
	if (!erofs_inode_cachep)
		return -ENOMEM;

	err = erofs_init_shrinker();
	if (err)
@@ -1054,7 +990,6 @@ static int __init erofs_module_init(void)
	erofs_exit_shrinker();
shrinker_err:
	kmem_cache_destroy(erofs_inode_cachep);
icache_err:
	return err;
}

+41 −45
Original line number Diff line number Diff line
@@ -4,7 +4,6 @@
 *             https://www.huawei.com/
 */
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
{
@@ -33,22 +32,21 @@ void erofs_release_pages(struct page **pagepool)
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

static int erofs_workgroup_get(struct erofs_workgroup *grp)
static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;
	if (lockref_get_not_zero(&grp->lockref))
		return true;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;
	spin_lock(&grp->lockref.lock);
	if (__lockref_is_dead(&grp->lockref)) {
		spin_unlock(&grp->lockref.lock);
		return false;
	}

	/* decrease refcount paired by erofs_workgroup_put */
	if (o == 1)
	if (!grp->lockref.count++)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
	spin_unlock(&grp->lockref.lock);
	return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
@@ -61,7 +59,7 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
		if (!erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
@@ -80,11 +78,10 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
	struct erofs_workgroup *pre;

	/*
	 * Bump up a reference count before making this visible
	 * to others for the XArray in order to avoid potential
	 * UAF without serialized by xa_lock.
	 * Bump up before making this visible to others for the XArray in order
	 * to avoid potential UAF without serialized by xa_lock.
	 */
	atomic_inc(&grp->refcount);
	lockref_get(&grp->lockref);

repeat:
	xa_lock(&sbi->managed_pslots);
@@ -93,13 +90,13 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
		} else if (!erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		atomic_dec(&grp->refcount);
		lockref_put_return(&grp->lockref);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
@@ -112,38 +109,34 @@ static void __erofs_workgroup_free(struct erofs_workgroup *grp)
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
void erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);
	if (lockref_put_or_lock(&grp->lockref))
		return;

	if (count == 1)
	DBG_BUGON(__lockref_is_dead(&grp->lockref));
	if (grp->lockref.count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
	--grp->lockref.count;
	spin_unlock(&grp->lockref.lock);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If managed cache is on, refcount of workgroups
	 * themselves could be < 0 (freezed). In other words,
	 * there is no guarantee that all refcounts > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;
	int free = false;

	spin_lock(&grp->lockref.lock);
	if (grp->lockref.count)
		goto out;

	/*
	 * Note that all cached pages should be unattached
	 * before deleted from the XArray. Otherwise some
	 * cached pages could be still attached to the orphan
	 * old workgroup when the new one is available in the tree.
	 * Note that all cached pages should be detached before deleted from
	 * the XArray. Otherwise some cached pages could be still attached to
	 * the orphan old workgroup when the new one is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}
	if (erofs_try_to_free_all_cached_pages(sbi, grp))
		goto out;

	/*
	 * It's impossible to fail after the workgroup is freezed,
@@ -152,10 +145,13 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/* last refcount should be connected with its managed pslot.  */
	erofs_workgroup_unfreeze(grp, 0);
	lockref_mark_dead(&grp->lockref);
	free = true;
out:
	spin_unlock(&grp->lockref.lock);
	if (free)
		__erofs_workgroup_free(grp);
	return true;
	return free;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
Loading