Commit bf11b9a8 authored by Sebastian Andrzej Siewior's avatar Sebastian Andrzej Siewior Committed by Linus Torvalds
Browse files

shmem: use raw_spinlock_t for ->stat_lock

Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is
per-CPU.  Access here is serialized by disabling preemption.  If the pool
is empty, it gets reloaded from `->next_ino'.  Access here is serialized
by ->stat_lock which is a spinlock_t and can not be acquired with disabled
preemption.

One way around it would make per-CPU ino_batch struct containing the inode
number a local_lock_t.

Another solution is to promote ->stat_lock to a raw_spinlock_t.  The
critical sections are short.  The mpol_put() must be moved outside of the
critical section to avoid invoking the destructor with disabled
preemption.

Link: https://lkml.kernel.org/r/20210806142916.jdwkb5bx62q5fwfo@linutronix.de


Signed-off-by: default avatarSebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: default avatarHugh Dickins <hughd@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 3969b1a6
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -31,7 +31,7 @@ struct shmem_sb_info {
	struct percpu_counter used_blocks;  /* How many are allocated */
	unsigned long max_inodes;   /* How many inodes are allowed */
	unsigned long free_inodes;  /* How many are left for allocation */
	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
	raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
	umode_t mode;		    /* Mount mode for root directory */
	unsigned char huge;	    /* Whether to try for hugepages */
	kuid_t uid;		    /* Mount uid for root directory */
+17 −14
Original line number Diff line number Diff line
@@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
	ino_t ino;

	if (!(sb->s_flags & SB_KERNMOUNT)) {
		spin_lock(&sbinfo->stat_lock);
		raw_spin_lock(&sbinfo->stat_lock);
		if (sbinfo->max_inodes) {
			if (!sbinfo->free_inodes) {
				spin_unlock(&sbinfo->stat_lock);
				raw_spin_unlock(&sbinfo->stat_lock);
				return -ENOSPC;
			}
			sbinfo->free_inodes--;
@@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
			}
			*inop = ino;
		}
		spin_unlock(&sbinfo->stat_lock);
		raw_spin_unlock(&sbinfo->stat_lock);
	} else if (inop) {
		/*
		 * __shmem_file_setup, one of our callers, is lock-free: it
@@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
		 * to worry about things like glibc compatibility.
		 */
		ino_t *next_ino;

		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
		ino = *next_ino;
		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
			spin_lock(&sbinfo->stat_lock);
			raw_spin_lock(&sbinfo->stat_lock);
			ino = sbinfo->next_ino;
			sbinfo->next_ino += SHMEM_INO_BATCH;
			spin_unlock(&sbinfo->stat_lock);
			raw_spin_unlock(&sbinfo->stat_lock);
			if (unlikely(is_zero_ino(ino)))
				ino++;
		}
@@ -341,9 +342,9 @@ static void shmem_free_inode(struct super_block *sb)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	if (sbinfo->max_inodes) {
		spin_lock(&sbinfo->stat_lock);
		raw_spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
		raw_spin_unlock(&sbinfo->stat_lock);
	}
}

@@ -1453,10 +1454,10 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	struct mempolicy *mpol = NULL;
	if (sbinfo->mpol) {
		spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
		raw_spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
		mpol = sbinfo->mpol;
		mpol_get(mpol);
		spin_unlock(&sbinfo->stat_lock);
		raw_spin_unlock(&sbinfo->stat_lock);
	}
	return mpol;
}
@@ -3488,9 +3489,10 @@ static int shmem_reconfigure(struct fs_context *fc)
	struct shmem_options *ctx = fc->fs_private;
	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
	unsigned long inodes;
	struct mempolicy *mpol = NULL;
	const char *err;

	spin_lock(&sbinfo->stat_lock);
	raw_spin_lock(&sbinfo->stat_lock);
	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
		if (!sbinfo->max_blocks) {
@@ -3535,14 +3537,15 @@ static int shmem_reconfigure(struct fs_context *fc)
	 * Preserve previous mempolicy unless mpol remount option was specified.
	 */
	if (ctx->mpol) {
		mpol_put(sbinfo->mpol);
		mpol = sbinfo->mpol;
		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
		ctx->mpol = NULL;
	}
	spin_unlock(&sbinfo->stat_lock);
	raw_spin_unlock(&sbinfo->stat_lock);
	mpol_put(mpol);
	return 0;
out:
	spin_unlock(&sbinfo->stat_lock);
	raw_spin_unlock(&sbinfo->stat_lock);
	return invalfc(fc, "%s", err);
}

@@ -3659,7 +3662,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
	sbinfo->mpol = ctx->mpol;
	ctx->mpol = NULL;

	spin_lock_init(&sbinfo->stat_lock);
	raw_spin_lock_init(&sbinfo->stat_lock);
	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
		goto failed;
	spin_lock_init(&sbinfo->shrinklist_lock);