Commit 47a7c01c authored by Qi Zheng, committed by Andrew Morton
Browse files

Revert "mm: shrinkers: convert shrinker_rwsem to mutex"

Patch series "revert shrinker_srcu related changes".


This patch (of 7):

This reverts commit cf2e309e.

Kernel test robot reports -88.8% regression in stress-ng.ramfs.ops_per_sec
test case [1], which is caused by commit f95bdb70 ("mm: vmscan: make
global slab shrink lockless").  The root cause is that SRCU has to be
careful to not frequently check for SRCU read-side critical section exits.
Therefore, even if no one is currently in the SRCU read-side critical
section, synchronize_srcu() cannot return quickly.  That's why
unregister_shrinker() has become slower.

After discussion, we will try to use the refcount+RCU method [2] proposed
by Dave Chinner to continue to re-implement the lockless slab shrink.  So
revert the shrinker_mutex back to shrinker_rwsem first.

[1]. https://lore.kernel.org/lkml/202305230837.db2c233f-yujie.liu@intel.com/
[2]. https://lore.kernel.org/lkml/ZIJhou1d55d4H1s0@dread.disaster.area/

Link: https://lkml.kernel.org/r/20230609081518.3039120-1-qi.zheng@linux.dev
Link: https://lkml.kernel.org/r/20230609081518.3039120-2-qi.zheng@linux.dev


Reported-by: kernel test robot <yujie.liu@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202305230837.db2c233f-yujie.liu@intel.com


Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Kirill Tkhai <tkhai@ya.ru>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yujie Liu <yujie.liu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 679bd7eb
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
	 * Replacement block manager (new_bm) is created and old_bm destroyed outside of
	 * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
	 * shrinker associated with the block manager's bufio client vs cmd root_lock).
	 * - must take shrinker_mutex without holding cmd->root_lock
	 * - must take shrinker_rwsem without holding cmd->root_lock
	 */
	new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
					 CACHE_MAX_CONCURRENT_LOCKS);
+1 −1
Original line number Diff line number Diff line
@@ -1887,7 +1887,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
	 * Replacement block manager (new_bm) is created and old_bm destroyed outside of
	 * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
	 * shrinker associated with the block manager's bufio client vs pmd root_lock).
	 * - must take shrinker_mutex without holding pmd->root_lock
	 * - must take shrinker_rwsem without holding pmd->root_lock
	 */
	new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
					 THIN_MAX_CONCURRENT_LOCKS);
+1 −1
Original line number Diff line number Diff line
@@ -54,7 +54,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
 * One thing we have to be careful of with a per-sb shrinker is that we don't
 * drop the last active reference to the superblock from within the shrinker.
 * If that happens we could trigger unregistering the shrinker from within the
 * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we
 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
 * take a passive reference to the superblock to avoid this from occurring.
 */
static unsigned long super_cache_scan(struct shrinker *shrink,
+7 −7
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@
#include <linux/srcu.h>

/* defined in vmscan.c */
extern struct mutex shrinker_mutex;
extern struct rw_semaphore shrinker_rwsem;
extern struct list_head shrinker_list;
extern struct srcu_struct shrinker_srcu;

@@ -168,7 +168,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker)
	char buf[128];
	int id;

	lockdep_assert_held(&shrinker_mutex);
	lockdep_assert_held(&shrinker_rwsem);

	/* debugfs isn't initialized yet, add debugfs entries later. */
	if (!shrinker_debugfs_root)
@@ -211,7 +211,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
	if (!new)
		return -ENOMEM;

	mutex_lock(&shrinker_mutex);
	down_write(&shrinker_rwsem);

	old = shrinker->name;
	shrinker->name = new;
@@ -229,7 +229,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
			shrinker->debugfs_entry = entry;
	}

	mutex_unlock(&shrinker_mutex);
	up_write(&shrinker_rwsem);

	kfree_const(old);

@@ -242,7 +242,7 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
{
	struct dentry *entry = shrinker->debugfs_entry;

	lockdep_assert_held(&shrinker_mutex);
	lockdep_assert_held(&shrinker_rwsem);

	kfree_const(shrinker->name);
	shrinker->name = NULL;
@@ -271,14 +271,14 @@ static int __init shrinker_debugfs_init(void)
	shrinker_debugfs_root = dentry;

	/* Create debugfs entries for shrinkers registered at boot */
	mutex_lock(&shrinker_mutex);
	down_write(&shrinker_rwsem);
	list_for_each_entry(shrinker, &shrinker_list, list)
		if (!shrinker->debugfs_entry) {
			ret = shrinker_debugfs_add(shrinker);
			if (ret)
				break;
		}
	mutex_unlock(&shrinker_mutex);
	up_write(&shrinker_rwsem);

	return ret;
}
+17 −17
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@
#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -190,7 +190,7 @@ struct scan_control {
int vm_swappiness = 60;

LIST_HEAD(shrinker_list);
DEFINE_MUTEX(shrinker_mutex);
DECLARE_RWSEM(shrinker_rwsem);
DEFINE_SRCU(shrinker_srcu);
static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0);

@@ -213,7 +213,7 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
{
	return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
				      &shrinker_srcu,
				      lockdep_is_held(&shrinker_mutex));
				      lockdep_is_held(&shrinker_rwsem));
}

static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
@@ -292,7 +292,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
	int nid, size, ret = 0;
	int map_size, defer_size = 0;

	mutex_lock(&shrinker_mutex);
	down_write(&shrinker_rwsem);
	map_size = shrinker_map_size(shrinker_nr_max);
	defer_size = shrinker_defer_size(shrinker_nr_max);
	size = map_size + defer_size;
@@ -308,7 +308,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
		info->map_nr_max = shrinker_nr_max;
		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
	}
	mutex_unlock(&shrinker_mutex);
	up_write(&shrinker_rwsem);

	return ret;
}
@@ -324,7 +324,7 @@ static int expand_shrinker_info(int new_id)
	if (!root_mem_cgroup)
		goto out;

	lockdep_assert_held(&shrinker_mutex);
	lockdep_assert_held(&shrinker_rwsem);

	map_size = shrinker_map_size(new_nr_max);
	defer_size = shrinker_defer_size(new_nr_max);
@@ -374,7 +374,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
	if (mem_cgroup_disabled())
		return -ENOSYS;

	mutex_lock(&shrinker_mutex);
	down_write(&shrinker_rwsem);
	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
	if (id < 0)
		goto unlock;
@@ -388,7 +388,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
	shrinker->id = id;
	ret = 0;
unlock:
	mutex_unlock(&shrinker_mutex);
	up_write(&shrinker_rwsem);
	return ret;
}

@@ -398,7 +398,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)

	BUG_ON(id < 0);

	lockdep_assert_held(&shrinker_mutex);
	lockdep_assert_held(&shrinker_rwsem);

	idr_remove(&shrinker_idr, id);
}
@@ -433,7 +433,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
		parent = root_mem_cgroup;

	/* Prevent from concurrent shrinker_info expand */
	mutex_lock(&shrinker_mutex);
	down_write(&shrinker_rwsem);
	for_each_node(nid) {
		child_info = shrinker_info_protected(memcg, nid);
		parent_info = shrinker_info_protected(parent, nid);
@@ -442,7 +442,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
			atomic_long_add(nr, &parent_info->nr_deferred[i]);
		}
	}
	mutex_unlock(&shrinker_mutex);
	up_write(&shrinker_rwsem);
}

static bool cgroup_reclaim(struct scan_control *sc)
@@ -743,9 +743,9 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
	shrinker->name = NULL;
#endif
	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
		mutex_lock(&shrinker_mutex);
		down_write(&shrinker_rwsem);
		unregister_memcg_shrinker(shrinker);
		mutex_unlock(&shrinker_mutex);
		up_write(&shrinker_rwsem);
		return;
	}

@@ -755,11 +755,11 @@ void free_prealloced_shrinker(struct shrinker *shrinker)

void register_shrinker_prepared(struct shrinker *shrinker)
{
	mutex_lock(&shrinker_mutex);
	down_write(&shrinker_rwsem);
	list_add_tail_rcu(&shrinker->list, &shrinker_list);
	shrinker->flags |= SHRINKER_REGISTERED;
	shrinker_debugfs_add(shrinker);
	mutex_unlock(&shrinker_mutex);
	up_write(&shrinker_rwsem);
}

static int __register_shrinker(struct shrinker *shrinker)
@@ -810,13 +810,13 @@ void unregister_shrinker(struct shrinker *shrinker)
	if (!(shrinker->flags & SHRINKER_REGISTERED))
		return;

	mutex_lock(&shrinker_mutex);
	down_write(&shrinker_rwsem);
	list_del_rcu(&shrinker->list);
	shrinker->flags &= ~SHRINKER_REGISTERED;
	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
		unregister_memcg_shrinker(shrinker);
	debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
	mutex_unlock(&shrinker_mutex);
	up_write(&shrinker_rwsem);

	atomic_inc(&shrinker_srcu_generation);
	synchronize_srcu(&shrinker_srcu);