Commit 6cc8e743 authored by Pavel Tatashin's avatar Pavel Tatashin Committed by Jens Axboe
Browse files

loop: scale loop device by introducing per device lock



Currently, loop device has only one global lock: loop_ctl_mutex.

This becomes hot in scenarios where many loop devices are used.

Scale it by introducing per-device lock: lo_mutex that protects
modifications of all fields in struct loop_device.

Keep loop_ctl_mutex to protect global data: loop_index_idr, loop_lookup,
loop_add.

The new lock ordering requirement is that loop_ctl_mutex must be taken
before lo_mutex.

Signed-off-by: default avatarPavel Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: default avatarTyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: default avatarPetr Vorel <pvorel@suse.cz>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 767630c6
Loading
Loading
Loading
Loading
+53 −40
Original line number Diff line number Diff line
@@ -704,7 +704,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
	int		error;
	bool		partscan;

	error = mutex_lock_killable(&loop_ctl_mutex);
	error = mutex_lock_killable(&lo->lo_mutex);
	if (error)
		return error;
	error = -ENXIO;
@@ -743,9 +743,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
	loop_update_dio(lo);
	blk_mq_unfreeze_queue(lo->lo_queue);
	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
	/*
	 * We must drop file reference outside of loop_ctl_mutex as dropping
	 * We must drop file reference outside of lo_mutex as dropping
	 * the file ref can take bd_mutex which creates circular locking
	 * dependency.
	 */
@@ -755,7 +755,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
	return 0;

out_err:
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
	if (file)
		fput(file);
	return error;
@@ -1092,7 +1092,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
			goto out_putf;
	}

	error = mutex_lock_killable(&loop_ctl_mutex);
	error = mutex_lock_killable(&lo->lo_mutex);
	if (error)
		goto out_bdev;

@@ -1171,7 +1171,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
	 */
	bdgrab(bdev);
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
	if (partscan)
		loop_reread_partitions(lo, bdev);
	if (!(mode & FMODE_EXCL))
@@ -1179,7 +1179,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
	return 0;

out_unlock:
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
out_bdev:
	if (!(mode & FMODE_EXCL))
		bd_abort_claiming(bdev, loop_configure);
@@ -1200,7 +1200,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
	bool partscan = false;
	int lo_number;

	mutex_lock(&loop_ctl_mutex);
	mutex_lock(&lo->lo_mutex);
	if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
		err = -ENXIO;
		goto out_unlock;
@@ -1253,7 +1253,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
	lo_number = lo->lo_number;
	loop_unprepare_queue(lo);
out_unlock:
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
	if (partscan) {
		/*
		 * bd_mutex has been held already in release path, so don't
@@ -1284,18 +1284,17 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
	 * protects us from all the other places trying to change the 'lo'
	 * device.
	 */
	mutex_lock(&loop_ctl_mutex);
	mutex_lock(&lo->lo_mutex);
	lo->lo_flags = 0;
	if (!part_shift)
		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
	lo->lo_state = Lo_unbound;
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);

	/*
	 * Need not hold loop_ctl_mutex to fput backing file.
	 * Calling fput holding loop_ctl_mutex triggers a circular
	 * lock dependency possibility warning as fput can take
	 * bd_mutex which is usually taken before loop_ctl_mutex.
	 * Need not hold lo_mutex to fput backing file. Calling fput holding
	 * lo_mutex triggers a circular lock dependency possibility warning as
	 * fput can take bd_mutex which is usually taken before lo_mutex.
	 */
	if (filp)
		fput(filp);
@@ -1306,11 +1305,11 @@ static int loop_clr_fd(struct loop_device *lo)
{
	int err;

	err = mutex_lock_killable(&loop_ctl_mutex);
	err = mutex_lock_killable(&lo->lo_mutex);
	if (err)
		return err;
	if (lo->lo_state != Lo_bound) {
		mutex_unlock(&loop_ctl_mutex);
		mutex_unlock(&lo->lo_mutex);
		return -ENXIO;
	}
	/*
@@ -1325,11 +1324,11 @@ static int loop_clr_fd(struct loop_device *lo)
	 */
	if (atomic_read(&lo->lo_refcnt) > 1) {
		lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
		mutex_unlock(&loop_ctl_mutex);
		mutex_unlock(&lo->lo_mutex);
		return 0;
	}
	lo->lo_state = Lo_rundown;
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);

	return __loop_clr_fd(lo, false);
}
@@ -1344,7 +1343,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
	bool partscan = false;
	bool size_changed = false;

	err = mutex_lock_killable(&loop_ctl_mutex);
	err = mutex_lock_killable(&lo->lo_mutex);
	if (err)
		return err;
	if (lo->lo_encrypt_key_size &&
@@ -1411,7 +1410,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
		partscan = true;
	}
out_unlock:
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
	if (partscan)
		loop_reread_partitions(lo, bdev);

@@ -1425,11 +1424,11 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
	struct kstat stat;
	int ret;

	ret = mutex_lock_killable(&loop_ctl_mutex);
	ret = mutex_lock_killable(&lo->lo_mutex);
	if (ret)
		return ret;
	if (lo->lo_state != Lo_bound) {
		mutex_unlock(&loop_ctl_mutex);
		mutex_unlock(&lo->lo_mutex);
		return -ENXIO;
	}

@@ -1448,10 +1447,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
		       lo->lo_encrypt_key_size);
	}

	/* Drop loop_ctl_mutex while we call into the filesystem. */
	/* Drop lo_mutex while we call into the filesystem. */
	path = lo->lo_backing_file->f_path;
	path_get(&path);
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
	ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
	if (!ret) {
		info->lo_device = huge_encode_dev(stat.dev);
@@ -1637,7 +1636,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
{
	int err;

	err = mutex_lock_killable(&loop_ctl_mutex);
	err = mutex_lock_killable(&lo->lo_mutex);
	if (err)
		return err;
	switch (cmd) {
@@ -1653,7 +1652,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
	default:
		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
	}
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
	return err;
}

@@ -1879,27 +1878,33 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
	struct loop_device *lo;
	int err;

	/*
	 * take loop_ctl_mutex to protect lo pointer from race with
	 * loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention
	 * release it prior to updating lo->lo_refcnt.
	 */
	err = mutex_lock_killable(&loop_ctl_mutex);
	if (err)
		return err;
	lo = bdev->bd_disk->private_data;
	if (!lo) {
		err = -ENXIO;
		goto out;
		mutex_unlock(&loop_ctl_mutex);
		return -ENXIO;
	}

	atomic_inc(&lo->lo_refcnt);
out:
	err = mutex_lock_killable(&lo->lo_mutex);
	mutex_unlock(&loop_ctl_mutex);
	if (err)
		return err;
	atomic_inc(&lo->lo_refcnt);
	mutex_unlock(&lo->lo_mutex);
	return 0;
}

static void lo_release(struct gendisk *disk, fmode_t mode)
{
	struct loop_device *lo;
	struct loop_device *lo = disk->private_data;

	mutex_lock(&loop_ctl_mutex);
	lo = disk->private_data;
	mutex_lock(&lo->lo_mutex);
	if (atomic_dec_return(&lo->lo_refcnt))
		goto out_unlock;

@@ -1907,7 +1912,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
		if (lo->lo_state != Lo_bound)
			goto out_unlock;
		lo->lo_state = Lo_rundown;
		mutex_unlock(&loop_ctl_mutex);
		mutex_unlock(&lo->lo_mutex);
		/*
		 * In autoclear mode, stop the loop thread
		 * and remove configuration after last close.
@@ -1924,7 +1929,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
	}

out_unlock:
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
}

static const struct block_device_operations lo_fops = {
@@ -1963,10 +1968,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
	struct loop_device *lo = ptr;
	struct loop_func_table *xfer = data;

	mutex_lock(&loop_ctl_mutex);
	mutex_lock(&lo->lo_mutex);
	if (lo->lo_encryption == xfer)
		loop_release_xfer(lo);
	mutex_unlock(&loop_ctl_mutex);
	mutex_unlock(&lo->lo_mutex);
	return 0;
}

@@ -2152,6 +2157,7 @@ static int loop_add(struct loop_device **l, int i)
		disk->flags |= GENHD_FL_NO_PART_SCAN;
	disk->flags |= GENHD_FL_EXT_DEVT;
	atomic_set(&lo->lo_refcnt, 0);
	mutex_init(&lo->lo_mutex);
	lo->lo_number		= i;
	spin_lock_init(&lo->lo_lock);
	disk->major		= LOOP_MAJOR;
@@ -2182,6 +2188,7 @@ static void loop_remove(struct loop_device *lo)
	blk_cleanup_queue(lo->lo_queue);
	blk_mq_free_tag_set(&lo->tag_set);
	put_disk(lo->lo_disk);
	mutex_destroy(&lo->lo_mutex);
	kfree(lo);
}

@@ -2261,15 +2268,21 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
		ret = loop_lookup(&lo, parm);
		if (ret < 0)
			break;
		ret = mutex_lock_killable(&lo->lo_mutex);
		if (ret)
			break;
		if (lo->lo_state != Lo_unbound) {
			ret = -EBUSY;
			mutex_unlock(&lo->lo_mutex);
			break;
		}
		if (atomic_read(&lo->lo_refcnt) > 0) {
			ret = -EBUSY;
			mutex_unlock(&lo->lo_mutex);
			break;
		}
		lo->lo_disk->private_data = NULL;
		mutex_unlock(&lo->lo_mutex);
		idr_remove(&loop_index_idr, lo->lo_number);
		loop_remove(lo);
		break;
+1 −0
Original line number Diff line number Diff line
@@ -62,6 +62,7 @@ struct loop_device {
	struct request_queue	*lo_queue;
	struct blk_mq_tag_set	tag_set;
	struct gendisk		*lo_disk;
	struct mutex		lo_mutex;
};

struct loop_cmd {