Commit 4669e13c authored by Linus Torvalds
Browse files

Merge tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - gendisk freeing fix (Christoph)

 - blk-iocost wake ordering fix (Tejun)

 - tag allocation error handling fix (John)

 - loop locking fix. While this isn't the prettiest fix in the world,
   nobody has any good alternatives for 5.14. Something to likely
   revisit for 5.15. (Tetsuo)

* tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block:
  block: delay freeing the gendisk
  blk-iocost: fix operation ordering in iocg_wake_fn()
  blk-mq-sched: Fix blk_mq_sched_alloc_tags() error handling
  loop: reintroduce global lock for safe loop_validate_file() traversal
parents 27eb687b 340e8457
Loading
Loading
Loading
Loading
+6 −5
Original line number Diff line number Diff line
@@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
		return -1;

	iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
	wait->committed = true;

	/*
	 * autoremove_wake_function() removes the wait entry only when it
	 * actually changed the task state.  We want the wait always
	 * removed.  Remove explicitly and use default_wake_function().
	 * actually changed the task state. We want the wait always removed.
	 * Remove explicitly and use default_wake_function(). Note that the
	 * order of operations is important as finish_wait() tests whether
	 * @wq_entry is removed without grabbing the lock.
	 */
	list_del_init(&wq_entry->entry);
	wait->committed = true;

	default_wake_function(wq_entry, mode, flags, key);
	list_del_init_careful(&wq_entry->entry);
	return 0;
}

+4 −13
Original line number Diff line number Diff line
@@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
	percpu_ref_put(&q->q_usage_counter);
}

/*
 * Tear down the scheduler tags attached to @hctx, if there are any.
 *
 * When hctx->sched_tags is set, it is handed to blk_mq_free_rqs() and then
 * to blk_mq_free_rq_map(), after which the pointer is cleared.  When it is
 * NULL this function does nothing.
 */
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	/* Nothing was allocated for this hardware context. */
	if (!hctx->sched_tags)
		return;

	blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
	blk_mq_free_rq_map(hctx->sched_tags, set->flags);
	/* Clear the pointer so a later call takes the early return above. */
	hctx->sched_tags = NULL;
}

static int blk_mq_sched_alloc_tags(struct request_queue *q,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
@@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
		return -ENOMEM;

	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
	if (ret)
		blk_mq_sched_free_tags(set, hctx, hctx_idx);
	if (ret) {
		blk_mq_free_rq_map(hctx->sched_tags, set->flags);
		hctx->sched_tags = NULL;
	}

	return ret;
}
+1 −2
Original line number Diff line number Diff line
@@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev)
	disk_release_events(disk);
	kfree(disk->random);
	xa_destroy(&disk->part_tbl);
	bdput(disk->part0);
	if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
		blk_put_queue(disk->queue);
	kfree(disk);
	bdput(disk->part0);	/* frees the disk */
}
struct class block_class = {
	.name		= "block",
+97 −31
Original line number Diff line number Diff line
@@ -88,6 +88,47 @@

static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex);
/*
 * Global lock that loop_validate_file() expects to be held while it walks
 * from one "struct loop_device" to the backing loop device behind it.
 */
static DEFINE_MUTEX(loop_validate_mutex);

/**
 * loop_global_lock_killable() - take locks for safe loop_validate_file() test
 *
 * @lo: struct loop_device
 * @global: true if @lo is about to bind another "struct loop_device", false otherwise
 *
 * Returns 0 on success, -EINTR otherwise.
 *
 * loop_validate_file() walks across other "struct loop_device" instances
 * whenever is_loop_device() is true, so concurrent
 * loop_configure()/loop_change_fd()/__loop_clr_fd() calls have to be
 * serialized by a global lock.  When @global is true, loop_validate_mutex is
 * acquired first and lo->lo_mutex second; otherwise only lo->lo_mutex is
 * acquired.  On failure no lock is left held.
 */
static int loop_global_lock_killable(struct loop_device *lo, bool global)
{
	int ret = 0;

	/* Outer (global) lock, only needed when binding another loop device. */
	if (global)
		ret = mutex_lock_killable(&loop_validate_mutex);
	if (ret)
		return ret;

	ret = mutex_lock_killable(&lo->lo_mutex);
	if (!ret)
		return 0;

	/* Per-device lock failed: back out the global lock if we took it. */
	if (global)
		mutex_unlock(&loop_validate_mutex);
	return ret;
}

/**
 * loop_global_unlock() - release locks taken by loop_global_lock_killable()
 *
 * @lo: struct loop_device
 * @global: true if @lo was about to bind another "struct loop_device", false otherwise
 *
 * Drops lo->lo_mutex first and then, only when @global is true,
 * loop_validate_mutex.  @global must match the value that was passed to
 * loop_global_lock_killable().
 */
static void loop_global_unlock(struct loop_device *lo, bool global)
{
	mutex_unlock(&lo->lo_mutex);
	if (!global)
		return;

	/* The global lock was taken as well; release it last. */
	mutex_unlock(&loop_validate_mutex);
}

static int max_part;
static int part_shift;
@@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
	while (is_loop_device(f)) {
		struct loop_device *l;

		lockdep_assert_held(&loop_validate_mutex);
		if (f->f_mapping->host->i_rdev == bdev->bd_dev)
			return -EBADF;

		l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
		if (l->lo_state != Lo_bound) {
		if (l->lo_state != Lo_bound)
			return -EINVAL;
		}
		/* Order wrt setting lo->lo_backing_file in loop_configure(). */
		rmb();
		f = l->lo_backing_file;
	}
	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
@@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
			  unsigned int arg)
{
	struct file	*file = NULL, *old_file;
	struct file *file = fget(arg);
	struct file *old_file;
	int error;
	bool partscan;
	bool is_loop;

	error = mutex_lock_killable(&lo->lo_mutex);
	if (!file)
		return -EBADF;
	is_loop = is_loop_device(file);
	error = loop_global_lock_killable(lo, is_loop);
	if (error)
		return error;
		goto out_putf;
	error = -ENXIO;
	if (lo->lo_state != Lo_bound)
		goto out_err;
@@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
		goto out_err;

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out_err;

	error = loop_validate_file(file, bdev);
	if (error)
		goto out_err;
@@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
	loop_update_dio(lo);
	blk_mq_unfreeze_queue(lo->lo_queue);
	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
	mutex_unlock(&lo->lo_mutex);
	loop_global_unlock(lo, is_loop);

	/*
	 * Flush loop_validate_file() before fput(), for l->lo_backing_file
	 * might be pointing at old_file which might be the last reference.
	 */
	if (!is_loop) {
		mutex_lock(&loop_validate_mutex);
		mutex_unlock(&loop_validate_mutex);
	}
	/*
	 * We must drop file reference outside of lo_mutex as dropping
	 * the file ref can take open_mutex which creates circular locking
@@ -752,8 +804,8 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
	return 0;

out_err:
	mutex_unlock(&lo->lo_mutex);
	if (file)
	loop_global_unlock(lo, is_loop);
out_putf:
	fput(file);
	return error;
}
@@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
			  struct block_device *bdev,
			  const struct loop_config *config)
{
	struct file	*file;
	struct file *file = fget(config->fd);
	struct inode *inode;
	struct address_space *mapping;
	int error;
	loff_t size;
	bool partscan;
	unsigned short bsize;
	bool is_loop;

	if (!file)
		return -EBADF;
	is_loop = is_loop_device(file);

	/* This is safe, since we have a reference from open(). */
	__module_get(THIS_MODULE);

	error = -EBADF;
	file = fget(config->fd);
	if (!file)
		goto out;

	/*
	 * If we don't hold exclusive handle for the device, upgrade to it
	 * here to avoid changing device under exclusive owner.
@@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
			goto out_putf;
	}

	error = mutex_lock_killable(&lo->lo_mutex);
	error = loop_global_lock_killable(lo, is_loop);
	if (error)
		goto out_bdev;

@@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
	size = get_loop_size(lo, file);
	loop_set_size(lo, size);

	/* Order wrt reading lo_state in loop_validate_file(). */
	wmb();

	lo->lo_state = Lo_bound;
	if (part_shift)
		lo->lo_flags |= LO_FLAGS_PARTSCAN;
@@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
	 */
	bdgrab(bdev);
	mutex_unlock(&lo->lo_mutex);
	loop_global_unlock(lo, is_loop);
	if (partscan)
		loop_reread_partitions(lo);
	if (!(mode & FMODE_EXCL))
@@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
	return 0;

out_unlock:
	mutex_unlock(&lo->lo_mutex);
	loop_global_unlock(lo, is_loop);
out_bdev:
	if (!(mode & FMODE_EXCL))
		bd_abort_claiming(bdev, loop_configure);
out_putf:
	fput(file);
out:
	/* This is safe: open() is still holding a reference. */
	module_put(THIS_MODULE);
	return error;
@@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
	int lo_number;
	struct loop_worker *pos, *worker;

	/*
	 * Flush loop_configure() and loop_change_fd(). It is acceptable for
	 * loop_validate_file() to succeed, for actual clear operation has not
	 * started yet.
	 */
	mutex_lock(&loop_validate_mutex);
	mutex_unlock(&loop_validate_mutex);
	/*
	 * loop_validate_file() now fails because l->lo_state != Lo_bound
	 * became visible.
	 */

	mutex_lock(&lo->lo_mutex);
	if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
		err = -ENXIO;
+2 −0
Original line number Diff line number Diff line
@@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode)
	free_percpu(bdev->bd_stats);
	kfree(bdev->bd_meta_info);

	if (!bdev_is_partition(bdev))
		kfree(bdev->bd_disk);
	kmem_cache_free(bdev_cachep, BDEV_I(inode));
}