Commit 3e28850c authored by Linus Torvalds

Merge tag 'for-5.16/block-2021-11-09' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - Set of fixes for the batched tag allocation (Ming, me)

 - add_disk() error handling fix (Luis)

 - Nested queue quiesce fixes (Ming)

 - Shared tags init error handling fix (Ye)

 - Misc cleanups (Jean, Ming, me)

* tag 'for-5.16/block-2021-11-09' of git://git.kernel.dk/linux-block:
  nvme: wait until quiesce is done
  scsi: make sure that request queue queiesce and unquiesce balanced
  scsi: avoid to quiesce sdev->request_queue two times
  blk-mq: add one API for waiting until quiesce is done
  blk-mq: don't free tags if the tag_set is used by other device in queue initialztion
  block: fix device_add_disk() kobject_create_and_add() error handling
  block: ensure cached plug request matches the current queue
  block: move queue enter logic into blk_mq_submit_bio()
  block: make bio_queue_enter() fast-path available inline
  block: split request allocation components into helpers
  block: have plug stored requests hold references to the queue
  blk-mq: update hctx->nr_active in blk_mq_end_request_batch()
  blk-mq: add RQF_ELV debug entry
  blk-mq: only try to run plug merge if request has same queue with incoming bio
  block: move RQF_ELV setting into allocators
  dm: don't stop request queue after the dm device is suspended
  block: replace always false argument with 'false'
  block: assign correct tag before doing prefetch of request
  blk-mq: fix redundant check of !e expression
parents 1dc1f92e 26af1cd0
+21 −40
@@ -386,30 +386,6 @@ void blk_cleanup_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_cleanup_queue);

static bool blk_try_enter_queue(struct request_queue *q, bool pm)
{
	rcu_read_lock();
	if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
		goto fail;

	/*
	 * The code that increments the pm_only counter must ensure that the
	 * counter is globally visible before the queue is unfrozen.
	 */
	if (blk_queue_pm_only(q) &&
	    (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
		goto fail_put;

	rcu_read_unlock();
	return true;

fail_put:
	blk_queue_exit(q);
fail:
	rcu_read_unlock();
	return false;
}

/**
 * blk_queue_enter() - try to increase q->q_usage_counter
 * @q: request queue pointer
@@ -442,10 +418,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
	return 0;
}

static inline int bio_queue_enter(struct bio *bio)
int __bio_queue_enter(struct request_queue *q, struct bio *bio)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);

	while (!blk_try_enter_queue(q, false)) {
		struct gendisk *disk = bio->bi_bdev->bd_disk;
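
/*
 * Sketch of the fast path this split enables ("block: make
 * bio_queue_enter() fast-path available inline"): the common case stays
 * inline in the caller and only the blocking slow path goes out of line
 * through __bio_queue_enter().  The exact wording and its placement
 * (presumably block/blk.h) are assumptions, not part of this diff.
 */
static inline int bio_queue_enter(struct bio *bio)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);

	if (blk_try_enter_queue(q, false))
		return 0;
	return __bio_queue_enter(q, bio);
}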

@@ -742,7 +716,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
	return BLK_STS_OK;
}

static noinline_for_stack bool submit_bio_checks(struct bio *bio)
noinline_for_stack bool submit_bio_checks(struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;
	struct request_queue *q = bdev_get_queue(bdev);
@@ -860,24 +834,25 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
	return false;
}

static void __submit_bio(struct bio *bio)
static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;

	if (unlikely(bio_queue_enter(bio) != 0))
		return;

	if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio))
		goto queue_exit;
	if (!disk->fops->submit_bio) {
		blk_mq_submit_bio(bio);
		return;
	}
	if (submit_bio_checks(bio) && blk_crypto_bio_prep(&bio))
		disk->fops->submit_bio(bio);
queue_exit:
	blk_queue_exit(disk->queue);
}

static void __submit_bio(struct bio *bio)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;

	if (!disk->fops->submit_bio)
		blk_mq_submit_bio(bio);
	else
		__submit_bio_fops(disk, bio);
}

/*
 * The loop in this function may be a bit non-obvious, and so deserves some
 * explanation:
@@ -1615,7 +1590,13 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
		flush_plug_callbacks(plug, from_schedule);
	if (!rq_list_empty(plug->mq_list))
		blk_mq_flush_plug_list(plug, from_schedule);
	if (unlikely(!from_schedule && plug->cached_rq))
	/*
	 * Unconditionally flush out cached requests, even if the unplug
	 * event came from schedule. Since we now hold references to the
	 * queue for cached requests, we don't want a blocked task holding
	 * up a queue freeze/quiesce event.
	 */
	if (unlikely(!rq_list_empty(plug->cached_rq)))
		blk_mq_free_plug_rqs(plug);
}

+4 −2
@@ -1101,9 +1101,11 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		 * the same queue, there should be only one such rq in a queue
		 */
		*same_queue_rq = true;
	}
	if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK)

		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
				BIO_MERGE_OK)
			return true;
	}
	return false;
}

+1 −0
@@ -308,6 +308,7 @@ static const char *const rqf_name[] = {
	RQF_NAME(SPECIAL_PAYLOAD),
	RQF_NAME(ZONE_WRITE_LOCKED),
	RQF_NAME(MQ_POLL_SLEPT),
	RQF_NAME(ELV),
};
#undef RQF_NAME

+11 −4
@@ -370,15 +370,20 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
	bool ret = false;
	enum hctx_type type;

	if (e && e->type->ops.bio_merge)
		return e->type->ops.bio_merge(q, bio, nr_segs);
	if (bio_queue_enter(bio))
		return false;

	if (e && e->type->ops.bio_merge) {
		ret = e->type->ops.bio_merge(q, bio, nr_segs);
		goto out_put;
	}

	ctx = blk_mq_get_ctx(q);
	hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
	type = hctx->type;
	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
	    list_empty_careful(&ctx->rq_lists[type]))
		return false;
		goto out_put;

	/* default per sw-queue merge */
	spin_lock(&ctx->lock);
@@ -391,6 +396,8 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
		ret = true;

	spin_unlock(&ctx->lock);
out_put:
	blk_queue_exit(q);
	return ret;
}

@@ -497,7 +504,7 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
		 * busy in case of 'none' scheduler, and this way may save
		 * us one extra enqueue & dequeue to sw queue.
		 */
		if (!hctx->dispatch_busy && !e && !run_queue_async) {
		if (!hctx->dispatch_busy && !run_queue_async) {
			blk_mq_try_issue_list_directly(hctx, list);
			if (list_empty(list))
				goto out;
+130 −57
@@ -251,22 +251,18 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q)
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);

/**
 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
 * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
 * @q: request queue.
 *
 * Note: this function does not prevent that the struct request end_io()
 * callback function is invoked. Once this function is returned, we make
 * sure no dispatch can happen until the queue is unquiesced via
 * blk_mq_unquiesce_queue().
 * Note: it is driver's responsibility for making sure that quiesce has
 * been started.
 */
void blk_mq_quiesce_queue(struct request_queue *q)
void blk_mq_wait_quiesce_done(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;
	bool rcu = false;

	blk_mq_quiesce_queue_nowait(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (hctx->flags & BLK_MQ_F_BLOCKING)
			synchronize_srcu(hctx->srcu);
@@ -276,6 +272,22 @@ void blk_mq_quiesce_queue(struct request_queue *q)
	if (rcu)
		synchronize_rcu();
}
EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);

/**
 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
 * @q: request queue.
 *
 * Note: this function does not prevent that the struct request end_io()
 * callback function is invoked. Once this function is returned, we make
 * sure no dispatch can happen until the queue is unquiesced via
 * blk_mq_unquiesce_queue().
 */
void blk_mq_quiesce_queue(struct request_queue *q)
{
	blk_mq_quiesce_queue_nowait(q);
	blk_mq_wait_quiesce_done(q);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
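
/*
 * Usage sketch for the new helper, modelled on the nested-quiesce fixes in
 * this pull rather than taken from the diff itself: a path that finds the
 * queue already quiesced can skip quiescing it again but must still wait
 * for the quiesce in flight to finish.  The 'stopped' bit is an
 * illustrative driver-private flag.
 */
static void example_stop_queue(struct request_queue *q, unsigned long *stopped)
{
	if (!test_and_set_bit(0, stopped))
		blk_mq_quiesce_queue(q);
	else
		blk_mq_wait_quiesce_done(q);
}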

/*
@@ -405,12 +417,15 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
	for (i = 0; tag_mask; i++) {
		if (!(tag_mask & (1UL << i)))
			continue;
		prefetch(tags->static_rqs[tag]);
		tag = tag_offset + i;
		prefetch(tags->static_rqs[tag]);
		tag_mask &= ~(1UL << i);
		rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns);
		rq_list_add(data->cached_rq, rq);
		nr++;
	}
	/* caller already holds a reference, add for remainder */
	percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
	data->nr_tags -= nr;

	return rq_list_pop(data->cached_rq);
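
/*
 * Reference accounting for the batch above, e.g. with nr == 4: the submitter
 * already entered the queue once, so only three extra q_usage_counter
 * references are taken here; each cached request then owns one, they are
 * dropped again via blk_mq_free_request() or percpu_ref_put_many() in
 * blk_mq_flush_tag_batch() below, and blk_mq_free_plug_rqs() no longer needs
 * its own percpu_ref_get() per request.
 */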
@@ -419,7 +434,6 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
{
	struct request_queue *q = data->q;
	struct elevator_queue *e = q->elevator;
	u64 alloc_time_ns = 0;
	struct request *rq;
	unsigned int tag;
@@ -431,7 +445,11 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
	if (data->cmd_flags & REQ_NOWAIT)
		data->flags |= BLK_MQ_REQ_NOWAIT;

	if (e) {
	if (q->elevator) {
		struct elevator_queue *e = q->elevator;

		data->rq_flags |= RQF_ELV;

		/*
		 * Flush/passthrough requests are special and go directly to the
		 * dispatch list. Don't include reserved tags in the
@@ -447,7 +465,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
retry:
	data->ctx = blk_mq_get_ctx(q);
	data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
	if (!e)
	if (!(data->rq_flags & RQF_ELV))
		blk_mq_tag_busy(data->hctx);

	/*
@@ -490,7 +508,6 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
		.q		= q,
		.flags		= flags,
		.cmd_flags	= op,
		.rq_flags	= q->elevator ? RQF_ELV : 0,
		.nr_tags	= 1,
	};
	struct request *rq;
@@ -520,7 +537,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
		.q		= q,
		.flags		= flags,
		.cmd_flags	= op,
		.rq_flags	= q->elevator ? RQF_ELV : 0,
		.nr_tags	= 1,
	};
	u64 alloc_time_ns = 0;
@@ -561,6 +577,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,

	if (!q->elevator)
		blk_mq_tag_busy(data.hctx);
	else
		data.rq_flags |= RQF_ELV;

	ret = -EWOULDBLOCK;
	tag = blk_mq_get_tag(&data);
@@ -627,11 +645,9 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug)
{
	struct request *rq;

	while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) {
		percpu_ref_get(&rq->q->q_usage_counter);
	while ((rq = rq_list_pop(&plug->cached_rq)) != NULL)
		blk_mq_free_request(rq);
}
}

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, blk_status_t error)
@@ -815,6 +831,13 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
{
	struct request_queue *q = hctx->queue;

	/*
	 * All requests should have been marked as RQF_MQ_INFLIGHT, so
	 * update hctx->nr_active in batch
	 */
	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		__blk_mq_sub_active_requests(hctx, nr_tags);

	blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
	percpu_ref_put_many(&q->q_usage_counter, nr_tags);
}
@@ -2232,7 +2255,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
	plug->rq_count = 0;

	if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) {
		blk_mq_plug_issue_direct(plug, from_schedule);
		blk_mq_plug_issue_direct(plug, false);
		if (rq_list_empty(plug->mq_list))
			return;
	}
@@ -2472,6 +2495,83 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
	return BLK_MAX_REQUEST_COUNT;
}

static bool blk_attempt_bio_merge(struct request_queue *q, struct bio *bio,
				  unsigned int nr_segs, bool *same_queue_rq)
{
	if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
		if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq))
			return true;
		if (blk_mq_sched_bio_merge(q, bio, nr_segs))
			return true;
	}
	return false;
}

static struct request *blk_mq_get_new_requests(struct request_queue *q,
					       struct blk_plug *plug,
					       struct bio *bio,
					       unsigned int nsegs,
					       bool *same_queue_rq)
{
	struct blk_mq_alloc_data data = {
		.q		= q,
		.nr_tags	= 1,
		.cmd_flags	= bio->bi_opf,
	};
	struct request *rq;

	if (unlikely(bio_queue_enter(bio)))
		return NULL;
	if (unlikely(!submit_bio_checks(bio)))
		goto put_exit;
	if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
		goto put_exit;

	rq_qos_throttle(q, bio);

	if (plug) {
		data.nr_tags = plug->nr_ios;
		plug->nr_ios = 1;
		data.cached_rq = &plug->cached_rq;
	}

	rq = __blk_mq_alloc_requests(&data);
	if (rq)
		return rq;

	rq_qos_cleanup(q, bio);
	if (bio->bi_opf & REQ_NOWAIT)
		bio_wouldblock_error(bio);
put_exit:
	blk_queue_exit(q);
	return NULL;
}

static inline struct request *blk_mq_get_request(struct request_queue *q,
						 struct blk_plug *plug,
						 struct bio *bio,
						 unsigned int nsegs,
						 bool *same_queue_rq)
{
	if (plug) {
		struct request *rq;

		rq = rq_list_peek(&plug->cached_rq);
		if (rq && rq->q == q) {
			if (unlikely(!submit_bio_checks(bio)))
				return NULL;
			if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
				return NULL;
			plug->cached_rq = rq_list_next(rq);
			INIT_LIST_HEAD(&rq->queuelist);
			rq_qos_throttle(q, bio);
			return rq;
		}
	}

	return blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
}
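
/*
 * Taken together, the two helpers above split request allocation out of
 * blk_mq_submit_bio(): blk_mq_get_request() first tries to reuse a
 * plug-cached request, but only if it belongs to the same queue as the
 * incoming bio (it already holds a q_usage_counter reference from the
 * batched allocation); otherwise blk_mq_get_new_requests() enters the
 * queue, runs the bio checks and merge attempts, allocates fresh requests
 * and caches any extras in the plug, dropping the queue reference and
 * returning NULL on failure.
 */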

/**
 * blk_mq_submit_bio - Create and send a request to block device.
 * @bio: Bio pointer.
@@ -2495,47 +2595,20 @@ void blk_mq_submit_bio(struct bio *bio)
	unsigned int nr_segs = 1;
	blk_status_t ret;

	if (unlikely(!blk_crypto_bio_prep(&bio)))
		return;

	blk_queue_bounce(q, &bio);
	if (blk_may_split(q, bio))
		__blk_queue_split(q, &bio, &nr_segs);

	if (!bio_integrity_prep(bio))
		goto queue_exit;

	if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
		if (blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
			goto queue_exit;
		if (blk_mq_sched_bio_merge(q, bio, nr_segs))
			goto queue_exit;
	}

	rq_qos_throttle(q, bio);
		return;

	plug = blk_mq_plug(q, bio);
	if (plug && plug->cached_rq) {
		rq = rq_list_pop(&plug->cached_rq);
		INIT_LIST_HEAD(&rq->queuelist);
	} else {
		struct blk_mq_alloc_data data = {
			.q		= q,
			.nr_tags	= 1,
			.cmd_flags	= bio->bi_opf,
			.rq_flags	= q->elevator ? RQF_ELV : 0,
		};

		if (plug) {
			data.nr_tags = plug->nr_ios;
			plug->nr_ios = 1;
			data.cached_rq = &plug->cached_rq;
		}
		rq = __blk_mq_alloc_requests(&data);
		if (unlikely(!rq)) {
			rq_qos_cleanup(q, bio);
			if (bio->bi_opf & REQ_NOWAIT)
				bio_wouldblock_error(bio);
			goto queue_exit;
		}
	}
	rq = blk_mq_get_request(q, plug, bio, nr_segs, &same_queue_rq);
	if (unlikely(!rq))
		return;

	trace_block_getrq(bio);

@@ -2616,10 +2689,6 @@ void blk_mq_submit_bio(struct bio *bio)
		/* Default case. */
		blk_mq_sched_insert_request(rq, false, true, true);
	}

	return;
queue_exit:
	blk_queue_exit(q);
}

static size_t order_to_size(unsigned int order)
@@ -3605,7 +3674,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
		struct blk_mq_hw_ctx *hctx = hctxs[j];

		if (hctx) {
			__blk_mq_free_map_and_rqs(set, j);
			blk_mq_exit_hctx(q, set, hctx, j);
			hctxs[j] = NULL;
		}
@@ -4113,8 +4181,13 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_realloc_hw_ctxs(set, q);
		if (q->nr_hw_queues != set->nr_hw_queues) {
			int i = prev_nr_hw_queues;

			pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
					nr_hw_queues, prev_nr_hw_queues);
			for (; i < set->nr_hw_queues; i++)
				__blk_mq_free_map_and_rqs(set, i);

			set->nr_hw_queues = prev_nr_hw_queues;
			blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
			goto fallback;