Commit dd6216bb authored by Christoph Hellwig, committed by Jens Axboe

blk-mq: make sure elevator callbacks aren't called for passthrough request



When q->elevator is set, passthrough requests can still be marked as
RQF_ELV, so some elevator callbacks will be called for them.

Fix this by splitting RQF_ELV into RQF_SCHED_TAGS, which is set for all
requests that are issued on a queue that uses an I/O scheduler, and
RQF_USE_SCHED, which is set only for non-flush, non-passthrough requests
on such a queue.

Roughly based on two different patches from
Ming Lei <ming.lei@redhat.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20230518053101.760632-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent fdcab6cd
block/blk-mq-debugfs.c (+2 −1)
@@ -249,6 +249,8 @@ static const char *const rqf_name[] = {
 	RQF_NAME(MIXED_MERGE),
 	RQF_NAME(MQ_INFLIGHT),
 	RQF_NAME(DONTPREP),
+	RQF_NAME(SCHED_TAGS),
+	RQF_NAME(USE_SCHED),
 	RQF_NAME(FAILED),
 	RQF_NAME(QUIET),
 	RQF_NAME(IO_STAT),
@@ -258,7 +260,6 @@ static const char *const rqf_name[] = {
 	RQF_NAME(SPECIAL_PAYLOAD),
 	RQF_NAME(ZONE_WRITE_LOCKED),
 	RQF_NAME(TIMED_OUT),
-	RQF_NAME(ELV),
 	RQF_NAME(RESV),
 };
 #undef RQF_NAME
block/blk-mq-sched.h (+3 −3)
@@ -37,7 +37,7 @@ static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 			 struct bio *bio)
 {
-	if (rq->rq_flags & RQF_ELV) {
+	if (rq->rq_flags & RQF_USE_SCHED) {
 		struct elevator_queue *e = q->elevator;
 
 		if (e->type->ops.allow_merge)
@@ -48,7 +48,7 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 
 static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 {
-	if (rq->rq_flags & RQF_ELV) {
+	if (rq->rq_flags & RQF_USE_SCHED) {
 		struct elevator_queue *e = rq->q->elevator;
 
 		if (e->type->ops.completed_request)
@@ -58,7 +58,7 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 
 static inline void blk_mq_sched_requeue_request(struct request *rq)
 {
-	if ((rq->rq_flags & RQF_ELV) && !op_is_flush(rq->cmd_flags)) {
+	if (rq->rq_flags & RQF_USE_SCHED) {
 		struct request_queue *q = rq->q;
 		struct elevator_queue *e = q->elevator;
 
block/blk-mq.c (+29 −24)
@@ -354,12 +354,12 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 		data->rq_flags |= RQF_IO_STAT;
 	rq->rq_flags = data->rq_flags;
 
-	if (!(data->rq_flags & RQF_ELV)) {
-		rq->tag = tag;
-		rq->internal_tag = BLK_MQ_NO_TAG;
-	} else {
+	if (data->rq_flags & RQF_SCHED_TAGS) {
 		rq->tag = BLK_MQ_NO_TAG;
 		rq->internal_tag = tag;
+	} else {
+		rq->tag = tag;
+		rq->internal_tag = BLK_MQ_NO_TAG;
 	}
 	rq->timeout = 0;
 
@@ -386,14 +386,13 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	WRITE_ONCE(rq->deadline, 0);
 	req_ref_set(rq, 1);
 
-	if (rq->rq_flags & RQF_ELV) {
+	if (rq->rq_flags & RQF_USE_SCHED) {
 		struct elevator_queue *e = data->q->elevator;
 
 		INIT_HLIST_NODE(&rq->hash);
 		RB_CLEAR_NODE(&rq->rb_node);
 
-		if (!op_is_flush(data->cmd_flags) &&
-		    e->type->ops.prepare_request)
+		if (e->type->ops.prepare_request)
 			e->type->ops.prepare_request(rq);
 	}
 
@@ -447,26 +446,32 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 		data->flags |= BLK_MQ_REQ_NOWAIT;
 
 	if (q->elevator) {
-		struct elevator_queue *e = q->elevator;
-
-		data->rq_flags |= RQF_ELV;
+		/*
+		 * All requests use scheduler tags when an I/O scheduler is
+		 * enabled for the queue.
+		 */
+		data->rq_flags |= RQF_SCHED_TAGS;
 
 		/*
 		 * Flush/passthrough requests are special and go directly to the
-		 * dispatch list. Don't include reserved tags in the
-		 * limiting, as it isn't useful.
+		 * dispatch list.
 		 */
 		if (!op_is_flush(data->cmd_flags) &&
-		    !blk_op_is_passthrough(data->cmd_flags) &&
-		    e->type->ops.limit_depth &&
-		    !(data->flags & BLK_MQ_REQ_RESERVED))
-			e->type->ops.limit_depth(data->cmd_flags, data);
+		    !blk_op_is_passthrough(data->cmd_flags)) {
+			struct elevator_mq_ops *ops = &q->elevator->type->ops;
+
+			WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED);
+
+			data->rq_flags |= RQF_USE_SCHED;
+			if (ops->limit_depth)
+				ops->limit_depth(data->cmd_flags, data);
+		}
 	}
 
 retry:
 	data->ctx = blk_mq_get_ctx(q);
 	data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
-	if (!(data->rq_flags & RQF_ELV))
+	if (!(data->rq_flags & RQF_SCHED_TAGS))
 		blk_mq_tag_busy(data->hctx);
 
 	if (data->flags & BLK_MQ_REQ_RESERVED)
@@ -646,10 +651,10 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 		goto out_queue_exit;
 	data.ctx = __blk_mq_get_ctx(q, cpu);
 
-	if (!q->elevator)
-		blk_mq_tag_busy(data.hctx);
+	if (q->elevator)
+		data.rq_flags |= RQF_SCHED_TAGS;
 	else
-		data.rq_flags |= RQF_ELV;
+		blk_mq_tag_busy(data.hctx);
 
 	if (flags & BLK_MQ_REQ_RESERVED)
 		data.rq_flags |= RQF_RESV;
@@ -694,7 +699,7 @@ void blk_mq_free_request(struct request *rq)
 	struct request_queue *q = rq->q;
 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 
-	if ((rq->rq_flags & RQF_ELV) && !op_is_flush(rq->cmd_flags) &&
+	if ((rq->rq_flags & RQF_USE_SCHED) &&
 	    q->elevator->type->ops.finish_request)
 		q->elevator->type->ops.finish_request(rq);
 
@@ -1268,7 +1273,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 
 	if (!plug->multiple_queues && last && last->q != rq->q)
 		plug->multiple_queues = true;
-	if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+	if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
 		plug->has_elevator = true;
 	rq->rq_next = NULL;
 	rq_list_add(&plug->mq_list, rq);
@@ -2620,7 +2625,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		return;
 	}
 
-	if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) {
+	if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
 		blk_mq_insert_request(rq, 0);
 		blk_mq_run_hw_queue(hctx, false);
 		return;
@@ -2983,7 +2988,7 @@ void blk_mq_submit_bio(struct bio *bio)
 	}
 
 	hctx = rq->mq_hctx;
-	if ((rq->rq_flags & RQF_ELV) ||
+	if ((rq->rq_flags & RQF_USE_SCHED) ||
 	    (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
 		blk_mq_insert_request(rq, 0);
 		blk_mq_run_hw_queue(hctx, true);
block/blk-mq.h (+3 −3)
@@ -226,9 +226,9 @@ static inline bool blk_mq_is_shared_tags(unsigned int flags)
 
 static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
 {
-	if (!(data->rq_flags & RQF_ELV))
-		return data->hctx->tags;
-	return data->hctx->sched_tags;
+	if (data->rq_flags & RQF_SCHED_TAGS)
+		return data->hctx->sched_tags;
+	return data->hctx->tags;
 }
 
 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
include/linux/blk-mq.h (+7 −5)
@@ -38,6 +38,10 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_MQ_INFLIGHT		((__force req_flags_t)(1 << 6))
 /* don't call prep for this one */
 #define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
+/* use hctx->sched_tags */
+#define RQF_SCHED_TAGS		((__force req_flags_t)(1 << 8))
+/* use an I/O scheduler for this request */
+#define RQF_USE_SCHED		((__force req_flags_t)(1 << 9))
 /* vaguely specified driver internal error.  Ignored by the block layer */
 #define RQF_FAILED		((__force req_flags_t)(1 << 10))
 /* don't warn about errors */
@@ -57,8 +61,6 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
 /* ->timeout has been called, don't expire again */
 #define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
-/* queue has elevator attached */
-#define RQF_ELV			((__force req_flags_t)(1 << 22))
 #define RQF_RESV		((__force req_flags_t)(1 << 23))
 
 /* flags that prevent us from merging requests: */
@@ -842,7 +844,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
  */
 static inline bool blk_mq_need_time_stamp(struct request *rq)
 {
-	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
+	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
 }
 
 static inline bool blk_mq_is_reserved_rq(struct request *rq)
@@ -858,7 +860,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
 				       struct io_comp_batch *iob, int ioerror,
 				       void (*complete)(struct io_comp_batch *))
 {
-	if (!iob || (req->rq_flags & RQF_ELV) || ioerror ||
+	if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror ||
 			(req->end_io && !blk_rq_is_passthrough(req)))
 		return false;