Commit 80bd4a7a authored by Christoph Hellwig, committed by Jens Axboe
Browse files

blk-mq: move the srcu_struct used for quiescing to the tagset



All I/O submissions have fairly similar latencies, and a tagset-wide
quiesce is a fairly common operation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chao Leng <lengchao@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20221101150050.3510-12-hch@lst.de


[axboe: fix whitespace]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 8537380b
Loading
Loading
Loading
Loading
+5 −22
Original line number Diff line number Diff line
@@ -65,7 +65,6 @@ DEFINE_IDA(blk_queue_ida);
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;
struct kmem_cache *blk_requestq_srcu_cachep;

/*
 * Controlling structure to kblockd
@@ -373,26 +372,20 @@ static void blk_timeout_work(struct work_struct *work)
{
}

struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
struct request_queue *blk_alloc_queue(int node_id)
{
	struct request_queue *q;

	q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
			GFP_KERNEL | __GFP_ZERO, node_id);
	q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO,
				  node_id);
	if (!q)
		return NULL;

	if (alloc_srcu) {
		blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q);
		if (init_srcu_struct(q->srcu) != 0)
			goto fail_q;
	}

	q->last_merge = NULL;

	q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
	if (q->id < 0)
		goto fail_srcu;
		goto fail_q;

	q->stats = blk_alloc_queue_stats();
	if (!q->stats)
@@ -435,11 +428,8 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
	blk_free_queue_stats(q->stats);
fail_id:
	ida_free(&blk_queue_ida, q->id);
fail_srcu:
	if (alloc_srcu)
		cleanup_srcu_struct(q->srcu);
fail_q:
	kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q);
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
}

@@ -1172,9 +1162,6 @@ int __init blk_dev_init(void)
			sizeof_field(struct request, cmd_flags));
	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
			sizeof_field(struct bio, bi_opf));
	BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu),
			   __alignof__(struct request_queue)) !=
		     sizeof(struct request_queue));

	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
	kblockd_workqueue = alloc_workqueue("kblockd",
@@ -1185,10 +1172,6 @@ int __init blk_dev_init(void)
	blk_requestq_cachep = kmem_cache_create("request_queue",
			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);

	blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu",
			sizeof(struct request_queue) +
			sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL);

	blk_debugfs_root = debugfs_create_dir("block", NULL);

	return 0;
+25 −8
Original line number Diff line number Diff line
@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
 */
void blk_mq_wait_quiesce_done(struct request_queue *q)
{
	if (blk_queue_has_srcu(q))
		synchronize_srcu(q->srcu);
	if (q->tag_set->flags & BLK_MQ_F_BLOCKING)
		synchronize_srcu(q->tag_set->srcu);
	else
		synchronize_rcu();
}
@@ -4003,7 +4003,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
	struct request_queue *q;
	int ret;

	q = blk_alloc_queue(set->numa_node, set->flags & BLK_MQ_F_BLOCKING);
	q = blk_alloc_queue(set->numa_node);
	if (!q)
		return ERR_PTR(-ENOMEM);
	q->queuedata = queuedata;
@@ -4168,9 +4168,6 @@ static void blk_mq_update_poll_flag(struct request_queue *q)
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
		struct request_queue *q)
{
	WARN_ON_ONCE(blk_queue_has_srcu(q) !=
			!!(set->flags & BLK_MQ_F_BLOCKING));

	/* mark the queue as mq asap */
	q->mq_ops = set->ops;

@@ -4429,8 +4426,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
	if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
		set->nr_hw_queues = nr_cpu_ids;

	if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0)
	if (set->flags & BLK_MQ_F_BLOCKING) {
		set->srcu = kmalloc(sizeof(*set->srcu), GFP_KERNEL);
		if (!set->srcu)
			return -ENOMEM;
		ret = init_srcu_struct(set->srcu);
		if (ret)
			goto out_free_srcu;
	}

	ret = blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues);
	if (ret)
		goto out_cleanup_srcu;

	ret = -ENOMEM;
	for (i = 0; i < set->nr_maps; i++) {
@@ -4460,6 +4467,12 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
	}
	kfree(set->tags);
	set->tags = NULL;
out_cleanup_srcu:
	if (set->flags & BLK_MQ_F_BLOCKING)
		cleanup_srcu_struct(set->srcu);
out_free_srcu:
	if (set->flags & BLK_MQ_F_BLOCKING)
		kfree(set->srcu);
	return ret;
}
EXPORT_SYMBOL(blk_mq_alloc_tag_set);
@@ -4499,6 +4512,10 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)

	kfree(set->tags);
	set->tags = NULL;
	if (set->flags & BLK_MQ_F_BLOCKING) {
		cleanup_srcu_struct(set->srcu);
		kfree(set->srcu);
	}
}
EXPORT_SYMBOL(blk_mq_free_tag_set);

+7 −7
Original line number Diff line number Diff line
@@ -377,17 +377,17 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
/* run the code block in @dispatch_ops with rcu/srcu read lock held */
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)	\
do {								\
	if (!blk_queue_has_srcu(q)) {				\
		rcu_read_lock();				\
		(dispatch_ops);					\
		rcu_read_unlock();				\
	} else {						\
	if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) {		\
		int srcu_idx;					\
								\
		might_sleep_if(check_sleep);			\
		srcu_idx = srcu_read_lock((q)->srcu);		\
		srcu_idx = srcu_read_lock((q)->tag_set->srcu);	\
		(dispatch_ops);					\
		srcu_read_unlock((q)->srcu, srcu_idx);		\
		srcu_read_unlock((q)->tag_set->srcu, srcu_idx);	\
	} else {						\
		rcu_read_lock();				\
		(dispatch_ops);					\
		rcu_read_unlock();				\
	}							\
} while (0)

+2 −7
Original line number Diff line number Diff line
@@ -742,10 +742,8 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,

static void blk_free_queue_rcu(struct rcu_head *rcu_head)
{
	struct request_queue *q = container_of(rcu_head, struct request_queue,
					       rcu_head);

	kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
	kmem_cache_free(blk_requestq_cachep,
			container_of(rcu_head, struct request_queue, rcu_head));
}

/**
@@ -782,9 +780,6 @@ static void blk_release_queue(struct kobject *kobj)
	if (queue_is_mq(q))
		blk_mq_release(q);

	if (blk_queue_has_srcu(q))
		cleanup_srcu_struct(q->srcu);

	ida_free(&blk_queue_ida, q->id);
	call_rcu(&q->rcu_head, blk_free_queue_rcu);
}
+1 −8
Original line number Diff line number Diff line
@@ -27,7 +27,6 @@ struct blk_flush_queue {
};

extern struct kmem_cache *blk_requestq_cachep;
extern struct kmem_cache *blk_requestq_srcu_cachep;
extern struct kobj_type blk_queue_ktype;
extern struct ida blk_queue_ida;

@@ -429,13 +428,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset,
		unsigned int max_sectors, bool *same_page);

static inline struct kmem_cache *blk_get_queue_kmem_cache(bool srcu)
{
	if (srcu)
		return blk_requestq_srcu_cachep;
	return blk_requestq_cachep;
}
struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu);
struct request_queue *blk_alloc_queue(int node_id);

int disk_scan_partitions(struct gendisk *disk, fmode_t mode);

Loading