Commit 2bd85221 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Jens Axboe
Browse files

block: untangle request_queue refcounting from sysfs



The kobject embedded into the request_queue is used for the queue
directory in sysfs, but that is a child of the gendisks directory and is
intimately tied to it.  Move this kobject to the gendisk and use a
refcount_t in the request_queue for the actual request_queue refcounting
that is completely unrelated to the device model.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20221114042637.1009333-5-hch@lst.de


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 40602997
Loading
Loading
Loading
Loading
+34 −8
Original line number Diff line number Diff line
@@ -59,12 +59,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert);

DEFINE_IDA(blk_queue_ida);
static DEFINE_IDA(blk_queue_ida);

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;
static struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
@@ -252,19 +252,46 @@ void blk_clear_pm_only(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_clear_pm_only);

static void blk_free_queue_rcu(struct rcu_head *rcu_head)
{
	kmem_cache_free(blk_requestq_cachep,
			container_of(rcu_head, struct request_queue, rcu_head));
}

static void blk_free_queue(struct request_queue *q)
{
	might_sleep();

	percpu_ref_exit(&q->q_usage_counter);

	if (q->poll_stat)
		blk_stat_remove_callback(q, q->poll_cb);
	blk_stat_free_callback(q->poll_cb);

	blk_free_queue_stats(q->stats);
	kfree(q->poll_stat);

	if (queue_is_mq(q))
		blk_mq_release(q);

	ida_free(&blk_queue_ida, q->id);
	call_rcu(&q->rcu_head, blk_free_queue_rcu);
}

/**
 * blk_put_queue - decrement the request_queue refcount
 * @q: the request_queue structure to decrement the refcount for
 *
 * Decrements the refcount of the request_queue kobject. When this reaches 0
 * we'll have blk_release_queue() called.
 * Decrements the refcount of the request_queue and free it when the refcount
 * reaches 0.
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
	if (refcount_dec_and_test(&q->refs))
		blk_free_queue(q);
}
EXPORT_SYMBOL(blk_put_queue);

@@ -399,8 +426,7 @@ struct request_queue *blk_alloc_queue(int node_id)
	INIT_WORK(&q->timeout_work, blk_timeout_work);
	INIT_LIST_HEAD(&q->icq_list);

	kobject_init(&q->kobj, &blk_queue_ktype);

	refcount_set(&q->refs, 1);
	mutex_init(&q->debugfs_mutex);
	mutex_init(&q->sysfs_lock);
	mutex_init(&q->sysfs_dir_lock);
@@ -445,7 +471,7 @@ bool blk_get_queue(struct request_queue *q)
{
	if (unlikely(blk_queue_dying(q)))
		return false;
	kobject_get(&q->kobj);
	refcount_inc(&q->refs);
	return true;
}
EXPORT_SYMBOL(blk_get_queue);
+2 −2
Original line number Diff line number Diff line
@@ -140,8 +140,8 @@ int blk_crypto_sysfs_register(struct gendisk *disk)
		return -ENOMEM;
	obj->profile = q->crypto_profile;

	err = kobject_init_and_add(&obj->kobj, &blk_crypto_ktype, &q->kobj,
				   "crypto");
	err = kobject_init_and_add(&obj->kobj, &blk_crypto_ktype,
				   &disk->queue_kobj, "crypto");
	if (err) {
		kobject_put(&obj->kobj);
		return err;
+2 −1
Original line number Diff line number Diff line
@@ -123,7 +123,8 @@ int disk_register_independent_access_ranges(struct gendisk *disk)
	 */
	WARN_ON(iars->sysfs_registered);
	ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype,
				   &q->kobj, "%s", "independent_access_ranges");
				   &disk->queue_kobj, "%s",
				   "independent_access_ranges");
	if (ret) {
		disk->ia_ranges = NULL;
		kobject_put(&iars->kobj);
+23 −63
Original line number Diff line number Diff line
@@ -683,8 +683,8 @@ static struct attribute *queue_attrs[] = {
static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
				int n)
{
	struct request_queue *q =
		container_of(kobj, struct request_queue, kobj);
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (attr == &queue_io_timeout_entry.attr &&
		(!q->mq_ops || !q->mq_ops->timeout))
@@ -710,8 +710,8 @@ static ssize_t
queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct queue_sysfs_entry *entry = to_queue(attr);
	struct request_queue *q =
		container_of(kobj, struct request_queue, kobj);
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;
	ssize_t res;

	if (!entry->show)
@@ -727,63 +727,19 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
		    const char *page, size_t length)
{
	struct queue_sysfs_entry *entry = to_queue(attr);
	struct request_queue *q;
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;
	ssize_t res;

	if (!entry->store)
		return -EIO;

	q = container_of(kobj, struct request_queue, kobj);
	mutex_lock(&q->sysfs_lock);
	res = entry->store(q, page, length);
	mutex_unlock(&q->sysfs_lock);
	return res;
}

static void blk_free_queue_rcu(struct rcu_head *rcu_head)
{
	kmem_cache_free(blk_requestq_cachep,
			container_of(rcu_head, struct request_queue, rcu_head));
}

/**
 * blk_release_queue - releases all allocated resources of the request_queue
 * @kobj: pointer to a kobject, whose container is a request_queue
 *
 * This function releases all allocated resources of the request queue.
 *
 * The struct request_queue refcount is incremented with blk_get_queue() and
 * decremented with blk_put_queue(). Once the refcount reaches 0 this function
 * is called.
 *
 * Drivers exist which depend on the release of the request_queue to be
 * synchronous, it should not be deferred.
 *
 * Context: can sleep
 */
static void blk_release_queue(struct kobject *kobj)
{
	struct request_queue *q =
		container_of(kobj, struct request_queue, kobj);

	might_sleep();

	percpu_ref_exit(&q->q_usage_counter);

	if (q->poll_stat)
		blk_stat_remove_callback(q, q->poll_cb);
	blk_stat_free_callback(q->poll_cb);

	blk_free_queue_stats(q->stats);
	kfree(q->poll_stat);

	if (queue_is_mq(q))
		blk_mq_release(q);

	ida_free(&blk_queue_ida, q->id);
	call_rcu(&q->rcu_head, blk_free_queue_rcu);
}

static const struct sysfs_ops queue_sysfs_ops = {
	.show	= queue_attr_show,
	.store	= queue_attr_store,
@@ -794,10 +750,15 @@ static const struct attribute_group *blk_queue_attr_groups[] = {
	NULL
};

struct kobj_type blk_queue_ktype = {
static void blk_queue_release(struct kobject *kobj)
{
	/* nothing to do here, all data is associated with the parent gendisk */
}

static struct kobj_type blk_queue_ktype = {
	.default_groups = blk_queue_attr_groups,
	.sysfs_ops	= &queue_sysfs_ops,
	.release	= blk_release_queue,
	.release	= blk_queue_release,
};

static void blk_debugfs_remove(struct gendisk *disk)
@@ -823,20 +784,20 @@ int blk_register_queue(struct gendisk *disk)
	int ret;

	mutex_lock(&q->sysfs_dir_lock);
	ret = kobject_add(&q->kobj, &disk_to_dev(disk)->kobj, "queue");
	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
	if (ret < 0)
		goto out_unlock_dir;
		goto out_put_queue_kobj;

	if (queue_is_mq(q)) {
		ret = blk_mq_sysfs_register(disk);
		if (ret)
			goto out_del_queue_kobj;
			goto out_put_queue_kobj;
	}
	mutex_lock(&q->sysfs_lock);

	mutex_lock(&q->debugfs_mutex);
	q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
					    blk_debugfs_root);
	q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
	if (queue_is_mq(q))
		blk_mq_debugfs_register(q);
	mutex_unlock(&q->debugfs_mutex);
@@ -860,7 +821,7 @@ int blk_register_queue(struct gendisk *disk)
	blk_throtl_register(disk);

	/* Now everything is ready and send out KOBJ_ADD uevent */
	kobject_uevent(&q->kobj, KOBJ_ADD);
	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
	if (q->elevator)
		kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
	mutex_unlock(&q->sysfs_lock);
@@ -889,9 +850,8 @@ int blk_register_queue(struct gendisk *disk)
out_debugfs_remove:
	blk_debugfs_remove(disk);
	mutex_unlock(&q->sysfs_lock);
out_del_queue_kobj:
	kobject_del(&q->kobj);
out_unlock_dir:
out_put_queue_kobj:
	kobject_put(&disk->queue_kobj);
	mutex_unlock(&q->sysfs_dir_lock);
	return ret;
}
@@ -938,8 +898,8 @@ void blk_unregister_queue(struct gendisk *disk)
	mutex_unlock(&q->sysfs_lock);

	/* Now that we've deleted all child objects, we can delete the queue. */
	kobject_uevent(&q->kobj, KOBJ_REMOVE);
	kobject_del(&q->kobj);
	kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
	kobject_del(&disk->queue_kobj);
	mutex_unlock(&q->sysfs_dir_lock);

	blk_debugfs_remove(disk);
+0 −4
Original line number Diff line number Diff line
@@ -26,10 +26,6 @@ struct blk_flush_queue {
	spinlock_t		mq_flush_lock;
};

extern struct kmem_cache *blk_requestq_cachep;
extern struct kobj_type blk_queue_ktype;
extern struct ida blk_queue_ida;

bool is_flush_rq(struct request *req);

struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
Loading