Commit faffaab2 authored by Tejun Heo's avatar Tejun Heo Committed by Jens Axboe
Browse files

blkcg: Restructure blkg_conf_prep() and friends



We want to support lazy init of rq-qos policies so that iolatency is enabled
lazily on configuration instead of gendisk initialization. The way blkg
config helpers are structured now is a bit awkward for that. Let's
restructure:

* blkcg_conf_open_bdev() is renamed to blkg_conf_open_bdev(). The blkcg_
  prefix was used because the bdev opening step is blkg-independent.
  However, the distinction is too subtle and confuses more than helps. Let's
  switch to blkg prefix so that it's consistent with the type and other
  helper names.

* struct blkg_conf_ctx now remembers the original input string and is always
  initialized by the new blkg_conf_init().

* blkg_conf_open_bdev() is updated to take a pointer to blkg_conf_ctx like
  blkg_conf_prep() and can be called multiple times safely. Instead of
  modifying the double pointer to input string directly,
  blkg_conf_open_bdev() now sets blkg_conf_ctx->body.

* blkg_conf_finish() is renamed to blkg_conf_exit() for symmetry and now
  must be called on all blkg_conf_ctx's which were initialized with
  blkg_conf_init().

Combined, this allows the users to either open the bdev first or do it
altogether with blkg_conf_prep() which will help implementing lazy init of
rq-qos policies.

blkg_conf_init/exit() will also be used implement synchronization against
device removal. This is necessary because iolat / iocost are configured
through cgroupfs instead of one of the files under /sys/block/DEVICE. As
cgroupfs operations aren't synchronized with block layer, the lazy init and
other configuration operations may race against device removal. This patch
makes blkg_conf_init/exit() used consistently for all cgroup-orginating
configurations making them a good place to implement explicit
synchronization.

Users are updated accordingly. No behavior change is intended by this patch.

v2: bfq wasn't updated in v1 causing a build error. Fixed.

v3: Update the description to include future use of blkg_conf_init/exit() as
    synchronization points.

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Yu Kuai <yukuai1@huaweicloud.com>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20230413000649.115785-3-tj@kernel.org


Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 83462a6c
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -1105,9 +1105,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
	struct bfq_group *bfqg;
	u64 v;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
	blkg_conf_init(&ctx, buf);

	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
	if (ret)
		return ret;
		goto out;

	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* require "default" on dfl */
@@ -1129,7 +1131,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
		ret = 0;
	}
out:
	blkg_conf_finish(&ctx);
	blkg_conf_exit(&ctx);
	return ret ?: nbytes;
}

+67 −38
Original line number Diff line number Diff line
@@ -653,69 +653,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
 * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
 * @inputp: input string pointer
 * blkg_conf_init - initialize a blkg_conf_ctx
 * @ctx: blkg_conf_ctx to initialize
 * @input: input string
 *
 * Initialize @ctx which can be used to parse blkg config input string @input.
 * Once initialized, @ctx can be used with blkg_conf_open_bdev() and
 * blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
 */
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
{
	*ctx = (struct blkg_conf_ctx){ .input = input };
}
EXPORT_SYMBOL_GPL(blkg_conf_init);

/**
 * blkg_conf_open_bdev - parse and open bdev for per-blkg config update
 * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
 *
 * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
 * from @input and get and return the matching bdev.  *@inputp is
 * updated to point past the device node prefix.  Returns an ERR_PTR()
 * value on error.
 * Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
 * @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
 * set to point past the device node prefix.
 *
 * Use this function iff blkg_conf_prep() can't be used for some reason.
 * This function may be called multiple times on @ctx and the extra calls become
 * NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
 * explicitly if bdev access is needed without resolving the blkcg / policy part
 * of @ctx->input. Returns -errno on error.
 */
struct block_device *blkcg_conf_open_bdev(char **inputp)
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
{
	char *input = *inputp;
	char *input = ctx->input;
	unsigned int major, minor;
	struct block_device *bdev;
	int key_len;

	if (ctx->bdev)
		return 0;

	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
		return ERR_PTR(-EINVAL);
		return -EINVAL;

	input += key_len;
	if (!isspace(*input))
		return ERR_PTR(-EINVAL);
		return -EINVAL;
	input = skip_spaces(input);

	bdev = blkdev_get_no_open(MKDEV(major, minor));
	if (!bdev)
		return ERR_PTR(-ENODEV);
		return -ENODEV;
	if (bdev_is_partition(bdev)) {
		blkdev_put_no_open(bdev);
		return ERR_PTR(-ENODEV);
		return -ENODEV;
	}

	*inputp = input;
	return bdev;
	ctx->body = input;
	ctx->bdev = bdev;
	return 0;
}

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @pol: target policy
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
 *
 * Parse per-blkg config update from @ctx->input and initialize @ctx
 * accordingly. On success, @ctx->body points to the part of @ctx->input
 * following MAJ:MIN, @ctx->bdev points to the target block device and
 * @ctx->blkg to the blkg being configured.
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->body the
 * part of @input following MAJ:MIN.  This function returns with queue lock
 * held and must be paired with blkg_conf_finish().
 * blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this
 * function returns with queue lock held and must be followed by
 * blkg_conf_exit().
 */
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx)
		   struct blkg_conf_ctx *ctx)
	__acquires(&bdev->bd_queue->queue_lock)
{
	struct block_device *bdev;
	struct gendisk *disk;
	struct request_queue *q;
	struct blkcg_gq *blkg;
	int ret;

	bdev = blkcg_conf_open_bdev(&input);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	disk = bdev->bd_disk;
	ret = blkg_conf_open_bdev(ctx);
	if (ret)
		return ret;

	disk = ctx->bdev->bd_disk;
	q = disk->queue;

	/*
@@ -793,9 +817,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
	}
success:
	blk_queue_exit(q);
	ctx->bdev = bdev;
	ctx->blkg = blkg;
	ctx->body = input;
	return 0;

fail_preloaded:
@@ -805,7 +827,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
fail_exit_queue:
	blk_queue_exit(q);
fail:
	blkdev_put_no_open(bdev);
	/*
	 * If queue was bypassing, we should retry.  Do so after a
	 * short msleep().  It isn't strictly necessary but queue
@@ -821,19 +842,27 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 * blkg_conf_exit - clean up per-blkg config update
 * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 * Clean up after per-blkg config update. This function must be called on all
 * blkg_conf_ctx's initialized with blkg_conf_init().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
	__releases(&ctx->bdev->bd_queue->queue_lock)
{
	if (ctx->blkg) {
		spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
		ctx->blkg = NULL;
	}

	if (ctx->bdev) {
		blkdev_put_no_open(ctx->bdev);
		ctx->body = NULL;
		ctx->bdev = NULL;
	}
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
EXPORT_SYMBOL_GPL(blkg_conf_exit);

static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
{
+6 −4
Original line number Diff line number Diff line
@@ -206,15 +206,17 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);

struct blkg_conf_ctx {
	char				*input;
	char				*body;
	struct block_device		*bdev;
	struct blkcg_gq			*blkg;
	char				*body;
};

struct block_device *blkcg_conf_open_bdev(char **inputp);
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
		   struct blkg_conf_ctx *ctx);
void blkg_conf_exit(struct blkg_conf_ctx *ctx);

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
+34 −24
Original line number Diff line number Diff line
@@ -3106,9 +3106,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
		return nbytes;
	}

	ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
	blkg_conf_init(&ctx, buf);

	ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
	if (ret)
		return ret;
		goto err;

	iocg = blkg_to_iocg(ctx.blkg);

@@ -3127,12 +3129,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
	weight_updated(iocg, &now);
	spin_unlock(&iocg->ioc->lock);

	blkg_conf_finish(&ctx);
	blkg_conf_exit(&ctx);
	return nbytes;

einval:
	blkg_conf_finish(&ctx);
	return -EINVAL;
	ret = -EINVAL;
err:
	blkg_conf_exit(&ctx);
	return ret;
}

static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
@@ -3189,19 +3193,22 @@ static const match_table_t qos_tokens = {
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
			     size_t nbytes, loff_t off)
{
	struct block_device *bdev;
	struct blkg_conf_ctx ctx;
	struct gendisk *disk;
	struct ioc *ioc;
	u32 qos[NR_QOS_PARAMS];
	bool enable, user;
	char *p;
	char *body, *p;
	int ret;

	bdev = blkcg_conf_open_bdev(&input);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	blkg_conf_init(&ctx, input);

	disk = bdev->bd_disk;
	ret = blkg_conf_open_bdev(&ctx);
	if (ret)
		goto err;

	body = ctx.body;
	disk = ctx.bdev->bd_disk;
	if (!queue_is_mq(disk->queue)) {
		ret = -EOPNOTSUPP;
		goto err;
@@ -3223,7 +3230,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
	enable = ioc->enabled;
	user = ioc->user_qos_params;

	while ((p = strsep(&input, " \t\n"))) {
	while ((p = strsep(&body, " \t\n"))) {
		substring_t args[MAX_OPT_ARGS];
		char buf[32];
		int tok;
@@ -3313,7 +3320,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
	blk_mq_unquiesce_queue(disk->queue);
	blk_mq_unfreeze_queue(disk->queue);

	blkdev_put_no_open(bdev);
	blkg_conf_exit(&ctx);
	return nbytes;
einval:
	spin_unlock_irq(&ioc->lock);
@@ -3323,7 +3330,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,

	ret = -EINVAL;
err:
	blkdev_put_no_open(bdev);
	blkg_conf_exit(&ctx);
	return ret;
}

@@ -3376,19 +3383,22 @@ static const match_table_t i_lcoef_tokens = {
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
				    size_t nbytes, loff_t off)
{
	struct block_device *bdev;
	struct blkg_conf_ctx ctx;
	struct request_queue *q;
	struct ioc *ioc;
	u64 u[NR_I_LCOEFS];
	bool user;
	char *p;
	char *body, *p;
	int ret;

	bdev = blkcg_conf_open_bdev(&input);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	blkg_conf_init(&ctx, input);

	ret = blkg_conf_open_bdev(&ctx);
	if (ret)
		goto err;

	q = bdev_get_queue(bdev);
	body = ctx.body;
	q = bdev_get_queue(ctx.bdev);
	if (!queue_is_mq(q)) {
		ret = -EOPNOTSUPP;
		goto err;
@@ -3396,7 +3406,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,

	ioc = q_to_ioc(q);
	if (!ioc) {
		ret = blk_iocost_init(bdev->bd_disk);
		ret = blk_iocost_init(ctx.bdev->bd_disk);
		if (ret)
			goto err;
		ioc = q_to_ioc(q);
@@ -3409,7 +3419,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
	memcpy(u, ioc->params.i_lcoefs, sizeof(u));
	user = ioc->user_cost_model;

	while ((p = strsep(&input, " \t\n"))) {
	while ((p = strsep(&body, " \t\n"))) {
		substring_t args[MAX_OPT_ARGS];
		char buf[32];
		int tok;
@@ -3456,7 +3466,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);

	blkdev_put_no_open(bdev);
	blkg_conf_exit(&ctx);
	return nbytes;

einval:
@@ -3467,7 +3477,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,

	ret = -EINVAL;
err:
	blkdev_put_no_open(bdev);
	blkg_conf_exit(&ctx);
	return ret;
}

+5 −3
Original line number Diff line number Diff line
@@ -836,9 +836,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
	u64 oldval;
	int ret;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
	blkg_conf_init(&ctx, buf);

	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx);
	if (ret)
		return ret;
		goto out;

	iolat = blkg_to_lat(ctx.blkg);
	p = ctx.body;
@@ -874,7 +876,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
		iolatency_clear_scaling(blkg);
	ret = 0;
out:
	blkg_conf_finish(&ctx);
	blkg_conf_exit(&ctx);
	return ret ?: nbytes;
}

Loading