Commit 0f783995 authored by Tejun Heo, committed by Jens Axboe
Browse files

Revert "block/mq-deadline: Add cgroup support"



This reverts commit 08a9ad8b ("block/mq-deadline: Add cgroup support")
and a follow-up commit c06bc5a3 ("block/mq-deadline: Remove a
WARN_ON_ONCE() call"). The added cgroup support has the following issues:

* It breaks the cgroup interface file format rule by adding custom elements to
  a nested key-value file.

* It registers mq-deadline as a cgroup-aware policy even though all it's
  doing is collecting per-cgroup stats. Even if we need these stats, this
  isn't the right way to add them.

* It hasn't been reviewed from cgroup side.

Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 11431e26
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -9,12 +9,6 @@ config MQ_IOSCHED_DEADLINE
	help
	  MQ version of the deadline IO scheduler.

config MQ_IOSCHED_DEADLINE_CGROUP
       tristate
       default y
       depends on MQ_IOSCHED_DEADLINE
       depends on BLK_CGROUP

config MQ_IOSCHED_KYBER
	tristate "Kyber I/O scheduler"
	default y
+0 −2
Original line number Diff line number Diff line
@@ -22,8 +22,6 @@ obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
obj-$(CONFIG_BLK_CGROUP_IOCOST)	+= blk-iocost.o
obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
mq-deadline-y += mq-deadline-main.o
mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o
obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o

block/mq-deadline-cgroup.c

deleted100644 → 0
+0 −126
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include <linux/blk-cgroup.h>
#include <linux/ioprio.h>

#include "mq-deadline-cgroup.h"

static struct blkcg_policy dd_blkcg_policy;

/*
 * Allocate the per-cgroup mq-deadline data (struct dd_blkcg) together with
 * its zero-initialized per-CPU statistics.
 *
 * @gfp: allocation flags supplied by the caller; applied to both the
 *       dd_blkcg structure and the per-CPU statistics.
 *
 * Returns a pointer to the embedded blkcg_policy_data, or NULL if either
 * allocation fails (nothing is leaked in that case).
 */
static struct blkcg_policy_data *dd_cpd_alloc(gfp_t gfp)
{
	struct dd_blkcg *pd;

	pd = kzalloc(sizeof(*pd), gfp);
	if (!pd)
		return NULL;
	/* Honour the caller's gfp mask instead of hard-coding GFP_KERNEL. */
	pd->stats = alloc_percpu_gfp(typeof(*pd->stats), gfp | __GFP_ZERO);
	if (!pd->stats) {
		kfree(pd);
		return NULL;
	}
	return &pd->cpd;
}

static void dd_cpd_free(struct blkcg_policy_data *cpd)
{
	struct dd_blkcg *dd_blkcg = container_of(cpd, typeof(*dd_blkcg), cpd);

	free_percpu(dd_blkcg->stats);
	kfree(dd_blkcg);
}

/*
 * Map per-(cgroup, queue) policy data to the per-cgroup mq-deadline data of
 * the block cgroup that owns @pd's blkg.
 */
static struct dd_blkcg *dd_blkcg_from_pd(struct blkg_policy_data *pd)
{
	struct blkcg_policy_data *cpd;

	cpd = blkcg_to_cpd(pd->blkg->blkcg, &dd_blkcg_policy);
	return container_of(cpd, struct dd_blkcg, cpd);
}

/*
 * Look up the per-cgroup mq-deadline data (struct dd_blkcg) for the block
 * cgroup that @bio is associated with.
 *
 * @bio: bio to inspect; may be NULL.
 *
 * Returns NULL if @bio is NULL or if no mq-deadline policy data is attached
 * to the bio's blkg. A NULL result is accepted by ddcg_count(), which then
 * counts nothing.
 */
struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
{
	struct blkg_policy_data *pd;

	/* Callers pass rq->bio directly, so tolerate a request without a bio. */
	if (!bio)
		return NULL;

	pd = blkg_to_pd(bio->bi_blkg, &dd_blkcg_policy);
	if (!pd)
		return NULL;

	return dd_blkcg_from_pd(pd);
}

/*
 * Format the per-cgroup mq-deadline statistics into @buf, one
 * " [CLASS] dispatched=... inserted=... merged=..." group per I/O priority
 * class slot.
 *
 * @pd:   policy data identifying the cgroup whose statistics are reported.
 * @buf:  output buffer.
 * @size: size of @buf in bytes.
 *
 * Returns the number of characters written, as accumulated from scnprintf().
 */
static size_t dd_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
{
	static const char *const class_label[] = {
		[IOPRIO_CLASS_NONE]	= "NONE",
		[IOPRIO_CLASS_RT]	= "RT",
		[IOPRIO_CLASS_BE]	= "BE",
		[IOPRIO_CLASS_IDLE]	= "IDLE",
	};
	struct dd_blkcg *ddcg = dd_blkcg_from_pd(pd);
	int len = 0;
	u8 cls = 0;

	while (cls < ARRAY_SIZE(ddcg->stats->stats)) {
		/* Values are differences of running counters, not raw totals. */
		const u32 nr_dispatched = ddcg_sum(ddcg, dispatched, cls) +
					  ddcg_sum(ddcg, merged, cls) -
					  ddcg_sum(ddcg, completed, cls);
		const u32 nr_inserted = ddcg_sum(ddcg, inserted, cls) -
					ddcg_sum(ddcg, completed, cls);
		const u32 nr_merged = ddcg_sum(ddcg, merged, cls);

		len += scnprintf(buf + len, size - len,
			" [%s] dispatched=%u inserted=%u merged=%u",
			class_label[cls], nr_dispatched, nr_inserted,
			nr_merged);
		cls++;
	}

	return len;
}

/*
 * Allocate zeroed per-(cgroup, request queue) policy data. @q and @blkcg are
 * not used by this implementation.
 *
 * Returns the embedded blkg_policy_data, or NULL on allocation failure.
 */
static struct blkg_policy_data *dd_pd_alloc(gfp_t gfp, struct request_queue *q,
					    struct blkcg *blkcg)
{
	struct dd_blkg *dd_blkg = kzalloc(sizeof(*dd_blkg), gfp);

	return dd_blkg ? &dd_blkg->pd : NULL;
}

static void dd_pd_free(struct blkg_policy_data *pd)
{
	struct dd_blkg *dd_blkg = container_of(pd, typeof(*dd_blkg), pd);

	kfree(dd_blkg);
}

/*
 * blkcg policy descriptor for mq-deadline. It only wires up allocation,
 * release and statistics callbacks; it is registered in dd_blkcg_init() and
 * activated per request queue in dd_activate_policy().
 */
static struct blkcg_policy dd_blkcg_policy = {
	/* Per-cgroup data (struct dd_blkcg, holds the per-CPU statistics). */
	.cpd_alloc_fn		= dd_cpd_alloc,
	.cpd_free_fn		= dd_cpd_free,

	/* Per-(cgroup, request queue) data (struct dd_blkg) and stat output. */
	.pd_alloc_fn		= dd_pd_alloc,
	.pd_free_fn		= dd_pd_free,
	.pd_stat_fn		= dd_pd_stat,
};

/*
 * Activate the mq-deadline blkcg policy on request queue @q.
 * Returns the result of blkcg_activate_policy().
 */
int dd_activate_policy(struct request_queue *q)
{
	return blkcg_activate_policy(q, &dd_blkcg_policy);
}

/* Deactivate the mq-deadline blkcg policy on request queue @q. */
void dd_deactivate_policy(struct request_queue *q)
{
	blkcg_deactivate_policy(q, &dd_blkcg_policy);
}

/*
 * Register the mq-deadline blkcg policy.
 * Returns the result of blkcg_policy_register().
 */
int __init dd_blkcg_init(void)
{
	return blkcg_policy_register(&dd_blkcg_policy);
}

/* Unregister the mq-deadline blkcg policy. */
void __exit dd_blkcg_exit(void)
{
	blkcg_policy_unregister(&dd_blkcg_policy);
}

block/mq-deadline-cgroup.h

deleted100644 → 0
+0 −114
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */

#if !defined(_MQ_DEADLINE_CGROUP_H_)
#define _MQ_DEADLINE_CGROUP_H_

#include <linux/blk-cgroup.h>

struct request_queue;

/**
 * struct io_stats_per_prio - I/O statistics per I/O priority class.
 * @inserted: Number of inserted requests.
 * @merged: Number of merged requests.
 * @dispatched: Number of dispatched requests.
 * @completed: Number of I/O completions.
 *
 * The counters are local_t values: ddcg_count() increments them with
 * local_inc() and ddcg_sum() reads them with local_read().
 */
struct io_stats_per_prio {
	local_t inserted;
	local_t merged;
	local_t dispatched;
	local_t completed;
};

/*
 * I/O statistics per I/O cgroup per I/O priority class (IOPRIO_CLASS_*).
 * The array is indexed by the priority class value passed to ddcg_count()
 * and ddcg_sum().
 * NOTE(review): the size 4 presumably matches the number of IOPRIO_CLASS_*
 * values - confirm against <linux/ioprio.h>.
 */
struct blkcg_io_stats {
	struct io_stats_per_prio stats[4];
};

/**
 * struct dd_blkcg - Per cgroup data.
 * @cpd: blkcg_policy_data structure. Code converts between a pointer to
 *	this member and the enclosing dd_blkcg with container_of().
 * @stats: Per-CPU I/O statistics, updated via ddcg_count() and summed via
 *	ddcg_sum().
 */
struct dd_blkcg {
	struct blkcg_policy_data cpd;	/* must be the first member */
	struct blkcg_io_stats __percpu *stats;
};

/*
 * Count one event of type 'event_type' and with I/O priority class
 * 'prio_class'.
 *
 * Does nothing when 'ddcg' is NULL. Otherwise the counter belonging to the
 * current CPU is incremented between get_cpu_ptr() and put_cpu_ptr().
 * The build fails unless 'ddcg' has type struct dd_blkcg * and 'prio_class'
 * has type u8. 'event_type' must name a member of struct io_stats_per_prio.
 */
#define ddcg_count(ddcg, event_type, prio_class) do {			\
if (ddcg) {								\
	struct blkcg_io_stats *io_stats = get_cpu_ptr((ddcg)->stats);	\
									\
	BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *));		\
	BUILD_BUG_ON(!__same_type((prio_class), u8));			\
	local_inc(&io_stats->stats[(prio_class)].event_type);		\
	put_cpu_ptr(io_stats);						\
}									\
} while (0)

/*
 * Returns the total number of ddcg_count(ddcg, event_type, prio_class) calls
 * across all CPUs. No locking or barriers since it is fine if the returned
 * sum is slightly outdated.
 *
 * The result is a u32 accumulated over the CPUs visited by
 * for_each_present_cpu(). Unlike ddcg_count(), 'ddcg' is dereferenced
 * unconditionally and therefore must not be NULL. The build fails unless
 * 'ddcg' has type struct dd_blkcg * and 'prio' has type u8.
 */
#define ddcg_sum(ddcg, event_type, prio) ({				\
	unsigned int cpu;						\
	u32 sum = 0;							\
									\
	BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *));		\
	BUILD_BUG_ON(!__same_type((prio), u8));				\
	for_each_present_cpu(cpu)					\
		sum += local_read(&per_cpu_ptr((ddcg)->stats, cpu)->	\
				  stats[(prio)].event_type);		\
	sum;								\
})

#ifdef CONFIG_BLK_CGROUP

/**
 * struct dd_blkg - Per (cgroup, request queue) data.
 * @pd: blkg_policy_data structure.
 *
 * The structure carries no mq-deadline specific fields of its own; it only
 * wraps the blkg_policy_data.
 */
struct dd_blkg {
	struct blkg_policy_data pd;	/* must be the first member */
};

/*
 * Interface used when CONFIG_BLK_CGROUP is enabled. Inline stubs with the
 * same names are provided in the #else branch below.
 */
struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio);
int dd_activate_policy(struct request_queue *q);
void dd_deactivate_policy(struct request_queue *q);
int __init dd_blkcg_init(void);
void __exit dd_blkcg_exit(void);

#else /* CONFIG_BLK_CGROUP */

/* !CONFIG_BLK_CGROUP stub: there is never any per-cgroup data. */
static inline struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
{
	return NULL;
}

/* !CONFIG_BLK_CGROUP stub: nothing to activate, always reports success. */
static inline int dd_activate_policy(struct request_queue *q)
{
	return 0;
}

/* !CONFIG_BLK_CGROUP stub: nothing to deactivate. */
static inline void dd_deactivate_policy(struct request_queue *q)
{
}

/* !CONFIG_BLK_CGROUP stub: nothing to register, always reports success. */
static inline int dd_blkcg_init(void)
{
	return 0;
}

/* !CONFIG_BLK_CGROUP stub: nothing to unregister. */
static inline void dd_blkcg_exit(void)
{
}

#endif /* CONFIG_BLK_CGROUP */

#endif /* _MQ_DEADLINE_CGROUP_H_ */
+14 −59
Original line number Diff line number Diff line
@@ -25,7 +25,6 @@
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"
#include "mq-deadline-cgroup.h"

/*
 * See Documentation/block/deadline-iosched.rst
@@ -57,6 +56,14 @@ enum dd_prio {

enum { DD_PRIO_COUNT = 3 };

/*
 * I/O statistics per I/O priority. One instance exists for each entry of
 * the stats[] array in struct io_stats.
 */
struct io_stats_per_prio {
	local_t inserted;	/* number of inserted requests */
	local_t merged;		/* number of merged requests */
	local_t dispatched;	/* number of dispatched requests */
	local_t completed;	/* number of I/O completions */
};

/* I/O statistics for all I/O priorities (enum dd_prio). */
struct io_stats {
	struct io_stats_per_prio stats[DD_PRIO_COUNT];
@@ -79,9 +86,6 @@ struct deadline_data {
	 * run time data
	 */

	/* Request queue that owns this data structure. */
	struct request_queue *queue;

	struct dd_per_prio per_prio[DD_PRIO_COUNT];

	/* Data direction of latest dispatched request. */
@@ -234,10 +238,8 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
	struct deadline_data *dd = q->elevator->elevator_data;
	const u8 ioprio_class = dd_rq_ioclass(next);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
	struct dd_blkcg *blkcg = next->elv.priv[0];

	dd_count(dd, merged, prio);
	ddcg_count(blkcg, merged, ioprio_class);

	/*
	 * if next expires before rq, assign its expire time to rq
@@ -375,7 +377,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
{
	struct request *rq, *next_rq;
	enum dd_data_dir data_dir;
	struct dd_blkcg *blkcg;
	enum dd_prio prio;
	u8 ioprio_class;

@@ -474,8 +475,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
	ioprio_class = dd_rq_ioclass(rq);
	prio = ioprio_class_to_prio[ioprio_class];
	dd_count(dd, dispatched, prio);
	blkcg = rq->elv.priv[0];
	ddcg_count(blkcg, dispatched, ioprio_class);
	/*
	 * If the request needs its target zone locked, do it.
	 */
@@ -569,8 +568,6 @@ static void dd_exit_sched(struct elevator_queue *e)
	struct deadline_data *dd = e->elevator_data;
	enum dd_prio prio;

	dd_deactivate_policy(dd->queue);

	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
		struct dd_per_prio *per_prio = &dd->per_prio[prio];

@@ -584,7 +581,7 @@ static void dd_exit_sched(struct elevator_queue *e)
}

/*
 * Initialize elevator private data (deadline_data) and associate with blkcg.
 * initialize elevator private data (deadline_data).
 */
static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
{
@@ -593,12 +590,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
	enum dd_prio prio;
	int ret = -ENOMEM;

	/*
	 * Initialization would be very tricky if the queue is not frozen,
	 * hence the warning statement below.
	 */
	WARN_ON_ONCE(!percpu_ref_is_zero(&q->q_usage_counter));

	eq = elevator_alloc(q, e);
	if (!eq)
		return ret;
@@ -614,8 +605,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
	if (!dd->stats)
		goto free_dd;

	dd->queue = q;

	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
		struct dd_per_prio *per_prio = &dd->per_prio[prio];

@@ -635,17 +624,9 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
	spin_lock_init(&dd->lock);
	spin_lock_init(&dd->zone_lock);

	ret = dd_activate_policy(q);
	if (ret)
		goto free_stats;

	ret = 0;
	q->elevator = eq;
	return 0;

free_stats:
	free_percpu(dd->stats);

free_dd:
	kfree(dd);

@@ -718,7 +699,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
	struct dd_per_prio *per_prio;
	enum dd_prio prio;
	struct dd_blkcg *blkcg;
	LIST_HEAD(free);

	lockdep_assert_held(&dd->lock);
@@ -729,18 +709,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
	 */
	blk_req_zone_write_unlock(rq);

	/*
	 * If a block cgroup has been associated with the submitter and if an
	 * I/O priority has been set in the associated block cgroup, use the
	 * lowest of the cgroup priority and the request priority for the
	 * request. If no priority has been set in the request, use the cgroup
	 * priority.
	 */
	prio = ioprio_class_to_prio[ioprio_class];
	dd_count(dd, inserted, prio);
	blkcg = dd_blkcg_from_bio(rq->bio);
	ddcg_count(blkcg, inserted, ioprio_class);
	rq->elv.priv[0] = blkcg;

	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
		blk_mq_free_requests(&free);
@@ -789,10 +759,12 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
	spin_unlock(&dd->lock);
}

/* Callback from inside blk_mq_rq_ctx_init(). */
/*
 * Nothing to do here. This is defined only to ensure that .finish_request
 * method is called upon request completion.
 */
static void dd_prepare_request(struct request *rq)
{
	rq->elv.priv[0] = NULL;
}

/*
@@ -815,13 +787,11 @@ static void dd_finish_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct deadline_data *dd = q->elevator->elevator_data;
	struct dd_blkcg *blkcg = rq->elv.priv[0];
	const u8 ioprio_class = dd_rq_ioclass(rq);
	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
	struct dd_per_prio *per_prio = &dd->per_prio[prio];

	dd_count(dd, completed, prio);
	ddcg_count(blkcg, completed, ioprio_class);

	if (blk_queue_is_zoned(q)) {
		unsigned long flags;
@@ -1144,26 +1114,11 @@ MODULE_ALIAS("mq-deadline-iosched");

static int __init deadline_init(void)
{
	int ret;

	ret = elv_register(&mq_deadline);
	if (ret)
		goto out;
	ret = dd_blkcg_init();
	if (ret)
		goto unreg;

out:
	return ret;

unreg:
	elv_unregister(&mq_deadline);
	goto out;
	return elv_register(&mq_deadline);
}

static void __exit deadline_exit(void)
{
	dd_blkcg_exit();
	elv_unregister(&mq_deadline);
}