Commit 449f81e0 authored by Yu Kuai
Browse files

blk-io-hierarchy: support new bio based stage wbt

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IB4E8P


CVE: NA

--------------------------------

Like blk-throttle, if wbt is enabled, the following new debugfs entries
will be created as well.

/sys/kernel/debug/block/sda/blk_io_hierarchy/
|-- wbt
|   |-- io_dump
|   |-- stats
|   `-- threshold

Users can use them to analyze how IO behaves in wbt.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
parent 1bf8f90f
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -37,4 +37,15 @@ config HIERARCHY_THROTTLE

	If unsure, say N.

config HIERARCHY_WBT
	bool "Enable hierarchy stats layer blk-wbt"
	default n
	depends on BLK_WBT
	help
	Enabling this lets blk hierarchy stats record additional information
	for blk-wbt. Such information can be helpful for debugging performance
	issues and problems like IO hangs.

	If unsure, say N.

endif
+1 −1
Original line number Diff line number Diff line
@@ -306,7 +306,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
		return;
	}

	rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
	rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb, NULL);
}

#define SCALE_DOWN_FACTOR 2
+8 −1
Original line number Diff line number Diff line
@@ -230,6 +230,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
 * @private_data: caller provided specific data
 * @acquire_inflight_cb: inc the rqw->inflight counter if we can
 * @cleanup_cb: the callback to cleanup in case we race with a waker
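 * @io_acct_cb: optional callback for io accounting, may be NULL; invoked with
 *	true before the caller starts waiting and with false once the wait is over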
 *
 * This provides a uniform place for the rq_qos users to do their throttling.
 * Since you can end up with a lot of things sleeping at once, this manages the
@@ -242,7 +243,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
 */
void rq_qos_wait(struct rq_wait *rqw, void *private_data,
		 acquire_inflight_cb_t *acquire_inflight_cb,
		 cleanup_cb_t *cleanup_cb)
		 cleanup_cb_t *cleanup_cb, io_acct_cb_t *io_acct_cb)
{
	struct rq_qos_wait_data data = {
		.wq = {
@@ -260,6 +261,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
	if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
		return;

	if (io_acct_cb)
		io_acct_cb(private_data, true);

	has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
						 TASK_UNINTERRUPTIBLE);
	do {
@@ -284,6 +288,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
		set_current_state(TASK_UNINTERRUPTIBLE);
	} while (1);
	finish_wait(&rqw->wait, &data.wq);

	if (io_acct_cb)
		io_acct_cb(private_data, false);
}

void rq_qos_exit(struct request_queue *q)
+2 −1
Original line number Diff line number Diff line
@@ -91,10 +91,11 @@ void rq_qos_del(struct rq_qos *rqos);

typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
typedef void (io_acct_cb_t)(void *private_data, bool start_acct);

void rq_qos_wait(struct rq_wait *rqw, void *private_data,
		 acquire_inflight_cb_t *acquire_inflight_cb,
		 cleanup_cb_t *cleanup_cb);
		 cleanup_cb_t *cleanup_cb, io_acct_cb_t *io_acct_cb);
bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
bool rq_depth_scale_up(struct rq_depth *rqd);
bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
+24 −7
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@

#ifndef __GENKSYMS__
#include "blk.h"
#include "blk-io-hierarchy/stats.h"
#endif

#define CREATE_TRACE_POINTS
@@ -564,38 +565,51 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
}

struct wbt_wait_data {
	struct bio *bio;
	struct rq_wb *rwb;
	enum wbt_flags wb_acct;
	blk_opf_t opf;
};

static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;
	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf));

	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->bio->bi_opf));
}

static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;

	wbt_rqw_done(data->rwb, rqw, data->wb_acct);
}

/*
 * IO accounting callback handed to rq_qos_wait(): @private_data is the
 * wbt_wait_data of the waiter. Ends STAGE_WBT hierarchy accounting for
 * its bio when @start is false, otherwise starts it.
 */
static void wbt_io_acct_cb(void *private_data, bool start)
{
	struct wbt_wait_data *wait_data = private_data;

	if (!start) {
		bio_hierarchy_end_io_acct(wait_data->bio, STAGE_WBT);
		return;
	}

	bio_hierarchy_start_io_acct(wait_data->bio, STAGE_WBT);
}

/*
 * Block if we will exceed our limit, or if we are currently waiting for
 * the timer to kick off queuing again.
 */
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
		       blk_opf_t opf)
		       struct bio *bio)
{
	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
	struct wbt_wait_data data = {
		.rwb = rwb,
		.wb_acct = wb_acct,
		.opf = opf,
		.bio = bio,
	};

	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb,
		    wbt_io_acct_cb);
}

static inline bool wbt_should_throttle(struct bio *bio)
@@ -659,7 +673,7 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
		return;
	}

	__wbt_wait(rwb, flags, bio->bi_opf);
	__wbt_wait(rwb, flags, bio);

	if (!blk_stat_is_active(rwb->cb))
		rwb_arm_timer(rwb);
@@ -773,8 +787,10 @@ static void wbt_queue_depth_changed(struct rq_qos *rqos)
static void wbt_exit(struct rq_qos *rqos)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct request_queue *q = rqos->disk->queue;

	blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
	blk_mq_unregister_hierarchy(q, STAGE_WBT);
	blk_stat_remove_callback(q, rwb->cb);
	blk_stat_free_callback(rwb->cb);
	kfree(rwb);
}
@@ -937,6 +953,7 @@ int wbt_init(struct gendisk *disk)
		goto err_free;

	blk_stat_add_callback(q, rwb->cb);
	blk_mq_register_hierarchy(q, STAGE_WBT);

	return 0;

Loading