Commit 1bf8f90f authored by Yu Kuai
Browse files

block-io-hierarchy: core hierarchy stats and iodump implementation

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IB4E8P


CVE: NA

--------------------------------

Add the main structure definitions and provide helpers that the different
IO stages use to record IO stats and to dump in-flight IO.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
parent 21dcd8a7
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -297,6 +297,11 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
#ifdef CONFIG_BLK_BIO_ALLOC_TASK
	bio->pid = get_pid(task_pid(current));
#endif

#ifdef CONFIG_BLK_IO_HIERARCHY_STATS
	bio->hierarchy_time = 0;
	INIT_LIST_HEAD(&bio->hierarchy_list);
#endif
}
EXPORT_SYMBOL(bio_init);

+4 −0
Original line number Diff line number Diff line
@@ -73,6 +73,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-io-hierarchy/stats.h"

/* PREFLUSH/FUA sequences */
enum {
@@ -343,6 +344,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
	flush_rq->end_io = flush_end_io;
	blk_rq_init_bi_alloc_time(flush_rq, first_rq);
	blk_mq_get_alloc_task(flush_rq, first_rq->bio);
	blk_rq_hierarchy_stats_init(flush_rq);
	/*
	 * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one
	 * implied in refcount_inc_not_zero() called from
@@ -373,6 +375,8 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
		blk_mq_put_driver_tag(rq);
	}

	blk_rq_hierarchy_set_flush_done(rq);

	/*
	 * After populating an empty queue, kick it to avoid stall.  Read
	 * the comment in flush_end_io().
+13 −0
Original line number Diff line number Diff line
@@ -13,6 +13,19 @@ menuconfig BLK_IO_HIERARCHY_STATS

if BLK_IO_HIERARCHY_STATS

config HIERARCHY_IO_DUMP
	bool "Support to dump io that is throttled"
	default n
	select BLK_BIO_ALLOC_TIME
	select BLK_BIO_ALLOC_TASK
	depends on BLK_DEV_IO_TRACE
	help
	Enabling this creates new debugfs entries that show detailed
	information about IO that has been submitted but has not completed
	yet. The result can be filtered by IO stage or by IO latency.

	If unsure, say N.

config HIERARCHY_THROTTLE
	bool "Enable hierarchy stats layer blk-throttle"
	default n
+1 −0
Original line number Diff line number Diff line
@@ -5,3 +5,4 @@
obj-$(CONFIG_BLK_IO_HIERARCHY_STATS) += blk_io_hierarchy_stats.o

blk_io_hierarchy_stats-y := stats.o debugfs.o
obj-$(CONFIG_HIERARCHY_IO_DUMP) += iodump.o
+125 −9
Original line number Diff line number Diff line
@@ -12,13 +12,49 @@
 */

#include <linux/debugfs.h>
#include <linux/blkdev.h>

#include "../blk-mq-debugfs.h"
#include "stats.h"
#include "iodump.h"

/*
 * Human-readable name of each stage group, indexed by enum stage_group.
 *
 * An entry is only initialized when the matching CONFIG_HIERARCHY_* option
 * is enabled; any slot without an initializer is NULL.
 *
 * The table is never modified, so both the strings and the pointer slots
 * are const.
 */
static const char * const stage_name[NR_STAGE_GROUPS] = {
#ifdef CONFIG_HIERARCHY_THROTTLE
	[STAGE_THROTTLE]	= "throtl",
#endif
#ifdef CONFIG_HIERARCHY_WBT
	[STAGE_WBT]		= "wbt",
#endif
#ifdef CONFIG_HIERARCHY_IOCOST
	[STAGE_IOCOST]		= "iocost",
#endif
#ifdef CONFIG_HIERARCHY_GETTAG
	[STAGE_GETTAG]		= "gettag",
#endif
#ifdef CONFIG_HIERARCHY_PLUG
	[STAGE_PLUG]		= "plug",
#endif
#ifdef CONFIG_HIERARCHY_DEADLINE
	[STAGE_DEADLINE]	= "deadline",
#endif
#ifdef CONFIG_HIERARCHY_BFQ
	[STAGE_BFQ]		= "bfq",
#endif
#ifdef CONFIG_HIERARCHY_KYBER
	[STAGE_KYBER]		= "kyber",
#endif
#ifdef CONFIG_HIERARCHY_HCTX
	[STAGE_HCTX]		= "hctx",
#endif
#ifdef CONFIG_HIERARCHY_REQUEUE
	[STAGE_REQUEUE]		= "requeue",
#endif
#ifdef CONFIG_HIERARCHY_RQ_DRIVER
	[STAGE_RQ_DRIVER]	= "rq_driver",
#endif
#ifdef CONFIG_HIERARCHY_BIO
	[STAGE_BIO]		= "bio",
#endif
};

const char *hierarchy_stage_name(enum stage_group stage)
@@ -26,26 +62,33 @@ const char *hierarchy_stage_name(enum stage_group stage)
	return stage_name[stage];
}

static int hierarchy_stats_show(void *data, struct seq_file *m)
static int __hierarchy_stats_show(struct hierarchy_stats_data *hstats_data,
				  struct seq_file *m, enum stage_group stage)
{
	struct hierarchy_stage *hstage = data;
	int cpu;
	u64 dispatched[NR_STAT_GROUPS] = {0};
	u64 completed[NR_STAT_GROUPS] = {0};
	u64 latency[NR_STAT_GROUPS] = {0};
	int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		int i;
		struct hierarchy_stats *stat = per_cpu_ptr(hstage->hstats, cpu);
		struct hierarchy_stats *stat =
			per_cpu_ptr(hstats_data->hstats, cpu);

		for (i = 0; i < NR_STAT_GROUPS; ++i) {
			dispatched[i] += stat->dispatched[i];
			completed[i] += stat->completed[i];
			latency[i] += stat->nsecs[i];
			latency[i] += stage_is_rq(stage) ?
				      stat->jiffies[i] : stat->nsecs[i];
		}
	}

	seq_printf(m, "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
	if (stage_is_rq(stage))
		for (i = 0; i < NR_STAT_GROUPS; ++i)
			latency[i] =
				jiffies_to_msecs(latency[i]) * NSEC_PER_MSEC;

	seq_printf(m, "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
		   dispatched[STAT_READ], completed[STAT_READ],
		   latency[STAT_READ], dispatched[STAT_WRITE],
		   completed[STAT_WRITE], latency[STAT_WRITE],
@@ -53,11 +96,71 @@ static int hierarchy_stats_show(void *data, struct seq_file *m)
		   latency[STAT_DISCARD], dispatched[STAT_FLUSH],
		   completed[STAT_FLUSH], latency[STAT_FLUSH]);

	hierarchy_show_slow_io(hstats_data, m);
	seq_putc(m, '\n');
	return 0;
}

/*
 * seq_file ->start callback for the aggregated "stats" file.
 *
 * @m:   seq_file being read (not used here).
 * @pos: iteration cursor; its value is used as the stage group index.
 *
 * Returns @pos as the iterator token when it indexes a valid stage group,
 * or NULL to end the sequence.
 */
static void *hierarchy_stats_start(struct seq_file *m, loff_t *pos)
{
	/*
	 * Range-check the full-width loff_t value rather than a copy that
	 * was first narrowed to enum stage_group, so an out-of-range position
	 * cannot wrap into the valid range before it is tested.
	 */
	if (*pos < 0 || *pos >= NR_STAGE_GROUPS)
		return NULL;

	return pos;
}

/*
 * seq_file ->next callback for the aggregated "stats" file.
 *
 * @m:   seq_file being read (not used here).
 * @v:   token returned by the previous ->start/->next call (not used here).
 * @pos: iteration cursor; advanced by one on every call.
 *
 * Returns @pos when the advanced cursor still indexes a valid stage group,
 * or NULL once all stage groups have been visited.
 */
static void *hierarchy_stats_next(struct seq_file *m, void *v, loff_t *pos)
{
	++(*pos);

	/*
	 * Compare the loff_t cursor itself; narrowing it to enum stage_group
	 * before the bounds check could alias a large value into range.
	 */
	if (*pos < 0 || *pos >= NR_STAGE_GROUPS)
		return NULL;

	return pos;
}

/*
 * seq_file ->stop callback for the aggregated "stats" file.
 *
 * Intentionally empty: the ->start and ->next callbacks in this file only
 * hand back the position pointer, so there is nothing to undo here.
 */
static void hierarchy_stats_stop(struct seq_file *m, void *v)
{
}

static int hierarchy_stats_show(struct seq_file *m, void *v)
{
	enum stage_group stage = (*(loff_t *)v);
	struct blk_io_hierarchy_stats *stats = m->private;
	struct hierarchy_stats_data *hstats_data = get_hstats_data(stats, stage);

	if (!hstats_data)
		return 0;

	seq_printf(m, "%s ", hierarchy_stage_name(stage));
	__hierarchy_stats_show(hstats_data, m, stage);
	put_hstats_data(stats, hstats_data);
	return 0;
}

static struct blk_mq_debugfs_attr hierarchy_debugfs_attrs[] = {
	{"stats", 0400, hierarchy_stats_show},
/*
 * seq_file iterator for the aggregated "stats" file: walks the stage group
 * indices from the current position up to NR_STAGE_GROUPS and prints one
 * line per stage via hierarchy_stats_show().
 */
static const struct seq_operations hierarchy_stats_ops = {
	.start	= hierarchy_stats_start,
	.next	= hierarchy_stats_next,
	.stop	= hierarchy_stats_stop,
	.show	= hierarchy_stats_show,
};

/*
 * Show callback for the per-stage "stats" file.
 *
 * @v: the struct hierarchy_stage the debugfs file was created with.
 * @m: seq_file to print into.
 *
 * Prints the counters of that single stage and returns whatever
 * __hierarchy_stats_show() returns.
 */
static int hierarchy_stats_show_single(void *v, struct seq_file *m)
{
	struct hierarchy_stage *this_stage = v;
	struct hierarchy_stats_data *data = this_stage->hstats_data;

	return __hierarchy_stats_show(data, m, this_stage->stage);
}

/*
 * debugfs files created in each stage's directory by
 * hierarchy_register_stage(): a read-only (0400) "stats" file that reports
 * the counters of that stage only. The empty entry terminates the table.
 */
static const struct blk_mq_debugfs_attr hierarchy_debugfs_attrs[] = {
	{"stats", 0400, hierarchy_stats_show_single},
	{},
};

/*
 * debugfs files created in the top-level hierarchy directory by
 * blk_mq_debugfs_create_default_hierarchy_attr(): a read-only (0400)
 * "stats" file driven by hierarchy_stats_ops, which iterates over all
 * stage groups. The empty entry terminates the table.
 */
static const struct blk_mq_debugfs_attr hierarchy_stats_attr[] = {
	{"stats", 0400, .seq_ops = &hierarchy_stats_ops},
	{},
};

@@ -76,6 +179,7 @@ static void hierarchy_register_stage(struct blk_io_hierarchy_stats *stats,

	hstage->debugfs_dir = dir;
	debugfs_create_files(dir, hstage, hierarchy_debugfs_attrs);
	io_hierarchy_register_iodump(hstage);
}

static void hierarchy_unregister_stage(struct blk_io_hierarchy_stats *stats,
@@ -117,3 +221,15 @@ void blk_mq_debugfs_unregister_hierarchy(struct request_queue *q,

	hierarchy_unregister_stage(stats, stage);
}

/*
 * Create the default (aggregated) hierarchy debugfs files for a queue.
 *
 * @q: request queue whose io_hierarchy_stats directory receives the files.
 *
 * The caller must hold q->debugfs_mutex. Nothing is created when
 * blk_mq_debugfs_enabled() reports that debugfs is not enabled for @q.
 */
void blk_mq_debugfs_create_default_hierarchy_attr(struct request_queue *q)
{
	lockdep_assert_held(&q->debugfs_mutex);

	if (blk_mq_debugfs_enabled(q)) {
		struct blk_io_hierarchy_stats *io_stats = q->io_hierarchy_stats;

		debugfs_create_files(io_stats->debugfs_dir, io_stats,
				     hierarchy_stats_attr);
	}
}
Loading