Commit e50df249 authored by Linus Torvalds

Merge tag 'block-6.5-2023-07-03' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe:
 "Mostly items that came in a bit late for the initial pull request,
  wanted to make sure they had the appropriate amount of linux-next soak
  before going upstream.

  Outside of stragglers, just generic fixes for either merge window
  items, or longer standing bugs"

* tag 'block-6.5-2023-07-03' of git://git.kernel.dk/linux: (25 commits)
  md/raid0: add discard support for the 'original' layout
  nvme: disable controller on reset state failure
  nvme: sync timeout work on failed reset
  nvme: ensure unquiesce on teardown
  cdrom/gdrom: Fix build error
  nvme: improved uring polling
  block: add request polling helper
  nvme-mpath: fix I/O failure with EAGAIN when failing over I/O
  nvme: host: fix command name spelling
  blk-sysfs: add a new attr_group for blk_mq
  blk-iocost: move wbt_enable/disable_default() out of spinlock
  blk-wbt: cleanup rwb_enabled() and wbt_disabled()
  blk-wbt: remove dead code to handle wbt enable/disable with io inflight
  blk-wbt: don't create wbt sysfs entry if CONFIG_BLK_WBT is disabled
  blk-mq: fix two misuses on RQF_USE_SCHED
  blk-throttle: Fix io statistics for cgroup v1
  bcache: Fix bcache device claiming
  bcache: Alloc holder object before async registration
  raid10: avoid spin_lock from fastpath from raid10_unplug()
  md: fix 'delete_mutex' deadlock
  ...
parents 4f528753 3c2f765c
block/blk-cgroup.c +4 −2
@@ -2086,6 +2086,9 @@ void blk_cgroup_bio_start(struct bio *bio)
	struct blkg_iostat_set *bis;
	unsigned long flags;

	if (!cgroup_subsys_on_dfl(io_cgrp_subsys))
		return;

	/* Root-level stats are sourced from system-wide IO stats */
	if (!cgroup_parent(blkcg->css.cgroup))
		return;
@@ -2116,7 +2119,6 @@ void blk_cgroup_bio_start(struct bio *bio)
	}

	u64_stats_update_end_irqrestore(&bis->sync, flags);
	if (cgroup_subsys_on_dfl(io_cgrp_subsys))
	cgroup_rstat_updated(blkcg->css.cgroup, cpu);
	put_cpu();
}
block/blk-iocost.c +5 −2
@@ -3301,11 +3301,9 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
		blk_stat_enable_accounting(disk->queue);
		blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
		ioc->enabled = true;
		wbt_disable_default(disk);
	} else {
		blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
		ioc->enabled = false;
		wbt_enable_default(disk);
	}

	if (user) {
@@ -3318,6 +3316,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
	ioc_refresh_params(ioc, true);
	spin_unlock_irq(&ioc->lock);

	if (enable)
		wbt_disable_default(disk);
	else
		wbt_enable_default(disk);

	blk_mq_unquiesce_queue(disk->queue);
	blk_mq_unfreeze_queue(disk->queue);

block/blk-mq.c +37 −17
@@ -49,17 +49,8 @@ static void blk_mq_request_bypass_insert(struct request *rq,
		blk_insert_t flags);
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
		struct list_head *list);

static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
		blk_qc_t qc)
{
	return xa_load(&q->hctx_table, qc);
}

static inline blk_qc_t blk_rq_to_qc(struct request *rq)
{
	return rq->mq_hctx->queue_num;
}
static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			 struct io_comp_batch *iob, unsigned int flags);

/*
 * Check if any of the ctx, dispatch list or elevator
@@ -1248,7 +1239,7 @@ void blk_mq_start_request(struct request *rq)
		q->integrity.profile->prepare_fn(rq);
#endif
	if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
	        WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq));
	        WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num);
}
EXPORT_SYMBOL(blk_mq_start_request);

@@ -1280,7 +1271,11 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)

	if (!plug->multiple_queues && last && last->q != rq->q)
		plug->multiple_queues = true;
	if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
	/*
	 * Any request allocated from sched tags can't be issued to
	 * ->queue_rqs() directly
	 */
	if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
		plug->has_elevator = true;
	rq->rq_next = NULL;
	rq_list_add(&plug->mq_list, rq);
@@ -1350,7 +1345,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll);
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
{
	do {
		blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0);
		blk_hctx_poll(rq->q, rq->mq_hctx, NULL, 0);
		cond_resched();
	} while (!completion_done(wait));
}
@@ -4745,10 +4740,9 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);

int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
		unsigned int flags)
static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			 struct io_comp_batch *iob, unsigned int flags)
{
	struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
	long state = get_current_state();
	int ret;

@@ -4773,6 +4767,32 @@ int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *
	return 0;
}

int blk_mq_poll(struct request_queue *q, blk_qc_t cookie,
		struct io_comp_batch *iob, unsigned int flags)
{
	struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie);

	return blk_hctx_poll(q, hctx, iob, flags);
}

int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
		unsigned int poll_flags)
{
	struct request_queue *q = rq->q;
	int ret;

	if (!blk_rq_is_poll(rq))
		return 0;
	if (!percpu_ref_tryget(&q->q_usage_counter))
		return 0;

	ret = blk_hctx_poll(q, rq->mq_hctx, iob, poll_flags);
	blk_queue_exit(q);

	return ret;
}
EXPORT_SYMBOL_GPL(blk_rq_poll);

unsigned int blk_mq_rq_cpu(struct request *rq)
{
	return rq->mq_ctx->cpu;
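
[Editorial note, not part of the diff: the blk_rq_poll() helper exported above lets a caller poll a specific request through its hardware context directly, instead of going through blk_mq_poll() with a bio cookie. A minimal, hypothetical caller, mirroring what blk_rq_poll_completion() does a few hunks up, assuming the caller already owns a submitted polled request and the completion it is waiting on:]

#include <linux/blk-mq.h>
#include <linux/completion.h>
#include <linux/sched.h>

/* Hypothetical caller, for illustration only; not in this commit. */
static void example_wait_for_polled_rq(struct request *rq,
				       struct completion *done)
{
	/* blk_rq_poll() returns 0 immediately if rq is not a polled request */
	while (!completion_done(done)) {
		blk_rq_poll(rq, NULL, 0);
		cond_resched();
	}
}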
block/blk-sysfs.c +103 −78
@@ -47,19 +47,6 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
	return count;
}

static ssize_t queue_var_store64(s64 *var, const char *page)
{
	int err;
	s64 v;

	err = kstrtos64(page, 10, &v);
	if (err < 0)
		return err;

	*var = v;
	return 0;
}

static ssize_t queue_requests_show(struct request_queue *q, char *page)
{
	return queue_var_show(q->nr_requests, page);
@@ -451,61 +438,6 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
	return count;
}

static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
{
	if (!wbt_rq_qos(q))
		return -EINVAL;

	if (wbt_disabled(q))
		return sprintf(page, "0\n");

	return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
}

static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
				  size_t count)
{
	struct rq_qos *rqos;
	ssize_t ret;
	s64 val;

	ret = queue_var_store64(&val, page);
	if (ret < 0)
		return ret;
	if (val < -1)
		return -EINVAL;

	rqos = wbt_rq_qos(q);
	if (!rqos) {
		ret = wbt_init(q->disk);
		if (ret)
			return ret;
	}

	if (val == -1)
		val = wbt_default_latency_nsec(q);
	else if (val >= 0)
		val *= 1000ULL;

	if (wbt_get_min_lat(q) == val)
		return count;

	/*
	 * Ensure that the queue is idled, in case the latency update
	 * ends up either enabling or disabling wbt completely. We can't
	 * have IO inflight if that happens.
	 */
	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	wbt_set_min_lat(q, val);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);

	return count;
}

static ssize_t queue_wc_show(struct request_queue *q, char *page)
{
	if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
@@ -598,7 +530,6 @@ QUEUE_RW_ENTRY(queue_wc, "write_cache");
QUEUE_RO_ENTRY(queue_fua, "fua");
QUEUE_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");

@@ -617,8 +548,79 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats");
QUEUE_RW_ENTRY(queue_random, "add_random");
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");

#ifdef CONFIG_BLK_WBT
static ssize_t queue_var_store64(s64 *var, const char *page)
{
	int err;
	s64 v;

	err = kstrtos64(page, 10, &v);
	if (err < 0)
		return err;

	*var = v;
	return 0;
}

static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
{
	if (!wbt_rq_qos(q))
		return -EINVAL;

	if (wbt_disabled(q))
		return sprintf(page, "0\n");

	return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
}

static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
				  size_t count)
{
	struct rq_qos *rqos;
	ssize_t ret;
	s64 val;

	ret = queue_var_store64(&val, page);
	if (ret < 0)
		return ret;
	if (val < -1)
		return -EINVAL;

	rqos = wbt_rq_qos(q);
	if (!rqos) {
		ret = wbt_init(q->disk);
		if (ret)
			return ret;
	}

	if (val == -1)
		val = wbt_default_latency_nsec(q);
	else if (val >= 0)
		val *= 1000ULL;

	if (wbt_get_min_lat(q) == val)
		return count;

	/*
	 * Ensure that the queue is idled, in case the latency update
	 * ends up either enabling or disabling wbt completely. We can't
	 * have IO inflight if that happens.
	 */
	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	wbt_set_min_lat(q, val);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);

	return count;
}

QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
#endif

static struct attribute *queue_attrs[] = {
	&queue_requests_entry.attr,
	&queue_ra_entry.attr,
	&queue_max_hw_sectors_entry.attr,
	&queue_max_sectors_entry.attr,
@@ -626,7 +628,6 @@ static struct attribute *queue_attrs[] = {
	&queue_max_discard_segments_entry.attr,
	&queue_max_integrity_segments_entry.attr,
	&queue_max_segment_size_entry.attr,
	&elv_iosched_entry.attr,
	&queue_hw_sector_size_entry.attr,
	&queue_logical_block_size_entry.attr,
	&queue_physical_block_size_entry.attr,
@@ -647,7 +648,6 @@ static struct attribute *queue_attrs[] = {
	&queue_max_open_zones_entry.attr,
	&queue_max_active_zones_entry.attr,
	&queue_nomerges_entry.attr,
	&queue_rq_affinity_entry.attr,
	&queue_iostats_entry.attr,
	&queue_stable_writes_entry.attr,
	&queue_random_entry.attr,
@@ -655,9 +655,7 @@ static struct attribute *queue_attrs[] = {
	&queue_wc_entry.attr,
	&queue_fua_entry.attr,
	&queue_dax_entry.attr,
	&queue_wb_lat_entry.attr,
	&queue_poll_delay_entry.attr,
	&queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
	&blk_throtl_sample_time_entry.attr,
#endif
@@ -666,16 +664,23 @@ static struct attribute *queue_attrs[] = {
	NULL,
};

static struct attribute *blk_mq_queue_attrs[] = {
	&queue_requests_entry.attr,
	&elv_iosched_entry.attr,
	&queue_rq_affinity_entry.attr,
	&queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_WBT
	&queue_wb_lat_entry.attr,
#endif
	NULL,
};

static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
				int n)
{
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (attr == &queue_io_timeout_entry.attr &&
		(!q->mq_ops || !q->mq_ops->timeout))
			return 0;

	if ((attr == &queue_max_open_zones_entry.attr ||
	     attr == &queue_max_active_zones_entry.attr) &&
	    !blk_queue_is_zoned(q))
@@ -684,11 +689,30 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
	return attr->mode;
}

static umode_t blk_mq_queue_attr_visible(struct kobject *kobj,
					 struct attribute *attr, int n)
{
	struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
	struct request_queue *q = disk->queue;

	if (!queue_is_mq(q))
		return 0;

	if (attr == &queue_io_timeout_entry.attr && !q->mq_ops->timeout)
		return 0;

	return attr->mode;
}

static struct attribute_group queue_attr_group = {
	.attrs = queue_attrs,
	.is_visible = queue_attr_visible,
};

static struct attribute_group blk_mq_queue_attr_group = {
	.attrs = blk_mq_queue_attrs,
	.is_visible = blk_mq_queue_attr_visible,
};

#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)

@@ -733,6 +757,7 @@ static const struct sysfs_ops queue_sysfs_ops = {

static const struct attribute_group *blk_queue_attr_groups[] = {
	&queue_attr_group,
	&blk_mq_queue_attr_group,
	NULL
};
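
[Editorial note, not part of the diff: the point of the new group is that mq-only attributes no longer need queue_is_mq() checks in their handlers; anything placed in blk_mq_queue_attrs[] is hidden wholesale on bio-based queues by blk_mq_queue_attr_visible(). A hypothetical sketch of adding a new mq-only read-only attribute under this scheme (the attribute name and handler are made up for illustration):]

/* Hypothetical attribute, illustration only; not part of this commit. */
static ssize_t queue_nr_hw_queues_show(struct request_queue *q, char *page)
{
	return sprintf(page, "%u\n", q->nr_hw_queues);
}

QUEUE_RO_ENTRY(queue_nr_hw_queues, "nr_hw_queues");

/*
 * The new entry would then be listed in blk_mq_queue_attrs[] next to the
 * existing ones, inheriting the queue_is_mq() gating from
 * blk_mq_queue_attr_visible() with no per-attribute check needed.
 */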

block/blk-throttle.c +0 −6
@@ -2178,12 +2178,6 @@ bool __blk_throtl_bio(struct bio *bio)

	rcu_read_lock();

	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
		blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
				bio->bi_iter.bi_size);
		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
	}

	spin_lock_irq(&q->queue_lock);

	throtl_update_latency_buckets(td);