Commit 3d3dfeb3 authored by Linus Torvalds

Merge tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:
 "Pretty quiet round for this release. This contains:

   - Add support for zoned storage to ublk (Andreas, Ming)

   - Series improving performance for drivers that mark themselves as
     needing a blocking context for issue (Bart)

   - Cleanup the flush logic (Chengming)

   - sed opal keyring support (Greg)

   - Fixes and improvements to the integrity support (Jinyoung)

   - Add some exports for bcachefs that we can hopefully delete again in
     the future (Kent)

   - deadline throttling fix (Zhiguo)

   - Series allowing building the kernel without buffer_head support
     (Christoph)

   - Sanitize the bio page adding flow (Christoph)

   - Write back cache fixes (Christoph)

   - MD updates via Song:
      - Fix perf regression for raid0 large sequential writes (Jan)
      - Fix split bio iostat for raid0 (David)
      - Various raid1 fixes (Heinz, Xueshi)
      - raid6test build fixes (WANG)
      - Deprecate bitmap file support (Christoph)
      - Fix deadlock with md sync thread (Yu)
      - Refactor md io accounting (Yu)
      - Various non-urgent fixes (Li, Yu, Jack)

   - Various fixes and cleanups (Arnd, Azeem, Chengming, Damien, Li,
     Ming, Nitesh, Ruan, Tejun, Thomas, Xu)"

* tag 'for-6.6/block-2023-08-28' of git://git.kernel.dk/linux: (113 commits)
  block: use strscpy() to instead of strncpy()
  block: sed-opal: keyring support for SED keys
  block: sed-opal: Implement IOC_OPAL_REVERT_LSP
  block: sed-opal: Implement IOC_OPAL_DISCOVERY
  blk-mq: prealloc tags when increase tagset nr_hw_queues
  blk-mq: delete redundant tagset map update when fallback
  blk-mq: fix tags leak when shrink nr_hw_queues
  ublk: zoned: support REQ_OP_ZONE_RESET_ALL
  md: raid0: account for split bio in iostat accounting
  md/raid0: Fix performance regression for large sequential writes
  md/raid0: Factor out helper for mapping and submitting a bio
  md raid1: allow writebehind to work on any leg device set WriteMostly
  md/raid1: hold the barrier until handle_read_error() finishes
  md/raid1: free the r1bio before waiting for blocked rdev
  md/raid1: call free_r1bio() before allow_barrier() in raid_end_bio_io()
  blk-cgroup: Fix NULL deref caused by blkg_policy_data being installed before init
  drivers/rnbd: restore sysfs interface to rnbd-client
  md/raid5-cache: fix null-ptr-deref for r5l_flush_stripe_to_raid()
  raid6: test: only check for Altivec if building on powerpc hosts
  raid6: test: make sure all intermediate and artifact files are .gitignored
  ...
parents c1b7fcf3 146afeb2
block/Kconfig +3 −0
@@ -5,6 +5,7 @@
 menuconfig BLOCK
 	bool "Enable the block layer" if EXPERT
 	default y
+	select FS_IOMAP
 	select SBITMAP
 	help
 	 Provide block layer support for the kernel.
@@ -183,6 +184,8 @@ config BLK_DEBUG_FS_ZONED

 config BLK_SED_OPAL
 	bool "Logic for interfacing with Opal enabled SEDs"
+	depends on KEYS
+	select PSERIES_PLPKS if PPC_PSERIES
 	help
 	Builds Logic for interfacing with Opal enabled controllers.
 	Enabling this option enables users to setup/unlock/lock
block/bio-integrity.c +30 −29
@@ -123,20 +123,38 @@ void bio_integrity_free(struct bio *bio)
 int bio_integrity_add_page(struct bio *bio, struct page *page,
 			   unsigned int len, unsigned int offset)
 {
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 
-	if (bip->bip_vcnt >= bip->bip_max_vcnt) {
-		printk(KERN_ERR "%s: bip_vec full\n", __func__);
+	if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) >
+	    queue_max_hw_sectors(q))
 		return 0;
-	}
 
-	if (bip->bip_vcnt &&
-	    bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
-			     &bip->bip_vec[bip->bip_vcnt - 1], offset))
-		return 0;
+	if (bip->bip_vcnt > 0) {
+		struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
+		bool same_page = false;
+
+		if (bvec_try_merge_hw_page(q, bv, page, len, offset,
+					   &same_page)) {
+			bip->bip_iter.bi_size += len;
+			return len;
+		}
+
+		if (bip->bip_vcnt >=
+		    min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
+			return 0;
+
+		/*
+		 * If the queue doesn't support SG gaps and adding this segment
+		 * would create a gap, disallow it.
+		 */
+		if (bvec_gap_to_prev(&q->limits, bv, offset))
+			return 0;
+	}
 
 	bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
 	bip->bip_vcnt++;
 	bip->bip_iter.bi_size += len;
 
 	return len;
 }
@@ -199,8 +217,6 @@ bool bio_integrity_prep(struct bio *bio)
 	unsigned long start, end;
 	unsigned int len, nr_pages;
 	unsigned int bytes, offset, i;
-	unsigned int intervals;
-	blk_status_t status;
 
 	if (!bi)
 		return true;
@@ -224,12 +240,10 @@ bool bio_integrity_prep(struct bio *bio)
 		    !(bi->flags & BLK_INTEGRITY_GENERATE))
 			return true;
 	}
-	intervals = bio_integrity_intervals(bi, bio_sectors(bio));
 
 	/* Allocate kernel buffer for protection data */
-	len = intervals * bi->tuple_size;
+	len = bio_integrity_bytes(bi, bio_sectors(bio));
 	buf = kmalloc(len, GFP_NOIO);
-	status = BLK_STS_RESOURCE;
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
 		goto err_end_io;
@@ -244,12 +258,10 @@ bool bio_integrity_prep(struct bio *bio)
 	if (IS_ERR(bip)) {
 		printk(KERN_ERR "could not allocate data integrity bioset\n");
 		kfree(buf);
-		status = BLK_STS_RESOURCE;
 		goto err_end_io;
 	}
 
 	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
-	bip->bip_iter.bi_size = len;
 	bip_set_seed(bip, bio->bi_iter.bi_sector);
 
 	if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
@@ -257,28 +269,18 @@ bool bio_integrity_prep(struct bio *bio)

 	/* Map it */
 	offset = offset_in_page(buf);
-	for (i = 0 ; i < nr_pages ; i++) {
-		int ret;
+	for (i = 0; i < nr_pages && len > 0; i++) {
 		bytes = PAGE_SIZE - offset;
 
-		if (len <= 0)
-			break;
-
 		if (bytes > len)
 			bytes = len;
 
-		ret = bio_integrity_add_page(bio, virt_to_page(buf),
-					     bytes, offset);
-
-		if (ret == 0) {
+		if (bio_integrity_add_page(bio, virt_to_page(buf),
+					   bytes, offset) < bytes) {
 			printk(KERN_ERR "could not attach integrity payload\n");
-			status = BLK_STS_RESOURCE;
 			goto err_end_io;
 		}
 
-		if (ret < bytes)
-			break;
-
 		buf += bytes;
 		len -= bytes;
 		offset = 0;
@@ -294,10 +296,9 @@ bool bio_integrity_prep(struct bio *bio)
 	return true;
 
 err_end_io:
-	bio->bi_status = status;
+	bio->bi_status = BLK_STS_RESOURCE;
 	bio_endio(bio);
 	return false;
-
 }
 EXPORT_SYMBOL(bio_integrity_prep);
 
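One behavioral note for reviewers: bio_integrity_add_page() now returns the number of bytes actually attached, so a short count (not just 0) signals failure; that is why the prep loop above compares the return value against bytes. Below is a minimal caller-side sketch of that contract; the helper name attach_pi_buf() is hypothetical and not part of this series.

/*
 * Hypothetical caller sketch (not from this series): attach a kernel
 * buffer of protection information page by page, treating any short
 * return from bio_integrity_add_page() as failure, the same way
 * bio_integrity_prep() does above.
 */
static int attach_pi_buf(struct bio *bio, void *buf, unsigned int len)
{
	unsigned int offset = offset_in_page(buf);

	while (len > 0) {
		unsigned int bytes = min_t(unsigned int, len,
					   PAGE_SIZE - offset);

		if (bio_integrity_add_page(bio, virt_to_page(buf),
					   bytes, offset) < bytes)
			return -ENOMEM;	/* bvec full or queue limit hit */

		buf += bytes;
		len -= bytes;
		offset = 0;
	}
	return 0;
}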
block/bio.c +64 −78
@@ -606,15 +606,15 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(bio_kmalloc);
 
-void zero_fill_bio(struct bio *bio)
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
 {
 	struct bio_vec bv;
 	struct bvec_iter iter;
 
-	bio_for_each_segment(bv, bio, iter)
+	__bio_for_each_segment(bv, bio, iter, start)
 		memzero_bvec(&bv);
 }
-EXPORT_SYMBOL(zero_fill_bio);
+EXPORT_SYMBOL(zero_fill_bio_iter);
 
 /**
  * bio_truncate - truncate the bio to small size of @new_size
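Since zero_fill_bio() disappears from bio.c here, the old name presumably lives on as an inline wrapper in include/linux/bio.h (that header hunk is not shown on this page). A sketch of the expected shape:

/* Sketch of the matching include/linux/bio.h wrapper (header hunk not
 * shown here): zero from the bio's current iterator position. */
static inline void zero_fill_bio(struct bio *bio)
{
	zero_fill_bio_iter(bio, bio->bi_iter);
}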
@@ -903,9 +903,8 @@ static inline bool bio_full(struct bio *bio, unsigned len)
 	return false;
 }
 
-static inline bool page_is_mergeable(const struct bio_vec *bv,
-		struct page *page, unsigned int len, unsigned int off,
-		bool *same_page)
+static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
+		unsigned int len, unsigned int off, bool *same_page)
 {
 	size_t bv_end = bv->bv_offset + bv->bv_len;
 	phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
@@ -919,61 +918,26 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
 		return false;
 
 	*same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
-	if (*same_page)
-		return true;
-	else if (IS_ENABLED(CONFIG_KMSAN))
-		return false;
-	return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
-}
-
-/**
- * __bio_try_merge_page - try appending data to an existing bvec.
- * @bio: destination bio
- * @page: start page to add
- * @len: length of the data to add
- * @off: offset of the data relative to @page
- * @same_page: return if the segment has been merged inside the same page
- *
- * Try to add the data at @page + @off to the last bvec of @bio.  This is a
- * useful optimisation for file systems with a block size smaller than the
- * page size.
- *
- * Warn if (@len, @off) crosses pages in case that @same_page is true.
- *
- * Return %true on success or %false on failure.
- */
-static bool __bio_try_merge_page(struct bio *bio, struct page *page,
-		unsigned int len, unsigned int off, bool *same_page)
-{
-	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
-		return false;
-
-	if (bio->bi_vcnt > 0) {
-		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-		if (page_is_mergeable(bv, page, len, off, same_page)) {
-			if (bio->bi_iter.bi_size > UINT_MAX - len) {
-				*same_page = false;
-				return false;
-			}
-			bv->bv_len += len;
-			bio->bi_iter.bi_size += len;
-			return true;
-		}
-	}
-	return false;
+	if (!*same_page) {
+		if (IS_ENABLED(CONFIG_KMSAN))
+			return false;
+		if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE)
+			return false;
+	}
+
+	bv->bv_len += len;
+	return true;
 }
 
 /*
  * Try to merge a page into a segment, while obeying the hardware segment
  * size limit.  This is not for normal read/write bios, but for passthrough
  * or Zone Append operations that we can't split.
  */
-static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
-				 struct page *page, unsigned len,
-				 unsigned offset, bool *same_page)
+bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
+		struct page *page, unsigned len, unsigned offset,
+		bool *same_page)
 {
-	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 	unsigned long mask = queue_segment_boundary(q);
 	phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
 	phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
@@ -982,7 +946,7 @@ static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
 		return false;
 	if (bv->bv_len + len > queue_max_segment_size(q))
 		return false;
-	return __bio_try_merge_page(bio, page, len, offset, same_page);
+	return bvec_try_merge_page(bv, page, len, offset, same_page);
 }
 
 /**
@@ -1002,33 +966,33 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset,
 		unsigned int max_sectors, bool *same_page)
 {
-	struct bio_vec *bvec;
-
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return 0;
 
-	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
+	if (((bio->bi_iter.bi_size + len) >> SECTOR_SHIFT) > max_sectors)
 		return 0;
 
 	if (bio->bi_vcnt > 0) {
-		if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
+		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+		if (bvec_try_merge_hw_page(q, bv, page, len, offset,
+				same_page)) {
+			bio->bi_iter.bi_size += len;
 			return len;
+		}
+
+		if (bio->bi_vcnt >=
+		    min(bio->bi_max_vecs, queue_max_segments(q)))
+			return 0;
 
 		/*
 		 * If the queue doesn't support SG gaps and adding this segment
 		 * would create a gap, disallow it.
 		 */
-		bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
-		if (bvec_gap_to_prev(&q->limits, bvec, offset))
+		if (bvec_gap_to_prev(&q->limits, bv, offset))
 			return 0;
 	}
 
-	if (bio_full(bio, len))
-		return 0;
-
-	if (bio->bi_vcnt >= queue_max_segments(q))
-		return 0;
-
 	bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
 	bio->bi_vcnt++;
 	bio->bi_iter.bi_size += len;
@@ -1129,11 +1093,21 @@ int bio_add_page(struct bio *bio, struct page *page,
 {
 	bool same_page = false;
 
-	if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
-		if (bio_full(bio, len))
-			return 0;
-		__bio_add_page(bio, page, len, offset);
-	}
+	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+		return 0;
+	if (bio->bi_iter.bi_size > UINT_MAX - len)
+		return 0;
+
+	if (bio->bi_vcnt > 0 &&
+	    bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
+				page, len, offset, &same_page)) {
+		bio->bi_iter.bi_size += len;
+		return len;
+	}
+
+	if (bio->bi_vcnt >= bio->bi_max_vecs)
+		return 0;
+	__bio_add_page(bio, page, len, offset);
 	return len;
 }
 EXPORT_SYMBOL(bio_add_page);
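The externally visible contract of bio_add_page() is unchanged by this restructuring: it still returns @len on success and 0 when the page cannot be added (cloned bio, bi_size overflow, or no free bvec). A minimal caller sketch, with a hypothetical helper name:

/* Hypothetical caller sketch: the usual all-or-nothing check. */
static int add_data_page(struct bio *bio, struct page *page,
			 unsigned int len, unsigned int offset)
{
	if (bio_add_page(bio, page, len, offset) != len)
		return -ENOMEM;	/* bio full, overflow, or cloned */
	return 0;
}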
@@ -1207,15 +1181,20 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
 {
 	bool same_page = false;
 
-	if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
-		__bio_add_page(bio, page, len, offset);
-		return 0;
-	}
+	if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len))
+		return -EIO;
 
-	if (same_page)
-		bio_release_page(bio, page);
+	if (bio->bi_vcnt > 0 &&
+	    bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
+				page, len, offset, &same_page)) {
+		bio->bi_iter.bi_size += len;
+		if (same_page)
+			bio_release_page(bio, page);
+		return 0;
+	}
+
+	__bio_add_page(bio, page, len, offset);
 	return 0;
 }
 
 static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int offset)
@@ -1252,7 +1231,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	struct page **pages = (struct page **)bv;
 	ssize_t size, left;
 	unsigned len, i = 0;
-	size_t offset, trim;
+	size_t offset;
 	int ret = 0;
 
 	/*
@@ -1281,10 +1260,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)

 	nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
 
-	trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
-	iov_iter_revert(iter, trim);
-
-	size -= trim;
+	if (bio->bi_bdev) {
+		size_t trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
+
+		iov_iter_revert(iter, trim);
+		size -= trim;
+	}
+
 	if (unlikely(!size)) {
 		ret = -EFAULT;
 		goto out;
@@ -1337,6 +1318,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
 	int ret = 0;
 
+	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+		return -EIO;
+
 	if (iov_iter_is_bvec(iter)) {
 		bio_iov_bvec_set(bio, iter);
 		iov_iter_advance(iter, bio->bi_iter.bi_size);
@@ -1490,6 +1474,7 @@ void bio_set_pages_dirty(struct bio *bio)
 			set_page_dirty_lock(bvec->bv_page);
 	}
 }
+EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
 
 /*
  * bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
@@ -1549,6 +1534,7 @@ void bio_check_pages_dirty(struct bio *bio)
 	spin_unlock_irqrestore(&bio_dirty_lock, flags);
 	schedule_work(&bio_dirty_work);
 }
+EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
 
 static inline bool bio_remaining_done(struct bio *bio)
 {
block/blk-cgroup.c +18 −14
@@ -1511,7 +1511,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 retry:
 	spin_lock_irq(&q->queue_lock);
 
-	/* blkg_list is pushed at the head, reverse walk to allocate parents first */
+	/* blkg_list is pushed at the head, reverse walk to initialize parents first */
 	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
 		struct blkg_policy_data *pd;
 
@@ -1549,21 +1549,20 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 				goto enomem;
 		}
 
-		blkg->pd[pol->plid] = pd;
+		spin_lock(&blkg->blkcg->lock);
+
 		pd->blkg = blkg;
 		pd->plid = pol->plid;
-		pd->online = false;
-	}
+		blkg->pd[pol->plid] = pd;
 
-	/* all allocated, init in the same order */
-	if (pol->pd_init_fn)
-		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
-			pol->pd_init_fn(blkg->pd[pol->plid]);
+		if (pol->pd_init_fn)
+			pol->pd_init_fn(pd);
 
-	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
-		if (pol->pd_online_fn)
-			pol->pd_online_fn(blkg->pd[pol->plid]);
-		blkg->pd[pol->plid]->online = true;
-	}
+		if (pol->pd_online_fn)
+			pol->pd_online_fn(pd);
+		pd->online = true;
+
+		spin_unlock(&blkg->blkcg->lock);
+	}
 
 	__set_bit(pol->plid, q->blkcg_pols);
@@ -1580,14 +1579,19 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
 	return ret;
 
 enomem:
-	/* alloc failed, nothing's initialized yet, free everything */
+	/* alloc failed, take down everything */
 	spin_lock_irq(&q->queue_lock);
 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
 		struct blkcg *blkcg = blkg->blkcg;
+		struct blkg_policy_data *pd;
 
 		spin_lock(&blkcg->lock);
-		if (blkg->pd[pol->plid]) {
-			pol->pd_free_fn(blkg->pd[pol->plid]);
+		pd = blkg->pd[pol->plid];
+		if (pd) {
+			if (pd->online && pol->pd_offline_fn)
+				pol->pd_offline_fn(pd);
+			pd->online = false;
+			pol->pd_free_fn(pd);
 			blkg->pd[pol->plid] = NULL;
 		}
 		spin_unlock(&blkcg->lock);
block/blk-core.c +1 −0
@@ -208,6 +208,7 @@ const char *blk_status_to_str(blk_status_t status)
		return "<null>";
	return blk_errors[idx].name;
}
EXPORT_SYMBOL_GPL(blk_status_to_str);

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
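blk_status_to_str() itself is unchanged; the new export (one of the bcachefs-motivated exports mentioned in the merge message) makes it callable from modules. A minimal sketch of module-side use, assuming the declaration is visible via linux/blkdev.h; the helper name is hypothetical:

/* Hypothetical module-side use of the newly exported helper. */
static void report_bio_error(struct bio *bio)
{
	pr_err("I/O error on %pg: %s\n", bio->bi_bdev,
	       blk_status_to_str(bio->bi_status));
}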