Commit 66757001, authored by Lars Ellenberg, committed by Jens Axboe
Browse files

drbd: bm_page_async_io: fix spurious bitmap "IO error" on large volumes



We usually do all our bitmap IO in units of PAGE_SIZE.

With very small or oddly sized external meta data, or with
PAGE_SIZE != 4k, it can happen that our last on-disk bitmap page
is not fully PAGE_SIZE aligned, so we may need to adjust the size
of the IO.

We used to do that with
  min_t(unsigned int, PAGE_SIZE,
	last_allowed_sector - current_offset);
And for just the right diff (a 64-bit value that is a multiple of 2^32),
the truncating cast (unsigned int)(diff) will result in 0.

A bio of length 0 will correctly be rejected with an IO error
(and some scary WARN_ON_ONCE()) by the scsi layer.

Do the calculation properly.

Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Link: https://lore.kernel.org/r/20220622204932.196830-1-christoph.boehmwalder@linbit.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 8374cfe6
Loading
Loading
Loading
Loading
+42 −7
Original line number Diff line number Diff line
@@ -974,25 +974,58 @@ static void drbd_bm_endio(struct bio *bio)
	}
}

/*
 * Return the last sector belonging to the on-disk bitmap area of @bdev.
 *
 * The result depends on the meta data index type:
 *  - internal and flexible-internal: md_offset + al_offset - 1
 *  - anything else (flexible external, and the default case):
 *    md_offset + md_size_sect - 1
 *
 * For the layout, see comment above drbd_md_set_sector_offsets().
 */
static inline sector_t drbd_md_last_bitmap_sector(struct drbd_backing_dev *bdev)
{
	/* Both internal flavours share the same end-of-bitmap computation. */
	if (bdev->md.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
	    bdev->md.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)
		return bdev->md.md_offset + bdev->md.al_offset -1;

	/* DRBD_MD_INDEX_FLEX_EXT and every other index value. */
	return bdev->md.md_offset + bdev->md.md_size_sect -1;
}

static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
	struct drbd_device *device = ctx->device;
	enum req_op op = ctx->flags & BM_AIO_READ ? REQ_OP_READ : REQ_OP_WRITE;
	struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op,
					   GFP_NOIO, &drbd_md_io_bio_set);
	struct drbd_bitmap *b = device->bitmap;
	struct bio *bio;
	struct page *page;
	sector_t last_bm_sect;
	sector_t first_bm_sect;
	sector_t on_disk_sector;
	unsigned int len;

	sector_t on_disk_sector =
		device->ldev->md.md_offset + device->ldev->md.bm_offset;
	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
	first_bm_sect = device->ldev->md.md_offset + device->ldev->md.bm_offset;
	on_disk_sector = first_bm_sect + (((sector_t)page_nr) << (PAGE_SHIFT-SECTOR_SHIFT));

	/* this might happen with very small
	 * flexible external meta data device,
	 * or with PAGE_SIZE > 4k */
	len = min_t(unsigned int, PAGE_SIZE,
		(drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
	last_bm_sect = drbd_md_last_bitmap_sector(device->ldev);
	if (first_bm_sect <= on_disk_sector && last_bm_sect >= on_disk_sector) {
		sector_t len_sect = last_bm_sect - on_disk_sector + 1;
		if (len_sect < PAGE_SIZE/SECTOR_SIZE)
			len = (unsigned int)len_sect*SECTOR_SIZE;
		else
			len = PAGE_SIZE;
	} else {
		if (__ratelimit(&drbd_ratelimit_state)) {
			drbd_err(device, "Invalid offset during on-disk bitmap access: "
				 "page idx %u, sector %llu\n", page_nr, on_disk_sector);
		}
		ctx->error = -EIO;
		bm_set_page_io_err(b->bm_pages[page_nr]);
		if (atomic_dec_and_test(&ctx->in_flight)) {
			ctx->done = 1;
			wake_up(&device->misc_wait);
			kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
		}
		return;
	}

	/* serialize IO on this page */
	bm_page_lock_io(device, page_nr);
@@ -1007,6 +1040,8 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
		bm_store_page_idx(page, page_nr);
	} else
		page = b->bm_pages[page_nr];
	bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, GFP_NOIO,
			&drbd_md_io_bio_set);
	bio->bi_iter.bi_sector = on_disk_sector;
	/* bio_add_page of a single page to an empty bio will always succeed,
	 * according to api.  Do we want to assert that? */