Commit 1d94330a authored by Linus Torvalds

Merge tag 'for-5.11/dm-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix DM-raid's raid1 discard limits so discards work.

 - Select missing Kconfig dependencies for DM integrity and zoned
   targets.

 - Four fixes for the DM crypt target's optional bypass of the kcryptd
   workqueues (see the sketch after this list).

 - Fix DM snapshot merge to flush merged data before committing
   metadata.

 - Fix DM integrity data device flushing when external metadata is used.

 - Fix DM integrity's maximum number of supported constructor arguments
   that a user can request when creating an integrity device.

 - Eliminate DM core ioctl logging noise when an ioctl is issued without
   the required CAP_SYS_RAWIO permission.
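
As background for the four dm-crypt fixes: they all deal with finishing crypto work in softirq context, where sleeping is forbidden. Below is a minimal sketch of the resulting pattern, using hypothetical demo_* names rather than dm-crypt's real structures: allocate with GFP_ATOMIC while in interrupt context, and fall back to a workqueue when the non-blocking path cannot proceed.

#include <linux/interrupt.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>

/* Hypothetical per-I/O state; dm-crypt's real structures differ. */
struct demo_io {
	struct work_struct work;
	void *req;
};

static mempool_t *demo_pool;
static struct workqueue_struct *demo_wq;

static void demo_continue(struct work_struct *work)
{
	struct demo_io *io = container_of(work, struct demo_io, work);

	/* Process context: GFP_NOIO may block until memory is available. */
	io->req = mempool_alloc(demo_pool, GFP_NOIO);
	/* ... resume the deferred processing ... */
}

static void demo_process(struct demo_io *io)
{
	/* Softirq must not sleep, so only a non-blocking allocation is safe. */
	io->req = mempool_alloc(demo_pool,
				in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
	if (!io->req) {
		/* Can't block here; hand the rest of the work to a workqueue. */
		INIT_WORK(&io->work, demo_continue);
		queue_work(demo_wq, &io->work);
		return;
	}
	/* ... fast path: continue processing inline ... */
}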

* tag 'for-5.11/dm-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm crypt: defer decryption to a tasklet if interrupts disabled
  dm integrity: fix the maximum number of arguments
  dm crypt: do not call bio_endio() from the dm-crypt tasklet
  dm integrity: fix flush with external metadata device
  dm: eliminate potential source of excessive kernel log noise
  dm snapshot: flush merged data before committing metadata
  dm crypt: use GFP_ATOMIC when allocating crypto requests from softirq
  dm crypt: do not wait for backlogged crypto request completion in softirq
  dm zoned: select CONFIG_CRC32
  dm integrity: select CRYPTO_SKCIPHER
  dm raid: fix discard limits for raid1
parents b45e2da6 c87a95dc
drivers/md/Kconfig +2 −0
@@ -605,6 +605,7 @@ config DM_INTEGRITY
	select BLK_DEV_INTEGRITY
	select DM_BUFIO
	select CRYPTO
+	select CRYPTO_SKCIPHER
	select ASYNC_XOR
	help
	  This device-mapper target emulates a block device that has
@@ -622,6 +623,7 @@ config DM_ZONED
	tristate "Drive-managed zoned block device target support"
	depends on BLK_DEV_DM
	depends on BLK_DEV_ZONED
+	select CRC32
	help
	  This device-mapper target takes a host-managed or host-aware zoned
	  block device and exposes most of its capacity as a regular block
drivers/md/dm-bufio.c +6 −0
@@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);

+struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
+{
+	return c->dm_io;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);
+
sector_t dm_bufio_get_block_number(struct dm_buffer *b)
{
	return b->block;
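
The accessor added above exists so a target can issue raw dm-io requests on the dm-io client that dm-bufio already owns (dm-integrity does exactly that in the diff further down). A minimal sketch of such a call, assuming a hypothetical demo_flush() helper and the 5.11-era dm_io_request fields; a zero-length REQ_PREFLUSH write is a pure cache flush:

#include <linux/dm-bufio.h>
#include <linux/dm-io.h>

/* Hypothetical: synchronously flush a device's write cache through
 * the dm-io client that a dm_bufio_client already allocated. */
static int demo_flush(struct dm_bufio_client *bufio, struct block_device *bdev)
{
	struct dm_io_request io_req = {
		.bi_op = REQ_OP_WRITE,
		.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.client = dm_bufio_get_dm_io_client(bufio),
	};
	struct dm_io_region io_reg = {
		.bdev = bdev,
		.sector = 0,
		.count = 0,	/* zero-length write == cache flush only */
	};

	/* With notify.fn left NULL, dm_io() completes synchronously. */
	return dm_io(&io_req, 1, &io_reg, NULL);
}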
drivers/md/dm-crypt.c +152 −18
@@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
static void kcryptd_async_done(struct crypto_async_request *async_req,
			       int error);

-static void crypt_alloc_req_skcipher(struct crypt_config *cc,
+static int crypt_alloc_req_skcipher(struct crypt_config *cc,
				     struct convert_context *ctx)
{
	unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);

-	if (!ctx->r.req)
-		ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO);
+	if (!ctx->r.req) {
+		ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
+		if (!ctx->r.req)
+			return -ENOMEM;
+	}

	skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]);

@@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
	skcipher_request_set_callback(ctx->r.req,
	    CRYPTO_TFM_REQ_MAY_BACKLOG,
	    kcryptd_async_done, dmreq_of_req(cc, ctx->r.req));

+	return 0;
}

-static void crypt_alloc_req_aead(struct crypt_config *cc,
+static int crypt_alloc_req_aead(struct crypt_config *cc,
				 struct convert_context *ctx)
{
-	if (!ctx->r.req_aead)
-		ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO);
+	if (!ctx->r.req) {
+		ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
+		if (!ctx->r.req)
+			return -ENOMEM;
+	}

	aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]);

@@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
	aead_request_set_callback(ctx->r.req_aead,
	    CRYPTO_TFM_REQ_MAY_BACKLOG,
	    kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead));

+	return 0;
}

-static void crypt_alloc_req(struct crypt_config *cc,
+static int crypt_alloc_req(struct crypt_config *cc,
			    struct convert_context *ctx)
{
	if (crypt_integrity_aead(cc))
-		crypt_alloc_req_aead(cc, ctx);
+		return crypt_alloc_req_aead(cc, ctx);
	else
-		crypt_alloc_req_skcipher(cc, ctx);
+		return crypt_alloc_req_skcipher(cc, ctx);
}

static void crypt_free_req_skcipher(struct crypt_config *cc,
@@ -1529,17 +1539,28 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
 * Encrypt / decrypt data from one bio to another one (can be the same one)
 */
static blk_status_t crypt_convert(struct crypt_config *cc,
-			 struct convert_context *ctx, bool atomic)
+			 struct convert_context *ctx, bool atomic, bool reset_pending)
{
	unsigned int tag_offset = 0;
	unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
	int r;

-	atomic_set(&ctx->cc_pending, 1);
+	/*
+	 * if reset_pending is set we are dealing with the bio for the first time,
+	 * else we're continuing to work on the previous bio, so don't mess with
+	 * the cc_pending counter
+	 */
+	if (reset_pending)
+		atomic_set(&ctx->cc_pending, 1);

	while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {

-		crypt_alloc_req(cc, ctx);
+		r = crypt_alloc_req(cc, ctx);
+		if (r) {
+			complete(&ctx->restart);
+			return BLK_STS_DEV_RESOURCE;
+		}

		atomic_inc(&ctx->cc_pending);

		if (crypt_integrity_aead(cc))
@@ -1553,7 +1574,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
		 * but the driver request queue is full, let's wait.
		 */
		case -EBUSY:
-			wait_for_completion(&ctx->restart);
+			if (in_interrupt()) {
+				if (try_wait_for_completion(&ctx->restart)) {
+					/*
+					 * we don't have to block to wait for completion,
+					 * so proceed
+					 */
+				} else {
+					/*
+					 * we can't wait for completion without blocking
+					 * exit and continue processing in a workqueue
+					 */
+					ctx->r.req = NULL;
+					ctx->cc_sector += sector_step;
+					tag_offset++;
+					return BLK_STS_DEV_RESOURCE;
+				}
+			} else {
+				wait_for_completion(&ctx->restart);
+			}
			reinit_completion(&ctx->restart);
			fallthrough;
		/*
@@ -1691,6 +1730,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
	atomic_inc(&io->io_pending);
}

+static void kcryptd_io_bio_endio(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+	bio_endio(io->base_bio);
+}

/*
 * One of the bios was finished. Check for completion of
 * the whole request and correctly clean up the buffer.
@@ -1713,7 +1758,23 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
		kfree(io->integrity_metadata);

	base_bio->bi_status = error;

-	bio_endio(base_bio);
+	/*
+	 * If we are running this function from our tasklet,
+	 * we can't call bio_endio() here, because it will call
+	 * clone_endio() from dm.c, which in turn will
+	 * free the current struct dm_crypt_io structure with
+	 * our tasklet. In this case we need to delay bio_endio()
+	 * execution to after the tasklet is done and dequeued.
+	 */
+	if (tasklet_trylock(&io->tasklet)) {
+		tasklet_unlock(&io->tasklet);
+		bio_endio(base_bio);
+		return;
+	}
+
+	INIT_WORK(&io->work, kcryptd_io_bio_endio);
+	queue_work(cc->io_queue, &io->work);
}

/*
@@ -1945,6 +2006,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc,
	}
}

+static void kcryptd_crypt_write_continue(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+	struct crypt_config *cc = io->cc;
+	struct convert_context *ctx = &io->ctx;
+	int crypt_finished;
+	sector_t sector = io->sector;
+	blk_status_t r;
+
+	wait_for_completion(&ctx->restart);
+	reinit_completion(&ctx->restart);
+
+	r = crypt_convert(cc, &io->ctx, true, false);
+	if (r)
+		io->error = r;
+	crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
+	if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) {
+		/* Wait for completion signaled by kcryptd_async_done() */
+		wait_for_completion(&ctx->restart);
+		crypt_finished = 1;
+	}
+
+	/* Encryption was already finished, submit io now */
+	if (crypt_finished) {
+		kcryptd_crypt_write_io_submit(io, 0);
+		io->sector = sector;
+	}
+
+	crypt_dec_pending(io);
+}

static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
{
	struct crypt_config *cc = io->cc;
@@ -1973,7 +2065,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)

	crypt_inc_pending(io);
	r = crypt_convert(cc, ctx,
-			  test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
+			  test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true);
+	/*
+	 * Crypto API backlogged the request, because its queue was full
+	 * and we're in softirq context, so continue from a workqueue
+	 * (TODO: is it actually possible to be in softirq in the write path?)
+	 */
+	if (r == BLK_STS_DEV_RESOURCE) {
+		INIT_WORK(&io->work, kcryptd_crypt_write_continue);
+		queue_work(cc->crypt_queue, &io->work);
+		return;
+	}
	if (r)
		io->error = r;
	crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -1998,6 +2100,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
	crypt_dec_pending(io);
}

+static void kcryptd_crypt_read_continue(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+	struct crypt_config *cc = io->cc;
+	blk_status_t r;
+
+	wait_for_completion(&io->ctx.restart);
+	reinit_completion(&io->ctx.restart);
+
+	r = crypt_convert(cc, &io->ctx, true, false);
+	if (r)
+		io->error = r;
+
+	if (atomic_dec_and_test(&io->ctx.cc_pending))
+		kcryptd_crypt_read_done(io);
+
+	crypt_dec_pending(io);
+}

static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
{
	struct crypt_config *cc = io->cc;
@@ -2009,7 +2130,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
			   io->sector);

	r = crypt_convert(cc, &io->ctx,
-			  test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
+			  test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+	/*
+	 * Crypto API backlogged the request, because its queue was full
+	 * and we're in softirq context, so continue from a workqueue
+	 */
+	if (r == BLK_STS_DEV_RESOURCE) {
+		INIT_WORK(&io->work, kcryptd_crypt_read_continue);
+		queue_work(cc->crypt_queue, &io->work);
+		return;
+	}
	if (r)
		io->error = r;

@@ -2091,8 +2221,12 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)

	if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
	    (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
-		if (in_irq()) {
-			/* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */
+		/*
+		 * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context.
+		 * irqs_disabled(): the kernel may run some IO completion from the idle thread, but
+		 * it is being executed with irqs disabled.
+		 */
+		if (in_irq() || irqs_disabled()) {
			tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
			tasklet_schedule(&io->tasklet);
			return;
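
One non-obvious piece above is the tasklet_trylock() test in crypt_dec_pending(): a tasklet that is currently executing cannot be locked, so a failed trylock means "we are running inside our own tasklet" and bio_endio() must be deferred, because it could free the dm_crypt_io that embeds the tasklet. A condensed sketch of the idiom, with hypothetical demo_* names rather than the dm-crypt structures:

#include <linux/bio.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>

struct demo_io {
	struct tasklet_struct tasklet;
	struct work_struct work;
	struct workqueue_struct *wq;
	struct bio *base_bio;
};

static void demo_endio_work(struct work_struct *work)
{
	struct demo_io *io = container_of(work, struct demo_io, work);

	/* Runs after the tasklet has returned, so freeing io is safe. */
	bio_endio(io->base_bio);
}

static void demo_complete(struct demo_io *io)
{
	/*
	 * tasklet_trylock() fails while the tasklet callback is on the
	 * CPU; success proves we are not inside it, so the bio can be
	 * completed directly even if that ends up freeing io.
	 */
	if (tasklet_trylock(&io->tasklet)) {
		tasklet_unlock(&io->tasklet);
		bio_endio(io->base_bio);
		return;
	}

	/* Inside our own tasklet: defer completion to process context. */
	INIT_WORK(&io->work, demo_endio_work);
	queue_work(io->wq, &io->work);
}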
drivers/md/dm-integrity.c +50 −12
@@ -1379,12 +1379,52 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
#undef MAY_BE_HASH
}

-static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
+struct flush_request {
+	struct dm_io_request io_req;
+	struct dm_io_region io_reg;
+	struct dm_integrity_c *ic;
+	struct completion comp;
+};
+
+static void flush_notify(unsigned long error, void *fr_)
+{
+	struct flush_request *fr = fr_;
+	if (unlikely(error != 0))
+		dm_integrity_io_error(fr->ic, "flusing disk cache", -EIO);
+	complete(&fr->comp);
+}
+
+static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
{
	int r;
+
+	struct flush_request fr;
+
+	if (!ic->meta_dev)
+		flush_data = false;
+	if (flush_data) {
+		fr.io_req.bi_op = REQ_OP_WRITE,
+		fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
+		fr.io_req.mem.type = DM_IO_KMEM,
+		fr.io_req.mem.ptr.addr = NULL,
+		fr.io_req.notify.fn = flush_notify,
+		fr.io_req.notify.context = &fr;
+		fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio),
+		fr.io_reg.bdev = ic->dev->bdev,
+		fr.io_reg.sector = 0,
+		fr.io_reg.count = 0,
+		fr.ic = ic;
+		init_completion(&fr.comp);
+		r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
+		BUG_ON(r);
+	}
+

	r = dm_bufio_write_dirty_buffers(ic->bufio);
	if (unlikely(r))
		dm_integrity_io_error(ic, "writing tags", r);

+	if (flush_data)
+		wait_for_completion(&fr.comp);
}

static void sleep_on_endio_wait(struct dm_integrity_c *ic)
@@ -2110,7 +2150,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map

	if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
		integrity_metadata(&dio->work);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, false);

		dio->in_flight = (atomic_t)ATOMIC_INIT(1);
		dio->completion = NULL;
@@ -2195,7 +2235,7 @@ static void integrity_commit(struct work_struct *w)
	flushes = bio_list_get(&ic->flush_bio_list);
	if (unlikely(ic->mode != 'J')) {
		spin_unlock_irq(&ic->endio_wait.lock);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
		goto release_flush_bios;
	}

@@ -2409,7 +2449,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
	complete_journal_op(&comp);
	wait_for_completion_io(&comp.comp);

-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, true);
}

static void integrity_writer(struct work_struct *w)
@@ -2451,7 +2491,7 @@ static void recalc_write_super(struct dm_integrity_c *ic)
{
	int r;

-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, false);
	if (dm_integrity_failed(ic))
		return;

@@ -2654,7 +2694,7 @@ static void bitmap_flush_work(struct work_struct *work)
	unsigned long limit;
	struct bio *bio;

-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, false);

	range.logical_sector = 0;
	range.n_sectors = ic->provided_data_sectors;
@@ -2663,9 +2703,7 @@ static void bitmap_flush_work(struct work_struct *work)
	add_new_range_and_wait(ic, &range);
	spin_unlock_irq(&ic->endio_wait.lock);

-	dm_integrity_flush_buffers(ic);
-	if (ic->meta_dev)
-		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
+	dm_integrity_flush_buffers(ic, true);

	limit = ic->provided_data_sectors;
	if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
@@ -2934,11 +2972,11 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
		if (ic->meta_dev)
			queue_work(ic->writer_wq, &ic->writer_work);
		drain_workqueue(ic->writer_wq);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
	}

	if (ic->mode == 'B') {
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
#if 1
		/* set to 0 to test bitmap replay code */
		init_journal(ic, 0, ic->journal_sections, 0);
@@ -3754,7 +3792,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
	unsigned extra_args;
	struct dm_arg_set as;
	static const struct dm_arg _args[] = {
-		{0, 9, "Invalid number of feature args"},
+		{0, 15, "Invalid number of feature args"},
	};
	unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
	bool should_write_sb;
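
For context on the {0, 9} to {0, 15} change above: dm targets range-check their optional constructor arguments with a struct dm_arg {min, max, error} triple, and the fix only raises the max to cover all the feature arguments dm-integrity actually accepts. A hedged sketch of the parsing idiom, as a hypothetical minimal constructor helper rather than dm-integrity's real code:

#include <linux/device-mapper.h>

/* Hypothetical: parse "<nr_feature_args> [args...]" in a target ctr. */
static int demo_parse_features(struct dm_target *ti, struct dm_arg_set *as)
{
	static const struct dm_arg _args[] = {
		{0, 15, "Invalid number of feature args"},
	};
	unsigned extra_args;
	int r;

	/* Consumes the count argument and checks it against min/max. */
	r = dm_read_arg_group(_args, as, &extra_args, &ti->error);
	if (r)
		return r;

	while (extra_args--) {
		const char *opt = dm_shift_arg(as);

		if (!opt)
			return -EINVAL;
		/* ... match each optional feature argument here ... */
	}
	return 0;
}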
drivers/md/dm-raid.c +3 −3
@@ -3729,10 +3729,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
	blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));

	/*
-	 * RAID1 and RAID10 personalities require bio splitting,
-	 * RAID0/4/5/6 don't and process large discard bios properly.
+	 * RAID0 and RAID10 personalities require bio splitting,
+	 * RAID1/4/5/6 don't and process large discard bios properly.
	 */
-	if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+	if (rs_is_raid0(rs) || rs_is_raid10(rs)) {
		limits->discard_granularity = chunk_size_bytes;
		limits->max_discard_sectors = rs->md.chunk_sectors;
	}
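
The raid1 fix works purely through queue_limits: capping max_discard_sectors at one chunk makes the block layer split large discards before they reach the md personality, which raid0 and raid10 need and raid1 does not. A hedged sketch of an io_hints hook applying such a cap (demo_* names hypothetical, chunk size hard-coded for illustration):

#include <linux/blkdev.h>
#include <linux/device-mapper.h>

/* Hypothetical io_hints hook: force discard splitting on chunk boundaries. */
static void demo_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	unsigned int chunk_sectors = 128;	/* 64 KiB chunks, for example */

	/* granularity is in bytes, max_discard_sectors in 512-byte sectors */
	limits->discard_granularity = chunk_sectors << SECTOR_SHIFT;
	limits->max_discard_sectors = chunk_sectors;
}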