Commit 83c7c18b authored by Linus Torvalds

Merge tag 'for-4.17/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - DM core passthrough ioctl fix to retain a reference to the DM table,
   and to that table's block devices, while issuing the ioctl to one of
   those block devices.

 - DM core passthrough ioctl fix to _not_ override the fmode_t used to
   issue the ioctl. Overriding it with the fmode_t that the block device
   was originally opened with during DM table load is a liability.

 - Add DM core support for secure erase forwarding and update the DM
   linear and DM striped targets to support it.

 - A DM core 4.16 stable fix to allow abnormal IO (e.g. discard, write
   same, write zeroes) for targets that make use of the non-splitting IO
   variant (as is done for multipath or thinp when layered directly on
   NVMe).

 - Allow DM targets to return a payload in response to a DM message that
   they are sent. This is useful for DM targets that would like to
   provide statistics data in response to DM messages (a sketch of the
   widened message hook follows this list).

 - Update DM bufio to support non-power-of-2 block sizes. Numerous other
   related changes prepare the DM bufio code for this support.

 - Fix DM crypt to use a bounded amount of memory across the entire
   system. This is to avoid OOM that can otherwise occur in response to
   certain pathological IO workloads (e.g. discarding a large DM crypt
   device).

 - Add a 'check_at_most_once' feature to the DM verity target to allow
   verity to be used on mobile devices that have very limited resources.

 - Fix the DM integrity target to fail early if a keyed algorithm (e.g.
   HMAC) is to be used but the key isn't set.

 - Add non-power-of-2 support to the DM unstripe target.

 - Eliminate the use of a Variable Length Array in the DM stripe target.

 - Update the DM log-writes target to record metadata (REQ_META flag).

 - DM raid fixes for its nosync status and some variable range issues.
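
   As a rough illustration of the widened message hook (the hook name,
   message keyword, counters and return convention below are assumptions
   made for the sketch; only the added result/maxlen parameters come from
   this merge, and DMEMIT() assumes #include <linux/device-mapper.h>):

     static int example_message(struct dm_target *ti, unsigned argc, char **argv,
                                char *result, unsigned maxlen)
     {
             unsigned sz = 0;        /* cursor used by DMEMIT() into result */

             if (argc == 1 && !strcasecmp(argv[0], "stats")) {
                     /* illustrative payload; a real target emits its own data */
                     DMEMIT("requests %lu errors %lu", 0UL, 0UL);
                     return 1;       /* assumed: non-zero signals a payload for userspace */
             }

             return -EINVAL;         /* unrecognised message */
     }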

* tag 'for-4.17/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (28 commits)
  dm: remove fmode_t argument from .prepare_ioctl hook
  dm: hold DM table for duration of ioctl rather than use blkdev_get
  dm raid: fix parse_raid_params() variable range issue
  dm verity: make verity_for_io_block static
  dm verity: add 'check_at_most_once' option to only validate hashes once
  dm bufio: don't embed a bio in the dm_buffer structure
  dm bufio: support non-power-of-two block sizes
  dm bufio: use slab cache for dm_buffer structure allocations
  dm bufio: reorder fields in dm_buffer structure
  dm bufio: relax alignment constraint on slab cache
  dm bufio: remove code that merges slab caches
  dm bufio: get rid of slab cache name allocations
  dm bufio: move dm-bufio.h to include/linux/
  dm bufio: delete outdated comment
  dm: add support for secure erase forwarding
  dm: backfill abnormal IO support to non-splitting IO submission
  dm raid: fix nosync status
  dm mpath: use DM_MAPIO_SUBMITTED instead of magic number 0 in process_queued_bios()
  dm stripe: get rid of a Variable Length Array (VLA)
  dm log writes: record metadata flag for better flags record
  ...
parents 9022ca6b 5bd5e8d8
Documentation/device-mapper/verity.txt  +11 −0
@@ -109,6 +109,17 @@ fec_start <offset>
    This is the offset, in <data_block_size> blocks, from the start of the
    FEC device to the beginning of the encoding data.

check_at_most_once
    Verify data blocks only the first time they are read from the data device,
    rather than every time.  This reduces the overhead of dm-verity so that it
    can be used on systems that are memory and/or CPU constrained.  However, it
    provides a reduced level of security because only offline tampering of the
    data device's content will be detected, not online tampering.

    Hash blocks are still verified each time they are read from the hash device,
    since verification of hash blocks is less performance critical than data
    blocks, and a hash block will not be verified any more after all the data
    blocks it covers have been verified anyway.
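
    A minimal usage sketch (devices, sizes, root digest and salt below are
    placeholders, not values from this patch): the feature is enabled by
    appending it to the optional arguments at the end of the verity table
    line, e.g.

        0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 \
            <root_digest> <salt> 1 check_at_most_once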

Theory of operation
===================
drivers/md/dm-bufio.c  +122 −157
@@ -6,7 +6,7 @@
 * This file is released under the GPL.
 */

#include "dm-bufio.h"
#include <linux/dm-bufio.h>

#include <linux/device-mapper.h>
#include <linux/dm-io.h>
@@ -50,19 +50,6 @@
 */
#define DM_BUFIO_DEFAULT_RETAIN_BYTES   (256 * 1024)

/*
 * The number of bvec entries that are embedded directly in the buffer.
 * If the chunk size is larger, dm-io is used to do the io.
 */
#define DM_BUFIO_INLINE_VECS		16

/*
 * Don't try to use kmem_cache_alloc for blocks larger than this.
 * For explanation, see alloc_buffer_data below.
 */
#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT	(PAGE_SIZE >> 1)
#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT	(PAGE_SIZE << (MAX_ORDER - 1))

/*
 * Align buffer writes to this boundary.
 * Tests show that SSDs have the highest IOPS when using 4k writes.
@@ -99,13 +86,12 @@ struct dm_bufio_client {

	struct block_device *bdev;
	unsigned block_size;
	unsigned char sectors_per_block_bits;
	unsigned char pages_per_block_bits;
	unsigned char blocks_per_page_bits;
	unsigned aux_size;
	s8 sectors_per_block_bits;
	void (*alloc_callback)(struct dm_buffer *);
	void (*write_callback)(struct dm_buffer *);

	struct kmem_cache *slab_buffer;
	struct kmem_cache *slab_cache;
	struct dm_io_client *dm_io;

	struct list_head reserved_buffers;
@@ -148,11 +134,11 @@ struct dm_buffer {
	struct list_head lru_list;
	sector_t block;
	void *data;
	enum data_mode data_mode;
	unsigned char data_mode;		/* DATA_MODE_* */
	unsigned char list_mode;		/* LIST_* */
	unsigned hold_count;
	blk_status_t read_error;
	blk_status_t write_error;
	unsigned hold_count;
	unsigned long state;
	unsigned long last_accessed;
	unsigned dirty_start;
@@ -161,8 +147,7 @@ struct dm_buffer {
	unsigned write_end;
	struct dm_bufio_client *c;
	struct list_head write_list;
	struct bio bio;
	struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
	void (*end_io)(struct dm_buffer *, blk_status_t);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
	struct stack_trace stack_trace;
@@ -172,21 +157,6 @@ struct dm_buffer {

/*----------------------------------------------------------------*/

static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT];
static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT];

static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
{
	unsigned ret = c->blocks_per_page_bits - 1;

	BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches));

	return ret;
}

#define DM_BUFIO_CACHE(c)	(dm_bufio_caches[dm_bufio_cache_index(c)])
#define DM_BUFIO_CACHE_NAME(c)	(dm_bufio_cache_names[dm_bufio_cache_index(c)])

#define dm_bufio_in_request()	(!!current->bio_list)

static void dm_bufio_lock(struct dm_bufio_client *c)
@@ -319,7 +289,7 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)

/*----------------------------------------------------------------*/

static void adjust_total_allocated(enum data_mode data_mode, long diff)
static void adjust_total_allocated(unsigned char data_mode, long diff)
{
	static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
		&dm_bufio_allocated_kmem_cache,
@@ -384,18 +354,18 @@ static void __cache_size_refresh(void)
 * space.
 */
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
			       enum data_mode *data_mode)
			       unsigned char *data_mode)
{
	if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
	if (unlikely(c->slab_cache != NULL)) {
		*data_mode = DATA_MODE_SLAB;
		return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
		return kmem_cache_alloc(c->slab_cache, gfp_mask);
	}

	if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT &&
	if (c->block_size <= KMALLOC_MAX_SIZE &&
	    gfp_mask & __GFP_NORETRY) {
		*data_mode = DATA_MODE_GET_FREE_PAGES;
		return (void *)__get_free_pages(gfp_mask,
						c->pages_per_block_bits);
						c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
	}

	*data_mode = DATA_MODE_VMALLOC;
@@ -424,15 +394,16 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 * Free buffer's data.
 */
static void free_buffer_data(struct dm_bufio_client *c,
			     void *data, enum data_mode data_mode)
			     void *data, unsigned char data_mode)
{
	switch (data_mode) {
	case DATA_MODE_SLAB:
		kmem_cache_free(DM_BUFIO_CACHE(c), data);
		kmem_cache_free(c->slab_cache, data);
		break;

	case DATA_MODE_GET_FREE_PAGES:
		free_pages((unsigned long)data, c->pages_per_block_bits);
		free_pages((unsigned long)data,
			   c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
		break;

	case DATA_MODE_VMALLOC:
@@ -451,8 +422,7 @@ static void free_buffer_data(struct dm_bufio_client *c,
 */
static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
{
	struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size,
				      gfp_mask);
	struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask);

	if (!b)
		return NULL;
@@ -461,7 +431,7 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)

	b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
	if (!b->data) {
		kfree(b);
		kmem_cache_free(c->slab_buffer, b);
		return NULL;
	}

@@ -483,7 +453,7 @@ static void free_buffer(struct dm_buffer *b)
	adjust_total_allocated(b->data_mode, -(long)c->block_size);

	free_buffer_data(c, b->data, b->data_mode);
	kfree(b);
	kmem_cache_free(c->slab_buffer, b);
}

/*
@@ -540,10 +510,6 @@ static void __relink_lru(struct dm_buffer *b, int dirty)
 *
 *	the memory must be direct-mapped, not vmalloced;
 *
 *	the I/O driver can reject requests spuriously if it thinks that
 *	the requests are too big for the device or if they cross a
 *	controller-defined memory boundary.
 *
 * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and
 * it is not vmalloced, try using the bio interface.
 *
@@ -561,12 +527,11 @@ static void dmio_complete(unsigned long error, void *context)
{
	struct dm_buffer *b = context;

	b->bio.bi_status = error ? BLK_STS_IOERR : 0;
	b->bio.bi_end_io(&b->bio);
	b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
}

static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
		     unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
		     unsigned n_sectors, unsigned offset)
{
	int r;
	struct dm_io_request io_req = {
@@ -590,76 +555,77 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
		io_req.mem.ptr.vma = (char *)b->data + offset;
	}

	b->bio.bi_end_io = end_io;

	r = dm_io(&io_req, 1, &region, NULL);
	if (r) {
		b->bio.bi_status = errno_to_blk_status(r);
		end_io(&b->bio);
	}
	if (unlikely(r))
		b->end_io(b, errno_to_blk_status(r));
}

static void inline_endio(struct bio *bio)
static void bio_complete(struct bio *bio)
{
	bio_end_io_t *end_fn = bio->bi_private;
	struct dm_buffer *b = bio->bi_private;
	blk_status_t status = bio->bi_status;

	/*
	 * Reset the bio to free any attached resources
	 * (e.g. bio integrity profiles).
	 */
	bio_reset(bio);

	bio->bi_status = status;
	end_fn(bio);
	bio_put(bio);
	b->end_io(b, status);
}

static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
			   unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
		    unsigned n_sectors, unsigned offset)
{
	struct bio *bio;
	char *ptr;
	unsigned len;
	unsigned vec_size, len;

	bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
	b->bio.bi_iter.bi_sector = sector;
	bio_set_dev(&b->bio, b->c->bdev);
	b->bio.bi_end_io = inline_endio;
	/*
	 * Use of .bi_private isn't a problem here because
	 * the dm_buffer's inline bio is local to bufio.
	 */
	b->bio.bi_private = end_io;
	bio_set_op_attrs(&b->bio, rw, 0);
	vec_size = b->c->block_size >> PAGE_SHIFT;
	if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
		vec_size += 2;

	bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
	if (!bio) {
dmio:
		use_dmio(b, rw, sector, n_sectors, offset);
		return;
	}

	bio->bi_iter.bi_sector = sector;
	bio_set_dev(bio, b->c->bdev);
	bio_set_op_attrs(bio, rw, 0);
	bio->bi_end_io = bio_complete;
	bio->bi_private = b;

	ptr = (char *)b->data + offset;
	len = n_sectors << SECTOR_SHIFT;

	do {
		unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
		if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
		if (!bio_add_page(bio, virt_to_page(ptr), this_step,
				  offset_in_page(ptr))) {
			BUG_ON(b->c->block_size <= PAGE_SIZE);
			use_dmio(b, rw, sector, n_sectors, offset, end_io);
			return;
			bio_put(bio);
			goto dmio;
		}

		len -= this_step;
		ptr += this_step;
	} while (len > 0);

	submit_bio(&b->bio);
	submit_bio(bio);
}

static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
{
	unsigned n_sectors;
	sector_t sector;
	unsigned offset, end;

	sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
	b->end_io = end_io;

	if (likely(b->c->sectors_per_block_bits >= 0))
		sector = b->block << b->c->sectors_per_block_bits;
	else
		sector = b->block * (b->c->block_size >> SECTOR_SHIFT);
	sector += b->c->start;

	if (rw != REQ_OP_WRITE) {
		n_sectors = 1 << b->c->sectors_per_block_bits;
		n_sectors = b->c->block_size >> SECTOR_SHIFT;
		offset = 0;
	} else {
		if (b->c->write_callback)
@@ -676,11 +642,10 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
		n_sectors = (end - offset) >> SECTOR_SHIFT;
	}

	if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
	    b->data_mode != DATA_MODE_VMALLOC)
		use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
	if (b->data_mode != DATA_MODE_VMALLOC)
		use_bio(b, rw, sector, n_sectors, offset);
	else
		use_dmio(b, rw, sector, n_sectors, offset, end_io);
		use_dmio(b, rw, sector, n_sectors, offset);
}

/*----------------------------------------------------------------
@@ -693,16 +658,14 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
 * Set the error, clear B_WRITING bit and wake anyone who was waiting on
 * it.
 */
static void write_endio(struct bio *bio)
static void write_endio(struct dm_buffer *b, blk_status_t status)
{
	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);

	b->write_error = bio->bi_status;
	if (unlikely(bio->bi_status)) {
	b->write_error = status;
	if (unlikely(status)) {
		struct dm_bufio_client *c = b->c;

		(void)cmpxchg(&c->async_write_error, 0,
				blk_status_to_errno(bio->bi_status));
				blk_status_to_errno(status));
	}

	BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -963,8 +926,11 @@ static void __get_memory_limit(struct dm_bufio_client *c,
		}
	}

	buffers = dm_bufio_cache_size_per_client >>
		  (c->sectors_per_block_bits + SECTOR_SHIFT);
	buffers = dm_bufio_cache_size_per_client;
	if (likely(c->sectors_per_block_bits >= 0))
		buffers >>= c->sectors_per_block_bits + SECTOR_SHIFT;
	else
		buffers /= c->block_size;

	if (buffers < c->minimum_buffers)
		buffers = c->minimum_buffers;
@@ -1076,11 +1042,9 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
 * The endio routine for reading: set the error, clear the bit and wake up
 * anyone waiting on the buffer.
 */
static void read_endio(struct bio *bio)
static void read_endio(struct dm_buffer *b, blk_status_t status)
{
	struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);

	b->read_error = bio->bi_status;
	b->read_error = status;

	BUG_ON(!test_bit(B_READING, &b->state));

@@ -1482,13 +1446,13 @@ void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)

	dm_bufio_unlock(c);
}
EXPORT_SYMBOL(dm_bufio_forget);
EXPORT_SYMBOL_GPL(dm_bufio_forget);

void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
{
	c->minimum_buffers = n;
}
EXPORT_SYMBOL(dm_bufio_set_minimum_buffers);
EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);

unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
{
@@ -1498,8 +1462,12 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);

sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
{
	return i_size_read(c->bdev->bd_inode) >>
			   (SECTOR_SHIFT + c->sectors_per_block_bits);
	sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
	if (likely(c->sectors_per_block_bits >= 0))
		s >>= c->sectors_per_block_bits;
	else
		sector_div(s, c->block_size >> SECTOR_SHIFT);
	return s;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);

@@ -1598,7 +1566,11 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
static unsigned long get_retain_buffers(struct dm_bufio_client *c)
{
	unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
        return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
	if (likely(c->sectors_per_block_bits >= 0))
		retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
	else
		retain_bytes /= c->block_size;
	return retain_bytes;
}

static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
@@ -1662,9 +1634,13 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
	int r;
	struct dm_bufio_client *c;
	unsigned i;
	char slab_name[27];

	BUG_ON(block_size < 1 << SECTOR_SHIFT ||
	       (block_size & (block_size - 1)));
	if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
		DMERR("%s: block size not specified or is not multiple of 512b", __func__);
		r = -EINVAL;
		goto bad_client;
	}

	c = kzalloc(sizeof(*c), GFP_KERNEL);
	if (!c) {
@@ -1675,13 +1651,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign

	c->bdev = bdev;
	c->block_size = block_size;
	if (is_power_of_2(block_size))
		c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
	c->pages_per_block_bits = (__ffs(block_size) >= PAGE_SHIFT) ?
				  __ffs(block_size) - PAGE_SHIFT : 0;
	c->blocks_per_page_bits = (__ffs(block_size) < PAGE_SHIFT ?
				  PAGE_SHIFT - __ffs(block_size) : 0);
	else
		c->sectors_per_block_bits = -1;

	c->aux_size = aux_size;
	c->alloc_callback = alloc_callback;
	c->write_callback = write_callback;

@@ -1694,7 +1668,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
	INIT_LIST_HEAD(&c->reserved_buffers);
	c->need_reserved_buffers = reserved_buffers;

	c->minimum_buffers = DM_BUFIO_MIN_BUFFERS;
	dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);

	init_waitqueue_head(&c->free_buffer_wait);
	c->async_write_error = 0;
@@ -1705,29 +1679,26 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
		goto bad_dm_io;
	}

	mutex_lock(&dm_bufio_clients_lock);
	if (c->blocks_per_page_bits) {
		if (!DM_BUFIO_CACHE_NAME(c)) {
			DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size);
			if (!DM_BUFIO_CACHE_NAME(c)) {
	if (block_size <= KMALLOC_MAX_SIZE &&
	    (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
		snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size);
		c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN,
						  SLAB_RECLAIM_ACCOUNT, NULL);
		if (!c->slab_cache) {
			r = -ENOMEM;
				mutex_unlock(&dm_bufio_clients_lock);
			goto bad;
		}
	}

		if (!DM_BUFIO_CACHE(c)) {
			DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c),
							      c->block_size,
							      c->block_size, 0, NULL);
			if (!DM_BUFIO_CACHE(c)) {
	if (aux_size)
		snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size);
	else
		snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer");
	c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
					   0, SLAB_RECLAIM_ACCOUNT, NULL);
	if (!c->slab_buffer) {
		r = -ENOMEM;
				mutex_unlock(&dm_bufio_clients_lock);
		goto bad;
	}
		}
	}
	mutex_unlock(&dm_bufio_clients_lock);

	while (c->need_reserved_buffers) {
		struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
@@ -1762,6 +1733,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
		list_del(&b->lru_list);
		free_buffer(b);
	}
	kmem_cache_destroy(c->slab_cache);
	kmem_cache_destroy(c->slab_buffer);
	dm_io_client_destroy(c->dm_io);
bad_dm_io:
	mutex_destroy(&c->lock);
@@ -1808,6 +1781,8 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
	for (i = 0; i < LIST_SIZE; i++)
		BUG_ON(c->n_buffers[i]);

	kmem_cache_destroy(c->slab_cache);
	kmem_cache_destroy(c->slab_buffer);
	dm_io_client_destroy(c->dm_io);
	mutex_destroy(&c->lock);
	kfree(c);
@@ -1911,9 +1886,6 @@ static int __init dm_bufio_init(void)
	dm_bufio_allocated_vmalloc = 0;
	dm_bufio_current_allocated = 0;

	memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
	memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);

	mem = (__u64)mult_frac(totalram_pages - totalhigh_pages,
			       DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;

@@ -1948,17 +1920,10 @@ static int __init dm_bufio_init(void)
static void __exit dm_bufio_exit(void)
{
	int bug = 0;
	int i;

	cancel_delayed_work_sync(&dm_bufio_work);
	destroy_workqueue(dm_bufio_wq);

	for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++)
		kmem_cache_destroy(dm_bufio_caches[i]);

	for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++)
		kfree(dm_bufio_cache_names[i]);

	if (dm_bufio_client_count) {
		DMCRIT("%s: dm_bufio_client_count leaked: %d",
			__func__, dm_bufio_client_count);
drivers/md/dm-cache-target.c  +2 −1
@@ -3387,7 +3387,8 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun
 *
 * The key migration_threshold is supported by the cache target core.
 */
static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
static int cache_message(struct dm_target *ti, unsigned argc, char **argv,
			 char *result, unsigned maxlen)
{
	struct cache *cache = ti->private;

drivers/md/dm-crypt.c  +67 −2
@@ -148,6 +148,8 @@ struct crypt_config {
	mempool_t *tag_pool;
	unsigned tag_pool_max_sectors;

	struct percpu_counter n_allocated_pages;

	struct bio_set *bs;
	struct mutex bio_alloc_lock;

@@ -219,6 +221,12 @@ struct crypt_config {
#define MAX_TAG_SIZE	480
#define POOL_ENTRY_SIZE	512

static DEFINE_SPINLOCK(dm_crypt_clients_lock);
static unsigned dm_crypt_clients_n = 0;
static volatile unsigned long dm_crypt_pages_per_client;
#define DM_CRYPT_MEMORY_PERCENT			2
#define DM_CRYPT_MIN_PAGES_PER_CLIENT		(BIO_MAX_PAGES * 16)

static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc,
@@ -2155,6 +2163,43 @@ static int crypt_wipe_key(struct crypt_config *cc)
	return r;
}

static void crypt_calculate_pages_per_client(void)
{
	unsigned long pages = (totalram_pages - totalhigh_pages) * DM_CRYPT_MEMORY_PERCENT / 100;

	if (!dm_crypt_clients_n)
		return;

	pages /= dm_crypt_clients_n;
	if (pages < DM_CRYPT_MIN_PAGES_PER_CLIENT)
		pages = DM_CRYPT_MIN_PAGES_PER_CLIENT;
	dm_crypt_pages_per_client = pages;
}

static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data)
{
	struct crypt_config *cc = pool_data;
	struct page *page;

	if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) &&
	    likely(gfp_mask & __GFP_NORETRY))
		return NULL;

	page = alloc_page(gfp_mask);
	if (likely(page != NULL))
		percpu_counter_add(&cc->n_allocated_pages, 1);

	return page;
}

static void crypt_page_free(void *page, void *pool_data)
{
	struct crypt_config *cc = pool_data;

	__free_page(page);
	percpu_counter_sub(&cc->n_allocated_pages, 1);
}

static void crypt_dtr(struct dm_target *ti)
{
	struct crypt_config *cc = ti->private;
@@ -2181,6 +2226,10 @@ static void crypt_dtr(struct dm_target *ti)
	mempool_destroy(cc->req_pool);
	mempool_destroy(cc->tag_pool);

	if (cc->page_pool)
		WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0);
	percpu_counter_destroy(&cc->n_allocated_pages);

	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
		cc->iv_gen_ops->dtr(cc);

@@ -2197,6 +2246,12 @@ static void crypt_dtr(struct dm_target *ti)

	/* Must zero key material before freeing */
	kzfree(cc);

	spin_lock(&dm_crypt_clients_lock);
	WARN_ON(!dm_crypt_clients_n);
	dm_crypt_clients_n--;
	crypt_calculate_pages_per_client();
	spin_unlock(&dm_crypt_clients_lock);
}

static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
@@ -2644,6 +2699,15 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)

	ti->private = cc;

	spin_lock(&dm_crypt_clients_lock);
	dm_crypt_clients_n++;
	crypt_calculate_pages_per_client();
	spin_unlock(&dm_crypt_clients_lock);

	ret = percpu_counter_init(&cc->n_allocated_pages, 0, GFP_KERNEL);
	if (ret < 0)
		goto bad;

	/* Optional parameters need to be read before cipher constructor */
	if (argc > 5) {
		ret = crypt_ctr_optional(ti, argc - 5, &argv[5]);
@@ -2698,7 +2762,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
		ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
		      ARCH_KMALLOC_MINALIGN);

	cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
	cc->page_pool = mempool_create(BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
	if (!cc->page_pool) {
		ti->error = "Cannot allocate page mempool";
		goto bad;
@@ -2942,7 +3006,8 @@ static void crypt_resume(struct dm_target *ti)
 *	key set <key>
 *	key wipe
 */
static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
static int crypt_message(struct dm_target *ti, unsigned argc, char **argv,
			 char *result, unsigned maxlen)
{
	struct crypt_config *cc = ti->private;
	int key_size, ret = -EINVAL;
drivers/md/dm-era-target.c  +2 −1
@@ -1635,7 +1635,8 @@ static void era_status(struct dm_target *ti, status_type_t type,
	DMEMIT("Error");
}

static int era_message(struct dm_target *ti, unsigned argc, char **argv)
static int era_message(struct dm_target *ti, unsigned argc, char **argv,
		       char *result, unsigned maxlen)
{
	struct era *era = ti->private;
