Commit e2dd8aca authored by Joe Thornber, committed by Mike Snitzer
Browse files

dm bio prison v1: improve concurrent IO performance



Split the bio prison into multiple regions, with a separate rbtree and
associated lock for each region.

To get fast bio prison locking and not damage the performance of
discards too much, the bio-prison now stipulates that discards should
not cross a BIO_PRISON_MAX_RANGE boundary.

Because the range of a key (block_end - block_begin) must not exceed
BIO_PRISON_MAX_RANGE, break_up_discard_bio() now ensures the data
range reflected in the PHYSICAL key doesn't exceed BIO_PRISON_MAX_RANGE.
And splitting the thin target's discards (handled with VIRTUAL key) is
achieved by updating dm-thin.c to set limits->max_discard_sectors in
terms of BIO_PRISON_MAX_RANGE _and_ setting the thin and thin-pool
targets' max_discard_granularity to true.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
parent 06961c48
Loading
Loading
Loading
Loading
+57 −30
Original line number Diff line number Diff line
@@ -16,11 +16,17 @@

/*----------------------------------------------------------------*/

#define NR_LOCKS 64
#define LOCK_MASK (NR_LOCKS - 1)
#define MIN_CELLS 1024

struct dm_bio_prison {
struct prison_region {
	spinlock_t lock;
	struct rb_root cells;
	struct rb_root cell;
} ____cacheline_aligned_in_smp;

struct dm_bio_prison {
	struct prison_region regions[NR_LOCKS];
	mempool_t cell_pool;
};

@@ -34,13 +40,17 @@ static struct kmem_cache *_cell_cache;
 */
struct dm_bio_prison *dm_bio_prison_create(void)
{
	struct dm_bio_prison *prison = kzalloc(sizeof(*prison), GFP_KERNEL);
	int ret;
	unsigned i;
	struct dm_bio_prison *prison = kzalloc(sizeof(*prison), GFP_KERNEL);

	if (!prison)
		return NULL;

	spin_lock_init(&prison->lock);
	for (i = 0; i < NR_LOCKS; i++) {
		spin_lock_init(&prison->regions[i].lock);
		prison->regions[i].cell = RB_ROOT;
	}

	ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
	if (ret) {
@@ -48,8 +58,6 @@ struct dm_bio_prison *dm_bio_prison_create(void)
		return NULL;
	}

	prison->cells = RB_ROOT;

	return prison;
}
EXPORT_SYMBOL_GPL(dm_bio_prison_create);
@@ -107,14 +115,26 @@ static int cmp_keys(struct dm_cell_key *lhs,
	return 0;
}

static int __bio_detain(struct dm_bio_prison *prison,
/*
 * Pick the prison region (and hence the spinlock and rbtree) for a key.
 *
 * The index is the number of the BIO_PRISON_MAX_RANGE-sized window that
 * contains key->block_begin, masked with LOCK_MASK so it always falls
 * within the NR_LOCKS entries of prison->regions[].
 */
static unsigned lock_nr(struct dm_cell_key *key)
{
	return (key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT) & LOCK_MASK;
}

/*
 * Enforce the bio prison's restrictions on a key's block range.
 *
 * Triggers BUG_ON() if the range (block_end - block_begin) is larger than
 * BIO_PRISON_MAX_RANGE, or if the range crosses a
 * BIO_PRISON_MAX_RANGE-sized boundary.
 */
static void check_range(struct dm_cell_key *key)
{
	BUG_ON(key->block_end - key->block_begin > BIO_PRISON_MAX_RANGE);
	/*
	 * Compare the window of the first block with the window of
	 * block_end - 1, which is treated here as the last block in the range.
	 */
	BUG_ON((key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT) !=
	       ((key->block_end - 1) >> BIO_PRISON_MAX_RANGE_SHIFT));
}

static int __bio_detain(struct rb_root *root,
			struct dm_cell_key *key,
			struct bio *inmate,
			struct dm_bio_prison_cell *cell_prealloc,
			struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct rb_node **new = &prison->cells.rb_node, *parent = NULL;
	struct rb_node **new = &root->rb_node, *parent = NULL;

	while (*new) {
		struct dm_bio_prison_cell *cell =
@@ -139,7 +159,7 @@ static int __bio_detain(struct dm_bio_prison *prison,
	*cell_result = cell_prealloc;

	rb_link_node(&cell_prealloc->node, parent, new);
	rb_insert_color(&cell_prealloc->node, &prison->cells);
	rb_insert_color(&cell_prealloc->node, root);

	return 0;
}
@@ -151,10 +171,12 @@ static int bio_detain(struct dm_bio_prison *prison,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	unsigned l = lock_nr(key);
	check_range(key);

	spin_lock_irq(&prison->lock);
	r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
	spin_unlock_irq(&prison->lock);
	spin_lock_irq(&prison->regions[l].lock);
	r = __bio_detain(&prison->regions[l].cell, key, inmate, cell_prealloc, cell_result);
	spin_unlock_irq(&prison->regions[l].lock);

	return r;
}
@@ -181,11 +203,11 @@ EXPORT_SYMBOL_GPL(dm_get_cell);
/*
 * @inmates must have been initialised prior to this call
 */
static void __cell_release(struct dm_bio_prison *prison,
static void __cell_release(struct rb_root *root,
			   struct dm_bio_prison_cell *cell,
			   struct bio_list *inmates)
{
	rb_erase(&cell->node, &prison->cells);
	rb_erase(&cell->node, root);

	if (inmates) {
		if (cell->holder)
@@ -198,20 +220,22 @@ void dm_cell_release(struct dm_bio_prison *prison,
		     struct dm_bio_prison_cell *cell,
		     struct bio_list *bios)
{
	spin_lock_irq(&prison->lock);
	__cell_release(prison, cell, bios);
	spin_unlock_irq(&prison->lock);
	unsigned l = lock_nr(&cell->key);

	spin_lock_irq(&prison->regions[l].lock);
	__cell_release(&prison->regions[l].cell, cell, bios);
	spin_unlock_irq(&prison->regions[l].lock);
}
EXPORT_SYMBOL_GPL(dm_cell_release);

/*
 * Sometimes we don't want the holder, just the additional bios.
 */
static void __cell_release_no_holder(struct dm_bio_prison *prison,
static void __cell_release_no_holder(struct rb_root *root,
				     struct dm_bio_prison_cell *cell,
				     struct bio_list *inmates)
{
	rb_erase(&cell->node, &prison->cells);
	rb_erase(&cell->node, root);
	bio_list_merge(inmates, &cell->bios);
}

@@ -219,11 +243,12 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
			       struct dm_bio_prison_cell *cell,
			       struct bio_list *inmates)
{
	unsigned l = lock_nr(&cell->key);
	unsigned long flags;

	spin_lock_irqsave(&prison->lock, flags);
	__cell_release_no_holder(prison, cell, inmates);
	spin_unlock_irqrestore(&prison->lock, flags);
	spin_lock_irqsave(&prison->regions[l].lock, flags);
	__cell_release_no_holder(&prison->regions[l].cell, cell, inmates);
	spin_unlock_irqrestore(&prison->regions[l].lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);

@@ -248,18 +273,19 @@ void dm_cell_visit_release(struct dm_bio_prison *prison,
			   void *context,
			   struct dm_bio_prison_cell *cell)
{
	spin_lock_irq(&prison->lock);
	unsigned l = lock_nr(&cell->key);
	spin_lock_irq(&prison->regions[l].lock);
	visit_fn(context, cell);
	rb_erase(&cell->node, &prison->cells);
	spin_unlock_irq(&prison->lock);
	rb_erase(&cell->node, &prison->regions[l].cell);
	spin_unlock_irq(&prison->regions[l].lock);
}
EXPORT_SYMBOL_GPL(dm_cell_visit_release);

static int __promote_or_release(struct dm_bio_prison *prison,
static int __promote_or_release(struct rb_root *root,
				struct dm_bio_prison_cell *cell)
{
	if (bio_list_empty(&cell->bios)) {
		rb_erase(&cell->node, &prison->cells);
		rb_erase(&cell->node, root);
		return 1;
	}

@@ -271,10 +297,11 @@ int dm_cell_promote_or_release(struct dm_bio_prison *prison,
			       struct dm_bio_prison_cell *cell)
{
	int r;
	unsigned l = lock_nr(&cell->key);

	spin_lock_irq(&prison->lock);
	r = __promote_or_release(prison, cell);
	spin_unlock_irq(&prison->lock);
	spin_lock_irq(&prison->regions[l].lock);
	r = __promote_or_release(&prison->regions[l].cell, cell);
	spin_unlock_irq(&prison->regions[l].lock);

	return r;
}
+10 −0
Original line number Diff line number Diff line
@@ -34,6 +34,16 @@ struct dm_cell_key {
	dm_block_t block_begin, block_end;
};

/*
 * The range of a key (block_end - block_begin) must not
 * exceed BIO_PRISON_MAX_RANGE.  Also the range must not
 * cross a similarly sized boundary.
 *
 * BIO_PRISON_MAX_RANGE must be a power of 2, and
 * BIO_PRISON_MAX_RANGE_SHIFT is its base-2 logarithm
 * (1 << 10 == 1024); keep the two definitions in sync.
 */
#define BIO_PRISON_MAX_RANGE 1024
#define BIO_PRISON_MAX_RANGE_SHIFT 10

/*
 * Treat this as opaque, only in header so callers can manage allocation
 * themselves.
+54 −38
Original line number Diff line number Diff line
@@ -1674,27 +1674,38 @@ static void break_up_discard_bio(struct thin_c *tc, dm_block_t begin, dm_block_t
	struct dm_cell_key data_key;
	struct dm_bio_prison_cell *data_cell;
	struct dm_thin_new_mapping *m;
	dm_block_t virt_begin, virt_end, data_begin;
	dm_block_t virt_begin, virt_end, data_begin, data_end;
	dm_block_t len, next_boundary;

	while (begin != end) {
		r = ensure_next_mapping(pool);
		if (r)
			/* we did our best */
			return;

		r = dm_thin_find_mapped_range(tc->td, begin, end, &virt_begin, &virt_end,
					      &data_begin, &maybe_shared);
		if (r)
		if (r) {
			/*
			 * Silently fail, letting any mappings we've
			 * created complete.
			 */
			break;
		}

		data_end = data_begin + (virt_end - virt_begin);

		/*
		 * Make sure the data region obeys the bio prison restrictions.
		 */
		while (data_begin < data_end) {
			r = ensure_next_mapping(pool);
			if (r)
				return; /* we did our best */

			next_boundary = ((data_begin >> BIO_PRISON_MAX_RANGE_SHIFT) + 1)
				<< BIO_PRISON_MAX_RANGE_SHIFT;
			len = min_t(sector_t, data_end - data_begin, next_boundary - data_begin);

		build_key(tc->td, PHYSICAL, data_begin, data_begin + (virt_end - virt_begin), &data_key);
			build_key(tc->td, PHYSICAL, data_begin, data_begin + len, &data_key);
			if (bio_detain(tc->pool, &data_key, NULL, &data_cell)) {
				/* contention, we'll give up with this range */
			begin = virt_end;
				data_begin += len;
				continue;
			}

@@ -1706,7 +1717,7 @@ static void break_up_discard_bio(struct thin_c *tc, dm_block_t begin, dm_block_t
			m->tc = tc;
			m->maybe_shared = maybe_shared;
			m->virt_begin = virt_begin;
		m->virt_end = virt_end;
			m->virt_end = virt_begin + len;
			m->data_block = data_begin;
			m->cell = data_cell;
			m->bio = bio;
@@ -1723,6 +1734,10 @@ static void break_up_discard_bio(struct thin_c *tc, dm_block_t begin, dm_block_t
			if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
				pool->process_prepared_discard(m);

			virt_begin += len;
			data_begin += len;
		}

		begin = virt_end;
	}
}
@@ -3380,13 +3395,13 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	 */
	if (pf.discard_enabled && pf.discard_passdown) {
		ti->num_discard_bios = 1;

		/*
		 * Setting 'discards_supported' circumvents the normal
		 * stacking of discard limits (this keeps the pool and
		 * thin devices' discard limits consistent).
		 */
		ti->discards_supported = true;
		ti->max_discard_granularity = true;
	}
	ti->private = pt;

@@ -4096,7 +4111,7 @@ static struct target_type pool_target = {
	.name = "thin-pool",
	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
		    DM_TARGET_IMMUTABLE,
	.version = {1, 22, 0},
	.version = {1, 23, 0},
	.module = THIS_MODULE,
	.ctr = pool_ctr,
	.dtr = pool_dtr,
@@ -4261,6 +4276,7 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	if (tc->pool->pf.discard_enabled) {
		ti->discards_supported = true;
		ti->num_discard_bios = 1;
		ti->max_discard_granularity = true;
	}

	mutex_unlock(&dm_thin_pool_table.mutex);
@@ -4476,12 +4492,12 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
		return;

	limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
	limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */
	limits->max_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE;
}

static struct target_type thin_target = {
	.name = "thin",
	.version = {1, 22, 0},
	.version = {1, 23, 0},
	.module	= THIS_MODULE,
	.ctr = thin_ctr,
	.dtr = thin_dtr,