Commit be500ed7, authored by Joe Thornber and committed by Mike Snitzer
Browse files

dm space maps: improve performance with inc/dec on ranges of blocks



When we break sharing on btree nodes we typically need to increment
the reference counts to every value held in the node.  This can
cause a lot of repeated calls to the space maps.  Fix this by changing
the interface to the space map inc/dec methods to take ranges of
adjacent blocks to be operated on.

For installations that are using a lot of snapshots this will reduce
cpu overhead of fundamental operations such as provisioning a new block,
or deleting a snapshot, by as much as 10 times.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 5faafc77
Loading
Loading
Loading
Loading
+14 −10
Original line number Diff line number Diff line
@@ -363,29 +363,33 @@ static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata
	core->root = le64_to_cpu(disk->root);
}

/*
 * Value-type 'inc' callback for writesets.  'context' is the era_metadata
 * and 'value' points at 'count' struct writeset_disk entries laid out back
 * to back; the little-endian 'root' of each entry is passed to dm_tm_inc().
 *
 * NOTE(review): this text comes from a diff view that shows no +/- markers,
 * so the two-argument signature and the lone memcpy() ahead of the loop
 * appear to be the removed pre-change lines.
 */
static void ws_inc(void *context, const void *value)
static void ws_inc(void *context, const void *value, unsigned count)
{
	struct era_metadata *md = context;
	struct writeset_disk ws_d;
	dm_block_t b;
	unsigned i;

	memcpy(&ws_d, value, sizeof(ws_d));
	for (i = 0; i < count; i++) {
		/*
		 * Copy entry i into a local before reading its root.  'value'
		 * is a void pointer, so the offset added here is in bytes
		 * (GNU C void-pointer arithmetic).
		 */
		memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d));
		b = le64_to_cpu(ws_d.root);

		dm_tm_inc(md->tm, b);
	}
}

/*
 * Value-type 'dec' callback for writesets.  'context' is the era_metadata
 * and 'value' points at 'count' struct writeset_disk entries laid out back
 * to back; the little-endian 'root' of each entry is handed to
 * dm_bitset_del() together with md->bitset_info.
 *
 * NOTE(review): this text comes from a diff view that shows no +/- markers,
 * so the two-argument signature and the lone memcpy() ahead of the loop
 * appear to be the removed pre-change lines.
 */
static void ws_dec(void *context, const void *value)
static void ws_dec(void *context, const void *value, unsigned count)
{
	struct era_metadata *md = context;
	struct writeset_disk ws_d;
	dm_block_t b;
	unsigned i;

	memcpy(&ws_d, value, sizeof(ws_d));
	for (i = 0; i < count; i++) {
		/*
		 * Copy entry i into a local before reading its root.  'value'
		 * is a void pointer, so the offset added here is in bytes
		 * (GNU C void-pointer arithmetic).
		 */
		memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d));
		b = le64_to_cpu(ws_d.root);

		dm_bitset_del(&md->bitset_info, b);
	}
}

static int ws_eq(void *context, const void *value1, const void *value2)
{
+53 −38
Original line number Diff line number Diff line
@@ -311,28 +311,53 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
	*t = v & ((1 << 24) - 1);
}

/*
 * NOTE(review): this text comes from a diff view that shows no +/- markers.
 * The two-argument data_block_inc()/data_block_dec() signatures, the
 * 'sm = context' / 'v_le' / lone 'b' declarations and the unindented
 * memcpy()/unpack_block_time()/dm_sm_inc_block() statements inside this span
 * appear to be removed pre-change lines mixed into the new with_runs().
 */
static void data_block_inc(void *context, const void *value_le)
/*
 * It's more efficient to call dm_sm_{inc,dec}_blocks as few times as
 * possible.  'with_runs' reads contiguous runs of blocks, and calls the
 * given sm function.
 *
 * A run is a maximal stretch of consecutive values whose unpacked block
 * numbers each exceed the previous one by exactly one.  'fn' is called once
 * per run with (sm, begin, end), where 'end' is one past the last block of
 * the run.  The int returned by 'fn' is not examined.
 */
typedef int (*run_fn)(struct dm_space_map *, dm_block_t, dm_block_t);

static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned count, run_fn fn)
{
	struct dm_space_map *sm = context;
	__le64 v_le;
	uint64_t b;
	uint64_t b, begin, end;
	uint32_t t;
	bool in_run = false;
	unsigned i;

	for (i = 0; i < count; i++, value_le++) {
		/* We know value_le is 8 byte aligned */
		unpack_block_time(le64_to_cpu(*value_le), &b, &t);

	memcpy(&v_le, value_le, sizeof(v_le));
	unpack_block_time(le64_to_cpu(v_le), &b, &t);
	dm_sm_inc_block(sm, b);
		if (in_run) {
			if (b == end) {
				/* b extends the current run by one block. */
				end++;
			} else {
				/* Gap: flush the finished run, then start a new one at b. */
				fn(sm, begin, end);
				begin = b;
				end = b + 1;
			}
		} else {
			/* First value seen: open the initial run. */
			in_run = true;
			begin = b;
			end = b + 1;
		}
	}

static void data_block_dec(void *context, const void *value_le)
	/* Flush the final run; nothing is pending when count was zero. */
	if (in_run)
		fn(sm, begin, end);
}

/*
 * Value-type 'inc' callback for data blocks.  'context' is used as the
 * space map and 'value_le' as an array of 'count' __le64 values; the work is
 * delegated to with_runs() with dm_sm_inc_blocks as the per-run function.
 *
 * NOTE(review): this text comes from a diff view that shows no +/- markers.
 * The four local declarations below are not used by the with_runs() call
 * and appear to be removed pre-change lines.
 */
static void data_block_inc(void *context, const void *value_le, unsigned count)
{
	struct dm_space_map *sm = context;
	__le64 v_le;
	uint64_t b;
	uint32_t t;
	with_runs((struct dm_space_map *) context,
		  (const __le64 *) value_le, count, dm_sm_inc_blocks);
}

	memcpy(&v_le, value_le, sizeof(v_le));
	unpack_block_time(le64_to_cpu(v_le), &b, &t);
	dm_sm_dec_block(sm, b);
/*
 * Value-type 'dec' callback for data blocks.  'context' is the space map
 * and 'value_le' an array of 'count' __le64 values; with_runs() does the
 * work, using dm_sm_dec_blocks as the per-run function.
 */
static void data_block_dec(void *context, const void *value_le, unsigned count)
{
	struct dm_space_map *sm = context;
	const __le64 *values = value_le;

	with_runs(sm, values, count, dm_sm_dec_blocks);
}

static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
@@ -349,26 +374,24 @@ static int data_block_equal(void *context, const void *value1_le, const void *va
	return b1 == b2;
}

/*
 * Value-type 'inc' callback for subtree roots.  'context' is the
 * dm_btree_info and 'value' is read as an array of 'count' __le64 root
 * blocks; dm_tm_inc() is called on info->tm for each of them in order.
 *
 * NOTE(review): this text comes from a diff view that shows no +/- markers.
 * The two-argument signature, the non-pointer 'root_le' / 'root' locals and
 * the memcpy()/le64_to_cpu()/dm_tm_inc() statements ahead of the loop appear
 * to be removed pre-change lines.
 */
static void subtree_inc(void *context, const void *value)
static void subtree_inc(void *context, const void *value, unsigned count)
{
	struct dm_btree_info *info = context;
	__le64 root_le;
	uint64_t root;
	const __le64 *root_le = value;
	unsigned i;

	memcpy(&root_le, value, sizeof(root_le));
	root = le64_to_cpu(root_le);
	dm_tm_inc(info->tm, root);
	for (i = 0; i < count; i++, root_le++)
		dm_tm_inc(info->tm, le64_to_cpu(*root_le));
}

/*
 * Value-type 'dec' callback for subtree roots.  'context' is the
 * dm_btree_info and 'value' is read as an array of 'count' __le64 root
 * blocks; dm_btree_del() is called for each of them in order.  A non-zero
 * return is reported with DMERR and the loop carries on with the next root.
 *
 * NOTE(review): this text comes from a diff view that shows no +/- markers.
 * The two-argument signature, the non-pointer 'root_le' / 'root' locals and
 * the memcpy()/le64_to_cpu()/'if (dm_btree_del(info, root))' lines ahead of
 * the loop appear to be removed pre-change lines.
 */
static void subtree_dec(void *context, const void *value)
static void subtree_dec(void *context, const void *value, unsigned count)
{
	struct dm_btree_info *info = context;
	__le64 root_le;
	uint64_t root;
	const __le64 *root_le = value;
	unsigned i;

	memcpy(&root_le, value, sizeof(root_le));
	root = le64_to_cpu(root_le);
	if (dm_btree_del(info, root))
	for (i = 0; i < count; i++, root_le++)
		if (dm_btree_del(info, le64_to_cpu(*root_le)))
			DMERR("btree delete failed");
}

@@ -1761,11 +1784,7 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
	int r = 0;

	pmd_write_lock(pmd);
	for (; b != e; b++) {
		r = dm_sm_inc_block(pmd->data_sm, b);
		if (r)
			break;
	}
	r = dm_sm_inc_blocks(pmd->data_sm, b, e);
	pmd_write_unlock(pmd);

	return r;
@@ -1776,11 +1795,7 @@ int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
	int r = 0;

	pmd_write_lock(pmd);
	for (; b != e; b++) {
		r = dm_sm_dec_block(pmd->data_sm, b);
		if (r)
			break;
	}
	r = dm_sm_dec_blocks(pmd->data_sm, b, e);
	pmd_write_unlock(pmd);

	return r;
+28 −24
Original line number Diff line number Diff line
@@ -108,12 +108,10 @@ static void *element_at(struct dm_array_info *info, struct array_block *ab,
 * in an array block.
 */
static void on_entries(struct dm_array_info *info, struct array_block *ab,
		       void (*fn)(void *, const void *))
		       void (*fn)(void *, const void *, unsigned))
{
	/*
	 * NOTE(review): this text comes from a diff view that shows no +/-
	 * markers.  The two-argument 'fn' parameter, the 'i, nr_entries'
	 * declaration and the per-element loop appear to be removed
	 * pre-change lines; the new code is the last two statements.
	 */
	unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);

	for (i = 0; i < nr_entries; i++)
		fn(info->value_type.context, element_at(info, ab, i));
	/*
	 * One call covers the whole block: 'fn' gets the address of element 0
	 * and the number of entries currently held in 'ab'.
	 */
	unsigned nr_entries = le32_to_cpu(ab->nr_entries);
	fn(info->value_type.context, element_at(info, ab, 0), nr_entries);
}

/*
@@ -175,19 +173,18 @@ static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
/*
 * Append copies of 'value' to the array block until it holds 'new_nr'
 * entries, taking one reference per copy when the value type supplies an
 * 'inc' callback.
 *
 * 'new_nr' must not exceed the block's max_entries and must not be smaller
 * than its current entry count; both are enforced with BUG_ON().
 */
static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
			const void *value, unsigned new_nr)
{
	uint32_t nr_entries, i;
	struct dm_btree_value_type *vt = &info->value_type;

	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
	BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));

	nr_entries = le32_to_cpu(ab->nr_entries);
	for (i = nr_entries; i < new_nr; i++) {
		/*
		 * 'value' points at a single element, whereas a ranged inc()
		 * walks 'count' consecutive elements starting at the pointer
		 * it is given.  Ask for exactly one element per copy so the
		 * callback never reads past 'value'.
		 */
		if (vt->inc)
			vt->inc(vt->context, value, 1);
		memcpy(element_at(info, ab, i), value, vt->size);
	}
	ab->nr_entries = cpu_to_le32(new_nr);
}

@@ -199,17 +196,16 @@ static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
/*
 * Shrink the array block to 'new_nr' entries, dropping one reference for
 * every entry removed from the tail when the value type supplies a 'dec'
 * callback.
 *
 * 'new_nr' must not exceed the block's max_entries or its current entry
 * count; both are enforced with BUG_ON().
 */
static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
			unsigned new_nr)
{
	uint32_t nr_entries, delta;
	struct dm_btree_value_type *vt = &info->value_type;

	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
	BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));

	nr_entries = le32_to_cpu(ab->nr_entries);
	delta = nr_entries - new_nr;

	/*
	 * The entries being dropped are [new_nr, nr_entries), so the range
	 * handed to dec() starts at index new_nr.  Starting at new_nr - 1
	 * would release the last surviving entry, miss the final dropped one,
	 * and wrap the index when new_nr is zero.
	 */
	if (vt->dec && delta)
		vt->dec(vt->context, element_at(info, ab, new_nr), delta);

	ab->nr_entries = cpu_to_le32(new_nr);
}

@@ -573,16 +569,17 @@ static int grow(struct resize *resize)
 * These are the value_type functions for the btree elements, which point
 * to array blocks.
 */
static void block_inc(void *context, const void *value)
static void block_inc(void *context, const void *value, unsigned count)
{
	/*
	 * NOTE(review): this text comes from a diff view that shows no +/-
	 * markers.  The two-argument signature, the non-pointer 'block_le'
	 * local and the memcpy()/dm_tm_inc() pair ahead of the loop appear to
	 * be removed pre-change lines.
	 */
	__le64 block_le;
	const __le64 *block_le = value;
	struct dm_array_info *info = context;
	unsigned i;

	memcpy(&block_le, value, sizeof(block_le));
	dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
	/*
	 * 'value' is read as 'count' consecutive __le64 block numbers; each
	 * one is passed to dm_tm_inc() on the array's transaction manager.
	 */
	for (i = 0; i < count; i++, block_le++)
		dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le));
}

static void block_dec(void *context, const void *value)
static void __block_dec(void *context, const void *value)
{
	int r;
	uint64_t b;
@@ -621,6 +618,13 @@ static void block_dec(void *context, const void *value)
	dm_tm_dec(info->btree_info.tm, b);
}

static void block_dec(void *context, const void *value, unsigned count)
{
	unsigned i;
	for (i = 0; i < count; i++, value += sizeof(__le64))
		__block_dec(context, value);
}

/*
 * Two values are equal when their __le64-sized byte images match.
 * Returns 1 on a match and 0 otherwise; 'context' is not used.
 */
static int block_equal(void *context, const void *value1, const void *value2)
{
	if (memcmp(value1, value2, sizeof(__le64)) != 0)
		return 0;

	return 1;
}
@@ -711,7 +715,7 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_
			return r;

		if (vt->inc)
			vt->inc(vt->context, element_at(info, ab, i));
			vt->inc(vt->context, element_at(info, ab, i), 1);
	}

	ab->nr_entries = cpu_to_le32(new_nr);
@@ -822,9 +826,9 @@ static int array_set_value(struct dm_array_info *info, dm_block_t root,
	old_value = element_at(info, ab, entry);
	if (vt->dec &&
	    (!vt->equal || !vt->equal(vt->context, old_value, value))) {
		vt->dec(vt->context, old_value);
		vt->dec(vt->context, old_value, 1);
		if (vt->inc)
			vt->inc(vt->context, value);
			vt->inc(vt->context, value, 1);
	}

	memcpy(old_value, value, info->value_type.size);
+13 −0
Original line number Diff line number Diff line
@@ -144,4 +144,17 @@ extern struct dm_block_validator btree_node_validator;
extern void init_le64_type(struct dm_transaction_manager *tm,
			   struct dm_btree_value_type *vt);

/*
 * This returns a shadowed btree leaf that you may modify.  In practice
 * this means overwrites only, since an insert could cause a node to
 * be split.  Useful if you need access to the old value to calculate the
 * new one.
 *
 * This only works with single level btrees.  The given key must be present in
 * the tree, otherwise -EINVAL will be returned.
 *
 * NOTE(review): the out-parameters are not described here.  Presumably
 * 'index' receives the position of 'key' within the leaf, 'new_root' the
 * root after shadowing, and 'leaf' the locked leaf block that the caller
 * must release -- confirm against the definition.
 */
int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
			     uint64_t key, int *index,
			     dm_block_t *new_root, struct dm_block **leaf);

#endif	/* DM_BTREE_INTERNAL_H */
+2 −2
Original line number Diff line number Diff line
@@ -544,7 +544,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,

		if (info->value_type.dec)
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index));
					     value_ptr(n, index), 1);

		delete_at(n, index);
	}
@@ -653,7 +653,7 @@ static int remove_one(struct dm_btree_info *info, dm_block_t root,
	if (k >= keys[last_level] && k < end_key) {
		if (info->value_type.dec)
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index));
					     value_ptr(n, index), 1);

		delete_at(n, index);
		keys[last_level] = k + 1ull;
Loading