Commit 8fc75643 authored by Matthew Wilcox (Oracle)'s avatar Matthew Wilcox (Oracle) Committed by Linus Torvalds
Browse files

XArray: add xas_split

In order to use multi-index entries for huge pages in the page cache, we
need to be able to split a multi-index entry (eg if a file is truncated in
the middle of a huge page entry).  This version does not support splitting
more than one level of the tree at a time.  This is an acceptable
limitation for the page cache as we do not expect to support order-12
pages in the near future.

[akpm@linux-foundation.org: export xas_split_alloc() to modules]
[willy@infradead.org: fix xarray split]
  Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org
[willy@infradead.org: fix xarray]
  Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org



Signed-off-by: default avatarMatthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Qian Cai <cai@lca.pw>
Cc: Song Liu <songliubraving@fb.com>
Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org


Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 57417ceb
Loading
Loading
Loading
Loading
+9 −7
Original line number Diff line number Diff line
@@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range.
Each entry will only be returned once, no matter how many indices it
occupies.

Using xas_next() or xas_prev() with a multi-index xa_state
is not supported.  Using either of these functions on a multi-index entry
will reveal sibling entries; these should be skipped over by the caller.

Storing ``NULL`` into any index of a multi-index entry will set the entry
at every index to ``NULL`` and dissolve the tie.  Splitting a multi-index
entry into entries occupying smaller ranges is not yet supported.
Using xas_next() or xas_prev() with a multi-index xa_state is not
supported.  Using either of these functions on a multi-index entry will
reveal sibling entries; these should be skipped over by the caller.

Storing ``NULL`` into any index of a multi-index entry will set the
entry at every index to ``NULL`` and dissolve the tie.  A multi-index
entry can be split into entries occupying smaller ranges by calling
xas_split_alloc() without the xa_lock held, followed by taking the lock
and calling xas_split().

Functions and structures
========================
+13 −0
Original line number Diff line number Diff line
@@ -1507,11 +1507,24 @@ void xas_create_range(struct xa_state *);

#ifdef CONFIG_XARRAY_MULTI
int xa_get_order(struct xarray *, unsigned long index);
void xas_split(struct xa_state *, void *entry, unsigned int order);
void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
#else
static inline int xa_get_order(struct xarray *xa, unsigned long index)
{
	return 0;
}

static inline void xas_split(struct xa_state *xas, void *entry,
		unsigned int order)
{
	xas_store(xas, entry);
}

static inline void xas_split_alloc(struct xa_state *xas, void *entry,
		unsigned int order, gfp_t gfp)
{
}
#endif

/**
+44 −0
Original line number Diff line number Diff line
@@ -1503,6 +1503,49 @@ static noinline void check_store_range(struct xarray *xa)
	}
}

#ifdef CONFIG_XARRAY_MULTI
static void check_split_1(struct xarray *xa, unsigned long index,
							unsigned int order)
{
	XA_STATE(xas, xa, index);
	void *entry;
	unsigned int i = 0;

	xa_store_order(xa, index, order, xa, GFP_KERNEL);

	xas_split_alloc(&xas, xa, order, GFP_KERNEL);
	xas_lock(&xas);
	xas_split(&xas, xa, order);
	xas_unlock(&xas);

	xa_for_each(xa, index, entry) {
		XA_BUG_ON(xa, entry != xa);
		i++;
	}
	XA_BUG_ON(xa, i != 1 << order);

	xa_set_mark(xa, index, XA_MARK_0);
	XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));

	xa_destroy(xa);
}

static noinline void check_split(struct xarray *xa)
{
	unsigned int order;

	XA_BUG_ON(xa, !xa_empty(xa));

	for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
		check_split_1(xa, 0, order);
		check_split_1(xa, 1UL << order, order);
		check_split_1(xa, 3UL << order, order);
	}
}
#else
static void check_split(struct xarray *xa) { }
#endif

static void check_align_1(struct xarray *xa, char *name)
{
	int i;
@@ -1729,6 +1772,7 @@ static int xarray_checks(void)
	check_store_range(&array);
	check_store_iter(&array);
	check_align(&xa0);
	check_split(&array);

	check_workingset(&array, 0);
	check_workingset(&array, 64);
+159 −9
Original line number Diff line number Diff line
@@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node *node)
 */
static void xas_destroy(struct xa_state *xas)
{
	struct xa_node *node = xas->xa_alloc;
	struct xa_node *next, *node = xas->xa_alloc;

	if (!node)
		return;
	while (node) {
		XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
	kmem_cache_free(radix_tree_node_cachep, node);
	xas->xa_alloc = NULL;
		next = rcu_dereference_raw(node->parent);
		radix_tree_node_rcu_free(&node->rcu_head);
		xas->xa_alloc = node = next;
	}
}

/**
@@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
	xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
	if (!xas->xa_alloc)
		return false;
	xas->xa_alloc->parent = NULL;
	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
	xas->xa_node = XAS_RESTART;
	return true;
@@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
	}
	if (!xas->xa_alloc)
		return false;
	xas->xa_alloc->parent = NULL;
	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
	xas->xa_node = XAS_RESTART;
	return true;
@@ -403,7 +406,7 @@ static unsigned long xas_size(const struct xa_state *xas)
/*
 * Use this to calculate the maximum index that will need to be created
 * in order to add the entry described by @xas.  Because we cannot store a
 * multiple-index entry at index 0, the calculation is a little more complex
 * multi-index entry at index 0, the calculation is a little more complex
 * than you might expect.
 */
static unsigned long xas_max(struct xa_state *xas)
@@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_state *xas)
}
EXPORT_SYMBOL_GPL(xas_init_marks);

#ifdef CONFIG_XARRAY_MULTI
static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
{
	unsigned int marks = 0;
	xa_mark_t mark = XA_MARK_0;

	for (;;) {
		if (node_get_mark(node, offset, mark))
			marks |= 1 << (__force unsigned int)mark;
		if (mark == XA_MARK_MAX)
			break;
		mark_inc(mark);
	}

	return marks;
}

static void node_set_marks(struct xa_node *node, unsigned int offset,
			struct xa_node *child, unsigned int marks)
{
	xa_mark_t mark = XA_MARK_0;

	for (;;) {
		if (marks & (1 << (__force unsigned int)mark)) {
			node_set_mark(node, offset, mark);
			if (child)
				node_mark_all(child, mark);
		}
		if (mark == XA_MARK_MAX)
			break;
		mark_inc(mark);
	}
}

/**
 * xas_split_alloc() - Allocate memory for splitting an entry.
 * @xas: XArray operation state.
 * @entry: New entry which will be stored in the array.
 * @order: New entry order.
 * @gfp: Memory allocation flags.
 *
 * This function should be called before calling xas_split().
 * If necessary, it will allocate new nodes (and fill them with @entry)
 * to prepare for the upcoming split of an entry of @order size into
 * entries of the order stored in the @xas.
 *
 * Context: May sleep if @gfp flags permit.
 */
void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
		gfp_t gfp)
{
	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
	unsigned int mask = xas->xa_sibs;

	/* XXX: no support for splitting really large entries yet */
	if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order))
		goto nomem;
	if (xas->xa_shift + XA_CHUNK_SHIFT > order)
		return;

	do {
		unsigned int i;
		void *sibling;
		struct xa_node *node;

		node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
		if (!node)
			goto nomem;
		node->array = xas->xa;
		for (i = 0; i < XA_CHUNK_SIZE; i++) {
			if ((i & mask) == 0) {
				RCU_INIT_POINTER(node->slots[i], entry);
				sibling = xa_mk_sibling(0);
			} else {
				RCU_INIT_POINTER(node->slots[i], sibling);
			}
		}
		RCU_INIT_POINTER(node->parent, xas->xa_alloc);
		xas->xa_alloc = node;
	} while (sibs-- > 0);

	return;
nomem:
	xas_destroy(xas);
	xas_set_err(xas, -ENOMEM);
}
EXPORT_SYMBOL_GPL(xas_split_alloc);

/**
 * xas_split() - Split a multi-index entry into smaller entries.
 * @xas: XArray operation state.
 * @entry: New entry to store in the array.
 * @order: New entry order.
 *
 * The value in the entry is copied to all the replacement entries.
 *
 * Context: Any context.  The caller should hold the xa_lock.
 */
void xas_split(struct xa_state *xas, void *entry, unsigned int order)
{
	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
	unsigned int offset, marks;
	struct xa_node *node;
	void *curr = xas_load(xas);
	int values = 0;

	node = xas->xa_node;
	if (xas_top(node))
		return;

	marks = node_get_marks(node, xas->xa_offset);

	offset = xas->xa_offset + sibs;
	do {
		if (xas->xa_shift < node->shift) {
			struct xa_node *child = xas->xa_alloc;

			xas->xa_alloc = rcu_dereference_raw(child->parent);
			child->shift = node->shift - XA_CHUNK_SHIFT;
			child->offset = offset;
			child->count = XA_CHUNK_SIZE;
			child->nr_values = xa_is_value(entry) ?
					XA_CHUNK_SIZE : 0;
			RCU_INIT_POINTER(child->parent, node);
			node_set_marks(node, offset, child, marks);
			rcu_assign_pointer(node->slots[offset],
					xa_mk_node(child));
			if (xa_is_value(curr))
				values--;
		} else {
			unsigned int canon = offset - xas->xa_sibs;

			node_set_marks(node, canon, NULL, marks);
			rcu_assign_pointer(node->slots[canon], entry);
			while (offset > canon)
				rcu_assign_pointer(node->slots[offset--],
						xa_mk_sibling(canon));
			values += (xa_is_value(entry) - xa_is_value(curr)) *
					(xas->xa_sibs + 1);
		}
	} while (offset-- > xas->xa_offset);

	node->nr_values += values;
}
EXPORT_SYMBOL_GPL(xas_split);
#endif

/**
 * xas_pause() - Pause a walk to drop a lock.
 * @xas: XArray operation state.
@@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store);
 * @gfp: Memory allocation flags.
 *
 * After this function returns, loads from this index will return @entry.
 * Storing into an existing multislot entry updates the entry of every index.
 * Storing into an existing multi-index entry updates the entry of every index.
 * The marks associated with @index are unaffected unless @entry is %NULL.
 *
 * Context: Any context.  Takes and releases the xa_lock.
@@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first,
 *
 * After this function returns, loads from any index between @first and @last,
 * inclusive will return @entry.
 * Storing into an existing multislot entry updates the entry of every index.
 * Storing into an existing multi-index entry updates the entry of every index.
 * The marks associated with @index are unaffected unless @entry is %NULL.
 *
 * Context: Process context.  Takes and releases the xa_lock.  May sleep