Unverified Commit 11142287 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!5492 Backport Introduce __mt_dup() to improve the performance of fork()

Merge Pull Request from: @ci-robot 
 
PR sync from: Peng Zhang <zhangpeng362@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/ADNKPGZHNGUFVE5XKC72TDIZCQQDGYMF/ 
From: ZhangPeng <zhangpeng362@huawei.com>

This series introduces __mt_dup() to improve the performance of fork().
Also backport a bugfix to avoid conflicts. 

Liam R. Howlett (1):
  radix tree test suite: fix allocation calculation in
    kmem_cache_alloc_bulk()

Peng Zhang (10):
  maple_tree: add mt_free_one() and mt_attr() helpers
  maple_tree: introduce {mtree,mas}_lock_nested()
  maple_tree: introduce interfaces __mt_dup() and mtree_dup()
  radix tree test suite: align kmem_cache_alloc_bulk() with kernel
    behavior.
  maple_tree: add test for mtree_dup()
  maple_tree: update the documentation of maple tree
  maple_tree: skip other tests when BENCH is enabled
  maple_tree: update check_forking() and bench_forking()
  maple_tree: preserve the tree attributes when destroying maple tree
  fork: use __mt_dup() to duplicate maple tree in dup_mmap()


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I9AYM3 
 
Link:https://gitee.com/openeuler/kernel/pulls/5492

 

Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com>
Reviewed-by: Xu Kuohai <xukuohai@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
parents 1bf66e08 d37e5614
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -81,6 +81,9 @@ section.
Sometimes it is necessary to ensure the next call to store to a maple tree does
not allocate memory, please see :ref:`maple-tree-advanced-api` for this use case.

You can use mtree_dup() to duplicate an entire maple tree. It is a more
efficient way than inserting all elements one by one into a new tree.

Finally, you can remove all entries from a maple tree by calling
mtree_destroy().  If the maple tree entries are pointers, you may wish to free
the entries first.
@@ -112,6 +115,7 @@ Takes ma_lock internally:
 * mtree_insert()
 * mtree_insert_range()
 * mtree_erase()
 * mtree_dup()
 * mtree_destroy()
 * mt_set_in_rcu()
 * mt_clear_in_rcu()
+7 −0
Original line number Diff line number Diff line
@@ -256,6 +256,8 @@ struct maple_tree {
	struct maple_tree name = MTREE_INIT(name, 0)

#define mtree_lock(mt)		spin_lock((&(mt)->ma_lock))
/*
 * Take @mt's ma_lock, forwarding @subclass to spin_lock_nested().
 * The parameter must be named @mt: the expansion dereferences (mt), so any
 * other parameter name would silently bind to a caller-scope variable.
 */
#define mtree_lock_nested(mt, subclass) \
		spin_lock_nested((&(mt)->ma_lock), subclass)
#define mtree_unlock(mt)	spin_unlock((&(mt)->ma_lock))

/*
@@ -327,6 +329,9 @@ int mtree_store(struct maple_tree *mt, unsigned long index,
		void *entry, gfp_t gfp);
void *mtree_erase(struct maple_tree *mt, unsigned long index);

int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);

void mtree_destroy(struct maple_tree *mt);
void __mt_destroy(struct maple_tree *mt);

@@ -406,6 +411,8 @@ struct ma_wr_state {
};

#define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
/* Take the ma_lock of the tree @mas refers to, forwarding @subclass to spin_lock_nested(). */
#define mas_lock_nested(mas, subclass) \
		spin_lock_nested(&((mas)->tree->ma_lock), subclass)
#define mas_unlock(mas)         spin_unlock(&((mas)->tree->ma_lock))


+11 −0
Original line number Diff line number Diff line
@@ -1010,6 +1010,17 @@ static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi,
	return mas_expected_entries(&vmi->mas, count);
}

/*
 * vma_iter_clear_gfp() - Store NULL over a range of the iterator's tree.
 * @vmi: The vma iterator
 * @start: First address of the range (inclusive)
 * @end: End address of the range (exclusive)
 * @gfp: The GFP_FLAGS passed to mas_store_gfp()
 *
 * Return: 0 on success, -ENOMEM if the maple state is left in an error state
 * by the store.
 */
static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
			unsigned long start, unsigned long end, gfp_t gfp)
{
	/* @end is exclusive, the range handed to the maple state is inclusive. */
	__mas_set_range(&vmi->mas, start, end - 1);
	mas_store_gfp(&vmi->mas, NULL, gfp);

	return unlikely(mas_is_err(&vmi->mas)) ? -ENOMEM : 0;
}

/* Free any unused preallocations */
static inline void vma_iter_free(struct vma_iterator *vmi)
{
+29 −11
Original line number Diff line number Diff line
@@ -663,7 +663,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
	int retval;
	unsigned long charge = 0;
	LIST_HEAD(uf);
	VMA_ITERATOR(old_vmi, oldmm, 0);
	VMA_ITERATOR(vmi, mm, 0);

	uprobe_start_dup_mmap();
@@ -691,16 +690,22 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
		goto out;
	khugepaged_fork(mm, oldmm);

	retval = vma_iter_bulk_alloc(&vmi, oldmm->map_count);
	if (retval)
	/* Use __mt_dup() to efficiently build an identical maple tree. */
	retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL);
	if (unlikely(retval))
		goto out;

	mt_clear_in_rcu(vmi.mas.tree);
	for_each_vma(old_vmi, mpnt) {
	for_each_vma(vmi, mpnt) {
		struct file *file;

		vma_start_write(mpnt);
		if (mpnt->vm_flags & VM_DONTCOPY) {
			retval = vma_iter_clear_gfp(&vmi, mpnt->vm_start,
						    mpnt->vm_end, GFP_KERNEL);
			if (retval)
				goto loop_out;

			vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
			continue;
		}
@@ -762,9 +767,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
		if (is_vm_hugetlb_page(tmp))
			hugetlb_dup_vma_private(tmp);

		/* Link the vma into the MT */
		if (vma_iter_bulk_store(&vmi, tmp))
			goto fail_nomem_vmi_store;
		/*
		 * Link the vma into the MT. After using __mt_dup(), memory
		 * allocation is not necessary here, so it cannot fail.
		 */
		vma_iter_bulk_store(&vmi, tmp);

		mm->map_count++;
		if (!(tmp->vm_flags & VM_WIPEONFORK))
@@ -773,15 +780,28 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
		if (tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		if (retval)
		if (retval) {
			mpnt = vma_next(&vmi);
			goto loop_out;
		}
	}
	/* a new mm has just been created */
	retval = arch_dup_mmap(oldmm, mm);
loop_out:
	vma_iter_free(&vmi);
	if (!retval)
	if (!retval) {
		mt_set_in_rcu(vmi.mas.tree);
	} else if (mpnt) {
		/*
		 * The entire maple tree has already been duplicated. If the
		 * mmap duplication fails, mark the failure point with
		 * XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered,
		 * stop releasing VMAs that have not been duplicated after this
		 * point.
		 */
		mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1);
		mas_store(&vmi.mas, XA_ZERO_ENTRY);
	}
out:
	mmap_write_unlock(mm);
	flush_tlb_mm(oldmm);
@@ -791,8 +811,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
	uprobe_end_dup_mmap();
	return retval;

fail_nomem_vmi_store:
	unlink_anon_vmas(tmp);
fail_nomem_anon_vma_fork:
	mpol_put(vma_policy(tmp));
fail_nomem_policy:
+286 −2
Original line number Diff line number Diff line
@@ -4,6 +4,8 @@
 * Copyright (c) 2018-2022 Oracle Corporation
 * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
 *	    Matthew Wilcox <willy@infradead.org>
 * Copyright (c) 2023 ByteDance
 * Author: Peng Zhang <zhangpeng.00@bytedance.com>
 */

/*
@@ -165,6 +167,11 @@ static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
	return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
}

/* Give a single maple node back to maple_node_cache. */
static inline void mt_free_one(struct maple_node *node)
{
	kmem_cache_free(maple_node_cache, node);
}

static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
@@ -205,6 +212,11 @@ static unsigned int mas_mt_height(struct ma_state *mas)
	return mt_height(mas->tree);
}

/*
 * Return the tree's ma_flags with the bits covered by MT_FLAGS_HEIGHT_MASK
 * cleared, i.e. everything in the flags word except the height field.
 */
static inline unsigned int mt_attr(struct maple_tree *mt)
{
	return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
}

static inline enum maple_type mte_node_type(const struct maple_enode *entry)
{
	return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
@@ -5584,7 +5596,7 @@ void mas_destroy(struct ma_state *mas)
			mt_free_bulk(count, (void __rcu **)&node->slot[1]);
			total -= count;
		}
		kmem_cache_free(maple_node_cache, node);
		mt_free_one(ma_mnode_ptr(node));
		total--;
	}

@@ -6476,6 +6488,278 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index)
}
EXPORT_SYMBOL(mtree_erase);

/*
 * mas_dup_free() - Free an incomplete duplication of a tree.
 * @mas: The maple state of an incomplete tree.
 *
 * The parameter @mas->node passed in indicates that the allocation failed on
 * this node. This function frees all nodes starting from @mas->node in the
 * reverse order of mas_dup_build(). There is no need to hold the source tree
 * lock at this time.
 */
static void mas_dup_free(struct ma_state *mas)
{
	struct maple_node *node;
	enum maple_type type;
	void __rcu **slots;
	unsigned char count, i;

	/* Maybe the first node allocation failed. */
	if (mas_is_none(mas))
		return;

	/* Work back towards the root, freeing one node's children per pass. */
	while (!mte_is_root(mas->node)) {
		mas_ascend(mas);
		/*
		 * A non-zero offset means there are subtrees to the left of the
		 * one we came from (NOTE(review): assumes mas_ascend() leaves
		 * the child's slot index in mas->offset - confirm). Walk to the
		 * last leaf of the previous subtree, then step up to its
		 * parent so that parent's children are freed next.
		 */
		if (mas->offset) {
			mas->offset--;
			do {
				mas_descend(mas);
				mas->offset = mas_data_end(mas);
			} while (!mte_is_leaf(mas->node));

			mas_ascend(mas);
		}

		node = mte_to_node(mas->node);
		type = mte_node_type(mas->node);
		slots = ma_slots(node, type);
		count = mas_data_end(mas) + 1;
		/*
		 * Clear the MAPLE_NODE_MASK bits from every child slot so the
		 * values handed to mt_free_bulk() are plain node addresses.
		 */
		for (i = 0; i < count; i++)
			((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
		mt_free_bulk(count, slots);
	}

	/* Only the root is left; it has no parent slot array to free it from. */
	node = mte_to_node(mas->node);
	mt_free_one(node);
}

/*
 * mas_copy_node() - Copy a maple node and replace the parent.
 * @mas: The maple state of source tree.
 * @new_mas: The maple state of new tree.
 * @parent: The parent of the new node.
 *
 * Duplicate the contents of @mas->node into the already allocated
 * @new_mas->node, then point the copy at @parent. This function does not
 * allocate memory and does not change the state of @mas.
 */
static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas,
		struct maple_pnode *parent)
{
	struct maple_node *src = mte_to_node(mas->node);
	struct maple_node *dst = mte_to_node(new_mas->node);
	unsigned long parent_bits;

	/* Start from a byte-for-byte copy of the source node. */
	memcpy(dst, src, sizeof(*dst));

	/*
	 * Carry over the MAPLE_NODE_MASK bits of the source parent pointer and
	 * combine them with the address of the new parent.
	 */
	parent_bits = (unsigned long)src->parent & MAPLE_NODE_MASK;
	dst->parent = ma_parent_ptr(parent_bits | (unsigned long)parent);
}

/*
 * mas_dup_alloc() - Allocate child nodes for a maple node.
 * @mas: The maple state of source tree.
 * @new_mas: The maple state of new tree.
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * This function allocates child nodes for @new_mas->node during the duplication
 * process. If memory allocation fails, @mas is set to -ENOMEM.
 */
static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
		gfp_t gfp)
{
	struct maple_node *node = mte_to_node(mas->node);
	struct maple_node *new_node = mte_to_node(new_mas->node);
	enum maple_type type;
	unsigned char request, count, i;
	void __rcu **slots;
	void __rcu **new_slots;
	unsigned long val;

	/* Allocate memory for child nodes. */
	type = mte_node_type(mas->node);
	new_slots = ma_slots(new_node, type);
	/* One child per occupied slot of the source node. */
	request = mas_data_end(mas) + 1;
	/* The new children are written straight into the new node's slots. */
	count = mt_alloc_bulk(gfp, request, (void **)new_slots);
	if (unlikely(count < request)) {
		/*
		 * Wipe the requested slots on failure.
		 * NOTE(review): presumably so the new node holds neither
		 * source-tree pointers nor partial results - confirm.
		 */
		memset(new_slots, 0, request * sizeof(void *));
		mas_set_err(mas, -ENOMEM);
		return;
	}

	/* Restore node type information in slots. */
	slots = ma_slots(node, type);
	for (i = 0; i < count; i++) {
		/* Take only the MAPLE_NODE_MASK bits from the source slot. */
		val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
		val &= MAPLE_NODE_MASK;
		((unsigned long *)new_slots)[i] |= val;
	}
}

/*
 * mas_dup_build() - Build a new maple tree from a source tree
 * @mas: The maple state of source tree, need to be in MAS_START state.
 * @new_mas: The maple state of new tree, need to be in MAS_START state.
 * @gfp: The GFP_FLAGS to use for allocations.
 *
 * This function builds a new tree in DFS preorder. If the memory allocation
 * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the
 * last node. mas_dup_free() will free the incomplete duplication of a tree.
 *
 * Note that the attributes of the two trees need to be exactly the same, and the
 * new tree needs to be empty, otherwise -EINVAL will be set in @mas.
 */
static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
		gfp_t gfp)
{
	struct maple_node *node;
	struct maple_pnode *parent = NULL;
	struct maple_enode *root;
	enum maple_type type;

	/* Reject mismatched attributes or a non-empty target before allocating. */
	if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) ||
	    unlikely(!mtree_empty(new_mas->tree))) {
		mas_set_err(mas, -EINVAL);
		return;
	}

	root = mas_start(mas);
	/*
	 * In these two states there is no node to copy; the value returned by
	 * mas_start() is installed as the new root as-is.
	 */
	if (mas_is_ptr(mas) || mas_is_none(mas))
		goto set_new_tree;

	node = mt_alloc_one(gfp);
	if (!node) {
		/* Lets mas_dup_free() see that nothing was allocated. */
		new_mas->node = MAS_NONE;
		mas_set_err(mas, -ENOMEM);
		return;
	}

	type = mte_node_type(mas->node);
	root = mt_mk_node(node, type);
	new_mas->node = root;
	new_mas->min = 0;
	new_mas->max = ULONG_MAX;
	/* @root keeps the root-marked value that is published at the end. */
	root = mte_mk_root(root);
	while (1) {
		mas_copy_node(mas, new_mas, parent);
		if (!mte_is_leaf(mas->node)) {
			/* Only allocate child nodes for non-leaf nodes. */
			mas_dup_alloc(mas, new_mas, gfp);
			if (unlikely(mas_is_err(mas)))
				return;
		} else {
			/*
			 * This is the last leaf node and duplication is
			 * completed.
			 */
			if (mas->max == ULONG_MAX)
				goto done;

			/* This is not the last leaf node and needs to go up. */
			do {
				mas_ascend(mas);
				mas_ascend(new_mas);
			} while (mas->offset == mas_data_end(mas));

			/* Move to the next subtree. */
			mas->offset++;
			new_mas->offset++;
		}

		mas_descend(mas);
		/*
		 * Remember the node being left: it becomes the parent of the
		 * child that mas_copy_node() fills in on the next iteration.
		 */
		parent = ma_parent_ptr(mte_to_node(new_mas->node));
		mas_descend(new_mas);
		mas->offset = 0;
		new_mas->offset = 0;
	}
done:
	/* Specially handle the parent of the root node. */
	mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas));
set_new_tree:
	/* Make them the same height */
	new_mas->tree->ma_flags = mas->tree->ma_flags;
	rcu_assign_pointer(new_mas->tree->ma_root, root);
}

/**
 * __mt_dup() - Duplicate an entire maple tree
 * @mt: The source maple tree
 * @new: The new maple tree
 * @gfp: The GFP_FLAGS to use for allocations
 *
 * The source tree is walked in Depth-First Search (DFS) pre-order. Every node
 * is copied with memcpy() and new child nodes are allocated for each non-leaf
 * node, so a new node is identical to its source node except for the addresses
 * stored in it. This is faster than walking all elements of the source tree
 * and inserting them one by one into the new tree.
 *
 * The caller must make sure that both trees have the same attributes and that
 * the new tree is empty, otherwise -EINVAL is returned.
 * Note that the user needs to manually lock the source tree and the new tree.
 *
 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
 * the attributes of the two trees are different or the new tree is not an empty
 * tree.
 */
int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
{
	MA_STATE(mas, mt, 0, 0);
	MA_STATE(new_mas, new, 0, 0);
	int err;

	mas_dup_build(&mas, &new_mas, gfp);
	if (!mas_is_err(&mas))
		return 0;

	err = xa_err(mas.node);
	/* -EINVAL is raised before any allocation; only -ENOMEM needs cleanup. */
	if (err == -ENOMEM)
		mas_dup_free(&new_mas);

	return err;
}
EXPORT_SYMBOL(__mt_dup);

/**
 * mtree_dup() - Duplicate an entire maple tree
 * @mt: The source maple tree
 * @new: The new maple tree
 * @gfp: The GFP_FLAGS to use for allocations
 *
 * The source tree is walked in Depth-First Search (DFS) pre-order. Every node
 * is copied with memcpy() and new child nodes are allocated for each non-leaf
 * node, so a new node is identical to its source node except for the addresses
 * stored in it. This is faster than walking all elements of the source tree
 * and inserting them one by one into the new tree.
 *
 * The caller must make sure that both trees have the same attributes and that
 * the new tree is empty, otherwise -EINVAL is returned. Both tree locks are
 * taken and released by this function.
 *
 * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
 * the attributes of the two trees are different or the new tree is not an empty
 * tree.
 */
int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
{
	MA_STATE(mas, mt, 0, 0);
	MA_STATE(new_mas, new, 0, 0);
	int err = 0;

	/* The new tree's lock is the outer lock, the source tree's nests in it. */
	mas_lock(&new_mas);
	mas_lock_nested(&mas, SINGLE_DEPTH_NESTING);
	mas_dup_build(&mas, &new_mas, gfp);
	/* The source tree is released before any cleanup of the new tree. */
	mas_unlock(&mas);

	if (mas_is_err(&mas)) {
		err = xa_err(mas.node);
		/* -EINVAL is raised before any allocation; only -ENOMEM needs cleanup. */
		if (err == -ENOMEM)
			mas_dup_free(&new_mas);
	}
	mas_unlock(&new_mas);

	return err;
}
EXPORT_SYMBOL(mtree_dup);

/**
 * __mt_destroy() - Walk and free all nodes of a locked maple tree.
 * @mt: The maple tree
@@ -6490,7 +6774,7 @@ void __mt_destroy(struct maple_tree *mt)
	if (xa_is_node(root))
		mte_destroy_walk(root, mt);

	mt->ma_flags = 0;
	mt->ma_flags = mt_attr(mt);
}
EXPORT_SYMBOL_GPL(__mt_destroy);

Loading