Commit 3225717f authored by Bob Pearson, committed by Jason Gunthorpe

RDMA/rxe: Replace red-black trees by xarrays

Currently the rxe driver uses red-black trees to add indices to the rxe
object pools. Linux xarrays provide a better way to implement the same
functionality for indices. This patch replaces the red-black trees with
xarrays for pool objects. Since xarrays already have a spinlock, use it in
place of the pool rwlock. Make sure that all changes to the xarray (index)
and the kref (ref count) occur atomically.

Link: https://lore.kernel.org/r/20220304000808.225811-9-rpearsonhpe@gmail.com


Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent df34dc9e
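
For context, a minimal, self-contained sketch (not part of the patch) of the cyclic xarray allocation pattern the pool code switches to. The demo_* names are illustrative; only xa_init_flags()/xa_alloc_cyclic() and their arguments mirror what the patch does in rxe_pool.c:

	/* Illustrative only: a pool that hands out indices from an xarray,
	 * mirroring the pattern this patch introduces for rxe object pools.
	 */
	#include <linux/xarray.h>
	#include <linux/kref.h>

	struct demo_elem {
		struct kref	ref_cnt;	/* lifetime of the element */
		u32		index;		/* index assigned by the xarray */
	};

	struct demo_pool {
		struct xarray	xa;		/* index -> element, with its own spinlock */
		struct xa_limit	limit;		/* allowed index range, e.g. from rxe_type_info */
		u32		next;		/* cursor for cyclic (round-robin) allocation */
	};

	static void demo_pool_init(struct demo_pool *pool, u32 min, u32 max)
	{
		xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);	/* allocating xarray */
		pool->limit.min = min;
		pool->limit.max = max;
		pool->next = 0;
	}

	static int demo_pool_add(struct demo_pool *pool, struct demo_elem *elem)
	{
		int err;

		kref_init(&elem->ref_cnt);
		/* Find the next free index after pool->next within pool->limit,
		 * wrapping around if needed, and store elem there. The xarray
		 * takes its internal spinlock, so no separate pool lock is needed.
		 */
		err = xa_alloc_cyclic(&pool->xa, &elem->index, elem,
				      pool->limit, &pool->next, GFP_KERNEL);
		return err < 0 ? err : 0;	/* xa_alloc_cyclic() returns 1 if it wrapped */
	}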
+12 −68
@@ -114,75 +114,26 @@ static void rxe_init_ports(struct rxe_dev *rxe)
}

/* init pools of managed objects */
static int rxe_init_pools(struct rxe_dev *rxe)
static void rxe_init_pools(struct rxe_dev *rxe)
{
	int err;

	err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC);
	if (err)
		goto err1;

	err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD);
	if (err)
		goto err2;

	err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH);
	if (err)
		goto err3;

	err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ);
	if (err)
		goto err4;

	err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP);
	if (err)
		goto err5;

	err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ);
	if (err)
		goto err6;

	err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR);
	if (err)
		goto err7;

	err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW);
	if (err)
		goto err8;

	return 0;

err8:
	rxe_pool_cleanup(&rxe->mr_pool);
err7:
	rxe_pool_cleanup(&rxe->cq_pool);
err6:
	rxe_pool_cleanup(&rxe->qp_pool);
err5:
	rxe_pool_cleanup(&rxe->srq_pool);
err4:
	rxe_pool_cleanup(&rxe->ah_pool);
err3:
	rxe_pool_cleanup(&rxe->pd_pool);
err2:
	rxe_pool_cleanup(&rxe->uc_pool);
err1:
	return err;
	rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC);
	rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD);
	rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH);
	rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ);
	rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP);
	rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ);
	rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR);
	rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW);
}

/* initialize rxe device state */
static int rxe_init(struct rxe_dev *rxe)
static void rxe_init(struct rxe_dev *rxe)
{
	int err;

	/* init default device parameters */
	rxe_init_device_param(rxe);

	rxe_init_ports(rxe);

	err = rxe_init_pools(rxe);
	if (err)
		return err;
	rxe_init_pools(rxe);

	/* init pending mmap list */
	spin_lock_init(&rxe->mmap_offset_lock);
@@ -194,8 +145,6 @@ static int rxe_init(struct rxe_dev *rxe)
	rxe->mcg_tree = RB_ROOT;

	mutex_init(&rxe->usdev_lock);

	return 0;
}

void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
@@ -217,12 +166,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
 */
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name)
{
	int err;

	err = rxe_init(rxe);
	if (err)
		return err;

	rxe_init(rxe);
	rxe_set_mtu(rxe, mtu);

	return rxe_register_device(rxe, ibdev_name);
+0 −1
@@ -691,7 +691,6 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)

	mr->state = RXE_MR_STATE_INVALID;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
+0 −8
@@ -20,7 +20,6 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
		return ret;
	}

	rxe_add_index(mw);
	mw->rkey = ibmw->rkey = (mw->elem.index << 8) | rxe_get_next_key(-1);
	mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ?
			RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
@@ -329,10 +328,3 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)

	return mw;
}

void rxe_mw_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mw *mw = container_of(elem, typeof(*mw), elem);

	rxe_drop_index(mw);
}
+62 −159
@@ -22,19 +22,22 @@ static const struct rxe_type_info {
		.name		= "uc",
		.size		= sizeof(struct rxe_ucontext),
		.elem_offset	= offsetof(struct rxe_ucontext, elem),
		.min_index	= 1,
		.max_index	= UINT_MAX,
		.max_elem	= UINT_MAX,
	},
	[RXE_TYPE_PD] = {
		.name		= "pd",
		.size		= sizeof(struct rxe_pd),
		.elem_offset	= offsetof(struct rxe_pd, elem),
		.min_index	= 1,
		.max_index	= UINT_MAX,
		.max_elem	= UINT_MAX,
	},
	[RXE_TYPE_AH] = {
		.name		= "ah",
		.size		= sizeof(struct rxe_ah),
		.elem_offset	= offsetof(struct rxe_ah, elem),
		.flags		= RXE_POOL_INDEX,
		.min_index	= RXE_MIN_AH_INDEX,
		.max_index	= RXE_MAX_AH_INDEX,
		.max_elem	= RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1,
@@ -43,7 +46,6 @@ static const struct rxe_type_info {
		.name		= "srq",
		.size		= sizeof(struct rxe_srq),
		.elem_offset	= offsetof(struct rxe_srq, elem),
		.flags		= RXE_POOL_INDEX,
		.min_index	= RXE_MIN_SRQ_INDEX,
		.max_index	= RXE_MAX_SRQ_INDEX,
		.max_elem	= RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1,
@@ -53,7 +55,6 @@ static const struct rxe_type_info {
		.size		= sizeof(struct rxe_qp),
		.elem_offset	= offsetof(struct rxe_qp, elem),
		.cleanup	= rxe_qp_cleanup,
		.flags		= RXE_POOL_INDEX,
		.min_index	= RXE_MIN_QP_INDEX,
		.max_index	= RXE_MAX_QP_INDEX,
		.max_elem	= RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1,
@@ -63,6 +64,8 @@ static const struct rxe_type_info {
		.size		= sizeof(struct rxe_cq),
		.elem_offset	= offsetof(struct rxe_cq, elem),
		.cleanup	= rxe_cq_cleanup,
		.min_index	= 1,
		.max_index	= UINT_MAX,
		.max_elem	= UINT_MAX,
	},
	[RXE_TYPE_MR] = {
@@ -70,7 +73,7 @@ static const struct rxe_type_info {
		.size		= sizeof(struct rxe_mr),
		.elem_offset	= offsetof(struct rxe_mr, elem),
		.cleanup	= rxe_mr_cleanup,
		.flags		= RXE_POOL_INDEX | RXE_POOL_ALLOC,
		.flags		= RXE_POOL_ALLOC,
		.min_index	= RXE_MIN_MR_INDEX,
		.max_index	= RXE_MAX_MR_INDEX,
		.max_elem	= RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1,
@@ -79,44 +82,16 @@ static const struct rxe_type_info {
		.name		= "mw",
		.size		= sizeof(struct rxe_mw),
		.elem_offset	= offsetof(struct rxe_mw, elem),
		.cleanup	= rxe_mw_cleanup,
		.flags		= RXE_POOL_INDEX,
		.min_index	= RXE_MIN_MW_INDEX,
		.max_index	= RXE_MAX_MW_INDEX,
		.max_elem	= RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1,
	},
};

static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
{
	int err = 0;

	if ((max - min + 1) < pool->max_elem) {
		pr_warn("not enough indices for max_elem\n");
		err = -EINVAL;
		goto out;
	}

	pool->index.max_index = max;
	pool->index.min_index = min;

	pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL);
	if (!pool->index.table) {
		err = -ENOMEM;
		goto out;
	}

out:
	return err;
}

int rxe_pool_init(
	struct rxe_dev		*rxe,
	struct rxe_pool		*pool,
void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
		   enum rxe_elem_type type)
{
	const struct rxe_type_info *info = &rxe_type_info[type];
	int			err = 0;

	memset(pool, 0, sizeof(*pool));

@@ -131,111 +106,31 @@ int rxe_pool_init(

	atomic_set(&pool->num_elem, 0);

	rwlock_init(&pool->pool_lock);

	if (pool->flags & RXE_POOL_INDEX) {
		pool->index.tree = RB_ROOT;
		err = rxe_pool_init_index(pool, info->max_index,
					  info->min_index);
		if (err)
			goto out;
	}

out:
	return err;
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
	pool->limit.min = info->min_index;
	pool->limit.max = info->max_index;
}

void rxe_pool_cleanup(struct rxe_pool *pool)
{
	if (atomic_read(&pool->num_elem) > 0)
		pr_warn("%s pool destroyed with unfree'd elem\n",
			pool->name);

	if (pool->flags & RXE_POOL_INDEX)
		bitmap_free(pool->index.table);
}

static u32 alloc_index(struct rxe_pool *pool)
{
	u32 index;
	u32 range = pool->index.max_index - pool->index.min_index + 1;

	index = find_next_zero_bit(pool->index.table, range, pool->index.last);
	if (index >= range)
		index = find_first_zero_bit(pool->index.table, range);

	WARN_ON_ONCE(index >= range);
	set_bit(index, pool->index.table);
	pool->index.last = index;
	return index + pool->index.min_index;
}

static int rxe_insert_index(struct rxe_pool *pool, struct rxe_pool_elem *new)
{
	struct rb_node **link = &pool->index.tree.rb_node;
	struct rb_node *parent = NULL;
	struct rxe_pool_elem *elem;

	while (*link) {
		parent = *link;
		elem = rb_entry(parent, struct rxe_pool_elem, index_node);

		if (elem->index == new->index) {
			pr_warn("element already exists!\n");
			return -EINVAL;
		}

		if (elem->index > new->index)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	rb_link_node(&new->index_node, parent, link);
	rb_insert_color(&new->index_node, &pool->index.tree);

	return 0;
}

int __rxe_add_index(struct rxe_pool_elem *elem)
{
	struct rxe_pool *pool = elem->pool;
	unsigned long flags;
	int err;

	write_lock_irqsave(&pool->pool_lock, flags);
	elem->index = alloc_index(pool);
	err = rxe_insert_index(pool, elem);
	write_unlock_irqrestore(&pool->pool_lock, flags);

	return err;
}

void __rxe_drop_index(struct rxe_pool_elem *elem)
{
	struct rxe_pool *pool = elem->pool;
	unsigned long flags;

	write_lock_irqsave(&pool->pool_lock, flags);
	clear_bit(elem->index - pool->index.min_index, pool->index.table);
	rb_erase(&elem->index_node, &pool->index.tree);
	write_unlock_irqrestore(&pool->pool_lock, flags);
	WARN_ON(!xa_empty(&pool->xa));
}

void *rxe_alloc(struct rxe_pool *pool)
{
	struct rxe_pool_elem *elem;
	void *obj;
	int err;

	if (WARN_ON(!(pool->flags & RXE_POOL_ALLOC)))
		return NULL;

	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
		goto out_cnt;
		goto err_cnt;

	obj = kzalloc(pool->elem_size, GFP_KERNEL);
	if (!obj)
		goto out_cnt;
		goto err_cnt;

	elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset);

@@ -243,78 +138,86 @@ void *rxe_alloc(struct rxe_pool *pool)
	elem->obj = obj;
	kref_init(&elem->ref_cnt);

	err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
			      &pool->next, GFP_KERNEL);
	if (err)
		goto err_free;

	return obj;

out_cnt:
err_free:
	kfree(obj);
err_cnt:
	atomic_dec(&pool->num_elem);
	return NULL;
}

int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
{
	int err;

	if (WARN_ON(pool->flags & RXE_POOL_ALLOC))
		return -EINVAL;

	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
		goto out_cnt;
		goto err_cnt;

	elem->pool = pool;
	elem->obj = (u8 *)elem - pool->elem_offset;
	kref_init(&elem->ref_cnt);

	err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit,
			      &pool->next, GFP_KERNEL);
	if (err)
		goto err_cnt;

	return 0;

out_cnt:
err_cnt:
	atomic_dec(&pool->num_elem);
	return -EINVAL;
}

void rxe_elem_release(struct kref *kref)
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
{
	struct rxe_pool_elem *elem =
		container_of(kref, struct rxe_pool_elem, ref_cnt);
	struct rxe_pool *pool = elem->pool;
	struct rxe_pool_elem *elem;
	struct xarray *xa = &pool->xa;
	unsigned long flags;
	void *obj;

	if (pool->cleanup)
		pool->cleanup(elem);

	if (pool->flags & RXE_POOL_ALLOC) {
	xa_lock_irqsave(xa, flags);
	elem = xa_load(xa, index);
	if (elem && kref_get_unless_zero(&elem->ref_cnt))
		obj = elem->obj;
		kfree(obj);
	}
	else
		obj = NULL;
	xa_unlock_irqrestore(xa, flags);

	atomic_dec(&pool->num_elem);
	return obj;
}

void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
static void rxe_elem_release(struct kref *kref)
{
	struct rxe_pool_elem *elem;
	struct rb_node *node;
	unsigned long flags;
	void *obj;
	struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt);
	struct rxe_pool *pool = elem->pool;

	read_lock_irqsave(&pool->pool_lock, flags);
	node = pool->index.tree.rb_node;
	xa_erase(&pool->xa, elem->index);

	while (node) {
		elem = rb_entry(node, struct rxe_pool_elem, index_node);
	if (pool->cleanup)
		pool->cleanup(elem);

		if (elem->index > index)
			node = node->rb_left;
		else if (elem->index < index)
			node = node->rb_right;
		else
			break;
	if (pool->flags & RXE_POOL_ALLOC)
		kfree(elem->obj);

	atomic_dec(&pool->num_elem);
}

	if (node) {
		kref_get(&elem->ref_cnt);
		obj = elem->obj;
	} else {
		obj = NULL;
int __rxe_get(struct rxe_pool_elem *elem)
{
	return kref_get_unless_zero(&elem->ref_cnt);
}
	read_unlock_irqrestore(&pool->pool_lock, flags);

	return obj;
int __rxe_put(struct rxe_pool_elem *elem)
{
	return kref_put(&elem->ref_cnt, rxe_elem_release);
}
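
Since the inline diff view interleaves removed and added lines, here is the new rxe_pool_get_index() assembled from the hunk above, purely for readability (the comment is editorial, not from the patch):

	void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
	{
		struct rxe_pool_elem *elem;
		struct xarray *xa = &pool->xa;
		unsigned long flags;
		void *obj;

		/* The xarray spinlock now protects the lookup; a reference is
		 * taken only if the element's kref has not already hit zero.
		 */
		xa_lock_irqsave(xa, flags);
		elem = xa_load(xa, index);
		if (elem && kref_get_unless_zero(&elem->ref_cnt))
			obj = elem->obj;
		else
			obj = NULL;
		xa_unlock_irqrestore(xa, flags);

		return obj;
	}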
+11 −32
@@ -8,8 +8,7 @@
#define RXE_POOL_H

enum rxe_pool_flags {
	RXE_POOL_INDEX		= BIT(1),
	RXE_POOL_ALLOC		= BIT(2),
	RXE_POOL_ALLOC		= BIT(1),
};

enum rxe_elem_type {
@@ -29,16 +28,12 @@ struct rxe_pool_elem {
	void			*obj;
	struct kref		ref_cnt;
	struct list_head	list;

	/* only used if indexed */
	struct rb_node		index_node;
	u32			index;
};

struct rxe_pool {
	struct rxe_dev		*rxe;
	const char		*name;
	rwlock_t		pool_lock; /* protects pool add/del/search */
	void			(*cleanup)(struct rxe_pool_elem *elem);
	enum rxe_pool_flags	flags;
	enum rxe_elem_type	type;
@@ -48,21 +43,16 @@ struct rxe_pool {
	size_t			elem_size;
	size_t			elem_offset;

	/* only used if indexed */
	struct {
		struct rb_root		tree;
		unsigned long		*table;
		u32			last;
		u32			max_index;
		u32			min_index;
	} index;
	struct xarray		xa;
	struct xa_limit		limit;
	u32			next;
};

/* initialize a pool of objects with given limit on
 * number of elements. gets parameters from rxe_type_info
 * pool elements will be allocated out of a slab cache
 */
int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
		  enum rxe_elem_type type);

/* free resources from object pool */
@@ -76,29 +66,18 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem);

#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem)

/* assign an index to an indexed object and insert object into
 * pool's rb tree
 */
int __rxe_add_index(struct rxe_pool_elem *elem);

#define rxe_add_index(obj) __rxe_add_index(&(obj)->elem)

/* drop an index and remove object from rb tree */
void __rxe_drop_index(struct rxe_pool_elem *elem);

#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->elem)

/* lookup an indexed object from index. takes a reference on object */
void *rxe_pool_get_index(struct rxe_pool *pool, u32 index);

/* cleanup an object when all references are dropped */
void rxe_elem_release(struct kref *kref);

/* take a reference on an object */
#define rxe_add_ref(obj) kref_get(&(obj)->elem.ref_cnt)
int __rxe_get(struct rxe_pool_elem *elem);

#define rxe_add_ref(obj) __rxe_get(&(obj)->elem)

/* drop a reference on an object */
#define rxe_drop_ref(obj) kref_put(&(obj)->elem.ref_cnt, rxe_elem_release)
int __rxe_put(struct rxe_pool_elem *elem);

#define rxe_drop_ref(obj) __rxe_put(&(obj)->elem)

#define rxe_read_ref(obj) kref_read(&(obj)->elem.ref_cnt)
