Commit 5bcf5a59 authored by Bob Pearson, committed by Jason Gunthorpe

RDMA/rxe: Protect kernel index from user space

In order to prevent user space from modifying the index that belongs to
the kernel for shared queues, let the kernel use a local copy of the index
and copy any new values of that index to the shared rxe_queue_buf struct.

This adds more switch statements, which decreases the performance of the
queue API. Move the type into the parameter list for these functions so
that the compiler can optimize out the switch statements when the explicit
type is known. Modify all the calls in the driver on performance paths to
pass in the explicit queue type.
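
A minimal standalone sketch of this effect, with illustrative names
(my_queue, my_count) rather than the driver's: when the call site passes
a constant type, the inlined switch is constant-folded away; passing the
runtime q->type keeps it.

	enum queue_type {
		QUEUE_TYPE_KERNEL,
		QUEUE_TYPE_TO_USER,
		QUEUE_TYPE_FROM_USER,
	};

	struct my_queue {
		enum queue_type type;		/* only known at run time */
		unsigned int shared_prod;	/* index shared with user space */
		unsigned int shared_cons;	/* index shared with user space */
		unsigned int index;		/* kernel's private copy */
		unsigned int index_mask;
	};

	static inline unsigned int my_count(const struct my_queue *q,
					    enum queue_type type)
	{
		unsigned int prod = 0, cons = 0;

		switch (type) {
		case QUEUE_TYPE_FROM_USER:	/* user produces, kernel consumes */
			prod = q->shared_prod;
			cons = q->index;
			break;
		case QUEUE_TYPE_TO_USER:	/* kernel produces, user consumes */
			prod = q->index;
			cons = q->shared_cons;
			break;
		case QUEUE_TYPE_KERNEL:		/* both indices kernel-owned */
			prod = q->shared_prod;
			cons = q->shared_cons;
			break;
		}
		return (prod - cons) & q->index_mask;
	}

	unsigned int fast_path_count(const struct my_queue *q)
	{
		/* constant type: after inlining the switch folds away */
		return my_count(q, QUEUE_TYPE_KERNEL);
	}

	unsigned int slow_path_count(const struct my_queue *q)
	{
		/* runtime type: the switch remains in the generated code */
		return my_count(q, q->type);
	}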

Link: https://lore.kernel.org/r/20210527194748.662636-4-rpearsonhpe@gmail.com
Link: https://lore.kernel.org/linux-rdma/20210526165239.GP1002214@nvidia.com/


Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 0a67c46d
drivers/infiniband/sw/rxe/rxe_comp.c +21 −10
@@ -141,7 +141,10 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp,
 	/* we come here whether or not we found a response packet to see if
 	 * there are any posted WQEs
 	 */
-	wqe = queue_head(qp->sq.queue);
+	if (qp->is_user)
+		wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER);
+	else
+		wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL);
 	*wqe_p = wqe;
 
 	/* no WQE or requester has not started it yet */
@@ -414,16 +417,23 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct rxe_cqe cqe;
+	bool post;
 
-	if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
-	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-	    wqe->status != IB_WC_SUCCESS) {
+	/* do we need to post a completion */
+	post = ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
+			(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+			wqe->status != IB_WC_SUCCESS);
+
+	if (post)
 		make_send_cqe(qp, wqe, &cqe);
-		advance_consumer(qp->sq.queue);
+
+	if (qp->is_user)
+		advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER);
+	else
+		advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL);
+
+	if (post)
 		rxe_cq_post(qp->scq, &cqe, 0);
-	} else {
-		advance_consumer(qp->sq.queue);
-	}
 
 	if (wqe->wr.opcode == IB_WR_SEND ||
 	    wqe->wr.opcode == IB_WR_SEND_WITH_IMM ||
@@ -511,6 +521,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
 {
 	struct sk_buff *skb;
 	struct rxe_send_wqe *wqe;
+	struct rxe_queue *q = qp->sq.queue;
 
 	while ((skb = skb_dequeue(&qp->resp_pkts))) {
 		rxe_drop_ref(qp);
@@ -518,12 +529,12 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
 		ib_device_put(qp->ibqp.device);
 	}
 
-	while ((wqe = queue_head(qp->sq.queue))) {
+	while ((wqe = queue_head(q, q->type))) {
 		if (notify) {
 			wqe->status = IB_WC_WR_FLUSH_ERR;
 			do_complete(qp, wqe);
 		} else {
-			advance_consumer(qp->sq.queue);
+			advance_consumer(q, q->type);
 		}
 	}
 }
drivers/infiniband/sw/rxe/rxe_cq.c +24 −4
@@ -25,7 +25,11 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
 	}
 
 	if (cq) {
-		count = queue_count(cq->queue);
+		if (cq->is_user)
+			count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
+		else
+			count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
+
 		if (cqe < count) {
 			pr_warn("cqe(%d) < current # elements in queue (%d)",
 				cqe, count);
@@ -108,10 +112,17 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
 {
 	struct ib_event ev;
 	unsigned long flags;
+	int full;
+	void *addr;
 
 	spin_lock_irqsave(&cq->cq_lock, flags);
 
-	if (unlikely(queue_full(cq->queue))) {
+	if (cq->is_user)
+		full = queue_full(cq->queue, QUEUE_TYPE_TO_USER);
+	else
+		full = queue_full(cq->queue, QUEUE_TYPE_KERNEL);
+
+	if (unlikely(full)) {
 		spin_unlock_irqrestore(&cq->cq_lock, flags);
 		if (cq->ibcq.event_handler) {
 			ev.device = cq->ibcq.device;
@@ -123,9 +134,18 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
 		return -EBUSY;
 	}
 
-	memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
+	if (cq->is_user)
+		addr = producer_addr(cq->queue, QUEUE_TYPE_TO_USER);
+	else
+		addr = producer_addr(cq->queue, QUEUE_TYPE_KERNEL);
+
+	memcpy(addr, cqe, sizeof(*cqe));
+
+	if (cq->is_user)
+		advance_producer(cq->queue, QUEUE_TYPE_TO_USER);
+	else
+		advance_producer(cq->queue, QUEUE_TYPE_KERNEL);
 
-	advance_producer(cq->queue);
 	spin_unlock_irqrestore(&cq->cq_lock, flags);
 
 	if ((cq->notify == IB_CQ_NEXT_COMP) ||
drivers/infiniband/sw/rxe/rxe_qp.c +9 −1
@@ -248,7 +248,13 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 		return err;
 	}
 
-	qp->req.wqe_index	= producer_index(qp->sq.queue);
+	if (qp->is_user)
+		qp->req.wqe_index = producer_index(qp->sq.queue,
+						QUEUE_TYPE_FROM_USER);
+	else
+		qp->req.wqe_index = producer_index(qp->sq.queue,
+						QUEUE_TYPE_KERNEL);
+
 	qp->req.state		= QP_STATE_RESET;
 	qp->req.opcode		= -1;
 	qp->comp.opcode		= -1;
@@ -306,6 +312,8 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
 	spin_lock_init(&qp->rq.producer_lock);
 	spin_lock_init(&qp->rq.consumer_lock);
 
+	qp->rq.is_user = qp->is_user;
+
 	skb_queue_head_init(&qp->resp_pkts);
 
 	rxe_init_task(rxe, &qp->resp.task, qp,
drivers/infiniband/sw/rxe/rxe_queue.c +7 −6
@@ -111,14 +111,15 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
 static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
 			 unsigned int num_elem)
 {
-	if (!queue_empty(q) && (num_elem < queue_count(q)))
+	if (!queue_empty(q, q->type) && (num_elem < queue_count(q, q->type)))
 		return -EINVAL;
 
-	while (!queue_empty(q)) {
-		memcpy(producer_addr(new_q), consumer_addr(q),
-		       new_q->elem_size);
-		advance_producer(new_q);
-		advance_consumer(q);
+	while (!queue_empty(q, q->type)) {
+		memcpy(producer_addr(new_q, new_q->type),
+					consumer_addr(q, q->type),
+					new_q->elem_size);
+		advance_producer(new_q, new_q->type);
+		advance_consumer(q, q->type);
 	}
 
 	swap(*q, *new_q);
drivers/infiniband/sw/rxe/rxe_queue.h +107 −38
@@ -17,6 +17,20 @@
  * up to a power of 2. Since the queue is empty when the
  * producer and consumer indices match the maximum capacity
  * of the queue is one less than the number of element slots
+ *
+ * Notes:
+ *   - Kernel space indices are always masked off to q->index_mask
+ *   before storing so do not need to be checked on reads.
+ *   - User space indices may be out of range and must be
+ *   masked before use when read.
+ *   - The kernel indices for shared queues must not be written
+ *   by user space so a local copy is used and a shared copy is
+ *   stored when the local copy changes.
+ *   - By passing the type in the parameter list separate from q
+ *   the compiler can eliminate the switch statement when the
+ *   actual queue type is known when the function is called.
+ *   In the performance path this is done. In less critical
+ *   paths just q->type is passed.
  */
 
 /* type of queue */
@@ -35,6 +49,12 @@ struct rxe_queue {
 	unsigned int		log2_elem_size;
 	u32			index_mask;
 	enum queue_type		type;
+	/* private copy of index for shared queues between
+	 * kernel space and user space. Kernel reads and writes
+	 * this copy and then replicates to rxe_queue_buf
+	 * for read access by user space.
+	 */
+	u32			index;
 };
 
 int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
@@ -61,19 +81,19 @@ static inline int next_index(struct rxe_queue *q, int index)
 	return (index + 1) & q->buf->index_mask;
 }
 
-static inline int queue_empty(struct rxe_queue *q)
+static inline int queue_empty(struct rxe_queue *q, enum queue_type type)
 {
 	u32 prod;
 	u32 cons;
 
-	switch (q->type) {
+	switch (type) {
 	case QUEUE_TYPE_FROM_USER:
 		/* protect user space index */
 		prod = smp_load_acquire(&q->buf->producer_index);
-		cons = q->buf->consumer_index;
+		cons = q->index;
 		break;
 	case QUEUE_TYPE_TO_USER:
-		prod = q->buf->producer_index;
+		prod = q->index;
 		/* protect user space index */
 		cons = smp_load_acquire(&q->buf->consumer_index);
 		break;
@@ -86,19 +106,19 @@ static inline int queue_empty(struct rxe_queue *q)
 	return ((prod - cons) & q->index_mask) == 0;
 }
 
-static inline int queue_full(struct rxe_queue *q)
+static inline int queue_full(struct rxe_queue *q, enum queue_type type)
 {
 	u32 prod;
 	u32 cons;
 
-	switch (q->type) {
+	switch (type) {
 	case QUEUE_TYPE_FROM_USER:
 		/* protect user space index */
 		prod = smp_load_acquire(&q->buf->producer_index);
-		cons = q->buf->consumer_index;
+		cons = q->index;
 		break;
 	case QUEUE_TYPE_TO_USER:
-		prod = q->buf->producer_index;
+		prod = q->index;
 		/* protect user space index */
 		cons = smp_load_acquire(&q->buf->consumer_index);
 		break;
@@ -111,19 +131,20 @@ static inline int queue_full(struct rxe_queue *q)
 	return ((prod + 1 - cons) & q->index_mask) == 0;
 }
 
-static inline unsigned int queue_count(const struct rxe_queue *q)
+static inline unsigned int queue_count(const struct rxe_queue *q,
+					enum queue_type type)
 {
 	u32 prod;
 	u32 cons;
 
-	switch (q->type) {
+	switch (type) {
 	case QUEUE_TYPE_FROM_USER:
 		/* protect user space index */
 		prod = smp_load_acquire(&q->buf->producer_index);
-		cons = q->buf->consumer_index;
+		cons = q->index;
 		break;
 	case QUEUE_TYPE_TO_USER:
-		prod = q->buf->producer_index;
+		prod = q->index;
 		/* protect user space index */
 		cons = smp_load_acquire(&q->buf->consumer_index);
 		break;
@@ -136,90 +157,138 @@ static inline unsigned int queue_count(const struct rxe_queue *q)
 	return (prod - cons) & q->index_mask;
 }
 
-static inline void advance_producer(struct rxe_queue *q)
+static inline void advance_producer(struct rxe_queue *q, enum queue_type type)
 {
 	u32 prod;
 
-	if (q->type == QUEUE_TYPE_FROM_USER) {
+	switch (type) {
+	case QUEUE_TYPE_FROM_USER:
+		pr_warn_once("Normally kernel should not write user space index\n");
 		/* protect user space index */
 		prod = smp_load_acquire(&q->buf->producer_index);
 		prod = (prod + 1) & q->index_mask;
 		/* same */
 		smp_store_release(&q->buf->producer_index, prod);
-	} else {
+		break;
+	case QUEUE_TYPE_TO_USER:
+		prod = q->index;
+		q->index = (prod + 1) & q->index_mask;
+		q->buf->producer_index = q->index;
+		break;
+	case QUEUE_TYPE_KERNEL:
 		prod = q->buf->producer_index;
 		q->buf->producer_index = (prod + 1) & q->index_mask;
+		break;
 	}
 }
 
-static inline void advance_consumer(struct rxe_queue *q)
+static inline void advance_consumer(struct rxe_queue *q, enum queue_type type)
 {
 	u32 cons;
 
-	if (q->type == QUEUE_TYPE_TO_USER) {
+	switch (type) {
+	case QUEUE_TYPE_FROM_USER:
+		cons = q->index;
+		q->index = (cons + 1) & q->index_mask;
+		q->buf->consumer_index = q->index;
+		break;
+	case QUEUE_TYPE_TO_USER:
+		pr_warn_once("Normally kernel should not write user space index\n");
 		/* protect user space index */
 		cons = smp_load_acquire(&q->buf->consumer_index);
 		cons = (cons + 1) & q->index_mask;
 		/* same */
 		smp_store_release(&q->buf->consumer_index, cons);
-	} else {
+		break;
+	case QUEUE_TYPE_KERNEL:
 		cons = q->buf->consumer_index;
 		q->buf->consumer_index = (cons + 1) & q->index_mask;
+		break;
 	}
 }
 
-static inline void *producer_addr(struct rxe_queue *q)
+static inline void *producer_addr(struct rxe_queue *q, enum queue_type type)
 {
 	u32 prod;
 
-	if (q->type == QUEUE_TYPE_FROM_USER)
+	switch (type) {
+	case QUEUE_TYPE_FROM_USER:
 		/* protect user space index */
 		prod = smp_load_acquire(&q->buf->producer_index);
-	else
-		prod = q->buf->producer_index;
+		prod &= q->index_mask;
+		break;
+	case QUEUE_TYPE_TO_USER:
+		prod = q->index;
+		break;
+	case QUEUE_TYPE_KERNEL:
+		prod = q->buf->producer_index;
+		break;
+	}
 
-	return q->buf->data + ((prod & q->index_mask) << q->log2_elem_size);
+	return q->buf->data + (prod << q->log2_elem_size);
 }
 
-static inline void *consumer_addr(struct rxe_queue *q)
+static inline void *consumer_addr(struct rxe_queue *q, enum queue_type type)
 {
 	u32 cons;
 
-	if (q->type == QUEUE_TYPE_TO_USER)
+	switch (type) {
+	case QUEUE_TYPE_FROM_USER:
+		cons = q->index;
+		break;
+	case QUEUE_TYPE_TO_USER:
 		/* protect user space index */
 		cons = smp_load_acquire(&q->buf->consumer_index);
-	else
-		cons = q->buf->consumer_index;
+		cons &= q->index_mask;
+		break;
+	case QUEUE_TYPE_KERNEL:
+		cons = q->buf->consumer_index;
+		break;
+	}
 
-	return q->buf->data + ((cons & q->index_mask) << q->log2_elem_size);
+	return q->buf->data + (cons << q->log2_elem_size);
 }
 
-static inline unsigned int producer_index(struct rxe_queue *q)
+static inline unsigned int producer_index(struct rxe_queue *q,
+						enum queue_type type)
 {
 	u32 prod;
 
-	if (q->type == QUEUE_TYPE_FROM_USER)
+	switch (type) {
+	case QUEUE_TYPE_FROM_USER:
 		/* protect user space index */
 		prod = smp_load_acquire(&q->buf->producer_index);
-	else
-		prod = q->buf->producer_index;
+		prod &= q->index_mask;
+		break;
+	case QUEUE_TYPE_TO_USER:
+		prod = q->index;
+		break;
+	case QUEUE_TYPE_KERNEL:
+		prod = q->buf->producer_index;
+		break;
+	}
 
 	return prod;
 }
 
-static inline unsigned int consumer_index(struct rxe_queue *q)
+static inline unsigned int consumer_index(struct rxe_queue *q,
+						enum queue_type type)
 {
 	u32 cons;
 
-	if (q->type == QUEUE_TYPE_TO_USER)
+	switch (type) {
+	case QUEUE_TYPE_FROM_USER:
+		cons = q->index;
+		break;
+	case QUEUE_TYPE_TO_USER:
 		/* protect user space index */
 		cons = smp_load_acquire(&q->buf->consumer_index);
-	else
-		cons = q->buf->consumer_index;
+		cons &= q->index_mask;
+		break;
+	case QUEUE_TYPE_KERNEL:
+		cons = q->buf->consumer_index;
+		break;
+	}
 
 	return cons;
 }
@@ -238,9 +307,9 @@ static inline unsigned int index_from_addr(const struct rxe_queue *q,
 				& q->index_mask;
 }
 
-static inline void *queue_head(struct rxe_queue *q)
+static inline void *queue_head(struct rxe_queue *q, enum queue_type type)
 {
-	return queue_empty(q) ? NULL : consumer_addr(q);
+	return queue_empty(q, type) ? NULL : consumer_addr(q, type);
 }
 
 #endif /* RXE_QUEUE_H */
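
A minimal standalone sketch of the index-protection scheme described in
the Notes comment of rxe_queue.h above, using C11 atomics as stand-ins
for the kernel's smp_load_acquire()/smp_store_release(); the names
(shared_buf, kq, and the two helpers) are illustrative, not the driver's.

	#include <stdatomic.h>
	#include <stdint.h>

	struct shared_buf {			/* buffer mapped into user space */
		_Atomic uint32_t producer_index;	/* user-owned on FROM_USER queues */
		_Atomic uint32_t consumer_index;	/* kernel-owned; user may only read */
	};

	struct kq {
		struct shared_buf *buf;
		uint32_t index_mask;
		uint32_t index;		/* private kernel copy, never user-writable */
	};

	/* Kernel consumes one element of a FROM_USER queue. */
	static void kq_advance_consumer(struct kq *q)
	{
		/* advance the trusted private copy first ... */
		q->index = (q->index + 1) & q->index_mask;
		/* ... then publish it; user space only ever reads this value */
		atomic_store_explicit(&q->buf->consumer_index, q->index,
				      memory_order_release);
	}

	/* Kernel reads the untrusted, user-owned producer index. */
	static uint32_t kq_producer_index(const struct kq *q)
	{
		uint32_t prod = atomic_load_explicit(&q->buf->producer_index,
						     memory_order_acquire);
		return prod & q->index_mask;	/* user value may be out of range */
	}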