Commit b6c2bfea authored by Chuck Lever's avatar Chuck Lever
Browse files

svcrdma: Relieve contention on sc_send_lock.



/proc/lock_stat indicates the the sc_send_lock is heavily
contended when the server is under load from a single client.

To address this, convert the send_ctxt free list to an llist.
Returning an item to the send_ctxt cache is now waitless, which
reduces the instruction path length in the single-threaded Send
handler (svc_rdma_wc_send).

The goal is to enable the ib_comp_wq worker to handle a higher
RPC/RDMA Send completion rate given the same CPU resources. This
change reduces CPU utilization of Send completion by 2-3% on my
server.

Signed-off-by: default avatarChuck Lever <chuck.lever@oracle.com>
Reviewed-By: default avatarTom Talpey <tom@talpey.com>
parent 6c8c84f5
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -90,7 +90,7 @@ struct svcxprt_rdma {
	struct ib_pd         *sc_pd;

	spinlock_t	     sc_send_lock;
	struct list_head     sc_send_ctxts;
	struct llist_head    sc_send_ctxts;
	spinlock_t	     sc_rw_ctxt_lock;
	struct list_head     sc_rw_ctxts;

@@ -150,7 +150,7 @@ struct svc_rdma_recv_ctxt {
};

struct svc_rdma_send_ctxt {
	struct list_head	sc_list;
	struct llist_node	sc_node;
	struct rpc_rdma_cid	sc_cid;

	struct ib_send_wr	sc_send_wr;
+8 −15
Original line number Diff line number Diff line
@@ -113,13 +113,6 @@

static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);

static inline struct svc_rdma_send_ctxt *
svc_rdma_next_send_ctxt(struct list_head *list)
{
	return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
					sc_list);
}

static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
				   struct rpc_rdma_cid *cid)
{
@@ -182,9 +175,10 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_send_ctxt *ctxt;
	struct llist_node *node;

	while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) {
		list_del(&ctxt->sc_list);
	while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) {
		ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
		ib_dma_unmap_single(rdma->sc_pd->device,
				    ctxt->sc_sges[0].addr,
				    rdma->sc_max_req_size,
@@ -204,12 +198,13 @@ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_send_ctxt *ctxt;
	struct llist_node *node;

	spin_lock(&rdma->sc_send_lock);
	ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts);
	if (!ctxt)
	node = llist_del_first(&rdma->sc_send_ctxts);
	if (!node)
		goto out_empty;
	list_del(&ctxt->sc_list);
	ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
	spin_unlock(&rdma->sc_send_lock);

out:
@@ -253,9 +248,7 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
					     ctxt->sc_sges[i].length);
	}

	spin_lock(&rdma->sc_send_lock);
	list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
	spin_unlock(&rdma->sc_send_lock);
	llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
}

/**
+1 −1
Original line number Diff line number Diff line
@@ -136,7 +136,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
	svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
	INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
	init_llist_head(&cma_xprt->sc_send_ctxts);
	init_llist_head(&cma_xprt->sc_recv_ctxts);
	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
	init_waitqueue_head(&cma_xprt->sc_send_wait);