Commit 1efede74 authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/marcel/tags/rdma-pull-request' into staging



RDMA queue

* pvrdma: Add support for SRQ

# gpg: Signature made Sat 04 May 2019 14:35:40 BST
# gpg:                using RSA key 36D4C0F0CF2FE46D
# gpg: Good signature from "Marcel Apfelbaum <marcel.apfelbaum@zoho.com>" [marginal]
# gpg:                 aka "Marcel Apfelbaum <marcel@redhat.com>" [marginal]
# gpg:                 aka "Marcel Apfelbaum <marcel.apfelbaum@gmail.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B1C6 3A57 F92E 08F2 640F  31F5 36D4 C0F0 CF2F E46D

* remotes/marcel/tags/rdma-pull-request:
  hw/pvrdma: Add support for SRQ
  hw/rdma: Modify create/destroy QP to support SRQ
  hw/rdma: Add support for managing SRQ resource
  hw/rdma: Add SRQ support to backend layer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents 19eb2d4e 355b7cf3
Loading
Loading
Loading
Loading
+120 −5
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ typedef struct BackendCtx {
    void *up_ctx;
    struct ibv_sge sge; /* Used to save MAD recv buffer */
    RdmaBackendQP *backend_qp; /* To maintain recv buffers */
    RdmaBackendSRQ *backend_srq;
} BackendCtx;

struct backend_umad {
@@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
    int i, ne, total_ne = 0;
    BackendCtx *bctx;
    struct ibv_wc wc[2];
    RdmaProtectedGSList *cqe_ctx_list;

    qemu_mutex_lock(&rdma_dev_res->lock);
    do {
@@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)

            comp_handler(bctx->up_ctx, &wc[i]);

            rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
                                               wc[i].wr_id);
            if (bctx->backend_qp) {
                cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
            } else {
                cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
            }

            rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
            rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
            g_free(bctx);
        }
@@ -662,6 +669,60 @@ err_free_bctx:
    g_free(bctx);
}

/*
 * Post a receive work request to a shared receive queue (SRQ).
 *
 * A zeroed BackendCtx records the caller's completion context (ctx) and the
 * owning SRQ, and is registered in the cqe-ctx table; the resulting id is
 * carried in wr_id so the CQ poller can recover the context (and the SRQ's
 * cqe_ctx_list) when the completion arrives.
 *
 * On any failure the completion handler is invoked with IBV_WC_GENERAL_ERR
 * and everything allocated here is rolled back via the goto-cleanup labels.
 */
void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
                                uint32_t num_sge, void *ctx)
{
    BackendCtx *bctx;
    struct ibv_sge new_sge[MAX_SGE];
    uint32_t bctx_id;
    int rc;
    struct ibv_recv_wr wr = {}, *bad_wr;

    bctx = g_malloc0(sizeof(*bctx));
    bctx->up_ctx = ctx;
    bctx->backend_srq = srq; /* lets the poller find this SRQ's ctx list */

    rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
    if (unlikely(rc)) {
        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
        goto err_free_bctx;
    }

    rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);

    /* Translate guest SGEs to host addresses; also accounts rx buffer bytes */
    rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
                              &backend_dev->rdma_dev_res->stats.rx_bufs_len);
    if (rc) {
        complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
        goto err_dealloc_cqe_ctx;
    }

    wr.num_sge = num_sge;
    wr.sg_list = new_sge;
    wr.wr_id = bctx_id; /* round-tripped back to us in the completion */
    rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
    if (rc) {
        rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
                          srq->ibsrq->handle, rc, errno);
        complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
        goto err_dealloc_cqe_ctx;
    }

    /* Success: a completion is now outstanding for this buffer */
    atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
    backend_dev->rdma_dev_res->stats.rx_bufs++;
    backend_dev->rdma_dev_res->stats.rx_srq++;

    return;

err_dealloc_cqe_ctx:
    backend_dev->rdma_dev_res->stats.rx_bufs_err++;
    rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);

err_free_bctx:
    g_free(bctx);
}

int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
{
    pd->ibpd = ibv_alloc_pd(backend_dev->context);
@@ -733,9 +794,9 @@ void rdma_backend_destroy_cq(RdmaBackendCQ *cq)

int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
                           RdmaBackendPD *pd, RdmaBackendCQ *scq,
                           RdmaBackendCQ *rcq, uint32_t max_send_wr,
                           uint32_t max_recv_wr, uint32_t max_send_sge,
                           uint32_t max_recv_sge)
                           RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
                           uint32_t max_send_wr, uint32_t max_recv_wr,
                           uint32_t max_send_sge, uint32_t max_recv_sge)
{
    struct ibv_qp_init_attr attr = {};

@@ -763,6 +824,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
    attr.cap.max_recv_wr = max_recv_wr;
    attr.cap.max_send_sge = max_send_sge;
    attr.cap.max_recv_sge = max_recv_sge;
    if (srq) {
        attr.srq = srq->ibsrq;
    }

    qp->ibqp = ibv_create_qp(pd->ibpd, &attr);
    if (!qp->ibqp) {
@@ -938,6 +1002,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
    rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
}

/*
 * Create a shared receive queue on the backend device and initialize the
 * list that tracks its outstanding completion contexts.
 *
 * Returns 0 on success, -EIO if the verbs layer rejects the request.
 */
int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
                            uint32_t max_wr, uint32_t max_sge,
                            uint32_t srq_limit)
{
    struct ibv_srq_init_attr init_attr = {
        .attr = {
            .max_wr = max_wr,
            .max_sge = max_sge,
            .srq_limit = srq_limit,
        },
    };

    srq->ibsrq = ibv_create_srq(pd->ibpd, &init_attr);
    if (!srq->ibsrq) {
        rdma_error_report("ibv_create_srq failed, errno=%d", errno);
        return -EIO;
    }

    rdma_protected_gslist_init(&srq->cqe_ctx_list);

    return 0;
}

/* Query attributes of a backend SRQ; -EINVAL if it was never created. */
int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
{
    return srq->ibsrq ? ibv_query_srq(srq->ibsrq, srq_attr) : -EINVAL;
}

/* Modify a backend SRQ's attributes; -EINVAL if it was never created. */
int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
                            int srq_attr_mask)
{
    return srq->ibsrq ? ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask)
                      : -EINVAL;
}

/*
 * Destroy a backend SRQ.
 *
 * The verbs object is destroyed only if creation ever succeeded, but the
 * cqe-ctx list is always drained: free_cqe_ctx releases each still-pending
 * completion context against dev_res before the list itself is destroyed.
 */
void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
{
    if (srq->ibsrq) {
        ibv_destroy_srq(srq->ibsrq);
    }
    g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
    rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
}

#define CHK_ATTR(req, dev, member, fmt) ({ \
    trace_rdma_check_dev_attr(#member, dev.member, req->member); \
    if (req->member > dev.member) { \
@@ -960,6 +1073,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
    }

    dev_attr->max_sge = MAX_SGE;
    dev_attr->max_srq_sge = MAX_SGE;

    CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
    CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
@@ -970,6 +1084,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
    CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
    CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");

    return 0;
}
+15 −3
Original line number Diff line number Diff line
@@ -89,9 +89,9 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);

int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
                           RdmaBackendPD *pd, RdmaBackendCQ *scq,
                           RdmaBackendCQ *rcq, uint32_t max_send_wr,
                           uint32_t max_recv_wr, uint32_t max_send_sge,
                           uint32_t max_recv_sge);
                           RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
                           uint32_t max_send_wr, uint32_t max_recv_wr,
                           uint32_t max_send_sge, uint32_t max_recv_sge);
int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
                               uint8_t qp_type, uint32_t qkey);
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
@@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
                            RdmaBackendQP *qp, uint8_t qp_type,
                            struct ibv_sge *sge, uint32_t num_sge, void *ctx);

int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
                            uint32_t max_wr, uint32_t max_sge,
                            uint32_t srq_limit);
int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
                            int srq_attr_mask);
void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
                              RdmaDeviceResources *dev_res);
void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
                                RdmaBackendSRQ *srq, struct ibv_sge *sge,
                                uint32_t num_sge, void *ctx);

#endif
+5 −0
Original line number Diff line number Diff line
@@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
    RdmaProtectedGSList cqe_ctx_list;
} RdmaBackendQP;

/* Backend (libibverbs) state for a shared receive queue */
typedef struct RdmaBackendSRQ {
    struct ibv_srq *ibsrq;            /* verbs SRQ handle */
    RdmaProtectedGSList cqe_ctx_list; /* ids of outstanding recv contexts */
} RdmaBackendSRQ;

#endif
+114 −3
Original line number Diff line number Diff line
@@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
                   dev_res->stats.tx_err);
    monitor_printf(mon, "\trx_bufs          : %" PRId64 "\n",
                   dev_res->stats.rx_bufs);
    monitor_printf(mon, "\trx_srq           : %" PRId64 "\n",
                   dev_res->stats.rx_srq);
    monitor_printf(mon, "\trx_bufs_len      : %" PRId64 "\n",
                   dev_res->stats.rx_bufs_len);
    monitor_printf(mon, "\trx_bufs_err      : %" PRId64 "\n",
@@ -384,12 +386,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
                     uint8_t is_srq, uint32_t srq_handle)
{
    int rc;
    RdmaRmQP *qp;
    RdmaRmCQ *scq, *rcq;
    RdmaRmPD *pd;
    RdmaRmSRQ *srq = NULL;
    uint32_t rm_qpn;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
@@ -406,6 +410,16 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
        return -EINVAL;
    }

    if (is_srq) {
        srq = rdma_rm_get_srq(dev_res, srq_handle);
        if (!srq) {
            rdma_error_report("Invalid srqn %d", srq_handle);
            return -EINVAL;
        }

        srq->recv_cq_handle = recv_cq_handle;
    }

    if (qp_type == IBV_QPT_GSI) {
        scq->notify = CNT_SET;
        rcq->notify = CNT_SET;
@@ -422,10 +436,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
    qp->send_cq_handle = send_cq_handle;
    qp->recv_cq_handle = recv_cq_handle;
    qp->opaque = opaque;
    qp->is_srq = is_srq;

    rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
                                &scq->backend_cq, &rcq->backend_cq, max_send_wr,
                                max_recv_wr, max_send_sge, max_recv_sge);
                                &scq->backend_cq, &rcq->backend_cq,
                                is_srq ? &srq->backend_srq : NULL,
                                max_send_wr, max_recv_wr, max_send_sge,
                                max_recv_sge);

    if (rc) {
        rc = -EIO;
        goto out_dealloc_qp;
@@ -542,6 +560,96 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
    rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
}

/* Look up an SRQ by handle in the resource table; NULL if not allocated. */
RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
    return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
}

/*
 * Allocate an SRQ resource: reserve a slot in the SRQ table, create the
 * backend (verbs) SRQ under the given PD, and stash the caller's opaque
 * pointer.
 *
 * Returns 0 on success; -EINVAL for a bad PD handle, -ENOMEM if the table
 * is full, -EIO if the backend creation fails (slot is released again).
 */
int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
                      uint32_t *srq_handle, void *opaque)
{
    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);
    RdmaRmSRQ *srq;

    if (!pd) {
        return -EINVAL;
    }

    srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
    if (!srq) {
        return -ENOMEM;
    }

    if (rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd, max_wr,
                                max_sge, srq_limit)) {
        /* Backend refused; give the table slot back */
        rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);
        return -EIO;
    }

    srq->opaque = opaque;

    return 0;
}

/* Query an SRQ's current attributes; -EINVAL for an unknown handle. */
int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                      struct ibv_srq_attr *srq_attr)
{
    RdmaRmSRQ *srq = rdma_rm_get_srq(dev_res, srq_handle);

    return srq ? rdma_backend_query_srq(&srq->backend_srq, srq_attr)
               : -EINVAL;
}

/*
 * Modify an SRQ's attributes after validating the request: a masked-in
 * srq_limit or max_wr of zero is rejected before reaching the backend.
 * Returns -EINVAL for an unknown handle or invalid attributes.
 */
int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                       struct ibv_srq_attr *srq_attr, int srq_attr_mask)
{
    RdmaRmSRQ *srq = rdma_rm_get_srq(dev_res, srq_handle);

    if (!srq) {
        return -EINVAL;
    }

    if (((srq_attr_mask & IBV_SRQ_LIMIT) && !srq_attr->srq_limit) ||
        ((srq_attr_mask & IBV_SRQ_MAX_WR) && !srq_attr->max_wr)) {
        return -EINVAL;
    }

    return rdma_backend_modify_srq(&srq->backend_srq, srq_attr,
                                   srq_attr_mask);
}

/* Release an SRQ: tear down the backend object, then free the table slot.
 * Silently ignores an unknown handle. */
void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
    RdmaRmSRQ *srq = rdma_rm_get_srq(dev_res, srq_handle);

    if (srq) {
        rdma_backend_destroy_srq(&srq->backend_srq, dev_res);
        rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
    }
}

void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    void **cqe_ctx;
@@ -671,6 +779,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
                       dev_attr->max_qp_wr, sizeof(void *));
    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
    res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
                 sizeof(RdmaRmSRQ));

    init_ports(dev_res);

@@ -689,6 +799,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,

    fini_ports(dev_res, backend_dev, ifname);

    res_tbl_free(&dev_res->srq_tbl);
    res_tbl_free(&dev_res->uc_tbl);
    res_tbl_free(&dev_res->cqe_ctx_tbl);
    res_tbl_free(&dev_res->qp_tbl);
+12 −1
Original line number Diff line number Diff line
@@ -53,7 +53,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
                     uint8_t is_srq, uint32_t srq_handle);
RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
@@ -65,6 +66,16 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     int attr_mask, struct ibv_qp_init_attr *init_attr);
void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle);

RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
                      uint32_t *srq_handle, void *opaque);
int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                      struct ibv_srq_attr *srq_attr);
int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                       struct ibv_srq_attr *srq_attr, int srq_attr_mask);
void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);

int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx);
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
Loading