Commit 9e5bcc70 authored by Chengchang Tang, committed by Juan Zhou
Browse files

RDMA/hns: Support flexible wqe buffer page size

driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I98HIN



--------------------------------------------------------------------------

Currently, the driver always allocates 4K pages for the user-space WQE
buffer, even on a system with 64K pages. As a result, HW reads WQEs with
a granularity of 4K even on a 64K system. Since we support 1024-byte
inline data, when SQ inline is used HW has to switch pages every 4 WQEs.
Each page switch introduces a delay of about 400ns, which is an average
delay of 100ns per packet.

In order to improve performance, we allow the user-mode driver to use
more flexible WQE buffer page size allocation strategies, so that it can
configure the WQE buffer with any page size from 4K up to the system
PAGE_SIZE.

This feature needs to be used in conjunction with the user-mode driver.
In order to ensure forward compatibility, if the user-mode driver does
not support this feature, the kernel-mode driver will continue to use a
fixed 4K page size.

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
parent 26d71e7c
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -473,6 +473,11 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
		resp.congest_type = hr_dev->caps.cong_cap;

	if (ucmd.config & HNS_ROCE_UCTX_DYN_QP_PGSZ) {
		context->config |= HNS_ROCE_UCTX_DYN_QP_PGSZ;
		resp.config |=  HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ;
	}

	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
	if (ret)
		goto error_out;
+20 −12
Original line number Diff line number Diff line
@@ -641,18 +641,21 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
}

static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
			    struct hns_roce_qp *hr_qp,
			    struct hns_roce_qp *hr_qp, u8 page_shift,
			    struct hns_roce_buf_attr *buf_attr)
{
	unsigned int page_size = BIT(page_shift);
	int buf_size;
	int idx = 0;

	hr_qp->buff_size = 0;

	if (page_shift > PAGE_SHIFT || page_shift < HNS_HW_PAGE_SHIFT)
		return -EOPNOTSUPP;

	/* SQ WQE */
	hr_qp->sq.offset = 0;
	buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
					  hr_qp->sq.wqe_shift);
	buf_size = ALIGN(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size);
	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
		buf_attr->region[idx].size = buf_size;
		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
@@ -662,8 +665,7 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,

	/* extend SGE WQE in SQ */
	hr_qp->sge.offset = hr_qp->buff_size;
	buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
					  hr_qp->sge.sge_shift);
	buf_size = ALIGN(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, page_size);
	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
		buf_attr->region[idx].size = buf_size;
		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
@@ -673,8 +675,7 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,

	/* RQ WQE */
	hr_qp->rq.offset = hr_qp->buff_size;
	buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
					  hr_qp->rq.wqe_shift);
	buf_size = ALIGN(hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift, page_size);
	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
		buf_attr->region[idx].size = buf_size;
		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
@@ -685,8 +686,8 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
	if (hr_qp->buff_size < 1)
		return -EINVAL;

	buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
	buf_attr->region_count = idx;
	buf_attr->page_shift = page_shift;

	return 0;
}
@@ -742,20 +743,27 @@ static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)

static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			struct ib_qp_init_attr *init_attr,
			struct ib_udata *udata, unsigned long addr)
			struct ib_udata *udata,
			struct hns_roce_ib_create_qp *ucmd)
{
	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
					 struct hns_roce_ucontext, ibucontext);
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_attr buf_attr = {};
	u8 page_shift = HNS_HW_PAGE_SHIFT;
	int ret;

	ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
	if (uctx && (uctx->config & HNS_ROCE_UCTX_DYN_QP_PGSZ))
		page_shift = ucmd->pageshift;

	ret = set_wqe_buf_attr(hr_dev, hr_qp, page_shift, &buf_attr);
	if (ret) {
		ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
		goto err_inline;
	}
	ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
				  PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
				  udata, addr);
				  udata, ucmd->buf_addr);
	if (ret) {
		ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
		goto err_inline;
@@ -1151,7 +1159,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
		}
	}

	ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
	ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, &ucmd);
	if (ret) {
		ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
		goto err_buf;
+4 −1
Original line number Diff line number Diff line
@@ -90,7 +90,8 @@ struct hns_roce_ib_create_qp {
	__u8    log_sq_bb_count;
	__u8    log_sq_stride;
	__u8    sq_no_prefetch;
	__u8    reserved[5];
	__u8    pageshift;
	__u8    reserved[4];
	__aligned_u64 sdb_addr;
	__aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */
	__aligned_u64 create_flags;
@@ -119,12 +120,14 @@ enum {
	HNS_ROCE_EXSGE_FLAGS = 1 << 0,
	HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
	HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
	HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4,
};

/*
 * Response-side feature flags.
 * HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ is OR-ed into resp.config by
 * hns_roce_alloc_ucontext() when user space requested
 * HNS_ROCE_UCTX_DYN_QP_PGSZ.
 * NOTE(review): presumably the other RSP bits are reported through the
 * same config field - confirm against the rest of the driver.
 */
enum {
	HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
	HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
	HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
	/* Deliberately the same bit value as the request flag. */
	HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ,
};

struct hns_roce_ib_alloc_ucontext_resp {