Unverified Commit c535f644 authored by openeuler-ci-bot, committed by Gitee

!2517 RDMA/hns: Support flexible wqe buffer page size

Merge Pull Request from: @stinft 
 
Upload two patches to support a flexible WQE buffer page size.
Currently, the driver always allocates 4K pages for the user-space WQE buffer,
even on a 64K-page system. As a result, HW reads WQEs at a granularity of 4K.
Since 1024-byte inline is supported, HW switches pages every 4 WQEs in the SQ
inline scenario. Each page switch introduces a delay of about 400ns, an average
of 100ns per packet.
To improve performance, the user-space driver is now allowed a more flexible
WQE buffer page size allocation strategy: it can configure the WQE buffer with
pages between 4K and the system PAGE_SIZE.
This feature must be used together with a matching user-space driver. To keep
forward compatibility, if the user-space driver does not support this feature,
the kernel continues to use a fixed 4K page size.
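
For illustration, a minimal user-space sketch of the negotiation is given
below. The struct layouts and helper names are simplified stand-ins invented
for this example (they are not the actual rdma-core provider code); only the
HNS_ROCE_UCTX_DYN_QP_PGSZ / HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ flags and the
pageshift field come from the ABI change in this PR.

	#include <stdint.h>

	#define HNS_ROCE_UCTX_DYN_QP_PGSZ     (1 << 4)
	#define HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ HNS_ROCE_UCTX_DYN_QP_PGSZ

	struct ucontext_cmd  { uint64_t config; };   /* simplified stand-in */
	struct ucontext_resp { uint64_t config; };   /* simplified stand-in */
	struct create_qp_cmd { uint8_t pageshift; }; /* simplified stand-in */

	/* Step 1: request the feature when allocating the ucontext. */
	static void request_dyn_qp_pgsz(struct ucontext_cmd *cmd)
	{
		cmd->config |= HNS_ROCE_UCTX_DYN_QP_PGSZ;
	}

	/*
	 * Step 2: if the kernel acked the feature, pass the wanted page
	 * shift (12 for 4K, up to the system PAGE_SHIFT) at QP creation;
	 * otherwise the kernel keeps the fixed 4K page size.
	 */
	static void set_qp_page_shift(const struct ucontext_resp *resp,
				      struct create_qp_cmd *cmd,
				      uint8_t wanted_shift)
	{
		if (resp->config & HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ)
			cmd->pageshift = wanted_shift;
	}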

The first patch is a cleanup. The feature patch needs to add code to
hns_roce_alloc_ucontext(), which would make that function too long, so the
function is refactored first to shorten it and improve readability.
The second patch implements the feature.

 Chengchang Tang (1):
   RDMA/hns: Support flexible wqe buffer page size
 Juan Zhou (1):
   RDMA/hns: Refactor hns_roce_alloc_ucontext()

https://gitee.com/openeuler/kernel/issues/I87LTM 
 
Link: https://gitee.com/openeuler/kernel/pulls/2517

 

Reviewed-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents ea103bb8 603053b4
drivers/infiniband/hw/hns/hns_roce_main.c  +38 −25
@@ -499,6 +499,43 @@ static u32 get_udca_max_qps(struct hns_roce_dev *hr_dev,
	return qp_num;
}

+static void hns_roce_get_uctx_config(struct hns_roce_dev *hr_dev,
+				struct hns_roce_ucontext *context,
+				struct hns_roce_ib_alloc_ucontext *ucmd,
+				struct hns_roce_ib_alloc_ucontext_resp *resp)
+{
+	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+		context->config = ucmd->config & HNS_ROCE_EXSGE_FLAGS;
+
+	if (context->config & HNS_ROCE_EXSGE_FLAGS) {
+		resp->config |= HNS_ROCE_RSP_EXSGE_FLAGS;
+		resp->max_inline_data = hr_dev->caps.max_sq_inline;
+	}
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+		context->config |= ucmd->config & HNS_ROCE_RQ_INLINE_FLAGS;
+		if (context->config & HNS_ROCE_RQ_INLINE_FLAGS)
+			resp->config |= HNS_ROCE_RSP_RQ_INLINE_FLAGS;
+	}
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQE_INLINE) {
+		context->config |= ucmd->config & HNS_ROCE_CQE_INLINE_FLAGS;
+		if (context->config & HNS_ROCE_CQE_INLINE_FLAGS)
+			resp->config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
+	}
+
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) {
+		context->config |= ucmd->config & HNS_ROCE_UCTX_CONFIG_DCA;
+		if (context->config & HNS_ROCE_UCTX_CONFIG_DCA)
+			resp->config |= HNS_ROCE_UCTX_RSP_DCA_FLAGS;
+	}
+
+	if (ucmd->config & HNS_ROCE_UCTX_DYN_QP_PGSZ) {
+		context->config |= HNS_ROCE_UCTX_DYN_QP_PGSZ;
+		resp->config |= HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ;
+	}
+}
+
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
				   struct ib_udata *udata)
{
@@ -519,31 +556,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
	if (ret)
		goto error_fail_uar_alloc;

-	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
-		context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
-
-	if (context->config & HNS_ROCE_EXSGE_FLAGS) {
-		resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
-		resp.max_inline_data = hr_dev->caps.max_sq_inline;
-	}
-
-	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
-		context->config |= ucmd.config & HNS_ROCE_RQ_INLINE_FLAGS;
-		if (context->config & HNS_ROCE_RQ_INLINE_FLAGS)
-			resp.config |= HNS_ROCE_RSP_RQ_INLINE_FLAGS;
-	}
-
-	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQE_INLINE) {
-		context->config |= ucmd.config & HNS_ROCE_CQE_INLINE_FLAGS;
-		if (context->config & HNS_ROCE_CQE_INLINE_FLAGS)
-			resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
-	}
-
-	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE) {
-		context->config |= ucmd.config & HNS_ROCE_UCTX_CONFIG_DCA;
-		if (context->config & HNS_ROCE_UCTX_CONFIG_DCA)
-			resp.config |= HNS_ROCE_UCTX_RSP_DCA_FLAGS;
-	}
+	hns_roce_get_uctx_config(hr_dev, context, &ucmd, &resp);

	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
	if (ret)
drivers/infiniband/hw/hns/hns_roce_qp.c  +29 −26
@@ -672,17 +672,24 @@ static bool check_dca_is_enable(struct hns_roce_dev *hr_dev,

static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
			    struct hns_roce_qp *hr_qp, bool dca_en,
-			    struct hns_roce_buf_attr *buf_attr)
+			    u8 page_shift, struct hns_roce_buf_attr *buf_attr)
{
+	unsigned int page_size = BIT(page_shift);
	int buf_size;
	int idx = 0;

	hr_qp->buff_size = 0;

+	if (page_shift > PAGE_SHIFT || page_shift < HNS_HW_PAGE_SHIFT)
+		return -EOPNOTSUPP;
+	/*
+	 * When DCA is enabled, there is no need to allocate the buffer
+	 * now, and the page shift should be fixed to 4K.
+	 */
+	if (dca_en && page_shift != HNS_HW_PAGE_SHIFT)
+		return -EOPNOTSUPP;
	/* SQ WQE */
	hr_qp->sq.offset = 0;
-	buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
-					  hr_qp->sq.wqe_shift);
+	buf_size = ALIGN(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size);
	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
		buf_attr->region[idx].size = buf_size;
		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
@@ -692,8 +699,7 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,

	/* extend SGE WQE in SQ */
	hr_qp->sge.offset = hr_qp->buff_size;
-	buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
-					  hr_qp->sge.sge_shift);
+	buf_size = ALIGN(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, page_size);
	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
		buf_attr->region[idx].size = buf_size;
		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
@@ -703,8 +709,7 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,

	/* RQ WQE */
	hr_qp->rq.offset = hr_qp->buff_size;
-	buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
-					  hr_qp->rq.wqe_shift);
+	buf_size = ALIGN(hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift, page_size);
	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
		buf_attr->region[idx].size = buf_size;
		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
@@ -716,19 +721,8 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
		return -EINVAL;

	buf_attr->region_count = idx;
-
-	if (dca_en) {
-		/*
-		 * When enable DCA, there's no need to alloc buffer now, and
-		 * the page shift should be fixed to 4K.
-		 */
-		buf_attr->mtt_only = true;
-		buf_attr->page_shift = HNS_HW_PAGE_SHIFT;
-	} else {
-		buf_attr->mtt_only = false;
-		buf_attr->page_shift = HNS_HW_PAGE_SHIFT +
-				       hr_dev->caps.mtt_buf_pg_sz;
-	}
+	buf_attr->mtt_only = dca_en;
+	buf_attr->page_shift = page_shift;

	return 0;
}
@@ -834,21 +828,30 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,

static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			struct ib_qp_init_attr *init_attr,
-			struct ib_udata *udata, unsigned long addr)
+			struct ib_udata *udata,
+			struct hns_roce_ib_create_qp *ucmd)
{
+	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+					 struct hns_roce_ucontext, ibucontext);
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_attr buf_attr = {};
+	u8 page_shift = HNS_HW_PAGE_SHIFT;
	bool dca_en;
	int ret;

-	dca_en = check_dca_is_enable(hr_dev, hr_qp, init_attr, !!udata, addr);
-	ret = set_wqe_buf_attr(hr_dev, hr_qp, dca_en, &buf_attr);
+	if (uctx && (uctx->config & HNS_ROCE_UCTX_DYN_QP_PGSZ))
+		page_shift = ucmd->pageshift;
+
+	dca_en = check_dca_is_enable(hr_dev, hr_qp, init_attr,
+				     !!udata, ucmd->buf_addr);
+	ret = set_wqe_buf_attr(hr_dev, hr_qp, dca_en, page_shift, &buf_attr);
	if (ret) {
		ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
		return ret;
	}

-	ret = alloc_wqe_buf(hr_dev, hr_qp, dca_en, &buf_attr, udata, addr);
+	ret = alloc_wqe_buf(hr_dev, hr_qp, dca_en,
+			    &buf_attr, udata, ucmd->buf_addr);
	if (ret)
		ibdev_err(ibdev, "failed to alloc WQE buf, ret = %d.\n", ret);

@@ -1237,7 +1240,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
		goto err_qpn;
	}

-	ret = alloc_qp_wqe(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
+	ret = alloc_qp_wqe(hr_dev, hr_qp, init_attr, udata, &ucmd);
	if (ret) {
		ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
		goto err_buf;
include/uapi/rdma/hns-abi.h  +4 −1
@@ -81,7 +81,8 @@ struct hns_roce_ib_create_qp {
	__u8    log_sq_bb_count;
	__u8    log_sq_stride;
	__u8    sq_no_prefetch;
-	__u8    reserved[5];
+	__u8    reserved[4];
+	__u8    pageshift;
	__aligned_u64 sdb_addr;
	__aligned_u64 comp_mask;
	__aligned_u64 create_flags;
@@ -122,6 +123,7 @@ enum {
	HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1,
	HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2,
	HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3,
+	HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4,
};

enum {
@@ -129,6 +131,7 @@ enum {
	HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1,
	HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2,
	HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA,
+	HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ,
};

struct hns_roce_ib_alloc_ucontext_resp {