Unverified Commit 622c640a authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!576 Support congestion control algorithm configuration

Merge Pull Request from: @stinft 
 
```bash
Feature information:
1.RDMA/hns: Modify congestion abbreviation
The currently used abbreviation of cong cannot clearly
indicate the meaning, so the full name congest is used instead.

2.RDMA/hns: Support congestion control algorithm configuration at QP granularity
This patch supports to configure congestion control algorithm
based on QP granularity. The configuration will be sent to
driver from user space. And then driver configures the selected
algorithm into QPC.
The current XRC type QP cannot deliver the configured
algorithm to kernel space, so the driver will set the default
algorithm for XRC type QP. And the default algorithm type is
controlled by the firmware.
```
bugzilla:#I6N1G4
 
 
Link:https://gitee.com/openeuler/kernel/pulls/576

 

Reviewed-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Jialin Zhang <zhangjialin11@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents 6ca12438 09f1b7cb
Loading
Loading
Loading
Loading
+9 −7
Original line number Diff line number Diff line
@@ -654,6 +654,7 @@ struct hns_roce_qp {
	struct hns_roce_db	rdb;
	struct hns_roce_db	sdb;
	unsigned long		en_flags;
	unsigned long		congest_type;
	u32			doorbell_qpn;
	enum ib_sig_type	sq_signal_bits;
	struct hns_roce_wq	sq;
@@ -775,11 +776,11 @@ enum hns_roce_scc_algo {
	HNS_ROCE_SCC_ALGO_TOTAL,
};

enum cong_type {
	CONG_TYPE_DCQCN = 1 << HNS_ROCE_SCC_ALGO_DCQCN,
	CONG_TYPE_LDCP = 1 << HNS_ROCE_SCC_ALGO_LDCP,
	CONG_TYPE_HC3 = 1 << HNS_ROCE_SCC_ALGO_HC3,
	CONG_TYPE_DIP = 1 << HNS_ROCE_SCC_ALGO_DIP,
enum congest_type {
	HNS_ROCE_CONGEST_TYPE_DCQCN = 1 << HNS_ROCE_SCC_ALGO_DCQCN,
	HNS_ROCE_CONGEST_TYPE_LDCP = 1 << HNS_ROCE_SCC_ALGO_LDCP,
	HNS_ROCE_CONGEST_TYPE_HC3 = 1 << HNS_ROCE_SCC_ALGO_HC3,
	HNS_ROCE_CONGEST_TYPE_DIP = 1 << HNS_ROCE_SCC_ALGO_DIP,
};

struct hns_roce_caps {
@@ -913,7 +914,8 @@ struct hns_roce_caps {
	u16		default_aeq_period;
	u16		default_aeq_arm_st;
	u16		default_ceq_arm_st;
	enum cong_type	cong_type;
	u8		congest_type;
	u8		default_congest_type;
};

enum hns_roce_device_state {
@@ -1117,7 +1119,7 @@ struct hns_roce_dev {
	struct work_struct ecc_work;
	u32 func_num;
	u32 is_vf;
	u32 cong_algo_tmpl_id;
	u32 congest_algo_tmpl_id;
	u64 dwqe_page;

	struct notifier_block bond_nb;
+59 −54
Original line number Diff line number Diff line
@@ -1792,7 +1792,7 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
	}

	hr_dev->func_num = le32_to_cpu(desc.func_info.own_func_num);
	hr_dev->cong_algo_tmpl_id = le32_to_cpu(desc.func_info.own_mac_id);
	hr_dev->congest_algo_tmpl_id = le32_to_cpu(desc.func_info.own_mac_id);

	return 0;
}
@@ -2447,11 +2447,12 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
	caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);

	caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
	caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
	caps->congest_type = hr_reg_read(resp_d, PF_CAPS_D_CONGEST_TYPE);
	caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
	caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
	caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
	caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
	caps->default_congest_type = hr_reg_read(resp_d, PF_CAPS_D_DEFAULT_ALG);
	caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
	caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
	caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
@@ -5067,18 +5068,18 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
}

enum {
	CONG_DCQCN,
	CONG_WINDOW,
	CONGEST_DCQCN,
	CONGEST_WINDOW,
};

enum {
	UNSUPPORT_CONG_LEVEL,
	SUPPORT_CONG_LEVEL,
	UNSUPPORT_CONGEST_LEVEL,
	SUPPORT_CONGEST_LEVEL,
};

enum {
	CONG_LDCP,
	CONG_HC3,
	CONGEST_LDCP,
	CONGEST_HC3,
};

enum {
@@ -5091,54 +5092,54 @@ enum {
	WND_UNLIMIT,
};

static int check_cong_type(struct ib_qp *ibqp,
			   struct hns_roce_congestion_algorithm *cong_alg)
static int check_congest_type(struct ib_qp *ibqp,
			      struct hns_roce_congestion_algorithm *congest_alg)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);

	/* different congestion types match different configurations */
	switch (hr_dev->caps.cong_type) {
	case CONG_TYPE_DCQCN:
		cong_alg->alg_sel = CONG_DCQCN;
		cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
		cong_alg->dip_vld = DIP_INVALID;
		cong_alg->wnd_mode_sel = WND_LIMIT;
	switch (hr_qp->congest_type) {
	case HNS_ROCE_CONGEST_TYPE_DCQCN:
		congest_alg->alg_sel = CONGEST_DCQCN;
		congest_alg->alg_sub_sel = UNSUPPORT_CONGEST_LEVEL;
		congest_alg->dip_vld = DIP_INVALID;
		congest_alg->wnd_mode_sel = WND_LIMIT;
		break;
	case CONG_TYPE_LDCP:
		cong_alg->alg_sel = CONG_WINDOW;
		cong_alg->alg_sub_sel = CONG_LDCP;
		cong_alg->dip_vld = DIP_INVALID;
		cong_alg->wnd_mode_sel = WND_UNLIMIT;
	case HNS_ROCE_CONGEST_TYPE_LDCP:
		congest_alg->alg_sel = CONGEST_WINDOW;
		congest_alg->alg_sub_sel = CONGEST_LDCP;
		congest_alg->dip_vld = DIP_INVALID;
		congest_alg->wnd_mode_sel = WND_UNLIMIT;
		break;
	case CONG_TYPE_HC3:
		cong_alg->alg_sel = CONG_WINDOW;
		cong_alg->alg_sub_sel = CONG_HC3;
		cong_alg->dip_vld = DIP_INVALID;
		cong_alg->wnd_mode_sel = WND_LIMIT;
	case HNS_ROCE_CONGEST_TYPE_HC3:
		congest_alg->alg_sel = CONGEST_WINDOW;
		congest_alg->alg_sub_sel = CONGEST_HC3;
		congest_alg->dip_vld = DIP_INVALID;
		congest_alg->wnd_mode_sel = WND_LIMIT;
		break;
	case CONG_TYPE_DIP:
		cong_alg->alg_sel = CONG_DCQCN;
		cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
		cong_alg->dip_vld = DIP_VALID;
		cong_alg->wnd_mode_sel = WND_LIMIT;
	case HNS_ROCE_CONGEST_TYPE_DIP:
		congest_alg->alg_sel = CONGEST_DCQCN;
		congest_alg->alg_sub_sel = UNSUPPORT_CONGEST_LEVEL;
		congest_alg->dip_vld = DIP_VALID;
		congest_alg->wnd_mode_sel = WND_LIMIT;
		break;
	default:
		cong_alg->alg_sel = CONG_DCQCN;
		cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
		cong_alg->dip_vld = DIP_INVALID;
		cong_alg->wnd_mode_sel = WND_LIMIT;
		congest_alg->alg_sel = CONGEST_DCQCN;
		congest_alg->alg_sub_sel = UNSUPPORT_CONGEST_LEVEL;
		congest_alg->dip_vld = DIP_INVALID;
		congest_alg->wnd_mode_sel = WND_LIMIT;
		break;
	}

	return 0;
}

static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
static int fill_congest_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
			      struct hns_roce_v2_qp_context *context,
			      struct hns_roce_v2_qp_context *qpc_mask)
{
	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
	struct hns_roce_congestion_algorithm cong_field;
	struct hns_roce_congestion_algorithm congest_field;
	struct ib_device *ibdev = ibqp->device;
	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
	u32 dip_idx = 0;
@@ -5148,31 +5149,35 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
	    grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE)
		return 0;

	ret = check_cong_type(ibqp, &cong_field);
	ret = check_congest_type(ibqp, &congest_field);
	if (ret)
		return ret;

	hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id +
		     hr_dev->caps.cong_type * HNS_ROCE_CONG_SIZE);
	hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID);
	hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel);
	hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL);
	hr_reg_write(&context->ext, QPCEX_CONG_ALG_SUB_SEL,
		     cong_field.alg_sub_sel);
	hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SUB_SEL);
	hr_reg_write(&context->ext, QPCEX_DIP_CTX_IDX_VLD, cong_field.dip_vld);
	hr_reg_write(context, QPC_CONGEST_ALGO_TMPL_ID,
		     hr_dev->congest_algo_tmpl_id +
		     hr_dev->caps.congest_type * HNS_ROCE_CONGEST_SIZE);
	hr_reg_clear(qpc_mask, QPC_CONGEST_ALGO_TMPL_ID);
	hr_reg_write(&context->ext, QPCEX_CONGEST_ALG_SEL,
		     congest_field.alg_sel);
	hr_reg_clear(&qpc_mask->ext, QPCEX_CONGEST_ALG_SEL);
	hr_reg_write(&context->ext, QPCEX_CONGEST_ALG_SUB_SEL,
		     congest_field.alg_sub_sel);
	hr_reg_clear(&qpc_mask->ext, QPCEX_CONGEST_ALG_SUB_SEL);
	hr_reg_write(&context->ext, QPCEX_DIP_CTX_IDX_VLD,
		     congest_field.dip_vld);
	hr_reg_clear(&qpc_mask->ext, QPCEX_DIP_CTX_IDX_VLD);
	hr_reg_write(&context->ext, QPCEX_SQ_RQ_NOT_FORBID_EN,
		     cong_field.wnd_mode_sel);
		     congest_field.wnd_mode_sel);
	hr_reg_clear(&qpc_mask->ext, QPCEX_SQ_RQ_NOT_FORBID_EN);

	/* if dip is disabled, there is no need to set dip idx */
	if (cong_field.dip_vld == 0)
	if (congest_field.dip_vld == 0)
		return 0;

	ret = get_dip_ctx_idx(ibqp, attr, &dip_idx);
	if (ret) {
		ibdev_err(ibdev, "failed to fill cong field, ret = %d.\n", ret);
		ibdev_err(ibdev, "failed to fill congest field, ret = %d.\n",
			  ret);
		return ret;
	}

@@ -5319,7 +5324,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
	hr_reg_write(context, QPC_HOPLIMIT, grh->hop_limit);
	hr_reg_clear(qpc_mask, QPC_HOPLIMIT);

	ret = fill_cong_field(ibqp, attr, context, qpc_mask);
	ret = fill_congest_field(ibqp, attr, context, qpc_mask);
	if (ret)
		return ret;

+6 −5
Original line number Diff line number Diff line
@@ -97,7 +97,7 @@ enum {

#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT		5

#define HNS_ROCE_CONG_SIZE 64
#define HNS_ROCE_CONGEST_SIZE 64

#define check_whether_last_step(hop_num, step_idx) \
	((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
@@ -494,7 +494,7 @@ struct hns_roce_v2_qp_context {
#define QPC_RQ_TX_ERR QPC_FIELD_LOC(442, 442)
#define QPC_RQ_RX_ERR QPC_FIELD_LOC(443, 443)
#define QPC_LP_PKTN_INI QPC_FIELD_LOC(447, 444)
#define QPC_CONG_ALGO_TMPL_ID QPC_FIELD_LOC(455, 448)
#define QPC_CONGEST_ALGO_TMPL_ID QPC_FIELD_LOC(455, 448)
#define QPC_SCC_TOKEN QPC_FIELD_LOC(474, 456)
#define QPC_SQ_DB_DOING QPC_FIELD_LOC(475, 475)
#define QPC_RQ_DB_DOING QPC_FIELD_LOC(476, 476)
@@ -648,8 +648,8 @@ struct hns_roce_v2_qp_context {

#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l)

#define QPCEX_CONG_ALG_SEL QPCEX_FIELD_LOC(0, 0)
#define QPCEX_CONG_ALG_SUB_SEL QPCEX_FIELD_LOC(1, 1)
#define QPCEX_CONGEST_ALG_SEL QPCEX_FIELD_LOC(0, 0)
#define QPCEX_CONGEST_ALG_SUB_SEL QPCEX_FIELD_LOC(1, 1)
#define QPCEX_DIP_CTX_IDX_VLD QPCEX_FIELD_LOC(2, 2)
#define QPCEX_DIP_CTX_IDX QPCEX_FIELD_LOC(22, 3)
#define QPCEX_SQ_RQ_NOT_FORBID_EN QPCEX_FIELD_LOC(23, 23)
@@ -1235,12 +1235,13 @@ struct hns_roce_query_pf_caps_d {
#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
#define PF_CAPS_D_CONGEST_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
#define PF_CAPS_D_DEFAULT_ALG PF_CAPS_D_FIELD_LOC(127, 122)
#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
+4 −0
Original line number Diff line number Diff line
@@ -440,6 +440,10 @@ static void ucontext_set_resp(struct ib_ucontext *uctx,
	resp->srq_tab_size = hr_dev->caps.num_srqs;
	resp->cqe_size = hr_dev->caps.cqe_sz;
	resp->mac_type = hr_dev->mac_type;

	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
		resp->congest_type = hr_dev->caps.congest_type;

	if (context->dca_ctx.dca_mmap_entry) {
		resp->dca_qps = context->dca_ctx.max_qps;
		resp->dca_mmap_size = PAGE_SIZE * context->dca_ctx.status_npage;
+46 −0
Original line number Diff line number Diff line
@@ -1072,6 +1072,48 @@ static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
	kfree(hr_qp->sq.wrid);
}

/*
 * Fall back to the device's default congestion control algorithm for
 * this QP. Per the feature description, the default algorithm type is
 * controlled by the firmware; it is reported in the PF capabilities as
 * an algorithm index and stored on the QP as a one-hot type bit.
 */
static inline void default_congest_type(struct hns_roce_dev *hr_dev,
					struct hns_roce_qp *hr_qp)
{
	hr_qp->congest_type = 1 << hr_dev->caps.default_congest_type;
}

/*
 * Apply the congestion control algorithm requested by userspace to the
 * QP. Flags are checked in a fixed priority order (DCQCN, LDCP, HC3,
 * DIP); the first flag found set wins.
 *
 * Returns 0 on success, or -EINVAL if none of the known flags is set.
 */
static int set_congest_type(struct hns_roce_qp *hr_qp,
			    struct hns_roce_ib_create_qp *ucmd)
{
	if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_DCQCN) {
		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_DCQCN;
		return 0;
	}

	if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_LDCP) {
		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_LDCP;
		return 0;
	}

	if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_HC3) {
		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_HC3;
		return 0;
	}

	if (ucmd->congest_type_flags & HNS_ROCE_CREATE_QP_FLAGS_DIP) {
		hr_qp->congest_type = HNS_ROCE_CREATE_QP_FLAGS_DIP;
		return 0;
	}

	return -EINVAL;
}

/*
 * Pick the congestion control algorithm for a new QP: honour a valid
 * userspace selection when one was supplied in the create command,
 * otherwise fall back to the device default algorithm.
 */
static void set_congest_param(struct hns_roce_dev *hr_dev,
			      struct hns_roce_qp *hr_qp,
			      struct hns_roce_ib_create_qp *ucmd)
{
	if ((ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE) &&
	    set_congest_type(hr_qp, ucmd) == 0)
		return;

	default_congest_type(hr_dev, hr_qp);
}

static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			struct ib_qp_init_attr *init_attr,
			struct ib_udata *udata,
@@ -1096,6 +1138,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
		return ret;
	}

	if (init_attr->qp_type == IB_QPT_XRC_TGT)
		default_congest_type(hr_dev, hr_qp);

	if (udata) {
		ret = ib_copy_from_udata(ucmd, udata,
					 min(udata->inlen, sizeof(*ucmd)));
@@ -1113,6 +1158,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
		if (ret)
			ibdev_err(ibdev, "Failed to set user SQ size, ret = %d\n",
				  ret);
		set_congest_param(hr_dev, hr_qp, ucmd);
	} else {
		if (init_attr->create_flags &
		    IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
Loading