Unverified Commit cc58681d authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!427 Support the feature of querying stats

Merge Pull Request from: @stinft 
 
Feature information:
1.RDMA/hns: Support hns HW stats
Support querying hns HW stats to help debug several issues.
2.RDMA/hns: Add dfx cnt stats
Add more dfx counters to help diagnosis. These stats can be read via
sysfs or rdmatool.
bugzilla:https://gitee.com/openeuler/kernel/issues/I6GSZL 
 
Link:https://gitee.com/openeuler/kernel/pulls/427

 

Reviewed-by: Yue Haibing <yuehaibing@huawei.com>
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 1c8d3dec d5a4ca75
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -85,7 +85,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
		ret = 0;

	if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
		return ret;
		goto err_out;

	if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
	    grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -101,7 +101,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
		ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
					      &ah->av.vlan_id, NULL);
		if (ret)
			return ret;
			goto err_out;

		ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID;
	}
@@ -113,6 +113,10 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
				       min(udata->outlen, sizeof(resp)));
	}

err_out:
	if (ret)
		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]);

	return ret;
}

+15 −2
Original line number Diff line number Diff line
@@ -41,7 +41,14 @@
/*
 * Post a mailbox message through the hw-specific post_mbox() hook.
 *
 * In the post-change form of this function, a non-zero result from
 * post_mbox() is returned unchanged; on success the
 * HNS_ROCE_DFX_MBX_POSTED_CNT entry of hr_dev->dfx_cnt[] is incremented
 * and 0 is returned.
 */
static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
				     struct hns_roce_mbox_msg *mbox_msg)
{
	/*
	 * NOTE(review): this text is a rendered diff without +/- markers.
	 * The hunk header (-41,7 +41,14) suggests the single return statement
	 * right below is the removed pre-change body and the statements after
	 * it are its replacement -- confirm against the real file.
	 */
	return hr_dev->hw->post_mbox(hr_dev, mbox_msg);
	int ret;

	ret = hr_dev->hw->post_mbox(hr_dev, mbox_msg);
	if (ret)
		return ret;

	/* Only successfully posted mailboxes are counted. */
	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POSTED_CNT]);
	return 0;
}

/* this should be called with "poll_sem" */
@@ -58,7 +65,12 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
		return ret;
	}

	return hr_dev->hw->poll_mbox_done(hr_dev);
	ret = hr_dev->hw->poll_mbox_done(hr_dev);
	if (ret)
		return ret;

	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POLLED_CNT]);
	return 0;
}

static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
@@ -89,6 +101,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
	context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO);
	context->out_param = out_param;
	complete(&context->done);
	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_EVENT_CNT]);
}

static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+11 −6
Original line number Diff line number Diff line
@@ -363,17 +363,19 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
	struct hns_roce_ib_create_cq ucmd = {};
	int ret;

	if (attr->flags)
		return -EOPNOTSUPP;
	if (attr->flags) {
		ret = -EOPNOTSUPP;
		goto err_out;
	}

	ret = verify_cq_create_attr(hr_dev, attr);
	if (ret)
		return ret;
		goto err_out;

	if (udata) {
		ret = get_cq_ucmd(hr_cq, udata, &ucmd);
		if (ret)
			return ret;
			goto err_out;

	}

@@ -381,12 +383,12 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,

	ret = set_cqe_size(hr_cq, udata, &ucmd);
	if (ret)
		return ret;
		goto err_out;

	ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
	if (ret) {
		ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
		return ret;
		goto err_out;
	}

	ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
@@ -431,6 +433,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
	free_cq_db(hr_dev, hr_cq, udata);
err_cq_buf:
	free_cq_buf(hr_dev, hr_cq);
err_out:
	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);

	return ret;
}

+50 −0
Original line number Diff line number Diff line
@@ -914,6 +914,53 @@ enum hns_roce_device_state {
	HNS_ROCE_DEVICE_STATE_UNINIT,
};

/*
 * Indices of the hardware counters read by hns_roce_hw_v2_query_counter().
 *
 * That function copies the i-th 64-bit value of the
 * HNS_ROCE_OPC_QUERY_COUNTER reply into stats[i] and uses
 * HNS_ROCE_HW_CNT_TOTAL as the upper bound of i. Presumably these
 * enumerators name the slots of that array, in which case their order must
 * follow the order of the counters in the reply -- confirm before
 * reordering or inserting entries.
 */
enum hns_roce_hw_pkt_stat_index {
	HNS_ROCE_HW_RX_RC_PKT_CNT,
	HNS_ROCE_HW_RX_UC_PKT_CNT,
	HNS_ROCE_HW_RX_UD_PKT_CNT,
	HNS_ROCE_HW_RX_XRC_PKT_CNT,
	HNS_ROCE_HW_RX_PKT_CNT,
	HNS_ROCE_HW_RX_ERR_PKT_CNT,
	HNS_ROCE_HW_RX_CNP_PKT_CNT,
	HNS_ROCE_HW_TX_RC_PKT_CNT,
	HNS_ROCE_HW_TX_UC_PKT_CNT,
	HNS_ROCE_HW_TX_UD_PKT_CNT,
	HNS_ROCE_HW_TX_XRC_PKT_CNT,
	HNS_ROCE_HW_TX_PKT_CNT,
	HNS_ROCE_HW_TX_ERR_PKT_CNT,
	HNS_ROCE_HW_TX_CNP_PKT_CNT,
	HNS_ROCE_HW_TRP_GET_MPT_ERR_PKT_CNT,
	HNS_ROCE_HW_TRP_GET_IRRL_ERR_PKT_CNT,
	HNS_ROCE_HW_ECN_DB_CNT,
	HNS_ROCE_HW_RX_BUF_CNT,
	HNS_ROCE_HW_TRP_RX_SOF_CNT,
	HNS_ROCE_HW_CQ_CQE_CNT,
	HNS_ROCE_HW_CQ_POE_CNT,
	HNS_ROCE_HW_CQ_NOTIFY_CNT,
	/* Number of counters above; must stay last. */
	HNS_ROCE_HW_CNT_TOTAL,
};

/*
 * Indices into hr_dev->dfx_cnt[], an array of atomic64_t software counters
 * that the driver bumps with atomic64_inc(). Comments are given only for
 * the entries whose increment site is known; the others are incremented
 * elsewhere in the driver.
 */
enum hns_roce_hw_dfx_stat_index {
	/* Incremented per AEQE consumed in hns_roce_v2_aeq_int(). */
	HNS_ROCE_DFX_AEQE_CNT,
	/* Incremented per CEQE consumed in hns_roce_v2_ceq_int(). */
	HNS_ROCE_DFX_CEQE_CNT,
	/* Incremented in __hns_roce_cmq_send() after the CMQ PI register write. */
	HNS_ROCE_DFX_CMDS_CNT,
	/* Incremented when __hns_roce_cmq_send() is about to return non-zero. */
	HNS_ROCE_DFX_CMDS_ERR_CNT,
	/* Incremented after the post_mbox() hook succeeds. */
	HNS_ROCE_DFX_MBX_POSTED_CNT,
	/* Incremented after the poll_mbox_done() hook succeeds. */
	HNS_ROCE_DFX_MBX_POLLED_CNT,
	/* Incremented in hns_roce_cmd_event() after complete() is called. */
	HNS_ROCE_DFX_MBX_EVENT_CNT,
	HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
	HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
	/* Incremented on the error exit path of hns_roce_create_cq(). */
	HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
	HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
	HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT,
	HNS_ROCE_DFX_MR_REG_ERR_CNT,
	HNS_ROCE_DFX_MR_REREG_ERR_CNT,
	/* Incremented when hns_roce_create_ah() is about to return non-zero. */
	HNS_ROCE_DFX_AH_CREATE_ERR_CNT,
	HNS_ROCE_DFX_MMAP_ERR_CNT,
	HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT,
	/*
	 * Number of counters above; must stay last. Presumably also the
	 * length of the dfx_cnt[] allocation -- confirm at the allocation site.
	 */
	HNS_ROCE_DFX_CNT_TOTAL
};

struct hns_roce_hw {
	int (*cmq_init)(struct hns_roce_dev *hr_dev);
	void (*cmq_exit)(struct hns_roce_dev *hr_dev);
@@ -962,6 +1009,8 @@ struct hns_roce_hw {
	int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
	int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp,
			u8 *tc_mode, u8 *priority);
	int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
				u64 *stats, u32 port, int *hw_counters);
	const struct ib_device_ops *hns_roce_dev_ops;
	const struct ib_device_ops *hns_roce_dev_srq_ops;
	int (*bond_init)(struct hns_roce_dev *hr_dev);
@@ -1045,6 +1094,7 @@ struct hns_roce_dev {
	struct delayed_work bond_work;
	struct hns_roce_bond_group *bond_grp;
	struct netdev_lag_lower_state_info slave_state;
	atomic64_t *dfx_cnt;
};

static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
+58 −0
Original line number Diff line number Diff line
@@ -1396,6 +1396,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
	/* Write to hardware */
	roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head);

	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);

	do {
		if (hns_roce_cmq_csq_done(hr_dev))
			break;
@@ -1433,6 +1435,9 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,

	spin_unlock_bh(&csq->lock);

	if (ret)
		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);

	return ret;
}

@@ -1772,6 +1777,56 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
	return 0;
}

/*
 * Read the hardware counters through the command queue.
 *
 * @hr_dev: device to query
 * @stats: output array; stats[i] receives the i-th 64-bit counter of the
 *         HNS_ROCE_OPC_QUERY_COUNTER reply
 * @port: port number; values greater than hr_dev->caps.num_ports are
 *        rejected
 * @num_counters: in: maximum number of entries to write to @stats;
 *                out: number of entries actually written (only updated
 *                on success)
 *
 * The reply is a chain of descriptors, each carrying CNT_PER_DESC counters.
 * Copying stops early at the first descriptor that is not the last one of
 * the chain and does not carry HNS_ROCE_CMD_FLAG_NEXT.
 *
 * Return: 0 on success, -EINVAL for an out-of-range port, -ENOMEM if the
 * descriptor array cannot be allocated, or the error returned by
 * hns_roce_cmq_send().
 */
static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev,
					u64 *stats, u32 port, int *num_counters)
{
#define CNT_PER_DESC 3
	struct hns_roce_cmq_desc *desc;
	int bd_idx, cnt_idx;
	__le64 *cnt_data;
	int desc_num;
	int ret;
	int i;

	if (port > hr_dev->caps.num_ports)
		return -EINVAL;

	desc_num = DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC);
	desc = kcalloc(desc_num, sizeof(*desc), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	/* Chain the descriptors: every one but the last carries FLAG_NEXT. */
	for (i = 0; i < desc_num; i++) {
		hns_roce_cmq_setup_basic_desc(&desc[i],
					      HNS_ROCE_OPC_QUERY_COUNTER, true);
		if (i != desc_num - 1)
			desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
	}

	ret = hns_roce_cmq_send(hr_dev, desc, desc_num);
	if (ret) {
		ibdev_err(&hr_dev->ib_dev,
			  "failed to get counter, ret = %d.\n", ret);
		goto err_out;
	}

	for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) {
		bd_idx = i / CNT_PER_DESC;
		/*
		 * desc[].flag is little-endian, so the flag constant must be
		 * converted before testing it (as is done when setting it
		 * above). The last descriptor is identified as desc_num - 1,
		 * mirroring the setup loop, so the check stays correct even
		 * if HNS_ROCE_HW_CNT_TOTAL becomes a multiple of
		 * CNT_PER_DESC.
		 */
		if (bd_idx != desc_num - 1 &&
		    !(desc[bd_idx].flag & cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT)))
			break;

		cnt_data = (__le64 *)&desc[bd_idx].data[0];
		cnt_idx = i % CNT_PER_DESC;
		stats[i] = le64_to_cpu(cnt_data[cnt_idx]);
	}
	/* Report how many counters were actually copied. */
	*num_counters = i;

err_out:
	kfree(desc);
	return ret;
}

static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_cmq_desc desc;
@@ -6465,6 +6520,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
		eq->sub_type = sub_type;
		++eq->cons_index;
		aeqe_found = IRQ_HANDLED;
		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);

		hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);

@@ -6507,6 +6563,7 @@ static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,

		++eq->cons_index;
		ceqe_found = IRQ_HANDLED;
		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);

		ceqe = next_ceqe_sw_v2(eq);
	}
@@ -7140,6 +7197,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
	.bond_init = hns_roce_bond_init,
	.bond_is_active = hns_roce_bond_is_active,
	.get_bond_netdev = hns_roce_get_bond_netdev,
	.query_hw_counter = hns_roce_hw_v2_query_counter,
};

static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
Loading