Commit a2393d42, authored by wenglianfa, committed by Chengchang Tang
Browse files

RDMA/hns: Fix cpu stuck caused by printings during reset

maillist inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB1I99
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=323275ac2ff15b2b7b3eac391ae5d8c5a3c3a999



----------------------------------------------------------------------

During reset, commands to destroy resources such as QPs, CQs, and MRs may
fail, and an error log is printed for each failure. When a large number of
resources are destroyed, the resulting flood of log messages may cause the
CPU to get stuck.

Delete some unnecessary printings and replace other printing functions
in these paths with the ratelimited version.

Fixes: 9a443537 ("IB/hns: Add driver files for hns RoCE driver")
Fixes: c7bcb134 ("RDMA/hns: Add SRQ support for hip08 kernel mode")
Fixes: 70f92521 ("RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT")
Fixes: 926a01dc ("RDMA/hns: Add QP operations support for hip08 SoC")
Signed-off-by: wenglianfa <wenglianfa@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20241024124000.2931869-6-huangjunxian6@hisilicon.com


Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
parent 09017684
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -179,9 +179,9 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
	ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
				      hr_cq->cqn);
	if (ret)
		dev_err_ratelimited(dev,
				    "DESTROY_CQ failed (%d) for CQN %06lx\n",
		dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
				    ret, hr_cq->cqn);

	if (ret == -EBUSY)
		hr_cq->delayed_destroy_flag = true;

+6 −17
Original line number Diff line number Diff line
@@ -451,17 +451,10 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,

	if (unlikely(hr_qp->state == IB_QPS_RESET ||
		     hr_qp->state == IB_QPS_INIT ||
		     hr_qp->state == IB_QPS_RTR)) {
		ibdev_err_ratelimited(ibdev,
				      "failed to post WQE, QP state %u!\n",
				      hr_qp->state);
		     hr_qp->state == IB_QPS_RTR))
		return -EINVAL;
	} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
		ibdev_err_ratelimited(ibdev,
				      "failed to post WQE, dev state %d!\n",
				      hr_dev->state);
	else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
		return -EIO;
	}

	if (check_dca_attach_enable(hr_qp)) {
		ret = dca_attach_qp_buf(hr_dev, hr_qp);
@@ -2972,7 +2965,7 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
	ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
				    IB_QPS_INIT, NULL);
	if (ret) {
		ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
		ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n",
				      ret);
		return ret;
	}
@@ -3658,8 +3651,7 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)

	ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
	if (ret) {
		ibdev_err_ratelimited(ibdev,
			"failed to post wqe for free mr, ret = %d.\n",
		ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n",
			ret);
		return ret;
	}
@@ -5398,11 +5390,8 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
	int ret = 0;

	if (!check_qp_state(cur_state, new_state)) {
		ibdev_err_ratelimited(&hr_dev->ib_dev,
				      "Illegal state for QP!\n");
	if (!check_qp_state(cur_state, new_state))
		return -EINVAL;
	}

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
		memset(qpc_mask, 0, hr_dev->caps.qpc_sz);