Unverified commit eee89848, authored by openeuler-ci-bot and committed by Gitee
Browse files

!13044 Some patches of RDMA/hns from Linux to OLK-6.6

Merge Pull Request from: @ci-robot 
 
PR sync from: Chengchang Tang <tangchengchang@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/33LMZO36ZOPABOQTZHSZYQQFBG3SAEIS/ 
From: Xinghai Cen <cenxinghai@h-partners.com>

Some patches of RDMA/hns from Linux to OLK-6.6

Junxian Huang (1):
  RDMA/hns: Use dev_* printings in hem code instead of ibdev_*

Xinghai Cen (2):
  Revert "RDMA/hns: Fix missing spin_lock_init() for qp flush lock"
  Revert "RDMA/hns: Fix flush cqe error when racing with destroy qp"

Yuyu Li (1):
  RDMA/hns: Modify debugfs name

wenglianfa (3):
  RDMA/hns: Fix an AEQE overflow error caused by untimely update of
    eq_db_ci
  RDMA/hns: Fix flush cqe error when racing with destroy qp
  RDMA/hns: Fix cpu stuck caused by printings during reset


-- 
2.33.0
 
https://gitee.com/openeuler/kernel/issues/IB1I99 
 
Link: https://gitee.com/openeuler/kernel/pulls/13044

 

Reviewed-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Zhang Peng <zhangpeng362@huawei.com>
parents be251396 a2393d42
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -179,9 +179,9 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
	ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
				      hr_cq->cqn);
	if (ret)
		dev_err_ratelimited(dev,
				    "DESTROY_CQ failed (%d) for CQN %06lx\n",
		dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
				    ret, hr_cq->cqn);

	if (ret == -EBUSY)
		hr_cq->delayed_destroy_flag = true;

+2 −1
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@

#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/pci.h>

#include "hns_roce_device.h"
#include "hns_roce_common.h"
@@ -489,7 +490,7 @@ void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs;

	dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev),
	dbgfs->root = debugfs_create_dir(pci_name(hr_dev->pci_dev),
					 hns_roce_dbgfs_root);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DCA_MODE)
+1 −0
Original line number Diff line number Diff line
@@ -1445,6 +1445,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
+22 −22
Original line number Diff line number Diff line
@@ -300,7 +300,7 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
			   struct hns_roce_hem_mhop *mhop,
			   struct hns_roce_hem_index *index)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct device *dev = hr_dev->dev;
	unsigned long mhop_obj = obj;
	u32 l0_idx, l1_idx, l2_idx;
	u32 chunk_ba_num;
@@ -331,13 +331,13 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
		index->buf = l0_idx;
		break;
	default:
		ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n",
		dev_err(dev, "table %u not support mhop.hop_num = %u!\n",
			table->type, mhop->hop_num);
		return -EINVAL;
	}

	if (unlikely(index->buf >= table->num_hem)) {
		ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n",
		dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n",
			table->type, index->buf, table->num_hem);
		return -EINVAL;
	}
@@ -448,14 +448,14 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
			struct hns_roce_hem_mhop *mhop,
			struct hns_roce_hem_index *index)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct device *dev = hr_dev->dev;
	u32 step_idx;
	int ret = 0;

	if (index->inited & HEM_INDEX_L0) {
		ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
		if (ret) {
			ibdev_err(ibdev, "set HEM step 0 failed!\n");
			dev_err(dev, "set HEM step 0 failed!\n");
			goto out;
		}
	}
@@ -463,7 +463,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
	if (index->inited & HEM_INDEX_L1) {
		ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
		if (ret) {
			ibdev_err(ibdev, "set HEM step 1 failed!\n");
			dev_err(dev, "set HEM step 1 failed!\n");
			goto out;
		}
	}
@@ -475,7 +475,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev,
			step_idx = mhop->hop_num;
		ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
		if (ret)
			ibdev_err(ibdev, "set HEM step last failed!\n");
			dev_err(dev, "set HEM step last failed!\n");
	}
out:
	return ret;
@@ -485,14 +485,14 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
				   struct hns_roce_hem_table *table,
				   unsigned long obj)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_hem_index index = {};
	struct hns_roce_hem_mhop mhop = {};
	struct device *dev = hr_dev->dev;
	int ret;

	ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
	if (ret) {
		ibdev_err(ibdev, "calc hem config failed!\n");
		dev_err(dev, "calc hem config failed!\n");
		return ret;
	}

@@ -504,7 +504,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,

	ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
	if (ret) {
		ibdev_err(ibdev, "alloc mhop hem failed!\n");
		dev_err(dev, "alloc mhop hem failed!\n");
		goto out;
	}

@@ -512,7 +512,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
	if (table->type < HEM_TYPE_MTT) {
		ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
		if (ret) {
			ibdev_err(ibdev, "set HEM address to HW failed!\n");
			dev_err(dev, "set HEM address to HW failed!\n");
			goto err_alloc;
		}
	}
@@ -575,7 +575,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
			   struct hns_roce_hem_mhop *mhop,
			   struct hns_roce_hem_index *index)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct device *dev = hr_dev->dev;
	u32 hop_num = mhop->hop_num;
	u32 chunk_ba_num;
	u32 step_idx;
@@ -605,20 +605,20 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,

		ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx);
		if (ret)
			ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n",
			dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n",
				 hop_num, ret);

		if (index->inited & HEM_INDEX_L1) {
			ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1);
			if (ret)
				ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n",
				dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n",
					 ret);
		}

		if (index->inited & HEM_INDEX_L0) {
			ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
			if (ret)
				ibdev_warn(ibdev, "failed to clear HEM step 0, ret = %d.\n",
				dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n",
					 ret);
		}
	}
@@ -629,14 +629,14 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
				    unsigned long obj,
				    int check_refcount)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_hem_index index = {};
	struct hns_roce_hem_mhop mhop = {};
	struct device *dev = hr_dev->dev;
	int ret;

	ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
	if (ret) {
		ibdev_err(ibdev, "calc hem config failed!\n");
		dev_err(dev, "calc hem config failed!\n");
		return;
	}

+57 −43
Original line number Diff line number Diff line
@@ -451,17 +451,10 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,

	if (unlikely(hr_qp->state == IB_QPS_RESET ||
		     hr_qp->state == IB_QPS_INIT ||
		     hr_qp->state == IB_QPS_RTR)) {
		ibdev_err_ratelimited(ibdev,
				      "failed to post WQE, QP state %u!\n",
				      hr_qp->state);
		     hr_qp->state == IB_QPS_RTR))
		return -EINVAL;
	} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
		ibdev_err_ratelimited(ibdev,
				      "failed to post WQE, dev state %d!\n",
				      hr_dev->state);
	else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
		return -EIO;
	}

	if (check_dca_attach_enable(hr_qp)) {
		ret = dca_attach_qp_buf(hr_dev, hr_qp);
@@ -2972,7 +2965,7 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
	ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
				    IB_QPS_INIT, NULL);
	if (ret) {
		ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
		ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n",
				      ret);
		return ret;
	}
@@ -3658,8 +3651,7 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)

	ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
	if (ret) {
		ibdev_err_ratelimited(ibdev,
			"failed to post wqe for free mr, ret = %d.\n",
		ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n",
			ret);
		return ret;
	}
@@ -5398,11 +5390,8 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
	int ret = 0;

	if (!check_qp_state(cur_state, new_state)) {
		ibdev_err_ratelimited(&hr_dev->ib_dev,
				      "Illegal state for QP!\n");
	if (!check_qp_state(cur_state, new_state))
		return -EINVAL;
	}

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
		memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
@@ -6023,7 +6012,7 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
	unsigned long flags;
	int ret;

	/* Make sure flush_cqe() is complete */
	/* Make sure flush_cqe() is completed */
	spin_lock_irqsave(&hr_qp->flush_lock, flags);
	set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
	spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
@@ -6399,11 +6388,10 @@ static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key,
	return ret;
}

static void hns_roce_irq_work_handle(struct work_struct *work)
static void dump_aeqe_log(struct hns_roce_work *irq_work)
{
	struct hns_roce_work *irq_work =
				container_of(work, struct hns_roce_work, work);
	struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
	struct hns_roce_dev *hr_dev = irq_work->hr_dev;
	struct ib_device *ibdev = &hr_dev->ib_dev;

	switch (irq_work->event_type) {
	case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -6447,6 +6435,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
	case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
		ibdev_warn(ibdev, "DB overflow.\n");
		break;
	case HNS_ROCE_EVENT_TYPE_MB:
		break;
	case HNS_ROCE_EVENT_TYPE_FLR:
		ibdev_warn(ibdev, "function level reset.\n");
		break;
@@ -6456,10 +6446,48 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
	case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
		ibdev_err(ibdev, "invalid xrceth error.\n");
		break;
	default:
		ibdev_info(ibdev, "Undefined event %d.\n",
			   irq_work->event_type);
		break;
	}
}

/*
 * Work handler for one queued asynchronous event.
 *
 * @work is embedded in a struct hns_roce_work; the enclosing item supplies
 * the device, the event type and the queue number. Depending on the event
 * type the event is forwarded to the QP, SRQ or CQ event handler. Event
 * types not listed in the switch are not forwarded, but every event is
 * still passed to dump_aeqe_log(). The work item is freed before returning.
 */
static void hns_roce_irq_work_handle(struct work_struct *work)
{
	/* Recover the enclosing work item from its embedded work_struct. */
	struct hns_roce_work *irq_work =
				container_of(work, struct hns_roce_work, work);
	struct hns_roce_dev *hr_dev = irq_work->hr_dev;
	int event_type = irq_work->event_type;
	u32 queue_num = irq_work->queue_num;

	switch (event_type) {
	/* Event types forwarded to the QP event handler. */
	case HNS_ROCE_EVENT_TYPE_PATH_MIG:
	case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
	case HNS_ROCE_EVENT_TYPE_COMM_EST:
	case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
	case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
	case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
	case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
	case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
	case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
	case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
		hns_roce_qp_event(hr_dev, queue_num, event_type);
		break;
	/* Event types forwarded to the SRQ event handler. */
	case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
	case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
		hns_roce_srq_event(hr_dev, queue_num, event_type);
		break;
	/* Event types forwarded to the CQ event handler. */
	case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
	case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
		hns_roce_cq_event(hr_dev, queue_num, event_type);
		break;
	default:
		/* Nothing to forward for any other event type. */
		break;
	}

	/* Logging reads irq_work, so it must run before the item is freed. */
	dump_aeqe_log(irq_work);

	/* irq_work is released here and must not be used afterwards. */
	kfree(irq_work);
}

@@ -6519,14 +6547,14 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
				       struct hns_roce_eq *eq)
{
	struct device *dev = hr_dev->dev;
	struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
	irqreturn_t aeqe_found = IRQ_NONE;
	int num_aeqes = 0;
	int event_type;
	u32 queue_num;
	int sub_type;

	while (aeqe) {
	while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) {
		/* Make sure we read AEQ entry after we have checked the
		 * ownership bit
		 */
@@ -6537,25 +6565,12 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
		queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);

		switch (event_type) {
		case HNS_ROCE_EVENT_TYPE_PATH_MIG:
		case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
		case HNS_ROCE_EVENT_TYPE_COMM_EST:
		case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
		case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
		case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
		case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
			hns_roce_qp_event(hr_dev, queue_num, event_type);
			break;
		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
		case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
			hns_roce_srq_event(hr_dev, queue_num, event_type);
			break;
		case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
		case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
			hns_roce_cq_event(hr_dev, queue_num, event_type);
			hns_roce_flush_cqe(hr_dev, queue_num);
			break;
		case HNS_ROCE_EVENT_TYPE_MB:
			hns_roce_cmd_event(hr_dev,
@@ -6563,12 +6578,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
					aeqe->event.cmd.status,
					le64_to_cpu(aeqe->event.cmd.out_param));
			break;
		case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
		case HNS_ROCE_EVENT_TYPE_FLR:
			break;
		default:
			dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n",
				event_type, eq->eqn, eq->cons_index);
			break;
		}

@@ -6582,6 +6592,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
		hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);

		aeqe = next_aeqe_sw_v2(eq);
		++num_aeqes;
	}

	update_eq_db(eq);
@@ -7129,6 +7140,9 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
	int ret;
	int i;

	if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET)
		return -EINVAL;

	other_num = hr_dev->caps.num_other_vectors;
	comp_num = hr_dev->caps.num_comp_vectors;
	aeq_num = hr_dev->caps.num_aeq_vectors;
Loading