Unverified Commit 6ad884d5 authored by openeuler-ci-bot, committed by Gitee

!14109 RDMA/hns: backport some bugfixes from the Linux mailing list

Merge Pull Request from: @ci-robot 
 
PR sync from: Chengchang Tang <tangchengchang@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/AZ5FFXINGXSCPF5QN437P4XOJSDHKUTQ/ 
From: Xinghai Cen <cenxinghai@h-partners.com>

[RDMA/hns] backport some bugfixes from the Linux mailing list:

Xinghai Cen (1):
  Revert "RDMA/hns: Fix an AEQE overflow error caused by untimely update
    of eq_db_ci"

wenglianfa (2):
  RDMA/hns: Fix flush cqe error when racing with destroy qp
  RDMA/hns: Fix an AEQE overflow error caused by untimely update of
    eq_db_ci


-- 
2.33.0
 
https://gitee.com/openeuler/kernel/issues/IB8NKF 
 
Link: https://gitee.com/openeuler/kernel/pulls/14109

 

Reviewed-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Li Nan <linan122@huawei.com>
Signed-off-by: Li Nan <linan122@huawei.com>
parents 36ade961 9a72d9ef
drivers/infiniband/hw/hns/hns_roce_device.h +1 −0
@@ -1540,6 +1540,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
 void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
 void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
 u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
 void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
drivers/infiniband/hw/hns/hns_roce_hw_v2.c +11 −1
@@ -6332,6 +6332,11 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 	unsigned long flags;
 	int ret;
+	/* Make sure flush_cqe() is completed */
+	spin_lock_irqsave(&hr_qp->flush_lock, flags);
+	set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
+	spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+	flush_work(&hr_qp->flush_work.work);
 
 	if (hr_qp->congest_type == HNS_ROCE_CONGEST_TYPE_DIP)
 		put_dip_ctx_idx(hr_dev, hr_qp);
@@ -6787,7 +6792,12 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
 	case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
 	case HNS_ROCE_EVENT_TYPE_COMM_EST:
 	case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+	case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
 	case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+	case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+	case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+	case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+	case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
 		hns_roce_qp_event(hr_dev, queue_num, event_type);
 		break;
 	case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
@@ -6886,7 +6896,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
 		case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
-			hns_roce_qp_event(hr_dev, queue_num, event_type);
+			hns_roce_flush_cqe(hr_dev, queue_num);
 			break;
 		case HNS_ROCE_EVENT_TYPE_MB:
 			hns_roce_cmd_event(hr_dev,
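
The hns_roce_hw_v2.c hunks above carry two of the fixes: hns_roce_v2_destroy_qp() now raises HNS_ROCE_STOP_FLUSH_FLAG under hr_qp->flush_lock and drains any queued flush work before tearing the QP down, and hns_roce_v2_aeq_int() only flushes CQEs in hard-IRQ context, leaving the qp->event() dispatch to hns_roce_irq_work_handle() so eq_db_ci can be updated promptly. The path that queues the flush work is not part of this diff; a minimal sketch of that handshake, using hypothetical demo_* names rather than the driver's real symbols and assuming the queueing side takes the same flush_lock:

/*
 * Hedged sketch, not the driver's actual code: the queueing path checks the
 * stop flag under the same spinlock that destroy_qp() takes, so once the
 * flag is set no new flush work can be scheduled.
 */
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

#define DEMO_STOP_FLUSH_FLAG	1	/* stands in for HNS_ROCE_STOP_FLUSH_FLAG */

struct demo_qp {
	spinlock_t flush_lock;
	unsigned long flush_flag;
	struct work_struct flush_work;
	struct workqueue_struct *wq;
};

static void demo_queue_flush(struct demo_qp *qp)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->flush_lock, flags);
	/* Exit directly once destroy has started: nothing more to flush. */
	if (test_bit(DEMO_STOP_FLUSH_FLAG, &qp->flush_flag)) {
		spin_unlock_irqrestore(&qp->flush_lock, flags);
		return;
	}
	queue_work(qp->wq, &qp->flush_work);
	spin_unlock_irqrestore(&qp->flush_lock, flags);
}

With a handshake of this shape, once destroy_qp() has set the flag and dropped the lock, the following flush_work() only has to wait for work that was already running.
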
drivers/infiniband/hw/hns/hns_roce_qp.c +35 −19
@@ -39,6 +39,25 @@
 #include "hns_roce_hem.h"
 #include "hns_roce_dca.h"
 
+static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
+					      u32 qpn)
+{
+	struct device *dev = hr_dev->dev;
+	struct hns_roce_qp *qp;
+	unsigned long flags;
+
+	xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
+	qp = __hns_roce_qp_lookup(hr_dev, qpn);
+	if (qp)
+		refcount_inc(&qp->refcount);
+	xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
+
+	if (!qp)
+		dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+
+	return qp;
+}
+
 static void flush_work_handle(struct work_struct *work)
 {
 	struct hns_roce_work *flush_work = container_of(work,
@@ -102,31 +121,28 @@ void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
 
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
 {
-	struct device *dev = hr_dev->dev;
 	struct hns_roce_qp *qp;
 
-	xa_lock(&hr_dev->qp_table_xa);
-	qp = __hns_roce_qp_lookup(hr_dev, qpn);
-	if (qp)
-		refcount_inc(&qp->refcount);
-	xa_unlock(&hr_dev->qp_table_xa);
-
-	if (!qp) {
-		dev_warn(dev, "Async event for bogus QP %08x\n", qpn);
+	qp = hns_roce_qp_lookup(hr_dev, qpn);
+	if (!qp)
 		return;
-	}
 
-	if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
-	    event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
-	    event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
-	    event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
-	    event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) {
-		qp->state = IB_QPS_ERR;
+	qp->event(qp, (enum hns_roce_event)event_type);
 
-		flush_cqe(hr_dev, qp);
-	}
+	if (refcount_dec_and_test(&qp->refcount))
+		complete(&qp->free);
+}
 
-	qp->event(qp, (enum hns_roce_event)event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+	struct hns_roce_qp *qp;
+
+	qp = hns_roce_qp_lookup(hr_dev, qpn);
+	if (!qp)
+		return;
+
+	qp->state = IB_QPS_ERR;
+	flush_cqe(hr_dev, qp);
 
 	if (refcount_dec_and_test(&qp->refcount))
 		complete(&qp->free);
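
In hns_roce_qp.c, the old hns_roce_qp_event() is split: hns_roce_qp_lookup() does the refcounted lookup, hns_roce_qp_event() only dispatches qp->event(), and the new hns_roce_flush_cqe() moves the QP to IB_QPS_ERR and flushes CQEs. As the hns_roce_hw_v2.c diff shows, the AEQ hard-IRQ handler now calls only the cheap flush helper and leaves the event dispatch to the irq workqueue. A minimal sketch of that deferral pattern, again with hypothetical demo_* names rather than the driver's actual symbols:

/*
 * Hedged sketch: defer per-QP event dispatch from hard-IRQ context to a
 * workqueue so the EQ consumer index (eq_db_ci) can be updated without
 * waiting on event handling.
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct demo_irq_work {
	struct work_struct work;
	u32 qpn;
	int event_type;
};

static struct workqueue_struct *demo_irq_workq;	/* assumed created at init */

static void demo_irq_work_handle(struct work_struct *work)
{
	struct demo_irq_work *w = container_of(work, struct demo_irq_work, work);

	/* Heavy part runs here, e.g. hns_roce_qp_event(hr_dev, w->qpn, w->event_type). */
	kfree(w);
}

static irqreturn_t demo_aeq_int(int irq, void *dev_id)
{
	struct demo_irq_work *w;

	/* Cheap part stays in the hard IRQ, e.g. hns_roce_flush_cqe(hr_dev, qpn). */
	w = kzalloc(sizeof(*w), GFP_ATOMIC);
	if (w) {
		INIT_WORK(&w->work, demo_irq_work_handle);
		/* w->qpn and w->event_type would come from the AEQE. */
		queue_work(demo_irq_workq, &w->work);
	}

	/* eq_db_ci can now be rung immediately after consuming the AEQE. */
	return IRQ_HANDLED;
}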