Commit 5da61758 authored by Feng Fang, committed by zhangyuyang

RDMA/hns: Fix different dgids mapping to the same dip_idx

maillist inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB4OOG

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=faa62440a5772b40bb7d78bf9e29556a82ecf153



---------------------------------------------------------------------

DIP algorithm requires a one-to-one mapping between dgid and dip_idx.
Currently a queue 'spare_idx' is used to store the QPNs of QPs that use
the DIP algorithm. For a new dgid, a QPN is taken from spare_idx and
used as its dip_idx. This method lacks a mechanism for deduplicating
QPNs, so the same QPN may be handed out for two different dgids,
breaking the one-to-one mapping requirement.
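
As an illustration only, here is a small userspace toy (none of these
names exist in the driver, and the trigger shown -- the same QPN being
queued twice -- is just one hypothetical way the duplication can arise)
that models the old spare_idx ring buffer and the resulting duplicate
dip_idx:

#include <stdio.h>

#define QUEUE_LEN 8

/* Toy model of the old spare_idx ring buffer: QPNs are pushed whenever
 * a DIP QP is modified to RTR and popped when a new dgid needs a
 * dip_idx. Nothing deduplicates the queue. */
static unsigned int spare_idx[QUEUE_LEN];
static unsigned int head, tail;

static void push_qpn(unsigned int qpn)
{
	spare_idx[tail] = qpn;
	tail = (tail == QUEUE_LEN - 1) ? 0 : tail + 1;
}

static unsigned int pop_dip_idx(void)
{
	unsigned int idx = spare_idx[head];

	head = (head == QUEUE_LEN - 1) ? 0 : head + 1;
	return idx;
}

int main(void)
{
	/* The same QPN is queued twice ... */
	push_qpn(3);
	push_qpn(3);

	/* ... so two different dgids are both handed dip_idx 3. */
	printf("dgid A -> dip_idx %u\n", pop_dip_idx());
	printf("dgid B -> dip_idx %u\n", pop_dip_idx());
	return 0;
}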

This patch replaces spare_idx with an xarray and introduces a refcnt
for each dip_idx, indicating the number of QPs that use this dip_idx.

The state machine for dip_idx management is implemented as follows
(a simplified sketch is given after the list):

* The entry at an index in the xarray is empty -- This indicates that
  the corresponding dip_idx hasn't been created.

* The entry at an index in the xarray is not empty but has a refcnt of
  0 -- This indicates that the corresponding dip_idx has been created
  but is not used as a dip_idx yet.

* The entry at an index in the xarray is not empty and has a non-zero
  refcnt -- This indicates that the corresponding dip_idx is currently
  used by refcnt DIP QPs.
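
The toy below is a userspace-only sketch of how these three states
drive the lookup (create_entry/get_dip_idx/put_dip_idx and the fixed
table are made-up illustration names; the real logic is in
alloc_dip_entry(), get_dip_ctx_idx() and put_dip_ctx_idx() in the diff
below):

#include <stdio.h>
#include <string.h>

#define GID_LEN     16
#define MAX_ENTRIES 4

/* One slot per possible dip_idx: 'created' mirrors "the entry at this
 * index is not empty", 'refcnt' mirrors the per-dip_idx QP count. */
struct dip_entry {
	int created;
	unsigned char dgid[GID_LEN];
	unsigned int refcnt;
};

static struct dip_entry table[MAX_ENTRIES];

/* Create an entry at the QP's own number, like alloc_dip_entry(). */
static void create_entry(unsigned int qpn)
{
	table[qpn].created = 1;
}

/* Return a dip_idx for 'dgid': reuse an in-use mapping for the same
 * dgid if one exists, otherwise recycle an entry with refcnt == 0. */
static int get_dip_idx(const unsigned char *dgid, unsigned int qpn)
{
	int i;

	create_entry(qpn);

	for (i = 0; i < MAX_ENTRIES; i++)	/* in-use match */
		if (table[i].created && table[i].refcnt &&
		    !memcmp(table[i].dgid, dgid, GID_LEN)) {
			table[i].refcnt++;
			return i;
		}

	for (i = 0; i < MAX_ENTRIES; i++)	/* recycle a free entry */
		if (table[i].created && !table[i].refcnt) {
			memcpy(table[i].dgid, dgid, GID_LEN);
			table[i].refcnt = 1;
			return i;
		}

	return -1; /* cannot happen here: every DIP QP created an entry */
}

static void put_dip_idx(int idx)
{
	if (!--table[idx].refcnt)
		memset(table[idx].dgid, 0, GID_LEN); /* back to unused */
}

int main(void)
{
	unsigned char gid_a[GID_LEN] = { 0xaa };
	unsigned char gid_b[GID_LEN] = { 0xbb };

	printf("dgid A, QP 0 -> %d\n", get_dip_idx(gid_a, 0)); /* 0 */
	printf("dgid A, QP 1 -> %d\n", get_dip_idx(gid_a, 1)); /* 0 again */
	printf("dgid B, QP 2 -> %d\n", get_dip_idx(gid_b, 2)); /* 1 */
	put_dip_idx(0);
	put_dip_idx(0); /* dip_idx 0 becomes reusable */
	printf("dgid B, QP 3 -> %d\n", get_dip_idx(gid_b, 3)); /* still 1 */
	return 0;
}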

Fixes: eb653eda ("RDMA/hns: Bugfix for incorrect association between dip_idx and dgid")
Fixes: f91696f2 ("RDMA/hns: Support congestion control type selection according to the FW")
Signed-off-by: Feng Fang <fangfeng4@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20241112055553.3681129-1-huangjunxian6@hisilicon.com


Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
parent 2b05c5bb
drivers/infiniband/hw/hns/hns_roce_device.h +2 −9
@@ -570,12 +570,6 @@ struct hns_roce_bank {
	u32 next; /* Next ID to allocate. */
};

-struct hns_roce_idx_table {
-	u32 *spare_idx;
-	u32 head;
-	u32 tail;
-};

struct hns_roce_qp_table {
	struct hns_roce_hem_table	qp_table;
	struct hns_roce_hem_table	irrl_table;
@@ -584,7 +578,7 @@ struct hns_roce_qp_table {
	struct mutex			scc_mutex;
	struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
	struct mutex bank_mutex;
-	struct hns_roce_idx_table	idx_table;
+	struct xarray			dip_xa;
};

struct hns_roce_cq_table {
@@ -742,6 +736,7 @@ struct hns_roce_qp {
	bool			delayed_destroy_flag;
	struct hns_roce_mtr_node *mtr_node;
	spinlock_t flush_lock;
+	struct hns_roce_dip *dip;
};

struct hns_roce_ib_iboe {
@@ -1102,8 +1097,6 @@ struct hns_roce_dev {
	enum hns_roce_device_state state;
	struct list_head	qp_list; /* list of all qps on this dev */
	spinlock_t		qp_list_lock; /* protect qp_list */
-	struct list_head	dip_list; /* list of all dest ips on this dev */
-	spinlock_t		dip_list_lock; /* protect dip_list */

	struct list_head        pgdir_list;
	struct mutex            pgdir_mutex;
drivers/infiniband/hw/hns/hns_roce_hw_v2.c +70 −26
@@ -2720,20 +2720,19 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev)
	free_link_table_buf(hr_dev, &priv->ext_llm);
}

-static void free_dip_list(struct hns_roce_dev *hr_dev)
+static void free_dip_entry(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_dip *hr_dip;
-	struct hns_roce_dip *tmp;
-	unsigned long flags;
+	unsigned long idx;

-	spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
+	xa_lock(&hr_dev->qp_table.dip_xa);

-	list_for_each_entry_safe(hr_dip, tmp, &hr_dev->dip_list, node) {
-		list_del(&hr_dip->node);
+	xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) {
+		__xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx);
		kfree(hr_dip);
	}

-	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
+	xa_unlock(&hr_dev->qp_table.dip_xa);
}

static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev)
@@ -3182,7 +3181,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
	hns_roce_v2_put_reset_page(hr_dev);

	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09)
-		free_dip_list(hr_dev);
+		free_dip_entry(hr_dev);
}

static inline void mbox_desc_init(struct hns_roce_post_mbox *mb,
@@ -5042,26 +5041,49 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask,
	return 0;
}

+static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn)
+{
+	struct hns_roce_dip *hr_dip;
+	int ret;
+
+	hr_dip = xa_load(dip_xa, qpn);
+	if (hr_dip)
+		return 0;
+
+	hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL);
+	if (!hr_dip)
+		return -ENOMEM;
+
+	ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL));
+	if (ret)
+		kfree(hr_dip);
+
+	return ret;
+}

static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
			   u32 *dip_idx)
{
	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
-	u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx;
-	u32 *head =  &hr_dev->qp_table.idx_table.head;
-	u32 *tail =  &hr_dev->qp_table.idx_table.tail;
+	struct xarray *dip_xa = &hr_dev->qp_table.dip_xa;
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
	struct hns_roce_dip *hr_dip;
-	unsigned long flags;
+	unsigned long idx;
	int ret = 0;

-	spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
+	ret = alloc_dip_entry(dip_xa, ibqp->qp_num);
+	if (ret)
+		return ret;

-	spare_idx[*tail] = ibqp->qp_num;
-	*tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1);
+	xa_lock(dip_xa);

-	list_for_each_entry(hr_dip, &hr_dev->dip_list, node) {
-		if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
+	xa_for_each(dip_xa, idx, hr_dip) {
+		if (hr_dip->qp_cnt &&
+		    !memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
			*dip_idx = hr_dip->dip_idx;
+			hr_dip->qp_cnt++;
+			hr_qp->dip = hr_dip;
			goto out;
		}
	}
@@ -5069,19 +5091,24 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
	/* If no dgid is found, a new dip and a mapping between dgid and
	 * dip_idx will be created.
	 */
-	hr_dip = kzalloc(sizeof(*hr_dip), GFP_ATOMIC);
-	if (!hr_dip) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	xa_for_each(dip_xa, idx, hr_dip) {
+		if (hr_dip->qp_cnt)
+			continue;

-	hr_dip->dip_idx = *dip_idx = spare_idx[*head];
-	*head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1);
-	list_add_tail(&hr_dip->node, &hr_dev->dip_list);
+		*dip_idx = idx;
+		memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+		hr_dip->dip_idx = idx;
+		hr_dip->qp_cnt++;
+		hr_qp->dip = hr_dip;
+		break;
+	}

+	/* This should never happen. */
+	if (WARN_ON_ONCE(!hr_qp->dip))
+		ret = -ENOSPC;

out:
-	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
+	xa_unlock(dip_xa);
	return ret;
}

@@ -6005,6 +6032,20 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
	return ret;
}

+static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_qp *hr_qp)
+{
+	struct hns_roce_dip *hr_dip = hr_qp->dip;
+
+	xa_lock(&hr_dev->qp_table.dip_xa);
+
+	hr_dip->qp_cnt--;
+	if (!hr_dip->qp_cnt)
+		memset(hr_dip->dgid, 0, GID_LEN_V2);
+
+	xa_unlock(&hr_dev->qp_table.dip_xa);
+}

int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
@@ -6018,6 +6059,9 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
	spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
	flush_work(&hr_qp->flush_work.work);

+	if (hr_qp->cong_type == CONG_TYPE_DIP)
+		put_dip_ctx_idx(hr_dev, hr_qp);

	ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
	if (ret)
		ibdev_err_ratelimited(&hr_dev->ib_dev,
drivers/infiniband/hw/hns/hns_roce_hw_v2.h +1 −1
@@ -1366,7 +1366,7 @@ struct hns_roce_v2_priv {
struct hns_roce_dip {
	u8 dgid[GID_LEN_V2];
	u32 dip_idx;
-	struct list_head node; /* all dips are on a list */
+	u32 qp_cnt;
};

struct fmea_ram_ecc {
drivers/infiniband/hw/hns/hns_roce_main.c +0 −2
@@ -1250,8 +1250,6 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)

	INIT_LIST_HEAD(&hr_dev->qp_list);
	spin_lock_init(&hr_dev->qp_list_lock);
-	INIT_LIST_HEAD(&hr_dev->dip_list);
-	spin_lock_init(&hr_dev->dip_list_lock);

	INIT_LIST_HEAD(&hr_dev->uctx_list);
	mutex_init(&hr_dev->uctx_list_mutex);
drivers/infiniband/hw/hns/hns_roce_qp.c +2 −6
@@ -1701,14 +1701,10 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
	unsigned int reserved_from_bot;
	unsigned int i;

-	qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps,
-					sizeof(u32), GFP_KERNEL);
-	if (!qp_table->idx_table.spare_idx)
-		return -ENOMEM;

	mutex_init(&qp_table->scc_mutex);
	mutex_init(&qp_table->bank_mutex);
	xa_init(&hr_dev->qp_table_xa);
+	xa_init(&qp_table->dip_xa);

	reserved_from_bot = hr_dev->caps.reserved_qps;

@@ -1733,7 +1729,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)

	for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
		ida_destroy(&hr_dev->qp_table.bank[i].ida);
+	xa_destroy(&hr_dev->qp_table.dip_xa);
	mutex_destroy(&hr_dev->qp_table.bank_mutex);
	mutex_destroy(&hr_dev->qp_table.scc_mutex);
-	kfree(hr_dev->qp_table.idx_table.spare_idx);
}