Commit 431c875e authored by wenglianfa, committed by Juan Zhou

RDMA/hns: Fix simultaneous reset and resource deregistration

driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I87LFL



--------------------------------------------------------------------------

In the current scheme, a pseudo WC (work completion) lets user
space detect a device error early and release its context
resources. As a result, there is a high probability that a
hardware reset and the release of context resources happen at
the same time. During a hardware reset, the MBOX cannot instruct
the hardware to stop accessing memory, yet the corresponding
resources are freed while the reset is still in progress. The
hardware is unaware that the driver has freed these resources,
so its remaining tasks access invalid memory and a RAS alarm is
reported.

When the driver detects this scenario, it does not release the
resources immediately. Instead, it records them in a linked list
and releases them only when the RoCE driver is unloaded. This
way the hardware never accesses freed memory, and the driver
does not leak it.
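
In outline, the deferred release works like this (a condensed
sketch of the logic in the diff below; destroy_hw_ctx() stands
in for the actual DESTROY_CQ/DESTROY_QP mailbox command):

	/* Destroy path: -EBUSY means a hardware reset is in
	 * progress and the hardware may still access this memory,
	 * so mark the resource instead of freeing it.
	 */
	ret = destroy_hw_ctx(hr_dev, hr_cq);
	if (ret == -EBUSY)
		hr_cq->delayed_destroy_flag = true;

	/* Release path: defer to a per-device list that
	 * hns_roce_exit() drains at driver unload.
	 */
	if (hr_cq->delayed_destroy_flag) {
		hns_roce_add_unfree_mtr(hr_cq->mtr_node, hr_dev, &hr_cq->mtr);
	} else {
		hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
		kvfree(hr_cq->mtr_node);
	}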

Fixes: 306b8c76 ("RDMA/hns: Do not destroy QP resources in the hw resetting phase")
Signed-off-by: wenglianfa <wenglianfa@huawei.com>
parent 1bc1f93a

drivers/infiniband/hw/hns/hns_roce_cq.c  +22 −4
@@ -182,6 +182,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 	if (ret)
 		dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
 			hr_cq->cqn);
+	if (ret == -EBUSY)
+		hr_cq->delayed_destroy_flag = true;
 
 	xa_erase(&cq_table->array, hr_cq->cqn);
 
@@ -193,6 +195,10 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 		complete(&hr_cq->free);
 	wait_for_completion(&hr_cq->free);
 
-	hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
+	/* this resource will be freed when the driver is uninstalled, so
+	 * no memory leak will occur.
+	 */
+	if (!hr_cq->delayed_destroy_flag)
+		hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
 }
 
@@ -203,6 +209,10 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 	struct hns_roce_buf_attr buf_attr = {};
 	int ret;
 
+	hr_cq->mtr_node = kvmalloc(sizeof(*hr_cq->mtr_node), GFP_KERNEL);
+	if (!hr_cq->mtr_node)
+		return -ENOMEM;
+
 	buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
 	buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
@@ -211,15 +221,22 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 	ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
 				  hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT,
 				  udata, addr);
-	if (ret)
+	if (ret) {
 		ibdev_err(ibdev, "Failed to alloc CQ mtr, ret = %d\n", ret);
+		kvfree(hr_cq->mtr_node);
+	}
 
 	return ret;
 }
 
 static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 {
-	hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
+	if (hr_cq->delayed_destroy_flag) {
+		hns_roce_add_unfree_mtr(hr_cq->mtr_node, hr_dev, &hr_cq->mtr);
+	} else {
+		hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
+		kvfree(hr_cq->mtr_node);
+	}
 }
 
 static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
@@ -270,7 +287,8 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 		uctx = rdma_udata_to_drv_context(udata,
 						 struct hns_roce_ucontext,
 						 ibucontext);
-		hns_roce_db_unmap_user(uctx, &hr_cq->db);
+		hns_roce_db_unmap_user(uctx, &hr_cq->db,
+				       hr_cq->delayed_destroy_flag);
 	} else {
 		hns_roce_free_db(hr_dev, &hr_cq->db);
 	}
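
free_cq_buf() above hands the MTR to hns_roce_add_unfree_mtr(),
whose body is not part of this diff view. A plausible sketch,
consistent with the declarations in hns_roce_device.h and the
list/lock fields added to struct hns_roce_dev below (copying the
whole descriptor and the exact lock scope are assumptions):

void hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos,
			     struct hns_roce_dev *hr_dev,
			     struct hns_roce_mtr *mtr)
{
	/* Keep a private copy of the MTR descriptor; the owning
	 * CQ/QP/SRQ/MR object is about to be freed.
	 */
	memcpy(&pos->mtr, mtr, sizeof(*mtr));

	spin_lock(&hr_dev->mtr_unfree_list_lock);
	list_add_tail(&pos->list, &hr_dev->mtr_unfree_list);
	spin_unlock(&hr_dev->mtr_unfree_list_lock);
}

void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_mtr_node *pos, *next;

	/* Called from hns_roce_exit() after hw_exit(), so the
	 * hardware no longer references this memory and no new
	 * entries can be queued concurrently.
	 */
	list_for_each_entry_safe(pos, next, &hr_dev->mtr_unfree_list, list) {
		list_del(&pos->list);
		hns_roce_mtr_destroy(hr_dev, &pos->mtr);
		kvfree(pos);
	}
}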

drivers/infiniband/hw/hns/hns_roce_db.c  +24 −6
@@ -24,7 +24,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 	page = kmalloc(sizeof(*page), GFP_KERNEL);
 	if (!page) {
 		ret = -ENOMEM;
-		goto out;
+		goto err_out;
 	}
 
 	refcount_set(&page->refcount, 1);
@@ -33,8 +33,12 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 				 PAGE_SIZE, 0);
 	if (IS_ERR(page->umem)) {
 		ret = PTR_ERR(page->umem);
-		kfree(page);
-		goto out;
+		goto err_page;
 	}
+	page->umem_node = kvmalloc(sizeof(*page->umem_node), GFP_KERNEL);
+	if (!page->umem_node) {
+		ret = -ENOMEM;
+		goto err_umem;
+	}
 
 	list_add(&page->list, &context->page_list);
@@ -45,22 +49,36 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 	db->virt_addr = sg_virt(page->umem->sg_head.sgl) + offset;
 	db->u.user_page = page;
 	refcount_inc(&page->refcount);
+	mutex_unlock(&context->page_mutex);
+	return 0;
 
-out:
+err_umem:
+	ib_umem_release(page->umem);
+err_page:
+	kvfree(page);
+err_out:
 	mutex_unlock(&context->page_mutex);
 
 	return ret;
 }
 
 void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
-			    struct hns_roce_db *db)
+			    struct hns_roce_db *db,
+			    bool delayed_unmap_flag)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(context->ibucontext.device);
+
 	mutex_lock(&context->page_mutex);
 
 	refcount_dec(&db->u.user_page->refcount);
 	if (refcount_dec_if_one(&db->u.user_page->refcount)) {
 		list_del(&db->u.user_page->list);
-		ib_umem_release(db->u.user_page->umem);
+		if (delayed_unmap_flag) {
+			hns_roce_add_unfree_umem(db->u.user_page, hr_dev);
+		} else {
+			ib_umem_release(db->u.user_page->umem);
+			kvfree(db->u.user_page->umem_node);
+		}
 		kfree(db->u.user_page);
 	}
 
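
The umem helpers used above are likewise only declared in this
view. A sketch mirroring the MTR variant (again an assumption,
not the actual implementation; the umem_node is preallocated in
hns_roce_db_map_user(), so the add path cannot fail):

void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page,
			      struct hns_roce_dev *hr_dev)
{
	struct hns_roce_umem_node *node = user_page->umem_node;

	/* The page struct is freed by the caller; keep only the
	 * umem pointer alive on the per-device list.
	 */
	node->umem = user_page->umem;

	spin_lock(&hr_dev->umem_unfree_list_lock);
	list_add_tail(&node->list, &hr_dev->umem_unfree_list);
	spin_unlock(&hr_dev->umem_unfree_list_lock);
}

void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_umem_node *pos, *next;

	/* Runs single-threaded from hns_roce_exit() after hardware
	 * teardown, when deferred pinned pages can finally be
	 * released without the hardware touching them.
	 */
	list_for_each_entry_safe(pos, next, &hr_dev->umem_unfree_list, list) {
		list_del(&pos->list);
		ib_umem_release(pos->umem);
		kvfree(pos);
	}
}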

drivers/infiniband/hw/hns/hns_roce_device.h  +33 −1
@@ -409,6 +409,8 @@ struct hns_roce_mr {
 	struct hns_roce_mtr	pbl_mtr;
 	u32			npages;
 	dma_addr_t		*page_list;
+	bool			delayed_destroy_flag;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_mr_table {
@@ -475,11 +477,17 @@ struct hns_roce_db_pgdir {
 	dma_addr_t		db_dma;
 };
 
+struct hns_roce_umem_node {
+	struct ib_umem *umem;
+	struct list_head list;
+};
+
 struct hns_roce_user_db_page {
 	struct list_head	list;
 	struct ib_umem		*umem;
 	unsigned long		user_virt;
 	refcount_t		refcount;
+	struct hns_roce_umem_node *umem_node;
 };
 
 struct hns_roce_db {
@@ -531,7 +539,9 @@ struct hns_roce_cq {
 	int				is_armed; /* cq is armed */
 	struct list_head		node; /* all armed cqs are on a list */
 	u8				poe_channel;
+	bool				delayed_destroy_flag;
 	struct hns_roce_notify_conf	write_notify;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_idx_que {
@@ -540,6 +550,7 @@ struct hns_roce_idx_que {
 	unsigned long			*bitmap;
 	u32				head;
 	u32				tail;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_srq {
@@ -565,6 +576,8 @@ struct hns_roce_srq {
 	void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
 	struct hns_roce_db	rdb;
 	u32			cap_flags;
+	bool			delayed_destroy_flag;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_uar_table {
@@ -740,6 +753,8 @@ struct hns_roce_qp {
 	u32			config;
 	u8			tc_mode;
 	u8			priority;
+	bool			delayed_destroy_flag;
+	struct hns_roce_mtr_node *mtr_node;
 };
 
 struct hns_roce_ib_iboe {
@@ -1100,6 +1115,11 @@ struct hns_roce_port {
 	struct hns_roce_scc_param *scc_param;
 };
 
+struct hns_roce_mtr_node {
+	struct hns_roce_mtr mtr;
+	struct list_head list;
+};
+
 struct hns_roce_dev {
 	struct ib_device	ib_dev;
 	struct pci_dev		*pci_dev;
@@ -1183,6 +1203,10 @@ struct hns_roce_dev {

 	struct rdma_notify_mem *notify_tbl;
 	size_t notify_num;
+	struct list_head mtr_unfree_list; /* list of unfree mtr on this dev */
+	spinlock_t mtr_unfree_list_lock; /* protect mtr_unfree_list */
+	struct list_head umem_unfree_list; /* list of unfree umem on this dev */
+	spinlock_t umem_unfree_list_lock; /* protect umem_unfree_list */
 };

static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
@@ -1464,7 +1488,8 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
 int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 			 struct hns_roce_db *db);
 void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
-			    struct hns_roce_db *db);
+			    struct hns_roce_db *db,
+			    bool delayed_unmap_flag);
 int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
 		      int order);
 void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
@@ -1484,6 +1509,13 @@ int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
 int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
 int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
 int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
+void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page,
+			      struct hns_roce_dev *hr_dev);
+void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev);
+void hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos,
+			     struct hns_roce_dev *hr_dev,
+			     struct hns_roce_mtr *mtr);
+void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev);
 struct hns_user_mmap_entry *
 hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
 				size_t length,

drivers/infiniband/hw/hns/hns_roce_hw_v2.c  +3 −0
@@ -6038,6 +6038,9 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
			  "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
			  hr_qp->qpn, ret);

	if (ret == -EBUSY)
		hr_qp->delayed_destroy_flag = true;

	hns_roce_qp_destroy(hr_dev, hr_qp, udata);

	return 0;

drivers/infiniband/hw/hns/hns_roce_main.c  +8 −0
@@ -1309,6 +1309,12 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 	INIT_LIST_HEAD(&hr_dev->uctx_list);
 	spin_lock_init(&hr_dev->uctx_list_lock);
 
+	INIT_LIST_HEAD(&hr_dev->mtr_unfree_list);
+	spin_lock_init(&hr_dev->mtr_unfree_list_lock);
+
+	INIT_LIST_HEAD(&hr_dev->umem_unfree_list);
+	spin_lock_init(&hr_dev->umem_unfree_list_lock);
+
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
 	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
 		INIT_LIST_HEAD(&hr_dev->pgdir_list);
@@ -1555,6 +1561,8 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
 	if (hr_dev->hw->hw_exit)
 		hr_dev->hw->hw_exit(hr_dev);
 	hns_roce_teardown_hca(hr_dev);
+	hns_roce_free_unfree_umem(hr_dev);
+	hns_roce_free_unfree_mtr(hr_dev);
 	hns_roce_cleanup_hem(hr_dev);
 
 	if (hr_dev->cmd_mod)