Commit 3bfea1c8 authored by Xi Wang, committed by Zheng Zengkai

RDMA/hns: Refactor the hns_roce_buf allocation flow

mainline inclusion
from mainline-v5.11-rc1
commit 6f6e2dcb
category: bugfix
bugzilla: 174002
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6f6e2dcbb82b9b2ea304fe32635789fedd4e9868

----------------------------------------------------------------------

Add a group of flags to control the 'struct hns_roce_buf' allocation
flow; this is used to support callers running in atomic context.
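For illustration only (not part of the patch): a minimal sketch of how a
caller that cannot sleep might use the reworked interface. The wrapper names
and the chosen page shift are hypothetical; only hns_roce_buf_alloc(),
hns_roce_buf_free() and the HNS_ROCE_BUF_* flags come from this change.

	/* Hypothetical caller sketch, assuming this patch is applied. */
	static struct hns_roce_buf *
	alloc_entry_buf_atomic(struct hns_roce_dev *hr_dev, u32 size)
	{
		struct hns_roce_buf *buf;

		/* HNS_ROCE_BUF_NOSLEEP makes the allocator use GFP_ATOMIC internally */
		buf = hns_roce_buf_alloc(hr_dev, size, HNS_HW_PAGE_SHIFT,
					 HNS_ROCE_BUF_NOSLEEP);
		if (IS_ERR(buf))
			return NULL;

		return buf;
	}

	static void free_entry_buf(struct hns_roce_dev *hr_dev,
				   struct hns_roce_buf *buf)
	{
		/* hns_roce_buf_free() now tolerates NULL and frees the descriptor itself */
		hns_roce_buf_free(hr_dev, buf);
	}

Note that hns_roce_buf_free() now also releases the 'struct hns_roce_buf'
descriptor, so callers no longer kfree() it themselves (see the
mtr_free_bufs() hunk below).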

Link: https://lore.kernel.org/r/1605347916-15964-1-git-send-email-liweihang@huawei.com


Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: wangsirong <wangsirong@huawei.com>
Reviewed-by: ChunZhi Hu <huchunzhi@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parent f8867640
+74 −54
@@ -159,76 +159,96 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
 
 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
 {
-	struct device *dev = hr_dev->dev;
-	u32 size = buf->size;
-	int i;
+	struct hns_roce_buf_list *trunks;
+	u32 i;
 
-	if (size == 0)
+	if (!buf)
 		return;
 
-	buf->size = 0;
+	trunks = buf->trunk_list;
+	if (trunks) {
+		buf->trunk_list = NULL;
+		for (i = 0; i < buf->ntrunks; i++)
+			dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift,
+					  trunks[i].buf, trunks[i].map);
 
-	if (hns_roce_buf_is_direct(buf)) {
-		dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
-	} else {
-		for (i = 0; i < buf->npages; ++i)
-			if (buf->page_list[i].buf)
-				dma_free_coherent(dev, 1 << buf->page_shift,
-						  buf->page_list[i].buf,
-						  buf->page_list[i].map);
-		kfree(buf->page_list);
-		buf->page_list = NULL;
+		kfree(trunks);
 	}
+
+	kfree(buf);
 }
 
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
-		       struct hns_roce_buf *buf, u32 page_shift)
+/*
+ * Allocate the dma buffer for storing ROCEE table entries
+ *
+ * @size: required size
+ * @page_shift: the unit size in a continuous dma address range
+ * @flags: HNS_ROCE_BUF_ flags to control the allocation flow.
+ */
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+					u32 page_shift, u32 flags)
 {
-	struct hns_roce_buf_list *buf_list;
-	struct device *dev = hr_dev->dev;
-	u32 page_size;
-	int i;
+	u32 trunk_size, page_size, alloced_size;
+	struct hns_roce_buf_list *trunks;
+	struct hns_roce_buf *buf;
+	gfp_t gfp_flags;
+	u32 ntrunk, i;
 
 	/* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */
-	buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift);
+	if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT))
+		return ERR_PTR(-EINVAL);
+
+	gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL;
+	buf = kzalloc(sizeof(*buf), gfp_flags);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
 
+	buf->page_shift = page_shift;
 	page_size = 1 << buf->page_shift;
-	buf->npages = DIV_ROUND_UP(size, page_size);
 
-	/* required size is not bigger than one trunk size */
-	if (size <= max_direct) {
-		buf->page_list = NULL;
-		buf->direct.buf = dma_alloc_coherent(dev, size,
-						     &buf->direct.map,
-						     GFP_KERNEL);
-		if (!buf->direct.buf)
-			return -ENOMEM;
+	/* Calc the trunk size and num by required size and page_shift */
+	if (flags & HNS_ROCE_BUF_DIRECT) {
+		buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE));
+		ntrunk = 1;
 	} else {
-		buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL);
-		if (!buf_list)
-			return -ENOMEM;
+		buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE));
+		ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift);
+	}
 
-		for (i = 0; i < buf->npages; i++) {
-			buf_list[i].buf = dma_alloc_coherent(dev, page_size,
-							     &buf_list[i].map,
-							     GFP_KERNEL);
-			if (!buf_list[i].buf)
-				break;
-		}
+	trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags);
+	if (!trunks) {
+		kfree(buf);
+		return ERR_PTR(-ENOMEM);
+	}
 
-		if (i != buf->npages && i > 0) {
-			while (i-- > 0)
-				dma_free_coherent(dev, page_size,
-						  buf_list[i].buf,
-						  buf_list[i].map);
-			kfree(buf_list);
-			return -ENOMEM;
-		}
-		buf->page_list = buf_list;
+	trunk_size = 1 << buf->trunk_shift;
+	alloced_size = 0;
+	for (i = 0; i < ntrunk; i++) {
+		trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size,
+						   &trunks[i].map, gfp_flags);
+		if (!trunks[i].buf)
+			break;
+
+		alloced_size += trunk_size;
+	}
+
+	buf->ntrunks = i;
+
+	/* In nofail mode, it's only failed when the alloced size is 0 */
+	if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) {
+		for (i = 0; i < buf->ntrunks; i++)
+			dma_free_coherent(hr_dev->dev, trunk_size,
+					  trunks[i].buf, trunks[i].map);
+
+		kfree(trunks);
+		kfree(buf);
+		return ERR_PTR(-ENOMEM);
 	}
-	buf->size = size;
 
-	return 0;
+	buf->npages = DIV_ROUND_UP(alloced_size, page_size);
+	buf->trunk_list = trunks;
+
+	return buf;
 }
 
 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+26 −25
@@ -267,9 +267,6 @@ enum {
 #define HNS_HW_PAGE_SHIFT			12
 #define HNS_HW_PAGE_SIZE			(1 << HNS_HW_PAGE_SHIFT)
 
-/* The minimum page count for hardware access page directly. */
-#define HNS_HW_DIRECT_PAGE_COUNT 2
-
 struct hns_roce_uar {
 	u64		pfn;
 	unsigned long	index;
@@ -421,11 +418,26 @@ struct hns_roce_buf_list {
 	dma_addr_t	map;
 };
 
+/*
+ * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous
+ * dma address range.
+ *
+ * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep.
+ *
+ * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even
+ * the allocated size is smaller than the required size.
+ */
+enum {
+	HNS_ROCE_BUF_DIRECT = BIT(0),
+	HNS_ROCE_BUF_NOSLEEP = BIT(1),
+	HNS_ROCE_BUF_NOFAIL = BIT(2),
+};
+
 struct hns_roce_buf {
-	struct hns_roce_buf_list	direct;
-	struct hns_roce_buf_list	*page_list;
+	struct hns_roce_buf_list	*trunk_list;
+	u32				ntrunks;
 	u32				npages;
-	u32				size;
+	unsigned int			trunk_shift;
 	unsigned int			page_shift;
 };
 
@@ -1081,29 +1093,18 @@ static inline struct hns_roce_qp
 	return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1));
 }
 
-static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf)
-{
-	if (buf->page_list)
-		return false;
-
-	return true;
-}
-
 static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
 {
-	if (hns_roce_buf_is_direct(buf))
-		return (char *)(buf->direct.buf) + (offset & (buf->size - 1));
-
-	return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
-	       (offset & ((1 << buf->page_shift) - 1));
+	return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) +
+			(offset & ((1 << buf->trunk_shift) - 1));
 }
 
 static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx)
 {
-	if (hns_roce_buf_is_direct(buf))
-		return buf->direct.map + ((dma_addr_t)idx << buf->page_shift);
-	else
-		return buf->page_list[idx].map;
+	int offset = idx << buf->page_shift;
+
+	return buf->trunk_list[offset >> buf->trunk_shift].map +
+			(offset & ((1 << buf->trunk_shift) - 1));
 }
 
 #define hr_hw_page_align(x)		ALIGN(x, 1 << HNS_HW_PAGE_SHIFT)
@@ -1235,8 +1236,8 @@ int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
 int hns_roce_dealloc_mw(struct ib_mw *ibmw);
 
 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
-		       struct hns_roce_buf *buf, u32 page_shift);
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+					u32 page_shift, u32 flags);
 
 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
 			   int buf_cnt, int start, struct hns_roce_buf *buf);
+13 −26
@@ -695,15 +695,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
 	return size;
 }
 
-static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size,
-					  unsigned int page_shift)
-{
-	if (is_direct)
-		return ALIGN(alloc_size, 1 << page_shift);
-	else
-		return HNS_HW_DIRECT_PAGE_COUNT << page_shift;
-}
-
 /*
  * check the given pages in continuous address space
  * Returns 0 on success, or the error page num.
@@ -732,7 +723,6 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
 	/* release kernel buffers */
 	if (mtr->kmem) {
 		hns_roce_buf_free(hr_dev, mtr->kmem);
-		kfree(mtr->kmem);
 		mtr->kmem = NULL;
 	}
 }
@@ -744,13 +734,12 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	unsigned int best_pg_shift;
 	int all_pg_count = 0;
-	size_t direct_size;
 	size_t total_size;
 	int ret;
 
 	total_size = mtr_bufs_size(buf_attr);
 	if (total_size < 1) {
-		ibdev_err(ibdev, "Failed to check mtr size\n");
+		ibdev_err(ibdev, "failed to check mtr size\n.");
 		return -EINVAL;
 	}
 
@@ -762,7 +751,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 		mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
 					buf_attr->user_access);
 		if (IS_ERR_OR_NULL(mtr->umem)) {
-			ibdev_err(ibdev, "Failed to get umem, ret %ld\n",
+			ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
 				  PTR_ERR(mtr->umem));
 			return -ENOMEM;
 		}
@@ -780,19 +769,16 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 		ret = 0;
 	} else {
 		mtr->umem = NULL;
-		mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL);
-		if (!mtr->kmem) {
-			ibdev_err(ibdev, "Failed to alloc kmem\n");
-			return -ENOMEM;
-		}
-		direct_size = mtr_kmem_direct_size(is_direct, total_size,
-						   buf_attr->page_shift);
-		ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
-					 mtr->kmem, buf_attr->page_shift);
-		if (ret) {
-			ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
-			goto err_alloc_mem;
+		mtr->kmem =
+			hns_roce_buf_alloc(hr_dev, total_size,
+					   buf_attr->page_shift,
+					   is_direct ? HNS_ROCE_BUF_DIRECT : 0);
+		if (IS_ERR(mtr->kmem)) {
+			ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
+				  PTR_ERR(mtr->kmem));
+			return PTR_ERR(mtr->kmem);
 		}
 
 		best_pg_shift = buf_attr->page_shift;
 		all_pg_count = mtr->kmem->npages;
 	}
@@ -800,7 +786,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 	/* must bigger than minimum hardware page shift */
 	if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) {
 		ret = -EINVAL;
-		ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n",
+		ibdev_err(ibdev,
+			  "failed to check mtr, page shift = %u count = %d.\n",
 			  best_pg_shift, all_pg_count);
 		goto err_alloc_mem;
 	}