Commit d3caaebd authored by Chengchang Tang, committed by Zheng Zengkai

RDMA/hns: Optimize user DCA performance by sharing DCA status

driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I63KVU



----------------------------------------------------------

Use shared memory to store the DCA status, sizing it by the max qp num
obtained from the uctx alloc param.
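
A minimal user-space sketch (assumed, not part of this patch): names such as
map_dca_status() and the fd/offset plumbing are hypothetical, but the layout
mirrors init_udca_status() in the diff below: one buf_status bit and one
sync_status bit per QP, with buf_status in the first half of the mapping and
sync_status in the second.

#include <stdbool.h>
#include <stdint.h>
#include <sys/mman.h>

struct hns_dca_shared_status {
	unsigned long *buf_status;  /* bit n set: QP n has a DCA buffer attached */
	unsigned long *sync_status; /* bit n set: kernel is freeing QP n's buffer */
};

/* Map the DCA status area exported via HNS_ROCE_MMAP_TYPE_DCA; 'fd' and
 * 'offset' come from the driver's mmap entry (hypothetical plumbing). */
static int map_dca_status(struct hns_dca_shared_status *st, int fd,
			  off_t offset, size_t size)
{
	void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			  fd, offset);

	if (addr == MAP_FAILED)
		return -1;

	/* The kernel lays out buf_status first, sync_status at size / 2. */
	st->buf_status = addr;
	st->sync_status = (unsigned long *)((char *)addr + size / 2);
	return 0;
}

/* True when QP 'dcan' still has a DCA buffer attached. */
static bool dca_buf_attached(const struct hns_dca_shared_status *st,
			     uint32_t dcan)
{
	size_t bits = 8 * sizeof(unsigned long);

	return st->buf_status[dcan / bits] & (1UL << (dcan % bits));
}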

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
Reviewed-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parent a2178118
+278 −60
@@ -143,7 +143,7 @@ static void *alloc_dca_pages(struct hns_roce_dev *hr_dev, bool is_user,
	}

	mem->page_count = kmem->npages;
	/* Override the attr->size by actually alloced size */
	/* Overwrite the attr->size by actually alloced size */
	attr->size = kmem->ntrunks << kmem->trunk_shift;
	return kmem;

@@ -731,6 +731,72 @@ static int active_alloced_buf(struct hns_roce_qp *hr_qp,
	return ret;
}

#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS)
#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n)
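/* Editorial note: each DCA QP number (dcan) owns bit n in two shared
 * bitmaps. Bit n of ->sync_status is a lock bit taken via
 * test_and_set_bit_lock() to serialize the kernel aging worker against
 * concurrent access to the same buffer, and bit n of ->buf_status tells
 * user space whether a DCA buffer is attached. With
 * HNS_DCA_BITS_PER_STATUS == 1, both macros map dcan n to bit n.
 */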
static bool start_free_dca_buf(struct hns_roce_dca_ctx *ctx, u32 dcan)
{
	unsigned long *st = ctx->sync_status;

	if (st && dcan < ctx->max_qps)
		return !test_and_set_bit_lock(DCAN_TO_SYNC_BIT(dcan), st);

	return true;
}

static void stop_free_dca_buf(struct hns_roce_dca_ctx *ctx, u32 dcan)
{
	unsigned long *st = ctx->sync_status;

	if (st && dcan < ctx->max_qps)
		clear_bit_unlock(DCAN_TO_SYNC_BIT(dcan), st);
}

static void update_dca_buf_status(struct hns_roce_dca_ctx *ctx, u32 dcan,
				  bool en)
{
	unsigned long *st = ctx->buf_status;

	if (st && dcan < ctx->max_qps) {
		if (en)
			set_bit(DCAN_TO_STAT_BIT(dcan), st);
		else
			clear_bit(DCAN_TO_STAT_BIT(dcan), st);

		/* sync status with user-space rdma */
		smp_mb__after_atomic();
	}
}

static void restart_aging_dca_mem(struct hns_roce_dev *hr_dev,
				  struct hns_roce_dca_ctx *ctx)
{
	spin_lock(&ctx->aging_lock);
	ctx->exit_aging = false;
	if (!list_empty(&ctx->aging_new_list))
		queue_delayed_work(hr_dev->irq_workq, &ctx->aging_dwork,
				   msecs_to_jiffies(DCA_MEM_AGEING_MSES));

	spin_unlock(&ctx->aging_lock);
}

static void stop_aging_dca_mem(struct hns_roce_dca_ctx *ctx,
			       struct hns_roce_dca_cfg *cfg, bool stop_worker)
{
	spin_lock(&ctx->aging_lock);
	if (stop_worker) {
		ctx->exit_aging = true;
		cancel_delayed_work(&ctx->aging_dwork);
	}

	spin_lock(&cfg->lock);

	if (!list_empty(&cfg->aging_node))
		list_del_init(&cfg->aging_node);

	spin_unlock(&cfg->lock);
	spin_unlock(&ctx->aging_lock);
}

static int attach_dca_mem(struct hns_roce_dev *hr_dev,
			  struct hns_roce_qp *hr_qp,
			  struct hns_dca_attach_attr *attr,
@@ -741,8 +807,8 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev,
	u32 buf_id;
	int ret;

	/* Stop DCA mem ageing worker */
	cancel_delayed_work(&cfg->dwork);
	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH)
		stop_aging_dca_mem(ctx, cfg, false);
	resp->alloc_flags = 0;

	spin_lock(&cfg->lock);
@@ -779,6 +845,7 @@ static int attach_dca_mem(struct hns_roce_dev *hr_dev,

	resp->alloc_flags |= HNS_DCA_ATTACH_FLAGS_NEW_BUFFER;
	resp->alloc_pages = cfg->npages;
	update_dca_buf_status(ctx, cfg->dcan, true);

	return 0;
}
@@ -831,6 +898,7 @@ static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx,
	unsigned long flags;
	u32 buf_id;

	update_dca_buf_status(ctx, cfg->dcan, false);
	spin_lock(&cfg->lock);
	buf_id = cfg->buf_id;
	cfg->buf_id = HNS_DCA_INVALID_BUF_ID;
@@ -849,19 +917,22 @@ static void free_buf_from_dca_mem(struct hns_roce_dca_ctx *ctx,
	spin_unlock_irqrestore(&ctx->pool_lock, flags);
}

static void detach_dca_mem(struct hns_roce_dev *hr_dev,
			   struct hns_roce_qp *hr_qp,
void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			 struct hns_dca_detach_attr *attr)
{
	struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp);
	struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;

	/* Start an ageing worker to free buffer */
	cancel_delayed_work(&cfg->dwork);
	stop_aging_dca_mem(ctx, cfg, true);

	spin_lock(&ctx->aging_lock);
	spin_lock(&cfg->lock);
	cfg->sq_idx = attr->sq_idx;
	queue_delayed_work(hr_dev->irq_workq, &cfg->dwork,
			   msecs_to_jiffies(DCA_MEM_AGEING_MSES));
	list_add_tail(&cfg->aging_node, &ctx->aging_new_list);
	spin_unlock(&cfg->lock);
	spin_unlock(&ctx->aging_lock);

	restart_aging_dca_mem(hr_dev, ctx);
}

struct dca_mem_shrink_attr {
@@ -924,11 +995,87 @@ static void shrink_dca_mem(struct hns_roce_dev *hr_dev,
	resp->free_key = attr.shrink_key;
}

static void init_dca_context(struct hns_roce_dca_ctx *ctx)
static void process_aging_dca_mem(struct hns_roce_dev *hr_dev,
				  struct hns_roce_dca_ctx *ctx)
{
	struct hns_roce_dca_cfg *cfg, *tmp_cfg;
	struct hns_roce_qp *hr_qp;

	spin_lock(&ctx->aging_lock);
	list_for_each_entry_safe(cfg, tmp_cfg, &ctx->aging_new_list, aging_node)
		list_move(&cfg->aging_node, &ctx->aging_proc_list);

	while (!ctx->exit_aging && !list_empty(&ctx->aging_proc_list)) {
		cfg = list_first_entry(&ctx->aging_proc_list,
				       struct hns_roce_dca_cfg, aging_node);
		list_del_init_careful(&cfg->aging_node);
		hr_qp = container_of(cfg, struct hns_roce_qp, dca_cfg);
		spin_unlock(&ctx->aging_lock);

		if (start_free_dca_buf(ctx, cfg->dcan)) {
			if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp))
				free_buf_from_dca_mem(ctx, cfg);

			stop_free_dca_buf(ctx, cfg->dcan);
		}

		spin_lock(&ctx->aging_lock);

		spin_lock(&cfg->lock);

		if (cfg->buf_id != HNS_DCA_INVALID_BUF_ID)
			list_move(&cfg->aging_node, &ctx->aging_new_list);

		spin_unlock(&cfg->lock);
	}
	spin_unlock(&ctx->aging_lock);
}

static void udca_mem_aging_work(struct work_struct *work)
{
	struct hns_roce_dca_ctx *ctx = container_of(work,
			struct hns_roce_dca_ctx, aging_dwork.work);
	struct hns_roce_ucontext *uctx = container_of(ctx,
					 struct hns_roce_ucontext, dca_ctx);
	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);

	cancel_delayed_work(&ctx->aging_dwork);
	process_aging_dca_mem(hr_dev, ctx);
	if (!ctx->exit_aging)
		restart_aging_dca_mem(hr_dev, ctx);
}

static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev);

static void kdca_mem_aging_work(struct work_struct *work)
{
	struct hns_roce_dca_ctx *ctx = container_of(work,
			struct hns_roce_dca_ctx, aging_dwork.work);
	struct hns_roce_dev *hr_dev = container_of(ctx, struct hns_roce_dev,
						   dca_ctx);

	cancel_delayed_work(&ctx->aging_dwork);
	process_aging_dca_mem(hr_dev, ctx);
	remove_unused_dca_mem(hr_dev);
	if (!ctx->exit_aging)
		restart_aging_dca_mem(hr_dev, ctx);
}

static void init_dca_context(struct hns_roce_dca_ctx *ctx, bool is_user)
{
	INIT_LIST_HEAD(&ctx->pool);
	spin_lock_init(&ctx->pool_lock);
	ctx->total_size = 0;

	ida_init(&ctx->ida);
	INIT_LIST_HEAD(&ctx->aging_new_list);
	INIT_LIST_HEAD(&ctx->aging_proc_list);
	spin_lock_init(&ctx->aging_lock);
	ctx->exit_aging = false;
	if (is_user)
		INIT_DELAYED_WORK(&ctx->aging_dwork, udca_mem_aging_work);
	else
		INIT_DELAYED_WORK(&ctx->aging_dwork, kdca_mem_aging_work);
}

static void cleanup_dca_context(struct hns_roce_dev *hr_dev,
@@ -938,6 +1085,10 @@ static void cleanup_dca_context(struct hns_roce_dev *hr_dev,
	unsigned long flags;
	bool is_user;

	spin_lock(&ctx->aging_lock);
	cancel_delayed_work_sync(&ctx->aging_dwork);
	spin_unlock(&ctx->aging_lock);

	is_user = (ctx != &hr_dev->dca_ctx);
	spin_lock_irqsave(&ctx->pool_lock, flags);
	list_for_each_entry_safe(mem, tmp, &ctx->pool, list) {
@@ -963,7 +1114,7 @@ static uint dca_unit_size;
static ulong dca_min_size = DCA_MAX_MEM_SIZE;
static ulong dca_max_size = DCA_MAX_MEM_SIZE;

static void config_kdca_context(struct hns_roce_dca_ctx *ctx)
static void load_kdca_param(struct hns_roce_dca_ctx *ctx)
{
	unsigned int unit_size;

@@ -985,9 +1136,8 @@ static void config_kdca_context(struct hns_roce_dca_ctx *ctx)

void hns_roce_init_dca(struct hns_roce_dev *hr_dev)
{
	init_dca_context(&hr_dev->dca_ctx);

	config_kdca_context(&hr_dev->dca_ctx);
	load_kdca_param(&hr_dev->dca_ctx);
	init_dca_context(&hr_dev->dca_ctx, false);
}

void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev)
@@ -995,22 +1145,68 @@ void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev)
	cleanup_dca_context(hr_dev, &hr_dev->dca_ctx);
}

void hns_roce_register_udca(struct hns_roce_dev *hr_dev,
static void init_udca_status(struct hns_roce_ucontext *uctx, int udca_max_qps,
			     unsigned int dev_max_qps)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
	const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS;
	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx);
	struct ib_ucontext *ib_uctx = &uctx->ibucontext;
	void *kaddr;
	size_t size;

	size = BITS_TO_BYTES(udca_max_qps * bits_per_qp);
	ctx->status_npage = DIV_ROUND_UP(size, PAGE_SIZE);

	size = ctx->status_npage * PAGE_SIZE;
	ctx->max_qps = min_t(unsigned int, dev_max_qps,
			     size * BITS_PER_BYTE / bits_per_qp);

	kaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!kaddr)
		return;

	ctx->dca_mmap_entry = hns_roce_user_mmap_entry_insert(ib_uctx,
				(u64)kaddr, size, HNS_ROCE_MMAP_TYPE_DCA);
	if (!ctx->dca_mmap_entry) {
		free_pages_exact(kaddr, size);
		return;
	}

	ctx->buf_status = (unsigned long *)kaddr;
	ctx->sync_status = (unsigned long *)(kaddr + size / 2);
}

void hns_roce_register_udca(struct hns_roce_dev *hr_dev, int max_qps,
			    struct hns_roce_ucontext *uctx)
{
	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx);

	if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA))
		return;

	init_dca_context(&uctx->dca_ctx);
	init_dca_context(ctx, true);
	if (max_qps > 0)
		init_udca_status(uctx, max_qps, hr_dev->caps.num_qps);
}

void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev,
			      struct hns_roce_ucontext *uctx)
{
	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx);

	if (!(uctx->config & HNS_ROCE_UCTX_CONFIG_DCA))
		return;

	cleanup_dca_context(hr_dev, &uctx->dca_ctx);
	cleanup_dca_context(hr_dev, ctx);

	if (ctx->buf_status) {
		free_pages_exact(ctx->buf_status,
				 ctx->status_npage * PAGE_SIZE);
		ctx->buf_status = NULL;
	}

	ida_destroy(&ctx->ida);
}

static struct dca_mem *key_to_dca_mem(struct list_head *head, u64 key)
@@ -1227,6 +1423,7 @@ static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev)
		spin_unlock_irqrestore(&ctx->pool_lock, flags);
		if (!mem)
			break;

		unregister_dca_mem(hr_dev, NULL, mem);
		free_dca_mem(mem);
		/* No more free memory */
@@ -1235,52 +1432,56 @@ static void remove_unused_dca_mem(struct hns_roce_dev *hr_dev)
	}
}

static void kick_dca_mem(struct hns_roce_dev *hr_dev,
static void kick_dca_buf(struct hns_roce_dev *hr_dev,
			 struct hns_roce_dca_cfg *cfg,
			 struct hns_roce_ucontext *uctx)
			 struct hns_roce_dca_ctx *ctx)
{
	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx);

	/* Stop ageing worker and free DCA buffer from pool */
	cancel_delayed_work_sync(&cfg->dwork);
	stop_aging_dca_mem(ctx, cfg, true);
	free_buf_from_dca_mem(ctx, cfg);
	restart_aging_dca_mem(hr_dev, ctx);

	/* Shrink kernel DCA mem */
	if (!uctx)
	if (ctx == &hr_dev->dca_ctx)
		remove_unused_dca_mem(hr_dev);
}

static void dca_mem_ageing_work(struct work_struct *work)
static u32 alloc_dca_num(struct hns_roce_dca_ctx *ctx)
{
	struct hns_roce_qp *hr_qp = container_of(work, struct hns_roce_qp,
						 dca_cfg.dwork.work);
	struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
	struct hns_roce_dca_ctx *ctx = hr_qp_to_dca_ctx(hr_dev, hr_qp);
	bool hw_is_inactive;
	int ret;

	hw_is_inactive = hr_dev->hw->chk_dca_buf_inactive &&
			 hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp);
	if (hw_is_inactive)
		free_buf_from_dca_mem(ctx, &hr_qp->dca_cfg);
	ret = ida_alloc_max(&ctx->ida, ctx->max_qps - 1, GFP_KERNEL);
	if (ret < 0)
		return HNS_DCA_INVALID_DCA_NUM;

	/* Shrink kernel DCA mem */
	if (!hr_qp->ibqp.uobject)
		remove_unused_dca_mem(hr_dev);
	stop_free_dca_buf(ctx, ret);
	update_dca_buf_status(ctx, ret, false);
	return ret;
}

void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			 struct hns_dca_detach_attr *attr)
static void free_dca_num(u32 dcan, struct hns_roce_dca_ctx *ctx)
{
	detach_dca_mem(hr_dev, hr_qp, attr);
	if (dcan == HNS_DCA_INVALID_DCA_NUM)
		return;

	ida_free(&ctx->ida, dcan);
}

void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
		       struct ib_udata *udata)
static int setup_kdca(struct hns_roce_dca_cfg *cfg)
{
	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
					 struct hns_roce_ucontext, ibucontext);
	if (!cfg->npages)
		return -EINVAL;

	kick_dca_mem(hr_dev, &hr_qp->dca_cfg, uctx);
	cfg->buf_list = kcalloc(cfg->npages, sizeof(void *), GFP_KERNEL);
	if (!cfg->buf_list)
		return -ENOMEM;

	return 0;
}

static void teardown_kdca(struct hns_roce_dca_cfg *cfg)
{
	kfree(cfg->buf_list);
	cfg->buf_list = NULL;
}

int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
@@ -1289,17 +1490,16 @@ int hns_roce_enable_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
	struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;

	spin_lock_init(&cfg->lock);
	INIT_DELAYED_WORK(&cfg->dwork, dca_mem_ageing_work);
	INIT_LIST_HEAD(&cfg->aging_node);
	cfg->buf_id = HNS_DCA_INVALID_BUF_ID;
	cfg->npages = hr_qp->buff_size >> HNS_HW_PAGE_SHIFT;
	cfg->dcan = HNS_DCA_INVALID_DCA_NUM;
	/* Cannot support dynamic detach when rq is not empty */
	if (!hr_qp->rq.wqe_cnt)
		hr_qp->en_flags |= HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH;

	/* DCA page list for kernel QP */
	if (!udata && cfg->npages) {
		cfg->buf_list = kcalloc(cfg->npages, sizeof(void *),
					GFP_KERNEL);
		if (!cfg->buf_list)
			return -ENOMEM;
	}
	if (!udata)
		return setup_kdca(cfg);

	return 0;
}
@@ -1309,14 +1509,32 @@ void hns_roce_disable_dca(struct hns_roce_dev *hr_dev,
{
	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
					 struct hns_roce_ucontext, ibucontext);
	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx);
	struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;

	kick_dca_mem(hr_dev, cfg, uctx);
	kick_dca_buf(hr_dev, cfg, ctx);
	free_dca_num(cfg->dcan, ctx);
	cfg->dcan = HNS_DCA_INVALID_DCA_NUM;

	/* Free kernel DCA buffer list */
	if (!udata && cfg->buf_list) {
		kfree(cfg->buf_list);
		cfg->buf_list = NULL;
	if (!udata)
		teardown_kdca(&hr_qp->dca_cfg);
}

void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			 struct ib_udata *udata)
{
	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
					 struct hns_roce_ucontext, ibucontext);
	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(hr_dev, uctx);
	struct hns_roce_dca_cfg *cfg = &hr_qp->dca_cfg;

	if (hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_ERR) {
		kick_dca_buf(hr_dev, cfg, ctx);
		free_dca_num(cfg->dcan, ctx);
		cfg->dcan = HNS_DCA_INVALID_DCA_NUM;
	} else if (hr_qp->state == IB_QPS_RTR) {
		free_dca_num(cfg->dcan, ctx);
		cfg->dcan = alloc_dca_num(ctx);
	}
}

@@ -1520,7 +1738,7 @@ static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_DETACH)(
	if (ret)
		return ret;

	detach_dca_mem(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr);
	hns_roce_dca_detach(to_hr_dev(hr_qp->ibqp.device), hr_qp, &attr);

	return 0;
}
+4 −4
@@ -19,6 +19,7 @@ struct hns_dca_page_state {
extern const struct uapi_definition hns_roce_dca_uapi_defs[];

#define HNS_DCA_INVALID_BUF_ID 0UL
#define HNS_DCA_INVALID_DCA_NUM ~0U

/*
 * buffer id(29b) = tag(7b) + owner(22b)
@@ -55,7 +56,7 @@ typedef int (*hns_dca_enum_callback)(struct hns_dca_page_state *, u32, void *);
void hns_roce_init_dca(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_dca(struct hns_roce_dev *hr_dev);

void hns_roce_register_udca(struct hns_roce_dev *hr_dev,
void hns_roce_register_udca(struct hns_roce_dev *hr_dev, int max_qps,
			    struct hns_roce_ucontext *uctx);
void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev,
			      struct hns_roce_ucontext *uctx);
@@ -69,8 +70,7 @@ int hns_roce_dca_attach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			struct hns_dca_attach_attr *attr);
void hns_roce_dca_detach(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			 struct hns_dca_detach_attr *attr);

void hns_roce_dca_kick(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
			 struct ib_udata *udata);

void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param,
+2 −1
@@ -259,7 +259,8 @@ static void dca_setup_qp_stats(struct hns_roce_qp *hr_qp,
{
	struct hns_roce_ucontext *uctx = NULL;

	if (!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) || !hr_qp->ibqp.pd)
	if (!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) ||
	    !hr_qp->ibqp.pd)
		return;

	if (hr_qp->ibqp.pd->uobject)
+20 −2
@@ -205,6 +205,7 @@ struct hns_roce_uar {
enum hns_roce_mmap_type {
	HNS_ROCE_MMAP_TYPE_DB = 1,
	HNS_ROCE_MMAP_TYPE_DWQE,
	HNS_ROCE_MMAP_TYPE_DCA,
};

struct hns_user_mmap_entry {
@@ -222,6 +223,21 @@ struct hns_roce_dca_ctx {
	size_t max_size; /* max size the pool can expand to */
	size_t min_size; /* shrink if @free_size > @min_size */
	unsigned int unit_size; /* unit size per DCA mem */

	unsigned int max_qps;
	unsigned int status_npage;
	struct ida ida;

#define HNS_DCA_BITS_PER_STATUS 1
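	/* one status bit per QP in each of the two bitmaps below */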
	unsigned long *buf_status;
	unsigned long *sync_status;

	bool exit_aging;
	struct list_head aging_proc_list;
	struct list_head aging_new_list;
	spinlock_t aging_lock;
	struct delayed_work aging_dwork;
	struct hns_user_mmap_entry *dca_mmap_entry;
};

struct hns_roce_ucontext {
@@ -332,12 +348,14 @@ struct hns_roce_mtr {
/* DCA config */
struct hns_roce_dca_cfg {
	spinlock_t		lock;
	u32			buf_id;
	u16			attach_count;
	u32			buf_id;
	u32			dcan;
	void			**buf_list;
	u32			npages;
	u32			sq_idx;
	struct delayed_work	dwork;
	bool			aging_enable;
	struct list_head	aging_node;
};

struct hns_roce_mw {
+14 −16
@@ -372,9 +372,9 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
	return 0;
}

static inline bool check_qp_dca_enable(struct hns_roce_qp *hr_qp)
static bool check_dca_attach_enable(struct hns_roce_qp *hr_qp)
{
	return !!(hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA);
	return hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH;
}

static int dca_attach_qp_buf(struct hns_roce_dev *hr_dev,
@@ -408,6 +408,11 @@ static int dca_attach_qp_buf(struct hns_roce_dev *hr_dev,
	return hns_roce_dca_attach(hr_dev, hr_qp, &attr);
}

static bool check_dca_detach_enable(struct hns_roce_qp *hr_qp)
{
	return hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH;
}

static void dca_detach_qp_buf(struct hns_roce_dev *hr_dev,
			      struct hns_roce_qp *hr_qp)
{
@@ -454,7 +459,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,
		return -EIO;
	}

	if (check_qp_dca_enable(hr_qp)) {
	if (check_dca_attach_enable(hr_qp)) {
		ret = dca_attach_qp_buf(hr_dev, hr_qp);
		if (unlikely(ret)) {
			ibdev_err(&hr_dev->ib_dev,
@@ -694,7 +699,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
		ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
					&curr_idx, valid_num_sge);

	if (qp->en_flags & HNS_ROCE_QP_CAP_DCA)
	if (qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH)
		fill_dca_fields(qp, rc_sq_wqe);

	/*
@@ -883,7 +888,7 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev,
	if (hr_qp->state == IB_QPS_RESET)
		return -EINVAL;

	if (check_qp_dca_enable(hr_qp)) {
	if (check_dca_attach_enable(hr_qp)) {
		ret = dca_attach_qp_buf(hr_dev, hr_qp);
		if (unlikely(ret)) {
			ibdev_err(ibdev,
@@ -4272,7 +4277,7 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,

	for (npolled = 0; npolled < num_entries; ++npolled) {
		ret = hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled);
		if (cur_qp && check_qp_dca_enable(cur_qp))
		if (cur_qp && check_dca_detach_enable(cur_qp))
			dca_detach_qp_buf(hr_dev, cur_qp);
		if (ret)
			break;
@@ -4842,7 +4847,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
	hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);

	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DCA) {
		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH) {
			hr_reg_enable(context, QPC_DCA_MODE);
			hr_reg_clear(qpc_mask, QPC_DCA_MODE);
		}
@@ -5620,9 +5625,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
	if (new_state == IB_QPS_RESET && !ibqp->uobject)
		clear_qp(hr_qp);

	if (check_qp_dca_enable(hr_qp) &&
	    (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
		hns_roce_dca_kick(hr_dev, hr_qp, udata);
	if (check_dca_attach_enable(hr_qp))
		hns_roce_modify_dca(hr_dev, hr_qp, udata);

out:
	return ret;
@@ -5831,12 +5835,6 @@ static bool hns_roce_v2_chk_dca_buf_inactive(struct hns_roce_dev *hr_dev,
	if (state == HNS_ROCE_QP_ST_ERR || state == HNS_ROCE_QP_ST_RST)
		return true;

	/* If RQ is not empty, the buffer is always active until the QP stops
	 * working.
	 */
	if (hr_qp->rq.wqe_cnt > 0)
		return false;

	if (hr_qp->sq.wqe_cnt > 0) {
		tmp = (u32)hr_reg_read(&context, QPC_RETRY_MSG_MSN);
		sq_idx = tmp & (hr_qp->sq.wqe_cnt - 1);