Commit 4c236d5d authored by Geetha sowjanya, committed by David S. Miller

octeontx2-pf: cn10k: Use LMTST lines for NPA/NIX operations



This patch adds support for using the new LMTST lines for NPA batch free
and burst SQE flush. It adds a new dev_hw_ops structure to hold the
platform-specific functions and creates the new files cn10k.c and cn10k.h.

Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6e8ad438
drivers/net/ethernet/marvell/octeontx2/nic/Makefile  +1 −1
@@ -7,7 +7,7 @@ obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o
obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o

rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
-		     otx2_ptp.o otx2_flows.o
+		     otx2_ptp.o otx2_flows.o cn10k.o
rvu_nicvf-y := otx2_vf.o

ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c  +181 −0 (new file)
// SPDX-License-Identifier: GPL-2.0
/* Marvell OcteonTx2 RVU Physical Function ethernet driver
 *
 * Copyright (C) 2020 Marvell.
 */

#include "cn10k.h"
#include "otx2_reg.h"
#include "otx2_struct.h"

static struct dev_hw_ops	otx2_hw_ops = {
	.sq_aq_init = otx2_sq_aq_init,
	.sqe_flush = otx2_sqe_flush,
	.aura_freeptr = otx2_aura_freeptr,
	.refill_pool_ptrs = otx2_refill_pool_ptrs,
};

static struct dev_hw_ops cn10k_hw_ops = {
	.sq_aq_init = cn10k_sq_aq_init,
	.sqe_flush = cn10k_sqe_flush,
	.aura_freeptr = cn10k_aura_freeptr,
	.refill_pool_ptrs = cn10k_refill_pool_ptrs,
};

int cn10k_pf_lmtst_init(struct otx2_nic *pf)
{
	int size, num_lines;
	u64 base;

	if (!test_bit(CN10K_LMTST, &pf->hw.cap_flag)) {
		pf->hw_ops = &otx2_hw_ops;
		return 0;
	}

	pf->hw_ops = &cn10k_hw_ops;
	base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) +
		       (MBOX_SIZE * (pf->total_vfs + 1));

	size = pci_resource_len(pf->pdev, PCI_MBOX_BAR_NUM) -
	       (MBOX_SIZE * (pf->total_vfs + 1));

	pf->hw.lmt_base = ioremap(base, size);

	if (!pf->hw.lmt_base) {
		dev_err(pf->dev, "Unable to map PF LMTST region\n");
		return -ENOMEM;
	}

	/* FIXME: Get the num of LMTST lines from LMT table */
	pf->tot_lmt_lines = size / LMT_LINE_SIZE;
	num_lines = (pf->tot_lmt_lines - NIX_LMTID_BASE) /
			    pf->hw.tx_queues;
	/* Number of LMT lines per SQ */
	pf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines;

	pf->nix_lmt_size = pf->nix_lmt_lines * LMT_LINE_SIZE;
	return 0;
}
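
A worked example of the partitioning above (editorial; LMT_LINE_SIZE is assumed to be 128 bytes, and the region size and queue count are hypothetical):

/* Illustration only: a 2 MB PF LMTST region with 16 TX queues gives
 *   tot_lmt_lines = 2097152 / 128 = 16384
 *   num_lines     = (16384 - NIX_LMTID_BASE) / 16
 * capped at 32 lines per SQ, so
 *   nix_lmt_size  = 32 * 128 = 4096 bytes of LMT space per SQ.
 */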

int cn10k_vf_lmtst_init(struct otx2_nic *vf)
{
	int size, num_lines;

	if (!test_bit(CN10K_LMTST, &vf->hw.cap_flag)) {
		vf->hw_ops = &otx2_hw_ops;
		return 0;
	}

	vf->hw_ops = &cn10k_hw_ops;
	size = pci_resource_len(vf->pdev, PCI_MBOX_BAR_NUM);
	vf->hw.lmt_base = ioremap_wc(pci_resource_start(vf->pdev,
							PCI_MBOX_BAR_NUM),
				     size);
	if (!vf->hw.lmt_base) {
		dev_err(vf->dev, "Unable to map VF LMTST region\n");
		return -ENOMEM;
	}

	vf->tot_lmt_lines = size / LMT_LINE_SIZE;
	/* LMTST lines per SQ */
	num_lines = (vf->tot_lmt_lines - NIX_LMTID_BASE) /
			    vf->hw.tx_queues;
	vf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines;
	vf->nix_lmt_size = vf->nix_lmt_lines * LMT_LINE_SIZE;
	return 0;
}
EXPORT_SYMBOL(cn10k_vf_lmtst_init);

int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
{
	struct nix_cn10k_aq_enq_req *aq;
	struct otx2_nic *pfvf = dev;
	struct otx2_snd_queue *sq;

	sq = &pfvf->qset.sq[qidx];
	sq->lmt_addr = (__force u64 *)((u64)pfvf->hw.nix_lmt_base +
			       (qidx * pfvf->nix_lmt_size));

	/* Get memory to put this msg */
	aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
	if (!aq)
		return -ENOMEM;

	aq->sq.cq = pfvf->hw.rx_queues + qidx;
	aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */
	aq->sq.cq_ena = 1;
	aq->sq.ena = 1;
	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
	/* FIXME: set based on NIX_AF_DWRR_RPM_MTU */
	aq->sq.smq_rr_weight = OTX2_MAX_MTU;
	aq->sq.default_chan = pfvf->hw.tx_chan_base;
	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
	aq->sq.sqb_aura = sqb_aura;
	aq->sq.sq_int_ena = NIX_SQINT_BITS;
	aq->sq.qint_idx = 0;
	/* Due to pipelining impact, a minimum of 2000 unused SQ CQEs
	 * needs to be maintained to avoid CQ overflow.
	 */
	aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt));

	/* Fill AQ info */
	aq->qidx = qidx;
	aq->ctype = NIX_AQ_CTYPE_SQ;
	aq->op = NIX_AQ_INSTOP_INIT;

	return otx2_sync_mbox_msg(&pfvf->mbox);
}
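
A worked example of the cq_limit encoding above (editorial; the field is in units of CQ-size/256 per the formula, and sqe_cnt here is hypothetical):

/* Illustration only: with qset.sqe_cnt = 4096,
 *   cq_limit = (2000 * 256) / 4096 = 125
 * and 125 * (4096 / 256) = 2000 CQEs remain reserved as send skid,
 * matching the SEND_CQ_SKID headroom the comment above calls for.
 */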

#define NPA_MAX_BURST 16
void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
{
	struct otx2_nic *pfvf = dev;
	u64 ptrs[NPA_MAX_BURST];
	int num_ptrs = 1;
	dma_addr_t bufptr;

	/* Refill pool with new buffers */
	while (cq->pool_ptrs) {
		if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
			if (num_ptrs--)
				__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
						     num_ptrs,
						     cq->rbpool->lmt_addr);
			break;
		}
		cq->pool_ptrs--;
		ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM;
		num_ptrs++;
		if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
			__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
					     num_ptrs,
					     cq->rbpool->lmt_addr);
			num_ptrs = 1;
		}
	}
}
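
A note on the batching above (editorial sketch, not part of the patch):

/* Illustration only: ptrs[0] is reserved for the aura word that
 * __cn10k_aura_freeptr() writes, so num_ptrs starts at 1 and each
 * LMTST flush carries at most NPA_MAX_BURST - 1 = 15 buffer
 * pointers before num_ptrs resets to 1 for the next burst.
 */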

void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx)
{
	struct otx2_nic *pfvf = dev;
	int lmt_id = NIX_LMTID_BASE + (qidx * pfvf->nix_lmt_lines);
	u64 val = 0, tar_addr = 0;

	/* FIXME: val[0:10] LMT_ID.
	 * [12:15] no of LMTST - 1 in the burst.
	 * [19:63] data size of each LMTST in the burst except first.
	 */
	val = (lmt_id & 0x7FF);
	/* Target address for LMTST flush tells HW how many 128bit
	 * words are present.
	 * tar_addr[6:4] size of first LMTST - 1 in units of 128b.
	 */
	tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4;
	dma_wmb();
	memcpy(sq->lmt_addr, sq->sqe_base, size);
	cn10k_lmt_flush(val, tar_addr);

	sq->head++;
	sq->head &= (sq->sqe_cnt - 1);
}
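
A worked example of the encoding above for a single 128-byte SQE (editorial; register and field names as in the code):

/* Illustration only: one LMTST of size = 128 bytes.
 *   val[10:0]    = lmt_id  (burst count/size fields remain 0)
 *   (128/16) - 1 = 7  ->  tar_addr[6:4] = 7  (eight 128-bit words)
 * cn10k_lmt_flush(val, sq->io_addr | (7 << 4)) then pushes the SQE
 * that was memcpy'd into sq->lmt_addr out to NIX_LF_OP_SENDX(0).
 */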
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h  +17 −0 (new file)
/* SPDX-License-Identifier: GPL-2.0
 * Marvell OcteonTx2 RVU Ethernet driver
 *
 * Copyright (C) 2020 Marvell.
 */

#ifndef CN10K_H
#define CN10K_H

#include "otx2_common.h"

void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
int cn10k_pf_lmtst_init(struct otx2_nic *pf);
int cn10k_vf_lmtst_init(struct otx2_nic *vf);
#endif /* CN10K_H */
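
For context, a minimal sketch (editorial, not part of the patch) of how the selected ops are used; the probe-time call sites live in otx2_pf.c / otx2_vf.c, outside the hunks shown here:

/* Illustration only: cn10k_pf_lmtst_init() points pf->hw_ops at either
 * otx2_hw_ops or cn10k_hw_ops, so the hot paths dispatch without
 * per-packet silicon checks:
 */
err = cn10k_pf_lmtst_init(pf);
if (err)
	return err;
/* ... later, in the TX/refill paths ... */
pf->hw_ops->sqe_flush(pf, sq, size, qidx);
pf->hw_ops->aura_freeptr(pf, aura_id, bufptr);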
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c  +36 −48
@@ -15,6 +15,7 @@
#include "otx2_reg.h"
#include "otx2_common.h"
#include "otx2_struct.h"
#include "cn10k.h"

static void otx2_nix_rq_op_stats(struct queue_stats *stats,
				 struct otx2_nic *pfvf, int qidx)
@@ -526,6 +527,26 @@ static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
	return ret;
}

+int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
+		      dma_addr_t *dma)
+{
+	if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma))) {
+		struct refill_work *work;
+		struct delayed_work *dwork;
+
+		work = &pfvf->refill_wrk[cq->cq_idx];
+		dwork = &work->pool_refill_work;
+		/* Schedule a task if no other task is running */
+		if (!cq->refill_task_sched) {
+			cq->refill_task_sched = true;
+			schedule_delayed_work(dwork,
+					      msecs_to_jiffies(100));
+		}
+		return -ENOMEM;
+	}
+	return 0;
+}

void otx2_tx_timeout(struct net_device *netdev, unsigned int txq)
{
	struct otx2_nic *pfvf = netdev_priv(netdev);
@@ -728,9 +749,6 @@ void otx2_sqb_flush(struct otx2_nic *pfvf)
#define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */
#define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */

-/* Send skid of 2000 packets required for CQ size of 4K CQEs. */
-#define SEND_CQ_SKID	2000
-
static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
{
	struct otx2_qset *qset = &pfvf->qset;
@@ -764,45 +782,14 @@ static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
	return otx2_sync_mbox_msg(&pfvf->mbox);
}

-static int cn10k_sq_aq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
-{
-	struct nix_cn10k_aq_enq_req *aq;
-
-	/* Get memory to put this msg */
-	aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
-	if (!aq)
-		return -ENOMEM;
-
-	aq->sq.cq = pfvf->hw.rx_queues + qidx;
-	aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */
-	aq->sq.cq_ena = 1;
-	aq->sq.ena = 1;
-	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
-	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-	/* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/
-	aq->sq.smq_rr_weight = OTX2_MAX_MTU;
-	aq->sq.default_chan = pfvf->hw.tx_chan_base;
-	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
-	aq->sq.sqb_aura = sqb_aura;
-	aq->sq.sq_int_ena = NIX_SQINT_BITS;
-	aq->sq.qint_idx = 0;
-	/* Due pipelining impact minimum 2000 unused SQ CQE's
-	 * need to maintain to avoid CQ overflow.
-	 */
-	aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt));
-
-	/* Fill AQ info */
-	aq->qidx = qidx;
-	aq->ctype = NIX_AQ_CTYPE_SQ;
-	aq->op = NIX_AQ_INSTOP_INIT;
-
-	return otx2_sync_mbox_msg(&pfvf->mbox);
-}
-
-static int otx2_sq_aq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
+int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
{
+	struct otx2_nic *pfvf = dev;
+	struct otx2_snd_queue *sq;
	struct nix_aq_enq_req *aq;

+	sq = &pfvf->qset.sq[qidx];
+	sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx));
	/* Get memory to put this msg */
	aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
	if (!aq)
@@ -873,16 +860,12 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
	sq->sqe_thresh = ((sq->num_sqbs * sq->sqe_per_sqb) * 10) / 100;
	sq->aura_id = sqb_aura;
	sq->aura_fc_addr = pool->fc_addr->base;
-	sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx));
	sq->io_addr = (__force u64)otx2_get_regaddr(pfvf, NIX_LF_OP_SENDX(0));

	sq->stats.bytes = 0;
	sq->stats.pkts = 0;

-	if (is_dev_otx2(pfvf->pdev))
-		return otx2_sq_aq_init(pfvf, qidx, sqb_aura);
-	else
-		return cn10k_sq_aq_init(pfvf, qidx, sqb_aura);
+	return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura);

}

@@ -987,7 +970,7 @@ static void otx2_pool_refill_task(struct work_struct *work)
			}
			return;
		}
-		otx2_aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM);
+		pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM);
		cq->pool_ptrs--;
	}
	cq->refill_task_sched = false;
@@ -1231,6 +1214,11 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,

	pool->rbsize = buf_size;

+	/* Set LMTST addr for NPA batch free */
+	if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag))
+		pool->lmt_addr = (__force u64 *)((u64)pfvf->hw.npa_lmt_base +
+						 (pool_id * LMT_LINE_SIZE));

	/* Initialize this pool's context via AF */
	aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
	if (!aq) {
@@ -1319,7 +1307,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
		for (ptr = 0; ptr < num_sqbs; ptr++) {
			if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
				return -ENOMEM;
-			otx2_aura_freeptr(pfvf, pool_id, bufptr);
+			pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
			sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
		}
	}
@@ -1369,7 +1357,7 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
		for (ptr = 0; ptr < num_ptrs; ptr++) {
			if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
				return -ENOMEM;
-			otx2_aura_freeptr(pfvf, pool_id,
-					  bufptr + OTX2_HEAD_ROOM);
+			pfvf->hw_ops->aura_freeptr(pfvf, pool_id,
+						   bufptr + OTX2_HEAD_ROOM);
		}
	}
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h  +63 −5
@@ -50,6 +50,9 @@ enum arua_mapped_qtypes {
#define NIX_LF_ERR_VEC				0x81
#define NIX_LF_POISON_VEC			0x82

+/* Send skid of 2000 packets required for CQ size of 4K CQEs. */
+#define SEND_CQ_SKID	2000

/* RSS configuration */
struct otx2_rss_ctx {
	u8  ind_tbl[MAX_RSS_INDIR_TBL_SIZE];
@@ -275,9 +278,18 @@ struct otx2_flow_config {
	struct list_head	flow_list;
};

+struct dev_hw_ops {
+	int	(*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura);
+	void	(*sqe_flush)(void *dev, struct otx2_snd_queue *sq,
+			     int size, int qidx);
+	void	(*refill_pool_ptrs)(void *dev, struct otx2_cq_queue *cq);
+	void	(*aura_freeptr)(void *dev, int aura, u64 buf);
+};

struct otx2_nic {
	void __iomem		*reg_base;
	struct net_device	*netdev;
+	struct dev_hw_ops	*hw_ops;
	void			*iommu_domain;
	u16			max_frs;
	u16			rbsize; /* Receive buffer size */
@@ -507,10 +519,51 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr)
}

#else
-#define otx2_write128(lo, hi, addr)
+#define otx2_write128(lo, hi, addr)		writeq((hi) | (lo), addr)
#define otx2_atomic64_add(incr, ptr)		({ *ptr += incr; })
#endif

+static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
+					u64 *ptrs, u64 num_ptrs,
+					u64 *lmt_addr)
+{
+	u64 size = 0, count_eot = 0;
+	u64 tar_addr, val = 0;
+
+	tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0);
+	/* LMTID is same as AURA Id */
+	val = (aura & 0x7FF) | BIT_ULL(63);
+	/* Set if [127:64] of last 128bit word has a valid pointer */
+	count_eot = (num_ptrs % 2) ? 0ULL : 1ULL;
+	/* Set AURA ID to free pointer */
+	ptrs[0] = (count_eot << 32) | (aura & 0xFFFFF);
+	/* Target address for LMTST flush tells HW how many 128bit
+	 * words are valid from NPA_LF_AURA_BATCH_FREE0.
+	 *
+	 * tar_addr[6:4] is LMTST size-1 in units of 128b.
+	 */
+	if (num_ptrs > 2) {
+		size = (sizeof(u64) * num_ptrs) / 16;
+		if (!count_eot)
+			size++;
+		tar_addr |=  ((size - 1) & 0x7) << 4;
+	}
+	memcpy(lmt_addr, ptrs, sizeof(u64) * num_ptrs);
+	/* Perform LMTST flush */
+	cn10k_lmt_flush(val, tar_addr);
+}
+
+static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
+{
+	struct otx2_nic *pfvf = dev;
+	struct otx2_pool *pool;
+	u64 ptrs[2];
+
+	pool = &pfvf->qset.pool[aura];
+	ptrs[1] = buf;
+	__cn10k_aura_freeptr(pfvf, aura, ptrs, 2, pool->lmt_addr);
+}
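
A worked example of the batch encoding above (editorial; the arithmetic follows the code exactly):

/* Illustration only: freeing four buffers means num_ptrs = 5, since
 * ptrs[0] carries the aura word:
 *   count_eot     = 0  (5 is odd, last 128-bit word only half used)
 *   size          = (8 * 5) / 16 = 2, plus 1 for the partial word = 3
 *   tar_addr[6:4] = 3 - 1 = 2  (three 128-bit words to flush)
 *   val           = aura | BIT_ULL(63)  (the LMT id doubles as aura id)
 * The single-buffer wrapper always passes num_ptrs = 2, so count_eot = 1
 * and the size field is left untouched (num_ptrs is not > 2).
 */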

/* Alloc pointer from pool/aura */
static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura)
{
@@ -522,11 +575,12 @@ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura)
}

/* Free pointer to a pool/aura */
-static inline void otx2_aura_freeptr(struct otx2_nic *pfvf,
-				     int aura, u64 buf)
+static inline void otx2_aura_freeptr(void *dev, int aura, u64 buf)
{
-	otx2_write128(buf, (u64)aura | BIT_ULL(63),
-		      otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0));
+	struct otx2_nic *pfvf = dev;
+	void __iomem *addr = otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0);
+
+	otx2_write128(buf, (u64)aura | BIT_ULL(63), addr);
}
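
For contrast with the CN10K batch path, a short editorial note on the legacy free above:

/* Illustration only: on pre-CN10K silicon a free is one 128-bit store
 * to NPA_LF_AURA_OP_FREE0:
 *   low 64 bits  = buf                    (pointer being returned)
 *   high 64 bits = (u64)aura | BIT_ULL(63)
 * No LMT lines are involved, which is why otx2_hw_ops can use this
 * helper directly as its aura_freeptr callback.
 */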

static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx)
@@ -681,6 +735,10 @@ void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
+int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
+int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
+int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
+		      dma_addr_t *dma);

/* RSS configuration APIs*/
int otx2_rss_init(struct otx2_nic *pfvf);