Commit 4c236d5d authored by Geetha sowjanya, committed by David S. Miller

octeontx2-pf: cn10k: Use LMTST lines for NPA/NIX operations



This patch adds support for using the new LMTST lines for NPA batch free
and burst SQE flush. It adds a new dev_hw_ops structure to hold the
platform-specific functions and creates the new files cn10k.c and cn10k.h.

Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6e8ad438
drivers/net/ethernet/marvell/octeontx2/nic/Makefile  +1 −1
@@ -7,7 +7,7 @@ obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o
obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o

rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
-		     otx2_ptp.o otx2_flows.o
+		     otx2_ptp.o otx2_flows.o cn10k.o
rvu_nicvf-y := otx2_vf.o

ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c  +181 −0 (new file)
// SPDX-License-Identifier: GPL-2.0
/* Marvell OcteonTx2 RVU Physical Function ethernet driver
 *
 * Copyright (C) 2020 Marvell.
 */

#include "cn10k.h"
#include "otx2_reg.h"
#include "otx2_struct.h"

static struct dev_hw_ops	otx2_hw_ops = {
	.sq_aq_init = otx2_sq_aq_init,
	.sqe_flush = otx2_sqe_flush,
	.aura_freeptr = otx2_aura_freeptr,
	.refill_pool_ptrs = otx2_refill_pool_ptrs,
};

static struct dev_hw_ops cn10k_hw_ops = {
	.sq_aq_init = cn10k_sq_aq_init,
	.sqe_flush = cn10k_sqe_flush,
	.aura_freeptr = cn10k_aura_freeptr,
	.refill_pool_ptrs = cn10k_refill_pool_ptrs,
};

int cn10k_pf_lmtst_init(struct otx2_nic *pf)
{
	int size, num_lines;
	u64 base;

	if (!test_bit(CN10K_LMTST, &pf->hw.cap_flag)) {
		pf->hw_ops = &otx2_hw_ops;
		return 0;
	}

	pf->hw_ops = &cn10k_hw_ops;
	base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) +
		       (MBOX_SIZE * (pf->total_vfs + 1));

	size = pci_resource_len(pf->pdev, PCI_MBOX_BAR_NUM) -
	       (MBOX_SIZE * (pf->total_vfs + 1));

	pf->hw.lmt_base = ioremap(base, size);

	if (!pf->hw.lmt_base) {
		dev_err(pf->dev, "Unable to map PF LMTST region\n");
		return -ENOMEM;
	}

	/* FIXME: Get the num of LMTST lines from LMT table */
	pf->tot_lmt_lines = size / LMT_LINE_SIZE;
	num_lines = (pf->tot_lmt_lines - NIX_LMTID_BASE) /
			    pf->hw.tx_queues;
	/* Number of LMT lines per SQ */
	pf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines;

	pf->nix_lmt_size = pf->nix_lmt_lines * LMT_LINE_SIZE;
	return 0;
}
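
A worked example of the partitioning above (editorial; LMT_LINE_SIZE is assumed to be 128 bytes, and the region size and queue count are hypothetical):

/* Illustration only: a 2 MB PF LMTST region with 16 TX queues gives
 *   tot_lmt_lines = 2097152 / 128 = 16384
 *   num_lines     = (16384 - NIX_LMTID_BASE) / 16
 * capped at 32 lines per SQ, so
 *   nix_lmt_size  = 32 * 128 = 4096 bytes of LMT space per SQ.
 */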

int cn10k_vf_lmtst_init(struct otx2_nic *vf)
{
	int size, num_lines;

	if (!test_bit(CN10K_LMTST, &vf->hw.cap_flag)) {
		vf->hw_ops = &otx2_hw_ops;
		return 0;
	}

	vf->hw_ops = &cn10k_hw_ops;
	size = pci_resource_len(vf->pdev, PCI_MBOX_BAR_NUM);
	vf->hw.lmt_base = ioremap_wc(pci_resource_start(vf->pdev,
							PCI_MBOX_BAR_NUM),
				     size);
	if (!vf->hw.lmt_base) {
		dev_err(vf->dev, "Unable to map VF LMTST region\n");
		return -ENOMEM;
	}

	vf->tot_lmt_lines = size / LMT_LINE_SIZE;
	/* LMTST lines per SQ */
	num_lines = (vf->tot_lmt_lines - NIX_LMTID_BASE) /
			    vf->hw.tx_queues;
	vf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines;
	vf->nix_lmt_size = vf->nix_lmt_lines * LMT_LINE_SIZE;
	return 0;
}
EXPORT_SYMBOL(cn10k_vf_lmtst_init);

int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
{
	struct nix_cn10k_aq_enq_req *aq;
	struct otx2_nic *pfvf = dev;
	struct otx2_snd_queue *sq;

	sq = &pfvf->qset.sq[qidx];
	sq->lmt_addr = (__force u64 *)((u64)pfvf->hw.nix_lmt_base +
			       (qidx * pfvf->nix_lmt_size));

	/* Get memory to put this msg */
	aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
	if (!aq)
		return -ENOMEM;

	aq->sq.cq = pfvf->hw.rx_queues + qidx;
	aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */
	aq->sq.cq_ena = 1;
	aq->sq.ena = 1;
	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
	/* FIXME: set based on NIX_AF_DWRR_RPM_MTU */
	aq->sq.smq_rr_weight = OTX2_MAX_MTU;
	aq->sq.default_chan = pfvf->hw.tx_chan_base;
	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
	aq->sq.sqb_aura = sqb_aura;
	aq->sq.sq_int_ena = NIX_SQINT_BITS;
	aq->sq.qint_idx = 0;
	/* Due to pipelining impact, a minimum of 2000 unused SQ CQEs
	 * needs to be maintained to avoid CQ overflow.
	 */
	aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt));

	/* Fill AQ info */
	aq->qidx = qidx;
	aq->ctype = NIX_AQ_CTYPE_SQ;
	aq->op = NIX_AQ_INSTOP_INIT;

	return otx2_sync_mbox_msg(&pfvf->mbox);
}
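
A worked example of the cq_limit encoding above (editorial; the field is in units of CQ-size/256 per the formula, and sqe_cnt here is hypothetical):

/* Illustration only: with qset.sqe_cnt = 4096,
 *   cq_limit = (2000 * 256) / 4096 = 125
 * and 125 * (4096 / 256) = 2000 CQEs remain reserved as send skid,
 * matching the SEND_CQ_SKID headroom the comment above calls for.
 */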

#define NPA_MAX_BURST 16
void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
{
	struct otx2_nic *pfvf = dev;
	u64 ptrs[NPA_MAX_BURST];
	int num_ptrs = 1;
	dma_addr_t bufptr;

	/* Refill pool with new buffers */
	while (cq->pool_ptrs) {
		if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
			if (num_ptrs--)
				__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
						     num_ptrs,
						     cq->rbpool->lmt_addr);
			break;
		}
		cq->pool_ptrs--;
		ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM;
		num_ptrs++;
		if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
			__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
					     num_ptrs,
					     cq->rbpool->lmt_addr);
			num_ptrs = 1;
		}
	}
}
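
A note on the batching above (editorial sketch, not part of the patch):

/* Illustration only: ptrs[0] is reserved for the aura word that
 * __cn10k_aura_freeptr() writes, so num_ptrs starts at 1 and each
 * LMTST flush carries at most NPA_MAX_BURST - 1 = 15 buffer
 * pointers before num_ptrs resets to 1 for the next burst.
 */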

void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx)
{
	struct otx2_nic *pfvf = dev;
	int lmt_id = NIX_LMTID_BASE + (qidx * pfvf->nix_lmt_lines);
	u64 val = 0, tar_addr = 0;

	/* FIXME: val[0:10] LMT_ID.
	 * [12:15] no of LMTST - 1 in the burst.
	 * [19:63] data size of each LMTST in the burst except first.
	 */
	val = (lmt_id & 0x7FF);
	/* Target address for LMTST flush tells HW how many 128bit
	 * words are present.
	 * tar_addr[6:4] size of first LMTST - 1 in units of 128b.
	 */
	tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4;
	dma_wmb();
	memcpy(sq->lmt_addr, sq->sqe_base, size);
	cn10k_lmt_flush(val, tar_addr);

	sq->head++;
	sq->head &= (sq->sqe_cnt - 1);
}
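
A worked example of the encoding above for a single 128-byte SQE (editorial; register and field names as in the code):

/* Illustration only: one LMTST of size = 128 bytes.
 *   val[10:0]    = lmt_id  (burst count/size fields remain 0)
 *   (128/16) - 1 = 7  ->  tar_addr[6:4] = 7  (eight 128-bit words)
 * cn10k_lmt_flush(val, sq->io_addr | (7 << 4)) then pushes the SQE
 * that was memcpy'd into sq->lmt_addr out to NIX_LF_OP_SENDX(0).
 */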
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h  +17 −0 (new file)
/* SPDX-License-Identifier: GPL-2.0
 * Marvell OcteonTx2 RVU Ethernet driver
 *
 * Copyright (C) 2020 Marvell.
 */

#ifndef CN10K_H
#define CN10K_H

#include "otx2_common.h"

void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
int cn10k_pf_lmtst_init(struct otx2_nic *pf);
int cn10k_vf_lmtst_init(struct otx2_nic *vf);
#endif /* CN10K_H */
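
For context, a minimal sketch (editorial, not part of the patch) of how the selected ops are used; the probe-time call sites live in otx2_pf.c / otx2_vf.c, outside the hunks shown here:

/* Illustration only: cn10k_pf_lmtst_init() points pf->hw_ops at either
 * otx2_hw_ops or cn10k_hw_ops, so the hot paths dispatch without
 * per-packet silicon checks:
 */
err = cn10k_pf_lmtst_init(pf);
if (err)
	return err;
/* ... later, in the TX/refill paths ... */
pf->hw_ops->sqe_flush(pf, sq, size, qidx);
pf->hw_ops->aura_freeptr(pf, aura_id, bufptr);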
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c  +36 −48
@@ -15,6 +15,7 @@
#include "otx2_reg.h"
#include "otx2_common.h"
#include "otx2_struct.h"
#include "cn10k.h"

static void otx2_nix_rq_op_stats(struct queue_stats *stats,
				 struct otx2_nic *pfvf, int qidx)
@@ -526,6 +527,26 @@ static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
	return ret;
}

+int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
+		      dma_addr_t *dma)
+{
+	if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma))) {
+		struct refill_work *work;
+		struct delayed_work *dwork;
+
+		work = &pfvf->refill_wrk[cq->cq_idx];
+		dwork = &work->pool_refill_work;
+		/* Schedule a task if no other task is running */
+		if (!cq->refill_task_sched) {
+			cq->refill_task_sched = true;
+			schedule_delayed_work(dwork,
+					      msecs_to_jiffies(100));
+		}
+		return -ENOMEM;
+	}
+	return 0;
+}

void otx2_tx_timeout(struct net_device *netdev, unsigned int txq)
{
	struct otx2_nic *pfvf = netdev_priv(netdev);
@@ -728,9 +749,6 @@ void otx2_sqb_flush(struct otx2_nic *pfvf)
#define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */
#define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */

-/* Send skid of 2000 packets required for CQ size of 4K CQEs. */
-#define SEND_CQ_SKID	2000
-
static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
{
	struct otx2_qset *qset = &pfvf->qset;
@@ -764,45 +782,14 @@ static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
	return otx2_sync_mbox_msg(&pfvf->mbox);
}

-static int cn10k_sq_aq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
-{
-	struct nix_cn10k_aq_enq_req *aq;
-
-	/* Get memory to put this msg */
-	aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
-	if (!aq)
-		return -ENOMEM;
-
-	aq->sq.cq = pfvf->hw.rx_queues + qidx;
-	aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */
-	aq->sq.cq_ena = 1;
-	aq->sq.ena = 1;
-	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
-	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-	/* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/
-	aq->sq.smq_rr_weight = OTX2_MAX_MTU;
-	aq->sq.default_chan = pfvf->hw.tx_chan_base;
-	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
-	aq->sq.sqb_aura = sqb_aura;
-	aq->sq.sq_int_ena = NIX_SQINT_BITS;
-	aq->sq.qint_idx = 0;
-	/* Due pipelining impact minimum 2000 unused SQ CQE's
-	 * need to maintain to avoid CQ overflow.
-	 */
-	aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt));
-
-	/* Fill AQ info */
-	aq->qidx = qidx;
-	aq->ctype = NIX_AQ_CTYPE_SQ;
-	aq->op = NIX_AQ_INSTOP_INIT;
-
-	return otx2_sync_mbox_msg(&pfvf->mbox);
-}
-
-static int otx2_sq_aq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
+int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
{
+	struct otx2_nic *pfvf = dev;
+	struct otx2_snd_queue *sq;
	struct nix_aq_enq_req *aq;

+	sq = &pfvf->qset.sq[qidx];
+	sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx));
	/* Get memory to put this msg */
	aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
	if (!aq)
@@ -873,16 +860,12 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
	sq->sqe_thresh = ((sq->num_sqbs * sq->sqe_per_sqb) * 10) / 100;
	sq->aura_id = sqb_aura;
	sq->aura_fc_addr = pool->fc_addr->base;
-	sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx));
	sq->io_addr = (__force u64)otx2_get_regaddr(pfvf, NIX_LF_OP_SENDX(0));

	sq->stats.bytes = 0;
	sq->stats.pkts = 0;

-	if (is_dev_otx2(pfvf->pdev))
-		return otx2_sq_aq_init(pfvf, qidx, sqb_aura);
-	else
-		return cn10k_sq_aq_init(pfvf, qidx, sqb_aura);
+	return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura);

}

@@ -987,7 +970,7 @@ static void otx2_pool_refill_task(struct work_struct *work)
			}
			return;
		}
-		otx2_aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM);
+		pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM);
		cq->pool_ptrs--;
	}
	cq->refill_task_sched = false;
@@ -1231,6 +1214,11 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,

	pool->rbsize = buf_size;

+	/* Set LMTST addr for NPA batch free */
+	if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag))
+		pool->lmt_addr = (__force u64 *)((u64)pfvf->hw.npa_lmt_base +
+						 (pool_id * LMT_LINE_SIZE));

	/* Initialize this pool's context via AF */
	aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
	if (!aq) {
@@ -1319,7 +1307,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
		for (ptr = 0; ptr < num_sqbs; ptr++) {
			if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
				return -ENOMEM;
-			otx2_aura_freeptr(pfvf, pool_id, bufptr);
+			pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr);
			sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
		}
	}
@@ -1369,7 +1357,7 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
		for (ptr = 0; ptr < num_ptrs; ptr++) {
			if (otx2_alloc_rbuf(pfvf, pool, &bufptr))
				return -ENOMEM;
-			otx2_aura_freeptr(pfvf, pool_id,
-					  bufptr + OTX2_HEAD_ROOM);
+			pfvf->hw_ops->aura_freeptr(pfvf, pool_id,
+						   bufptr + OTX2_HEAD_ROOM);
		}
	}
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h  +63 −5
@@ -50,6 +50,9 @@ enum arua_mapped_qtypes {
#define NIX_LF_ERR_VEC				0x81
#define NIX_LF_POISON_VEC			0x82

+/* Send skid of 2000 packets required for CQ size of 4K CQEs. */
+#define SEND_CQ_SKID	2000

/* RSS configuration */
struct otx2_rss_ctx {
	u8  ind_tbl[MAX_RSS_INDIR_TBL_SIZE];
@@ -275,9 +278,18 @@ struct otx2_flow_config {
	struct list_head	flow_list;
};

+struct dev_hw_ops {
+	int	(*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura);
+	void	(*sqe_flush)(void *dev, struct otx2_snd_queue *sq,
+			     int size, int qidx);
+	void	(*refill_pool_ptrs)(void *dev, struct otx2_cq_queue *cq);
+	void	(*aura_freeptr)(void *dev, int aura, u64 buf);
+};

struct otx2_nic {
	void __iomem		*reg_base;
	struct net_device	*netdev;
+	struct dev_hw_ops	*hw_ops;
	void			*iommu_domain;
	u16			max_frs;
	u16			rbsize; /* Receive buffer size */
@@ -507,10 +519,51 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr)
}

#else
-#define otx2_write128(lo, hi, addr)
+#define otx2_write128(lo, hi, addr)		writeq((hi) | (lo), addr)
#define otx2_atomic64_add(incr, ptr)		({ *ptr += incr; })
#endif

+static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
+					u64 *ptrs, u64 num_ptrs,
+					u64 *lmt_addr)
+{
+	u64 size = 0, count_eot = 0;
+	u64 tar_addr, val = 0;
+
+	tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0);
+	/* LMTID is same as AURA Id */
+	val = (aura & 0x7FF) | BIT_ULL(63);
+	/* Set if [127:64] of last 128bit word has a valid pointer */
+	count_eot = (num_ptrs % 2) ? 0ULL : 1ULL;
+	/* Set AURA ID to free pointer */
+	ptrs[0] = (count_eot << 32) | (aura & 0xFFFFF);
+	/* Target address for LMTST flush tells HW how many 128bit
+	 * words are valid from NPA_LF_AURA_BATCH_FREE0.
+	 *
+	 * tar_addr[6:4] is LMTST size-1 in units of 128b.
+	 */
+	if (num_ptrs > 2) {
+		size = (sizeof(u64) * num_ptrs) / 16;
+		if (!count_eot)
+			size++;
+		tar_addr |=  ((size - 1) & 0x7) << 4;
+	}
+	memcpy(lmt_addr, ptrs, sizeof(u64) * num_ptrs);
+	/* Perform LMTST flush */
+	cn10k_lmt_flush(val, tar_addr);
+}
+
+static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
+{
+	struct otx2_nic *pfvf = dev;
+	struct otx2_pool *pool;
+	u64 ptrs[2];
+
+	pool = &pfvf->qset.pool[aura];
+	ptrs[1] = buf;
+	__cn10k_aura_freeptr(pfvf, aura, ptrs, 2, pool->lmt_addr);
+}
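
A worked example of the batch encoding above (editorial; the arithmetic follows the code exactly):

/* Illustration only: freeing four buffers means num_ptrs = 5, since
 * ptrs[0] carries the aura word:
 *   count_eot     = 0  (5 is odd, last 128-bit word only half used)
 *   size          = (8 * 5) / 16 = 2, plus 1 for the partial word = 3
 *   tar_addr[6:4] = 3 - 1 = 2  (three 128-bit words to flush)
 *   val           = aura | BIT_ULL(63)  (the LMT id doubles as aura id)
 * The single-buffer wrapper always passes num_ptrs = 2, so count_eot = 1
 * and the size field is left untouched (num_ptrs is not > 2).
 */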

/* Alloc pointer from pool/aura */
static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura)
{
@@ -522,11 +575,12 @@ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura)
}

/* Free pointer to a pool/aura */
-static inline void otx2_aura_freeptr(struct otx2_nic *pfvf,
-				     int aura, u64 buf)
+static inline void otx2_aura_freeptr(void *dev, int aura, u64 buf)
{
-	otx2_write128(buf, (u64)aura | BIT_ULL(63),
-		      otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0));
+	struct otx2_nic *pfvf = dev;
+	void __iomem *addr = otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0);
+
+	otx2_write128(buf, (u64)aura | BIT_ULL(63), addr);
}
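
For contrast with the CN10K batch path, a short editorial note on the legacy free above:

/* Illustration only: on pre-CN10K silicon a free is one 128-bit store
 * to NPA_LF_AURA_OP_FREE0:
 *   low 64 bits  = buf                    (pointer being returned)
 *   high 64 bits = (u64)aura | BIT_ULL(63)
 * No LMT lines are involved, which is why otx2_hw_ops can use this
 * helper directly as its aura_freeptr callback.
 */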

static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx)
@@ -681,6 +735,10 @@ void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq);
+int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
+int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
+int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq,
+		      dma_addr_t *dma);

/* RSS configuration APIs*/
int otx2_rss_init(struct otx2_nic *pfvf);