Commit 57f1915f authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'octeontx2-pf-HTB'



Hariprasad Kelam says:

====================
octeontx2-pf: HTB offload support

octeontx2 silicon and CN10K transmit interface consists of five
transmit levels starting from MDQ, TL4 to TL1. Once packets are
submitted to MDQ, hardware picks all active MDQs using strict
priority, and MDQs having the same priority level are chosen using
round robin. Each packet will traverse MDQ, TL4 to TL1 levels.
Each level contains an array of queues to support scheduling and
shaping.

As HTB supports a classful queuing mechanism with rate and
ceil parameters, allowing the user to control the absolute
bandwidth given to particular classes of traffic, the same can be
achieved by configuring shapers and schedulers on different transmit levels.

This series of patches adds support for HTB offload,

Patch1: Allow strict priority parameter in HTB offload mode.

Patch2: Rename existing total tx queues for better readability

Patch3: defines APIs such that the driver can dynamically initialize/
        deinitialize the send queues.

Patch4: Refactors transmit alloc/free calls as preparation for QOS
        offload code.

Patch5: moves rate limiting logic to common header which will be used
        by qos offload code.

Patch6: Adds actual HTB offload support.

Patch7: exposes qos send queue stats over ethtool.

Patch8: Add documentation about htb offload flow in driver
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b2cbac9b efe10306
Loading
Loading
Loading
Loading
+45 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ Contents
- `Drivers`_
- `Basic packet flow`_
- `Devlink health reporters`_
- `Quality of service`_

Overview
========
@@ -287,3 +288,47 @@ For example::
	 NIX_AF_ERR:
	        NIX Error Interrupt Reg : 64
	        Rx on unmapped PF_FUNC


Quality of service
==================


Hardware algorithms used in scheduling
--------------------------------------

octeontx2 silicon and CN10K transmit interface consists of five transmit levels
starting from SMQ/MDQ, TL4 to TL1. Each packet will traverse MDQ, TL4 to TL1
levels. Each level contains an array of queues to support scheduling and shaping.
The hardware uses the below algorithms depending on the priority of scheduler queues.
Once the user creates tc classes with different priorities, the driver configures
the schedulers allocated to each class with the specified priority along with the
rate-limiting configuration.

1. Strict Priority

      -  Once packets are submitted to MDQ, hardware picks all active MDQs having different priority
         using strict priority.

2. Round Robin

      - Active MDQs having the same priority level are chosen using round robin.


Setup HTB offload
-----------------

1. Enable HW TC offload on the interface::

        # ethtool -K <interface> hw-tc-offload on

2. Create htb root::

        # tc qdisc add dev <interface> clsact
        # tc qdisc replace dev <interface> root handle 1: htb offload

3. Create tc classes with different priorities::

        # tc class add dev <interface> parent 1: classid 1:1 htb rate 10Gbit prio 1

        # tc class add dev <interface> parent 1: classid 1:2 htb rate 10Gbit prio 7
+1 −1
Original line number Diff line number Diff line
@@ -142,7 +142,7 @@ enum nix_scheduler {

#define TXSCH_RR_QTM_MAX		((1 << 24) - 1)
#define TXSCH_TL1_DFLT_RR_QTM		TXSCH_RR_QTM_MAX
#define TXSCH_TL1_DFLT_RR_PRIO		(0x1ull)
#define TXSCH_TL1_DFLT_RR_PRIO		(0x7ull)
#define CN10K_MAX_DWRR_WEIGHT          16384 /* Weight is 14bit on CN10K */

/* Min/Max packet sizes, excluding FCS */
+5 −0
Original line number Diff line number Diff line
@@ -1222,6 +1222,11 @@ static int rvu_dbg_npa_ctx_display(struct seq_file *m, void *unused, int ctype)

	for (aura = id; aura < max_id; aura++) {
		aq_req.aura_id = aura;

		/* Skip if queue is uninitialized */
		if (ctype == NPA_AQ_CTYPE_POOL && !test_bit(aura, pfvf->pool_bmap))
			continue;

		seq_printf(m, "======%s : %d=======\n",
			   (ctype == NPA_AQ_CTYPE_AURA) ? "AURA" : "POOL",
			aq_req.aura_id);
+45 −0
Original line number Diff line number Diff line
@@ -1691,6 +1691,42 @@ handle_txschq_shaper_update(struct rvu *rvu, int blkaddr, int nixlf,
	return true;
}

/* Clear the PARENT and SCHEDULE CSRs of a transmit scheduler queue
 * at the given level so a freed/reallocated queue carries no stale
 * topology or scheduling state. Levels other than TL2/TL3/TL4/MDQ
 * are left untouched.
 */
static void nix_reset_tx_schedule(struct rvu *rvu, int blkaddr,
				  int lvl, int schq)
{
	u64 parent_reg = 0, schedule_reg = 0;

	switch (lvl) {
	case NIX_TXSCH_LVL_MDQ:
		/* SMQ_CFG needs no reset here: HW clears that CSR
		 * on SMQ flush.
		 */
		parent_reg   = NIX_AF_MDQX_PARENT(schq);
		schedule_reg = NIX_AF_MDQX_SCHEDULE(schq);
		break;
	case NIX_TXSCH_LVL_TL4:
		parent_reg   = NIX_AF_TL4X_PARENT(schq);
		schedule_reg = NIX_AF_TL4X_SCHEDULE(schq);
		break;
	case NIX_TXSCH_LVL_TL3:
		parent_reg   = NIX_AF_TL3X_PARENT(schq);
		schedule_reg = NIX_AF_TL3X_SCHEDULE(schq);
		break;
	case NIX_TXSCH_LVL_TL2:
		parent_reg   = NIX_AF_TL2X_PARENT(schq);
		schedule_reg = NIX_AF_TL2X_SCHEDULE(schq);
		break;
	default:
		/* TL1 and unknown levels: nothing to reset */
		return;
	}

	if (parent_reg)
		rvu_write64(rvu, blkaddr, parent_reg, 0x0);

	if (schedule_reg)
		rvu_write64(rvu, blkaddr, schedule_reg, 0x0);
}

/* Disable shaping of pkts by a scheduler queue
 * at a given scheduler level.
 */
@@ -2039,6 +2075,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
				pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
			nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
			nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
			nix_reset_tx_schedule(rvu, blkaddr, lvl, schq);
		}

		for (idx = 0; idx < req->schq[lvl]; idx++) {
@@ -2048,6 +2085,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
				pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
			nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
			nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
			nix_reset_tx_schedule(rvu, blkaddr, lvl, schq);
		}
	}

@@ -2143,6 +2181,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
				continue;
			nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
			nix_clear_tx_xoff(rvu, blkaddr, lvl, schq);
			nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
		}
	}
	nix_clear_tx_xoff(rvu, blkaddr, NIX_TXSCH_LVL_TL1,
@@ -2181,6 +2220,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
		for (schq = 0; schq < txsch->schq.max; schq++) {
			if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
				continue;
			nix_reset_tx_schedule(rvu, blkaddr, lvl, schq);
			rvu_free_rsrc(&txsch->schq, schq);
			txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
		}
@@ -2240,6 +2280,9 @@ static int nix_txschq_free_one(struct rvu *rvu,
	 */
	nix_clear_tx_xoff(rvu, blkaddr, lvl, schq);

	nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
	nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);

	/* Flush if it is a SMQ. Onus of disabling
	 * TL2/3 queue links before SMQ flush is on user
	 */
@@ -2249,6 +2292,8 @@ static int nix_txschq_free_one(struct rvu *rvu,
		goto err;
	}

	nix_reset_tx_schedule(rvu, blkaddr, lvl, schq);

	/* Free the resource */
	rvu_free_rsrc(&txsch->schq, schq);
	txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o

rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
               otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \
               otx2_devlink.o
               otx2_devlink.o qos_sq.o qos.o
rvu_nicvf-y := otx2_vf.o otx2_devlink.o

rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o
Loading