Commit 0754d65b authored by Kiran Patil, committed by Tony Nguyen
Browse files

ice: Add infrastructure for mqprio support via ndo_setup_tc



Add infrastructure required for "ndo_setup_tc:qdisc_mqprio".
ice_vsi_setup is modified to configure traffic classes based
on mqprio data received from the stack. This includes low-level
functions to configure min, max rate-limit parameters in hardware
for traffic classes. Each traffic class gets mapped to a hardware
channel (VSI) which can be individually configured with different
bandwidth parameters.

Co-developed-by: Tarun Singh <tarun.k.singh@intel.com>
Signed-off-by: Tarun Singh <tarun.k.singh@intel.com>
Signed-off-by: Kiran Patil <kiran.patil@intel.com>
Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Sudheer Mogilappagari <sudheer.mogilappagari@intel.com>
Tested-by: Bharathi Sreenivas <bharathi.sreenivas@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
parent 2641b62d
Loading
Loading
Loading
Loading
+77 −1
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@
#include <linux/avf/virtchnl.h>
#include <linux/cpu_rmap.h>
#include <linux/dim.h>
#include <net/pkt_cls.h>
#include <net/devlink.h>
#include <net/ipv6.h>
#include <net/xdp_sock.h>
@@ -104,6 +105,10 @@
#define ICE_INVAL_VFID		256

#define ICE_MAX_RXQS_PER_TC		256	/* Used when setting VSI context per TC Rx queues */

/* Bounds of the channel TC range walked by ice_for_each_chnl_tc():
 * iteration starts at ICE_CHNL_START_TC and stops before ICE_CHNL_MAX_TC.
 * ICE_CHNL_MAX_TC also sizes the per-VSI tc_map_vsi[] array.
 */
#define ICE_CHNL_START_TC		1
#define ICE_CHNL_MAX_TC			16

#define ICE_MAX_RESET_WAIT		20

#define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) *	4)
@@ -145,6 +150,9 @@
#define ice_for_each_q_vector(vsi, i) \
	for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)

/* Iterate (i) over channel TC indices, from ICE_CHNL_START_TC up to but not
 * including ICE_CHNL_MAX_TC. Index 0 is not visited.
 */
#define ice_for_each_chnl_tc(i)	\
	for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++)

#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \
				ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX)

@@ -172,6 +180,21 @@ enum ice_feature {

DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key);

/* Per-channel state. Tx rings that belong to a channel point at one of these
 * through ring->ch, and the Tx queue setup paths use it to pick the queue
 * handle base, the source VSI number and the VSI the queue is enabled on.
 */
struct ice_channel {
	/* list linkage; presumably chained on the parent VSI's ch_list -
	 * TODO confirm against the code that adds channels
	 */
	struct list_head list;
	u8 type;
	u16 sw_id;
	/* first queue of the channel; subtracted from a ring's q_index to
	 * obtain the ring's queue handle within the channel
	 */
	u16 base_q;
	u16 num_rxq;
	u16 num_txq;
	/* VSI number programmed as src_vsi in the Tx queue context of rings
	 * that belong to this channel
	 */
	u16 vsi_num;
	u8 ena_tc;
	/* NOTE(review): looks like the VSI properties for the channel VSI -
	 * confirm against the code that fills it in
	 */
	struct ice_aqc_vsi_props info;
	/* Tx rate limits for the channel; units are not visible here -
	 * TODO confirm
	 */
	u64 max_tx_rate;
	u64 min_tx_rate;
	/* VSI backing this channel; its idx is used when enabling the
	 * channel's Tx queues and when filling Tx queue metadata
	 */
	struct ice_vsi *ch_vsi;
};

struct ice_txq_meta {
	u32 q_teid;	/* Tx-scheduler element identifier */
	u16 q_id;	/* Entry in VSI's txq_map bitmap */
@@ -189,7 +212,7 @@ struct ice_tc_info {

/* Traffic class configuration of a VSI: how many TCs are enabled, which
 * ones (as a bitmap), and the per-TC info for each of them.
 *
 * NOTE(review): ena_tc is listed twice below (u8 and u16). This text looks
 * like a diff rendered without +/- markers, so presumably the u8 line is
 * the pre-change declaration and the u16 line the post-change one -
 * confirm against the real header.
 */
struct ice_tc_cfg {
	u8 numtc; /* Total number of enabled TCs */
	u8 ena_tc; /* Tx map */
	u16 ena_tc; /* Tx map */
	struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
};

@@ -361,6 +384,34 @@ struct ice_vsi {

	struct net_device **target_netdevs;

	struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */

	/* Channel Specific Fields */
	struct ice_vsi *tc_map_vsi[ICE_CHNL_MAX_TC];
	u16 cnt_q_avail;
	u16 next_base_q;	/* next queue to be used for channel setup */
	struct list_head ch_list;
	u16 num_chnl_rxq;
	u16 num_chnl_txq;
	u16 ch_rss_size;
	/* store away rss size info before configuring ADQ channels so that,
	 * it can be used after tc-qdisc delete, to get back RSS setting as
	 * they were before
	 */
	u16 orig_rss_size;
	/* this keeps track of all enabled TCs, with and without DCB
	 * and inclusive of ADQ; vsi->mqprio_qopt keeps track of queue
	 * information
	 */
	u8 all_numtc;
	u16 all_enatc;

	/* store away TC info, to be used for rebuild logic */
	u8 old_numtc;
	u16 old_ena_tc;

	struct ice_channel *ch;

	/* setup back reference, to which aggregator node this VSI
	 * corresponds to
	 */
@@ -407,6 +458,7 @@ enum ice_pf_flags {
	ICE_FLAG_PTP,			/* PTP is enabled by software */
	ICE_FLAG_AUX_ENA,
	ICE_FLAG_ADV_FEATURES,
	ICE_FLAG_TC_MQPRIO,		/* support for Multi queue TC */
	ICE_FLAG_CLS_FLOWER,
	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
	ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
@@ -704,6 +756,30 @@ static inline void ice_clear_sriov_cap(struct ice_pf *pf)
			((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
#define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx)

/**
 * ice_is_adq_active - report whether ADQ is configured on this PF
 * @pf: pointer to PF
 *
 * ADQ counts as active when the main VSI exists, that VSI has more than
 * ICE_CHNL_START_TC traffic classes configured, and ICE_FLAG_TC_MQPRIO is
 * set in the PF flags.
 *
 * Return: true if ADQ is configured, false otherwise
 */
static inline bool ice_is_adq_active(struct ice_pf *pf)
{
	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);

	/* no main VSI means nothing could have been configured */
	if (!main_vsi)
		return false;

	/* a TC count at or below the channel start TC means no ADQ TCs */
	if (main_vsi->tc_cfg.numtc <= ICE_CHNL_START_TC)
		return false;

	return test_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
}

bool netif_is_ice(struct net_device *dev);
int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
+28 −6
Original line number Diff line number Diff line
@@ -213,6 +213,9 @@ static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8
{
	WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n");

	if (ring->ch)
		return ring->q_index - ring->ch->base_q;

	/* Idea here for calculation is that we subtract the number of queue
	 * count from TC that ring belongs to from its absolute queue index
	 * and as a result we get the queue's index within TC.
@@ -300,6 +303,9 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf
	case ICE_VSI_LB:
	case ICE_VSI_CTRL:
	case ICE_VSI_PF:
		if (ring->ch)
			tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
		else
			tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
		break;
	case ICE_VSI_VF:
@@ -315,6 +321,9 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf
	}

	/* make sure the context is associated with the right VSI */
	if (ring->ch)
		tlan_ctx->src_vsi = ring->ch->vsi_num;
	else
		tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);

	/* Restrict Tx timestamps to the PF VSI */
@@ -747,6 +756,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
	u8 buf_len = struct_size(qg_buf, txqs, 1);
	struct ice_tlan_ctx tlan_ctx = { 0 };
	struct ice_aqc_add_txqs_perq *txq;
	struct ice_channel *ch = ring->ch;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	enum ice_status status;
@@ -785,8 +795,14 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
		ring->q_handle = ice_calc_txq_handle(vsi, ring, tc);
	}

	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
				 1, qg_buf, buf_len, NULL);
	if (ch)
		status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0,
					 ring->q_handle, 1, qg_buf, buf_len,
					 NULL);
	else
		status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc,
					 ring->q_handle, 1, qg_buf, buf_len,
					 NULL);
	if (status) {
		dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %s\n",
			ice_stat_str(status));
@@ -967,6 +983,7 @@ void
ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring,
		  struct ice_txq_meta *txq_meta)
{
	struct ice_channel *ch = ring->ch;
	u8 tc;

	if (IS_ENABLED(CONFIG_DCB))
@@ -977,6 +994,11 @@ ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring,
	txq_meta->q_id = ring->reg_idx;
	txq_meta->q_teid = ring->txq_teid;
	txq_meta->q_handle = ring->q_handle;
	if (ch) {
		txq_meta->vsi_idx = ch->ch_vsi->idx;
		txq_meta->tc = 0;
	} else {
		txq_meta->vsi_idx = vsi->idx;
		txq_meta->tc = tc;
	}
}
+139 −62
Original line number Diff line number Diff line
@@ -4,53 +4,11 @@
#include "ice_dcb_lib.h"
#include "ice_dcb_nl.h"

/**
 * ice_vsi_cfg_netdev_tc - program the netdev's TC layout from the VSI
 * @vsi: the VSI being configured
 * @ena_tc: TC map to be enabled; zero resets the netdev TC configuration
 *
 * Does nothing when the VSI has no netdev. Otherwise sets the number of TCs
 * on the netdev, maps each TC enabled in the VSI's TC config to its Tx queue
 * range, and finally maps every user priority to a netdev TC using the local
 * DCBX ETS priority table.
 */
void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
{
	struct net_device *ndev = vsi->netdev;
	struct ice_dcbx_cfg *dcbx;
	int idx;

	if (!ndev)
		return;

	/* an empty TC map means "tear the TC configuration down" */
	if (!ena_tc) {
		netdev_reset_tc(ndev);
		return;
	}

	/* nothing more can be mapped if the TC count is rejected */
	if (netdev_set_num_tc(ndev, vsi->tc_cfg.numtc))
		return;

	dcbx = &vsi->back->hw.port_info->qos_cfg.local_dcbx_cfg;

	/* hand each enabled TC its Tx queue count and offset */
	ice_for_each_traffic_class(idx) {
		struct ice_tc_info *tci = &vsi->tc_cfg.tc_info[idx];

		if (vsi->tc_cfg.ena_tc & BIT(idx)) {
			netdev_set_tc_queue(ndev, tci->netdev_tc,
					    tci->qcount_tx, tci->qoffset);
		}
	}

	/* translate every user priority into its netdev TC via the ETS table */
	for (idx = 0; idx < ICE_MAX_USER_PRIORITY; idx++) {
		u8 ets_tc = dcbx->etscfg.prio_table[idx];

		netdev_set_prio_tc_map(ndev, idx,
				       vsi->tc_cfg.tc_info[ets_tc].netdev_tc);
	}
}

/**
 * ice_dcb_get_ena_tc - return bitmap of enabled TCs
 * @dcbcfg: DCB config to evaluate for enabled TCs
 */
u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
static u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
{
	u8 i, num_tc, ena_tc = 1;

@@ -178,6 +136,67 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
	return ret;
}

/**
 * ice_get_first_droptc - find the lowest enabled TC without PFC
 * @vsi: VSI whose port's local DCBX config is inspected
 *
 * Walks the TCs reported by the local DCBX configuration and picks the first
 * one that is enabled but not PFC enabled. If no such TC is found, TC 0 is
 * used.
 *
 * Return: index of the first drop TC, or 0 when none is found
 */
static u8 ice_get_first_droptc(struct ice_vsi *vsi)
{
	struct ice_dcbx_cfg *dcbx = &vsi->port_info->qos_cfg.local_dcbx_cfg;
	struct device *dev = ice_pf_to_dev(vsi->back);
	u8 tc_cnt, enabled_map, pfc_map;
	u8 tc;

	tc_cnt = ice_dcb_get_num_tc(dcbx);

	/* bitmap of enabled TCs */
	enabled_map = ice_dcb_get_ena_tc(dcbx);

	/* bitmap of TCs that have PFC enabled */
	pfc_map = dcbx->pfc.pfcena;

	for (tc = 0; tc < tc_cnt; tc++) {
		/* skip TCs that are disabled or protected by PFC */
		if (!(enabled_map & BIT(tc)) || (pfc_map & BIT(tc)))
			continue;

		dev_dbg(dev, "first drop tc = %d\n", tc);
		return tc;
	}

	/* no candidate found, fall back to TC 0 */
	dev_dbg(dev, "first drop tc = 0\n");
	return 0;
}

/**
 * ice_vsi_set_dcb_tc_cfg - derive a VSI's TC map and TC count from DCB
 * @vsi: pointer to the VSI instance
 *
 * A PF VSI takes both the enabled-TC bitmap and the TC count from the local
 * DCBX configuration. A channel VSI gets a single TC, the first drop TC.
 * Every other VSI type gets a single TC with the default traffic class map.
 */
void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
{
	struct ice_dcbx_cfg *dcbx = &vsi->port_info->qos_cfg.local_dcbx_cfg;

	if (vsi->type == ICE_VSI_PF) {
		vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(dcbx);
		vsi->tc_cfg.numtc = ice_dcb_get_num_tc(dcbx);
		return;
	}

	/* all remaining VSI types run on exactly one TC */
	if (vsi->type == ICE_VSI_CHNL)
		vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi));
	else
		vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;

	vsi->tc_cfg.numtc = 1;
}

/**
 * ice_dcb_get_tc - Get the TC associated with the queue
 * @vsi: ptr to the VSI
@@ -218,11 +237,68 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)

		qoffset = vsi->tc_cfg.tc_info[n].qoffset;
		qcount = vsi->tc_cfg.tc_info[n].qcount_tx;
		for (i = qoffset; i < (qoffset + qcount); i++)
			vsi->tx_rings[i]->dcb_tc = n;

		qcount = vsi->tc_cfg.tc_info[n].qcount_rx;
		for (i = qoffset; i < (qoffset + qcount); i++)
			vsi->rx_rings[i]->dcb_tc = n;
	}
	/* applicable only if "all_enatc" is set, which will be set from
	 * setup_tc method as part of configuring channels
	 */
	if (vsi->all_enatc) {
		u8 first_droptc = ice_get_first_droptc(vsi);

		/* When DCB is configured, TC for ADQ queues (which are really
		 * PF queues) should be the first drop TC of the main VSI
		 */
		ice_for_each_chnl_tc(n) {
			if (!(vsi->all_enatc & BIT(n)))
				break;

			qoffset = vsi->mqprio_qopt.qopt.offset[n];
			qcount = vsi->mqprio_qopt.qopt.count[n];
			for (i = qoffset; i < (qoffset + qcount); i++) {
			tx_ring = vsi->tx_rings[i];
			rx_ring = vsi->rx_rings[i];
			tx_ring->dcb_tc = n;
			rx_ring->dcb_tc = n;
				vsi->tx_rings[i]->dcb_tc = first_droptc;
				vsi->rx_rings[i]->dcb_tc = first_droptc;
			}
		}
	}
}

/**
 * ice_dcb_ena_dis_vsi - enable or disable the VSIs touched by DCB changes
 * @pf: pointer to the PF instance
 * @ena: true to enable VSIs, false to disable
 * @locked: true if caller holds RTNL lock, false otherwise
 *
 * VSIs of type PF, SWITCHDEV_CTRL and CHNL have to be brought down before a
 * new DCB configuration is applied and brought back up afterwards. This
 * helper walks all VSIs of the PF and performs the requested direction on
 * those types only; empty slots and all other VSI types are left alone.
 */
static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked)
{
	int idx;

	ice_for_each_vsi(pf, idx) {
		struct ice_vsi *cur = pf->vsi[idx];

		if (!cur)
			continue;

		/* only PF, switchdev control and channel VSIs are affected */
		if (cur->type != ICE_VSI_CHNL &&
		    cur->type != ICE_VSI_SWITCHDEV_CTRL &&
		    cur->type != ICE_VSI_PF)
			continue;

		if (ena)
			ice_ena_vsi(cur, locked);
		else
			ice_dis_vsi(cur, locked);
	}
}
@@ -331,7 +407,9 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
	 */
	if (!locked)
		rtnl_lock();
	ice_dis_vsi(pf_vsi, true);

	/* disable VSIs affected by DCB changes */
	ice_dcb_ena_dis_vsi(pf, false, true);

	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
	memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
@@ -359,7 +437,8 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
	ice_pf_dcb_recfg(pf);

out:
	ice_ena_vsi(pf_vsi, true);
	/* enable previously downed VSIs */
	ice_dcb_ena_dis_vsi(pf, true, true);
	if (!locked)
		rtnl_unlock();
free_cfg:
@@ -674,6 +753,8 @@ void ice_pf_dcb_recfg(struct ice_pf *pf)
				tc_map = ICE_DFLT_TRAFFIC_CLASS;
				ice_dcb_noncontig_cfg(pf);
			}
		} else if (vsi->type == ICE_VSI_CHNL) {
			tc_map = BIT(ice_get_first_droptc(vsi));
		} else {
			tc_map = ICE_DFLT_TRAFFIC_CLASS;
		}
@@ -684,10 +765,11 @@ void ice_pf_dcb_recfg(struct ice_pf *pf)
				vsi->idx);
			continue;
		}
		/* no need to proceed with remaining cfg if it is switchdev
		 * VSI
		/* no need to proceed with remaining cfg if it is CHNL
		 * or switchdev VSI
		 */
		if (vsi->type == ICE_VSI_SWITCHDEV_CTRL)
		if (vsi->type == ICE_VSI_CHNL ||
		    vsi->type == ICE_VSI_SWITCHDEV_CTRL)
			continue;

		ice_vsi_map_rings_to_vectors(vsi);
@@ -862,7 +944,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
	struct ice_dcbx_cfg tmp_dcbx_cfg;
	bool need_reconfig = false;
	struct ice_port_info *pi;
	struct ice_vsi *pf_vsi;
	u8 mib_type;
	int ret;

@@ -938,14 +1019,9 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
	}

	pf_vsi = ice_get_main_vsi(pf);
	if (!pf_vsi) {
		dev_dbg(dev, "PF VSI doesn't exist\n");
		goto out;
	}

	rtnl_lock();
	ice_dis_vsi(pf_vsi, true);
	/* disable VSIs affected by DCB changes */
	ice_dcb_ena_dis_vsi(pf, false, true);

	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
	if (ret) {
@@ -956,7 +1032,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
	/* changes in configuration update VSI */
	ice_pf_dcb_recfg(pf);

	ice_ena_vsi(pf_vsi, true);
	/* enable previously downed VSIs */
	ice_dcb_ena_dis_vsi(pf, true, true);
unlock_rtnl:
	rtnl_unlock();
out:
+6 −4
Original line number Diff line number Diff line
@@ -16,7 +16,6 @@

void ice_dcb_rebuild(struct ice_pf *pf);
int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked);
u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi);
bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue);
@@ -34,8 +33,6 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
void
ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
				    struct ice_rq_event_info *event);
void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);

/**
 * ice_find_q_in_range
 * @low: start of queue range for a TC i.e. offset of TC
@@ -69,6 +66,12 @@ static inline u8 ice_get_pfc_mode(struct ice_pf *pf)
#else
static inline void ice_dcb_rebuild(struct ice_pf *pf) { }

/* Stub used in the #else branch of the CONFIG_DCB conditional: gives the VSI
 * a single TC with the default traffic class map.
 */
static inline void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
{
	vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
	vsi->tc_cfg.numtc = 1;
}

static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
{
	return ICE_DFLT_TRAFFIC_CLASS;
@@ -130,7 +133,6 @@ static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { }
static inline void ice_update_dcb_stats(struct ice_pf *pf) { }
static inline void
ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { }
static inline void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) { }
static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { }
#endif /* CONFIG_DCB */
#endif /* _ICE_DCB_LIB_H_ */
+1 −1
Original line number Diff line number Diff line
@@ -329,7 +329,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
/* Create the switchdev control VSI: a thin wrapper around ice_vsi_setup()
 * with type ICE_VSI_SWITCHDEV_CTRL and ICE_INVAL_VFID as the VF id.
 *
 * NOTE(review): two return lines appear below. This text looks like a diff
 * rendered without +/- markers, so presumably the first is the pre-change
 * call and the second (with the extra trailing NULL argument) the
 * post-change call - confirm against the real source file.
 */
static struct ice_vsi *
ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
{
	return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID);
	return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID, NULL);
}

/**
Loading