Commit ffdbc0fe authored by Jakub Kicinski
Browse files

Merge branch 'mlxsw-show-per-band-ecn-marked-counter-on-qdisc'

Ido Schimmel says:

====================
mlxsw: Show per-band ECN-marked counter on qdisc

The RED qdisc can expose the number of packets that it has marked through
the prob_marked counter (shown in iproute2 as "marked"). This counter
currently just shows the number of packets marked in the SW datapath, which
in a switch deployment likely means zero.

Spectrum-3 does support per-TC counters, and in this patchset, mlxsw
supports this RED statistic properly.
====================

Link: https://lore.kernel.org/r/20211013103748.492531-1-idosch@idosch.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents e79d8264 bf862732
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -4951,7 +4951,7 @@ enum mlxsw_reg_ppcnt_grp {
	MLXSW_REG_PPCNT_DISCARD_CNT = 0x6,
	MLXSW_REG_PPCNT_PRIO_CNT = 0x10,
	MLXSW_REG_PPCNT_TC_CNT = 0x11,
	MLXSW_REG_PPCNT_TC_CONG_TC = 0x13,
	MLXSW_REG_PPCNT_TC_CONG_CNT = 0x13,
};

/* reg_ppcnt_grp
@@ -5371,7 +5371,7 @@ MLXSW_ITEM64(reg, ppcnt, tx_pause_duration,
MLXSW_ITEM64(reg, ppcnt, tx_pause_transition,
	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);

/* Ethernet Per Traffic Group Counters */
/* Ethernet Per Traffic Class Counters */

/* reg_ppcnt_tc_transmit_queue
 * Contains the transmit queue depth in cells of traffic class
@@ -5398,6 +5398,12 @@ MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc,
MLXSW_ITEM64(reg, ppcnt, wred_discard,
	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);

/* reg_ppcnt_ecn_marked_tc
 * 64-bit counter located 0x08 past the start of the PPCNT counter set,
 * directly after wred_discard.
 * NOTE(review): presumably the number of packets that were ECN-marked on
 * the queried traffic class - confirm against the PRM.
 * Access: RO
 */
MLXSW_ITEM64(reg, ppcnt, ecn_marked_tc,
	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);

static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
					enum mlxsw_reg_ppcnt_grp grp,
					u8 prio_tc)
+8 −4
Original line number Diff line number Diff line
@@ -824,12 +824,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,

	for (i = 0; i < TC_MAX_QUEUE; i++) {
		err = mlxsw_sp_port_get_stats_raw(dev,
						  MLXSW_REG_PPCNT_TC_CONG_TC,
						  MLXSW_REG_PPCNT_TC_CONG_CNT,
						  i, ppcnt_pl);
		if (!err)
		if (err)
			goto tc_cnt;

		xstats->wred_drop[i] =
			mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl);
		xstats->tc_ecn[i] = mlxsw_reg_ppcnt_ecn_marked_tc_get(ppcnt_pl);

tc_cnt:
		err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CNT,
						  i, ppcnt_pl);
		if (err)
+1 −0
Original line number Diff line number Diff line
@@ -285,6 +285,7 @@ struct mlxsw_sp_port_vlan {
/* No need for an internal lock; at worst, a single periodic iteration is missed */
struct mlxsw_sp_port_xstats {
	u64 ecn;
	u64 tc_ecn[TC_MAX_QUEUE];
	u64 wred_drop[TC_MAX_QUEUE];
	u64 tail_drop[TC_MAX_QUEUE];
	u64 backlog[TC_MAX_QUEUE];
+7 −2
Original line number Diff line number Diff line
@@ -519,6 +519,7 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
					       mlxsw_sp_qdisc->prio_bitmap,
					       &stats_base->tx_packets,
					       &stats_base->tx_bytes);
	red_base->prob_mark = xstats->tc_ecn[tclass_num];
	red_base->prob_drop = xstats->wred_drop[tclass_num];
	red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num);

@@ -618,19 +619,22 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
	int tclass_num = mlxsw_sp_qdisc->tclass_num;
	struct mlxsw_sp_port_xstats *xstats;
	struct red_stats *res = xstats_ptr;
	int early_drops, pdrops;
	int early_drops, marks, pdrops;

	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;

	early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop;
	marks = xstats->tc_ecn[tclass_num] - xstats_base->prob_mark;
	pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) -
		 xstats_base->pdrop;

	res->pdrop += pdrops;
	res->prob_drop += early_drops;
	res->prob_mark += marks;

	xstats_base->pdrop += pdrops;
	xstats_base->prob_drop += early_drops;
	xstats_base->prob_mark += marks;
	return 0;
}

@@ -648,7 +652,8 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
	stats_base = &mlxsw_sp_qdisc->stats_base;

	mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr);
	overlimits = xstats->wred_drop[tclass_num] - stats_base->overlimits;
	overlimits = xstats->wred_drop[tclass_num] +
		     xstats->tc_ecn[tclass_num] - stats_base->overlimits;

	stats_ptr->qstats->overlimits += overlimits;
	stats_base->overlimits += overlimits;
+41 −10
Original line number Diff line number Diff line
@@ -331,6 +331,14 @@ get_nmarked()
	ethtool_stats_get $swp3 ecn_marked
}

# Fetch the ".marked" statistic of the qdisc on $swp3 whose handle
# get_qdisc_handle resolves for the given VLAN.
#   $1 - VLAN, passed on to get_qdisc_handle.
# The read is wrapped in busywait_for_counter with arguments "1100 +1".
# NOTE(review): presumably a timeout in ms and the minimum counter increment
# to wait for - confirm against the helper's definition.
get_qdisc_nmarked()
{
	local vlan=$1; shift

	busywait_for_counter 1100 +1 \
		qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked
}

get_qdisc_npackets()
{
	local vlan=$1; shift
@@ -384,14 +392,15 @@ build_backlog()

check_marking()
{
	local get_nmarked=$1; shift
	local vlan=$1; shift
	local cond=$1; shift

	local npackets_0=$(get_qdisc_npackets $vlan)
	local nmarked_0=$(get_nmarked $vlan)
	local nmarked_0=$($get_nmarked $vlan)
	sleep 5
	local npackets_1=$(get_qdisc_npackets $vlan)
	local nmarked_1=$(get_nmarked $vlan)
	local nmarked_1=$($get_nmarked $vlan)

	local nmarked_d=$((nmarked_1 - nmarked_0))
	local npackets_d=$((npackets_1 - npackets_0))
@@ -404,6 +413,7 @@ check_marking()
ecn_test_common()
{
	local name=$1; shift
	local get_nmarked=$1; shift
	local vlan=$1; shift
	local limit=$1; shift
	local backlog
@@ -416,7 +426,7 @@ ecn_test_common()
	RET=0
	backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
	check_err $? "Could not build the requested backlog"
	pct=$(check_marking $vlan "== 0")
	pct=$(check_marking "$get_nmarked" $vlan "== 0")
	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
	log_test "TC $((vlan - 10)): $name backlog < limit"

@@ -426,22 +436,23 @@ ecn_test_common()
	RET=0
	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
	check_err $? "Could not build the requested backlog"
	pct=$(check_marking $vlan ">= 95")
	pct=$(check_marking "$get_nmarked" $vlan ">= 95")
	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
	log_test "TC $((vlan - 10)): $name backlog > limit"
}

do_ecn_test()
__do_ecn_test()
{
	local get_nmarked=$1; shift
	local vlan=$1; shift
	local limit=$1; shift
	local name=ECN
	local name=${1-ECN}; shift

	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
			  $h3_mac tos=0x01
	sleep 1

	ecn_test_common "$name" $vlan $limit
	ecn_test_common "$name" "$get_nmarked" $vlan $limit

	# Up there we saw that UDP gets accepted when backlog is below the
	# limit. Now that it is above, it should all get dropped, and backlog
@@ -455,6 +466,26 @@ do_ecn_test()
	sleep 1
}

# ECN test entry point that takes its mark counts from get_nmarked.
#   $1 - VLAN under test.
#   $2 - limit, forwarded unchanged to __do_ecn_test.
# No test name is passed along, so __do_ecn_test uses its own default.
do_ecn_test()
{
	local vlan=$1; shift
	local limit=$1; shift

	__do_ecn_test get_nmarked "$vlan" "$limit"
}

# Variant of the ECN test that takes its mark counts from the qdisc's own
# statistics (get_qdisc_nmarked) and reports under the name "per-band ECN".
#   $1 - VLAN under test.
#   $2 - limit, forwarded unchanged to __do_ecn_test.
do_ecn_test_perband()
{
	local vlan=$1; shift
	local limit=$1; shift

	# Per-band ECN counters are not supported on Spectrum-1 and Spectrum-2,
	# so return successfully without running anything on those device IDs.
	case "$DEVLINK_VIDDID" in
	15b3:cb84|15b3:cf6c)
		return 0
		;;
	esac

	__do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN"
}

do_ecn_nodrop_test()
{
	local vlan=$1; shift
@@ -465,7 +496,7 @@ do_ecn_nodrop_test()
			  $h3_mac tos=0x01
	sleep 1

	ecn_test_common "$name" $vlan $limit
	ecn_test_common "$name" get_nmarked $vlan $limit

	# Up there we saw that UDP gets accepted when backlog is below the
	# limit. Now that it is above, in nodrop mode, make sure it goes to
@@ -495,7 +526,7 @@ do_red_test()
	RET=0
	backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
	check_err $? "Could not build the requested backlog"
	pct=$(check_marking $vlan "== 0")
	pct=$(check_marking get_nmarked $vlan "== 0")
	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
	log_test "TC $((vlan - 10)): RED backlog < limit"

@@ -503,7 +534,7 @@ do_red_test()
	RET=0
	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
	check_fail $? "Traffic went into backlog instead of being early-dropped"
	pct=$(check_marking $vlan "== 0")
	pct=$(check_marking get_nmarked $vlan "== 0")
	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
	local diff=$((limit - backlog))
	pct=$((100 * diff / limit))
Loading