Commit e13dbc4f authored by David S. Miller
Browse files

Merge branch 'mlxsw-PFC-and-headroom-selftests'

Petr Machata says:

====================
mlxsw: PFC and headroom selftests

Recent changes in the headroom management code made it clear that an
automated way of testing this functionality is needed. This patchset brings
two tests: a synthetic headroom behavior test, which verifies the mechanics
of headroom management, and a PFC test, which verifies whether this behavior
actually translates into a working lossless configuration.

Both of these tests rely on mlnx_qos[1], a tool that interfaces with Linux
DCB API. The tool was originally written to work with Mellanox NICs, but
does not actually rely on anything Mellanox-specific, and can be used for
mlxsw as well as for any other NIC-like driver. Unlike Open LLDP, it does
support buffer commands and permits a fire-and-forget approach to
configuration, which makes it very handy for writing selftests.

Patches #1-#3 extend the selftest devlink_lib.sh in various ways. Patch #4
then adds a helper wrapper for mlnx_qos to mlxsw's qos_lib.sh.

Patch #5 adds a test for management of port headroom.

Patch #6 adds a PFC test.

[1] https://github.com/Mellanox/mlnx-tools/


====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 611ba753 bfa80478
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -147,17 +147,26 @@ switch_create()

	# Make sure that ingress quotas are smaller than egress so that there is
	# room for both streams of traffic to be admitted to shared buffer.
	devlink_pool_size_thtype_save 0
	devlink_pool_size_thtype_set 0 dynamic 10000000
	devlink_pool_size_thtype_save 4
	devlink_pool_size_thtype_set 4 dynamic 10000000

	devlink_port_pool_th_save $swp1 0
	devlink_port_pool_th_set $swp1 0 6
	devlink_tc_bind_pool_th_save $swp1 1 ingress
	devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6

	devlink_port_pool_th_save $swp2 0
	devlink_port_pool_th_set $swp2 0 6
	devlink_tc_bind_pool_th_save $swp2 2 ingress
	devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6

	devlink_tc_bind_pool_th_save $swp3 1 egress
	devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
	devlink_tc_bind_pool_th_save $swp3 2 egress
	devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
	devlink_port_pool_th_save $swp3 4
	devlink_port_pool_th_set $swp3 4 7
}

+379 −0
Original line number Diff line number Diff line
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

# Names of the test functions defined in this file, in the order listed.
# NOTE(review): presumably consumed by tests_run from the sourced library --
# confirm against lib.sh.
ALL_TESTS="
	test_defaults
	test_dcb_ets
	test_mtu
	test_pfc
	test_int_buf
	test_tc_priomap
	test_tc_mtu
	test_tc_sizes
	test_tc_int_buf
"

# Shared helper libraries are located relative to this script's own path.
lib_dir=$(dirname $0)/../../../net/forwarding

# Set before lib.sh is sourced.
# NOTE(review): presumably tells the library that this test needs no
# interface pairs from the topology -- confirm against lib.sh.
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
# qos_lib.sh is sourced by bare name rather than through $lib_dir.
source qos_lib.sh

# Every test below operates on this single port. NETIF_NO_CABLE is not set in
# this file; it has to come from the sourced libraries or the environment.
swp=$NETIF_NO_CABLE

# EXIT-trap handler. The only teardown step performed here is the
# pre_cleanup hook; its status is handed back to the caller.
cleanup()
{
	pre_cleanup
	return $?
}

# Print the values found on the "buffer" row of the PFC section that
# __mlnx_qos reports for $swp, separated by single spaces and with the row
# label removed.
get_prio_pg()
{
	# The PFC section runs from the line starting with "PFC" to the next
	# line that starts with a non-blank character. Within it keep only the
	# "buffer" row, squeeze runs of spaces, then drop the first field.
	__mlnx_qos -i $swp |
		sed -n '/^PFC/,/^[^[:space:]]/{/buffer/p;}' |
		tr -s ' ' |
		cut -d' ' -f 2-
}

# Print the values found on the "enabled" row of the PFC section that
# __mlnx_qos reports for $swp, separated by single spaces and with the row
# label removed.
get_prio_pfc()
{
	# The PFC section runs from the line starting with "PFC" to the next
	# line that starts with a non-blank character. Within it keep only the
	# "enabled" row, squeeze runs of spaces, then drop the first field.
	__mlnx_qos -i $swp |
		sed -n '/^PFC/,/^[^[:space:]]/{/enabled/p;}' |
		tr -s ' ' |
		cut -d' ' -f 2-
}

# Print, for each priority in ascending numeric order, the traffic class it
# is listed under in the "tc" part of the __mlnx_qos report for $swp. Every
# value is followed by a single space.
get_prio_tc()
{
	# Each line starting with "tc" names the current traffic class in its
	# second field; each following "priority:" line names a priority
	# assigned to it.
	#
	# The result is walked by ascending index instead of with
	# "for (i in PRIO)", because awk leaves the order of that traversal
	# unspecified while the callers compare against a priority-ordered
	# string.
	__mlnx_qos -i $swp | sed -n '/^tc/,$p' |
		awk 'BEGIN { MAX = -1 }
		     /^tc/ { TC = $2 }
		     /priority:/ {
			PRIO[$2] = TC
			if ($2 + 0 > MAX)
			    MAX = $2 + 0
		     }
		     END {
			for (i = 0; i <= MAX; i++)
			    if (i in PRIO)
				printf("%d ", PRIO[i])
		     }'
}

# Print one entry of the comma-separated list on the "Receive" line of the
# __mlnx_qos report for $swp.
# Arguments: $1 - zero-based index of the entry to print
get_buf_size()
{
	local idx=$1; shift
	# cut numbers its fields from 1.
	local field=$((idx + 1))

	# On the "Receive" line, strip everything up to the last ": " and
	# then select the requested comma-separated field.
	__mlnx_qos -i $swp |
		sed -n '/Receive/{s/.*: //;p;}' |
		cut -d, -f $field
}

# Print whatever follows "total_size=" on the "Receive" line of the
# __mlnx_qos report for $swp.
get_tot_size()
{
	__mlnx_qos -i $swp | sed -n '/Receive/{s/.*total_size=//;p;}'
}

# Compare the current output of get_prio_pg with the expected string and
# report the outcome through check_err.
# Arguments: $1 - expected value, compared as an exact string
check_prio_pg()
{
	local expect=$1
	local current

	current=$(get_prio_pg)
	[ "$current" = "$expect" ]
	check_err $? "prio2buffer is '$current', expected '$expect'"
}

# Compare the current output of get_prio_pfc with the expected string and
# report the outcome through check_err.
# Arguments: $1 - expected value, compared as an exact string
check_prio_pfc()
{
	local expect=$1
	local current

	current=$(get_prio_pfc)
	[ "$current" = "$expect" ]
	check_err $? "prio PFC is '$current', expected '$expect'"
}

# Compare the current output of get_prio_tc with the expected string and
# report the outcome through check_err.
# Arguments: $1 - expected value, compared as an exact string
check_prio_tc()
{
	local expect=$1
	local current

	current=$(get_prio_tc)
	[ "$current" = "$expect" ]
	check_err $? "prio_tc is '$current', expected '$expect'"
}

# Evaluate an arithmetic condition against the value printed by get_buf_size,
# report the outcome through check_err, and print that value.
# Arguments: $1 - index passed on to get_buf_size
#            $2 - right-hand part of the condition, e.g. "> 0" or "== 42"
#            $3 - optional prefix for the failure message
__check_buf_size()
{
	local idx=$1
	local expr=$2
	local what=$3
	local current
	local rc

	current=$(get_buf_size $idx)

	# $expr is spliced in textually, so the condition evaluated is
	# "current <expr>".
	(( current $expr ))
	rc=$?

	check_err $rc "${what}buffer $idx size is '$current', expected '$expr'"
	echo $current
}

# Same check as __check_buf_size, with the value that helper prints on
# standard output discarded. All arguments are passed through unchanged.
check_buf_size()
{
	__check_buf_size "$@" 1>/dev/null
}

# Expect the priority-to-buffer map, the priority-to-TC map and the PFC
# enablement to be all zeroes.
test_defaults()
{
	local all_zero="0 0 0 0 0 0 0 0 "
	local check

	RET=0

	for check in check_prio_pg check_prio_tc check_prio_pfc; do
		$check "$all_zero"
	done

	log_test "Default headroom configuration"
}

# Set a priority-to-TC map through __mlnx_qos and expect the
# priority-to-buffer map to follow it, then reset the map and expect a
# direct prio2buffer request to be refused.
test_dcb_ets()
{
	local shuffled="0 2 4 6 1 3 5 7 "
	local zeros="0 0 0 0 0 0 0 0 "
	local rc

	RET=0

	__mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null

	check_prio_pg "$shuffled"
	check_prio_tc "$shuffled"
	check_prio_pfc "$zeros"

	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null

	check_prio_pg "$zeros"
	check_prio_tc "$zeros"

	# This request is expected to fail, so its diagnostics are hidden too.
	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
	rc=$?
	check_fail $rc "prio2buffer accepted in DCB mode"

	log_test "Configuring headroom through ETS"
}

# Check that the size reported for buffer 0 grows with the port MTU and
# returns to its initial value once the MTU is restored.
# Arguments: $1 - optional prefix for the logged test name and is empty when
#                 the function is called without arguments
test_mtu()
{
	local what=$1; shift
	local buf0size_2
	local buf0size

	RET=0

	# Each size is checked in the current shell and read in a separate
	# step. Running the check inside $(...) would execute check_err in a
	# subshell, and a failure recorded there would be lost before log_test
	# runs.
	check_buf_size 0 "> 0"
	buf0size=$(get_buf_size 0)

	mtu_set $swp 3000
	check_buf_size 0 "> $buf0size" "MTU 3000: "
	buf0size_2=$(get_buf_size 0)
	mtu_restore $swp

	mtu_set $swp 6000
	check_buf_size 0 "> $buf0size_2" "MTU 6000: "
	mtu_restore $swp

	check_buf_size 0 "== $buf0size"

	log_test "${what}MTU impacts buffer size"
}

# Run test_mtu with a root qdisc installed on $swp.
test_tc_mtu()
{
	local -a root_qdisc=(root handle 1: bfifo limit 1.5M)

	# In TC mode, MTU still impacts the threshold below which a buffer is
	# not permitted to go.

	tc qdisc replace dev $swp "${root_qdisc[@]}"
	test_mtu "TC: "
	tc qdisc delete dev $swp root
}

# Exercise buffer sizing around PFC in four logged steps: sizes without PFC,
# PFC with cable length 0, PFC with cable length 1000, and restoring the
# all-zero configuration.
test_pfc()
{
	local zeros="0 0 0 0 0 0 0 0 "
	local -a nopfc_size
	local pfc_size
	local pg

	RET=0

	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null

	# Remember the sizes of buffers 0-3 before PFC is enabled.
	for pg in 0 1 2 3; do
		nopfc_size[pg]=$(get_buf_size $pg)
	done
	for pg in 0 1 2 3; do
		check_buf_size $pg "> 0"
	done
	for pg in 4 5 6 7; do
		check_buf_size $pg "== 0"
	done

	log_test "Buffer size sans PFC"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null

	check_prio_pg "0 0 0 0 0 1 2 3 "
	check_prio_pfc "0 0 0 0 0 1 1 1 "
	check_buf_size 0 "== ${nopfc_size[0]}"
	for pg in 1 2 3; do
		check_buf_size $pg "> ${nopfc_size[pg]}"
	done

	# Buffers 2 and 3 are expected to match buffer 1.
	pfc_size=$(get_buf_size 1)
	for pg in 2 3; do
		check_buf_size $pg "== $pfc_size"
	done

	log_test "PFC: Cable length 0"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null

	# Compared against the size measured with cable length 0.
	check_buf_size 0 "== ${nopfc_size[0]}"
	for pg in 1 2 3; do
		check_buf_size $pg "> $pfc_size"
	done

	log_test "PFC: Cable length 1000"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null

	check_prio_pg "$zeros"
	check_prio_tc "$zeros"
	check_buf_size 0 "> 0"
	for pg in 1 2 3 4 5 6 7; do
		check_buf_size $pg "== 0"
	done

	log_test "PFC: Restore defaults"
}

# Check how the priority-to-buffer map reacts to installing and removing a
# root qdisc, and that prio2buffer can be set while the qdisc is present.
test_tc_priomap()
{
	local identity="0 1 2 3 4 5 6 7 "
	local -a root_qdisc=(root handle 1: bfifo limit 1.5M)

	RET=0

	__mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
	check_prio_pg "$identity"

	# With the qdisc installed the map is expected to read all zeroes.
	tc qdisc replace dev $swp "${root_qdisc[@]}"
	check_prio_pg "0 0 0 0 0 0 0 0 "

	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
	check_prio_pg "1 3 5 7 0 2 4 6 "

	# Without the qdisc the map is expected to follow prio_tc again.
	tc qdisc delete dev $swp root
	check_prio_pg "$identity"

	# Clean up.
	tc qdisc replace dev $swp "${root_qdisc[@]}"
	__mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
	tc qdisc delete dev $swp root
	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null

	log_test "TC: priomap"
}

# Check that an explicit buffer size is refused without a root qdisc,
# accepted with one, unaffected by an MTU change, and still settable after
# the qdisc is replaced by one of the same or of a different kind.
test_tc_sizes()
{
	local cell_size=$(devlink_cell_size_get)
	local size=$((cell_size * 1000))
	local set_size=--buffer_size=$size,0,0,0,0,0,0,0
	local clear_size=--buffer_size=0,0,0,0,0,0,0,0
	local rc

	RET=0

	__mlnx_qos -i $swp $set_size &> /dev/null
	rc=$?
	check_fail $rc "buffer_size should fail before qdisc is added"

	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M

	__mlnx_qos -i $swp $set_size > /dev/null
	rc=$?
	check_err $rc "buffer_size should pass after qdisc is added"
	check_buf_size 0 "== $size" "set size: "

	mtu_set $swp 6000
	check_buf_size 0 "== $size" "set MTU: "
	mtu_restore $swp

	__mlnx_qos -i $swp $clear_size > /dev/null

	# After replacing the qdisc for the same kind, buffer_size still has to
	# work.
	tc qdisc replace dev $swp root handle 1: bfifo limit 1M

	__mlnx_qos -i $swp $set_size > /dev/null
	check_buf_size 0 "== $size" "post replace, set size: "

	__mlnx_qos -i $swp $clear_size > /dev/null

	# Likewise after replacing for a different kind.
	tc qdisc replace dev $swp root handle 2: prio bands 8

	__mlnx_qos -i $swp $set_size > /dev/null
	check_buf_size 0 "== $size" "post replace different kind, set size: "

	tc qdisc delete dev $swp root

	__mlnx_qos -i $swp $set_size &> /dev/null
	rc=$?
	check_fail $rc "buffer_size should fail after qdisc is deleted"

	log_test "TC: buffer size"
}

# Track the part of the reported total size that is not buffer 0 (the
# "invisible" buffers): expect it to grow when an egress mirror is installed
# on $swp, to stay put across an MTU change, and to return to the starting
# value after the mirror is removed.
# Arguments: $1 - optional prefix for the logged test name
test_int_buf()
{
	local what=$1; shift
	local rc

	RET=0

	local base_buf0=$(get_buf_size 0)
	local base_tot=$(get_tot_size)

	# Size of internal buffer and buffer 9.
	local base_hidden=$((base_tot - base_buf0))

	tc qdisc add dev $swp clsact
	tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp

	local span_buf0=$(get_buf_size 0)
	local span_tot=$(get_tot_size)
	local span_hidden=$((span_tot - span_buf0))

	# Egress SPAN should have added to the "invisible" buffer configuration.
	((span_hidden > base_hidden))
	rc=$?
	check_err $rc "Invisible buffers account for '$span_hidden', expected '> $base_hidden'"

	mtu_set $swp 3000

	local mtu_buf0=$(get_buf_size 0)
	local mtu_tot=$(get_tot_size)
	local mtu_hidden=$((mtu_tot - mtu_buf0))

	# MTU change might change buffer 0, which will show at total, but the
	# hidden buffers should stay the same size.
	((mtu_hidden == span_hidden))
	rc=$?
	check_err $rc "MTU change: Invisible buffers account for '$mtu_hidden', expected '== $span_hidden'"

	mtu_restore $swp
	tc qdisc del dev $swp clsact

	# After SPAN removal, hidden buffers should be back to the original sizes.
	local end_buf0=$(get_buf_size 0)
	local end_tot=$(get_tot_size)
	local end_hidden=$((end_tot - end_buf0))
	((end_hidden == base_hidden))
	rc=$?
	check_err $rc "SPAN removed: Invisible buffers account for '$end_hidden', expected '== $base_hidden'"

	log_test "${what}internal buffer size"
}

# Run test_int_buf twice with a root qdisc installed on $swp: once as is and
# once with buffer 0 given an explicit size of 1000 cells.
test_tc_int_buf()
{
	local cell_size=$(devlink_cell_size_get)
	local size=$((cell_size * 1000))
	local set_size=--buffer_size=$size,0,0,0,0,0,0,0
	local clear_size=--buffer_size=0,0,0,0,0,0,0,0

	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
	test_int_buf "TC: "

	__mlnx_qos -i $swp $set_size > /dev/null
	test_int_buf "TC+buffsize: "

	# Undo the explicit size before removing the qdisc.
	__mlnx_qos -i $swp $clear_size > /dev/null
	tc qdisc delete dev $swp root
}

# Run cleanup whenever the script exits, however it gets there.
trap cleanup EXIT

# The three helpers below are not defined in this file; they come from the
# sourced libraries.
# NOTE(review): bail_on_lldpad presumably aborts when lldpad is running, and
# tests_run presumably runs the functions named in ALL_TESTS -- confirm.
bail_on_lldpad
setup_wait
tests_run

# EXIT_STATUS is not assigned in this file; it is expected to have been set
# by the sourced libraries by this point.
exit $EXIT_STATUS
+14 −0
Original line number Diff line number Diff line
@@ -82,3 +82,17 @@ bail_on_lldpad()
		fi
	fi
}

# Run mlnx_qos with the given arguments, hiding the tool's own stderr.
# On failure, write a one-line diagnostic with the exit status and the
# arguments to stderr.
# Arguments: passed through to mlnx_qos unchanged
# Returns:   the exit status of mlnx_qos
__mlnx_qos()
{
	local err

	mlnx_qos "$@" 2>/dev/null
	err=$?

	if ((err)); then
		# $* joins the arguments into the message as one word, which
		# "$@" inside a quoted string does not. The message goes to the
		# inherited stderr descriptor rather than re-opening
		# /dev/stderr.
		echo "Error ($err) in mlnx_qos $*" >&2
	fi

	return $err
}
+5 −0
Original line number Diff line number Diff line
@@ -145,12 +145,17 @@ switch_create()

	# Make sure that ingress quotas are smaller than egress so that there is
	# room for both streams of traffic to be admitted to shared buffer.
	devlink_port_pool_th_save $swp1 0
	devlink_port_pool_th_set $swp1 0 5
	devlink_tc_bind_pool_th_save $swp1 0 ingress
	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5

	devlink_port_pool_th_save $swp2 0
	devlink_port_pool_th_set $swp2 0 5
	devlink_tc_bind_pool_th_save $swp2 1 ingress
	devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5

	devlink_port_pool_th_save $swp3 4
	devlink_port_pool_th_set $swp3 4 12
}

+403 −0
Original line number Diff line number Diff line
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
# of 1. This stream is consistently prioritized as priority 1, is put to PG
# buffer 1, and scheduled at TC 1.
#
# - the stream first ingresses through $swp1, where it is forwarded to $swp3
#
# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
#   to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
#   shaped, and thus the PFC pool eventually fills, therefore the headroom
#   fills, and $swp3 is paused.
#
# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
#   a pool ("overflow pool"). The overflow pool needs to be large enough to
#   contain the whole burst.
#
# - eventually the PFC pool gets some traffic out, headroom therefore gets some
#   traffic to the pool, and $swp3 is unpaused again. This way the traffic is
#   gradually forwarded from the overflow pool, through the PFC pool, out of
#   $swp2, and eventually to $h2.
#
# - if PFC works, all lossless flow packets that ingress through $swp1 should
#   also be seen ingressing $h2. If it doesn't, there will be drops due to
#   discrepancy between the speeds of $swp1 and $h2.
#
# - it should all play out relatively quickly, so that SLL and HLL will not
#   cause drops.
#
# +-----------------------+
# | H1                    |
# |   + $h1.111           |
# |   | 192.0.2.33/28     |
# |   |                   |
# |   + $h1               |
# +---|-------------------+  +--------------------+
#     |                      |                    |
# +---|----------------------|--------------------|---------------------------+
# |   + $swp1          $swp3 +                    + $swp4                     |
# |   | iPOOL1        iPOOL0 |                    | iPOOL2                    |
# |   | ePOOL4        ePOOL5 |                    | ePOOL4                    |
# |   |                1Gbps |                    | 1Gbps                     |
# |   |        PFC:enabled=1 |                    | PFC:enabled=1             |
# | +-|----------------------|-+                +-|------------------------+  |
# | | + $swp1.111  $swp3.111 + |                | + $swp4.111              |  |
# | |                          |                |                          |  |
# | | BR1                      |                | BR2                      |  |
# | |                          |                |                          |  |
# | |                          |                |         + $swp2.111      |  |
# | +--------------------------+                +---------|----------------+  |
# |                                                       |                   |
# | iPOOL0: 500KB dynamic                                 |                   |
# | iPOOL1: 10MB static                                   |                   |
# | iPOOL2: 1MB static                                    + $swp2             |
# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
# | ePOOL5: 10MB static                                   | ePOOL6            |
# | ePOOL6: "infinite" static                             | 200Mbps shaper    |
# +-------------------------------------------------------|-------------------+
#                                                         |
#                                                     +---|-------------------+
#                                                     |   + $h2            H2 |
#                                                     |   |                   |
#                                                     |   + $h2.111           |
#                                                     |     192.0.2.34/28     |
#                                                     +-----------------------+
#
# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
# iPOOL1+ePOOL5 are overflow pools.
# iPOOL2+ePOOL6 are PFC pools.

# Names of the test functions defined in this file, in the order listed.
# NOTE(review): presumably consumed by tests_run from the sourced library --
# confirm against lib.sh.
ALL_TESTS="
	ping_ipv4
	test_qos_pfc
"

# Shared helper libraries are located relative to this script's own path.
lib_dir=$(dirname $0)/../../../net/forwarding

# Set before lib.sh is sourced; setup_prepare below reads six interfaces,
# NETIFS[p1] through NETIFS[p6].
NUM_NETIFS=6
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
# qos_lib.sh is sourced by bare name rather than through $lib_dir.
source qos_lib.sh

# Size constants in bytes. They are decimal: 1KB is 1000 bytes, not 1024.
_1KB=1000
_100KB=$((100 * _1KB))
_500KB=$((500 * _1KB))
_1MB=$((1000 * _1KB))
_10MB=$((10 * _1MB))

# Set up host interface $h1: initialize it, raise its MTU to 10000 and
# create VLAN 111 on it, passing v$h1 and 192.0.2.33/28 to vlan_create.
h1_create()
{
	local vid=111
	local addr=192.0.2.33/28

	simple_if_init $h1
	mtu_set $h1 10000

	vlan_create $h1 $vid v$h1 $addr
}

# Undo h1_create in reverse order: remove VLAN 111, restore the MTU and
# finalize $h1.
h1_destroy()
{
	local vid=111

	vlan_destroy $h1 $vid

	mtu_restore $h1
	simple_if_fini $h1
}

# Set up host interface $h2: initialize it, raise its MTU to 10000 and
# create VLAN 111 on it, passing v$h2 and 192.0.2.34/28 to vlan_create.
h2_create()
{
	local vid=111
	local addr=192.0.2.34/28

	simple_if_init $h2
	mtu_set $h2 10000

	vlan_create $h2 $vid v$h2 $addr
}

# Undo h2_create in reverse order: remove VLAN 111, restore the MTU and
# finalize $h2.
h2_destroy()
{
	local vid=111

	vlan_destroy $h2 $vid

	mtu_restore $h2
	simple_if_fini $h2
}

# Configure the switch side of the topology: save and resize the shared
# buffer pools, set up ports $swp1..$swp4 (MTU, VLAN 111, QoS maps, pool
# quotas, qdiscs, PFC and headroom), and join the VLAN uppers into bridges
# br1 ($swp1, $swp3) and br2 ($swp2, $swp4).
# Globals: reads swp1, swp2, swp3, swp4 and the _100KB/_500KB/_1MB/_10MB
#          size constants
switch_create()
{
	# pools
	# -----

	# Save every setting that is changed below, so that switch_destroy can
	# restore it.
	devlink_pool_size_thtype_save 0
	devlink_pool_size_thtype_save 4
	devlink_pool_size_thtype_save 1
	devlink_pool_size_thtype_save 5
	devlink_pool_size_thtype_save 2
	devlink_pool_size_thtype_save 6

	devlink_port_pool_th_save $swp1 1
	devlink_port_pool_th_save $swp2 6
	devlink_port_pool_th_save $swp3 5
	devlink_port_pool_th_save $swp4 2

	devlink_tc_bind_pool_th_save $swp1 1 ingress
	devlink_tc_bind_pool_th_save $swp2 1 egress
	devlink_tc_bind_pool_th_save $swp3 1 egress
	devlink_tc_bind_pool_th_save $swp4 1 ingress

	# Control traffic pools. Just reduce the size. Keep them dynamic so that
	# we don't need to change all the uninteresting quotas.
	devlink_pool_size_thtype_set 0 dynamic $_500KB
	devlink_pool_size_thtype_set 4 dynamic $_500KB

	# Overflow pools.
	devlink_pool_size_thtype_set 1 static $_10MB
	devlink_pool_size_thtype_set 5 static $_10MB

	# PFC pools. As per the writ, the size of egress PFC pool should be
	# infinite, but actually it just needs to be large enough to not matter
	# in practice, so reuse the 10MB limit.
	devlink_pool_size_thtype_set 2 static $_1MB
	devlink_pool_size_thtype_set 6 static $_10MB

	# $swp1
	# -----

	ip link set dev $swp1 up
	mtu_set $swp1 10000
	vlan_create $swp1 111
	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp1 1 $_10MB
	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB

	# Configure qdisc so that we can configure PG and therefore pool
	# assignment.
	tc qdisc replace dev $swp1 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	__mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null

	# $swp2
	# -----

	ip link set dev $swp2 up
	mtu_set $swp2 10000
	vlan_create $swp2 111
	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp2 6 $_10MB
	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB

	# prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
	tc qdisc replace dev $swp2 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	tc qdisc replace dev $swp2 parent 1:7 handle 17: \
	   tbf rate 200Mbit burst 131072 limit 1M

	# $swp3
	# -----

	ip link set dev $swp3 up
	mtu_set $swp3 10000
	vlan_create $swp3 111
	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp3 5 $_10MB
	devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB

	# prio 0->TC0 (band 7), 1->TC1 (band 6)
	tc qdisc replace dev $swp3 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6

	# Need to enable PFC so that PAUSE takes effect. Therefore need to put
	# the lossless prio into a buffer of its own. Don't bother with buffer
	# sizes though, there is not going to be any pressure in the "backward"
	# direction.
	__mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null

	# $swp4
	# -----

	ip link set dev $swp4 up
	mtu_set $swp4 10000
	vlan_create $swp4 111
	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp4 2 $_1MB
	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB

	# Configure qdisc so that we can hand-tune headroom.
	tc qdisc replace dev $swp4 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	__mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
	# is (-2*MTU) about 80K of delay provision.
	# The headroom being tuned is that of $swp4, the port on which
	# switch_destroy resets buffer_size.
	__mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null

	# bridges
	# -------

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev $swp1.111 master br1
	ip link set dev $swp3.111 master br1
	ip link set dev br1 up

	ip link add name br2 type bridge vlan_filtering 0
	ip link set dev $swp2.111 master br2
	ip link set dev $swp4.111 master br2
	ip link set dev br2 up
}

# Undo switch_create: restore the pool settings, dismantle the bridges, then
# tear the ports down from $swp4 to $swp1.
switch_destroy()
{
	local zeros=0,0,0,0,0,0,0,0
	local vid=111
	local pool

	# Do this first so that we can reset the limits to values that are only
	# valid for the original static / dynamic setting.
	for pool in 6 5 4 2 1 0; do
		devlink_pool_size_thtype_restore $pool
	done

	# bridges
	# -------

	ip link set dev br2 down
	ip link set dev $swp4.$vid nomaster
	ip link set dev $swp2.$vid nomaster
	ip link del dev br2

	ip link set dev br1 down
	ip link set dev $swp3.$vid nomaster
	ip link set dev $swp1.$vid nomaster
	ip link del dev br1

	# $swp4
	# -----

	__mlnx_qos -i $swp4 --buffer_size=$zeros >/dev/null
	__mlnx_qos -i $swp4 --pfc=$zeros >/dev/null
	__mlnx_qos -i $swp4 --prio2buffer=$zeros >/dev/null
	tc qdisc del dev $swp4 root

	devlink_tc_bind_pool_th_restore $swp4 1 ingress
	devlink_port_pool_th_restore $swp4 2

	vlan_destroy $swp4 $vid
	mtu_restore $swp4
	ip link set dev $swp4 down

	# $swp3
	# -----

	__mlnx_qos -i $swp3 --pfc=$zeros >/dev/null
	__mlnx_qos -i $swp3 --prio2buffer=$zeros >/dev/null
	tc qdisc del dev $swp3 root

	devlink_tc_bind_pool_th_restore $swp3 1 egress
	devlink_port_pool_th_restore $swp3 5

	vlan_destroy $swp3 $vid
	mtu_restore $swp3
	ip link set dev $swp3 down

	# $swp2
	# -----

	# The child qdisc is removed before the root it hangs off.
	tc qdisc del dev $swp2 parent 1:7
	tc qdisc del dev $swp2 root

	devlink_tc_bind_pool_th_restore $swp2 1 egress
	devlink_port_pool_th_restore $swp2 6

	vlan_destroy $swp2 $vid
	mtu_restore $swp2
	ip link set dev $swp2 down

	# $swp1
	# -----

	__mlnx_qos -i $swp1 --prio2buffer=$zeros >/dev/null
	tc qdisc del dev $swp1 root

	devlink_tc_bind_pool_th_restore $swp1 1 ingress
	devlink_port_pool_th_restore $swp1 1

	vlan_destroy $swp1 $vid
	mtu_restore $swp1
	ip link set dev $swp1 down
}

# Bind the six interfaces in NETIFS to the names used by this test, record
# the MAC address of $h2, and build the hosts and the switch.
# Globals written: h1, swp1, swp2, h2, swp3, swp4, h2mac
setup_prepare()
{
	local step

	# p1-p2 and p3-p4 are the two host-to-switch pairs.
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	# p5-p6 are both switch ports.
	swp3=${NETIFS[p5]}
	swp4=${NETIFS[p6]}

	h2mac=$(mac_get $h2)

	for step in vrf_prepare h1_create h2_create switch_create; do
		$step
	done
}

# EXIT-trap handler: run pre_cleanup, then dismantle the switch and the
# hosts in the reverse of the order setup_prepare built them, and finish
# with vrf_cleanup.
cleanup()
{
	local step

	pre_cleanup

	for step in switch_destroy h2_destroy h1_destroy; do
		$step
	done

	vrf_cleanup
}

# Ping from $h1 to 192.0.2.34, the address h2_create passes to vlan_create
# for $h2.
ping_ipv4()
{
	local dst=192.0.2.34

	ping_test $h1 $dst
}

# Send a burst of about 10MB of priority-1 traffic from $h1 and compare the
# priority-1 byte counters of $swp1 (received) and $swp2 (transmitted):
# the ingress volume must be within 5% of what was sent, and every byte
# that came in must have gone out.
test_qos_pfc()
{
	# Bytes accounted per packet: 8000 bytes of payload plus headers.
	local frame=8050
	local pkts size
	local rx_before tx_before rx_after tx_after
	local din dout pct_in
	local rc

	RET=0

	# 10M pool, each packet is 8K of payload + headers
	pkts=$((_10MB / frame))
	size=$((pkts * frame))
	rx_before=$(ethtool_stats_get $swp1 rx_octets_prio_1)
	tx_before=$(ethtool_stats_get $swp2 tx_octets_prio_1)

	$MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
		-a own -b $h2mac -c $pkts -t udp -q
	# Leave time for the burst to make its way through before sampling.
	sleep 2

	rx_after=$(ethtool_stats_get $swp1 rx_octets_prio_1)
	tx_after=$(ethtool_stats_get $swp2 tx_octets_prio_1)

	din=$((rx_after - rx_before))
	dout=$((tx_after - tx_before))

	pct_in=$((din * 100 / size))

	((pct_in > 95 && pct_in < 105))
	rc=$?
	check_err $rc "Relative ingress out of expected bounds, $pct_in% should be 100%"

	((dout == din))
	rc=$?
	check_err $rc "$((din - dout)) bytes out of $din ingressed got lost"

	log_test "PFC"
}

# Run cleanup whenever the script exits, however it gets there. The trap is
# installed before setup_prepare, so a partially built topology is torn down
# as well.
trap cleanup EXIT

# bail_on_lldpad, setup_wait and tests_run are not defined in this file; they
# come from the sourced libraries.
# NOTE(review): bail_on_lldpad presumably aborts when lldpad is running, and
# tests_run presumably runs the functions named in ALL_TESTS -- confirm.
bail_on_lldpad
setup_prepare
setup_wait
tests_run

# EXIT_STATUS is not assigned in this file; it is expected to have been set
# by the sourced libraries by this point.
exit $EXIT_STATUS
Loading