Commit ea64ce6d authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlxsw-Add-support-for-buffer-drops-mirroring'



Petr Machata says:

====================
mlxsw: Add support for buffer drops mirroring

This set offloads the recently introduced qevent infrastructure in TC and
allows mlxsw to support mirroring of packets that were dropped due to
buffer related reasons (e.g., early drops) during forwarding.

Up until now mlxsw only supported mirroring that was either triggered by
per-port triggers (i.e., via matchall) or by the policy engine (i.e.,
via flower). Packets that are dropped due to buffer related reasons are
mirrored using a third type of trigger, a global trigger.

Global triggers are bound once to a mirroring (SPAN) agent and enabled
on a per-{port, TC} basis. This allows users, for example, to request
that only packets that were early dropped on a specific netdev to be
mirrored.

Patch set overview:

Patch #1 extends flow_block_offload and indirect offload structure to pass
a scheduler instead of a netdevice. That is necessary, because binding type
and netdevice are not a unique identifier of the block anymore.

Patches #2-#3 add the required registers to support above mentioned
functionality.

Patches #4-#6 gradually add support for global mirroring triggers.

Patch #7 adds support for enablement of global mirroring triggers.

Patches #8-#11 are cleanups in the flow offload code and shuffle some
code around to make the qevent offload easier.

Patch #12 implements offload of RED early_drop qevent.

Patch #13 extends the RED selftest for offloaded datapath to cover
early_drop qevent.

v2:
- Patch #1:
    - In struct flow_block_indr, track both sch and dev.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents e1d82f7a 1add9212
Loading
Loading
Loading
Loading
+4 −5
Original line number Diff line number Diff line
@@ -1888,7 +1888,7 @@ static void bnxt_tc_setup_indr_rel(void *cb_priv)
	kfree(priv);
}

static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp,
static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct Qdisc *sch, struct bnxt *bp,
				    struct flow_block_offload *f, void *data,
				    void (*cleanup)(struct flow_block_cb *block_cb))
{
@@ -1911,7 +1911,7 @@ static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp,
		block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
						    cb_priv, cb_priv,
						    bnxt_tc_setup_indr_rel, f,
						    netdev, data, bp, cleanup);
						    netdev, sch, data, bp, cleanup);
		if (IS_ERR(block_cb)) {
			list_del(&cb_priv->list);
			kfree(cb_priv);
@@ -1946,7 +1946,7 @@ static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
	return netif_is_vxlan(netdev);
}

static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,
static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
				 enum tc_setup_type type, void *type_data,
				 void *data,
				 void (*cleanup)(struct flow_block_cb *block_cb))
@@ -1956,8 +1956,7 @@ static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,

	switch (type) {
	case TC_SETUP_BLOCK:
		return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data, data,
						cleanup);
		return bnxt_tc_setup_indr_block(netdev, sch, cb_priv, type_data, data, cleanup);
	default:
		break;
	}
+5 −5
Original line number Diff line number Diff line
@@ -404,7 +404,7 @@ static void mlx5e_rep_indr_block_unbind(void *cb_priv)
static LIST_HEAD(mlx5e_block_cb_list);

static int
mlx5e_rep_indr_setup_block(struct net_device *netdev,
mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
			   struct mlx5e_rep_priv *rpriv,
			   struct flow_block_offload *f,
			   flow_setup_cb_t *setup_cb,
@@ -442,7 +442,7 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev,

		block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv,
						    mlx5e_rep_indr_block_unbind,
						    f, netdev, data, rpriv,
						    f, netdev, sch, data, rpriv,
						    cleanup);
		if (IS_ERR(block_cb)) {
			list_del(&indr_priv->list);
@@ -472,18 +472,18 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev,
}

static
int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv,
int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
			    enum tc_setup_type type, void *type_data,
			    void *data,
			    void (*cleanup)(struct flow_block_cb *block_cb))
{
	switch (type) {
	case TC_SETUP_BLOCK:
		return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_tc_cb,
						  data, cleanup);
	case TC_SETUP_FT:
		return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_ft_cb,
						  data, cleanup);
	default:
+102 −0
Original line number Diff line number Diff line
@@ -9502,6 +9502,106 @@ MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1);
 */
MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1);

/* MPAGR - Monitoring Port Analyzer Global Register
 * ------------------------------------------------
 * This register is used for global port analyzer configurations.
 * Note: This register is not supported by current FW versions for Spectrum-1.
 */
#define MLXSW_REG_MPAGR_ID 0x9089
#define MLXSW_REG_MPAGR_LEN 0x0C

MLXSW_REG_DEFINE(mpagr, MLXSW_REG_MPAGR_ID, MLXSW_REG_MPAGR_LEN);

enum mlxsw_reg_mpagr_trigger {
	MLXSW_REG_MPAGR_TRIGGER_EGRESS,
	MLXSW_REG_MPAGR_TRIGGER_INGRESS,
	MLXSW_REG_MPAGR_TRIGGER_INGRESS_WRED,
	MLXSW_REG_MPAGR_TRIGGER_INGRESS_SHARED_BUFFER,
	MLXSW_REG_MPAGR_TRIGGER_INGRESS_ING_CONG,
	MLXSW_REG_MPAGR_TRIGGER_INGRESS_EGR_CONG,
	MLXSW_REG_MPAGR_TRIGGER_EGRESS_ECN,
	MLXSW_REG_MPAGR_TRIGGER_EGRESS_HIGH_LATENCY,
};

/* reg_mpagr_trigger
 * Mirror trigger.
 * Access: Index
 */
MLXSW_ITEM32(reg, mpagr, trigger, 0x00, 0, 4);

/* reg_mpagr_pa_id
 * Port analyzer ID.
 * Access: RW
 */
MLXSW_ITEM32(reg, mpagr, pa_id, 0x04, 0, 4);

/* reg_mpagr_probability_rate
 * Sampling rate.
 * Valid values are: 1 to 3.5*10^9
 * Value of 1 means "sample all". Default is 1.
 * Access: RW
 */
MLXSW_ITEM32(reg, mpagr, probability_rate, 0x08, 0, 32);

static inline void mlxsw_reg_mpagr_pack(char *payload,
					enum mlxsw_reg_mpagr_trigger trigger,
					u8 pa_id, u32 probability_rate)
{
	MLXSW_REG_ZERO(mpagr, payload);
	mlxsw_reg_mpagr_trigger_set(payload, trigger);
	mlxsw_reg_mpagr_pa_id_set(payload, pa_id);
	mlxsw_reg_mpagr_probability_rate_set(payload, probability_rate);
}

/* MOMTE - Monitoring Mirror Trigger Enable Register
 * -------------------------------------------------
 * This register is used to configure the mirror enable for different mirror
 * reasons.
 */
#define MLXSW_REG_MOMTE_ID 0x908D
#define MLXSW_REG_MOMTE_LEN 0x10

MLXSW_REG_DEFINE(momte, MLXSW_REG_MOMTE_ID, MLXSW_REG_MOMTE_LEN);

/* reg_momte_local_port
 * Local port number.
 * Access: Index
 */
MLXSW_ITEM32(reg, momte, local_port, 0x00, 16, 8);

enum mlxsw_reg_momte_type {
	MLXSW_REG_MOMTE_TYPE_WRED = 0x20,
	MLXSW_REG_MOMTE_TYPE_SHARED_BUFFER_TCLASS = 0x31,
	MLXSW_REG_MOMTE_TYPE_SHARED_BUFFER_TCLASS_DESCRIPTORS = 0x32,
	MLXSW_REG_MOMTE_TYPE_SHARED_BUFFER_EGRESS_PORT = 0x33,
	MLXSW_REG_MOMTE_TYPE_ING_CONG = 0x40,
	MLXSW_REG_MOMTE_TYPE_EGR_CONG = 0x50,
	MLXSW_REG_MOMTE_TYPE_ECN = 0x60,
	MLXSW_REG_MOMTE_TYPE_HIGH_LATENCY = 0x70,
};

/* reg_momte_type
 * Type of mirroring.
 * Access: Index
 */
MLXSW_ITEM32(reg, momte, type, 0x04, 0, 8);

/* reg_momte_tclass_en
 * TClass/PG mirror enable. Each bit represents corresponding tclass.
 * 0: disable (default)
 * 1: enable
 * Access: RW
 */
MLXSW_ITEM_BIT_ARRAY(reg, momte, tclass_en, 0x08, 0x08, 1);

static inline void mlxsw_reg_momte_pack(char *payload, u8 local_port,
					enum mlxsw_reg_momte_type type)
{
	MLXSW_REG_ZERO(momte, payload);
	mlxsw_reg_momte_local_port_set(payload, local_port);
	mlxsw_reg_momte_type_set(payload, type);
}

/* MTPPPC - Time Precision Packet Port Configuration
 * -------------------------------------------------
 * This register serves for configuration of which PTP messages should be
@@ -10853,6 +10953,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
	MLXSW_REG(mgpc),
	MLXSW_REG(mprs),
	MLXSW_REG(mogcr),
	MLXSW_REG(mpagr),
	MLXSW_REG(momte),
	MLXSW_REG(mtpppc),
	MLXSW_REG(mtpptr),
	MLXSW_REG(mtptpt),
+15 −50
Original line number Diff line number Diff line
@@ -175,10 +175,6 @@ struct mlxsw_sp_mlxfw_dev {
	struct mlxsw_sp *mlxsw_sp;
};

struct mlxsw_sp_span_ops {
	u32 (*buffsize_get)(int mtu, u32 speed);
};

static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev,
				    u16 component_index, u32 *p_max_size,
				    u8 *p_align_bits, u16 *p_max_write_size)
@@ -1333,6 +1329,21 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
	return 0;
}

static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
				   struct flow_block_offload *f)
{
	switch (f->binder_type) {
	case FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS:
		return mlxsw_sp_setup_tc_block_clsact(mlxsw_sp_port, f, true);
	case FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS:
		return mlxsw_sp_setup_tc_block_clsact(mlxsw_sp_port, f, false);
	case FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP:
		return mlxsw_sp_setup_tc_block_qevent_early_drop(mlxsw_sp_port, f);
	default:
		return -EOPNOTSUPP;
	}
}

static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
			     void *type_data)
{
@@ -2812,52 +2823,6 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
	.get_stats	= mlxsw_sp2_get_stats,
};

static u32 mlxsw_sp1_span_buffsize_get(int mtu, u32 speed)
{
	return mtu * 5 / 2;
}

static const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops = {
	.buffsize_get = mlxsw_sp1_span_buffsize_get,
};

#define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38
#define MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR 50

static u32 __mlxsw_sp_span_buffsize_get(int mtu, u32 speed, u32 buffer_factor)
{
	return 3 * mtu + buffer_factor * speed / 1000;
}

static u32 mlxsw_sp2_span_buffsize_get(int mtu, u32 speed)
{
	int factor = MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR;

	return __mlxsw_sp_span_buffsize_get(mtu, speed, factor);
}

static const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops = {
	.buffsize_get = mlxsw_sp2_span_buffsize_get,
};

static u32 mlxsw_sp3_span_buffsize_get(int mtu, u32 speed)
{
	int factor = MLXSW_SP3_SPAN_EG_MIRROR_BUFFER_FACTOR;

	return __mlxsw_sp_span_buffsize_get(mtu, speed, factor);
}

static const struct mlxsw_sp_span_ops mlxsw_sp3_span_ops = {
	.buffsize_get = mlxsw_sp3_span_buffsize_get,
};

u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed)
{
	u32 buffsize = mlxsw_sp->span_ops->buffsize_get(speed, mtu);

	return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1;
}

static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
				    unsigned long event, void *ptr);

+29 −4
Original line number Diff line number Diff line
@@ -539,7 +539,6 @@ int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp,
				unsigned int *p_counter_index);
void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
				unsigned int counter_index);
u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed);
bool mlxsw_sp_port_dev_check(const struct net_device *dev);
struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev);
struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
@@ -711,7 +710,6 @@ struct mlxsw_sp_flow_block {

struct mlxsw_sp_flow_block_binding {
	struct list_head list;
	struct net_device *dev;
	struct mlxsw_sp_port *mlxsw_sp_port;
	bool ingress;
};
@@ -769,8 +767,9 @@ mlxsw_sp_flow_block_is_mixed_bound(const struct mlxsw_sp_flow_block *block)
struct mlxsw_sp_flow_block *mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp,
						       struct net *net);
void mlxsw_sp_flow_block_destroy(struct mlxsw_sp_flow_block *block);
int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
			    struct flow_block_offload *f);
int mlxsw_sp_setup_tc_block_clsact(struct mlxsw_sp_port *mlxsw_sp_port,
				   struct flow_block_offload *f,
				   bool ingress);

/* spectrum_acl.c */
struct mlxsw_sp_acl_ruleset;
@@ -962,6 +961,30 @@ extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;

/* spectrum_matchall.c */
enum mlxsw_sp_mall_action_type {
	MLXSW_SP_MALL_ACTION_TYPE_MIRROR,
	MLXSW_SP_MALL_ACTION_TYPE_SAMPLE,
	MLXSW_SP_MALL_ACTION_TYPE_TRAP,
};

struct mlxsw_sp_mall_mirror_entry {
	const struct net_device *to_dev;
	int span_id;
};

struct mlxsw_sp_mall_entry {
	struct list_head list;
	unsigned long cookie;
	unsigned int priority;
	enum mlxsw_sp_mall_action_type type;
	bool ingress;
	union {
		struct mlxsw_sp_mall_mirror_entry mirror;
		struct mlxsw_sp_port_sample sample;
	};
	struct rcu_head rcu;
};

int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
			  struct mlxsw_sp_flow_block *block,
			  struct tc_cls_matchall_offload *f);
@@ -1008,6 +1031,8 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
			  struct tc_tbf_qopt_offload *p);
int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
			   struct tc_fifo_qopt_offload *p);
int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_port,
					      struct flow_block_offload *f);

/* spectrum_fid.c */
bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index);
Loading