Commit b6dfff21 authored by Paul Blakey, committed by Saeed Mahameed
Browse files

net/mlx5e: Fix matching on modified inner ip_ecn bits



Tunnel device follows RFC 6040, and during decapsulation inner
ip_ecn might change depending on inner and outer ip_ecn as follows:

 +---------+----------------------------------------+
 |Arriving |         Arriving Outer Header          |
 |   Inner +---------+---------+---------+----------+
 |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
 +---------+---------+---------+---------+----------+
 | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
 |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
 |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
 |    CE   |   CE    |  CE     | CE      |   CE     |
 +---------+---------+---------+---------+----------+

Cells marked above are changed from original inner packet ip_ecn value.

Tc then matches on the modified inner ip_ecn, but hw offload, which
matches the inner ip_ecn value before decap, will fail.

Fix that by mapping all the cases of outer and inner ip_ecn matching,
and only supporting cases where we know inner wouldn't be changed by
decap, or in the outer ip_ecn=CE case, inner ip_ecn didn't matter.

Fixes: bcef735c ("net/mlx5e: Offload TC matching on tos/ttl for ip tunnels")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Eli Cohen <elic@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 01c3fd11
Loading
Loading
Loading
Loading
+116 −4
Original line number Diff line number Diff line
@@ -1949,6 +1949,111 @@ u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
	return ip_version;
}

/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
 * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
 *      +---------+----------------------------------------+
 *      |Arriving |         Arriving Outer Header          |
 *      |   Inner +---------+---------+---------+----------+
 *      |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
 *      +---------+---------+---------+---------+----------+
 *      | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
 *      |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
 *      |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
 *      |    CE   |   CE    |  CE     | CE      |   CE     |
 *      +---------+---------+---------+---------+----------+
 *
 * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
 * the inner ip_ecn value before hardware decap action.
 *
 * Cells marked are changed from original inner packet ip_ecn value during decap, and
 * so matching those values on inner ip_ecn before decap will fail.
 *
 * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
 * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
 * and as such we can drop the inner ip_ecn=CE match.
 */

/* Check whether the ecn part of a tunnel flow's tos/enc_tos match can be offloaded.
 *
 * Reads the ecn bits of the FLOW_DISSECTOR_KEY_ENC_IP (outer) and
 * FLOW_DISSECTOR_KEY_IP (inner) matches of @f, when those keys are present.
 *
 * @match_inner_ecn is set to true on entry and is only cleared when both outer
 * and inner are fully matched on ecn = CE.
 *
 * Returns 0 when the combination is accepted, -EOPNOTSUPP otherwise (the reason
 * is reported through extack and netdev_warn()).
 */
static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
				      struct flow_cls_offload *f,
				      bool *match_inner_ecn)
{
	struct netlink_ext_ack *extack = f->common.extack;
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	u8 enc_key = 0, enc_mask = 0;
	u8 ip_key = 0, ip_mask = 0;
	struct flow_match_ip match;

	/* Default: the inner ecn match is kept */
	*match_inner_ecn = true;

	/* Outer (enc_tos) ecn bits, left as zero if the key isn't used */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
		flow_rule_match_enc_ip(rule, &match);
		enc_key = match.key->tos & INET_ECN_MASK;
		enc_mask = match.mask->tos & INET_ECN_MASK;
	}

	/* Inner (tos) ecn bits, left as zero if the key isn't used */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
		flow_rule_match_ip(rule, &match);
		ip_key = match.key->tos & INET_ECN_MASK;
		ip_mask = match.mask->tos & INET_ECN_MASK;
	}

	/* Outer ecn mask must be either empty or cover both bits */
	if (enc_mask && enc_mask != INET_ECN_MASK) {
		NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
		netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
		return -EOPNOTSUPP;
	}

	/* No inner ecn match at all, nothing more to verify */
	if (!ip_mask)
		return 0;

	/* From here on the inner ecn is matched, so the outer one must be too */
	if (!enc_mask) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
		netdev_warn(priv->netdev,
			    "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
		return -EOPNOTSUPP;
	}

	if (ip_mask != INET_ECN_MASK) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
		netdev_warn(priv->netdev,
			    "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
		return -EOPNOTSUPP;
	}

	/* Both outer and inner carry a full ecn mask, decide by the outer key */
	switch (enc_key) {
	case INET_ECN_CE:
		/* Handled below, depends on the inner key */
		break;
	case INET_ECN_ECT_1:
		/* DECAP action might modify the inner ecn */
		NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
		netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
		return -EOPNOTSUPP;
	default:
		/* Remaining outer ecn values are accepted with the inner match kept */
		return 0;
	}

	if (ip_key != INET_ECN_CE) {
		/* Software never sees this, decap turns the packet ecn into CE */
		NL_SET_ERR_MSG_MOD(extack,
				   "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
		netdev_warn(priv->netdev,
			    "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
		return -EOPNOTSUPP;
	}

	/* Outer ecn = CE and inner ecn = CE: decap sets the inner ecn to CE in
	 * any case, so the inner ecn match is dropped.
	 */
	*match_inner_ecn = false;

	return 0;
}

static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec,
@@ -2144,6 +2249,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector *dissector = rule->match.dissector;
	enum fs_flow_table_type fs_type;
	bool match_inner_ecn = true;
	u16 addr_type = 0;
	u8 ip_proto = 0;
	u8 *match_level;
@@ -2197,6 +2303,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
			headers_c = get_match_inner_headers_criteria(spec);
			headers_v = get_match_inner_headers_value(spec);
		}

		err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
		if (err)
			return err;
	}

	err = mlx5e_flower_parse_meta(filter_dev, f);
@@ -2420,10 +2530,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
		struct flow_match_ip match;

		flow_rule_match_ip(rule, &match);
		if (match_inner_ecn) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
				 match.mask->tos & 0x3);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
				 match.key->tos & 0x3);
		}

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);