Commit 3c37745e authored by Or Gerlitz's avatar Or Gerlitz Committed by Saeed Mahameed
Browse files

net/mlx5e: Properly deal with encap flows add/del under neigh update



Currently, the encap action offload is handled in the actions parse
function and not in mlx5e_tc_add_fdb_flow() where we deal with all
the other aspects of offloading actions (vlan, modify header) and
the rule itself.

When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the
encap entry and offloads the related flows, we wrongly call again into
mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle
again the offloading of vlans and header re-write which puts things
in non consistent state and step on freed memory (e.g the modify
header parse buffer which is already freed).

Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the
encap entry, it causes a corruption at the neigh update code which goes
over the list of flows associated with this encap entry, or double free
when the tc flow is later deleted by user-space.

When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related
to an encap entry which is now invalid, we do a partial repeat of the eswitch
flow removal code which is wrong too.

To fix things up we do the following:

(1) handle the encap action offload in the eswitch flow add function
    mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself.

(2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only
    deal with the encap entry and rules delete/add and not with any of
    the other offloaded actions.

Fixes: 232c0013 ('net/mlx5e: Add support to neighbour update flow')
Signed-off-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: default avatarPaul Blakey <paulb@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parent 4ca637a2
Loading
Loading
Loading
Loading
+54 −35
Original line number Diff line number Diff line
@@ -78,9 +78,11 @@ struct mlx5e_tc_flow {
};

struct mlx5e_tc_flow_parse_attr {
	struct ip_tunnel_info tun_info;
	struct mlx5_flow_spec spec;
	int num_mod_hdr_actions;
	void *mod_hdr_actions;
	int mirred_ifindex;
};

enum {
@@ -322,6 +324,12 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow);

static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct ip_tunnel_info *tun_info,
			      struct net_device *mirred_dev,
			      struct net_device **encap_dev,
			      struct mlx5e_tc_flow *flow);

static struct mlx5_flow_handle *
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow_parse_attr *parse_attr,
@@ -329,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5_flow_handle *rule;
	struct net_device *out_dev, *encap_dev = NULL;
	struct mlx5_flow_handle *rule = NULL;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *out_priv;
	int err;

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
		out_dev = __dev_get_by_index(dev_net(priv->netdev),
					     attr->parse_attr->mirred_ifindex);
		err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
					 out_dev, &encap_dev, flow);
		if (err) {
			rule = ERR_PTR(err);
			if (err != -EAGAIN)
				goto err_attach_encap;
		}
		out_priv = netdev_priv(encap_dev);
		rpriv = out_priv->ppriv;
		attr->out_rep = rpriv->rep;
	}

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err) {
		rule = ERR_PTR(err);
@@ -347,10 +373,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		}
	}

	/* we get here if (1) there's no error (rule being null) or when
	 * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
	 */
	if (rule != ERR_PTR(-EAGAIN)) {
		rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
		if (IS_ERR(rule))
			goto err_add_rule;

	}
	return rule;

err_add_rule:
@@ -361,6 +391,7 @@ err_mod_hdr:
err_add_vlan:
	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
		mlx5e_detach_encap(priv, flow);
err_attach_encap:
	return rule;
}

@@ -389,6 +420,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_tc_flow *flow;
	int err;

@@ -404,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, &e->flows, encap) {
		flow->esw_attr->encap_id = e->encap_id;
		flow->rule = mlx5e_tc_add_fdb_flow(priv,
						   flow->esw_attr->parse_attr,
						   flow);
		esw_attr = flow->esw_attr;
		esw_attr->encap_id = e->encap_id;
		flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
		if (IS_ERR(flow->rule)) {
			err = PTR_ERR(flow->rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
@@ -421,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;

	list_for_each_entry(flow, &e->flows, encap) {
		if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
			flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
			counter = mlx5_flow_rule_counter(flow->rule);
			mlx5_del_flow_rules(flow->rule);
			mlx5_fc_destroy(priv->mdev, counter);
			mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr);
		}
	}

@@ -1942,7 +1972,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,

		if (is_tcf_mirred_egress_redirect(a)) {
			int ifindex = tcf_mirred_ifindex(a);
			struct net_device *out_dev, *encap_dev = NULL;
			struct net_device *out_dev;
			struct mlx5e_priv *out_priv;

			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
@@ -1955,17 +1985,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				rpriv = out_priv->ppriv;
				attr->out_rep = rpriv->rep;
			} else if (encap) {
				err = mlx5e_attach_encap(priv, info,
							 out_dev, &encap_dev, flow);
				if (err && err != -EAGAIN)
					return err;
				parse_attr->mirred_ifindex = ifindex;
				parse_attr->tun_info = *info;
				attr->parse_attr = parse_attr;
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(encap_dev);
				rpriv = out_priv->ppriv;
				attr->out_rep = rpriv->rep;
				attr->parse_attr = parse_attr;
				/* attr->out_rep is resolved when we handle encap */
			} else {
				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
				       priv->netdev->name, out_dev->name);
@@ -2047,7 +2073,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
		err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
		if (err < 0)
			goto err_handle_encap_flow;
			goto err_free;
		flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
	} else {
		err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
@@ -2058,10 +2084,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,

	if (IS_ERR(flow->rule)) {
		err = PTR_ERR(flow->rule);
		if (err != -EAGAIN)
			goto err_free;
	}

	if (err != -EAGAIN)
		flow->flags |= MLX5E_TC_FLOW_OFFLOADED;

	err = rhashtable_insert_fast(&tc->ht, &flow->node,
				     tc->ht_params);
	if (err)
@@ -2075,16 +2104,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
err_del_rule:
	mlx5e_tc_del_flow(priv, flow);

err_handle_encap_flow:
	if (err == -EAGAIN) {
		err = rhashtable_insert_fast(&tc->ht, &flow->node,
					     tc->ht_params);
		if (err)
			mlx5e_tc_del_flow(priv, flow);
		else
			return 0;
	}

err_free:
	kvfree(parse_attr);
	kfree(flow);