Commit 6c829efe authored by Jakub Kicinski
Browse files

Merge branch 'support-tunnel-mode-in-mlx5-ipsec-packet-offload'

Leon Romanovsky says:

====================
Support tunnel mode in mlx5 IPsec packet offload

This series extends mlx5 to support tunnel mode in its IPsec packet
offload implementation.

v0: https://lore.kernel.org/all/cover.1681106636.git.leonro@nvidia.com
====================

Link: https://lore.kernel.org/r/cover.1681388425.git.leonro@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 99676a57 c941da23
Loading
Loading
Loading
Loading
+189 −13
Original line number Diff line number Diff line
@@ -35,12 +35,14 @@
#include <crypto/aead.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/netevent.h>

#include "en.h"
#include "ipsec.h"
#include "ipsec_rxtx.h"

#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1

static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
{
@@ -242,6 +244,57 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
	attrs->lft.numb_rounds_soft = (u64)n;
}

/* Resolve the L2 source/destination MAC addresses that will be used to
 * build (TX) or strip (RX) the tunnel-mode encapsulation header for
 * this SA.
 *
 * Only tunnel-mode packet offload needs L2 information; for any other
 * mode/type combination this is a no-op.
 *
 * The local device MAC (mlx5_query_mac_address) always supplies "our"
 * side of the header; the peer side is looked up in the IPv4 neighbour
 * table. If no neighbour entry exists yet, ARP resolution is kicked off
 * and attrs->drop is set so traffic is dropped until the netevent
 * handler learns the address and re-installs the rules.
 *
 * NOTE(review): only arp_tbl (IPv4) is consulted here — confirm how
 * IPv6 tunnel peers get their MACs resolved.
 */
static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
				  struct mlx5_accel_esp_xfrm_attrs *attrs)
{
	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
	struct xfrm_state *x = sa_entry->x;
	struct net_device *netdev;
	struct neighbour *n;
	u8 addr[ETH_ALEN];

	if (attrs->mode != XFRM_MODE_TUNNEL ||
	    attrs->type != XFRM_DEV_OFFLOAD_PACKET)
		return;

	netdev = x->xso.real_dev;

	mlx5_query_mac_address(mdev, addr);
	switch (attrs->dir) {
	case XFRM_DEV_OFFLOAD_IN:
		/* RX: our MAC is the destination of the outer header */
		ether_addr_copy(attrs->dmac, addr);
		n = neigh_lookup(&arp_tbl, &attrs->saddr.a4, netdev);
		if (!n) {
			n = neigh_create(&arp_tbl, &attrs->saddr.a4, netdev);
			if (IS_ERR(n))
				return;
			/* Trigger ARP; drop until resolution completes */
			neigh_event_send(n, NULL);
			attrs->drop = true;
			break;
		}
		neigh_ha_snapshot(addr, n, netdev);
		ether_addr_copy(attrs->smac, addr);
		break;
	case XFRM_DEV_OFFLOAD_OUT:
		/* TX: our MAC is the source of the outer header */
		ether_addr_copy(attrs->smac, addr);
		n = neigh_lookup(&arp_tbl, &attrs->daddr.a4, netdev);
		if (!n) {
			n = neigh_create(&arp_tbl, &attrs->daddr.a4, netdev);
			if (IS_ERR(n))
				return;
			/* Trigger ARP; drop until resolution completes */
			neigh_event_send(n, NULL);
			attrs->drop = true;
			break;
		}
		neigh_ha_snapshot(addr, n, netdev);
		ether_addr_copy(attrs->dmac, addr);
		break;
	default:
		return;
	}
	/* Both lookup and create paths took a reference on n */
	neigh_release(n);
}

void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
					struct mlx5_accel_esp_xfrm_attrs *attrs)
{
@@ -297,8 +350,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
	attrs->upspec.sport = ntohs(x->sel.sport);
	attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
	attrs->upspec.proto = x->sel.proto;
	attrs->mode = x->props.mode;

	mlx5e_ipsec_init_limits(sa_entry, attrs);
	mlx5e_ipsec_init_macs(sa_entry, attrs);
}

static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
@@ -367,6 +422,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
		return -EINVAL;
	}

	if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
		NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
		return -EINVAL;
	}

	switch (x->xso.type) {
	case XFRM_DEV_OFFLOAD_CRYPTO:
		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
@@ -374,11 +434,6 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
			return -EINVAL;
		}

		if (x->props.mode != XFRM_MODE_TRANSPORT &&
		    x->props.mode != XFRM_MODE_TUNNEL) {
			NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
			return -EINVAL;
		}
		break;
	case XFRM_DEV_OFFLOAD_PACKET:
		if (!(mlx5_ipsec_device_caps(mdev) &
@@ -387,8 +442,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
			return -EINVAL;
		}

		if (x->props.mode != XFRM_MODE_TRANSPORT) {
			NL_SET_ERR_MSG_MOD(extack, "Only transport xfrm states may be offloaded in packet mode");
		if (x->props.mode == XFRM_MODE_TUNNEL &&
		    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
			return -EINVAL;
		}

@@ -458,34 +514,81 @@ static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
	sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
}

/* Work handler: a neighbour the SA depends on was resolved/updated.
 * Copy the freshly learned MAC into the SA attributes (peer side of
 * the tunnel header), clear the drop state and re-program the
 * hardware rules.
 */
static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
{
	struct mlx5e_ipsec_work *work =
		container_of(_work, struct mlx5e_ipsec_work, work);
	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
	struct mlx5e_ipsec_netevent_data *data = work->data;
	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;

	if (attrs->dir == XFRM_DEV_OFFLOAD_IN)
		ether_addr_copy(attrs->smac, data->addr);
	else if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
		ether_addr_copy(attrs->dmac, data->addr);
	else
		WARN_ON_ONCE(true);

	attrs->drop = false;
	mlx5e_accel_ipsec_fs_modify(sa_entry);
}

/* Allocate the deferred-work context an SA may need after creation.
 *
 * Two cases require it:
 *  - crypto offload with ESN: periodic state modification
 *    (mlx5e_ipsec_modify_state), with a shadow sa_entry as scratch data;
 *  - packet offload in tunnel mode: MAC refresh on neighbour updates
 *    (mlx5e_ipsec_handle_netdev_event), with a netevent data buffer.
 *
 * Any other SA type needs no work item and returns 0 with
 * sa_entry->work left NULL.
 *
 * Returns 0 on success or -ENOMEM on allocation failure.
 *
 * The rendered diff had interleaved both the removed and the added
 * hunk lines here (duplicate "default:" labels and a stale
 * unconditional work->data allocation); this is the reconstructed
 * post-merge function.
 */
static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct xfrm_state *x = sa_entry->x;
	struct mlx5e_ipsec_work *work;
	void *data = NULL;

	switch (x->xso.type) {
	case XFRM_DEV_OFFLOAD_CRYPTO:
		if (!(x->props.flags & XFRM_STATE_ESN))
			return 0;
		break;
	case XFRM_DEV_OFFLOAD_PACKET:
		if (x->props.mode != XFRM_MODE_TUNNEL)
			return 0;
		break;
	default:
		break;
	}

	work = kzalloc(sizeof(*work), GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	switch (x->xso.type) {
	case XFRM_DEV_OFFLOAD_CRYPTO:
		/* Shadow copy of the SA entry used by the modify handler */
		data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
		if (!data)
			goto free_work;

		INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
		break;
	case XFRM_DEV_OFFLOAD_PACKET:
		data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
			       GFP_KERNEL);
		if (!data)
			goto free_work;

		INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
		break;
	default:
		break;
	}

	work->data = data;
	work->sa_entry = sa_entry;
	sa_entry->work = work;
	return 0;

free_work:
	kfree(work);
	return -ENOMEM;
}

static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -566,6 +669,14 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
	if (err)
		goto err_hw_ctx;

	if (x->props.mode == XFRM_MODE_TUNNEL &&
	    x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
	    !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
		NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
		err = -EINVAL;
		goto err_add_rule;
	}

	/* We use *_bh() variant because xfrm_timer_handler(), which runs
	 * in softirq context, can reach our state delete logic and we need
	 * xa_erase_bh() there.
@@ -580,6 +691,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
	if (sa_entry->dwork)
		queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
				   MLX5_IPSEC_RESCHED);

	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
	    x->props.mode == XFRM_MODE_TUNNEL)
		xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
			    MLX5E_IPSEC_TUNNEL_SA);

out:
	x->xso.offload_handle = (unsigned long)sa_entry;
	return 0;
@@ -602,6 +719,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
	struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
	struct mlx5e_ipsec_sa_entry *old;

@@ -610,6 +728,12 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)

	old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
	WARN_ON(old != sa_entry);

	if (attrs->mode == XFRM_MODE_TUNNEL &&
	    attrs->type == XFRM_DEV_OFFLOAD_PACKET)
		/* Make sure that no ARP requests are running in parallel */
		flush_workqueue(ipsec->wq);

}

static void mlx5e_xfrm_free_state(struct xfrm_state *x)
@@ -634,6 +758,46 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
	kfree(sa_entry);
}

/* Netevent notifier: react to neighbour updates.
 *
 * When a neighbour entry becomes valid, scan all SAs marked as
 * tunnel-mode packet offload (MLX5E_IPSEC_TUNNEL_SA in the sadb
 * xarray) and, for any SA whose tunnel endpoint matches the updated
 * neighbour, snapshot the new MAC and queue the SA's work item to
 * re-program the hardware rules.
 *
 * Always returns NOTIFY_DONE; the heavy lifting happens in the
 * queued work, not in notifier context.
 */
static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
				      unsigned long event, void *ptr)
{
	struct mlx5_accel_esp_xfrm_attrs *attrs;
	struct mlx5e_ipsec_netevent_data *data;
	struct mlx5e_ipsec_sa_entry *sa_entry;
	struct mlx5e_ipsec *ipsec;
	struct neighbour *n = ptr;
	struct net_device *netdev;
	struct xfrm_state *x;
	unsigned long idx;

	/* Only care about neighbours that just became resolvable */
	if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
		return NOTIFY_DONE;

	ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
	xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
		attrs = &sa_entry->attrs;

		/* saddr/daddr are unions, so &saddr.a4 addresses the same
		 * storage as the IPv6 address for the eq128 compare —
		 * presumably intentional; verify against the union layout.
		 */
		if (attrs->family == AF_INET) {
			if (!neigh_key_eq32(n, &attrs->saddr.a4) &&
			    !neigh_key_eq32(n, &attrs->daddr.a4))
				continue;
		} else {
			if (!neigh_key_eq128(n, &attrs->saddr.a4) &&
			    !neigh_key_eq128(n, &attrs->daddr.a4))
				continue;
		}

		x = sa_entry->x;
		netdev = x->xso.real_dev;
		data = sa_entry->work->data;

		neigh_ha_snapshot(data->addr, n, netdev);
		queue_work(ipsec->wq, &sa_entry->work->work);
	}

	return NOTIFY_DONE;
}

void mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
	struct mlx5e_ipsec *ipsec;
@@ -662,6 +826,13 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
			goto err_aso;
	}

	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
		ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
		ret = register_netevent_notifier(&ipsec->netevent_nb);
		if (ret)
			goto clear_aso;
	}

	ret = mlx5e_accel_ipsec_fs_init(ipsec);
	if (ret)
		goto err_fs_init;
@@ -672,6 +843,9 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
	return;

err_fs_init:
	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
		unregister_netevent_notifier(&ipsec->netevent_nb);
clear_aso:
	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
		mlx5e_ipsec_aso_cleanup(ipsec);
err_aso:
@@ -690,6 +864,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
		return;

	mlx5e_accel_ipsec_fs_cleanup(ipsec);
	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
		unregister_netevent_notifier(&ipsec->netevent_nb);
	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
		mlx5e_ipsec_aso_cleanup(ipsec);
	destroy_workqueue(ipsec->wq);
+10 −1
Original line number Diff line number Diff line
@@ -77,7 +77,7 @@ struct mlx5_replay_esn {

struct mlx5_accel_esp_xfrm_attrs {
	u32   spi;
	u32   flags;
	u32   mode;
	struct aes_gcm_keymat aes_gcm;

	union {
@@ -99,6 +99,8 @@ struct mlx5_accel_esp_xfrm_attrs {
	u32 authsize;
	u32 reqid;
	struct mlx5_ipsec_lft lft;
	u8 smac[ETH_ALEN];
	u8 dmac[ETH_ALEN];
};

enum mlx5_ipsec_cap {
@@ -107,6 +109,7 @@ enum mlx5_ipsec_cap {
	MLX5_IPSEC_CAP_PACKET_OFFLOAD	= 1 << 2,
	MLX5_IPSEC_CAP_ROCE             = 1 << 3,
	MLX5_IPSEC_CAP_PRIO             = 1 << 4,
	MLX5_IPSEC_CAP_TUNNEL           = 1 << 5,
};

struct mlx5e_priv;
@@ -141,6 +144,10 @@ struct mlx5e_ipsec_work {
	void *data;
};

/* Payload passed from the netevent notifier to the SA work handler:
 * the freshly resolved peer MAC address.
 */
struct mlx5e_ipsec_netevent_data {
	u8 addr[ETH_ALEN];
};

struct mlx5e_ipsec_dwork {
	struct delayed_work dwork;
	struct mlx5e_ipsec_sa_entry *sa_entry;
@@ -166,6 +173,7 @@ struct mlx5e_ipsec {
	struct mlx5e_ipsec_tx *tx;
	struct mlx5e_ipsec_aso *aso;
	struct notifier_block nb;
	struct notifier_block netevent_nb;
	struct mlx5_ipsec_fs *roce;
};

@@ -243,6 +251,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry);
bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry);

int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
+208 −31
Original line number Diff line number Diff line
@@ -4,12 +4,15 @@
#include <linux/netdevice.h>
#include "en.h"
#include "en/fs.h"
#include "eswitch.h"
#include "ipsec.h"
#include "fs_core.h"
#include "lib/ipsec_fs_roce.h"
#include "lib/fs_chains.h"

#define NUM_IPSEC_FTE BIT(15)
#define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16
#define IPSEC_TUNNEL_DEFAULT_TTL 0x40

struct mlx5e_ipsec_fc {
	struct mlx5_fc *cnt;
@@ -36,6 +39,7 @@ struct mlx5e_ipsec_rx {
	struct mlx5e_ipsec_rule status;
	struct mlx5e_ipsec_fc *fc;
	struct mlx5_fs_chains *chains;
	u8 allow_tunnel_mode : 1;
};

struct mlx5e_ipsec_tx {
@@ -45,6 +49,7 @@ struct mlx5e_ipsec_tx {
	struct mlx5_flow_namespace *ns;
	struct mlx5e_ipsec_fc *fc;
	struct mlx5_fs_chains *chains;
	u8 allow_tunnel_mode : 1;
};

/* IPsec RX flow steering */
@@ -118,7 +123,7 @@ static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio)

static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
					       int level, int prio,
					       int max_num_groups)
					       int max_num_groups, u32 flags)
{
	struct mlx5_flow_table_attr ft_attr = {};

@@ -127,6 +132,7 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
	ft_attr.max_fte = NUM_IPSEC_FTE;
	ft_attr.level = level;
	ft_attr.prio = prio;
	ft_attr.flags = flags;

	return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
}
@@ -251,7 +257,8 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
	mlx5_del_flow_rules(rx->sa.rule);
	mlx5_destroy_flow_group(rx->sa.group);
	mlx5_destroy_flow_table(rx->ft.sa);

	if (rx->allow_tunnel_mode)
		mlx5_eswitch_unblock_encap(mdev);
	mlx5_del_flow_rules(rx->status.rule);
	mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
	mlx5_destroy_flow_table(rx->ft.status);
@@ -267,6 +274,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
	struct mlx5_flow_destination default_dest;
	struct mlx5_flow_destination dest[2];
	struct mlx5_flow_table *ft;
	u32 flags = 0;
	int err;

	default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family));
@@ -277,7 +285,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
		return err;

	ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
			     MLX5E_NIC_PRIO, 1);
			     MLX5E_NIC_PRIO, 1, 0);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_fs_ft_status;
@@ -300,8 +308,12 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
		goto err_add;

	/* Create FT */
	ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO,
			     2);
	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
		rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
	if (rx->allow_tunnel_mode)
		flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
	ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO, 2,
			     flags);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_fs_ft;
@@ -327,7 +339,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
	}

	ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_NIC_PRIO,
			     2);
			     2, 0);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_pol_ft;
@@ -356,6 +368,8 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
err_fs:
	mlx5_destroy_flow_table(rx->ft.sa);
err_fs_ft:
	if (rx->allow_tunnel_mode)
		mlx5_eswitch_unblock_encap(mdev);
	mlx5_del_flow_rules(rx->status.rule);
	mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
err_add:
@@ -490,7 +504,8 @@ static int ipsec_counter_rule_tx(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_
}

/* IPsec TX flow steering */
static void tx_destroy(struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce)
static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
		       struct mlx5_ipsec_fs *roce)
{
	mlx5_ipsec_fs_roce_tx_destroy(roce);
	if (tx->chains) {
@@ -502,6 +517,8 @@ static void tx_destroy(struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce)
	}

	mlx5_destroy_flow_table(tx->ft.sa);
	if (tx->allow_tunnel_mode)
		mlx5_eswitch_unblock_encap(mdev);
	mlx5_del_flow_rules(tx->status.rule);
	mlx5_destroy_flow_table(tx->ft.status);
}
@@ -511,9 +528,10 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
{
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_table *ft;
	u32 flags = 0;
	int err;

	ft = ipsec_ft_create(tx->ns, 2, 0, 1);
	ft = ipsec_ft_create(tx->ns, 2, 0, 1, 0);
	if (IS_ERR(ft))
		return PTR_ERR(ft);
	tx->ft.status = ft;
@@ -522,7 +540,11 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
	if (err)
		goto err_status_rule;

	ft = ipsec_ft_create(tx->ns, 1, 0, 4);
	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
		tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
	if (tx->allow_tunnel_mode)
		flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
	ft = ipsec_ft_create(tx->ns, 1, 0, 4, flags);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_sa_ft;
@@ -541,7 +563,7 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
		goto connect_roce;
	}

	ft = ipsec_ft_create(tx->ns, 0, 0, 2);
	ft = ipsec_ft_create(tx->ns, 0, 0, 2, 0);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto err_pol_ft;
@@ -572,6 +594,8 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
err_pol_ft:
	mlx5_destroy_flow_table(tx->ft.sa);
err_sa_ft:
	if (tx->allow_tunnel_mode)
		mlx5_eswitch_unblock_encap(mdev);
	mlx5_del_flow_rules(tx->status.rule);
err_status_rule:
	mlx5_destroy_flow_table(tx->ft.status);
@@ -600,7 +624,7 @@ static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
	if (--tx->ft.refcnt)
		return;

	tx_destroy(tx, ipsec->roce);
	tx_destroy(ipsec->mdev, tx, ipsec->roce);
}

static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev,
@@ -829,40 +853,181 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
	return 0;
}

static int setup_pkt_reformat(struct mlx5_core_dev *mdev,
static int
setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev,
			  struct mlx5_accel_esp_xfrm_attrs *attrs,
			      struct mlx5_flow_act *flow_act)
			  struct mlx5_pkt_reformat_params *reformat_params)
{
	enum mlx5_flow_namespace_type ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
	struct mlx5_pkt_reformat_params reformat_params = {};
	struct mlx5_pkt_reformat *pkt_reformat;
	u8 reformatbf[16] = {};
	__be32 spi;
	struct ip_esp_hdr *esp_hdr;
	struct ipv6hdr *ipv6hdr;
	struct ethhdr *eth_hdr;
	struct iphdr *iphdr;
	char *reformatbf;
	size_t bfflen;
	void *hdr;

	bfflen = sizeof(*eth_hdr);

	if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) {
		bfflen += sizeof(*esp_hdr) + 8;

		switch (attrs->family) {
		case AF_INET:
			bfflen += sizeof(*iphdr);
			break;
		case AF_INET6:
			bfflen += sizeof(*ipv6hdr);
			break;
		default:
			return -EINVAL;
		}
	}

	if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
		reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
		goto cmd;
	reformatbf = kzalloc(bfflen, GFP_KERNEL);
	if (!reformatbf)
		return -ENOMEM;

	eth_hdr = (struct ethhdr *)reformatbf;
	switch (attrs->family) {
	case AF_INET:
		eth_hdr->h_proto = htons(ETH_P_IP);
		break;
	case AF_INET6:
		eth_hdr->h_proto = htons(ETH_P_IPV6);
		break;
	default:
		goto free_reformatbf;
	}

	ether_addr_copy(eth_hdr->h_dest, attrs->dmac);
	ether_addr_copy(eth_hdr->h_source, attrs->smac);

	switch (attrs->dir) {
	case XFRM_DEV_OFFLOAD_IN:
		reformat_params->type = MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2;
		break;
	case XFRM_DEV_OFFLOAD_OUT:
		reformat_params->type = MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL;
		reformat_params->param_0 = attrs->authsize;

		hdr = reformatbf + sizeof(*eth_hdr);
		switch (attrs->family) {
		case AF_INET:
			iphdr = (struct iphdr *)hdr;
			memcpy(&iphdr->saddr, &attrs->saddr.a4, 4);
			memcpy(&iphdr->daddr, &attrs->daddr.a4, 4);
			iphdr->version = 4;
			iphdr->ihl = 5;
			iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL;
			iphdr->protocol = IPPROTO_ESP;
			hdr += sizeof(*iphdr);
			break;
		case AF_INET6:
			ipv6hdr = (struct ipv6hdr *)hdr;
			memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16);
			memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16);
			ipv6hdr->nexthdr = IPPROTO_ESP;
			ipv6hdr->version = 6;
			ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL;
			hdr += sizeof(*ipv6hdr);
			break;
		default:
			goto free_reformatbf;
		}

		esp_hdr = (struct ip_esp_hdr *)hdr;
		esp_hdr->spi = htonl(attrs->spi);
		break;
	default:
		goto free_reformatbf;
	}

	reformat_params->size = bfflen;
	reformat_params->data = reformatbf;
	return 0;

free_reformatbf:
	kfree(reformatbf);
	return -EINVAL;
}

/* Build the packet-reformat parameters for a transport-mode SA.
 *
 * RX: strip the ESP transport header; no buffer needed.
 * TX: add an ESP transport header over IPv4/IPv6; the buffer carries
 * the SPI in network byte order, param_0 carries the ICV size.
 *
 * On success any allocated buffer is handed to the caller via
 * reformat_params->data; the caller owns (and frees) it.
 * Returns 0, -EINVAL on bad direction, or -ENOMEM.
 *
 * The rendered diff had interleaved the removed "reformat_params."
 * (by-value) lines with the new pointer-based ones; this is the
 * reconstructed post-merge version.
 */
static int
setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs,
			     struct mlx5_pkt_reformat_params *reformat_params)
{
	u8 *reformatbf;
	__be32 spi;

	switch (attrs->dir) {
	case XFRM_DEV_OFFLOAD_IN:
		reformat_params->type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
		break;
	case XFRM_DEV_OFFLOAD_OUT:
		if (attrs->family == AF_INET)
			reformat_params->type =
				MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
		else
			reformat_params->type =
				MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;

		reformatbf = kzalloc(MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE,
				     GFP_KERNEL);
		if (!reformatbf)
			return -ENOMEM;

		/* convert to network format */
		spi = htonl(attrs->spi);
		memcpy(reformatbf, &spi, sizeof(spi));

		reformat_params->param_0 = attrs->authsize;
		reformat_params->size =
			MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE;
		reformat_params->data = reformatbf;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int setup_pkt_reformat(struct mlx5_core_dev *mdev,
			      struct mlx5_accel_esp_xfrm_attrs *attrs,
			      struct mlx5_flow_act *flow_act)
{
	struct mlx5_pkt_reformat_params reformat_params = {};
	struct mlx5_pkt_reformat *pkt_reformat;
	enum mlx5_flow_namespace_type ns_type;
	int ret;

	switch (attrs->dir) {
	case XFRM_DEV_OFFLOAD_IN:
		ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
		break;
	case XFRM_DEV_OFFLOAD_OUT:
		ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
		break;
	default:
		return -EINVAL;
	}

	switch (attrs->mode) {
	case XFRM_MODE_TRANSPORT:
		ret = setup_pkt_transport_reformat(attrs, &reformat_params);
		break;
	case XFRM_MODE_TUNNEL:
		ret = setup_pkt_tunnel_reformat(mdev, attrs, &reformat_params);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

cmd:
	pkt_reformat =
		mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type);
	kfree(reformat_params.data);
	if (IS_ERR(pkt_reformat))
		return PTR_ERR(pkt_reformat);

@@ -1453,3 +1618,15 @@ void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry)
	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
	memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry));
}

bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct mlx5e_ipsec_rx *rx =
		ipsec_rx(sa_entry->ipsec, sa_entry->attrs.family);
	struct mlx5e_ipsec_tx *tx = sa_entry->ipsec->tx;

	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
		return tx->allow_tunnel_mode;

	return rx->allow_tunnel_mode;
}
+6 −0
Original line number Diff line number Diff line
@@ -48,6 +48,12 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
		if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
		    MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level))
			caps |= MLX5_IPSEC_CAP_PRIO;

		if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
					      reformat_l2_to_l3_esp_tunnel) &&
		    MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					      reformat_l3_esp_tunnel_to_l2))
			caps |= MLX5_IPSEC_CAP_TUNNEL;
	}

	if (mlx5_get_roce_state(mdev) &&
+14 −0
Original line number Diff line number Diff line
@@ -263,6 +263,7 @@ struct mlx5_esw_offload {
	const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
	u8 inline_mode;
	atomic64_t num_flows;
	u64 num_block_encap;
	enum devlink_eswitch_encap_mode encap;
	struct ida vport_metadata_ida;
	unsigned int host_number; /* ECPF supports one external host */
@@ -748,6 +749,9 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
					      struct mlx5_eswitch *slave_esw);
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);

bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);

static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
{
	if (mlx5_esw_allowed(esw))
@@ -761,6 +765,7 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw)
{
	return esw->fdb_table.offloads.slow_fdb;
}

#else  /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -805,6 +810,15 @@ mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
{
	return 0;
}

/* !CONFIG_MLX5_ESWITCH stub: with no eswitch there is no encap mode to
 * conflict with, so blocking always "succeeds".
 */
static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
{
	return true;
}

/* !CONFIG_MLX5_ESWITCH stub: nothing was blocked, nothing to undo. */
static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
{
}
#endif /* CONFIG_MLX5_ESWITCH */

#endif /* __MLX5_ESWITCH_H__ */
Loading