Commit e4d264e8 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files
Leon Romanovsky says:

====================
Extend packet offload to fully support libreswan

The following patches are an outcome of Raed's work to add packet
offload support to libreswan [1].

The series includes:
 * Priority support for IPsec policies
 * Per-SA statistics (visible through the "ip -s xfrm state ..." command)
 * Support for IKE policy holes
 * Fine-tuning of the acquire logic.

[1] https://github.com/libreswan/libreswan/pull/986
Link: https://lore.kernel.org/all/cover.1678714336.git.leon@kernel.org

* tag 'ipsec-libreswan-mlx5' of https://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux:
  net/mlx5e: Update IPsec per SA packets/bytes count
  net/mlx5e: Use one rule to count all IPsec Tx offloaded traffic
  net/mlx5e: Support IPsec acquire default SA
  net/mlx5e: Allow policies with reqid 0, to support IKE policy holes
  xfrm: copy_to_user_state fetch offloaded SA packets/bytes statistics
  xfrm: add new device offload acquire flag
  net/mlx5e: Use chains for IPsec policy priority offload
  net/mlx5: fs_core: Allow ignore_flow_level on TX dest
  net/mlx5: fs_chains: Refactor to detach chains from tc usage
====================

Link: https://lore.kernel.org/r/20230320094722.1009304-1-leon@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 3079bfdb 5a6cddb8
Loading
Loading
Loading
Loading
+49 −22
Original line number Diff line number Diff line
@@ -308,6 +308,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
	struct net_device *netdev = x->xso.real_dev;
	struct mlx5e_ipsec *ipsec;
	struct mlx5e_priv *priv;
	gfp_t gfp;
	int err;

	priv = netdev_priv(netdev);
@@ -315,16 +316,20 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
		return -EOPNOTSUPP;

	ipsec = priv->ipsec;
	err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
	if (err)
		return err;

	sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
	gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
	sa_entry = kzalloc(sizeof(*sa_entry), gfp);
	if (!sa_entry)
		return -ENOMEM;

	sa_entry->x = x;
	sa_entry->ipsec = ipsec;
	/* Check if this SA is originated from acquire flow temporary SA */
	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		goto out;

	err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
	if (err)
		goto err_xfrm;

	/* check esn */
	mlx5e_ipsec_update_esn_state(sa_entry);
@@ -353,6 +358,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
				mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;

	INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state);
out:
	x->xso.offload_handle = (unsigned long)sa_entry;
	return 0;

@@ -372,6 +378,9 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
	struct mlx5e_ipsec_sa_entry *old;

	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		return;

	old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
	WARN_ON(old != sa_entry);
}
@@ -380,9 +389,13 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);

	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		goto sa_entry_free;

	cancel_work_sync(&sa_entry->modify_work.work);
	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
	mlx5_ipsec_free_sa_ctx(sa_entry);
sa_entry_free:
	kfree(sa_entry);
}

@@ -482,26 +495,26 @@ static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
static void mlx5e_xfrm_update_curlft(struct xfrm_state *x)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
	int err;
	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
	u64 packets, bytes, lastuse;

	lockdep_assert_held(&x->lock);
	lockdep_assert(lockdep_is_held(&x->lock) ||
		       lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex));

	if (sa_entry->attrs.soft_packet_limit == XFRM_INF)
		/* Limits are not configured, as soft limit
		 * must be lowever than hard limit.
		 */
		return;

	err = mlx5e_ipsec_aso_query(sa_entry, NULL);
	if (err)
	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		return;

	mlx5e_ipsec_aso_update_curlft(sa_entry, &x->curlft.packets);
	mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
	x->curlft.packets += packets;
	x->curlft.bytes += bytes;
}

static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x,
static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
				      struct xfrm_policy *x,
				      struct netlink_ext_ack *extack)
{
	struct xfrm_selector *sel = &x->selector;

	if (x->type != XFRM_POLICY_TYPE_MAIN) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
		return -EINVAL;
@@ -519,8 +532,9 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x,
		return -EINVAL;
	}

	if (!x->xfrm_vec[0].reqid) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload policy without reqid");
	if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
	    addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
		return -EINVAL;
	}

@@ -529,12 +543,24 @@ static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x,
		return -EINVAL;
	}

	if (x->selector.proto != IPPROTO_IP &&
	    (x->selector.proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) {
	if (sel->proto != IPPROTO_IP &&
	    (sel->proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) {
		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction");
		return -EINVAL;
	}

	if (x->priority) {
		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
			NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
			return -EINVAL;
		}

		if (x->priority == U32_MAX) {
			NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
			return -EINVAL;
		}
	}

	return 0;
}

@@ -560,6 +586,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
	attrs->upspec.sport = ntohs(sel->sport);
	attrs->upspec.sport_mask = ntohs(sel->sport_mask);
	attrs->upspec.proto = sel->proto;
	attrs->prio = x->priority;
}

static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
@@ -576,7 +603,7 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
		return -EOPNOTSUPP;
	}

	err = mlx5e_xfrm_validate_policy(x, extack);
	err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
	if (err)
		return err;

+10 −3
Original line number Diff line number Diff line
@@ -94,6 +94,7 @@ enum mlx5_ipsec_cap {
	MLX5_IPSEC_CAP_ESN		= 1 << 1,
	MLX5_IPSEC_CAP_PACKET_OFFLOAD	= 1 << 2,
	MLX5_IPSEC_CAP_ROCE             = 1 << 3,
	MLX5_IPSEC_CAP_PRIO             = 1 << 4,
};

struct mlx5e_priv;
@@ -161,6 +162,7 @@ struct mlx5e_ipsec_rule {
	struct mlx5_flow_handle *rule;
	struct mlx5_modify_hdr *modify_hdr;
	struct mlx5_pkt_reformat *pkt_reformat;
	struct mlx5_fc *fc;
};

struct mlx5e_ipsec_modify_state_work {
@@ -198,6 +200,7 @@ struct mlx5_accel_pol_xfrm_attrs {
	u8 type : 2;
	u8 dir : 2;
	u32 reqid;
	u32 prio;
};

struct mlx5e_ipsec_pol_entry {
@@ -233,9 +236,6 @@ void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec);

int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
			  struct mlx5_wqe_aso_ctrl_seg *data);
void mlx5e_ipsec_aso_update_curlft(struct mlx5e_ipsec_sa_entry *sa_entry,
				   u64 *packets);

void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv,
				     void *ipsec_stats);

@@ -252,6 +252,13 @@ mlx5e_ipsec_pol2dev(struct mlx5e_ipsec_pol_entry *pol_entry)
{
	return pol_entry->ipsec->mdev;
}

/* Return true if all 16 bytes of the IPv6 address are zero ("any addr"). */
static inline bool addr6_all_zero(__be32 *addr6)
{
	static const __be32 zaddr6[4] = {};

	/* Must compare the whole 16-byte array: sizeof(zaddr6), not
	 * sizeof(*zaddr6). The latter is sizeof(__be32) == 4 and would
	 * check only the first word, wrongly treating addresses like
	 * ::1 (first word zero, tail non-zero) as all-zero.
	 */
	return !memcmp(addr6, zaddr6, sizeof(zaddr6));
}
#else
static inline void mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
+416 −112

File changed.

Preview size limit exceeded, changes collapsed.

+12 −20
Original line number Diff line number Diff line
@@ -36,12 +36,19 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
	    MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp))
		caps |= MLX5_IPSEC_CAP_CRYPTO;

	if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) &&
	    MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) &&
	    MLX5_CAP_FLOWTABLE_NIC_RX(mdev, reformat_del_esp_trasport) &&
	if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload)) {
		if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
					      reformat_add_esp_trasport) &&
		    MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					      reformat_del_esp_trasport) &&
		    MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap))
			caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD;

		if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
		    MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level))
			caps |= MLX5_IPSEC_CAP_PRIO;
	}

	if (mlx5_get_roce_state(mdev) &&
	    MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_RX_2_NIC_RX_RDMA &&
	    MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_TX_RDMA_2_NIC_TX)
@@ -482,18 +489,3 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
	spin_unlock_bh(&aso->lock);
	return ret;
}

/* Convert the HW ASO hard-limit countdown into a cumulative packet count
 * and store it in @packets.
 *
 * NOTE(review): reads the cached ASO context (aso->ctx); presumably the
 * caller has refreshed it via mlx5e_ipsec_aso_query() first — confirm at
 * the call site.
 */
void mlx5e_ipsec_aso_update_curlft(struct mlx5e_ipsec_sa_entry *sa_entry,
				   u64 *packets)
{
	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
	struct mlx5e_ipsec_aso *aso = ipsec->aso;
	u64 hard_cnt;

	hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt);
	/* HW decreases the limit until it reaches zero to fire an event.
	 * Fix up the calculation so the returned count is the total number
	 * of packets that passed, not how many are left.
	 */
	*packets = sa_entry->attrs.hard_packet_limit - hard_cnt;
}
+3 −17
Original line number Diff line number Diff line
@@ -5181,22 +5181,6 @@ static int mlx5e_tc_netdev_event(struct notifier_block *this,
	return NOTIFY_DONE;
}

static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
{
	int tc_grp_size, tc_tbl_size;
	u32 max_flow_counter;

	max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
			    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

	tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

	tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
			    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));

	return tc_tbl_size;
}

static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
@@ -5269,10 +5253,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
			MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
	attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
	attr.default_ft = tc->miss_t;
	attr.mapping = chains_mapping;
	attr.fs_base_prio = MLX5E_TC_PRIO;

	tc->chains = mlx5_chains_create(dev, &attr);
	if (IS_ERR(tc->chains)) {
@@ -5280,6 +5264,8 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
		goto err_miss;
	}

	mlx5_chains_print_info(tc->chains);

	tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
Loading