Commit 18390581 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'act_ct-UDP-NEW'



Vlad Buslov says:

====================
net: Allow offloading of UDP NEW connections via act_ct

Currently only bidirectional established connections can be offloaded
via act_ct. This approach has allowed a lot of assumptions to be hardcoded
into the act_ct, flow_table and flow_offload intermediate layer code. In
order to enable offloading of unidirectional UDP NEW connections, start by
incrementally changing the following assumptions:

- Drivers assume that only established connections are offloaded and
  don't support updating existing connections. Extract ctinfo from meta
  action cookie and refuse offloading of new connections in the drivers.

- Fix flow_table offload fixup algorithm to calculate flow timeout
  according to current connection state instead of hardcoded
  "established" value.

- Add new flow_table flow flag that designates bidirectional connections
  instead of assuming it and hardcoding hardware offload of every flow
  in both directions.

- Add new flow_table flow flag that designates connections that are
  offloaded to hardware as "established" instead of assuming it. This
  allows some optimizations in act_ct and prevents spamming the
  flow_table workqueue with redundant tasks.

With all the necessary infrastructure in place, modify act_ct to offload
UDP NEW as a unidirectional connection. Pass reply direction traffic to CT
and promote the connection to bidirectional when the UDP connection state
changes to "assured". Rely on the refresh mechanism to propagate the
connection state change to supporting drivers.

Note that the early drop algorithm, which is designed to free up some space
in the connection tracking table when it becomes full (by randomly deleting
up to 5% of non-established connections), currently ignores connections
marked as "offloaded". Now, with UDP NEW connections becoming
"offloaded", it could allow a malicious user to perform a DoS attack by
filling the table with non-droppable UDP NEW connections by sending just
one packet in a single direction. To prevent such a scenario, change the
early drop algorithm to also consider "offloaded" connections for deletion.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 94281484 df25455e
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -1073,12 +1073,16 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	struct flow_action_entry *meta_action;
	unsigned long cookie = flow->cookie;
	enum ip_conntrack_info ctinfo;
	struct mlx5_ct_entry *entry;
	int err;

	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta_action)
		return -EOPNOTSUPP;
	ctinfo = meta_action->ct_metadata.cookie & NFCT_INFOMASK;
	if (ctinfo == IP_CT_NEW)
		return -EOPNOTSUPP;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+24 −0
Original line number Diff line number Diff line
@@ -1964,6 +1964,27 @@ int nfp_fl_ct_stats(struct flow_cls_offload *flow,
	return 0;
}

static bool
nfp_fl_ct_offload_nft_supported(struct flow_cls_offload *flow)
{
	struct flow_rule *flow_rule = flow->rule;
	struct flow_action *flow_action =
		&flow_rule->action;
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id == FLOW_ACTION_CT_METADATA) {
			enum ip_conntrack_info ctinfo =
				act->ct_metadata.cookie & NFCT_INFOMASK;

			return ctinfo != IP_CT_NEW;
		}
	}

	return false;
}

static int
nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
{
@@ -1976,6 +1997,9 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl
	extack = flow->common.extack;
	switch (flow->command) {
	case FLOW_CLS_REPLACE:
		if (!nfp_fl_ct_offload_nft_supported(flow))
			return -EOPNOTSUPP;

		/* Netfilter can request offload multiple times for the same
		 * flow - protect against adding duplicates.
		 */
+5 −3
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ struct nf_flowtable_type {
						 struct net_device *dev,
						 enum flow_block_command cmd);
	int				(*action)(struct net *net,
						  const struct flow_offload *flow,
						  struct flow_offload *flow,
						  enum flow_offload_tuple_dir dir,
						  struct nf_flow_rule *flow_rule);
	void				(*free)(struct nf_flowtable *ft);
@@ -164,6 +164,8 @@ enum nf_flow_flags {
	NF_FLOW_HW_DYING,
	NF_FLOW_HW_DEAD,
	NF_FLOW_HW_PENDING,
	NF_FLOW_HW_BIDIRECTIONAL,
	NF_FLOW_HW_ESTABLISHED,
};

enum flow_offload_type {
@@ -312,10 +314,10 @@ void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd);
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule);
int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule);

+6 −5
Original line number Diff line number Diff line
@@ -1371,9 +1371,6 @@ static unsigned int early_drop_list(struct net *net,
	hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
		tmp = nf_ct_tuplehash_to_ctrack(h);

		if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
			continue;

		if (nf_ct_is_expired(tmp)) {
			nf_ct_gc_expired(tmp);
			continue;
@@ -1443,11 +1440,14 @@ static bool gc_worker_skip_ct(const struct nf_conn *ct)
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
{
	const struct nf_conntrack_l4proto *l4proto;
	u8 protonum = nf_ct_protonum(ct);

	if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
		return false;
	if (!test_bit(IPS_ASSURED_BIT, &ct->status))
		return true;

	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
	l4proto = nf_ct_l4proto_find(protonum);
	if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
		return true;

@@ -1504,6 +1504,7 @@ static void gc_worker(struct work_struct *work)

			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
				nf_ct_offload_timeout(tmp);
				if (!nf_conntrack_max95)
					continue;
			}

+4 −1
Original line number Diff line number Diff line
@@ -193,8 +193,11 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
		timeout -= tn->offload_timeout;
	} else if (l4num == IPPROTO_UDP) {
		struct nf_udp_net *tn = nf_udp_pernet(net);
		enum udp_conntrack state =
			test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
			UDP_CT_REPLIED : UDP_CT_UNREPLIED;

		timeout = tn->timeouts[UDP_CT_REPLIED];
		timeout = tn->timeouts[state];
		timeout -= tn->offload_timeout;
	} else {
		return;
Loading