Commit 7dc02d7f authored by Jakub Kicinski
Browse files
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Reduce number of hardware offload retries from flowtable datapath
   which might hog system with retries, from Felix Fietkau.

2) Skip neighbour lookup for PPPoE device, fill_forward_path() already
   provides this, from Felix Fietkau.

3) Set destination address from fill_forward_path for PPPoE device,
   also from Felix.

4) When combining PPPoE on top of a VLAN device, set info->outdev to the
   PPPoE device so software offload works, from Felix.

5) Fix TCP teardown flowtable state, races with conntrack gc might result
   in resetting the state to ESTABLISHED and the time to one day. Joint
   work with Oz Shlomo and Sven Auhagen.

6) Call dst_check() from flowtable datapath to check if dst is stale
   instead of doing it from garbage collector path.

7) Disable register tracking infrastructure: either user-space or the
   kernel needs to pre-fetch keys unconditionally, otherwise register
   tracking assumes data is already available in a register when it
   might not be there, leading to incorrect reductions.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: disable expression reduction infra
  netfilter: flowtable: move dst_check to packet path
  netfilter: flowtable: fix TCP flow teardown
  netfilter: nft_flow_offload: fix offload with pppoe + vlan
  net: fix dev_fill_forward_path with pppoe + bridge
  netfilter: nft_flow_offload: skip dst neigh lookup for ppp devices
  netfilter: flowtable: fix excessive hw offload attempts after failure
====================

Link: https://lore.kernel.org/r/20220518213841.359653-1-pablo@netfilter.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 6fd45e79 9e539c5b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
	path->encap.proto = htons(ETH_P_PPP_SES);
	path->encap.id = be16_to_cpu(po->num);
	memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
	memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
	path->dev = ctx->dev;
	ctx->dev = dev;

+1 −1
Original line number Diff line number Diff line
@@ -900,7 +900,7 @@ struct net_device_path_stack {

struct net_device_path_ctx {
	const struct net_device *dev;
	const u8		*daddr;
	u8			daddr[ETH_ALEN];

	int			num_vlans;
	struct {
+1 −1
Original line number Diff line number Diff line
@@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
	const struct net_device *last_dev;
	struct net_device_path_ctx ctx = {
		.dev	= dev,
		.daddr	= daddr,
	};
	struct net_device_path *path;
	int ret = 0;

	memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
	stack->num_paths = 0;
	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
		last_dev = ctx.dev;
+11 −49
Original line number Diff line number Diff line
@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);

/*
 * Reset conntrack's TCP tracking state for a flow returning from the
 * offload datapath: force the state to ESTABLISHED and zero both
 * directions' tracked max window so window validation restarts from
 * the next packet seen by the software path.
 *
 * NOTE(review): this hunk is part of the "fix TCP flow teardown" change;
 * the forced-ESTABLISHED assignment is the line the commit removes —
 * confirm against the applied diff markers.
 */
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
	tcp->state = TCP_CONNTRACK_ESTABLISHED;
	tcp->seen[0].td_maxwin = 0;
	tcp->seen[1].td_maxwin = 0;
}

static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
static void flow_offload_fixup_ct(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	int l4num = nf_ct_protonum(ct);
@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
	if (l4num == IPPROTO_TCP) {
		struct nf_tcp_net *tn = nf_tcp_pernet(net);

		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
		flow_offload_fixup_tcp(&ct->proto.tcp);

		timeout = tn->timeouts[ct->proto.tcp.state];
		timeout -= tn->offload_timeout;
	} else if (l4num == IPPROTO_UDP) {
		struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
}

/*
 * Repair conntrack protocol state when a flow leaves hardware offload.
 * Only TCP keeps per-flow protocol state that needs fixing; other
 * protocols are left untouched. (Removed by this commit: the fixup is
 * folded directly into flow_offload_fixup_ct().)
 */
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
	if (nf_ct_protonum(ct) == IPPROTO_TCP)
		flow_offload_fixup_tcp(&ct->proto.tcp);
}

/*
 * Full conntrack fixup for a flow returning to the software path:
 * first repair protocol state, then recompute the conntrack timeout.
 * (Removed by this commit: replaced by a single merged helper so state
 * and timeout can no longer be fixed up independently and race with
 * conntrack gc.)
 */
static void flow_offload_fixup_ct(struct nf_conn *ct)
{
	flow_offload_fixup_ct_state(ct);
	flow_offload_fixup_ct_timeout(ct);
}

static void flow_offload_route_release(struct flow_offload *flow)
{
	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
	u32 timeout;

	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
	if (READ_ONCE(flow->timeout) != timeout)
	if (timeout - READ_ONCE(flow->timeout) > HZ)
		WRITE_ONCE(flow->timeout, timeout);
	else
		return;

	if (likely(!nf_flowtable_hw_offload(flow_table)))
		return;
@@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       nf_flow_offload_rhash_params);

	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);

	if (nf_flow_has_expired(flow))
		flow_offload_fixup_ct(flow->ct);
	else
		flow_offload_fixup_ct_timeout(flow->ct);

	flow_offload_free(flow);
}

void flow_offload_teardown(struct flow_offload *flow)
{
	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
	set_bit(NF_FLOW_TEARDOWN, &flow->flags);

	flow_offload_fixup_ct_state(flow->ct);
	flow_offload_fixup_ct(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);

@@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
	return err;
}

/*
 * Return true if the tuple's cached route is stale.
 *
 * Only xmit types that cache a dst_entry (NEIGH and XFRM) are checked;
 * for them, dst_check() with the stored cookie reports whether the
 * route has been invalidated. Other xmit types never go stale here.
 * (Removed by this commit: the check moves to the packet path as
 * nf_flow_dst_check() so stale routes are caught immediately instead
 * of waiting for the garbage collector.)
 */
static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
{
	struct dst_entry *dst;

	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
	    tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
		dst = tuple->dst_cache;
		if (!dst_check(dst, tuple->dst_cookie))
			return true;
	}

	return false;
}

/*
 * A flow is stale if the cached route in either direction (original or
 * reply tuple) fails validation. (Removed by this commit along with
 * flow_offload_stale_dst(); dst validation now happens per packet.)
 */
static bool nf_flow_has_stale_dst(struct flow_offload *flow)
{
	return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
	       flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
}

static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
				    struct flow_offload *flow, void *data)
{
	if (nf_flow_has_expired(flow) ||
	    nf_ct_is_dying(flow->ct) ||
	    nf_flow_has_stale_dst(flow))
		set_bit(NF_FLOW_TEARDOWN, &flow->flags);
	    nf_ct_is_dying(flow->ct))
		flow_offload_teardown(flow);

	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
		if (test_bit(NF_FLOW_HW, &flow->flags)) {
+19 −0
Original line number Diff line number Diff line
@@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
	return true;
}

/*
 * Validate the tuple's cached route from the packet fast path.
 *
 * Returns true when the flow may keep using its cached dst. Xmit types
 * other than NEIGH and XFRM do not cache a dst_entry, so they always
 * pass. For NEIGH/XFRM, dst_check() with the stored cookie decides;
 * a false return means the route went stale and the caller tears the
 * flow down so packets fall back to the classic forwarding path.
 */
static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
{
	if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
	    tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
		return true;

	return dst_check(tuple->dst_cache, tuple->dst_cookie);
}

static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
@@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
		return NF_ACCEPT;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return NF_ACCEPT;
	}

	if (skb_try_make_writable(skb, thoff + hdrsize))
		return NF_DROP;

@@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
		return NF_ACCEPT;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return NF_ACCEPT;
	}

	if (skb_try_make_writable(skb, thoff + hdrsize))
		return NF_DROP;

Loading