Commit d7e7747a authored by Florian Westphal's avatar Florian Westphal Committed by Pablo Neira Ayuso
Browse files

netfilter: refuse insertion if chain has grown too large



Also add a stat counter for this that gets exported both via old /proc
interface and ctnetlink.

Assuming the old default size of 16536 buckets and max hash occupancy of
64k, this results in 128k insertions (origin+reply), so ~8 entries per
chain on average.

The revised settings in this series will result in about two entries per
bucket on average.

This allows a hard-limit ceiling of 64.

This is not tunable at the moment, but its possible to either increase
nf_conntrack_buckets or decrease nf_conntrack_max to reduce average
lengths.

Signed-off-by: default avatarFlorian Westphal <fw@strlen.de>
Signed-off-by: default avatarPablo Neira Ayuso <pablo@netfilter.org>
parent dd6d2910
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ struct ip_conntrack_stat {
	unsigned int expect_create;
	unsigned int expect_delete;
	unsigned int search_restart;
	unsigned int chaintoolong;
};

#define NFCT_INFOMASK	7UL
+1 −0
Original line number Diff line number Diff line
@@ -257,6 +257,7 @@ enum ctattr_stats_cpu {
	CTA_STATS_ERROR,
	CTA_STATS_SEARCH_RESTART,
	CTA_STATS_CLASH_RESOLVE,
	CTA_STATS_CHAIN_TOOLONG,
	__CTA_STATS_MAX,
};
#define CTA_STATS_MAX (__CTA_STATS_MAX - 1)
+35 −7
Original line number Diff line number Diff line
@@ -77,6 +77,8 @@ static __read_mostly bool nf_conntrack_locks_all;
#define GC_SCAN_INTERVAL	(120u * HZ)
#define GC_SCAN_MAX_DURATION	msecs_to_jiffies(10)

#define MAX_CHAINLEN	64u

static struct conntrack_gc_work conntrack_gc_work;

void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -840,7 +842,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int chainlen = 0;
	unsigned int sequence;
	int err = -EEXIST;

	zone = nf_ct_zone(ct);

@@ -854,15 +858,24 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* See if there's one in the list already, including reverse */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
		if (chainlen++ > MAX_CHAINLEN)
			goto chaintoolong;
	}

	chainlen = 0;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;
		if (chainlen++ > MAX_CHAINLEN)
			goto chaintoolong;
	}

	smp_wmb();
	/* The caller holds a reference to this object */
@@ -872,11 +885,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
	NF_CT_STAT_INC(net, insert);
	local_bh_enable();
	return 0;

chaintoolong:
	NF_CT_STAT_INC(net, chaintoolong);
	err = -ENOSPC;
out:
	nf_conntrack_double_unlock(hash, reply_hash);
	local_bh_enable();
	return -EEXIST;
	return err;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);

@@ -1089,6 +1104,7 @@ int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	const struct nf_conntrack_zone *zone;
	unsigned int chainlen = 0, sequence;
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
@@ -1096,7 +1112,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
	struct hlist_nulls_node *n;
	enum ip_conntrack_info ctinfo;
	struct net *net;
	unsigned int sequence;
	int ret = NF_DROP;

	ct = nf_ct_get(skb, &ctinfo);
@@ -1156,15 +1171,28 @@ __nf_conntrack_confirm(struct sk_buff *skb)
	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;
		if (chainlen++ > MAX_CHAINLEN)
			goto chaintoolong;
	}

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
	chainlen = 0;
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;
		if (chainlen++ > MAX_CHAINLEN) {
chaintoolong:
			nf_ct_add_to_dying_list(ct);
			NF_CT_STAT_INC(net, chaintoolong);
			NF_CT_STAT_INC(net, insert_failed);
			ret = NF_DROP;
			goto dying;
		}
	}

	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
+3 −1
Original line number Diff line number Diff line
@@ -2484,7 +2484,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
	    nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
				htonl(st->search_restart)) ||
	    nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE,
				htonl(st->clash_resolve)))
				htonl(st->clash_resolve)) ||
	    nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG,
			 htonl(st->chaintoolong)))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
+2 −2
Original line number Diff line number Diff line
@@ -429,7 +429,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
	unsigned int nr_conntracks;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "entries  clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
		seq_puts(seq, "entries  clashres found new invalid ignore delete chainlength insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
		return 0;
	}

@@ -444,7 +444,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
		   st->invalid,
		   0,
		   0,
		   0,
		   st->chaintoolong,
		   st->insert,
		   st->insert_failed,
		   st->drop,