Commit c80f10bc authored by Pablo Neira Ayuso

netfilter: nf_conncount: speculative garbage collection on empty lists



Instead of removing an empty list node that might be reintroduced soon
thereafter, tentatively place the empty list node on the list passed to
tree_nodes_free(), then re-check that the list is still empty before
erasing it from the tree.

[ Florian: rebase on top of pending nf_conncount fixes ]
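
For reference, here is a minimal userspace sketch of the two-phase pattern described above (illustrative only, not the kernel code: struct node, collect_candidates() and free_candidates() are invented stand-ins for the rb-tree handling in tree_nodes_free()). Phase one merely remembers nodes whose list looked empty; phase two erases a candidate only if it is still empty when re-checked under its own lock, since another CPU may have re-added an entry in between.

/* Illustrative userspace sketch only; not the kernel implementation. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
	pthread_mutex_t lock;
	unsigned int count;	/* entries hanging off this node */
	bool erased;		/* stand-in for rb_erase() + RCU free */
};

/* Phase 1: collect candidates; nothing is erased yet. */
static unsigned int collect_candidates(struct node **nodes, unsigned int n,
				       struct node **gc, unsigned int max)
{
	unsigned int i, gc_count = 0;

	for (i = 0; i < n && gc_count < max; i++)
		if (nodes[i]->count == 0)
			gc[gc_count++] = nodes[i];
	return gc_count;
}

/* Phase 2: erase a candidate only if it is *still* empty under its lock. */
static void free_candidates(struct node **gc, unsigned int gc_count)
{
	while (gc_count) {
		struct node *node = gc[--gc_count];

		pthread_mutex_lock(&node->lock);
		if (node->count == 0)
			node->erased = true;
		pthread_mutex_unlock(&node->lock);
	}
}

int main(void)
{
	struct node a = { .lock = PTHREAD_MUTEX_INITIALIZER, .count = 0 };
	struct node b = { .lock = PTHREAD_MUTEX_INITIALIZER, .count = 2 };
	struct node *all[] = { &a, &b };
	struct node *gc[2];
	unsigned int n = collect_candidates(all, 2, gc, 2);

	a.count = 1;	/* another CPU repopulates 'a' before phase two */
	free_candidates(gc, n);
	printf("a erased: %d, b erased: %d\n", a.erased, b.erased);	/* 0 0 */
	return 0;
}

Node 'a' qualifies as a candidate in phase one but gains an entry before phase two, so the re-check keeps it; only a node that is still empty at that point would be erased.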

Fixes: 5c789e13 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent 2f971a8f
include/net/netfilter/nf_conncount.h +0 −1
@@ -9,7 +9,6 @@ struct nf_conncount_list {
	spinlock_t list_lock;
	struct list_head head;	/* connections with the same filtering key */
	unsigned int count;	/* length of list */
-	bool dead;
};

struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
net/netfilter/nf_conncount.c +15 −32
@@ -81,27 +81,20 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
	return memcmp(a, b, klen * sizeof(u32));
}

-static bool conn_free(struct nf_conncount_list *list,
+static void conn_free(struct nf_conncount_list *list,
		      struct nf_conncount_tuple *conn)
{
-	bool free_entry = false;

	lockdep_assert_held(&list->list_lock);

	list->count--;
	list_del(&conn->node);
-	if (list->count == 0) {
-		list->dead = true;
-		free_entry = true;
-	}

	kmem_cache_free(conncount_conn_cachep, conn);
-	return free_entry;
}

static const struct nf_conntrack_tuple_hash *
find_or_evict(struct net *net, struct nf_conncount_list *list,
-	      struct nf_conncount_tuple *conn, bool *free_entry)
+	      struct nf_conncount_tuple *conn)
{
	const struct nf_conntrack_tuple_hash *found;
	unsigned long a, b;
@@ -121,7 +114,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
	 */
	age = a - b;
	if (conn->cpu == cpu || age >= 2) {
-		*free_entry = conn_free(list, conn);
+		conn_free(list, conn);
		return ERR_PTR(-ENOENT);
	}

@@ -137,14 +130,13 @@ static int __nf_conncount_add(struct net *net,
	struct nf_conncount_tuple *conn, *conn_n;
	struct nf_conn *found_ct;
	unsigned int collect = 0;
-	bool free_entry = false;

	/* check the saved connections */
	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
		if (collect > CONNCOUNT_GC_MAX_NODES)
			break;

-		found = find_or_evict(net, list, conn, &free_entry);
+		found = find_or_evict(net, list, conn);
		if (IS_ERR(found)) {
			/* Not found, but might be about to be confirmed */
			if (PTR_ERR(found) == -EAGAIN) {
@@ -221,7 +213,6 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
	spin_lock_init(&list->list_lock);
	INIT_LIST_HEAD(&list->head);
	list->count = 0;
-	list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);

@@ -233,7 +224,6 @@ bool nf_conncount_gc_list(struct net *net,
	struct nf_conncount_tuple *conn, *conn_n;
	struct nf_conn *found_ct;
	unsigned int collected = 0;
-	bool free_entry = false;
	bool ret = false;

	/* don't bother if other cpu is already doing GC */
@@ -241,15 +231,10 @@ bool nf_conncount_gc_list(struct net *net,
		return false;

	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
-		found = find_or_evict(net, list, conn, &free_entry);
+		found = find_or_evict(net, list, conn);
		if (IS_ERR(found)) {
-			if (PTR_ERR(found) == -ENOENT)  {
-				if (free_entry) {
-					spin_unlock(&list->list_lock);
-					return true;
-				}
+			if (PTR_ERR(found) == -ENOENT)
				collected++;
-			}
			continue;
		}

@@ -260,10 +245,7 @@ bool nf_conncount_gc_list(struct net *net,
			 * closed already -> ditch it
			 */
			nf_ct_put(found_ct);
-			if (conn_free(list, conn)) {
-				spin_unlock(&list->list_lock);
-				return true;
-			}
+			conn_free(list, conn);
			collected++;
			continue;
		}
@@ -273,10 +255,8 @@ bool nf_conncount_gc_list(struct net *net,
			break;
	}

-	if (!list->count) {
-		list->dead = true;
+	if (!list->count)
		ret = true;
-	}
	spin_unlock(&list->list_lock);

	return ret;
@@ -291,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h)
	kmem_cache_free(conncount_rb_cachep, rbconn);
}

+/* caller must hold tree nf_conncount_locks[] lock */
static void tree_nodes_free(struct rb_root *root,
			    struct nf_conncount_rb *gc_nodes[],
			    unsigned int gc_count)
@@ -300,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root,
	while (gc_count) {
		rbconn = gc_nodes[--gc_count];
		spin_lock(&rbconn->list.list_lock);
+		if (!rbconn->list.count) {
			rb_erase(&rbconn->node, root);
			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+		}
		spin_unlock(&rbconn->list.list_lock);
	}
}
@@ -318,7 +301,6 @@ insert_tree(struct net *net,
	    struct rb_root *root,
	    unsigned int hash,
	    const u32 *key,
-	    u8 keylen,
	    const struct nf_conntrack_tuple *tuple,
	    const struct nf_conntrack_zone *zone)
{
@@ -327,6 +309,7 @@ insert_tree(struct net *net,
	struct nf_conncount_rb *rbconn;
	struct nf_conncount_tuple *conn;
	unsigned int count = 0, gc_count = 0;
+	u8 keylen = data->keylen;
	bool do_gc = true;

	spin_lock_bh(&nf_conncount_locks[hash]);
@@ -454,7 +437,7 @@ count_tree(struct net *net,
	if (!tuple)
		return 0;

-	return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+	return insert_tree(net, data, root, hash, key, tuple, zone);
}

static void tree_gc_worker(struct work_struct *work)