Commit 24138933 authored by Florian Westphal's avatar Florian Westphal Committed by Pablo Neira Ayuso
Browse files

netfilter: nf_tables: don't skip expired elements during walk



There is an asymmetry between commit/abort and preparation phase if the
following conditions are met:

1. set is a verdict map ("1.2.3.4 : jump foo")
2. timeouts are enabled

In this case, following sequence is problematic:

1. element E in set S refers to chain C
2. userspace requests removal of set S
3. kernel does a set walk to decrement chain->use count for all elements
   from preparation phase
4. kernel does another set walk to remove elements from the commit phase
   (or another walk to do a chain->use increment for all elements from
    abort phase)

If E has already expired in 1), it will be ignored during list walk, so its use count
won't have been changed.

Then, when set is culled, ->destroy callback will zap the element via
nf_tables_set_elem_destroy(), but this function is only safe for
elements that have been deactivated earlier from the preparation phase:
lack of earlier deactivate removes the element but leaks the chain use
count, which results in a WARN splat when the chain gets removed later,
plus a leak of the nft_chain structure.

Update pipapo_get() not to skip expired elements, otherwise flush
command reports bogus ENOENT errors.

Fixes: 3c4287f6 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Fixes: 8d8540c4 ("netfilter: nft_set_rbtree: add timeout support")
Fixes: 9d098292 ("netfilter: nft_hash: add support for timeouts")
Signed-off-by: default avatarFlorian Westphal <fw@strlen.de>
Signed-off-by: default avatarPablo Neira Ayuso <pablo@netfilter.org>
parent c5ccff70
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -5602,8 +5602,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
				  const struct nft_set_iter *iter,
				  struct nft_set_elem *elem)
{
	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
	struct nft_set_dump_args *args;

	if (nft_set_elem_expired(ext))
		return 0;

	args = container_of(iter, struct nft_set_dump_args, iter);
	return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
+0 −2
Original line number Diff line number Diff line
@@ -278,8 +278,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,

		if (iter->count < iter->skip)
			goto cont;
		if (nft_set_elem_expired(&he->ext))
			goto cont;
		if (!nft_set_elem_active(&he->ext, iter->genmask))
			goto cont;

+12 −6
Original line number Diff line number Diff line
@@ -566,8 +566,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
			goto out;

		if (last) {
			if (nft_set_elem_expired(&f->mt[b].e->ext) ||
			    (genmask &&
			if ((genmask &&
			     !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
				goto next_match;

@@ -601,8 +600,17 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net,
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
			    const struct nft_set_elem *elem, unsigned int flags)
{
	return pipapo_get(net, set, (const u8 *)elem->key.val.data,
	struct nft_pipapo_elem *ret;

	ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
			 nft_genmask_cur(net));
	if (IS_ERR(ret))
		return ret;

	if (nft_set_elem_expired(&ret->ext))
		return ERR_PTR(-ENOENT);

	return ret;
}

/**
@@ -2005,8 +2013,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
			goto cont;

		e = f->mt[r].e;
		if (nft_set_elem_expired(&e->ext))
			goto cont;

		elem.priv = e;

+0 −2
Original line number Diff line number Diff line
@@ -552,8 +552,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,

		if (iter->count < iter->skip)
			goto cont;
		if (nft_set_elem_expired(&rbe->ext))
			goto cont;
		if (!nft_set_elem_active(&rbe->ext, iter->genmask))
			goto cont;