Commit 0b2f3212 authored by Florian Westphal
Browse files

netfilter: nfnetlink: re-enable conntrack expectation events



To avoid allocation of the conntrack extension area when possible,
the default behaviour was changed to only allocate the event extension
if a userspace program is subscribed to a notification group.

The problem is that while 'conntrack -E' does enable the event allocation
behind the scenes, 'conntrack -E expect' does not: no expectation events
are delivered unless the user sets
"net.netfilter.nf_conntrack_events" back to 1 (always on).

Fix the autodetection to also consider EXP type group.

We need to track the 6 event groups (3+3, new/update/destroy for events and
for expectations each) independently, else we'd disable events again
if an expectation group becomes empty while there is still an active
event group.

Fixes: 2794cdb0 ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist")
Reported-by: Yi Chen <yiche@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
parent 2024439b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -95,7 +95,7 @@ struct nf_ip_net {

struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
	bool ctnetlink_has_listener;
	u8 ctnetlink_has_listener;
	bool ecache_dwork_pending;
#endif
	u8			sysctl_log_invalid; /* Log invalid packets */
+71 −12
Original line number Diff line number Diff line
@@ -44,6 +44,10 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");

static unsigned int nfnetlink_pernet_id __read_mostly;

#ifdef CONFIG_NF_CONNTRACK_EVENTS
/* Serializes read-modify-write updates of net->ct.ctnetlink_has_listener
 * performed from the bind/unbind paths in this file. Readers elsewhere
 * access the field without taking this lock.
 */
static DEFINE_SPINLOCK(nfnl_grp_active_lock);
#endif

/* Per-network-namespace nfnetlink state.
 * NOTE(review): presumably looked up via nfnetlink_pernet_id -- confirm.
 */
struct nfnl_net {
	struct sock *nfnl;	/* presumably the nfnetlink kernel socket -- confirm */
};
@@ -654,6 +658,44 @@ static void nfnetlink_rcv(struct sk_buff *skb)
		netlink_rcv_skb(skb, nfnetlink_rcv_msg);
}

/*
 * Mark @group as having a listener in @net's ctnetlink_has_listener bitmask.
 *
 * Only groups that map to the conntrack or conntrack-expectation subsystem
 * are recorded; every other group is ignored. Does nothing at all when
 * CONFIG_NF_CONNTRACK_EVENTS is not set.
 */
static void nfnetlink_bind_event(struct net *net, unsigned int group)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
	u8 active, mask;
	int subsys;

	/* All NFNLGRP_CONNTRACK_* group bits fit into u8.
	 * The other groups are not relevant and can be ignored.
	 */
	if (group >= 8)
		return;

	subsys = nfnl_group2type[group];
	if (subsys != NFNL_SUBSYS_CTNETLINK &&
	    subsys != NFNL_SUBSYS_CTNETLINK_EXP)
		return;

	/* group < 8, so the shifted bit always fits into a u8 */
	mask = 1 << group;

	spin_lock(&nfnl_grp_active_lock);

	active = READ_ONCE(net->ct.ctnetlink_has_listener);
	if (!(active & mask)) {
		active |= mask;

		/* read concurrently without nfnl_grp_active_lock held. */
		WRITE_ONCE(net->ct.ctnetlink_has_listener, active);
	}

	spin_unlock(&nfnl_grp_active_lock);
#endif
}

static int nfnetlink_bind(struct net *net, int group)
{
	const struct nfnetlink_subsystem *ss;
@@ -670,28 +712,45 @@ static int nfnetlink_bind(struct net *net, int group)
	if (!ss)
		request_module_nowait("nfnetlink-subsys-%d", type);

#ifdef CONFIG_NF_CONNTRACK_EVENTS
	if (type == NFNL_SUBSYS_CTNETLINK) {
		nfnl_lock(NFNL_SUBSYS_CTNETLINK);
		WRITE_ONCE(net->ct.ctnetlink_has_listener, true);
		nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
	}
#endif
	nfnetlink_bind_event(net, group);
	return 0;
}

static void nfnetlink_unbind(struct net *net, int group)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
	int type, group_bit;

	if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
		return;

	if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) {
		nfnl_lock(NFNL_SUBSYS_CTNETLINK);
		if (!nfnetlink_has_listeners(net, group))
			WRITE_ONCE(net->ct.ctnetlink_has_listener, false);
		nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
	type = nfnl_group2type[group];

	switch (type) {
	case NFNL_SUBSYS_CTNETLINK:
		break;
	case NFNL_SUBSYS_CTNETLINK_EXP:
		break;
	default:
		return;
	}

	/* ctnetlink_has_listener is u8 */
	if (group >= 8)
		return;

	group_bit = (1 << group);

	spin_lock(&nfnl_grp_active_lock);
	if (!nfnetlink_has_listeners(net, group)) {
		u8 v = READ_ONCE(net->ct.ctnetlink_has_listener);

		v &= ~group_bit;

		/* read concurrently without nfnl_grp_active_lock held. */
		WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
	}
	spin_unlock(&nfnl_grp_active_lock);
#endif
}