Commit de4c5efe authored by David S. Miller
Browse files


Florian Westphal says:

====================
These are netfilter fixes for the *net* tree.

First patch resolves a false-positive lockdep splat:
rcu_dereference is used outside of rcu read lock.  Let lockdep
validate that the transaction mutex is locked.

Second patch fixes a kdoc warning added in previous PR.

Third patch fixes a memory leak:
The catchall element isn't disabled correctly, this allows
userspace to deactivate the element again. This results in refcount
underflow which in turn prevents memory release. This was always
broken since the feature was added in 5.13.

Patch 4 fixes an incorrect change in the previous pull request:
Adding a duplicate key to a set should work if the duplicate key
has expired, restore this behaviour. All from myself.

Patch #5 resolves an old historic artifact in sctp conntrack:
a 300ms timeout for the shutdown_sent and shutdown_recd states.
Increase this to 3s.  From Xin Long.

Patch #6 fixes a sysctl data race in ipvs, two threads can clobber the
sysctl value, from Sishuai Gong. This is a day-0 bug that predates git
history.

Patches 7, 8 and 9, from Pablo Neira Ayuso, are also followups
for the previous GC rework in nf_tables: The netlink notifier and the
netns exit path must both increment the gc worker seqcount, else worker
may encounter stale (free'd) pointers.
================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents b35c9683 23185c6a
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds)
	Default is set to (hb_interval * path_max_retrans + rto_max)

nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
	default 0.3
	default 3

nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
	default 0.3
	default 3

nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
	default 3
+1 −0
Original line number Diff line number Diff line
@@ -534,6 +534,7 @@ struct nft_set_elem_expr {
 *	@expr: stateful expression
 * 	@ops: set ops
 * 	@flags: set flags
 *	@dead: set will be freed, never cleared
 *	@genmask: generation mask
 * 	@klen: key length
 * 	@dlen: data length
+4 −0
Original line number Diff line number Diff line
@@ -1876,6 +1876,7 @@ static int
proc_do_sync_threshold(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	struct netns_ipvs *ipvs = table->extra2;
	int *valp = table->data;
	int val[2];
	int rc;
@@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
		.mode = table->mode,
	};

	mutex_lock(&ipvs->sync_mutex);
	memcpy(val, valp, sizeof(val));
	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write) {
@@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
		else
			memcpy(valp, val, sizeof(val));
	}
	mutex_unlock(&ipvs->sync_mutex);
	return rc;
}

@@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx].extra2 = ipvs;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
+3 −3
Original line number Diff line number Diff line
@@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
	[SCTP_CONNTRACK_COOKIE_WAIT]		= 3 SECS,
	[SCTP_CONNTRACK_COOKIE_ECHOED]		= 3 SECS,
	[SCTP_CONNTRACK_ESTABLISHED]		= 210 SECS,
	[SCTP_CONNTRACK_SHUTDOWN_SENT]		= 300 SECS / 1000,
	[SCTP_CONNTRACK_SHUTDOWN_RECD]		= 300 SECS / 1000,
	[SCTP_CONNTRACK_SHUTDOWN_SENT]		= 3 SECS,
	[SCTP_CONNTRACK_SHUTDOWN_RECD]		= 3 SECS,
	[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]	= 3 SECS,
	[SCTP_CONNTRACK_HEARTBEAT_SENT]		= 30 SECS,
};
@@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
	{
/*	ORIGINAL	*/
/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
/* init         */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
/* init         */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
/* init_ack     */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
/* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
+39 −5
Original line number Diff line number Diff line
@@ -7091,6 +7091,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
		ret = __nft_set_catchall_flush(ctx, set, &elem);
		if (ret < 0)
			break;
		nft_set_elem_change_active(ctx->net, set, ext);
	}

	return ret;
@@ -9480,9 +9481,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
	if (!trans)
		return NULL;

	trans->net = maybe_get_net(net);
	if (!trans->net) {
		kfree(trans);
		return NULL;
	}

	refcount_inc(&set->refs);
	trans->set = set;
	trans->net = get_net(net);
	trans->seq = gc_seq;

	return trans;
@@ -9738,6 +9744,22 @@ static void nft_set_commit_update(struct list_head *set_update_list)
	}
}

/*
 * Open a gc-sequence section: advance nft_net->gc_seq by one and hand the
 * new value back to the caller, who passes it to nft_gc_seq_end() later.
 */
static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
{
	/* Bump gc counter, it becomes odd, this is the busy mark. */
	unsigned int busy_seq = READ_ONCE(nft_net->gc_seq) + 1;

	WRITE_ONCE(nft_net->gc_seq, busy_seq);

	return busy_seq;
}

/*
 * Close a gc-sequence section: publish gc_seq + 1 as the new value of
 * nft_net->gc_seq. @gc_seq is expected to be the value previously returned
 * by nft_gc_seq_begin().
 */
static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
{
	unsigned int idle_seq = gc_seq + 1;

	WRITE_ONCE(nft_net->gc_seq, idle_seq);
}

static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
	struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9823,9 +9845,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)

	WRITE_ONCE(nft_net->base_seq, base_seq);

	/* Bump gc counter, it becomes odd, this is the busy mark. */
	gc_seq = READ_ONCE(nft_net->gc_seq);
	WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
	gc_seq = nft_gc_seq_begin(nft_net);

	/* step 3. Start new generation, rules_gen_X now in use. */
	net->nft.gencursor = nft_gencursor_next(net);
@@ -10038,7 +10058,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
	nf_tables_commit_audit_log(&adl, nft_net->base_seq);

	WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
	nft_gc_seq_end(nft_net, gc_seq);
	nf_tables_commit_release(net);

	return 0;
@@ -11039,6 +11059,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
	struct net *net = n->net;
	unsigned int deleted;
	bool restart = false;
	unsigned int gc_seq;

	if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
		return NOTIFY_DONE;
@@ -11046,6 +11067,9 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
	nft_net = nft_pernet(net);
	deleted = 0;
	mutex_lock(&nft_net->commit_mutex);

	gc_seq = nft_gc_seq_begin(nft_net);

	if (!list_empty(&nf_tables_destroy_list))
		rcu_barrier();
again:
@@ -11068,6 +11092,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
		if (restart)
			goto again;
	}
	nft_gc_seq_end(nft_net, gc_seq);

	mutex_unlock(&nft_net->commit_mutex);

	return NOTIFY_DONE;
@@ -11105,12 +11131,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
	struct nftables_pernet *nft_net = nft_pernet(net);
	unsigned int gc_seq;

	mutex_lock(&nft_net->commit_mutex);

	gc_seq = nft_gc_seq_begin(nft_net);

	if (!list_empty(&nft_net->commit_list) ||
	    !list_empty(&nft_net->module_list))
		__nf_tables_abort(net, NFNL_ABORT_NONE);

	__nft_release_tables(net);

	nft_gc_seq_end(nft_net, gc_seq);

	mutex_unlock(&nft_net->commit_mutex);
	WARN_ON_ONCE(!list_empty(&nft_net->tables));
	WARN_ON_ONCE(!list_empty(&nft_net->module_list));
Loading