Commit 95eabdd2 authored by Antoine Tenart, committed by Florian Westphal
Browse files

netfilter: conntrack: fix the gc rescheduling delay



Commit 2cfadb76 ("netfilter: conntrack: revisit gc autotuning")
changed the eviction rescheduling to use the average expiry of scanned
entries (within 1-60s) by doing:

  for (...) {
      expires = clamp(nf_ct_expires(tmp), ...);
      next_run += expires;
      next_run /= 2;
  }

The issue is that the above makes the average ('next_run' here) depend
more on the last expiration values than on the first ones (for sets > 2).
Depending on the expiration values used to compute the average, the
result can be quite different from what is expected. To fix this we can
compute an incremental mean instead, so that every entry is weighted
equally:

  for (...) {
      expires = clamp(nf_ct_expires(tmp), ...);
      next_run += (expires - next_run) / ++count;
  }

Fixes: 2cfadb76 ("netfilter: conntrack: revisit gc autotuning")
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
parent c29b0682
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@ struct conntrack_gc_work {
	struct delayed_work	dwork;
	u32			next_bucket;
	u32			avg_timeout;
	u32			count;
	u32			start_time;
	bool			exiting;
	bool			early_drop;
@@ -1466,6 +1467,7 @@ static void gc_worker(struct work_struct *work)
	unsigned int expired_count = 0;
	unsigned long next_run;
	s32 delta_time;
	long count;

	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);

@@ -1475,10 +1477,12 @@ static void gc_worker(struct work_struct *work)

	if (i == 0) {
		gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
		gc_work->count = 1;
		gc_work->start_time = start_time;
	}

	next_run = gc_work->avg_timeout;
	count = gc_work->count;

	end_time = start_time + GC_SCAN_MAX_DURATION;

@@ -1498,8 +1502,8 @@ static void gc_worker(struct work_struct *work)

		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
			struct nf_conntrack_net *cnet;
			unsigned long expires;
			struct net *net;
			long expires;

			tmp = nf_ct_tuplehash_to_ctrack(h);

@@ -1513,6 +1517,7 @@ static void gc_worker(struct work_struct *work)

				gc_work->next_bucket = i;
				gc_work->avg_timeout = next_run;
				gc_work->count = count;

				delta_time = nfct_time_stamp - gc_work->start_time;

@@ -1528,8 +1533,8 @@ static void gc_worker(struct work_struct *work)
			}

			expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
			expires = (expires - (long)next_run) / ++count;
			next_run += expires;
			next_run /= 2u;

			if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
				continue;
@@ -1570,6 +1575,7 @@ static void gc_worker(struct work_struct *work)
		delta_time = nfct_time_stamp - end_time;
		if (delta_time > 0 && i < hashsz) {
			gc_work->avg_timeout = next_run;
			gc_work->count = count;
			gc_work->next_bucket = i;
			next_run = 0;
			goto early_exit;