Commit 726c5265 authored by Lu Wei's avatar Lu Wei Committed by Liu Jian
Browse files

tcp/dccp: Add another way to allocate local ports in connect()

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7AO8G


CVE: NA

--------------------------------

Commit 07f4c900 ("tcp/dccp: try to not exhaust ip_local_port_range
in connect()") allocates even ports for connect() first while leaving
odd ports for bind() and this works well in busy servers.

But this strategy causes severe performance degradation in busy clients.
When a client has used more than half of the local ports set in
/proc/sys/net/ipv4/ip_local_port_range, if this client tries to connect
to a server again, the connect time increases rapidly since it will
traverse all the even ports even though they are exhausted.

So this patch provides another strategy by introducing a system option:
local_port_allocation. If it is a busy client, users should set it to 1
to use sequential allocation, while it should be set to 0 in other
situations. Its default value is 0.

Signed-off-by: default avatarLu Wei <luwei32@huawei.com>
Signed-off-by: default avatarLiu Jian <liujian56@huawei.com>
parent 6bc20a2c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -258,6 +258,7 @@ extern long sysctl_tcp_mem[3];
extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern unsigned long tcp_memory_pressure;
extern int sysctl_local_port_allocation;

/* optimized version of sk_under_memory_pressure() for TCP sockets */
static inline bool tcp_under_memory_pressure(const struct sock *sk)
+8 −3
Original line number Diff line number Diff line
@@ -742,7 +742,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb;
	u32 remaining, offset;
	int ret, i, low, high;
	int ret, i, low, high, span_size;
	int l3mdev;
	u32 index;

@@ -762,6 +762,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		local_bh_enable();
		return ret;
	}
	/* local_port_allocation 0 means even and odd port allocation strategy
	 * will be applied, so span size is 2; otherwise sequential allocation
	 * will be used and span size is 1. Default value is 0.
	 */
	span_size = sysctl_local_port_allocation ? 1 : 2;

	l3mdev = inet_sk_bound_l3mdev(sk);

@@ -783,7 +788,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
	offset &= ~1U;
other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += 2, port += 2) {
	for (i = 0; i < remaining; i += span_size, port += span_size) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
@@ -824,7 +829,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
	}

	offset++;
	if ((offset & 1) && remaining > 1)
	if ((offset & 1) && remaining > 1 && span_size == 2)
		goto other_parity_scan;

	return -EADDRNOTAVAIL;
+8 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@ static int comp_sack_nr_max = 255;
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;

int sysctl_local_port_allocation;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;

@@ -575,6 +576,13 @@ static struct ctl_table ipv4_table[] = {
		.extra2		= SYSCTL_ONE,
	},
#endif
	{
		.procname	= "local_port_allocation",
		.data		= &sysctl_local_port_allocation,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};