Commit ec8cb4f6 authored by Martin KaFai Lau, committed by Jakub Kicinski
Browse files

net: selftests: Stress reuseport listen



This patch adds a test that has 300 VIPs listening on port 443.
Each VIP:443 will have 80 listening socks by using SO_REUSEPORT.
Thus, it will have 24000 listening socks.

Before removing the port-only listening_hash, all socks were in the
same port 443 bucket and inet_reuseport_add_sock() spent a long time
walking through the bucket.  After removing the port-only listening_hash
and moving all usage to the port+addr lhash2, each bucket in the
ideal case has 80 sk, which is much smaller than before.

Here are the test results from a qemu VM:
Before: listen 24000 socks took 210.210485362 (~210s)
 After: listen 24000 socks took 0.207173      (~210ms)

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent cae3873c
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@ TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS += ndisc_unsolicited_na_test.sh
TEST_PROGS += stress_reuseport_listen.sh
TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES =  socket nettest
@@ -56,6 +57,7 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
TEST_PROGS += test_vxlan_vnifiltering.sh

TEST_FILES := settings
+105 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */

/* Test listening on the same port 443 with multiple VIPs.
 * Each VIP:443 will have multiple socks listening on it by using
 * SO_REUSEPORT.
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <time.h>
#include <arpa/inet.h>

/* Listening endpoint: address of the first VIP and the shared port. */
#define IP6_LADDR_START "2401:dead::1"
#define IP6_LPORT 443

/* Time unit conversion factors. */
#define NSEC_PER_SEC 1000000000L
#define NSEC_PER_USEC 1000L

/* Test dimensions; both are filled in from the command line by main(). */
static unsigned int nr_vips;		/* number of distinct VIP addresses */
static unsigned int nr_socks_per_vip;	/* number of socks bound to each VIP */

/* Create and bind every socket used by the test.
 *
 * For each of the nr_vips addresses, starting at IP6_LADDR_START and
 * advancing the low 32 bits of the address by one per VIP, open
 * nr_socks_per_vip TCP/IPv6 sockets, enable SO_REUSEPORT on each and
 * bind them all to <VIP>:IP6_LPORT.  The sockets are only bound here;
 * listen() is not called.
 *
 * Returns a heap array of nr_vips * nr_socks_per_vip fds, grouped by
 * VIP.  The caller owns the fds and the array.  Any failure is fatal:
 * error() prints a diagnostic and exits with status 1.
 */
static int *bind_reuseport_sock6(void)
{
	int *lfds, *cur_fd, err, optvalue = 1;
	struct sockaddr_in6 sa6 = {};
	unsigned int i, j;

	sa6.sin6_family = AF_INET6;
	sa6.sin6_port = htons(IP6_LPORT);
	err = inet_pton(AF_INET6, IP6_LADDR_START, &sa6.sin6_addr);
	if (err != 1)
		/* inet_pton() returns 0 for a malformed address (errno is
		 * not set) and -1 with errno set otherwise; its return
		 * value is never an errno code itself.
		 */
		error(1, err < 0 ? errno : 0, "inet_pton(%s)",
		      IP6_LADDR_START);

	/* Widen before multiplying so the element count cannot wrap in
	 * unsigned int; calloc() also checks count * size for overflow.
	 */
	lfds = calloc((size_t)nr_vips * nr_socks_per_vip, sizeof(lfds[0]));
	if (!lfds)
		error(1, errno, "cannot alloc array of lfds");

	cur_fd = lfds;
	for (i = 0; i < nr_vips; i++) {
		for (j = 0; j < nr_socks_per_vip; j++) {
			*cur_fd = socket(AF_INET6, SOCK_STREAM, 0);
			if (*cur_fd == -1)
				error(1, errno,
				      "lfds[%u,%u] = socket(AF_INET6)", i, j);

			err = setsockopt(*cur_fd, SOL_SOCKET, SO_REUSEPORT,
					 &optvalue, sizeof(optvalue));
			if (err)
				error(1, errno,
				      "setsockopt(lfds[%u,%u], SO_REUSEPORT)",
				      i, j);

			err = bind(*cur_fd, (struct sockaddr *)&sa6,
				   sizeof(sa6));
			if (err)
				error(1, errno, "bind(lfds[%u,%u])", i, j);
			cur_fd++;
		}

		/* The address is stored in network byte order: convert
		 * before incrementing so that the VIPs are consecutive
		 * and identical on little- and big-endian hosts.
		 */
		sa6.sin6_addr.s6_addr32[3] =
			htonl(ntohl(sa6.sin6_addr.s6_addr32[3]) + 1);
	}

	return lfds;
}

/* Parse @arg as a strictly positive decimal integer no larger than
 * INT_MAX.  Returns 0 and stores the value in @out on success, -1 if
 * @arg is empty, has trailing characters, or is out of range.
 */
static int parse_count(const char *arg, unsigned int *out)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (errno || end == arg || *end != '\0' || val <= 0 || val > INT_MAX)
		return -1;

	*out = (unsigned int)val;
	return 0;
}

/* Usage: stress_reuseport_listen <nr_vips> <nr_socks_per_vip>
 *
 * Binds nr_vips * nr_socks_per_vip SO_REUSEPORT sockets, then measures
 * with CLOCK_MONOTONIC how long it takes to call listen() on all of
 * them and prints the result as "<seconds>.<microseconds>".
 *
 * Exits with status 1 (via error()) on bad arguments or on any failing
 * call; returns 0 otherwise.
 */
int main(int argc, const char *argv[])
{
	struct timespec start_ts, end_ts;
	unsigned long long nr_total;
	unsigned int nr_lsocks, i;
	long long elapsed_ns;
	int *lfds;

	if (argc != 3 || parse_count(argv[1], &nr_vips) ||
	    parse_count(argv[2], &nr_socks_per_vip))
		error(1, 0, "Usage: %s <nr_vips> <nr_socks_per_vip>\n",
		      argv[0]);

	/* Multiply in a wider type so that a wrapped product is detected
	 * instead of silently listening on fewer socks than were bound.
	 */
	nr_total = (unsigned long long)nr_vips * nr_socks_per_vip;
	if (nr_total > UINT_MAX)
		error(1, 0, "%u vips * %u socks per vip is too large",
		      nr_vips, nr_socks_per_vip);
	nr_lsocks = (unsigned int)nr_total;
	lfds = bind_reuseport_sock6();

	/* Only the listen() calls are timed. */
	if (clock_gettime(CLOCK_MONOTONIC, &start_ts))
		error(1, errno, "clock_gettime(start)");
	for (i = 0; i < nr_lsocks; i++) {
		if (listen(lfds[i], 0))
			error(1, errno, "listen(lfds[%u])", i);
	}
	if (clock_gettime(CLOCK_MONOTONIC, &end_ts))
		error(1, errno, "clock_gettime(end)");

	/* Subtract per field in long long so the arithmetic cannot
	 * overflow where long is 32 bits wide.
	 */
	elapsed_ns = (long long)(end_ts.tv_sec - start_ts.tv_sec) *
		     NSEC_PER_SEC + (end_ts.tv_nsec - start_ts.tv_nsec);

	/* Whole seconds, then the sub-second remainder in microseconds,
	 * zero-padded to six digits so it reads as a decimal fraction.
	 */
	printf("listen %u socks took %lld.%06lld\n", nr_lsocks,
	       elapsed_ns / NSEC_PER_SEC,
	       (elapsed_ns % NSEC_PER_SEC) / NSEC_PER_USEC);

	for (i = 0; i < nr_lsocks; i++)
		close(lfds[i]);

	free(lfds);
	return 0;
}
+25 −0
Original line number Diff line number Diff line
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
#
# Run ./stress_reuseport_listen inside a dedicated network namespace
# with a file descriptor limit large enough for all listening socks.
# The exit status is that of the test program, or 1 if setup fails.

NS='stress_reuseport_listen_ns'
NR_FILES=24100
SAVED_NR_FILES=$(ulimit -n)

# Create the namespace, allow binding to addresses that are not
# configured on any interface, and raise the open-file limit.
# Returns non-zero as soon as one step fails.
setup() {
	ip netns add "$NS" || return 1
	ip netns exec "$NS" sysctl -q -w net.ipv6.ip_nonlocal_bind=1 || return 1
	ulimit -n "$NR_FILES" || return 1
}

# Undo setup(); installed as the EXIT trap so it runs on every exit path.
cleanup() {
	ip netns del "$NS"
	ulimit -n "$SAVED_NR_FILES"
}

trap cleanup EXIT
# Stop here if the environment could not be prepared: running the test
# anyway would only fail later with a misleading socket()/bind() error.
setup || exit 1
# 300 different vips listen on port 443
# Each vip:443 sockaddr has 80 LISTEN sock by using SO_REUSEPORT
# Total 24000 listening socks
ip netns exec "$NS" ./stress_reuseport_listen 300 80