Commit 0a6720ed authored by Litao Jiao
Browse files

net/smc: Unbind r/w buffer size from clcsock and make them tunable

mainline inclusion
from mainline-v6.1-rc1
commit 0227f058
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I76JHC
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/net/smc?id=0227f058aa29f5ab6f6ec79c3a36ae41f1e03a13



--------------------------------

Currently, SMC uses smc->sk.sk_{rcv|snd}buf to size the send buffer and
the RMB, and the values of these buffer sizes are taken from
tcp_{w|r}mem of the clcsock.

The buffer sizes taken from the TCP socket don't fit SMC well. SMC-R/-D
generally need larger buffers than TCP to reach higher performance,
because they use different underlying devices and paths.

So this patch unbinds buffer size from TCP, and introduces two sysctl
knobs to tune them independently. Also, these knobs are per net
namespace and work for containers.

Signed-off-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Litao Jiao <jiaolitao@sangfor.com.cn>
parent f7ffd1b2
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -19,3 +19,21 @@ smcr_buf_type - INTEGER
        - 1 - Use virtually contiguous buffers
        - 2 - Mixed use of the two types. Try physically contiguous buffers first.
          If not available, use virtually contiguous buffers then.

wmem - INTEGER
	Initial size of send buffer used by SMC sockets.
	The default value inherits from net.ipv4.tcp_wmem[1].

	The minimum value is 16KiB. There is no hard limit on the maximum value,
	but only up to 512KiB is allowed for SMC-R and up to 1MiB for SMC-D.

	Default: 16K

rmem - INTEGER
	Initial size of receive buffer (RMB) used by SMC sockets.
	The default value inherits from net.ipv4.tcp_rmem[1].

	The minimum value is 16KiB. There is no hard limit on the maximum value,
	but only up to 512KiB is allowed for SMC-R and up to 1MiB for SMC-D.

	Default: 128K
+2 −0
Original line number Diff line number Diff line
@@ -7,5 +7,7 @@ struct netns_smc {
	struct ctl_table_header		*smc_hdr;
#endif
	unsigned int			sysctl_smcr_buf_type;
	int				sysctl_wmem;
	int				sysctl_rmem;
};
#endif
+3 −2
Original line number Diff line number Diff line
@@ -267,6 +267,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
	WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
	smc = smc_sk(sk);
	for (i = 0; i < SMC_MAX_TCP_LISTEN_WORKS; i++) {
		smc->tcp_listen_works[i].smc = smc;
@@ -2602,8 +2604,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);


out:
	return rc;
+4 −4
Original line number Diff line number Diff line
@@ -1756,10 +1756,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
		sk_buf_size = smc->sk.sk_rcvbuf;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;
		sk_buf_size = smc->sk.sk_sndbuf;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
	     bufsize_short >= 0; bufsize_short--) {
@@ -1808,7 +1808,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		smc->sk.sk_rcvbuf = bufsize;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
@@ -1816,7 +1816,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		smc->sk.sk_sndbuf = bufsize;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
+20 −1
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@
#include "smc_core.h"

static int two = 2;
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;

static struct ctl_table smc_table[] = {
	{
@@ -29,6 +31,22 @@ static struct ctl_table smc_table[] = {
		.extra1		= SYSCTL_ZERO,
		.extra2		= &two,
	},
	{
		.procname	= "wmem",
		.data		= &init_net.smc.sysctl_wmem,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem",
		.data		= &init_net.smc.sysctl_rmem,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{  }
};

@@ -53,7 +71,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
		goto err_reg;

	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;

	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
	return 0;

err_reg: