Commit a0619a9e authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'namespacify-mtu-ipv4'



xu xin says:

====================
ipv4: Namespaceify two sysctls related with mtu

The following patch series enables the min_pmtu and mtu_expires to
be visible and configurable per net namespace. Different namespace
application might have different requirements on the setting of
min_pmtu and mtu_expires.

If these two patches are applied, inside a net namespace we create,
we can see two more sysctls under /proc/sys/net/ipv4/route:
1. min_pmtu
2. mtu_expires

where min_pmtu and mtu_expires are configurable.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 7d714ff1 1135fad2
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -85,6 +85,9 @@ struct netns_ipv4 {
	int sysctl_icmp_ratelimit;
	int sysctl_icmp_ratemask;

	u32 ip_rt_min_pmtu;
	int ip_rt_mtu_expires;

	struct local_ports ip_local_ports;

	u8 sysctl_tcp_ecn;
+48 −26
Original line number Diff line number Diff line
@@ -110,14 +110,15 @@

#define RT_GC_TIMEOUT (300*HZ)

#define DEFAULT_MIN_PMTU (512 + 20 + 20)
#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static u32 ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;

static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
@@ -1018,13 +1019,13 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
	if (old_mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu) {
	if (mtu < net->ipv4.ip_rt_min_pmtu) {
		lock = true;
		mtu = min(old_mtu, ip_rt_min_pmtu);
		mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
	}

	if (rt->rt_pmtu == mtu && !lock &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
	    time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
@@ -1034,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
		fib_select_path(net, &res, fl4, NULL);
		nhc = FIB_RES_NHC(res);
		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
				      jiffies + ip_rt_mtu_expires);
				      jiffies + net->ipv4.ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
@@ -3534,21 +3535,6 @@ static struct ctl_table ipv4_route_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &ip_min_valid_pmtu,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
@@ -3561,13 +3547,28 @@ static struct ctl_table ipv4_route_table[] = {

static const char ipv4_route_flush_procname[] = "flush";

static struct ctl_table ipv4_route_flush_table[] = {
static struct ctl_table ipv4_route_netns_table[] = {
	{
		.procname	= ipv4_route_flush_procname,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{
		.procname       = "min_pmtu",
		.data           = &init_net.ipv4.ip_rt_min_pmtu,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec_minmax,
		.extra1         = &ip_min_valid_pmtu,
	},
	{
		.procname       = "mtu_expires",
		.data           = &init_net.ipv4.ip_rt_mtu_expires,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec_jiffies,
	},
	{ },
};

@@ -3575,9 +3576,11 @@ static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	tbl = ipv4_route_netns_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		int i;

		tbl = kmemdup(tbl, sizeof(ipv4_route_netns_table), GFP_KERNEL);
		if (!tbl)
			goto err_dup;

@@ -3586,6 +3589,12 @@ static __net_init int sysctl_route_net_init(struct net *net)
			if (tbl[0].procname != ipv4_route_flush_procname)
				tbl[0].procname = NULL;
		}

		/* Update the variables to point into the current struct net
		 * except for the first element flush
		 */
		for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++)
			tbl[i].data += (void *)net - (void *)&init_net;
	}
	tbl[0].extra1 = net;

@@ -3595,7 +3604,7 @@ static __net_init int sysctl_route_net_init(struct net *net)
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
	if (tbl != ipv4_route_netns_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
@@ -3607,7 +3616,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net)

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	BUG_ON(tbl == ipv4_route_netns_table);
	kfree(tbl);
}

@@ -3617,6 +3626,18 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
};
#endif

static __net_init int netns_ip_rt_init(struct net *net)
{
	/* Set default value for namespaceified sysctls */
	net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
	net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
	return 0;
}

static struct pernet_operations __net_initdata ip_rt_ops = {
	.init = netns_ip_rt_init,
};

static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
@@ -3722,6 +3743,7 @@ int __init ip_rt_init(void)
#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&ip_rt_ops);
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return 0;