Commit ce5c9c20 authored by Ido Schimmel, committed by David S. Miller
Browse files

ipv4: Add a sysctl to control multipath hash fields



A subsequent patch will add a new multipath hash policy where the packet
fields used for multipath hash calculation are determined by user space.
This patch adds a sysctl that allows user space to set these fields.

The packet fields are represented using a bitmask and are common between
IPv4 and IPv6 to allow user space to use the same numbering across both
protocols. For example, to hash based on standard 5-tuple:

 # sysctl -w net.ipv4.fib_multipath_hash_fields=0x0037
 net.ipv4.fib_multipath_hash_fields = 0x0037

The kernel rejects unknown fields, for example:

 # sysctl -w net.ipv4.fib_multipath_hash_fields=0x1000
 sysctl: setting key "net.ipv4.fib_multipath_hash_fields": Invalid argument

More fields can be added in the future, if needed.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 2e68ea92
Loading
Loading
Loading
Loading
+27 −0
Original line number Diff line number Diff line
@@ -100,6 +100,33 @@ fib_multipath_hash_policy - INTEGER
	- 1 - Layer 4
	- 2 - Layer 3 or inner Layer 3 if present

fib_multipath_hash_fields - UNSIGNED INTEGER
	When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
	fields used for multipath hash calculation are determined by this
	sysctl.

	This value is a bitmask which enables various fields for multipath hash
	calculation.

	Possible fields are:

	====== ============================
	0x0001 Source IP address
	0x0002 Destination IP address
	0x0004 IP protocol
	0x0008 Unused (Flow Label)
	0x0010 Source port
	0x0020 Destination port
	0x0040 Inner source IP address
	0x0080 Inner destination IP address
	0x0100 Inner IP protocol
	0x0200 Inner Flow Label
	0x0400 Inner source port
	0x0800 Inner destination port
	====== ============================

	Default: 0x0007 (source IP, destination IP and IP protocol)

fib_sync_mem - UNSIGNED INTEGER
	Amount of dirty memory from fib entries that can be backlogged before
	synchronize_rcu is forced.
+43 −0
Original line number Diff line number Diff line
@@ -466,6 +466,49 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);

/* Bit flags stored in sysctl_fib_multipath_hash_fields.
 *
 * The numbering is shared by IPv4 and IPv6 and is user API: existing bits
 * must keep their position, so new fields may only be appended after the
 * last one.
 */

/* Outer packet headers */
#define FIB_MULTIPATH_HASH_FIELD_SRC_IP			BIT(0)
#define FIB_MULTIPATH_HASH_FIELD_DST_IP			BIT(1)
#define FIB_MULTIPATH_HASH_FIELD_IP_PROTO		BIT(2)
#define FIB_MULTIPATH_HASH_FIELD_FLOWLABEL		BIT(3)
#define FIB_MULTIPATH_HASH_FIELD_SRC_PORT		BIT(4)
#define FIB_MULTIPATH_HASH_FIELD_DST_PORT		BIT(5)

/* Inner packet headers */
#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP		BIT(6)
#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP		BIT(7)
#define FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO		BIT(8)
#define FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL	BIT(9)
#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT		BIT(10)
#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT		BIT(11)

/* Default selection: the outer 3-tuple (addresses and IP protocol) */
#define FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK				\
	(FIB_MULTIPATH_HASH_FIELD_SRC_IP |				\
	 FIB_MULTIPATH_HASH_FIELD_DST_IP |				\
	 FIB_MULTIPATH_HASH_FIELD_IP_PROTO)

/* Every outer field: addresses, protocol / flow label, ports */
#define FIB_MULTIPATH_HASH_FIELD_OUTER_MASK				\
	(FIB_MULTIPATH_HASH_FIELD_SRC_IP |				\
	 FIB_MULTIPATH_HASH_FIELD_DST_IP |				\
	 FIB_MULTIPATH_HASH_FIELD_IP_PROTO |				\
	 FIB_MULTIPATH_HASH_FIELD_FLOWLABEL |				\
	 FIB_MULTIPATH_HASH_FIELD_SRC_PORT |				\
	 FIB_MULTIPATH_HASH_FIELD_DST_PORT)

/* Every inner field: addresses, protocol / flow label, ports */
#define FIB_MULTIPATH_HASH_FIELD_INNER_MASK				\
	(FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP |			\
	 FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP |			\
	 FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO |			\
	 FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL |			\
	 FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT |			\
	 FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)

/* Union of all known fields, outer and inner */
#define FIB_MULTIPATH_HASH_FIELD_ALL_MASK				\
	(FIB_MULTIPATH_HASH_FIELD_OUTER_MASK |				\
	 FIB_MULTIPATH_HASH_FIELD_INNER_MASK)

#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
		       const struct sk_buff *skb, struct flow_keys *flkeys);
+1 −0
Original line number Diff line number Diff line
@@ -210,6 +210,7 @@ struct netns_ipv4 {
#endif
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	u32 sysctl_fib_multipath_hash_fields;
	u8 sysctl_fib_multipath_use_neigh;
	u8 sysctl_fib_multipath_hash_policy;
#endif
+6 −0
Original line number Diff line number Diff line
@@ -1514,6 +1514,12 @@ static int __net_init ip_fib_net_init(struct net *net)
	if (err)
		return err;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Default to 3-tuple */
	net->ipv4.sysctl_fib_multipath_hash_fields =
		FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
#endif

	/* Avoid false sharing : Use at least a full cache line */
	size = max_t(size_t, size, L1_CACHE_BYTES);

+12 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
@@ -48,6 +49,8 @@ static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
/* Upper bound for the fib_multipath_hash_fields sysctl: every known hash
 * field bit set. The sysctl table entry points at it through .extra2.
 * Marked __maybe_unused because that entry lives in a conditionally
 * compiled part of the table, so the variable may have no user.
 */
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
	FIB_MULTIPATH_HASH_FIELD_ALL_MASK;

/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -1052,6 +1055,15 @@ static struct ctl_table ipv4_net_table[] = {
		.extra1		= SYSCTL_ZERO,
		.extra2		= &two,
	},
	{
		/* Bitmask of FIB_MULTIPATH_HASH_FIELD_* bits selecting the
		 * packet fields used for multipath hash calculation.
		 *
		 * extra1/extra2 are the lower/upper bounds handed to the
		 * min/max handler: the value must lie between 1 and the
		 * all-fields mask. Because the known bits are contiguous
		 * from bit 0, this range check also rejects any value that
		 * has an unknown (higher) bit set, and it rejects an empty
		 * mask.
		 */
		.procname	= "fib_multipath_hash_fields",
		.data		= &init_net.ipv4.sysctl_fib_multipath_hash_fields,
		.maxlen		= sizeof(u32),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &fib_multipath_hash_fields_all_mask,
	},
#endif
	{
		.procname	= "ip_unprivileged_port_start",