Commit bbc73e68 authored by Alexei Starovoitov
Browse files

Merge branch 'Add FOU support for externally controlled ipip devices'



Christian Ehrig says:

====================

This patch set adds support for using FOU or GUE encapsulation with
an ipip device operating in collect-metadata mode and a set of kfuncs
for controlling encap parameters exposed to a BPF tc-hook.

BPF tc-hooks allow us to read tunnel metadata (like remote IP addresses)
in the ingress path of an externally controlled tunnel interface via
the bpf_skb_get_tunnel_{key,opt} bpf-helpers. Packets can then be
redirected to the same or a different externally controlled tunnel
interface by overwriting metadata via the bpf_skb_set_tunnel_{key,opt}
helpers and a call to bpf_redirect. This enables us to redirect packets
between tunnel interfaces - and potentially change the encapsulation
type - using only a single BPF program.

Today this approach works fine for a couple of tunnel combinations.
For example: redirecting packets between Geneve and GRE interfaces or
GRE and plain ipip interfaces. However, redirecting using FOU or GUE is
not supported today. The ip_tunnel module does not allow us to egress
packets using additional UDP encapsulation from an ipip device in
collect-metadata mode.

Patch 1 lifts this restriction by adding a struct ip_tunnel_encap to
the tunnel metadata. It can be filled by a new BPF kfunc introduced
in Patch 2 and evaluated by the ip_tunnel egress path. This will allow
us to use FOU and GUE encap with externally controlled ipip devices.

Patch 2 introduces two new BPF kfuncs: bpf_skb_{set,get}_fou_encap.
These helpers can be used to set and get UDP encap parameters from the
BPF tc-hook doing the packet redirect.

Patch 3 adds BPF tunnel selftests using the two kfuncs.
---
v3:
 - Integrate selftest into test_progs (Alexei)
v2:
 - Fixes for checkpatch.pl
 - Fixes for kernel test robot
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents ed17aa92 d9688f89
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -17,4 +17,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		       u8 *protocol, __be16 *sport, int type);

int register_fou_bpf(void);

#endif
+15 −13
Original line number Diff line number Diff line
@@ -57,6 +57,13 @@ struct ip_tunnel_key {
	__u8			flow_flags;
};

/* Additional encapsulation parameters for an IP tunnel.
 *
 * @type	Encapsulation type. TUNNEL_ENCAP_NONE means no additional
 *		encapsulation; otherwise the value indexes the registered
 *		encap ops and must be below MAX_IPTUN_ENCAP_OPS.
 * @flags	Encapsulation flags, e.g. TUNNEL_ENCAP_FLAG_CSUM.
 * @sport	UDP source port of the encapsulation header.
 * @dport	UDP destination port of the encapsulation header.
 */
struct ip_tunnel_encap {
	u16			type;
	u16			flags;
	__be16			sport;
	__be16			dport;
};

/* Flags for ip_tunnel_info mode. */
#define IP_TUNNEL_INFO_TX	0x01	/* represents tx tunnel parameters */
#define IP_TUNNEL_INFO_IPV6	0x02	/* key contains IPv6 addresses */
@@ -66,9 +73,9 @@ struct ip_tunnel_key {
#define IP_TUNNEL_OPTS_MAX					\
	GENMASK((sizeof_field(struct ip_tunnel_info,		\
			      options_len) * BITS_PER_BYTE) - 1, 0)

struct ip_tunnel_info {
	struct ip_tunnel_key	key;
	struct ip_tunnel_encap	encap;
#ifdef CONFIG_DST_CACHE
	struct dst_cache	dst_cache;
#endif
@@ -86,13 +93,6 @@ struct ip_tunnel_6rd_parm {
};
#endif

/* Additional encapsulation parameters for an IP tunnel: the encap type
 * (TUNNEL_ENCAP_NONE when unused), encap flags such as
 * TUNNEL_ENCAP_FLAG_CSUM, and the UDP source/destination ports.
 */
struct ip_tunnel_encap {
	u16			type;
	u16			flags;
	__be16			sport;
	__be16			dport;
};

struct ip_tunnel_prl_entry {
	struct ip_tunnel_prl_entry __rcu *next;
	__be32				addr;
@@ -293,6 +293,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   __be32 remote, __be32 local,
				   __be32 key);

void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info);
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error);
@@ -371,22 +372,23 @@ static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
	return hlen;
}

static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
static inline int ip_tunnel_encap(struct sk_buff *skb,
				  struct ip_tunnel_encap *e,
				  u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
	if (e->type == TUNNEL_ENCAP_NONE)
		return 0;

	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
	if (e->type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	ops = rcu_dereference(iptun_encaps[e->type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
		ret = ops->build_header(skb, e, protocol, fl4);
	rcu_read_unlock();

	return ret;
+1 −1
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
fou-y := fou_core.o fou_nl.o
fou-y := fou_core.o fou_nl.o fou_bpf.o
obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o

net/ipv4/fou_bpf.c

0 → 100644
+119 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/* Unstable Fou Helpers for TC-BPF hook
 *
 * These are called from SCHED_CLS BPF programs. Note that it is
 * allowed to break compatibility for these functions since the interface they
 * are exposed through to BPF programs is explicitly unstable.
 */

#include <linux/bpf.h>
#include <linux/btf_ids.h>

#include <net/dst_metadata.h>
#include <net/fou.h>

/* UDP encap ports exchanged with BPF programs through
 * bpf_skb_set_fou_encap() and bpf_skb_get_fou_encap().
 *
 * @sport	UDP source port. When passed to bpf_skb_set_fou_encap(), 0
 *		asks the kernel to auto-assign a port.
 * @dport	UDP destination port.
 */
struct bpf_fou_encap {
	__be16 sport;
	__be16 dport;
};

/* Encapsulation types accepted as the `type` argument of
 * bpf_skb_set_fou_encap(). FOU_BPF_ENCAP_FOU selects TUNNEL_ENCAP_FOU and
 * FOU_BPF_ENCAP_GUE selects TUNNEL_ENCAP_GUE.
 */
enum bpf_fou_encap_type {
	FOU_BPF_ENCAP_FOU,
	FOU_BPF_ENCAP_GUE,
};

__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
		  "Global functions as their definitions will be in BTF");

/* bpf_skb_set_fou_encap - Set FOU encap parameters
 *
 * This function allows for using GUE or FOU encapsulation together with an
 * ipip device in collect-metadata mode.
 *
 * It is meant to be used in BPF tc-hooks and after a call to the
 * bpf_skb_set_tunnel_key helper, responsible for setting IP addresses.
 *
 * Parameters:
 * @skb_ctx	Pointer to ctx (__sk_buff) in TC program. Cannot be NULL
 * @encap	Pointer to a `struct bpf_fou_encap` storing UDP src and
 * 		dst ports. If sport is set to 0 the kernel will auto-assign a
 * 		port. This is similar to using `encap-sport auto`.
 * 		Cannot be NULL
 * @type	Encapsulation type for the packet. Their definitions are
 * 		specified in `enum bpf_fou_encap_type`
 */
__bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
				      struct bpf_fou_encap *encap, int type)
{
	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
	struct ip_tunnel_info *info = skb_tunnel_info(skb);

	if (unlikely(!encap))
		return -EINVAL;

	if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX)))
		return -EINVAL;

	switch (type) {
	case FOU_BPF_ENCAP_FOU:
		info->encap.type = TUNNEL_ENCAP_FOU;
		break;
	case FOU_BPF_ENCAP_GUE:
		info->encap.type = TUNNEL_ENCAP_GUE;
		break;
	default:
		info->encap.type = TUNNEL_ENCAP_NONE;
	}

	if (info->key.tun_flags & TUNNEL_CSUM)
		info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM;

	info->encap.sport = encap->sport;
	info->encap.dport = encap->dport;

	return 0;
}

/* bpf_skb_get_fou_encap - Get FOU encap parameters
 *
 * This function allows for reading encap metadata from a packet received
 * on an ipip device in collect-metadata mode. The UDP source and
 * destination ports stored in the skb's tunnel metadata are copied into
 * @encap.
 *
 * Parameters:
 * @skb_ctx	Pointer to ctx (__sk_buff) in TC program. Cannot be NULL
 * @encap	Pointer to a struct bpf_fou_encap receiving the UDP source
 *		and destination port. Cannot be NULL
 *
 * Returns 0 on success, or -EINVAL if @encap is NULL or the skb carries no
 * tunnel metadata.
 */
__bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
				      struct bpf_fou_encap *encap)
{
	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
	struct ip_tunnel_info *info = skb_tunnel_info(skb);

	/* Same guard as bpf_skb_set_fou_encap(): never write through a NULL
	 * output pointer.
	 */
	if (unlikely(!encap))
		return -EINVAL;

	if (unlikely(!info))
		return -EINVAL;

	encap->sport = info->encap.sport;
	encap->dport = info->encap.dport;

	return 0;
}

__diag_pop()

/* BTF ID set listing the kfuncs defined in this file. Both entries are
 * added without any extra kfunc flags.
 */
BTF_SET8_START(fou_kfunc_set)
BTF_ID_FLAGS(func, bpf_skb_set_fou_encap)
BTF_ID_FLAGS(func, bpf_skb_get_fou_encap)
BTF_SET8_END(fou_kfunc_set)

/* kfunc ID set descriptor owned by this module; register_fou_bpf() hands
 * it to register_btf_kfunc_id_set().
 */
static const struct btf_kfunc_id_set fou_bpf_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &fou_kfunc_set,
};

/* register_fou_bpf - Register the FOU kfuncs for SCHED_CLS BPF programs
 *
 * Returns the result of register_btf_kfunc_id_set().
 */
int register_fou_bpf(void)
{
	const struct btf_kfunc_id_set *kset = &fou_bpf_kfunc_set;

	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, kset);
}
+5 −0
Original line number Diff line number Diff line
@@ -1236,10 +1236,15 @@ static int __init fou_init(void)
	if (ret < 0)
		goto unregister;

	ret = register_fou_bpf();
	if (ret < 0)
		goto kfunc_failed;

	ret = ip_tunnel_encap_add_fou_ops();
	if (ret == 0)
		return 0;

kfunc_failed:
	genl_unregister_family(&fou_nl_family);
unregister:
	unregister_pernet_device(&fou_net_ops);
Loading