Commit b4c2b959, authored by Kumar Kartikeya Dwivedi, committed by Alexei Starovoitov
Browse files

net/netfilter: Add unstable CT lookup helpers for XDP and TC-BPF

This change adds conntrack lookup helpers using the unstable kfunc call
interface for the XDP and TC-BPF hooks. The primary use case is
implementing a synproxy in XDP, see Maxim's patchset [0].

Export get_net_ns_by_id as nf_conntrack_bpf.c needs to call it.

This object is only built when BTF is generated for nf_conntrack:
CONFIG_DEBUG_INFO_BTF_MODULES when it is a module, CONFIG_DEBUG_INFO_BTF
when it is built in.

  [0]: https://lore.kernel.org/bpf/20211019144655.3483197-1-maximmi@nvidia.com



Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-7-memxor@gmail.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 5c073f26
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _NF_CONNTRACK_BPF_H
#define _NF_CONNTRACK_BPF_H

#include <linux/btf.h>
#include <linux/kconfig.h>

/* The real registration function is only declared when BTF is enabled for the
 * way nf_conntrack is built: CONFIG_DEBUG_INFO_BTF for a built-in
 * nf_conntrack, CONFIG_DEBUG_INFO_BTF_MODULES for a modular one.
 */
#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
    (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))

/* Defined out of line; returns 0 on success or a negative value on failure
 * (callers treat ret < 0 as an error).
 */
extern int register_nf_conntrack_bpf(void);

#else

/* No-op stand-in used when the condition above does not hold: always reports
 * success, so callers can invoke it unconditionally.
 */
static inline int register_nf_conntrack_bpf(void)
{
	return 0;
}

#endif

#endif /* _NF_CONNTRACK_BPF_H */
+1 −0
Original line number Diff line number Diff line
@@ -299,6 +299,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)

	return peer;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);

/*
 * setup_net runs the initializers for the network namespace object.
+5 −0
Original line number Diff line number Diff line
@@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
# Add nf_conntrack_bpf.o only when the BTF option matching the build mode is
# enabled: CONFIG_DEBUG_INFO_BTF_MODULES when nf_conntrack is a module (=m),
# CONFIG_DEBUG_INFO_BTF when it is built in (=y).
ifeq ($(CONFIG_NF_CONNTRACK),m)
nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
else ifeq ($(CONFIG_NF_CONNTRACK),y)
nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
endif

obj-$(CONFIG_NETFILTER) = netfilter.o

+257 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/* Unstable Conntrack Helpers for XDP and TC-BPF hook
 *
 * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
 * allowed to break compatibility for these functions since the interface they
 * are exposed through to BPF programs is explicitly unstable.
 */

#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>

/* bpf_ct_opts - Options for CT lookup helpers
 *
 * Passed by the BPF program to bpf_xdp_ct_lookup() / bpf_skb_ct_lookup()
 * together with its size, which must equal NF_BPF_CT_OPTS_SZ.
 *
 * Members:
 * @netns_id   - Specify the network namespace for lookup
 *		 Values:
 *		   BPF_F_CURRENT_NETNS (-1)
 *		     Use namespace associated with ctx (xdp_md, __sk_buff)
 *		   [0, S32_MAX]
 *		     Network Namespace ID
 * @error      - Out parameter, set for any errors encountered. It is only
 *		 written when a lookup fails; a successful lookup leaves the
 *		 caller's value untouched.
 *		 Values:
 *		   -EINVAL - Passed NULL for bpf_tuple pointer
 *		   -EINVAL - opts->reserved is not 0
 *		   -EINVAL - netns_id is less than -1
 *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
 *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
 *		   -ENONET - No network namespace found for netns_id
 *		   -ENOENT - Conntrack lookup could not find entry for tuple
 *		   -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
 *				   or sizeof(tuple->ipv6)
 * @l4proto    - Layer 4 protocol
 *		 Values:
 *		   IPPROTO_TCP, IPPROTO_UDP
 * @reserved   - Reserved member, will be reused for more options in future
 *		 Values:
 *		   0
 */
struct bpf_ct_opts {
	s32 netns_id;
	s32 error;
	u8 l4proto;
	u8 reserved[3];	/* every byte must be 0, otherwise -EINVAL */
};

/* Size in bytes of struct bpf_ct_opts that callers must pass as opts__sz.
 * The lookup kfuncs assert at build time that it matches sizeof() of the
 * structure.
 */
enum {
	NF_BPF_CT_OPTS_SZ = 12,
};

/* Common lookup path shared by the XDP and TC kfuncs.
 *
 * Converts @bpf_tuple (IPv4 or IPv6 layout, selected by @tuple_len) into a
 * conntrack tuple for @protonum and searches the default zone. @net is the
 * caller's namespace; when @netns_id is non-negative it is only used to
 * resolve that ID to the namespace that is actually searched.
 *
 * Returns the nf_conn for the matching entry, or an ERR_PTR():
 *   -EPROTO       - @protonum is neither IPPROTO_TCP nor IPPROTO_UDP
 *   -EINVAL       - @netns_id is below BPF_F_CURRENT_NETNS
 *   -EAFNOSUPPORT - @tuple_len matches neither tuple layout
 *   -ENONET       - no namespace found for @netns_id
 *   -ENOENT       - no conntrack entry matches the tuple
 */
static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
					  struct bpf_sock_tuple *bpf_tuple,
					  u32 tuple_len, u8 protonum,
					  s32 netns_id)
{
	const bool lookup_by_id = netns_id >= 0;
	struct nf_conntrack_tuple_hash *found;
	struct nf_conntrack_tuple key;

	if (unlikely(!(protonum == IPPROTO_TCP || protonum == IPPROTO_UDP)))
		return ERR_PTR(-EPROTO);
	if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
		return ERR_PTR(-EINVAL);

	/* Start from an all-zero key so no stale bytes take part in the lookup */
	memset(&key, 0, sizeof(key));

	if (tuple_len == sizeof(bpf_tuple->ipv4)) {
		key.src.l3num = AF_INET;
		key.src.u3.ip = bpf_tuple->ipv4.saddr;
		key.dst.u3.ip = bpf_tuple->ipv4.daddr;
		key.src.u.tcp.port = bpf_tuple->ipv4.sport;
		key.dst.u.tcp.port = bpf_tuple->ipv4.dport;
	} else if (tuple_len == sizeof(bpf_tuple->ipv6)) {
		key.src.l3num = AF_INET6;
		memcpy(key.src.u3.ip6, bpf_tuple->ipv6.saddr,
		       sizeof(bpf_tuple->ipv6.saddr));
		memcpy(key.dst.u3.ip6, bpf_tuple->ipv6.daddr,
		       sizeof(bpf_tuple->ipv6.daddr));
		key.src.u.tcp.port = bpf_tuple->ipv6.sport;
		key.dst.u.tcp.port = bpf_tuple->ipv6.dport;
	} else {
		return ERR_PTR(-EAFNOSUPPORT);
	}
	key.dst.protonum = protonum;

	if (lookup_by_id) {
		net = get_net_ns_by_id(net, netns_id);
		if (unlikely(!net))
			return ERR_PTR(-ENONET);
	}

	found = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &key);

	/* Pairs with get_net_ns_by_id() above; the caller's own net is not put */
	if (lookup_by_id)
		put_net(net);

	return found ? nf_ct_tuplehash_to_ctrack(found) : ERR_PTR(-ENOENT);
}

__diag_push();
__diag_ignore(GCC, 8, "-Wmissing-prototypes",
	      "Global functions as their definitions will be in nf_conntrack BTF");

/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
 *		       reference to it
 *
 * Parameters:
 * @xdp_ctx	- Pointer to ctx (xdp_md) in XDP program
 *		    Cannot be NULL
 * @bpf_tuple	- Pointer to memory representing the tuple to look up
 *		    Cannot be NULL
 * @tuple__sz	- Length of the tuple structure
 *		    Must be one of sizeof(bpf_tuple->ipv4) or
 *		    sizeof(bpf_tuple->ipv6)
 * @opts	- Additional options for lookup (documented above)
 *		    Cannot be NULL
 * @opts__sz	- Length of the bpf_ct_opts structure
 *		    Must be NF_BPF_CT_OPTS_SZ (12)
 *
 * Returns:
 *   The nf_conn on success (opts->error is left untouched), or NULL on
 *   failure. On failure the reason is stored in opts->error, except when opts
 *   itself is NULL.
 */
struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
	struct net *caller_net;
	struct nf_conn *nfct;

	BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);

	if (!opts)
		return NULL;
	/* Check opts__sz before anything else: the reserved bytes sit at the
	 * end of the structure and must only be read once the caller-supplied
	 * size is known to cover them.
	 */
	if (opts__sz != NF_BPF_CT_OPTS_SZ || !bpf_tuple ||
	    opts->reserved[0] || opts->reserved[1] || opts->reserved[2]) {
		opts->error = -EINVAL;
		return NULL;
	}
	/* Namespace of the device behind the receive queue of this XDP buffer */
	caller_net = dev_net(ctx->rxq->dev);
	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
				  opts->netns_id);
	if (IS_ERR(nfct)) {
		opts->error = PTR_ERR(nfct);
		return NULL;
	}
	return nfct;
}

/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
 *		       reference to it
 *
 * Parameters:
 * @skb_ctx	- Pointer to ctx (__sk_buff) in TC program
 *		    Cannot be NULL
 * @bpf_tuple	- Pointer to memory representing the tuple to look up
 *		    Cannot be NULL
 * @tuple__sz	- Length of the tuple structure
 *		    Must be one of sizeof(bpf_tuple->ipv4) or
 *		    sizeof(bpf_tuple->ipv6)
 * @opts	- Additional options for lookup (documented above)
 *		    Cannot be NULL
 * @opts__sz	- Length of the bpf_ct_opts structure
 *		    Must be NF_BPF_CT_OPTS_SZ (12)
 *
 * Returns:
 *   The nf_conn on success (opts->error is left untouched), or NULL on
 *   failure. On failure the reason is stored in opts->error, except when opts
 *   itself is NULL.
 */
struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
	struct net *caller_net;
	struct nf_conn *nfct;

	BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);

	if (!opts)
		return NULL;
	/* Check opts__sz before anything else: the reserved bytes sit at the
	 * end of the structure and must only be read once the caller-supplied
	 * size is known to cover them.
	 */
	if (opts__sz != NF_BPF_CT_OPTS_SZ || !bpf_tuple ||
	    opts->reserved[0] || opts->reserved[1] || opts->reserved[2]) {
		opts->error = -EINVAL;
		return NULL;
	}
	/* Prefer the device's namespace; use the socket's when skb has no dev */
	caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
	nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
				  opts->netns_id);
	if (IS_ERR(nfct)) {
		opts->error = PTR_ERR(nfct);
		return NULL;
	}
	return nfct;
}

/* bpf_ct_release - Release acquired nf_conn object
 *
 * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
 * the program if any references remain in the program in all of the explored
 * states.
 *
 * Parameters:
 * @nfct	 - Pointer to referenced nf_conn object, obtained using
 *		   bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
 *		   A NULL pointer is accepted and ignored.
 */
void bpf_ct_release(struct nf_conn *nfct)
{
	if (!nfct)
		return;
	nf_ct_put(nfct);
}

__diag_pop()

/* kfuncs listed in the check set used for XDP programs */
BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
BTF_ID(func, bpf_xdp_ct_lookup)
BTF_ID(func, bpf_ct_release)
BTF_SET_END(nf_ct_xdp_check_kfunc_ids)

/* kfuncs listed in the check set used for TC (SCHED_CLS) programs */
BTF_SET_START(nf_ct_tc_check_kfunc_ids)
BTF_ID(func, bpf_skb_ct_lookup)
BTF_ID(func, bpf_ct_release)
BTF_SET_END(nf_ct_tc_check_kfunc_ids)

/* kfuncs that hand a referenced nf_conn to the program */
BTF_SET_START(nf_ct_acquire_kfunc_ids)
BTF_ID(func, bpf_xdp_ct_lookup)
BTF_ID(func, bpf_skb_ct_lookup)
BTF_SET_END(nf_ct_acquire_kfunc_ids)

/* kfuncs that drop a reference previously handed out by an acquire kfunc */
BTF_SET_START(nf_ct_release_kfunc_ids)
BTF_ID(func, bpf_ct_release)
BTF_SET_END(nf_ct_release_kfunc_ids)

/* Both sets are identical: every acquire kfunc here returns NULL on failure,
 * so the acquire set doubles as the "may return NULL" set.
 */
#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids

/* kfunc ID sets registered for BPF_PROG_TYPE_XDP by
 * register_nf_conntrack_bpf(); differs from the TC variant only in its
 * check set.
 */
static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
	.owner        = THIS_MODULE,
	.check_set    = &nf_ct_xdp_check_kfunc_ids,
	.acquire_set  = &nf_ct_acquire_kfunc_ids,
	.release_set  = &nf_ct_release_kfunc_ids,
	.ret_null_set = &nf_ct_ret_null_kfunc_ids,
};

/* kfunc ID sets registered for BPF_PROG_TYPE_SCHED_CLS by
 * register_nf_conntrack_bpf(); differs from the XDP variant only in its
 * check set.
 */
static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
	.owner        = THIS_MODULE,
	.check_set    = &nf_ct_tc_check_kfunc_ids,
	.acquire_set  = &nf_ct_acquire_kfunc_ids,
	.release_set  = &nf_ct_release_kfunc_ids,
	.ret_null_set = &nf_ct_ret_null_kfunc_ids,
};

/* Register the conntrack kfunc ID sets, first for XDP programs and then for
 * SCHED_CLS (TC) programs.
 *
 * Returns 0 when both registrations return 0. Otherwise returns the first
 * non-zero result; the TC set is not registered if the XDP registration
 * returned non-zero.
 */
int register_nf_conntrack_bpf(void)
{
	int err;

	err = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
					&nf_conntrack_xdp_kfunc_set);
	if (err)
		return err;

	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
					 &nf_conntrack_tc_kfunc_set);
}
+8 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@
#include <linux/rculist_nulls.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
@@ -2748,8 +2749,15 @@ int nf_conntrack_init_start(void)
	conntrack_gc_work_init(&conntrack_gc_work);
	queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);

	ret = register_nf_conntrack_bpf();
	if (ret < 0)
		goto err_kfunc;

	return 0;

err_kfunc:
	cancel_delayed_work_sync(&conntrack_gc_work.dwork);
	nf_conntrack_proto_fini();
err_proto:
	nf_conntrack_seqadj_fini();
err_seqadj: