Commit 2b3486bc authored by Stanislav Fomichev, committed by Martin KaFai Lau
Browse files

bpf: Introduce device-bound XDP programs



New flag BPF_F_XDP_DEV_BOUND_ONLY plus all the infra to have a way
to associate a netdev with a BPF program at load time.

netdevsim checks are dropped in favor of generic check in dev_xdp_attach.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230119221536.3349901-6-sdf@google.com


Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parent 89bbc53a
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -315,10 +315,6 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
		NSIM_EA(bpf->extack, "xdpoffload of non-bound program");
		return -EINVAL;
	}
	if (!bpf_offload_dev_match(bpf->prog, ns->netdev)) {
		NSIM_EA(bpf->extack, "program bound to different dev");
		return -EINVAL;
	}

	state = bpf->prog->aux->offload->dev_priv;
	if (WARN_ON(strcmp(state->state, "xlated"))) {
+20 −4
Original line number Diff line number Diff line
@@ -1261,7 +1261,8 @@ struct bpf_prog_aux {
	enum bpf_prog_type saved_dst_prog_type;
	enum bpf_attach_type saved_dst_attach_type;
	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
	bool offload_requested;
	bool dev_bound; /* Program is bound to the netdev. */
	bool offload_requested; /* Program is bound and offloaded to the netdev. */
	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
	bool func_proto_unreliable;
	bool sleepable;
@@ -2451,7 +2452,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);

int bpf_prog_offload_compile(struct bpf_prog *prog);
void bpf_prog_offload_destroy(struct bpf_prog *prog);
void bpf_prog_dev_bound_destroy(struct bpf_prog *prog);
int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
			       struct bpf_prog *prog);

@@ -2479,7 +2480,13 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
void unpriv_ebpf_notify(int new_state);

#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr);
void bpf_dev_bound_netdev_unregister(struct net_device *dev);

/* Report whether the program is bound to a netdev, as recorded in
 * aux->dev_bound.
 */
static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
{
	return aux->dev_bound;
}

static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux)
{
@@ -2507,12 +2514,21 @@ void sock_map_unhash(struct sock *sk);
void sock_map_destroy(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog,
					union bpf_attr *attr)
{
	return -EOPNOTSUPP;
}

/* No-op variant of bpf_dev_bound_netdev_unregister(): the body is empty,
 * so calling it has no effect.
 */
static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev)
{
}

/* Variant of bpf_prog_is_dev_bound() that always reports "not bound";
 * @aux is not inspected.
 */
static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
{
	return false;
}

static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux)
{
	return false;
+5 −0
Original line number Diff line number Diff line
@@ -1156,6 +1156,11 @@ enum bpf_link_type {
 */
#define BPF_F_XDP_HAS_FRAGS	(1U << 5)

/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded
 * program becomes device-bound (but is not offloaded to the device) and
 * can access XDP metadata.
 */
#define BPF_F_XDP_DEV_BOUND_ONLY	(1U << 6)

/* link_create.kprobe_multi.flags used in LINK_CREATE command for
 * BPF_TRACE_KPROBE_MULTI attach type to create return probe.
 */
+2 −2
Original line number Diff line number Diff line
@@ -2553,8 +2553,8 @@ static void bpf_prog_free_deferred(struct work_struct *work)
#endif
	bpf_free_used_maps(aux);
	bpf_free_used_btfs(aux);
	if (bpf_prog_is_offloaded(aux))
		bpf_prog_offload_destroy(aux->prog);
	if (bpf_prog_is_dev_bound(aux))
		bpf_prog_dev_bound_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
	if (aux->prog->has_callchain_buf)
		put_callchain_buffers();
+71 −24
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@ struct bpf_offload_dev {
struct bpf_offload_netdev {
	struct rhash_head l;
	struct net_device *netdev;
	struct bpf_offload_dev *offdev;
	struct bpf_offload_dev *offdev; /* NULL when bound-only */
	struct list_head progs;
	struct list_head maps;
	struct list_head offdev_netdevs;
@@ -89,19 +89,17 @@ static int __bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
	INIT_LIST_HEAD(&ondev->progs);
	INIT_LIST_HEAD(&ondev->maps);

	down_write(&bpf_devs_lock);
	err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
	if (err) {
		netdev_warn(netdev, "failed to register for BPF offload\n");
		goto err_unlock_free;
		goto err_free;
	}

	if (offdev)
		list_add(&ondev->offdev_netdevs, &offdev->netdevs);
	up_write(&bpf_devs_lock);
	return 0;

err_unlock_free:
	up_write(&bpf_devs_lock);
err_free:
	kfree(ondev);
	return err;
}
@@ -149,24 +147,26 @@ static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
						struct net_device *netdev)
{
	struct bpf_offload_netdev *ondev, *altdev;
	struct bpf_offload_netdev *ondev, *altdev = NULL;
	struct bpf_offloaded_map *offmap, *mtmp;
	struct bpf_prog_offload *offload, *ptmp;

	ASSERT_RTNL();

	down_write(&bpf_devs_lock);
	ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
	if (WARN_ON(!ondev))
		goto unlock;
		return;

	WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
	list_del(&ondev->offdev_netdevs);

	/* Try to move the objects to another netdev of the device */
	if (offdev) {
		list_del(&ondev->offdev_netdevs);
		altdev = list_first_entry_or_null(&offdev->netdevs,
						  struct bpf_offload_netdev,
						  offdev_netdevs);
	}

	if (altdev) {
		list_for_each_entry(offload, &ondev->progs, offloads)
			offload->netdev = altdev->netdev;
@@ -185,11 +185,9 @@ static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
	WARN_ON(!list_empty(&ondev->progs));
	WARN_ON(!list_empty(&ondev->maps));
	kfree(ondev);
unlock:
	up_write(&bpf_devs_lock);
}

int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
{
	struct bpf_offload_netdev *ondev;
	struct bpf_prog_offload *offload;
@@ -199,7 +197,11 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
	    attr->prog_type != BPF_PROG_TYPE_XDP)
		return -EINVAL;

	if (attr->prog_flags)
	if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY)
		return -EINVAL;

	if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS &&
	    attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)
		return -EINVAL;

	offload = kzalloc(sizeof(*offload), GFP_USER);
@@ -214,12 +216,24 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
	if (err)
		goto err_maybe_put;

	prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY);

	down_write(&bpf_devs_lock);
	ondev = bpf_offload_find_netdev(offload->netdev);
	if (!ondev) {
		if (bpf_prog_is_offloaded(prog->aux)) {
			err = -EINVAL;
			goto err_unlock;
		}

		/* When only binding to the device, explicitly
		 * create an entry in the hashtable.
		 */
		err = __bpf_offload_dev_netdev_register(NULL, offload->netdev);
		if (err)
			goto err_unlock;
		ondev = bpf_offload_find_netdev(offload->netdev);
	}
	offload->offdev = ondev->offdev;
	prog->aux->offload = offload;
	list_add_tail(&offload->offloads, &ondev->progs);
@@ -321,12 +335,25 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
	up_read(&bpf_devs_lock);
}

void bpf_prog_offload_destroy(struct bpf_prog *prog)
void bpf_prog_dev_bound_destroy(struct bpf_prog *prog)
{
	struct bpf_offload_netdev *ondev;
	struct net_device *netdev;

	rtnl_lock();
	down_write(&bpf_devs_lock);
	if (prog->aux->offload)
	if (prog->aux->offload) {
		list_del_init(&prog->aux->offload->offloads);

		netdev = prog->aux->offload->netdev;
		__bpf_prog_offload_destroy(prog);

		ondev = bpf_offload_find_netdev(netdev);
		if (!ondev->offdev && list_empty(&ondev->progs))
			__bpf_offload_dev_netdev_unregister(NULL, netdev);
	}
	up_write(&bpf_devs_lock);
	rtnl_unlock();
}

static int bpf_prog_offload_translate(struct bpf_prog *prog)
@@ -621,7 +648,7 @@ static bool __bpf_offload_dev_match(struct bpf_prog *prog,
	struct bpf_offload_netdev *ondev1, *ondev2;
	struct bpf_prog_offload *offload;

	if (!bpf_prog_is_offloaded(prog->aux))
	if (!bpf_prog_is_dev_bound(prog->aux))
		return false;

	offload = prog->aux->offload;
@@ -667,14 +694,21 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
				    struct net_device *netdev)
{
	return __bpf_offload_dev_netdev_register(offdev, netdev);
	int err;

	down_write(&bpf_devs_lock);
	err = __bpf_offload_dev_netdev_register(offdev, netdev);
	up_write(&bpf_devs_lock);
	return err;
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);

/* Unregister @netdev from @offdev.
 *
 * Thin locking wrapper: takes bpf_devs_lock for writing, delegates the
 * actual work to __bpf_offload_dev_netdev_unregister(), then releases
 * the lock.
 */
void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
				       struct net_device *netdev)
{
	down_write(&bpf_devs_lock);
	__bpf_offload_dev_netdev_unregister(offdev, netdev);
	up_write(&bpf_devs_lock);
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);

@@ -708,6 +742,19 @@ void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev)
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_priv);

/* Drop the bound-only entry registered for @dev, if there is one.
 *
 * Entries that carry an offload device (->offdev != NULL) are left
 * untouched. RTNL must be held by the caller (asserted below);
 * bpf_devs_lock is taken for writing around the lookup and removal.
 */
void bpf_dev_bound_netdev_unregister(struct net_device *dev)
{
	struct bpf_offload_netdev *bound;

	ASSERT_RTNL();

	down_write(&bpf_devs_lock);

	bound = bpf_offload_find_netdev(dev);
	if (!bound || bound->offdev)
		goto out_unlock;

	__bpf_offload_dev_netdev_unregister(NULL, bound->netdev);

out_unlock:
	up_write(&bpf_devs_lock);
}

static int __init bpf_offload_init(void)
{
	return rhashtable_init(&offdevs, &offdevs_params);
Loading