Commit ee75aef2 authored by Björn Töpel's avatar Björn Töpel Committed by Daniel Borkmann
Browse files

bpf, xdp: Restructure redirect actions



The XDP_REDIRECT implementations for maps and non-maps are fairly
similar, but obviously need to take different code paths depending on
if the target is using a map or not. Today, the redirect targets for
XDP either uses a map, or is based on ifindex.

Here, the map type and id are added to bpf_redirect_info, instead of
the actual map. Map type, map item/ifindex, and the map_id (if any) is
passed to xdp_do_redirect().

For ifindex-based redirect, used by the bpf_redirect() XDP BFP helper,
a special map type/id are used. Map type of UNSPEC together with map id
equal to INT_MAX has the special meaning of an ifindex based
redirect. Note that valid map ids are 1 inclusive, INT_MAX exclusive
([1,INT_MAX[).

In addition to making the code easier to follow, using explicit type
and id in bpf_redirect_info has a slight positive performance impact
by avoiding a pointer indirection for the map type lookup, and instead
use the cacheline for bpf_redirect_info.

Since the actual map is not passed via bpf_redirect_info anymore, the
map lookup is only done in the BPF helper. This means that the
bpf_clear_redirect_map() function can be removed. The actual map item
is RCU protected.

The bpf_redirect_info flags member is not used by XDP, and not
read/written any more. The map member is only written to when
required/used, and not unconditionally.

Signed-off-by: default avatarBjörn Töpel <bjorn.topel@intel.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Reviewed-by: default avatarMaciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: default avatarJesper Dangaard Brouer <brouer@redhat.com>
Acked-by: default avatarToke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210308112907.559576-3-bjorn.topel@gmail.com
parent e6a4750f
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -646,7 +646,8 @@ struct bpf_redirect_info {
	u32 flags;
	u32 tgt_index;
	void *tgt_value;
	struct bpf_map *map;
	u32 map_id;
	enum bpf_map_type map_type;
	u32 kern_flags;
	struct bpf_nh_params nh;
};
@@ -1488,13 +1489,14 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
		 * performs multiple lookups, the last one always takes
		 * precedence.
		 */
		WRITE_ONCE(ri->map, NULL);
		ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
		ri->map_type = BPF_MAP_TYPE_UNSPEC;
		return flags;
	}

	ri->flags = flags;
	ri->tgt_index = ifindex;
	WRITE_ONCE(ri->map, map);
	ri->map_id = map->id;
	ri->map_type = map->map_type;

	return XDP_REDIRECT;
}
+35 −27
Original line number Diff line number Diff line
@@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
};
#endif /* __DEVMAP_OBJ_TYPE */

#define devmap_ifindex(tgt, map)				\
	(((map->map_type == BPF_MAP_TYPE_DEVMAP ||	\
		  map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
	  ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)

DECLARE_EVENT_CLASS(xdp_redirect_template,

	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 const void *tgt, int err,
		 const struct bpf_map *map, u32 index),
		 enum bpf_map_type map_type,
		 u32 map_id, u32 index),

	TP_ARGS(dev, xdp, tgt, err, map, index),
	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index),

	TP_STRUCT__entry(
		__field(int, prog_id)
@@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
	),

	TP_fast_assign(
		u32 ifindex = 0, map_index = index;

		if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
			ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
		} else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
			ifindex = index;
			map_index = 0;
		}

		__entry->prog_id	= xdp->aux->id;
		__entry->act		= XDP_REDIRECT;
		__entry->ifindex	= dev->ifindex;
		__entry->err		= err;
		__entry->to_ifindex	= map ? devmap_ifindex(tgt, map) :
						index;
		__entry->map_id		= map ? map->id : 0;
		__entry->map_index	= map ? index : 0;
		__entry->to_ifindex	= ifindex;
		__entry->map_id		= map_id;
		__entry->map_index	= map_index;
	),

	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
@@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 const void *tgt, int err,
		 const struct bpf_map *map, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map, index)
		 enum bpf_map_type map_type,
		 u32 map_id, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);

DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 const void *tgt, int err,
		 const struct bpf_map *map, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map, index)
		 enum bpf_map_type map_type,
		 u32 map_id, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);

#define _trace_xdp_redirect(dev, xdp, to)						\
	 trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
	 trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)

#define _trace_xdp_redirect_err(dev, xdp, to, err)					\
	 trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
	 trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)

#define _trace_xdp_redirect_map(dev, xdp, to, map, index)		\
	 trace_xdp_redirect(dev, xdp, to, 0, map, index)
#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \
	 trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index)

#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err)	\
	 trace_xdp_redirect_err(dev, xdp, to, err, map, index)
#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
	 trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index)

/* not used anymore, but kept around so as not to break old programs */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 const void *tgt, int err,
		 const struct bpf_map *map, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map, index)
		 enum bpf_map_type map_type,
		 u32 map_id, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);

DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 const void *tgt, int err,
		 const struct bpf_map *map, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map, index)
		 enum bpf_map_type map_type,
		 u32 map_id, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);

TRACE_EVENT(xdp_cpumap_kthread,
+0 −1
Original line number Diff line number Diff line
@@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map)
	 * complete.
	 */

	bpf_clear_redirect_map(map);
	synchronize_rcu();

	/* For cpu_map the remote CPUs can still be using the entries
+0 −1
Original line number Diff line number Diff line
@@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map)
	list_del_rcu(&dtab->list);
	spin_unlock(&dev_map_lock);

	bpf_clear_redirect_map(map);
	synchronize_rcu();

	/* Make sure prior __dev_map_entry_free() have completed. */
+75 −95
Original line number Diff line number Diff line
@@ -3918,23 +3918,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
	.arg2_type	= ARG_ANYTHING,
};

static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
			    struct bpf_map *map, struct xdp_buff *xdp)
{
	switch (map->map_type) {
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
		return dev_map_enqueue(fwd, xdp, dev_rx);
	case BPF_MAP_TYPE_CPUMAP:
		return cpu_map_enqueue(fwd, xdp, dev_rx);
	case BPF_MAP_TYPE_XSKMAP:
		return __xsk_map_redirect(fwd, xdp);
	default:
		return -EBADRQC;
	}
	return 0;
}

void xdp_do_flush(void)
{
	__dev_flush();
@@ -3943,55 +3926,52 @@ void xdp_do_flush(void)
}
EXPORT_SYMBOL_GPL(xdp_do_flush);

void bpf_clear_redirect_map(struct bpf_map *map)
{
	struct bpf_redirect_info *ri;
	int cpu;

	for_each_possible_cpu(cpu) {
		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
		/* Avoid polluting remote cacheline due to writes if
		 * not needed. Once we pass this test, we need the
		 * cmpxchg() to make sure it hasn't been changed in
		 * the meantime by remote CPU.
		 */
		if (unlikely(READ_ONCE(ri->map) == map))
			cmpxchg(&ri->map, map, NULL);
	}
}

int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
		    struct bpf_prog *xdp_prog)
{
	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
	struct bpf_map *map = READ_ONCE(ri->map);
	u32 index = ri->tgt_index;
	enum bpf_map_type map_type = ri->map_type;
	void *fwd = ri->tgt_value;
	u32 map_id = ri->map_id;
	int err;

	ri->tgt_index = 0;
	ri->tgt_value = NULL;
	WRITE_ONCE(ri->map, NULL);
	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
	ri->map_type = BPF_MAP_TYPE_UNSPEC;

	if (unlikely(!map)) {
		fwd = dev_get_by_index_rcu(dev_net(dev), index);
	switch (map_type) {
	case BPF_MAP_TYPE_DEVMAP:
		fallthrough;
	case BPF_MAP_TYPE_DEVMAP_HASH:
		err = dev_map_enqueue(fwd, xdp, dev);
		break;
	case BPF_MAP_TYPE_CPUMAP:
		err = cpu_map_enqueue(fwd, xdp, dev);
		break;
	case BPF_MAP_TYPE_XSKMAP:
		err = __xsk_map_redirect(fwd, xdp);
		break;
	case BPF_MAP_TYPE_UNSPEC:
		if (map_id == INT_MAX) {
			fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
			if (unlikely(!fwd)) {
				err = -EINVAL;
			goto err;
				break;
			}

			err = dev_xdp_enqueue(fwd, xdp, dev);
	} else {
		err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
			break;
		}
		fallthrough;
	default:
		err = -EBADRQC;
	}

	if (unlikely(err))
		goto err;

	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
	return 0;
err:
	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
	return err;
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -4000,41 +3980,36 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
				       struct sk_buff *skb,
				       struct xdp_buff *xdp,
				       struct bpf_prog *xdp_prog,
				       struct bpf_map *map)
				       void *fwd,
				       enum bpf_map_type map_type, u32 map_id)
{
	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
	u32 index = ri->tgt_index;
	void *fwd = ri->tgt_value;
	int err = 0;

	ri->tgt_index = 0;
	ri->tgt_value = NULL;
	WRITE_ONCE(ri->map, NULL);

	if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
	    map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
		struct bpf_dtab_netdev *dst = fwd;
	int err;

		err = dev_map_generic_redirect(dst, skb, xdp_prog);
	switch (map_type) {
	case BPF_MAP_TYPE_DEVMAP:
		fallthrough;
	case BPF_MAP_TYPE_DEVMAP_HASH:
		err = dev_map_generic_redirect(fwd, skb, xdp_prog);
		if (unlikely(err))
			goto err;
	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
		struct xdp_sock *xs = fwd;

		err = xsk_generic_rcv(xs, xdp);
		break;
	case BPF_MAP_TYPE_XSKMAP:
		err = xsk_generic_rcv(fwd, xdp);
		if (err)
			goto err;
		consume_skb(skb);
	} else {
		break;
	default:
		/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
		err = -EBADRQC;
		goto err;
	}

	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
	return 0;
err:
	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
	return err;
}

@@ -4042,16 +4017,16 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
	struct bpf_map *map = READ_ONCE(ri->map);
	u32 index = ri->tgt_index;
	struct net_device *fwd;
	int err = 0;

	if (map)
		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
						   map);
	ri->tgt_index = 0;
	fwd = dev_get_by_index_rcu(dev_net(dev), index);
	enum bpf_map_type map_type = ri->map_type;
	void *fwd = ri->tgt_value;
	u32 map_id = ri->map_id;
	int err;

	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
	ri->map_type = BPF_MAP_TYPE_UNSPEC;

	if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
		fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
		if (unlikely(!fwd)) {
			err = -EINVAL;
			goto err;
@@ -4062,11 +4037,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
			goto err;

		skb->dev = fwd;
	_trace_xdp_redirect(dev, xdp_prog, index);
		_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
		generic_xdp_tx(skb, xdp_prog);
		return 0;
	}

	return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
err:
	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
	_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
	return err;
}

@@ -4077,10 +4055,12 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
	if (unlikely(flags))
		return XDP_ABORTED;

	ri->flags = flags;
	/* NB! Map type UNSPEC and map_id == INT_MAX (never generated
	 * by map_idr) is used for ifindex based XDP redirect.
	 */
	ri->tgt_index = ifindex;
	ri->tgt_value = NULL;
	WRITE_ONCE(ri->map, NULL);
	ri->map_id = INT_MAX;
	ri->map_type = BPF_MAP_TYPE_UNSPEC;

	return XDP_REDIRECT;
}
Loading