Commit 2976706f authored by Alexei Starovoitov
Browse files

Merge branch 'AF_XDP selftests improvements & bpf_link'

Maciej Fijalkowski says:

====================

Changes since v4 (all in patch 6):
- do not close potentially invalid bpf_link fd (Toke)
- fix misspelling in label (Toke)
- mask out XDP_FLAGS_UPDATE_IF_NOEXIST and XDP_FLAGS_REPLACE explicitly when
  creating bpf_link (Toke)

Changes since v3:
- do not unload netlink-based XDP prog when updating map elem failed and
  current socket was not the creator of XDP resources (Toke)
- pull out code paths based on prog_id value within __xsk_setup_xdp_prog
  so that teardown in case of error at any point is more clear

Changes since v2:
- fix c&p failure in veth's get_channels implementation (Magnus)
- provide backward compatibility if bpf_link is not supported (Andrii)
- check for a link type while looking up existing bpf_links (Andrii)

Changes since v1:
- selftests improvements and test case for bpf_link persistence itself
- do not unload netlink-based prog when --force flag is set (John)
- simplify return semantics in xsk_link_lookup (John)

v4: https://lore.kernel.org/bpf/20210326230938.49998-1-maciej.fijalkowski@intel.com/
v3: https://lore.kernel.org/bpf/20210322205816.65159-1-maciej.fijalkowski@intel.com/
v2: https://lore.kernel.org/bpf/20210311152910.56760-1-maciej.fijalkowski@intel.com/
v1: https://lore.kernel.org/bpf/20210215154638.4627-1-maciej.fijalkowski@intel.com/

--------------------------------------------------

This set is another approach towards addressing the below issue:

// load xdp prog and xskmap and add entry to xskmap at idx 10
$ sudo ./xdpsock -i ens801f0 -t -q 10

// add entry to xskmap at idx 11
$ sudo ./xdpsock -i ens801f0 -t -q 11

Terminate one of the processes and the other one is unable to work,
because the XDP prog was unloaded from the interface.

Previous attempt was, to put it mildly, a bit broken, as there was no
synchronization between updates to additional map, as Bjorn pointed out.
See https://lore.kernel.org/netdev/20190603131907.13395-5-maciej.fijalkowski@intel.com/

In the meantime bpf_link was introduced and it seems that it can address
the issue of refcounting the XDP prog on interface.

Although the bpf_link is the meat of the set, selftests improvements are a
bigger part of it. Overall, we've been able to reduce the complexity of xsk
selftests by removing a bunch of synchronization resources and
simplifying logic and structs.

Last but not least, for multiqueue veth working with AF_XDP, ethtool's
get_channels API needs to be implemented, so it's also included in this
set.

Note also that in order to make it work, the following commit from the
bpf tree is needed:
veth: store queue_mapping independently of XDP prog presence
https://lore.kernel.org/bpf/20210303152903.11172-1-maciej.fijalkowski@intel.com/

Thanks,
Maciej

Björn Töpel (3):
  selftests: xsk: remove thread attribute
  selftests: xsk: Remove mutex and condition variable
  selftests: xsk: Remove unused defines
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 05d81703 ae6b6a17
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -218,6 +218,17 @@ static void veth_get_ethtool_stats(struct net_device *dev,
	}
}

/* ethtool ->get_channels() callback for veth.
 *
 * Reports the device's currently active TX/RX queue counts both as the
 * current and as the maximum channel numbers. The "combined" figures are
 * the smaller of the two queue counts.
 */
static void veth_get_channels(struct net_device *dev,
			      struct ethtool_channels *channels)
{
	unsigned int txq = dev->real_num_tx_queues;
	unsigned int rxq = dev->real_num_rx_queues;
	unsigned int combined = min(rxq, txq);

	/* current counts and maxima are the same values for veth */
	channels->tx_count = txq;
	channels->max_tx = txq;

	channels->rx_count = rxq;
	channels->max_rx = rxq;

	channels->combined_count = combined;
	channels->max_combined = combined;
}

static const struct ethtool_ops veth_ethtool_ops = {
	.get_drvinfo		= veth_get_drvinfo,
	.get_link		= ethtool_op_get_link,
@@ -226,6 +237,7 @@ static const struct ethtool_ops veth_ethtool_ops = {
	.get_ethtool_stats	= veth_get_ethtool_stats,
	.get_link_ksettings	= veth_get_link_ksettings,
	.get_ts_info		= ethtool_op_get_ts_info,
	.get_channels		= veth_get_channels,
};

/* general routines */
+14 −41
Original line number Diff line number Diff line
@@ -96,7 +96,6 @@ static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static int opt_timeout = 1000;
static bool opt_need_wakeup = true;
static u32 opt_num_xsks = 1;
static u32 prog_id;
static bool opt_busy_poll;
static bool opt_reduced_cap;

@@ -462,59 +461,37 @@ static void *poller(void *arg)
	return NULL;
}

static void remove_xdp_program(void)
static void int_exit(int sig)
{
	u32 curr_prog_id = 0;
	int cmd = CLOSE_CONN;

	if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
		printf("bpf_get_link_xdp_id failed\n");
		exit(EXIT_FAILURE);
	benchmark_done = true;
}
	if (prog_id == curr_prog_id)
		bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
	else if (!curr_prog_id)
		printf("couldn't find a prog id on a given interface\n");
	else
		printf("program on interface changed, not removing\n");

	if (opt_reduced_cap) {
		if (write(sock, &cmd, sizeof(int)) < 0) {
			fprintf(stderr, "Error writing into stream socket: %s", strerror(errno));
static void __exit_with_error(int error, const char *file, const char *func,
			      int line)
{
	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
		line, error, strerror(error));
	exit(EXIT_FAILURE);
}
	}
}

static void int_exit(int sig)
{
	benchmark_done = true;
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)

static void xdpsock_cleanup(void)
{
	struct xsk_umem *umem = xsks[0]->umem->umem;
	int i;
	int i, cmd = CLOSE_CONN;

	dump_stats();
	for (i = 0; i < num_socks; i++)
		xsk_socket__delete(xsks[i]->xsk);
	(void)xsk_umem__delete(umem);
	remove_xdp_program();
}

static void __exit_with_error(int error, const char *file, const char *func,
			      int line)
{
	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
		line, error, strerror(error));
	dump_stats();
	remove_xdp_program();
	exit(EXIT_FAILURE);
	if (opt_reduced_cap) {
		if (write(sock, &cmd, sizeof(int)) < 0)
			exit_with_error(errno);
	}
}

#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
						 __LINE__)
static void swap_mac_addresses(void *data)
{
	struct ether_header *eth = (struct ether_header *)data;
@@ -880,10 +857,6 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
	if (ret)
		exit_with_error(-ret);

	ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
	if (ret)
		exit_with_error(-ret);

	xsk->app_stats.rx_empty_polls = 0;
	xsk->app_stats.fill_fail_polls = 0;
	xsk->app_stats.copy_tx_sendtos = 0;
+213 −45
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/if_link.h>

#include "bpf.h"
#include "libbpf.h"
@@ -70,8 +71,10 @@ struct xsk_ctx {
	int ifindex;
	struct list_head list;
	int prog_fd;
	int link_fd;
	int xsks_map_fd;
	char ifname[IFNAMSIZ];
	bool has_bpf_link;
};

struct xsk_socket {
@@ -409,7 +412,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
	static const int log_buf_size = 16 * 1024;
	struct xsk_ctx *ctx = xsk->ctx;
	char log_buf[log_buf_size];
	int err, prog_fd;
	int prog_fd;

	/* This is the fallback C-program:
	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
@@ -499,14 +502,41 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
		return prog_fd;
	}

	err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
				  xsk->config.xdp_flags);
	ctx->prog_fd = prog_fd;
	return 0;
}

static int xsk_create_bpf_link(struct xsk_socket *xsk)
{
	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
	struct xsk_ctx *ctx = xsk->ctx;
	__u32 prog_id = 0;
	int link_fd;
	int err;

	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
	if (err) {
		close(prog_fd);
		pr_warn("getting XDP prog id failed\n");
		return err;
	}

	ctx->prog_fd = prog_fd;
	/* if there's a netlink-based XDP prog loaded on interface, bail out
	 * and ask user to do the removal by himself
	 */
	if (prog_id) {
		pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
		return -EINVAL;
	}

	opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);

	link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
	if (link_fd < 0) {
		pr_warn("bpf_link_create failed: %s\n", strerror(errno));
		return link_fd;
	}

	ctx->link_fd = link_fd;
	return 0;
}

@@ -625,7 +655,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
		close(fd);
	}

	err = 0;
	if (ctx->xsks_map_fd == -1)
		err = -ENOENT;

@@ -642,6 +671,98 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
				   &xsk->fd, 0);
}

/* Search all bpf_links for an XDP link attached to @ifindex.
 *
 * @ifindex: interface index the XDP link must be attached to
 * @prog_id: optional (may be NULL); on a match receives the id of the
 *           program behind the link
 * @link_fd: on a match receives an open fd for the link; the caller owns
 *           it and has to close() it. It is NOT written when no matching
 *           link exists, so callers must pre-initialise it.
 *
 * Returns 0 both when a matching link was found and when the enumeration
 * finished without a match; a non-zero value is returned when walking the
 * links or querying one of them failed.
 */
static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
{
	struct bpf_link_info link_info;
	__u32 link_len;
	__u32 id = 0;
	int err;
	int fd;

	while (true) {
		err = bpf_link_get_next_id(id, &id);
		if (err) {
			/* ENOENT: no more links, which is not an error */
			if (errno == ENOENT) {
				err = 0;
				break;
			}
			pr_warn("can't get next link: %s\n", strerror(errno));
			break;
		}

		fd = bpf_link_get_fd_by_id(id);
		if (fd < 0) {
			/* link went away between the two calls; try the next id */
			if (errno == ENOENT)
				continue;
			/* latch errno before logging: pr_warn() may clobber it */
			err = -errno;
			pr_warn("can't get link by id (%u): %s\n", id, strerror(-err));
			break;
		}

		link_len = sizeof(struct bpf_link_info);
		memset(&link_info, 0, link_len);
		err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
		if (err) {
			pr_warn("can't get link info: %s\n", strerror(errno));
			close(fd);
			break;
		}
		if (link_info.type == BPF_LINK_TYPE_XDP) {
			if (link_info.xdp.ifindex == ifindex) {
				/* match: hand the fd over to the caller, err is 0 here */
				*link_fd = fd;
				if (prog_id)
					*prog_id = link_info.prog_id;
				break;
			}
		}
		/* not the link we are after, drop our reference and go on */
		close(fd);
	}

	return err;
}

/* Probe whether XDP bpf_link attachment is usable.
 *
 * First checks whether an XDP bpf_link already exists on ifindex 1
 * (named ifindex_lo here, presumably the loopback device); if so, the
 * feature is evidently supported. Otherwise a trivial two-instruction
 * XDP_PASS program is loaded and an attempt is made to attach it to that
 * interface through bpf_link_create() in SKB mode.
 *
 * Returns true when a link already exists or could be created, false on
 * any failure. No file descriptor is left open on return.
 */
static bool xsk_probe_bpf_link(void)
{
	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
			    .flags = XDP_FLAGS_SKB_MODE);
	struct bpf_load_program_attr prog_attr;
	struct bpf_insn insns[2] = {
		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
		BPF_EXIT_INSN()
	};
	int prog_fd, link_fd = -1;
	int ifindex_lo = 1;
	bool ret = false;
	int err;

	err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
	if (err)
		return ret;

	if (link_fd >= 0) {
		/* The fd was only needed as proof that a link exists;
		 * close it so that repeated probing does not leak fds.
		 */
		close(link_fd);
		return true;
	}

	memset(&prog_attr, 0, sizeof(prog_attr));
	prog_attr.prog_type = BPF_PROG_TYPE_XDP;
	prog_attr.insns = insns;
	prog_attr.insns_cnt = ARRAY_SIZE(insns);
	prog_attr.license = "GPL";

	prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
	if (prog_fd < 0)
		return ret;

	link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
	/* the program fd is no longer needed whether or not attach worked */
	close(prog_fd);

	if (link_fd >= 0) {
		ret = true;
		/* closing the link fd detaches the probe program again */
		close(link_fd);
	}

	return ret;
}

static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
{
	char ifname[IFNAMSIZ];
@@ -663,64 +784,108 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
	ctx->ifname[IFNAMSIZ - 1] = 0;

	xsk->ctx = ctx;
	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();

	return 0;
}

static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
static int xsk_init_xdp_res(struct xsk_socket *xsk,
			    int *xsks_map_fd)
{
	struct xsk_socket *xsk = _xdp;
	struct xsk_ctx *ctx = xsk->ctx;
	__u32 prog_id = 0;
	int err;

	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
	err = xsk_create_bpf_maps(xsk);
	if (err)
		return err;

	err = xsk_load_xdp_prog(xsk);
	if (err)
		goto err_load_xdp_prog;

	if (ctx->has_bpf_link)
		err = xsk_create_bpf_link(xsk);
	else
		err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
					  xsk->config.xdp_flags);

	if (err)
		goto err_attach_xdp_prog;

	if (!xsk->rx)
		return err;

	if (!prog_id) {
		err = xsk_create_bpf_maps(xsk);
	err = xsk_set_bpf_maps(xsk);
	if (err)
		goto err_set_bpf_maps;

	return err;

		err = xsk_load_xdp_prog(xsk);
		if (err) {
			goto err_load_xdp_prog;
		}
	} else {
		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
		if (ctx->prog_fd < 0)
			return -errno;
		err = xsk_lookup_bpf_maps(xsk);
		if (err) {
err_set_bpf_maps:
	if (ctx->has_bpf_link)
		close(ctx->link_fd);
	else
		bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
err_attach_xdp_prog:
	close(ctx->prog_fd);
err_load_xdp_prog:
	xsk_delete_bpf_maps(xsk);
	return err;
}

static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
{
	struct xsk_ctx *ctx = xsk->ctx;
	int err;

	ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
	if (ctx->prog_fd < 0) {
		err = -errno;
		goto err_prog_fd;
	}
	err = xsk_lookup_bpf_maps(xsk);
	if (err)
		goto err_lookup_maps;

	if (!xsk->rx)
		return err;

	if (xsk->rx) {
	err = xsk_set_bpf_maps(xsk);
		if (err) {
			if (!prog_id) {
				goto err_set_bpf_maps;
			} else {
	if (err)
		goto err_set_maps;

	return err;

err_set_maps:
	close(ctx->xsks_map_fd);
err_lookup_maps:
	close(ctx->prog_fd);
err_prog_fd:
	if (ctx->has_bpf_link)
		close(ctx->link_fd);
	return err;
}
		}
	}
	if (xsks_map_fd)
		*xsks_map_fd = ctx->xsks_map_fd;

	return 0;
static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
{
	struct xsk_socket *xsk = _xdp;
	struct xsk_ctx *ctx = xsk->ctx;
	__u32 prog_id = 0;
	int err;

err_set_bpf_maps:
	close(ctx->prog_fd);
	bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
err_load_xdp_prog:
	xsk_delete_bpf_maps(xsk);
	if (ctx->has_bpf_link)
		err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
	else
		err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);

	if (err)
		return err;

	err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
			 xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);

	if (!err && xsks_map_fd)
		*xsks_map_fd = ctx->xsks_map_fd;

	return err;
}
@@ -898,6 +1063,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
		}
	}
	xsk->ctx = ctx;
	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();

	if (rx) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -1054,6 +1220,8 @@ void xsk_socket__delete(struct xsk_socket *xsk)
	if (ctx->prog_fd != -1) {
		xsk_delete_bpf_maps(xsk);
		close(ctx->prog_fd);
		if (ctx->has_bpf_link)
			close(ctx->link_fd);
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
+2 −1
Original line number Diff line number Diff line
@@ -107,7 +107,7 @@ setup_vethPairs() {
	        echo "setting up ${VETH0}: namespace: ${NS0}"
	fi
	ip netns add ${NS1}
	ip link add ${VETH0} type veth peer name ${VETH1}
	ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
	if [ -f /proc/net/if_inet6 ]; then
		echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
	fi
@@ -118,6 +118,7 @@ setup_vethPairs() {
	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
	ip link set ${VETH0} mtu ${MTU}
	ip netns exec ${NS1} ip link set ${VETH1} up
	ip netns exec ${NS1} ip link set dev lo up
	ip link set ${VETH0} up
}

+317 −383

File changed.

Preview size limit exceeded, changes collapsed.

Loading