Commit a080cdcc authored by Alexei Starovoitov
Browse files

Merge branch 'bpf: support input xdp_md context in BPF_PROG_TEST_RUN'

Zvi Effron says:

====================

This patchset adds support for passing an xdp_md via ctx_in/ctx_out in
bpf_attr for BPF_PROG_TEST_RUN of XDP programs.

Patch 1 adds a function to validate XDP meta data lengths.

Patch 2 adds initial support for passing XDP meta data in addition to
packet data.

Patch 3 adds support for also specifying the ingress interface and
rx queue.

Patch 4 adds selftests to ensure functionality is correct.

Changelog:
----------
v7->v8
v7: https://lore.kernel.org/bpf/20210624211304.90807-1-zeffron@riotgames.com/

 * Fix too long comment line in patch 3

v6->v7
v6: https://lore.kernel.org/bpf/20210617232904.1899-1-zeffron@riotgames.com/

 * Add Yonghong Song's Acked-by to commit message in patch 1
 * Add Yonghong Song's Acked-by to commit message in patch 2
 * Extracted the post-update of the xdp_md context into a function (again)
 * Validate that the rx queue was registered with XDP info
 * Decrement the reference count on a found netdevice on failure to find
  a valid rx queue
 * Decrement the reference count on a found netdevice after the XDP
  program is run
 * Drop Yonghong Song's Acked-By for patch 3 because of patch changes
 * Improve a comment in the selftests
 * Drop Yonghong Song's Acked-By for patch 4 because of patch changes

v5->v6
v5: https://lore.kernel.org/bpf/20210616224712.3243-1-zeffron@riotgames.com/

 * Correct commit messages in patches 1 and 3
 * Add Acked-by to commit message in patch 4
 * Use gotos instead of returns to correctly free resources in
  bpf_prog_test_run_xdp
 * Rename xdp_metalen_valid to xdp_metalen_invalid
 * Improve the function signature for xdp_metalen_invalid
 * Merged declaration of ingress_ifindex and rx_queue_index into one line

v4->v5
v4: https://lore.kernel.org/bpf/20210604220235.6758-1-zeffron@riotgames.com/

 * Add new patch to introduce xdp_metalen_valid inline function to avoid
  duplicated code from net/core/filter.c
 * Correct size of bad_ctx in selftests
 * Make all declarations reverse Christmas tree
 * Move data check from xdp_convert_md_to_buff to bpf_prog_test_run_xdp
 * Merge xdp_convert_buff_to_md into bpf_prog_test_run_xdp
 * Fix line too long
 * Extracted common checks in selftests to a helper function
 * Removed redundant assignment in selftests
 * Reordered test cases in selftests
 * Check data against 0 instead of data_meta in selftests
 * Made selftests use EINVAL instead of hardcoded 22
 * Dropped "_" from XDP function name
 * Changed casts in XDP program from unsigned long to long
 * Added a comment explaining the use of the loopback interface in selftests
 * Change parameter order in xdp_convert_md_to_buff to be input first
 * Assigned xdp->ingress_ifindex and xdp->rx_queue_index to local variables in
  xdp_convert_md_to_buff
 * Made use of "meta data" versus "metadata" consistent in comments and commit
  messages

v3->v4
v3: https://lore.kernel.org/bpf/20210602190815.8096-1-zeffron@riotgames.com/

 * Clean up nits
 * Validate xdp_md->data_end in bpf_prog_test_run_xdp
 * Remove intermediate metalen variables

v2 -> v3
v2: https://lore.kernel.org/bpf/20210527201341.7128-1-zeffron@riotgames.com/

 * Check errno first in selftests
 * Use DECLARE_LIBBPF_OPTS
 * Rename tattr to opts in selftests
 * Remove extra new line
 * Rename convert_xdpmd_to_xdpb to xdp_convert_md_to_buff
 * Rename convert_xdpb_to_xdpmd to xdp_convert_buff_to_md
 * Move declaration of device and rxqueue in xdp_convert_md_to_buff to
  patch 2
 * Reorder the kfree calls in bpf_prog_test_run_xdp

v1 -> v2
v1: https://lore.kernel.org/bpf/20210524220555.251473-1-zeffron@riotgames.com



 * Fix null pointer dereference with no context
 * Use the BPF skeleton and replace CHECK with ASSERT macros
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 5e437416 939b9c68
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -276,6 +276,11 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
	return unlikely(xdp->data_meta > xdp->data);
}

/* Report whether @metalen cannot be used as an XDP meta data length.
 *
 * A length is rejected when it is larger than 32 bytes or when it is not a
 * multiple of sizeof(__u32).
 */
static inline bool xdp_metalen_invalid(unsigned long metalen)
{
	const unsigned long align_mask = sizeof(__u32) - 1;

	if (metalen > 32)
		return true;

	return (metalen & align_mask) != 0;
}

struct xdp_attachment_info {
	struct bpf_prog *prog;
	u32 flags;
+0 −3
Original line number Diff line number Diff line
@@ -324,9 +324,6 @@ union bpf_iter_link_info {
 *		**BPF_PROG_TYPE_SK_LOOKUP**
 *			*data_in* and *data_out* must be NULL.
 *
 *		**BPF_PROG_TYPE_XDP**
 *			*ctx_in* and *ctx_out* must be NULL.
 *
 *		**BPF_PROG_TYPE_RAW_TRACEPOINT**,
 *		**BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
 *
+101 −8
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
#include <net/xdp.h>

#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@@ -687,6 +688,64 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
	return ret;
}

/* Apply a user supplied xdp_md context to an xdp_buff.
 *
 * Returns 0 when @xdp_md is NULL (nothing to apply) or when the context was
 * applied. Returns -ENODEV when dev_get_by_index() finds no device for the
 * requested ingress ifindex in the current task's network namespace, and
 * -EINVAL for a non-zero egress ifindex, an rx queue index given without an
 * ingress ifindex, an rx queue index beyond the device's real rx queues, or
 * an rx queue whose XDP rxq info is not registered.
 *
 * When an ingress ifindex was given and 0 is returned, the device reference
 * obtained here is intentionally not dropped; see the comment at the
 * xdp->rxq assignment.
 */
static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
{
	struct netdev_rx_queue *rxq;
	unsigned int ifindex, queue;
	struct net_device *dev;

	if (!xdp_md)
		return 0;

	if (xdp_md->egress_ifindex)
		return -EINVAL;

	ifindex = xdp_md->ingress_ifindex;
	queue = xdp_md->rx_queue_index;

	if (!ifindex) {
		/* An rx queue cannot be chosen without an ingress device */
		if (queue)
			return -EINVAL;
		goto set_data;
	}

	dev = dev_get_by_index(current->nsproxy->net_ns, ifindex);
	if (!dev)
		return -ENODEV;

	if (queue >= dev->real_num_rx_queues)
		goto put_dev;

	rxq = __netif_get_rx_queue(dev, queue);
	if (!xdp_rxq_info_is_reg(&rxq->xdp_rxq))
		goto put_dev;

	/* No dev_put() on this path: the device stays reachable through
	 * xdp->rxq so that it can be released later.
	 */
	xdp->rxq = &rxq->xdp_rxq;

set_data:
	/* xdp_md->data is an offset relative to the start of the meta data */
	xdp->data = xdp->data_meta + xdp_md->data;
	return 0;

put_dev:
	dev_put(dev);
	return -EINVAL;
}

/* Write the xdp_buff's data pointers back into @xdp_md as offsets from the
 * start of the meta data, and release the device reached through xdp->rxq
 * when the context carries a non-zero ingress ifindex.
 *
 * Does nothing when @xdp_md is NULL.
 */
static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
{
	void *meta_start;

	if (!xdp_md)
		return;

	meta_start = xdp->data_meta;
	xdp_md->data = xdp->data - meta_start;
	xdp_md->data_end = xdp->data_end - meta_start;

	if (!xdp_md->ingress_ifindex)
		return;

	dev_put(xdp->rxq->dev);
}

int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr)
{
@@ -697,35 +756,69 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
	struct netdev_rx_queue *rxqueue;
	struct xdp_buff xdp = {};
	u32 retval, duration;
	struct xdp_md *ctx;
	u32 max_data_sz;
	void *data;
	int ret;
	int ret = -EINVAL;

	if (kattr->test.ctx_in || kattr->test.ctx_out)
		return -EINVAL;
	ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (ctx) {
		/* There can't be user provided data before the meta data */
		if (ctx->data_meta || ctx->data_end != size ||
		    ctx->data > ctx->data_end ||
		    unlikely(xdp_metalen_invalid(ctx->data)))
			goto free_ctx;
		/* Meta data is allocated from the headroom */
		headroom -= ctx->data;
	}

	/* XDP have extra tailroom as (most) drivers use full page */
	max_data_sz = 4096 - headroom - tailroom;

	data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
	if (IS_ERR(data))
		return PTR_ERR(data);
	if (IS_ERR(data)) {
		ret = PTR_ERR(data);
		goto free_ctx;
	}

	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
	xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
		      &rxqueue->xdp_rxq);
	xdp_prepare_buff(&xdp, data, headroom, size, true);

	ret = xdp_convert_md_to_buff(ctx, &xdp);
	if (ret)
		goto free_data;

	bpf_prog_change_xdp(NULL, prog);
	ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
	/* We convert the xdp_buff back to an xdp_md before checking the return
	 * code so the reference count of any held netdevice will be decremented
	 * even if the test run failed.
	 */
	xdp_convert_buff_to_md(&xdp, ctx);
	if (ret)
		goto out;
	if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
		size = xdp.data_end - xdp.data;
	ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);

	if (xdp.data_meta != data + headroom ||
	    xdp.data_end != xdp.data_meta + size)
		size = xdp.data_end - xdp.data_meta;

	ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
			      duration);
	if (!ret)
		ret = bpf_ctx_finish(kattr, uattr, ctx,
				     sizeof(struct xdp_md));

out:
	bpf_prog_change_xdp(prog, NULL);
free_data:
	kfree(data);
free_ctx:
	kfree(ctx);
	return ret;
}

+2 −2
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
#include <net/tls.h>
#include <net/xdp.h>

static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -3880,8 +3881,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
	if (unlikely(meta < xdp_frame_end ||
		     meta > xdp->data))
		return -EINVAL;
	if (unlikely((metalen & (sizeof(__u32) - 1)) ||
		     (metalen > 32)))
	if (unlikely(xdp_metalen_invalid(metalen)))
		return -EACCES;

	xdp->data_meta = meta;
+105 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include "test_xdp_context_test_run.skel.h"

/* Run the program with an xdp_md context built from the given field values
 * and assert that the test run is rejected with EINVAL.
 *
 * @opts is taken by value, so pointing it at the local context does not
 * change the caller's options.
 */
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
			    __u32 data_meta, __u32 data, __u32 data_end,
			    __u32 ingress_ifindex, __u32 rx_queue_index,
			    __u32 egress_ifindex)
{
	struct xdp_md bad_md;
	int ret;

	memset(&bad_md, 0, sizeof(bad_md));
	bad_md.data_meta = data_meta;
	bad_md.data = data;
	bad_md.data_end = data_end;
	bad_md.ingress_ifindex = ingress_ifindex;
	bad_md.rx_queue_index = rx_queue_index;
	bad_md.egress_ifindex = egress_ifindex;

	opts.ctx_size_in = sizeof(bad_md);
	opts.ctx_in = &bad_md;

	ret = bpf_prog_test_run_opts(prog_fd, &opts);
	/* errno is checked first, before anything else can overwrite it */
	ASSERT_EQ(errno, EINVAL, "errno-EINVAL");
	ASSERT_ERR(ret, "bpf_prog_test_run");
}

void test_xdp_context_test_run(void)
{
	struct test_xdp_context_test_run *skel = NULL;
	char data[sizeof(pkt_v4) + sizeof(__u32)];
	char bad_ctx[sizeof(struct xdp_md) + 1];
	struct xdp_md ctx_in, ctx_out;
	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
			    .data_in = &data,
			    .data_size_in = sizeof(data),
			    .ctx_out = &ctx_out,
			    .ctx_size_out = sizeof(ctx_out),
			    .repeat = 1,
		);
	int err, prog_fd;

	skel = test_xdp_context_test_run__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel"))
		return;
	prog_fd = bpf_program__fd(skel->progs.xdp_context);

	/* Data past the end of the kernel's struct xdp_md must be 0 */
	bad_ctx[sizeof(bad_ctx) - 1] = 1;
	opts.ctx_in = bad_ctx;
	opts.ctx_size_in = sizeof(bad_ctx);
	err = bpf_prog_test_run_opts(prog_fd, &opts);
	ASSERT_EQ(errno, E2BIG, "extradata-errno");
	ASSERT_ERR(err, "bpf_prog_test_run(extradata)");

	*(__u32 *)data = XDP_PASS;
	*(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4;
	opts.ctx_in = &ctx_in;
	opts.ctx_size_in = sizeof(ctx_in);
	memset(&ctx_in, 0, sizeof(ctx_in));
	ctx_in.data_meta = 0;
	ctx_in.data = sizeof(__u32);
	ctx_in.data_end = ctx_in.data + sizeof(pkt_v4);
	err = bpf_prog_test_run_opts(prog_fd, &opts);
	ASSERT_OK(err, "bpf_prog_test_run(valid)");
	ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval");
	ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize");
	ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize");
	ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta");
	ASSERT_EQ(ctx_out.data, 0, "valid-data");
	ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend");

	/* Meta data's size must be a multiple of 4 */
	test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0);

	/* data_meta must reference the start of data */
	test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
			       0, 0, 0);

	/* Meta data must be 32 bytes or smaller */
	test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0);

	/* Total size of data must match data_end - data_meta */
	test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
			       sizeof(data) - 1, 0, 0, 0);
	test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
			       sizeof(data) + 1, 0, 0, 0);

	/* RX queue cannot be specified without specifying an ingress */
	test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
			       0, 1, 0);

	/* Interface 1 is always the loopback interface which always has only
	 * one RX queue (index 0). This makes index 1 an invalid rx queue index
	 * for interface 1.
	 */
	test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
			       1, 1, 0);

	/* The egress cannot be specified */
	test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
			       0, 0, 1);

	test_xdp_context_test_run__destroy(skel);
}
Loading