Commit b5964b96 authored by Joanne Koong's avatar Joanne Koong Committed by Alexei Starovoitov
Browse files

bpf: Add skb dynptrs



Add skb dynptrs, which are dynptrs whose underlying pointer points
to a skb. The dynptr acts on skb data. skb dynptrs have two main
benefits. One is that they allow operations on sizes that are not
statically known at compile-time (eg variable-sized accesses).
Another is that parsing the packet data through dynptrs (instead of
through direct access of skb->data and skb->data_end) can be more
ergonomic and less brittle (eg does not need manual if checking for
being within bounds of data_end).

For bpf prog types that don't support writes on skb data, the dynptr is
read-only (bpf_dynptr_write() will return an error)

For reads and writes through the bpf_dynptr_read() and bpf_dynptr_write()
interfaces, reading and writing from/to data in the head as well as from/to
non-linear paged buffers is supported. Data slices through the
bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and
bpf_dynptr_slice_rdwr() (added in subsequent commit) should be used.

For examples of how skb dynptrs can be used, please see the attached
selftests.

Signed-off-by: default avatarJoanne Koong <joannelkoong@gmail.com>
Link: https://lore.kernel.org/r/20230301154953.641654-8-joannelkoong@gmail.com


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent d96d937d
Loading
Loading
Loading
Loading
+13 −1
Original line number Diff line number Diff line
@@ -607,11 +607,14 @@ enum bpf_type_flag {
	 */
	NON_OWN_REF		= BIT(14 + BPF_BASE_TYPE_BITS),

	/* DYNPTR points to sk_buff */
	DYNPTR_TYPE_SKB		= BIT(15 + BPF_BASE_TYPE_BITS),

	__BPF_TYPE_FLAG_MAX,
	__BPF_TYPE_LAST_FLAG	= __BPF_TYPE_FLAG_MAX - 1,
};

#define DYNPTR_TYPE_FLAG_MASK	(DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
#define DYNPTR_TYPE_FLAG_MASK	(DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB)

/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT	(1UL << BPF_BASE_TYPE_BITS)
@@ -1146,6 +1149,8 @@ enum bpf_dynptr_type {
	BPF_DYNPTR_TYPE_LOCAL,
	/* Underlying data is a ringbuf record */
	BPF_DYNPTR_TYPE_RINGBUF,
	/* Underlying data is a sk_buff */
	BPF_DYNPTR_TYPE_SKB,
};

int bpf_dynptr_check_size(u32 size);
@@ -2846,6 +2851,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
				struct bpf_insn *insn_buf,
				struct bpf_prog *prog,
				u32 *target_size);
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
			       struct bpf_dynptr_kern *ptr);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
						   enum bpf_access_type type,
@@ -2867,6 +2874,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
{
	return 0;
}
static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
					     struct bpf_dynptr_kern *ptr)
{
	return -EOPNOTSUPP;
}
#endif

#ifdef CONFIG_INET
+18 −0
Original line number Diff line number Diff line
@@ -1542,4 +1542,22 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index
	return XDP_REDIRECT;
}

#ifdef CONFIG_NET
int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
			  u32 len, u64 flags);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
				       void *to, u32 len)
{
	return -EOPNOTSUPP;
}

static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset,
					const void *from, u32 len, u64 flags)
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_NET */

#endif /* __LINUX_FILTER_H__ */
+11 −2
Original line number Diff line number Diff line
@@ -5325,11 +5325,17 @@ union bpf_attr {
 *	Description
 *		Write *len* bytes from *src* into *dst*, starting from *offset*
 *		into *dst*.
 *		*flags* is currently unused.
 *
 *		*flags* must be 0 except for skb-type dynptrs.
 *
 *		For skb-type dynptrs:
 *		    *  For *flags*, please see the flags accepted by
 *		       **bpf_skb_store_bytes**\ ().
 *	Return
 *		0 on success, -E2BIG if *offset* + *len* exceeds the length
 *		of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
 *		is a read-only dynptr or if *flags* is not 0.
 *		is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
 *		other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
 *
 * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
 *	Description
@@ -5337,6 +5343,9 @@ union bpf_attr {
 *
 *		*len* must be a statically known value. The returned data slice
 *		is invalidated whenever the dynptr is invalidated.
 *
 *		skb type dynptrs may not use bpf_dynptr_data. They should
 *		instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
 *	Return
 *		Pointer to the underlying dynptr data, NULL if the dynptr is
 *		read-only, if the dynptr is invalid, or if the offset and length
+18 −0
Original line number Diff line number Diff line
@@ -207,6 +207,11 @@ enum btf_kfunc_hook {
	BTF_KFUNC_HOOK_TRACING,
	BTF_KFUNC_HOOK_SYSCALL,
	BTF_KFUNC_HOOK_FMODRET,
	BTF_KFUNC_HOOK_CGROUP_SKB,
	BTF_KFUNC_HOOK_SCHED_ACT,
	BTF_KFUNC_HOOK_SK_SKB,
	BTF_KFUNC_HOOK_SOCKET_FILTER,
	BTF_KFUNC_HOOK_LWT,
	BTF_KFUNC_HOOK_MAX,
};

@@ -7708,6 +7713,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
		return BTF_KFUNC_HOOK_TRACING;
	case BPF_PROG_TYPE_SYSCALL:
		return BTF_KFUNC_HOOK_SYSCALL;
	case BPF_PROG_TYPE_CGROUP_SKB:
		return BTF_KFUNC_HOOK_CGROUP_SKB;
	case BPF_PROG_TYPE_SCHED_ACT:
		return BTF_KFUNC_HOOK_SCHED_ACT;
	case BPF_PROG_TYPE_SK_SKB:
		return BTF_KFUNC_HOOK_SK_SKB;
	case BPF_PROG_TYPE_SOCKET_FILTER:
		return BTF_KFUNC_HOOK_SOCKET_FILTER;
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
		return BTF_KFUNC_HOOK_LWT;
	default:
		return BTF_KFUNC_HOOK_MAX;
	}
+62 −14
Original line number Diff line number Diff line
@@ -1420,11 +1420,21 @@ static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
	return ptr->size & DYNPTR_RDONLY_BIT;
}

void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
	ptr->size |= DYNPTR_RDONLY_BIT;
}

static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
	ptr->size |= type << DYNPTR_TYPE_SHIFT;
}

static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
{
	return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
}

u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
{
	return ptr->size & DYNPTR_SIZE_MASK;
@@ -1497,6 +1507,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
	   u32, offset, u64, flags)
{
	enum bpf_dynptr_type type;
	int err;

	if (!src->data || flags)
@@ -1506,13 +1517,23 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern
	if (err)
		return err;

	type = bpf_dynptr_get_type(src);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		/* Source and destination may possibly overlap, hence use memmove to
		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
		 */
		memmove(dst, src->data + src->offset + offset, len);

		return 0;
	case BPF_DYNPTR_TYPE_SKB:
		return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
	default:
		WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
		return -EFAULT;
	}
}

static const struct bpf_func_proto bpf_dynptr_read_proto = {
@@ -1529,22 +1550,36 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
	   u32, len, u64, flags)
{
	enum bpf_dynptr_type type;
	int err;

	if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
	if (!dst->data || bpf_dynptr_is_rdonly(dst))
		return -EINVAL;

	err = bpf_dynptr_check_off_len(dst, offset, len);
	if (err)
		return err;

	type = bpf_dynptr_get_type(dst);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		if (flags)
			return -EINVAL;
		/* Source and destination may possibly overlap, hence use memmove to
		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
		 */
		memmove(dst->data + dst->offset + offset, src, len);

		return 0;
	case BPF_DYNPTR_TYPE_SKB:
		return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
					     flags);
	default:
		WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
		return -EFAULT;
	}
}

static const struct bpf_func_proto bpf_dynptr_write_proto = {
@@ -1560,6 +1595,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {

BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
	enum bpf_dynptr_type type;
	int err;

	if (!ptr->data)
@@ -1572,7 +1608,19 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
	if (bpf_dynptr_is_rdonly(ptr))
		return 0;

	type = bpf_dynptr_get_type(ptr);

	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
	case BPF_DYNPTR_TYPE_RINGBUF:
		return (unsigned long)(ptr->data + ptr->offset + offset);
	case BPF_DYNPTR_TYPE_SKB:
		/* skb dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
		return 0;
	default:
		WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
		return 0;
	}
}

static const struct bpf_func_proto bpf_dynptr_data_proto = {
Loading