Commit 18f55887 authored by Martin KaFai Lau

Merge branch 'bpf: Add socket destroy capability'

Aditi Ghag says:

====================

This patch set adds the capability to destroy sockets in BPF. We plan to
use the capability in Cilium to force client sockets to reconnect when
their remote load-balancing backends are deleted. The other use case is
on-the-fly policy enforcement, where existing socket connections that
are no longer permitted by policy need to be terminated.

The use cases and more details about the selected approach were
presented at LPC 2022 -
https://lpc.events/event/16/contributions/1358/.
RFC discussion -
https://lore.kernel.org/netdev/CABG=zsBEh-P4NXk23eBJw7eajB5YJeRS7oPXnTAzs=yob4EMoQ@mail.gmail.com/T/#u.
v8 patch series -
https://lore.kernel.org/bpf/20230517175359.527917-1-aditi.ghag@isovalent.com/



v9 highlights:
Address review comments:
Martin:
- Rearranged the kfunc filter patch, and added the missing break
  statement.
- Squashed the extended selftest/bpf patch.
Yonghong:
- Revised commit message for patch 1.

(The notes below carry over from the v8 patch series and are still relevant.
Refer to earlier versions of the patch series for other notes.)
- I hit a snag while writing the kfunc: the verifier complained about the
  `sock_common` type passed from the TCP iterator. With kfuncs, there don't
  seem to be any options available to pass BTF type hints to the verifier
  (equivalent of `ARG_PTR_TO_BTF_ID_SOCK_COMMON`, as was the case with the
  helper). As a result, I changed the argument type of the sock_destroy
  kfunc to `sock_common`.
====================

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents 9343184c 1a8bc229
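
For illustration, a minimal BPF-side sketch of the Cilium-style use case from
the cover letter: an iter/tcp program that destroys client sockets connected
to a hypothetical backend port. The program name and SERVER_PORT value are
illustrative assumptions and are not part of this series' selftests.

	// SPDX-License-Identifier: GPL-2.0
	/* Minimal sketch, not the selftest from this series. */
	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_endian.h>

	#define SERVER_PORT 6379	/* hypothetical backend port being drained */

	/* kfunc added by this series; takes a sock_common pointer (see the
	 * note about ARG_PTR_TO_BTF_ID_SOCK_COMMON above)
	 */
	int bpf_sock_destroy(struct sock_common *sk) __ksym;

	SEC("iter/tcp")
	int iter_tcp_destroy(struct bpf_iter__tcp *ctx)
	{
		struct sock_common *sk_common = ctx->sk_common;

		if (!sk_common)
			return 0;

		/* The TCP iterator holds the socket lock here, which is what
		 * lets bpf_sock_destroy() run the protocol destroy handler
		 * synchronously.
		 */
		if (sk_common->skc_dport == bpf_htons(SERVER_PORT))
			bpf_sock_destroy(sk_common);

		return 0;
	}

	char _license[] SEC("license") = "GPL";
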
+11 −7
@@ -98,10 +98,14 @@ struct btf_type;
union bpf_attr;
struct btf_show;
struct btf_id_set;
struct bpf_prog;

typedef int (*btf_kfunc_filter_t)(const struct bpf_prog *prog, u32 kfunc_id);

struct btf_kfunc_id_set {
	struct module *owner;
	struct btf_id_set8 *set;
	btf_kfunc_filter_t filter;
};

struct btf_id_dtor_kfunc {
@@ -479,7 +483,6 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
	return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
}

struct bpf_prog;
struct bpf_verifier_log;

#ifdef CONFIG_BPF_SYSCALL
@@ -487,10 +490,10 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
struct btf *btf_parse_vmlinux(void);
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
			       enum bpf_prog_type prog_type,
			       u32 kfunc_btf_id);
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id);
u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id,
			       const struct bpf_prog *prog);
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
				const struct bpf_prog *prog);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
			      const struct btf_kfunc_id_set *s);
int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset);
@@ -517,8 +520,9 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
	return NULL;
}
static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf,
					     enum bpf_prog_type prog_type,
					     u32 kfunc_btf_id)
					     u32 kfunc_btf_id,
					     struct bpf_prog *prog)

{
	return NULL;
}
+0 −1
@@ -437,7 +437,6 @@ struct udp_seq_afinfo {
struct udp_iter_state {
	struct seq_net_private  p;
	int			bucket;
	struct udp_seq_afinfo	*bpf_seq_afinfo;
};

void *udp_seq_start(struct seq_file *seq, loff_t *pos);
+54 −11
@@ -222,10 +222,17 @@ enum btf_kfunc_hook {
enum {
	BTF_KFUNC_SET_MAX_CNT = 256,
	BTF_DTOR_KFUNC_MAX_CNT = 256,
	BTF_KFUNC_FILTER_MAX_CNT = 16,
};

struct btf_kfunc_hook_filter {
	btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT];
	u32 nr_filters;
};

struct btf_kfunc_set_tab {
	struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX];
	struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX];
};

struct btf_id_dtor_kfunc_tab {
@@ -7669,9 +7676,12 @@ static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags)
/* Kernel Function (kfunc) BTF ID set registration API */

static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
				  struct btf_id_set8 *add_set)
				  const struct btf_kfunc_id_set *kset)
{
	struct btf_kfunc_hook_filter *hook_filter;
	struct btf_id_set8 *add_set = kset->set;
	bool vmlinux_set = !btf_is_module(btf);
	bool add_filter = !!kset->filter;
	struct btf_kfunc_set_tab *tab;
	struct btf_id_set8 *set;
	u32 set_cnt;
@@ -7686,6 +7696,24 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
		return 0;

	tab = btf->kfunc_set_tab;

	if (tab && add_filter) {
		u32 i;

		hook_filter = &tab->hook_filters[hook];
		for (i = 0; i < hook_filter->nr_filters; i++) {
			if (hook_filter->filters[i] == kset->filter) {
				add_filter = false;
				break;
			}
		}

		if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) {
			ret = -E2BIG;
			goto end;
		}
	}

	if (!tab) {
		tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
		if (!tab)
@@ -7708,7 +7736,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
	 */
	if (!vmlinux_set) {
		tab->sets[hook] = add_set;
		return 0;
		goto do_add_filter;
	}

	/* In case of vmlinux sets, there may be more than one set being
@@ -7750,6 +7778,11 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,

	sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL);

do_add_filter:
	if (add_filter) {
		hook_filter = &tab->hook_filters[hook];
		hook_filter->filters[hook_filter->nr_filters++] = kset->filter;
	}
	return 0;
end:
	btf_free_kfunc_set_tab(btf);
@@ -7758,15 +7791,22 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,

static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
					enum btf_kfunc_hook hook,
					u32 kfunc_btf_id)
					u32 kfunc_btf_id,
					const struct bpf_prog *prog)
{
	struct btf_kfunc_hook_filter *hook_filter;
	struct btf_id_set8 *set;
	u32 *id;
	u32 *id, i;

	if (hook >= BTF_KFUNC_HOOK_MAX)
		return NULL;
	if (!btf->kfunc_set_tab)
		return NULL;
	hook_filter = &btf->kfunc_set_tab->hook_filters[hook];
	for (i = 0; i < hook_filter->nr_filters; i++) {
		if (hook_filter->filters[i](prog, kfunc_btf_id))
			return NULL;
	}
	set = btf->kfunc_set_tab->sets[hook];
	if (!set)
		return NULL;
@@ -7821,23 +7861,25 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
 * protection for looking up a well-formed btf->kfunc_set_tab.
 */
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
			       enum bpf_prog_type prog_type,
			       u32 kfunc_btf_id)
			       u32 kfunc_btf_id,
			       const struct bpf_prog *prog)
{
	enum bpf_prog_type prog_type = resolve_prog_type(prog);
	enum btf_kfunc_hook hook;
	u32 *kfunc_flags;

	kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id);
	kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog);
	if (kfunc_flags)
		return kfunc_flags;

	hook = bpf_prog_type_to_kfunc_hook(prog_type);
	return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
	return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog);
}

u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id)
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id,
				const struct bpf_prog *prog)
{
	return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id);
	return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog);
}

static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
@@ -7868,7 +7910,8 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
			goto err_out;
	}

	ret = btf_populate_kfunc_set(btf, hook, kset->set);
	ret = btf_populate_kfunc_set(btf, hook, kset);

err_out:
	btf_put(btf);
	return ret;
+4 −3
@@ -10939,7 +10939,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
		*kfunc_name = func_name;
	func_proto = btf_type_by_id(desc_btf, func->type);
	kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
	kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
	if (!kfunc_flags) {
		return -EACCES;
	}
@@ -19010,7 +19010,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
				 * in the fmodret id set with the KF_SLEEPABLE flag.
				 */
				else {
					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
										prog);
					if (flags && (*flags & KF_SLEEPABLE))
						ret = 0;
@@ -19038,7 +19039,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
				return -EINVAL;
			}
			ret = -EINVAL;
			if (btf_kfunc_is_modify_return(btf, btf_id) ||
			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
			    !check_attach_modify_return(addr, tname))
				ret = 0;
			if (ret) {
+63 −0
@@ -11723,3 +11723,66 @@ static int __init bpf_kfunc_init(void)
	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
}
late_initcall(bpf_kfunc_init);

/* Disables missing prototype warnings */
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
		  "Global functions as their definitions will be in vmlinux BTF");

/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
 *
 * The function expects a non-NULL pointer to a socket, and invokes the
 * protocol specific socket destroy handlers.
 *
 * The helper can only be called from BPF contexts that have acquired the socket
 * locks.
 *
 * Parameters:
 * @sock: Pointer to socket to be destroyed
 *
 * Return:
 * On error, may return EOPNOTSUPP, EINVAL.
 * EOPNOTSUPP if protocol specific destroy handler is not supported.
 * 0 otherwise
 */
__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
{
	struct sock *sk = (struct sock *)sock;

	/* The locking semantics that allow for synchronous execution of the
	 * destroy handlers are only supported for TCP and UDP.
	 * Supporting protocols will need to acquire sock lock in the BPF context
	 * prior to invoking this kfunc.
	 */
	if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP &&
					   sk->sk_protocol != IPPROTO_UDP))
		return -EOPNOTSUPP;

	return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
}

__diag_pop()

BTF_SET8_START(bpf_sk_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
BTF_SET8_END(bpf_sk_iter_kfunc_ids)

static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
	if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) &&
	    prog->expected_attach_type != BPF_TRACE_ITER)
		return -EACCES;
	return 0;
}

static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &bpf_sk_iter_kfunc_ids,
	.filter = tracing_iter_filter,
};

static int init_subsystem(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set);
}
late_initcall(init_subsystem);
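
For completeness, a user-space sketch of how such an iterator could be
attached and driven with libbpf. The skeleton and program names match the
hypothetical BPF program above; reading the iterator fd is what runs the
program over the TCP sockets and triggers the destroys.

	#include <unistd.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>
	#include "sock_destroy_example.skel.h"	/* hypothetical generated skeleton */

	int main(void)
	{
		struct sock_destroy_example *skel;
		struct bpf_link *link = NULL;
		char buf[64];
		int iter_fd = -1, err = 1;

		skel = sock_destroy_example__open_and_load();
		if (!skel)
			return 1;

		/* BPF_TRACE_ITER is the only attach type that the
		 * tracing_iter_filter() added above allows for bpf_sock_destroy().
		 */
		link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
		if (!link)
			goto out;

		iter_fd = bpf_iter_create(bpf_link__fd(link));
		if (iter_fd < 0)
			goto out;

		/* Draining the iterator runs the program over every TCP socket. */
		while (read(iter_fd, buf, sizeof(buf)) > 0)
			;
		err = 0;
	out:
		if (iter_fd >= 0)
			close(iter_fd);
		bpf_link__destroy(link);
		sock_destroy_example__destroy(skel);
		return err;
	}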