Commit d9e8d14b authored by Andrii Nakryiko
Browse files

Merge branch 'bpf: Allow bpf tcp iter to do bpf_(get|set)sockopt'



Martin KaFai says:

====================

This set is to allow bpf tcp iter to call bpf_(get|set)sockopt.

With bpf-tcp-cc, new algo rollout happens more often.  Instead of
restarting the applications to pick up the new tcp-cc, this set
allows the bpf tcp iter to call bpf_(get|set)sockopt(TCP_CONGESTION).
It is not limited to TCP_CONGESTION, the bpf tcp iter can call
bpf_(get|set)sockopt() with other options.  The bpf tcp iter can read
into all the fields of a tcp_sock, so there is a lot of flexibility
to select the desired sk to do setsockopt(), e.g. it can test for
TCP_LISTEN only and leave the established connections untouched,
or check the addr/port, or check the current tcp-cc name, ...etc.

Patch 1-4 are some cleanup and prep work in the tcp and bpf seq_file.

Patch 5 is to have the tcp seq_file iterate on the
port+addr lhash2 instead of the port only listening_hash.

Patch 6 is to have the bpf tcp iter do batching, which
then allows lock_sock.  lock_sock is needed for setsockopt.

Patch 7 allows the bpf tcp iter to call bpf_(get|set)sockopt.

v2:
- Use __GFP_NOWARN in patch 6
- Add bpf_getsockopt() in patch 7 to give a symmetrical user experience.
  selftest in patch 8 is changed to also cover bpf_getsockopt().
- Remove CAP_NET_ADMIN check in patch 7. Tracing bpf prog has already
  required CAP_SYS_ADMIN or CAP_PERFMON.
- Move some def macros to bpf_tracing_net.h in patch 8
====================

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
parents da97553e eed92afd
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -1442,6 +1442,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
					struct seq_file *seq);
typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
					 struct bpf_link_info *info);
/* Callback type for the get_func_proto member of struct bpf_iter_reg.
 * It receives the requested helper id and the program, and returns a
 * pointer to a const struct bpf_func_proto.
 */
typedef const struct bpf_func_proto *
(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
			     const struct bpf_prog *prog);

enum bpf_iter_feature {
	BPF_ITER_RESCHED	= BIT(0),
@@ -1454,6 +1457,7 @@ struct bpf_iter_reg {
	bpf_iter_detach_target_t detach_target;
	bpf_iter_show_fdinfo_t show_fdinfo;
	bpf_iter_fill_link_info_t fill_link_info;
	bpf_iter_get_func_proto_t get_func_proto;
	u32 ctx_arg_info_size;
	u32 feature;
	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
@@ -1476,6 +1480,8 @@ struct bpf_iter__bpf_map_elem {
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
/* Resolve @func_id for an iterator program by consulting the registered
 * iterator target's get_func_proto callback.
 * NOTE(review): NULL-return conditions are defined by the implementation;
 * confirm there before relying on them.
 */
const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
int bpf_iter_new_fd(struct bpf_link *link);
bool bpf_link_is_iter(struct bpf_link *link);
@@ -2050,6 +2056,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto;
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;

const struct bpf_func_proto *bpf_tracing_func_proto(
	enum bpf_func_id func_id, const struct bpf_prog *prog);
+6 −0
Original line number Diff line number Diff line
@@ -160,6 +160,12 @@ struct inet_hashinfo {
					____cacheline_aligned_in_smp;
};

/* Continue an iteration that is already positioned at __icsk, following
 * the icsk_listen_portaddr_node linkage. Thin wrapper around
 * hlist_for_each_entry_continue().
 */
#define inet_lhash2_for_each_icsk_continue(__icsk) \
	hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)

/* Iterate over every entry of @list through icsk_listen_portaddr_node.
 * Thin wrapper around hlist_for_each_entry().
 */
#define inet_lhash2_for_each_icsk(__icsk, list) \
	hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)

/* Same traversal as inet_lhash2_for_each_icsk(), but built on
 * hlist_for_each_entry_rcu().
 * NOTE(review): presumably the caller must be in an RCU read-side
 * section or hold the bucket lock; confirm against callers.
 */
#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
	hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)

+0 −1
Original line number Diff line number Diff line
@@ -1959,7 +1959,6 @@ struct tcp_iter_state {
	struct seq_net_private	p;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
	struct tcp_seq_afinfo	*bpf_seq_afinfo;
	int			bucket, offset, sbucket, num;
	loff_t			last_pos;
};
+22 −0
Original line number Diff line number Diff line
@@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
	return supported;
}

const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_iter_target_info *tinfo;
	const struct bpf_func_proto *fn = NULL;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id == prog->aux->attach_btf_id) {
			const struct bpf_iter_reg *reg_info;

			reg_info = tinfo->reg_info;
			if (reg_info->get_func_proto)
				fn = reg_info->get_func_proto(func_id, prog);
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	return fn;
}

static void bpf_iter_link_release(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
+6 −1
Original line number Diff line number Diff line
@@ -1461,6 +1461,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
const struct bpf_func_proto *
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;

	switch (func_id) {
#ifdef CONFIG_NET
	case BPF_FUNC_skb_output:
@@ -1501,7 +1503,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
	case BPF_FUNC_d_path:
		return &bpf_d_path_proto;
	default:
		return raw_tp_prog_func_proto(func_id, prog);
		fn = raw_tp_prog_func_proto(func_id, prog);
		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
			fn = bpf_iter_get_func_proto(func_id, prog);
		return fn;
	}
}

Loading