Commit c4dcfdd4 authored by YiFei Zhu, committed by Alexei Starovoitov
Browse files

bpf: Move getsockopt retval to struct bpf_cg_run_ctx



The retval value is moved to struct bpf_cg_run_ctx for ease of access
in different prog types with different context structs layouts. The
helper implementation (to be added in a later patch in the series) can
simply perform a container_of from current->bpf_ctx to retrieve
bpf_cg_run_ctx.

Unfortunately, there is no easy way to access the current task_struct
via the verifier BPF bytecode rewrite, aside from possibly calling a
helper, so a pointer to current task is added to struct bpf_sockopt_kern
so that the rewritten BPF bytecode can access struct bpf_cg_run_ctx with
an indirection.

For backward compatibility, if a getsockopt program rejects a syscall
by returning 0, an -EPERM will be generated, by having the
BPF_PROG_RUN_ARRAY_CG family macros automatically set the retval to
-EPERM. Unlike before this patch, this -EPERM will be visible in
ctx->retval to any other hooks further down the line in the prog array.

Additionally, the restriction that getsockopt filters can only set
the retval to 0 is removed, considering that certain getsockopt
implementations may return optlen. Filters are now able to set the
value arbitrarily.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/73b0325f5c29912ccea7ea57ec1ed4d388fc1d37.1639619851.git.zhuyifei@google.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent f10d0596
Loading
Loading
Loading
Loading
+11 −9
Original line number Diff line number Diff line
@@ -1245,6 +1245,7 @@ struct bpf_run_ctx {};
struct bpf_cg_run_ctx {
	struct bpf_run_ctx run_ctx;
	const struct bpf_prog_array_item *prog_item;
	int retval;
};

struct bpf_trace_run_ctx {
@@ -1280,16 +1281,16 @@ typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
static __always_inline int
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
			    const void *ctx, bpf_prog_run_fn run_prog,
			    u32 *ret_flags)
			    int retval, u32 *ret_flags)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog *prog;
	const struct bpf_prog_array *array;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;
	int ret = 0;
	u32 func_ret;

	run_ctx.retval = retval;
	migrate_disable();
	rcu_read_lock();
	array = rcu_dereference(array_rcu);
@@ -1299,27 +1300,28 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
		run_ctx.prog_item = item;
		func_ret = run_prog(prog, ctx);
		if (!(func_ret & 1))
			ret = -EPERM;
			run_ctx.retval = -EPERM;
		*(ret_flags) |= (func_ret >> 1);
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();
	return ret;
	return run_ctx.retval;
}

static __always_inline int
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
		      const void *ctx, bpf_prog_run_fn run_prog)
		      const void *ctx, bpf_prog_run_fn run_prog,
		      int retval)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog *prog;
	const struct bpf_prog_array *array;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;
	int ret = 0;

	run_ctx.retval = retval;
	migrate_disable();
	rcu_read_lock();
	array = rcu_dereference(array_rcu);
@@ -1328,13 +1330,13 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
	while ((prog = READ_ONCE(item->prog))) {
		run_ctx.prog_item = item;
		if (!run_prog(prog, ctx))
			ret = -EPERM;
			run_ctx.retval = -EPERM;
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();
	return ret;
	return run_ctx.retval;
}

static __always_inline u32
@@ -1394,7 +1396,7 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
		u32 _flags = 0;				\
		bool _cn;				\
		u32 _ret;				\
		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
		_cn = _flags & BPF_RET_SET_CN;		\
		if (!_ret)				\
			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
+4 −1
Original line number Diff line number Diff line
@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
	s32		level;
	s32		optname;
	s32		optlen;
	s32		retval;
	/* for retval in struct bpf_cg_run_ctx */
	struct task_struct *current_task;
	/* Temporary "register" for indirect stores to ppos. */
	u64		tmp_reg;
};

int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
+48 −34
Original line number Diff line number Diff line
@@ -1079,7 +1079,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
			cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
	} else {
		ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
					    __bpf_prog_run_save_cb);
					    __bpf_prog_run_save_cb, 0);
	}
	bpf_restore_data_end(skb, saved_data_end);
	__skb_pull(skb, offset);
@@ -1108,7 +1108,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
				     bpf_prog_run);
				     bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);

@@ -1154,7 +1154,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
					   bpf_prog_run, flags);
					   bpf_prog_run, 0, flags);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);

@@ -1181,7 +1181,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
				     bpf_prog_run);
				     bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);

@@ -1199,7 +1199,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
				    bpf_prog_run);
				    bpf_prog_run, 0);
	rcu_read_unlock();

	return ret;
@@ -1330,7 +1330,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
				    bpf_prog_run, 0);
	rcu_read_unlock();

	kfree(ctx.cur_val);
@@ -1445,7 +1446,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,

	lock_sock(sk);
	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
				    &ctx, bpf_prog_run);
				    &ctx, bpf_prog_run, 0);
	release_sock(sk);

	if (ret)
@@ -1509,7 +1510,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
		.sk = sk,
		.level = level,
		.optname = optname,
		.retval = retval,
		.current_task = current,
	};
	int ret;

@@ -1553,10 +1554,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,

	lock_sock(sk);
	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
				    &ctx, bpf_prog_run);
				    &ctx, bpf_prog_run, retval);
	release_sock(sk);

	if (ret)
	if (ret < 0)
		goto out;

	if (ctx.optlen > max_optlen || ctx.optlen < 0) {
@@ -1564,14 +1565,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
		goto out;
	}

	/* BPF programs only allowed to set retval to 0, not some
	 * arbitrary value.
	 */
	if (ctx.retval != 0 && ctx.retval != retval) {
		ret = -EFAULT;
		goto out;
	}

	if (ctx.optlen != 0) {
		if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
		    put_user(ctx.optlen, optlen)) {
@@ -1580,8 +1573,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
		}
	}

	ret = ctx.retval;

out:
	sockopt_free_buf(&ctx, &buf);
	return ret;
@@ -1596,10 +1587,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
		.sk = sk,
		.level = level,
		.optname = optname,
		.retval = retval,
		.optlen = *optlen,
		.optval = optval,
		.optval_end = optval + *optlen,
		.current_task = current,
	};
	int ret;

@@ -1612,25 +1603,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
	 */

	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
				    &ctx, bpf_prog_run);
	if (ret)
				    &ctx, bpf_prog_run, retval);
	if (ret < 0)
		return ret;

	if (ctx.optlen > *optlen)
		return -EFAULT;

	/* BPF programs only allowed to set retval to 0, not some
	 * arbitrary value.
	 */
	if (ctx.retval != 0 && ctx.retval != retval)
		return -EFAULT;

	/* BPF programs can shrink the buffer, export the modifications.
	 */
	if (ctx.optlen != 0)
		*optlen = ctx.optlen;

	return ctx.retval;
	return ret;
}
#endif

@@ -2046,10 +2031,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
		break;
	case offsetof(struct bpf_sockopt, retval):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
		else
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
		BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);

		if (type == BPF_WRITE) {
			int treg = BPF_REG_9;

			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
					      offsetof(struct bpf_sockopt_kern, tmp_reg));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
					      treg, si->dst_reg,
					      offsetof(struct bpf_sockopt_kern, current_task));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
					      treg, treg,
					      offsetof(struct task_struct, bpf_ctx));
			*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
					      treg, si->src_reg,
					      offsetof(struct bpf_cg_run_ctx, retval));
			*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
					      offsetof(struct bpf_sockopt_kern, tmp_reg));
		} else {
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
					      si->dst_reg, si->src_reg,
					      offsetof(struct bpf_sockopt_kern, current_task));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
					      si->dst_reg, si->dst_reg,
					      offsetof(struct task_struct, bpf_ctx));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
					      si->dst_reg, si->dst_reg,
					      offsetof(struct bpf_cg_run_ctx, retval));
		}
		break;
	case offsetof(struct bpf_sockopt, optval):
		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);