Commit 2865ba82 authored by David S. Miller


Daniel Borkmann says:

====================
pull-request: bpf 2021-09-14

The following pull-request contains BPF updates for your *net* tree.

We've added 7 non-merge commits during the last 13 day(s) which contain
a total of 18 files changed, 334 insertions(+), 193 deletions(-).

The main changes are:

1) Fix mmap_lock lockdep splat in BPF stack map's build_id lookup, from Yonghong Song.

2) Fix BPF cgroup v2 program bypass upon net_cls/prio activation, from Daniel Borkmann.

3) Fix kvcalloc() BTF line info splat on oversized allocation attempts, from Bixuan Cui.

4) Fix BPF selftest build of task_pt_regs test for arm64/s390, from Jean-Philippe Brucker.

5) Fix BPF's disasm.{c,h} to dual-license so that it is aligned with bpftool given the former
   is a build dependency for the latter, from Daniel Borkmann with ACKs from contributors.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 550ac9c1 43d2b88c
include/linux/cgroup-defs.h | +27 −80
@@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
  * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
  * per-socket cgroup information except for memcg association.
  *
- * On legacy hierarchies, net_prio and net_cls controllers directly set
- * attributes on each sock which can then be tested by the network layer.
- * On the default hierarchy, each sock is associated with the cgroup it was
- * created in and the networking layer can match the cgroup directly.
- *
- * To avoid carrying all three cgroup related fields separately in sock,
- * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
- * On boot, sock_cgroup_data records the cgroup that the sock was created
- * in so that cgroup2 matches can be made; however, once either net_prio or
- * net_cls starts being used, the area is overridden to carry prioidx and/or
- * classid.  The two modes are distinguished by whether the lowest bit is
- * set.  Clear bit indicates cgroup pointer while set bit prioidx and
- * classid.
- *
- * While userland may start using net_prio or net_cls at any time, once
- * either is used, cgroup2 matching no longer works.  There is no reason to
- * mix the two and this is in line with how legacy and v2 compatibility is
- * handled.  On mode switch, cgroup references which are already being
- * pointed to by socks may be leaked.  While this can be remedied by adding
- * synchronization around sock_cgroup_data, given that the number of leaked
- * cgroups is bound and highly unlikely to be high, this seems to be the
- * better trade-off.
+ * On legacy hierarchies, net_prio and net_cls controllers directly
+ * set attributes on each sock which can then be tested by the network
+ * layer. On the default hierarchy, each sock is associated with the
+ * cgroup it was created in and the networking layer can match the
+ * cgroup directly.
  */
 struct sock_cgroup_data {
-	union {
-#ifdef __LITTLE_ENDIAN
-		struct {
-			u8	is_data : 1;
-			u8	no_refcnt : 1;
-			u8	unused : 6;
-			u8	padding;
-			u16	prioidx;
-			u32	classid;
-		} __packed;
-#else
-		struct {
-			u32	classid;
-			u16	prioidx;
-			u8	padding;
-			u8	unused : 6;
-			u8	no_refcnt : 1;
-			u8	is_data : 1;
-		} __packed;
+	struct cgroup	*cgroup; /* v2 */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	u32		classid; /* v1 */
+#endif
+#ifdef CONFIG_CGROUP_NET_PRIO
+	u16		prioidx; /* v1 */
 #endif
-		u64		val;
-	};
 };
 
-/*
- * There's a theoretical window where the following accessors race with
- * updaters and return part of the previous pointer as the prioidx or
- * classid.  Such races are short-lived and the result isn't critical.
- */
 static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
 {
-	/* fallback to 1 which is always the ID of the root cgroup */
-	return (skcd->is_data & 1) ? skcd->prioidx : 1;
+#ifdef CONFIG_CGROUP_NET_PRIO
+	return READ_ONCE(skcd->prioidx);
+#else
+	return 1;
+#endif
 }
 
 static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
 {
-	/* fallback to 0 which is the unconfigured default classid */
-	return (skcd->is_data & 1) ? skcd->classid : 0;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	return READ_ONCE(skcd->classid);
+#else
+	return 0;
+#endif
 }
 
-/*
- * If invoked concurrently, the updaters may clobber each other.  The
- * caller is responsible for synchronization.
- */
 static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
 					   u16 prioidx)
 {
-	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
-	if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
-		return;
-
-	if (!(skcd_buf.is_data & 1)) {
-		skcd_buf.val = 0;
-		skcd_buf.is_data = 1;
-	}
-
-	skcd_buf.prioidx = prioidx;
-	WRITE_ONCE(skcd->val, skcd_buf.val);	/* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_PRIO
+	WRITE_ONCE(skcd->prioidx, prioidx);
+#endif
 }
 
 static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
 					   u32 classid)
 {
-	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
-	if (sock_cgroup_classid(&skcd_buf) == classid)
-		return;
-
-	if (!(skcd_buf.is_data & 1)) {
-		skcd_buf.val = 0;
-		skcd_buf.is_data = 1;
-	}
-
-	skcd_buf.classid = classid;
-	WRITE_ONCE(skcd->val, skcd_buf.val);	/* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	WRITE_ONCE(skcd->classid, classid);
+#endif
 }
 
 #else	/* CONFIG_SOCK_CGROUP_DATA */
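
The comment block removed in this hunk documented the scheme being retired: one 64-bit word carried either the v2 cgroup pointer (lowest bit clear) or the v1 prioidx/classid pair (lowest bit set), so the first use of net_prio or net_cls overwrote the pointer and cgroup v2 BPF matching silently stopped, which is the bypass named in change 2) above. A minimal user-space sketch of that old tagged-union layout (simplified to one flag bit, little-endian bit-field order assumed, all names invented) shows the loss:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the removed layout: the same 64 bits are either a cgroup
 * pointer (is_data == 0) or prioidx/classid storage (is_data == 1).
 */
union skcd_sketch {
	struct {
		uint8_t		is_data : 1;	/* mode tag, lowest bit of val */
		uint8_t		unused  : 7;
		uint8_t		padding;
		uint16_t	prioidx;
		uint32_t	classid;
	};
	uint64_t	val;	/* doubles as pointer storage in v2 mode */
};

int main(void)
{
	static int fake_cgroup;	/* stand-in for a struct cgroup */
	union skcd_sketch skcd = {
		.val = (uint64_t)(uintptr_t)&fake_cgroup
	};

	/* v2 mode: pointers are word-aligned, so the tag bit reads 0 */
	printf("is_data=%u (cgroup pointer intact)\n", skcd.is_data);

	/* first net_cls/net_prio use flips the mode, clobbering the pointer */
	skcd.val = 0;
	skcd.is_data = 1;
	skcd.classid = 42;
	printf("is_data=%u classid=%u (v2 pointer gone)\n",
	       skcd.is_data, skcd.classid);
	return 0;
}

The replacement layout in the hunk sidesteps this by giving the v2 pointer and the v1 fields their own storage, at the cost of a slightly larger sock_cgroup_data.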
include/linux/cgroup.h | +1 −21
@@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
  */
 #ifdef CONFIG_SOCK_CGROUP_DATA
 
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-extern spinlock_t cgroup_sk_update_lock;
-#endif
-
-void cgroup_sk_alloc_disable(void);
 void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
 void cgroup_sk_clone(struct sock_cgroup_data *skcd);
 void cgroup_sk_free(struct sock_cgroup_data *skcd);
 
 static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
 {
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-	unsigned long v;
-
-	/*
-	 * @skcd->val is 64bit but the following is safe on 32bit too as we
-	 * just need the lower ulong to be written and read atomically.
-	 */
-	v = READ_ONCE(skcd->val);
-
-	if (v & 3)
-		return &cgrp_dfl_root.cgrp;
-
-	return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
-#else
-	return (struct cgroup *)(unsigned long)skcd->val;
-#endif
+	return skcd->cgroup;
 }
 
 #else	/* CONFIG_CGROUP_DATA */
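
With the union gone, sock_cgroup_ptr() reduces to a plain load: there is no tag bit to test and no &cgrp_dfl_root.cgrp fallback, because v1 writes can no longer clobber the pointer. A rough user-space sketch of the post-fix accessor model (invented *_sketch names; C11 relaxed atomics standing in for the kernel's READ_ONCE()/WRITE_ONCE()):

#include <stdatomic.h>
#include <stdint.h>

struct cgroup;	/* opaque stand-in */

struct sock_cgroup_data_sketch {
	struct cgroup	*cgroup;	/* v2: fixed at socket creation */
	_Atomic uint32_t classid;	/* v1: may be rewritten at any time */
	_Atomic uint16_t prioidx;	/* v1: may be rewritten at any time */
};

static inline struct cgroup *
sketch_cgroup_ptr(struct sock_cgroup_data_sketch *skcd)
{
	return skcd->cgroup;	/* no tag check, no default-root fallback */
}

static inline uint32_t
sketch_classid(struct sock_cgroup_data_sketch *skcd)
{
	/* relaxed load ~ READ_ONCE(): concurrent updates are tolerated */
	return atomic_load_explicit(&skcd->classid, memory_order_relaxed);
}

static inline void
sketch_set_classid(struct sock_cgroup_data_sketch *skcd, uint32_t classid)
{
	/* relaxed store ~ WRITE_ONCE(): never touches the v2 pointer */
	atomic_store_explicit(&skcd->classid, classid, memory_order_relaxed);
}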
include/linux/mmap_lock.h | +0 −9
@@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
 	__mmap_lock_trace_released(mm, false);
 }
 
-static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
-{
-	if (mmap_read_trylock(mm)) {
-		rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
-		return true;
-	}
-	return false;
-}
-
 static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
 {
 	up_read_non_owner(&mm->mmap_lock);
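
For context on why this helper could be dropped entirely (change 1) above): releasing lockdep ownership at acquire time is what triggered the splat, so the lone caller in kernel/bpf/stackmap.c now takes the lock with plain mmap_read_trylock() and hands ownership to lockdep only when the unlock really is deferred to irq_work, roughly as sketched below (abridged; error handling and the surrounding loop omitted):

	if (!user || !current || !current->mm || irq_work_busy ||
	    !mmap_read_trylock(current->mm)) {
		/* lock unavailable: fill fallback entries without build_id */
	}

	/* ... walk VMAs and resolve build_id under the read lock ... */

	if (!work) {
		mmap_read_unlock(current->mm);
	} else {
		work->mm = current->mm;

		/* The unlock happens later from irq_work context; release
		 * the lockdep ownership here so it does not warn about a
		 * lock held past this point.
		 */
		rwsem_release(&current->mm->mmap_lock.dep_map, _RET_IP_);
		irq_work_queue(&work->irq_work);
	}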
kernel/bpf/disasm.c | +1 −1
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
  */
kernel/bpf/disasm.h | +1 −1
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
  */