Commit a85373fe authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull cgroup updates from Tejun Heo:

 - The misc controller now reports allocation rejections through
   misc.events instead of printk-ing

 - cgroup_mutex usage is reduced to improve scalability of some
   operations

 - vhost helper threads are now assigned to the right cgroup on cgroup2

 - Bug fixes

* 'for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: bpf: Move wrapper for __cgroup_bpf_*() to kernel/bpf/cgroup.c
  cgroup: Fix rootcg cpu.stat guest double counting
  cgroup: no need for cgroup_mutex for /proc/cgroups
  cgroup: remove cgroup_mutex from cgroupstats_build
  cgroup: reduce dependency on cgroup_mutex
  cgroup: cgroup-v1: do not exclude cgrp_dfl_root
  cgroup: Make rebind_subsystems() disable v2 controllers all at once
  docs/cgroup: add entry for misc.events
  misc_cgroup: remove error log to avoid log flood
  misc_cgroup: introduce misc.events to count failures
parents 4075409c 588e5d87
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -2318,6 +2318,16 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_
        Limits can be set higher than the capacity value in the misc.capacity
        file.

  misc.events
	A read-only flat-keyed file which exists on non-root cgroups. The
	following entries are defined. Unless specified otherwise, a value
	change in this file generates a file modified event. All fields in
	this file are hierarchical.

	  max
		The number of times the cgroup's resource usage was
		about to go over the max boundary.

Migration and Ownership
~~~~~~~~~~~~~~~~~~~~~~~

+0 −20
Original line number Diff line number Diff line
@@ -157,26 +157,6 @@ struct cgroup_bpf {
int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);

int __cgroup_bpf_attach(struct cgroup *cgrp,
			struct bpf_prog *prog, struct bpf_prog *replace_prog,
			struct bpf_cgroup_link *link,
			enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			struct bpf_cgroup_link *link,
			enum bpf_attach_type type);
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		       union bpf_attr __user *uattr);

/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp,
		      struct bpf_prog *prog, struct bpf_prog *replace_prog,
		      struct bpf_cgroup_link *link, enum bpf_attach_type type,
		      u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		     union bpf_attr __user *uattr);

int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum cgroup_bpf_attach_type atype);
+5 −1
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ struct misc_cg;
/*
 * Per-resource accounting state for one misc-controller resource.
 * NOTE(review): this is a diff view — both the removed field (failed)
 * and its replacement (events) are shown below.
 */
struct misc_res {
	unsigned long max;	/* limit; may exceed the misc.capacity value */
	atomic_long_t usage;	/* currently charged amount */
	bool failed;		/* old flag: replaced by the events counter in this commit */
	atomic_long_t events;	/* times a charge would cross max; shown in misc.events */
};

/**
@@ -46,6 +46,10 @@ struct misc_res {
 */
struct misc_cg {
	struct cgroup_subsys_state css;	/* base cgroup subsystem state; must come first */

	/* misc.events file handle, used to notify readers on event count changes */
	struct cgroup_file events_file;

	/* per-resource state, indexed by enum misc_res_type */
	struct misc_res res[MISC_CG_RES_TYPES];
};

+45 −9
Original line number Diff line number Diff line
@@ -430,7 +430,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
 * Exactly one of @prog or @link can be non-null.
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_attach(struct cgroup *cgrp,
static int __cgroup_bpf_attach(struct cgroup *cgrp,
			       struct bpf_prog *prog, struct bpf_prog *replace_prog,
			       struct bpf_cgroup_link *link,
			       enum bpf_attach_type type, u32 flags)
@@ -523,6 +523,20 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
	return err;
}

/*
 * Locked wrapper: take cgroup_mutex and forward to __cgroup_bpf_attach(),
 * which requires the mutex to be held. Returns its result unchanged.
 */
static int cgroup_bpf_attach(struct cgroup *cgrp,
			     struct bpf_prog *prog, struct bpf_prog *replace_prog,
			     struct bpf_cgroup_link *link,
			     enum bpf_attach_type type,
			     u32 flags)
{
	int err;

	mutex_lock(&cgroup_mutex);
	err = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
	mutex_unlock(&cgroup_mutex);

	return err;
}

/* Swap updated BPF program for given link in effective program arrays across
 * all descendant cgroups. This function is guaranteed to succeed.
 */
@@ -672,13 +686,13 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @prog: A link to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			       struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
	enum cgroup_bpf_attach_type atype;
@@ -730,8 +744,19 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
	return err;
}

/*
 * Locked wrapper: take cgroup_mutex and forward to __cgroup_bpf_detach()
 * with a NULL link (prog-based detach only). Returns its result unchanged.
 */
static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			     enum bpf_attach_type type)
{
	int err;

	mutex_lock(&cgroup_mutex);
	err = __cgroup_bpf_detach(cgrp, prog, NULL, type);
	mutex_unlock(&cgroup_mutex);

	return err;
}

/* Must be called with cgroup_mutex held to avoid races. */
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			      union bpf_attr __user *uattr)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
@@ -789,6 +814,17 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
	return ret;
}

/*
 * Locked wrapper: take cgroup_mutex and forward to __cgroup_bpf_query(),
 * which must run under the mutex. Returns its result unchanged.
 */
static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			    union bpf_attr __user *uattr)
{
	int err;

	mutex_lock(&cgroup_mutex);
	err = __cgroup_bpf_query(cgrp, attr, uattr);
	mutex_unlock(&cgroup_mutex);

	return err;
}

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
+4 −13
Original line number Diff line number Diff line
@@ -63,9 +63,6 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
	for_each_root(root) {
		struct cgroup *from_cgrp;

		if (root == &cgrp_dfl_root)
			continue;

		spin_lock_irq(&css_set_lock);
		from_cgrp = task_cgroup_from_root(from, root);
		spin_unlock_irq(&css_set_lock);
@@ -662,11 +659,9 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
	/*
	 * ideally we don't want subsystems moving around while we do this.
	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
	 * subsys/hierarchy state.
	 * Grab the subsystems state racily. No need to add avenue to
	 * cgroup_mutex contention.
	 */
	mutex_lock(&cgroup_mutex);

	for_each_subsys(ss, i)
		seq_printf(m, "%s\t%d\t%d\t%d\n",
@@ -674,7 +669,6 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
			   atomic_read(&ss->root->nr_cgrps),
			   cgroup_ssid_enabled(i));

	mutex_unlock(&cgroup_mutex);
	return 0;
}

@@ -701,8 +695,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
	    kernfs_type(kn) != KERNFS_DIR)
		return -EINVAL;

	mutex_lock(&cgroup_mutex);

	/*
	 * We aren't being called from kernfs and there's no guarantee on
	 * @kn->priv's validity.  For this and css_tryget_online_from_dir(),
@@ -710,9 +702,8 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
	 */
	rcu_read_lock();
	cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
	if (!cgrp || cgroup_is_dead(cgrp)) {
	if (!cgrp || !cgroup_tryget(cgrp)) {
		rcu_read_unlock();
		mutex_unlock(&cgroup_mutex);
		return -ENOENT;
	}
	rcu_read_unlock();
@@ -740,7 +731,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
	}
	css_task_iter_end(&it);

	mutex_unlock(&cgroup_mutex);
	cgroup_put(cgrp);
	return 0;
}

Loading